--- /dev/null
+EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include
--- /dev/null
+EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+EXTRA_DIST = Rules.linux archdep.m4 MCP
+DIST_SUBDIRS = libcfs portals knals unals utils tests doc router
+SUBDIRS = libcfs portals knals unals utils tests doc router
--- /dev/null
+include fs/lustre/portals/Kernelenv
+
+obj-y += portals/
+obj-y += libcfs/
+obj-y += knals/
+obj-y += router/
--- /dev/null
+# included in Linux kernel directories
+# Rules for module building
+
+# NOTE(review): MODLINK is assigned here, but the link rules below reference
+# $(MOD_LINK) - confirm which name is intended.
+MODLINK=@MOD_LINK@
+if LINUX25
+
+
+# Name of the file being built with every ".c", "-" and ".o" removed; passed
+# to the compiler as KBUILD_BASENAME.
+basename=$(shell echo $< | sed -e 's/\.c//g' | sed -e 's/-//g' | sed -e 's/\.o//g')
+AM_CPPFLAGS= -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -mpreferred-stack-boundary=2 -DKBUILD_MODNAME=$(MODULE) -DKBUILD_BASENAME=$(basename)
+
+# Link the objects listed in $(MODULE)_OBJECTS into one relocatable
+# $(MODULE).o (ld -r) using the emulation named by $(MOD_LINK).
+$(MODULE).o: $($(MODULE)_OBJECTS)
+ $(LD) -m $(MOD_LINK) -r -o $(MODULE).o $($(MODULE)_OBJECTS)
+
+
+
+else
+
+
+# Same link rule as the LINUX25 branch; this branch sets no extra AM_CPPFLAGS.
+$(MODULE).o: $($(MODULE)_OBJECTS)
+ $(LD) -m $(MOD_LINK) -r -o $(MODULE).o $($(MODULE)_OBJECTS)
+
+
+
+endif
+
+
+# Rebuild the TAGS (etags) and tags (ctags) indexes in $(top_srcdir).
+# The old index files are removed first because both tools are run with -a.
+# Sources are every *.c / *.h file under $(top_srcdir)/../portals/ and under
+# $(top_srcdir); for the latter, paths matching ".orig" are filtered out.
+tags:
+ rm -f $(top_srcdir)/TAGS
+ rm -f $(top_srcdir)/tags
+ find $(top_srcdir)/../portals/ -name '*.[hc]' | xargs etags -a
+ find $(top_srcdir) -name '*.[hc]' | grep -v ".orig" | xargs etags -a
+ find $(top_srcdir)/../portals/ -name '*.[hc]' | xargs ctags -a
+ find $(top_srcdir) -name '*.[hc]' | grep -v ".orig" | xargs ctags -a
+
+
+
+
--- /dev/null
+
+# -------- in kernel compilation? (2.5 only) -------------
+AC_ARG_ENABLE(inkernel, [ --enable-inkernel set up 2.5 kernel makefiles])
+AM_CONDITIONAL(INKERNEL, test x$enable_inkernel = xyes)
+# Report the --enable-inkernel choice. No shell variable named INKERNEL is
+# assigned in this file, so print the option value itself (defaulting to no).
+echo "Makefile for in kernel build: ${enable_inkernel:-no}"
+
+# -------- liblustre compilation --------------
+AC_ARG_WITH(lib, [ --with-lib compile lustre library], host_cpu="lib")
+
+# -------- set linuxdir ------------
+
+AC_ARG_WITH(linux, [ --with-linux=[path] set path to Linux source (default=/usr/src/linux)],LINUX=$with_linux,LINUX=/usr/src/linux)
+AC_SUBST(LINUX)
+
+# --------- UML? --------------------
+AC_MSG_CHECKING(if you are running user mode linux for $host_cpu ...)
+if test $host_cpu = "lib" ; then
+ host_cpu="lib"
+ AC_MSG_RESULT(no building Lustre library)
+else
+ if test -e $LINUX/include/asm-um ; then
+ if test X`ls -id $LINUX/include/asm/ | awk '{print $1}'` = X`ls -id $LINUX/include/asm-um | awk '{print $1}'` ; then
+ host_cpu="um";
+ AC_MSG_RESULT(yes)
+ else
+ AC_MSG_RESULT(no (asm doesn't point at asm-um))
+ fi
+
+ else
+ AC_MSG_RESULT(no (asm-um missing))
+ fi
+fi
+
+# --------- Linux 25 ------------------
+
+# Detect a 2.5 kernel tree: the presence of include/linux/namei.h under $LINUX
+# is used as the marker. The answer is stored in the shell variable linux25
+# and exposed to the Makefiles through the LINUX25 automake conditional.
+AC_MSG_CHECKING(if you are running linux 2.5)
+if test -e $LINUX/include/linux/namei.h ; then
+ linux25="yes"
+ AC_MSG_RESULT(yes)
+else
+ linux25="no"
+ AC_MSG_RESULT(no)
+fi
+AM_CONDITIONAL(LINUX25, test x$linux25 = xyes)
+# Print the value that was actually computed above (lowercase linux25).
+echo "Makefiles for in linux 2.5 build: $linux25"
+
+# ------- Makeflags ------------------
+
+AC_MSG_CHECKING(setting make flags system architecture: )
+case ${host_cpu} in
+ lib )
+ AC_MSG_RESULT($host_cpu)
+ KCFLAGS='-g -Wall '
+ KCPPFLAGS='-D__arch_lib__ '
+ MOD_LINK=elf_i386
+;;
+ um )
+ AC_MSG_RESULT($host_cpu)
+ KCFLAGS='-g -Wall -pipe -Wno-trigraphs -Wstrict-prototypes -fno-strict-aliasing -fno-common '
+ case ${linux25} in
+ yes )
+ KCPPFLAGS='-D__KERNEL__ -U__i386__ -Ui386 -DUM_FASTCALL -D__arch_um__ -DSUBARCH="i386" -DNESTING=0 -D_LARGEFILE64_SOURCE -Derrno=kernel_errno -DPATCHLEVEL=4 -DMODULE -I$(LINUX)/arch/um/include -I$(LINUX)/arch/um/kernel/tt/include -I$(LINUX)/arch/um/kernel/skas/include -O2 -nostdinc -iwithprefix include -DKBUILD_BASENAME=$(MODULE) -DKBUILD_MODNAME=$(MODULE) '
+ ;;
+ * )
+ KCPPFLAGS='-D__KERNEL__ -U__i386__ -Ui386 -DUM_FASTCALL -D__arch_um__ -DSUBARCH="i386" -DNESTING=0 -D_LARGEFILE64_SOURCE -Derrno=kernel_errno -DPATCHLEVEL=4 -DMODULE -I$(LINUX)/arch/um/kernel/tt/include -I$(LINUX)/arch/um/include '
+ ;;
+ esac
+
+ MOD_LINK=elf_i386
+;;
+ i*86 )
+ AC_MSG_RESULT($host_cpu)
+ KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -pipe'
+ case ${linux25} in
+ yes )
+ KCPPFLAGS='-D__KERNEL__ -DMODULE -march=i686 -I$(LINUX)/include/asm-i386/mach-default -nostdinc -iwithprefix include '
+ ;;
+ * )
+ KCPPFLAGS='-D__KERNEL__ -DMODULE '
+ ;;
+ esac
+ MOD_LINK=elf_i386
+;;
+
+ alphaev6 )
+ AC_MSG_RESULT($host_cpu)
+ KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -pipe -mno-fp-regs -ffixed-8 -mcpu=ev5 -Wa,-mev6'
+ KCPPFLAGS='-D__KERNEL__ -DMODULE '
+ MOD_LINK=elf64alpha
+;;
+
+ alphaev67 )
+ AC_MSG_RESULT($host_cpu)
+ KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -pipe -mno-fp-regs -ffixed-8 -mcpu=ev5 -Wa,-mev6'
+ KCPPFLAGS='-D__KERNEL__ -DMODULE '
+ MOD_LINK=elf64alpha
+;;
+
+ alpha* )
+ AC_MSG_RESULT($host_cpu)
+ KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -pipe -mno-fp-regs -ffixed-8 -mcpu=ev5 -Wa,-mev5'
+ KCPPFLAGS='-D__KERNEL__ -DMODULE '
+ MOD_LINK=elf64alpha
+;;
+
+ ia64 )
+ AC_MSG_RESULT($host_cpu)
+ KCFLAGS='-gstabs -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -ffixed-r13 -mfixed-range=f10-f15,f32-f127 -falign-functions=32 -mb-step'
+ KCPPFLAGS='-D__KERNEL__ -DMODULE'
+ MOD_LINK=elf64_ia64
+;;
+
+ sparc64 )
+ AC_MSG_RESULT($host_cpu)
+ KCFLAGS='-O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -Wno-unused -m64 -pipe -mno-fpu -mcpu=ultrasparc -mcmodel=medlow -ffixed-g4 -fcall-used-g5 -fcall-used-g7 -Wno-sign-compare -Wa,--undeclared-regs'
+ KCPPFLAGS='-D__KERNEL__'
+ MOD_LINK=elf64_sparc
+
+;;
+
+ powerpc )
+ AC_MSG_RESULT($host_cpu)
+ KCFLAGS='-O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -D__powerpc__ -fsigned-char -msoft-float -pipe -ffixed-r2 -Wno-uninitialized -mmultiple -mstring'
+ KCPPFLAGS='-D__KERNEL__'
+ MOD_LINK=elf32ppclinux
+;;
+
+ *)
+ AC_ERROR("Unknown Linux Platform: $host_cpu")
+;;
+esac
+
+# ----------- make dep run? ------------------
+
+if test $host_cpu != "lib" ; then
+ AC_MSG_CHECKING(if make dep has been run in kernel source (host $host_cpu) )
+ if test -f $LINUX/include/linux/config.h ; then
+ AC_MSG_RESULT(yes)
+ else
+ AC_MSG_ERROR(** cannot find $LINUX/include/linux/config.h. Run make dep in $LINUX.)
+ fi
+fi
+
+# ------------ include paths ------------------
+
+if test $host_cpu != "lib" ; then
+ KINCFLAGS='-I$(top_srcdir)/include -I$(top_srcdir)/portals/include -I$(LINUX)/include'
+else
+ KINCFLAGS='-I$(top_srcdir)/include -I$(top_srcdir)/portals/include'
+fi
+CPPFLAGS="$KINCFLAGS $ARCHCPPFLAGS"
+
+if test $host_cpu != "lib" ; then
+# ------------ autoconf.h ------------------
+ AC_MSG_CHECKING(if autoconf.h is in kernel source)
+ if test -f $LINUX/include/linux/autoconf.h ; then
+ AC_MSG_RESULT(yes)
+ else
+ AC_MSG_ERROR(** cannot find $LINUX/include/linux/autoconf.h. Run make config in $LINUX.)
+ fi
+
+# ------------ RELEASE and moduledir ------------------
+ AC_MSG_CHECKING(for Linux release)
+
+ dnl We need to rid ourselves of the nasty [ ] quotes.
+ changequote(, )
+ dnl Get release from version.h
+ RELEASE="`sed -ne 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_-]*\).*/\1/p' $LINUX/include/linux/version.h`"
+ changequote([, ])
+
+ moduledir='$(libdir)/modules/'$RELEASE/kernel
+ AC_SUBST(moduledir)
+
+ modulefsdir='$(moduledir)/fs/$(PACKAGE)'
+ AC_SUBST(modulefsdir)
+
+ AC_MSG_RESULT($RELEASE)
+ AC_SUBST(RELEASE)
+
+# ---------- modversions? --------------------
+ AC_MSG_CHECKING(for MODVERSIONS)
+ if egrep -e 'MODVERSIONS.*1' $LINUX/include/linux/autoconf.h >/dev/null 2>&1;
+ then
+ MFLAGS="-DMODULE -DMODVERSIONS -include $LINUX/include/linux/modversions.h -DEXPORT_SYMTAB"
+ AC_MSG_RESULT(yes)
+ else
+ MFLAGS=
+ AC_MSG_RESULT(no)
+ fi
+fi
+
+# ---------- SMP -------------------
+#AC_MSG_CHECKING(for SMP)
+#if egrep -e SMP=y $LINUX/.config >/dev/null 2>&1; then
+# SMPFLAG=
+# AC_MSG_RESULT(yes)
+#else
+# SMPFLAG=
+# AC_MSG_RESULT(no)
+#fi
+
+CFLAGS="$KCFLAGS"
+CPPFLAGS="$KINCFLAGS $KCPPFLAGS $MFLAGS "
+
+AC_SUBST(MOD_LINK)
+AC_SUBST(LINUX25)
\ No newline at end of file
--- /dev/null
+#!/bin/sh
+
+aclocal &&
+automake --add-missing &&
+${AUTOCONF:-autoconf}
--- /dev/null
+
+# ---------- directories ---------
+
+
+# --------- unsigned long long sane? -------
+
+# The code assumes an 8-byte unsigned long long; stop configure otherwise.
+# AC_CHECK_SIZEOF leaves its answer in ac_cv_sizeof_unsigned_long_long.
+AC_CHECK_SIZEOF(unsigned long long, 0)
+echo "---> size SIZEOF $ac_cv_sizeof_unsigned_long_long"
+# Quote the value so that an empty result fails the comparison cleanly
+# instead of producing a test syntax error.
+if test "$ac_cv_sizeof_unsigned_long_long" != 8 ; then
+ AC_MSG_ERROR([** we assume that sizeof(unsigned long long) == 8. Tell phil@clusterfs.com])
+fi
+
+# directories for binaries
+ac_default_prefix=
+bindir='${exec_prefix}/usr/bin'
+sbindir='${exec_prefix}/usr/sbin'
+includedir='${prefix}/usr/include'
+
+# Directories for documentation and demos.
+docdir='${prefix}/usr/share/doc/$(PACKAGE)'
+AC_SUBST(docdir)
+demodir='$(docdir)/demo'
+AC_SUBST(demodir)
+pkgexampledir='${prefix}/usr/lib/$(PACKAGE)/examples'
+AC_SUBST(pkgexampledir)
+pymoddir='${prefix}/usr/lib/${PACKAGE}/python/Lustre'
+AC_SUBST(pymoddir)
+modulenetdir='$(moduledir)/net/$(PACKAGE)'
+AC_SUBST(modulenetdir)
+
+
+# ---------- BAD gcc? ------------
+AC_PROG_RANLIB
+AC_PROG_CC
+AC_MSG_CHECKING(for buggy compiler)
+CC_VERSION=`$CC -v 2>&1 | grep "^gcc version"`
+bad_cc() {
+ echo
+ echo " '$CC_VERSION'"
+ echo " has been known to generate bad code, "
+ echo " please get an updated compiler."
+ AC_MSG_ERROR(sorry)
+}
+TMP_VERSION=`echo $CC_VERSION | cut -c 1-16`
+if test "$TMP_VERSION" = "gcc version 2.95"; then
+ bad_cc
+fi
+case "$CC_VERSION" in
+ # ost_pack_niobuf putting 64bit NTOH temporaries on the stack
+ # without "sub $0xc,%esp" to protect the stack from being
+ # stomped on by interrupts (bug 606)
+ "gcc version 2.96 20000731 (Red Hat Linux 7.1 2.96-98)")
+ bad_cc
+ ;;
+ # mandrake's similar sub 0xc compiler bug
+ # http://marc.theaimsgroup.com/?l=linux-kernel&m=104748366226348&w=2
+ "gcc version 2.96 20000731 (Mandrake Linux 8.1 2.96-0.62mdk)")
+ bad_cc
+ ;;
+ *)
+ AC_MSG_RESULT(no known problems)
+ ;;
+esac
+# end ------ BAD gcc? ------------
+
+# -------- Check for required packages --------------
+
+# this doesn't seem to work on older autoconf
+# AC_CHECK_LIB(readline, readline,,)
+AC_ARG_ENABLE(readline, [ --enable-readline use readline library],,
+ enable_readline="yes")
+
+# Start with readline support off, then switch it on when requested.
+LIBREADLINE=""
+HAVE_LIBREADLINE=""
+if test "$enable_readline" = "yes" ; then
+ LIBREADLINE="-lreadline -lncurses"
+ HAVE_LIBREADLINE="-DHAVE_LIBREADLINE=1"
+fi
+AC_SUBST(LIBREADLINE)
+AC_SUBST(HAVE_LIBREADLINE)
+
+AC_ARG_ENABLE(efence, [ --enable-efence use efence library],,
+ enable_efence="no")
+
+# Start with efence support off, then switch it on when requested.
+LIBEFENCE=""
+HAVE_LIBEFENCE=""
+if test "$enable_efence" = "yes" ; then
+ LIBEFENCE="-lefence"
+ HAVE_LIBEFENCE="-DHAVE_LIBEFENCE=1"
+fi
+AC_SUBST(LIBEFENCE)
+AC_SUBST(HAVE_LIBEFENCE)
+
+AM_CONDITIONAL(LIBLUSTRE, test x$host_cpu = xlib)
+AC_MSG_CHECKING(if you are building lib lustre)
+if test "$host_cpu" = "lib"; then
+ AC_MSG_RESULT(yes)
+ libdir='${exec_prefix}/lib/lustre'
+else
+ AC_MSG_RESULT(no)
+fi
+
+# end -------- Kernel build environment. -----------------
+
+
--- /dev/null
+# This version is here to make autoconf happy; the name is a file which is
+# "unique" to this directory so that configure knows where it should run.
+AC_INIT(knals/Makefile.am, 3.0)
+AC_CANONICAL_SYSTEM
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+# Automake variables. Steal the version number from packaging/intersync.spec
+AM_INIT_AUTOMAKE(portals, builtin([esyscmd], [sed -ne '/.*define IVERSION /{ s/.*IVERSION //; p; }' libcfs/module.c]))
+# AM_MAINTAINER_MODE
+
+# Pull in the local configure fragments. sinclude skips a missing file
+# silently, so each call must be well formed for its fragment to be read.
+sinclude(archdep.m4)
+sinclude(build.m4)
+sinclude(portalsconf.m4)
+
+# When --enable-inkernel was given, copy each *.mk file over the matching *.in
+# template and have configure generate Kernelenv from Kernelenv.in.
+# NOTE(review): the cp commands are not checked for failure, and another
+# AC_OUTPUT call follows later in this file - confirm that two AC_OUTPUT
+# calls behave as intended with the autoconf version in use.
+if test x$enable_inkernel = xyes ; then
+cp Kernelenv.mk Kernelenv.in
+cp Makefile.mk Makefile.in
+cp libcfs/Makefile.mk libcfs/Makefile.in
+cp portals/Makefile.mk portals/Makefile.in
+cp knals/Makefile.mk knals/Makefile.in
+cp knals/socknal/Makefile.mk knals/socknal/Makefile.in
+cp router/Makefile.mk router/Makefile.in
+AC_OUTPUT(Kernelenv)
+fi
+
+
+AM_CONFIG_HEADER(include/config.h)
+
+AC_OUTPUT([Rules.linux Makefile libcfs/Makefile portals/Makefile \
+ unals/Makefile knals/Makefile router/Makefile \
+ knals/socknal/Makefile knals/gmnal/Makefile knals/qswnal/Makefile \
+ knals/scimacnal/Makefile knals/toenal/Makefile \
+ utils/Makefile tests/Makefile doc/Makefile \
+ packaging/Makefile packaging/portals.spec ])
+
--- /dev/null
+Makefile
+Makefile.in
+*.eps
+*.pdf
--- /dev/null
+In this document I will try to draw the data structures and how they
+interrelate in the Portals 3 reference implementation. It is probably
+best shown with a drawing, so there may be an additional xfig or
+Postscript figure.
+
+
+MEMORY POOLS:
+------------
+
+First, a digression on memory allocation in the library. As mentioned
+in the NAL Writer's Guide, the library does not link against any
+standard C libraries and as such is unable to dynamically allocate
+memory on its own. It requires that the NAL implement a method
+for allocation that is appropriate for the protection domain in
+which the library lives. This is only called when a network
+interface is initialized to allocate the Portals object pools.
+
+These pools are preallocated blocks of objects that the library
+can rapidly make active and manage with a minimum of overhead.
+It also cuts down on overhead for setting up structures
+since the NAL->malloc() callback does not need to be called
+for each object.
+
+The objects are maintained on a per-object type singly linked free
+list and contain a pointer to the next free object. This pointer
+is NULL if the object is not on the free list and is non-zero
+if it is on the list. The special sentinel value of 0xDEADBEEF
+is used to mark the end of the free list since NULL could
+indicate that the last object in the list is not free.
+
+When one of the lib_*_alloc() functions is called, the library
+returns the head of the free list and advances the head pointer
+to the next item on the list. The special case of 0xDEADBEEF is
+checked and a NULL pointer is returned if there are no more
+objects of this type available. The lib_*_free() functions
+are even simpler -- check to ensure that the object is not already
+free, set its next pointer to the current head and then set
+the head to be this newly freed object.
+
+Since C does not have templates, I did the next best thing and wrote
+the memory pool allocation code as a macro that expands based on the
+type of the argument. The mk_alloc(T) macro expands to
+write the _lib_T_alloc() and lib_T_free() functions.
+It requires that the object have a pointer of the type T named
+"next_free". There are also functions that map _lib_T_alloc()
+to lib_T_alloc() so that the library can add some extra
+functionality to the T constructor.
+
+
+
+LINKED LISTS:
+------------
+
+Many of the active Portals objects are stored in doubly linked lists
+when they are active. These are always implemented with the pointer
+to the next object and a pointer to the next pointer of the
+previous object. This avoids the "dummy head" object or
+special cases for inserting at the beginning or end of the list.
+The pointer manipulations are a little hairy at times, but
+I hope that they are understandable.
+
+The actual linked list code is implemented as macros in <lib-p30.h>,
+although the object has to know about
+
+
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+LYX2PDF = lyx --export pdf
+LYX2TXT = lyx --export text
+LYX2HTML = lyx --export html
+SUFFIXES = .lin .lyx .pdf .sgml .html .txt .fig .eps
+
+DOCS = portals3.pdf
+IMAGES = file.eps flow_new.eps get.eps mpi.eps portals.eps put.eps
+LYXFILES= portals3.lyx
+
+MAINTAINERCLEANFILES = $(IMAGES) $(DOCS) $(GENERATED)
+GENERATED =
+EXTRA_DIST = $(DOCS) $(IMAGES) $(LYXFILES)
+
+all: $(DOCS)
+
+# update date and version in document
+date := $(shell date +%x)
+tag := $(shell echo '$$Name: $$' | sed -e 's/^\$$Na''me: *\$$$$/HEAD/; s/^\$$Na''me: \(.*\) \$$$$/\1/')
+addversion = sed -e 's|@T''AG@|$(tag)|g; s|@VER''SION@|$(VERSION)|g; s|@DA''TE@|$(date)|g'
+
+# Regenerate when the $(VERSION) or $Name: $ changes.
+.INTERMEDIATE: $(GENERATED)
+$(GENERATED) : %.lyx: %.lin Makefile
+ $(addversion) $< > $@
+
+.lyx.pdf:
+ @$(LYX2PDF) $< || printf "\n*** Warning: not creating PDF docs; install lyx to rectify this\n"
+
+.lyx.txt:
+ @$(LYX2TXT) $< || printf "\n*** Warning: not creating text docs; install lyx to rectify this\n"
+.lyx.html:
+ @$(LYX2HTML) $< || printf "\n*** Warning: not creating HTML docs; install lyx to rectify this\n"
+.fig.eps:
+ -fig2dev -L eps $< > $@
+
+portals3.pdf portals3.txt portals3.html: $(IMAGES) portals3.lyx
+
+syncweb: portals3.pdf
+# cp lustre.pdf /usr/src/www/content/lustre/docs/lustre.pdf
+# ( cd /usr/src/www ; make lustre ; make synclustre )
+
--- /dev/null
+This documents the life cycle of a message as it arrives and is handled by
+a basic async, packetized NAL. There are four types of messages that have
+slightly different life cycles, so they are addressed independently.
+
+
+Put request
+-----------
+
+1. NAL notices that there is an incoming message header on the network
+and reads a ptl_hdr_t in from the wire.
+
+2. It may store additional NAL specific data that provides context
+for this event in a void* that it will interpret in some fashion
+later.
+
+3. The NAL calls lib_parse() with a pointer to the header and its
+private data structure.
+
+4. The library decodes the header and may build a message state
+object that describes the event to be written and the ACK to be
+sent, if any. It then calls nal->recv() with the private data
+that the NAL passed in, a pointer to the message state object
+and a translated user address.
+
+ The NAL will have been given a chance to pretranslate
+ all user addresses when the buffers are created. This
+ process is described in the NAL-HOWTO.
+
+5. The NAL should restore whatever context it required from the
+private data pointer, begin receiving the bytes and possibly store
+some extra state of its own. It should return at this point.
+
+
+
+Get request
+-----------
+
+1. As with a Put, the NAL notices the incoming message header and
+passes it to lib_parse().
+
+2. The library decodes the header and calls nal->recv() with a
+zero byte length, offset and destination to instruct it to clean
+up the wire after reading the header. The private data will
+be passed in as well, allowing the NAL to retrieve any state
+or context that it requires.
+
+3. The library may build a message state object to possibly
+write an event log or invalidate a memory region.
+
+4. The library will build a ptl_msg_t header that specifies the
+Portals protocol information for delivery at the remote end.
+
+5. The library calls nal->send() with the pre-built header,
+the optional message state object, the four part address
+component, a translated user pointer + offset, and some
+other things.
+
+6. The NAL is to put the header on the wire or copy it at
+this point (since it is off the stack). It should store some
+amount of state about its current position in the message and
+the destination address.
+
+7. And then return to the library.
+
+
+Reply request
+-------------
+
+1. Starting at "The library decodes the header..."
+
+2. The library decodes the header and calls nal->recv()
+to bring in the rest of the message. Flow continues in
+exactly the same fashion as with all other receives.
+
+
+Ack request
+-----------
+
+1. The library decodes the header, builds the appropriate data
+structures for the event in a message state object and calls nal->recv()
+with a zero byte length, etc.
+
+
+Packet arrival
+--------------
+
+1. The NAL should notice the arrival of a packet, retrieve whatever
+state it needs from the message ID or other NAL specific header data
+and place the data bytes directly into the user address that was
+given to nal->recv().
+
+ How this happens is outside the scope of the Portals library
+ and solely determined by the NAL...
+
+2. If this is the last packet in a message, the NAL should retrieve
+the lib_msg_t *cookie that it was given in the call to nal->recv()
+and pass it to lib_finalize(). lib_finalize() may call nal->send()
+to send an ACK, nal->write() to record an entry in the event log,
+nal->invalidate() to unregister a region of memory or do nothing at all.
+
+3. It should then clean up any remaining NAL specific state about
+the message and go back into the main loop.
+
+
+Outgoing packets
+----------------
+
+1. When the NAL has pending output, it should put the packets on
+the wire wrapped with whatever implementation-specific wrappers.
+
+2. Once it has output all the packets of a message it should
+call lib_finalize() with the message state object that was
+handed to nal->send(). This allows the library to clean
+up its state regarding the message and write any pending event
+entries.
+
+
+
--- /dev/null
+This document is a first attempt at describing how to write a NAL
+for the Portals 3 library. It also defines the library architecture
+and the abstraction of protection domains.
+
+
+First, an overview of the architecture:
+
+ Application
+
+----|----+--------
+ |
+ API === NAL (User space)
+ |
+---------+---|-----
+ |
+ LIB === NAL (Library space)
+ |
+---------+---|-----
+
+ Physical wire (NIC space)
+
+
+Application
+ API
+API-side NAL
+------------
+LIB-side NAL
+ LIB
+LIB-side NAL
+ wire
+
+Communication is through the indicated paths via well defined
+interfaces. The API and LIB portions are written to be portable
+across platforms and do not depend on the network interface.
+
+Communication between the application and the API code is
+defined in the Portals 3 API specification. This is the
+user-visible portion of the interface and should be the most
+stable.
+
+
+
+API-side NAL:
+------------
+
+The user space NAL needs to implement only a few functions
+that are stored in a nal_t data structure and called by the
+API-side library:
+
+ int forward( nal_t *nal,
+ int index,
+ void *args,
+ size_t arg_len,
+ void *ret,
+ size_t ret_len
+ );
+
+Most of the data structures in the portals library are held in
+the LIB section of the code, so it is necessary to forward API
+calls across the protection domain to the library. This is
+handled by the NAL's forward method. Once the argument and return
+blocks are on the remote side the NAL should call lib_dispatch()
+to invoke the appropriate API function.
+
+ int validate( nal_t *nal,
+ void *base,
+ size_t extent,
+ void **trans_base,
+ void **trans_data
+ );
+
+The validate method provides a means for the NAL to prevalidate
+and possibly pretranslate user addresses into a form suitable
+for fast use by the network card or kernel module. The trans_base
+pointer will be used by the library every time it needs to
+refer to the block of memory. The trans_data result is a
+cookie that will be handed to the NAL along with the trans_base.
+
+The library never performs calculations on the trans_base value;
+it only computes offsets that are then handed to the NAL.
+
+
+ int shutdown( nal_t *nal, int interface );
+
+Brings down the network interface. The remote NAL side should
+call lib_fini() to bring down the library side of the network.
+
+ void yield( nal_t *nal );
+
+This allows the user application to gracefully give up the processor
+while busy waiting. Performance critical applications may not
+want to take the time to call this function, so it should be an
+option to the PtlEQWait call. Right now it is not implemented as such.
+
+Lastly, the NAL must implement a function named PTL_IFACE_*, where
+* is the name of the NAL such as PTL_IFACE_IP or PTL_IFACE_MYR.
+This initialization function is to set up communication with the
+library-side NAL, which should call lib_init() to bring up the
+network interface.
+
+
+
+LIB-side NAL:
+------------
+
+On the library-side, the NAL has much more responsibility. It
+is responsible for calling lib_dispatch() on behalf of the user,
+it is also responsible for bringing packets off the wire and
+pushing bits out. As on the user side, the methods are stored
+in a nal_cb_t structure that is defined on a per network
+interface basis.
+
+The calls to lib_dispatch() need to be examined. The prototype:
+
+ void lib_dispatch(
+ nal_cb_t *nal,
+ void *private,
+ int index,
+ void *arg_block,
+ void *ret_block
+ );
+
+has two complications. The private field is a NAL-specific
+value that will be passed to any callbacks produced as a result
+of this API call. Kernel module implementations may use this
+for task structures, or perhaps network card data. It is ignored
+by the library.
+
+Secondly, the arg_block and ret_block must be in the same protection
+domain as the library. The NAL's two halves must communicate the
+sizes and perform the copies. After the call, the buffer pointed
+to by ret_block will be filled in and should be copied back to
+the user space. How this is to be done is NAL specific.
+
+ int lib_parse(
+ nal_cb_t *nal,
+ ptl_hdr_t *hdr,
+ void *private
+ );
+
+This is the only other entry point into the library from the NAL.
+When the NAL detects an incoming message on the wire it should read
+sizeof(ptl_hdr_t) bytes and pass a pointer to the header to
+lib_parse(). It may set private to be anything that it needs to
+tie the incoming message to callbacks that are made as a result
+of this event.
+
+The method calls are:
+
+ int (*send)(
+ nal_cb_t *nal,
+ void *private,
+ lib_msg_t *cookie,
+ ptl_hdr_t *hdr,
+ int nid,
+ int pid,
+ int gid,
+ int rid,
+ user_ptr trans_base,
+ user_ptr trans_data,
+ size_t offset,
+ size_t len
+ );
+
+This is a tricky function -- it must support async output
+of messages as well as properly synchronized event log writing.
+The private field is the same that was passed into lib_dispatch()
+or lib_parse() and may be used to tie this call to the event
+that initiated the entry to the library.
+
+The cookie is a pointer to a library private value that must
+be passed to lib_finalize() once the message has been completely
+sent. It should not be examined by the NAL for any meaning.
+
+The four ID fields are passed in, although some implementations
+may not use all of them.
+
+The single base pointer has been replaced with the translated
+address that the API NAL generated in the api_nal->validate()
+call. The trans_data is unchanged and the offset is in bytes.
+
+
+ int (*recv)(
+ nal_cb_t *nal,
+ void *private,
+ lib_msg_t *cookie,
+ user_ptr trans_base,
+ user_ptr trans_data,
+ size_t offset,
+ size_t mlen,
+ size_t rlen
+ );
+
+This callback will only be called in response to lib_parse().
+The cookie, trans_base and trans_data are as discussed in send().
+The NAL should read mlen bytes from the wire, deposit them into
+trans_base + offset and then discard (rlen - mlen) bytes.
+Once the entire message has been received the NAL should call
+lib_finalize() with the lib_msg_t *cookie.
+
+The special arguments of base=NULL, data=NULL, offset=0, mlen=0, rlen=0
+are used to indicate that the NAL should clean up the wire. This could
+be implemented as a blocking call, although having it return as quickly
+as possible is desirable.
+
+ int (*write)(
+ nal_cb_t *nal,
+ void *private,
+ user_ptr trans_addr,
+ user_ptr trans_data,
+ size_t offset,
+
+ void *src_addr,
+ size_t len
+ );
+
+This is essentially a cross-protection domain memcpy(). The user address
+has been pretranslated by the api_nal->validate() call.
+
+ void *(*malloc)(
+ nal_cb_t *nal,
+ size_t len
+ );
+
+ void (*free)(
+ nal_cb_t *nal,
+ void *buf
+ );
+
+Since the NAL may be in a non-standard hosted environment it can
+not call malloc(). This allows the library side NAL to implement
+the system specific malloc(). In the current reference implementation
+the library only calls nal->malloc() when the network interface is
+initialized and then calls free when it is brought down. The library
+maintains its own pool of objects for allocation so only one call to
+malloc is made per object type.
+
+ void (*invalidate)(
+ nal_cb_t *nal,
+ user_ptr trans_base,
+ user_ptr trans_data,
+ size_t extent
+ );
+
+User addresses are validated/translated at the user-level API NAL
+method, which is likely to push them to this level. Meanwhile,
+the library NAL will be notified when the library no longer
+needs the buffer. Overlapped buffers are not detected by the
+library, so the NAL should ref count each page involved.
+
+Unfortunately we have a few bugs when the invalidate method is
+called. It is still in progress...
+
+ void (*printf)(
+ nal_cb_t *nal,
+ const char *fmt,
+ ...
+ );
+
+As with malloc(), the library does not have any way to do printf
+or printk. It is not necessary for the NAL to implement this
+call, although it will make debugging difficult.
+
+ void (*cli)(
+ nal_cb_t *nal,
+ unsigned long *flags
+ );
+
+ void (*sti)(
+ nal_cb_t *nal,
+ unsigned long *flags
+ );
+
+These are used by the library to mark critical sections.
+
+ int (*gidrid2nidpid)(
+ nal_cb_t *nal,
+ ptl_id_t gid,
+ ptl_id_t rid,
+ ptl_id_t *nid,
+ ptl_id_t *pid
+ );
+
+
+ int (*nidpid2gidrid)(
+ nal_cb_t *nal,
+ ptl_id_t nid,
+ ptl_id_t pid,
+ ptl_id_t *gid,
+ ptl_id_t *rid
+ );
+
+Rolf added these. I haven't looked at how they have to work yet.
--- /dev/null
+#FIG 3.2
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+6 1200 750 1650 1050
+2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5
+ 1650 1050 1650 750 1200 750 1200 1050 1650 1050
+4 1 0 100 0 0 10 0.0000 0 105 240 1425 952 FS0\001
+-6
+6 1200 2325 1650 2625
+2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5
+ 1650 2625 1650 2325 1200 2325 1200 2625 1650 2625
+4 1 0 100 0 0 10 0.0000 0 105 240 1425 2527 FS3\001
+-6
+6 1200 1800 1650 2100
+2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5
+ 1650 2100 1650 1800 1200 1800 1200 2100 1650 2100
+4 1 0 100 0 0 10 0.0000 0 105 240 1425 2002 FS2\001
+-6
+6 1200 1275 1650 1575
+2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5
+ 1650 1575 1650 1275 1200 1275 1200 1575 1650 1575
+4 1 0 100 0 0 10 0.0000 0 105 240 1425 1477 FS1\001
+-6
+6 450 750 900 1200
+5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 750.000 450 1050 675 1125 900 1050
+1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 825 225 75 450 900 900 750
+2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
+ 450 825 450 1050
+2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
+ 900 1050 900 825
+-6
+6 450 2325 900 2775
+5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 2325.000 450 2625 675 2700 900 2625
+1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 2400 225 75 450 2475 900 2325
+2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
+ 450 2400 450 2625
+2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
+ 900 2625 900 2400
+-6
+6 450 1800 900 2250
+5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 1800.000 450 2100 675 2175 900 2100
+1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 1875 225 75 450 1950 900 1800
+2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
+ 450 1875 450 2100
+2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
+ 900 2100 900 1875
+-6
+6 450 1275 900 1725
+5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 1275.000 450 1575 675 1650 900 1575
+1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 1350 225 75 450 1425 900 1275
+2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
+ 450 1350 450 1575
+2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
+ 900 1575 900 1350
+-6
+6 2250 750 3450 2625
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+ 2550 1200 3150 1200
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+ 2550 1500 3150 1500
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+ 2550 1800 3150 1800
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+ 2550 2100 3150 2100
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 2550 975 3150 975 3150 2625 2550 2625 2550 975
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+ 2550 2400 3150 2400
+4 1 0 100 0 0 10 0.0000 0 135 1185 2850 900 Application Buffer\001
+-6
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
+ 0 0 1.00 60.00 120.00
+ 0 0 1.00 60.00 120.00
+ 1650 2400 2550 1350
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
+ 0 0 1.00 60.00 120.00
+ 0 0 1.00 60.00 120.00
+ 1650 1875 2550 1050
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
+ 0 0 1.00 60.00 120.00
+ 0 0 1.00 60.00 120.00
+ 1650 1425 2550 1950
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
+ 0 0 1.00 60.00 120.00
+ 0 0 1.00 60.00 120.00
+ 1650 900 2550 1650
+2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
+ 900 900 1200 900
+2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
+ 900 1425 1200 1425
+2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
+ 900 1950 1200 1950
+2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
+ 900 2475 1200 2475
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
+ 0 0 1.00 60.00 120.00
+ 0 0 1.00 60.00 120.00
+ 1650 2025 2550 2250
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
+ 0 0 1.00 60.00 120.00
+ 0 0 1.00 60.00 120.00
+ 1650 2550 2550 2475
+2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5
+ 1875 2850 1875 600 225 600 225 2850 1875 2850
+4 1 0 100 0 0 10 0.0000 0 105 1215 1050 525 Parallel File Server\001
--- /dev/null
+#FIG 3.2
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+6 525 2175 1575 2925
+6 675 2287 1425 2812
+4 1 0 50 0 0 10 0.0000 4 105 255 1050 2437 MD\001
+4 1 0 50 0 0 10 0.0000 4 105 645 1050 2587 Exists and\001
+4 1 0 50 0 0 10 0.0000 4 135 555 1050 2737 Accepts?\001
+-6
+2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5
+ 1575 2550 1050 2175 525 2550 1050 2925 1575 2550
+-6
+6 3450 1275 4350 1725
+6 3600 1312 4200 1687
+4 1 0 100 0 0 10 0.0000 0 135 525 3900 1612 Message\001
+4 1 0 100 0 0 10 0.0000 0 105 465 3900 1462 Discard\001
+-6
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 3450 1275 4350 1275 4350 1725 3450 1725 3450 1275
+-6
+6 4650 1275 5550 1725
+6 4725 1312 5475 1687
+4 1 0 100 0 0 10 0.0000 0 135 735 5100 1612 Drop Count\001
+4 1 0 100 0 0 10 0.0000 0 105 630 5100 1462 Increment\001
+-6
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 4650 1275 5550 1275 5550 1725 4650 1725 4650 1275
+-6
+6 1350 525 2250 975
+6 1350 562 2250 937
+4 1 0 100 0 0 10 0.0000 0 135 795 1800 862 Match Entry\001
+4 1 0 100 0 0 10 0.0000 0 105 585 1800 712 Get Next\001
+-6
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 1350 525 2250 525 2250 975 1350 975 1350 525
+-6
+6 525 1125 1575 1875
+2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5
+ 1575 1500 1050 1125 525 1500 1050 1875 1575 1500
+4 1 0 100 0 0 10 0.0000 0 105 465 1049 1552 Match?\001
+-6
+6 2340 1237 2940 1687
+6 2340 1237 2940 1687
+4 1 0 100 0 0 10 0.0000 0 105 345 2640 1387 More\001
+4 1 0 100 0 0 10 0.0000 0 105 405 2640 1537 Match\001
+4 1 0 100 0 0 10 0.0000 0 105 510 2640 1687 Entries?\001
+-6
+-6
+6 525 3225 1575 3975
+6 675 3375 1425 3750
+4 1 0 50 0 0 10 0.0000 4 105 255 1050 3525 MD\001
+4 1 0 50 0 0 10 0.0000 4 105 615 1050 3720 has room?\001
+-6
+2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5
+ 525 3600 1050 3225 1575 3600 1050 3975 525 3600
+-6
+6 3300 3375 4350 3825
+6 3300 3412 4350 3787
+4 1 0 50 0 0 10 0.0000 4 105 735 3825 3562 Unlink MD\001
+4 1 0 50 0 0 10 0.0000 4 135 945 3825 3712 & Match Entry\001
+-6
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 3300 3375 4350 3375 4350 3825 3300 3825 3300 3375
+-6
+6 1950 3225 3000 3975
+6 2250 3450 2700 3750
+4 1 0 50 0 0 10 0.0000 4 105 450 2475 3600 Unlink\001
+4 1 0 50 0 0 10 0.0000 4 105 315 2475 3750 full?\001
+-6
+2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5
+ 3000 3600 2475 3225 1950 3600 2475 3975 3000 3600
+-6
+6 3150 4500 4200 4950
+6 3150 4537 4200 4912
+4 1 0 50 0 0 10 0.0000 4 105 735 3675 4687 Unlink MD\001
+4 1 0 50 0 0 10 0.0000 4 135 945 3675 4837 & Match Entry\001
+-6
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 3150 4500 4200 4500 4200 4950 3150 4950 3150 4500
+-6
+6 600 4500 1500 4950
+6 675 4537 1425 4912
+4 1 0 50 0 0 10 0.0000 4 135 615 1050 4837 Operation\001
+4 1 0 50 0 0 10 0.0000 4 105 525 1050 4687 Perform\001
+-6
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 600 4500 1500 4500 1500 4950 600 4950 600 4500
+-6
+6 4650 4350 5700 5100
+6 4950 4537 5400 4912
+6 4950 4537 5400 4912
+4 1 0 50 0 0 10 0.0000 4 135 435 5175 4837 Queue?\001
+4 1 0 50 0 0 10 0.0000 4 105 360 5175 4687 Event\001
+-6
+-6
+2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5
+ 5700 4725 5175 4350 4650 4725 5175 5100 5700 4725
+-6
+6 6000 4500 6900 4950
+6 6225 4575 6675 4875
+4 1 0 50 0 0 10 0.0000 4 105 360 6450 4875 Event\001
+4 1 0 50 0 0 10 0.0000 4 105 435 6450 4725 Record\001
+-6
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 6000 4500 6900 4500 6900 4950 6000 4950 6000 4500
+-6
+6 1800 4350 2850 5100
+6 2100 4575 2550 4875
+4 1 0 50 0 0 10 0.0000 4 105 450 2325 4725 Unlink\001
+4 1 0 50 0 0 10 0.0000 4 105 450 2325 4875 thresh?\001
+-6
+2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5
+ 2850 4725 2325 4350 1800 4725 2325 5100 2850 4725
+-6
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1050 1875 1050 2175
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1575 1500 2100 1500
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1050 450 1050 1125
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1350 750 1050 750
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1050 2925 1050 3225
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 3150 1500 3450 1500
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 4350 1500 4650 1500
+2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5
+ 2100 1500 2625 1125 3150 1500 2625 1875 2100 1500
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1575 3600 1950 3600
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1050 3975 1050 4500
+2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 3000 3600 3300 3600
+2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1500 4725 1800 4725
+2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 5700 4725 6000 4725
+2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 2850 4725 3150 4725
+2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 4200 4725 4650 4725
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 6900 4725 7950 4725
+3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5
+ 0 0 1.00 60.00 120.00
+ 1575 2550 1650 2550 1800 2550 1800 2400 1800 1500
+ 0.000 1.000 1.000 1.000 0.000
+3 0 0 1 0 7 100 0 -1 0.000 0 0 1 5
+ 0 0 1.00 60.00 120.00
+ 2250 750 2475 750 2625 750 2625 900 2625 1125
+ 0.000 1.000 1.000 1.000 0.000
+3 0 0 1 0 7 100 0 -1 0.000 0 0 1 5
+ 0 0 1.00 60.00 120.00
+ 7500 4725 7500 1650 7500 1500 7350 1500 5550 1500
+ 0.000 1.000 1.000 1.000 0.000
+3 0 0 1 0 7 50 0 -1 0.000 0 1 0 5
+ 0 0 1.00 60.00 120.00
+ 2475 3225 2475 2400 2475 2250 2325 2250 1800 2250
+ 0.000 1.000 1.000 1.000 0.000
+3 0 0 1 0 7 50 0 -1 0.000 0 1 0 5
+ 0 0 1.00 60.00 120.00
+ 3825 3375 3825 2175 3825 2025 3675 2025 1800 2025
+ 0.000 1.000 1.000 1.000 0.000
+3 0 0 1 0 7 50 0 -1 0.000 0 1 0 8
+ 0 0 1.00 60.00 120.00
+ 2325 4350 2325 4275 2325 4125 2475 4125 4275 4125 4425 4125
+ 4425 4275 4425 4725
+ 0.000 1.000 1.000 1.000 1.000 1.000 1.000 0.000
+3 0 0 1 0 7 50 0 -1 0.000 0 1 0 8
+ 0 0 1.00 60.00 120.00
+ 5175 4350 5175 4275 5175 4125 5325 4125 7125 4125 7275 4125
+ 7275 4275 7275 4725
+ 0.000 1.000 1.000 1.000 1.000 1.000 1.000 0.000
+4 1 0 100 0 0 10 0.0000 0 75 150 1575 1425 no\001
+4 1 0 100 0 0 10 0.0000 0 135 360 825 525 Entry\001
+4 1 0 100 0 0 10 0.0000 0 75 150 1575 2475 no\001
+4 1 0 100 0 0 10 0.0000 0 105 195 1200 1950 yes\001
+4 1 0 100 0 0 10 0.0000 0 105 195 1200 3000 yes\001
+4 1 0 100 0 0 10 0.0000 0 105 195 2775 1050 yes\001
+4 1 0 100 0 0 10 0.0000 0 75 150 3225 1425 no\001
+4 1 0 100 0 0 10 0.0000 0 75 150 1650 3525 no\001
+4 1 0 100 0 0 10 0.0000 0 105 195 1200 4050 yes\001
+4 1 0 100 0 0 10 0.0000 0 105 195 3150 3525 yes\001
+4 1 0 100 0 0 10 0.0000 0 75 150 2625 3150 no\001
+4 1 0 100 0 0 10 0.0000 0 105 195 3000 4650 yes\001
+4 1 0 100 0 0 10 0.0000 0 105 195 5850 4650 yes\001
+4 1 0 100 0 0 10 0.0000 0 75 150 2475 4275 no\001
+4 1 0 100 0 0 10 0.0000 0 75 150 5325 4275 no\001
+4 1 0 50 0 0 10 0.0000 4 105 285 7800 4650 Exit\001
--- /dev/null
+#FIG 3.2
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+6 2775 900 3525 1200
+4 0 0 100 0 0 10 0.0000 0 105 720 2775 1200 Translation\001
+4 0 0 100 0 0 10 0.0000 0 105 405 2850 1050 Portal\001
+-6
+6 1350 1725 2175 2025
+4 0 0 100 0 0 10 0.0000 0 105 825 1350 2025 Transmission\001
+4 0 0 100 0 0 10 0.0000 0 105 285 1620 1875 Data\001
+-6
+2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 900 525 2700 750
+2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 2700 825 2700 1275
+2 1 0 1 0 7 100 0 -1 3.000 0 0 7 1 0 2
+ 0 0 1.00 60.00 120.00
+ 2700 1350 900 1950
+2 2 0 1 0 7 100 0 -1 4.000 0 0 7 0 0 5
+ 2400 300 3600 300 3600 2250 2400 2250 2400 300
+2 2 0 1 0 7 100 0 -1 4.000 0 0 7 0 0 5
+ 0 300 1200 300 1200 2250 0 2250 0 300
+4 1 0 100 0 0 10 0.0000 4 135 495 1800 825 Request\001
+4 1 0 100 0 0 10 0.0000 0 105 540 600 525 Initiator\001
+4 1 0 100 0 0 10 0.0000 0 135 405 3000 525 Target\001
--- /dev/null
+% ---------------------------------------------------------------
+%
+% $Id: ieee.bst,v 1.1.2.1 2003/05/19 04:25:30 braam Exp $
+%
+% by Paolo.Ienne@di.epfl.ch
+%
+% ---------------------------------------------------------------
+%
+% no guarantee is given that the format corresponds perfectly to
+% IEEE 8.5" x 11" Proceedings, but most features should be ok.
+%
+% ---------------------------------------------------------------
+%
+% `ieee' from BibTeX standard bibliography style `abbrv'
+% version 0.99a for BibTeX versions 0.99a or later, LaTeX version 2.09.
+% Copyright (C) 1985, all rights reserved.
+% Copying of this file is authorized only if either
+% (1) you make absolutely no changes to your copy, including name, or
+% (2) if you do make changes, you name it something other than
+% btxbst.doc, plain.bst, unsrt.bst, alpha.bst, and abbrv.bst.
+% This restriction helps ensure that all standard styles are identical.
+% The file btxbst.doc has the documentation for this style.
+
+% Per-entry declarations: the database fields this style reads (first
+% list), no per-entry integer variables (second list, empty), and one
+% per-entry string variable, `label' (third list).
+ENTRY
+ { address
+ author
+ booktitle
+ chapter
+ edition
+ editor
+ howpublished
+ institution
+ journal
+ key
+ month
+ note
+ number
+ organization
+ pages
+ publisher
+ school
+ series
+ title
+ type
+ volume
+ year
+ }
+ {}
+ { label }
+
+% output.state tracks where the pending output string sits within the
+% entry being written; the other four integers are the values it takes.
+INTEGERS { output.state before.all mid.sentence after.sentence after.block }
+
+% Give the four state constants the distinct values 0 through 3.
+FUNCTION {init.state.consts}
+{ #0 'before.all :=
+ #1 'mid.sentence :=
+ #2 'after.sentence :=
+ #3 'after.block :=
+}
+
+% Global string scratch variables shared by the functions below.
+STRINGS { s t }
+
+% Stack in: pending-string new-string (top). The new string is saved in
+% `s'; the pending string is then written with a separator chosen by
+% output.state:
+% mid.sentence -> ", " appended
+% after.block -> period added, newline, then "\newblock "
+% before.all -> written as is
+% otherwise -> period added, then " "
+% In every non-mid.sentence case the state is reset to mid.sentence.
+% Stack out: `s', which becomes the new pending string.
+FUNCTION {output.nonnull}
+{ 's :=
+ output.state mid.sentence =
+ { ", " * write$ }
+ { output.state after.block =
+ { add.period$ write$
+ newline$
+ "\newblock " write$
+ }
+ { output.state before.all =
+ 'write$
+ { add.period$ " " * write$ }
+ if$
+ }
+ if$
+ mid.sentence 'output.state :=
+ }
+ if$
+ s
+}
+
+% Output the string on top of the stack via output.nonnull, unless it is
+% empty, in which case it is simply discarded.
+FUNCTION {output}
+{ duplicate$ empty$
+ 'pop$
+ 'output.nonnull
+ if$
+}
+
+% Stack in: string description (top). Like `output', but when the string
+% is empty it is dropped and the warning "empty <description> in <cite
+% key>" is issued. The description is held in `t'.
+FUNCTION {output.check}
+{ 't :=
+ duplicate$ empty$
+ { pop$ "empty " t * " in " * cite$ * warning$ }
+ 'output.nonnull
+ if$
+}
+
+% Start a new entry: write "\bibitem{<cite key>}" on its own line, push an
+% empty string as the initial pending output, and set the state to
+% before.all.
+FUNCTION {output.bibitem}
+{ newline$
+ "\bibitem{" write$
+ cite$ write$
+ "}" write$
+ newline$
+ ""
+ before.all 'output.state :=
+}
+
+% Finish an entry: add a period to the pending string, write it, and end
+% the line.
+FUNCTION {fin.entry}
+{ add.period$
+ write$
+ newline$
+}
+
+% Mark that the next output starts a new block, unless nothing has been
+% output yet for this entry (state before.all).
+FUNCTION {new.block}
+{ output.state before.all =
+ 'skip$
+ { after.block 'output.state := }
+ if$
+}
+
+% Mark that the next output starts a new sentence. Does nothing when the
+% state is already after.block or is still before.all.
+FUNCTION {new.sentence}
+{ output.state after.block =
+ 'skip$
+ { output.state before.all =
+ 'skip$
+ { after.sentence 'output.state := }
+ if$
+ }
+ if$
+}
+
+% Logical negation: pops an integer, pushes 0 if it was true, else 1.
+FUNCTION {not}
+{ { #0 }
+ { #1 }
+ if$
+}
+
+% Logical and of the top two integers: if the top one is true the other
+% is left as the result; otherwise the other is replaced by 0.
+FUNCTION {and}
+{ 'skip$
+ { pop$ #0 }
+ if$
+}
+
+% Logical or of the top two integers: if the top one is true the other is
+% replaced by 1; otherwise the other is left as the result.
+FUNCTION {or}
+{ { pop$ #1 }
+ 'skip$
+ if$
+}
+
+% Pops one string; calls new.block unless that string is empty.
+FUNCTION {new.block.checka}
+{ empty$
+ 'skip$
+ 'new.block
+ if$
+}
+
+% Pops two strings; calls new.block unless both of them are empty.
+FUNCTION {new.block.checkb}
+{ empty$
+ swap$ empty$
+ and
+ 'skip$
+ 'new.block
+ if$
+}
+
+% Pops one string; calls new.sentence unless that string is empty.
+FUNCTION {new.sentence.checka}
+{ empty$
+ 'skip$
+ 'new.sentence
+ if$
+}
+
+% Pops two strings; calls new.sentence unless both of them are empty.
+FUNCTION {new.sentence.checkb}
+{ empty$
+ swap$ empty$
+ and
+ 'skip$
+ 'new.sentence
+ if$
+}
+
+% Replace the value on top of the stack by "" when empty$ reports it as
+% empty; otherwise leave it unchanged.
+FUNCTION {field.or.null}
+{ duplicate$ empty$
+ { pop$ "" }
+ 'skip$
+ if$
+}
+
+% Wrap the string on top of the stack as "{\em <string>}"; an empty
+% string becomes "".
+FUNCTION {emphasize}
+{ duplicate$ empty$
+ { pop$ "" }
+ { "{\em " swap$ * "}" * }
+ if$
+}
+
+% Loop counters used while walking a list of names.
+INTEGERS { nameptr namesleft numnames }
+
+% Stack in: a name-list string. Stack out: the formatted list.
+% Each name is formatted with "{f.~}{vv~}{ll}{, jj}". Names are joined
+% with ", " while more remain; before the last name a "," is added only
+% when there are more than two names, and the last name is appended as
+% " and <name>", or as " et~al." when it is the literal "others".
+FUNCTION {format.names}
+{ 's :=
+ #1 'nameptr :=
+ s num.names$ 'numnames :=
+ numnames 'namesleft :=
+ { namesleft #0 > }
+ { s nameptr "{f.~}{vv~}{ll}{, jj}" format.name$ 't :=
+ nameptr #1 >
+ { namesleft #1 >
+ { ", " * t * }
+ { numnames #2 >
+ { "," * }
+ 'skip$
+ if$
+ t "others" =
+ { " et~al." * }
+ { " and " * t * }
+ if$
+ }
+ if$
+ }
+ 't
+ if$
+ nameptr #1 + 'nameptr :=
+ namesleft #1 - 'namesleft :=
+ }
+ while$
+}
+
+% Push the formatted author list, or "" when the author field is empty.
+FUNCTION {format.authors}
+{ author empty$
+ { "" }
+ { author format.names }
+ if$
+}
+
+% Push the formatted editor list followed by ", editors" (several names)
+% or ", editor" (one name); push "" when the editor field is empty.
+FUNCTION {format.editors}
+{ editor empty$
+ { "" }
+ { editor format.names
+ editor num.names$ #1 >
+ { ", editors" * }
+ { ", editor" * }
+ if$
+ }
+ if$
+}
+
+% Push the title converted with change.case$ mode "t", or "" when the
+% title field is empty.
+FUNCTION {format.title}
+{ title empty$
+ { "" }
+ { title "t" change.case$ }
+ if$
+}
+
+% Stack in: a string. Stack out: the same string with every isolated "-"
+% replaced by "--". A run of two or more hyphens is copied unchanged.
+% `t' holds the unprocessed remainder; the result is built on the stack.
+FUNCTION {n.dashify}
+{ 't :=
+ ""
+ { t empty$ not }
+ { t #1 #1 substring$ "-" =
+ { t #1 #2 substring$ "--" = not
+ { "--" *
+ t #2 global.max$ substring$ 't :=
+ }
+ { { t #1 #1 substring$ "-" = }
+ { "-" *
+ t #2 global.max$ substring$ 't :=
+ }
+ while$
+ }
+ if$
+ }
+ { t #1 #1 substring$ *
+ t #2 global.max$ substring$ 't :=
+ }
+ if$
+ }
+ while$
+}
+
+% Push the date string: "<month> <year>", or just the year when month is
+% empty. With no year, push "" or, if a month is present, the month alone
+% after warning that the year is missing.
+FUNCTION {format.date}
+{ year empty$
+ { month empty$
+ { "" }
+ { "there's a month but no year in " cite$ * warning$
+ month
+ }
+ if$
+ }
+ { month empty$
+ 'year
+ { month " " * year * }
+ if$
+ }
+ if$
+}
+
+% Push the title in emphasized form (used for book-like titles).
+FUNCTION {format.btitle}
+{ title emphasize
+}
+
+% Stack in: left right (top). Stack out: left and right joined by a tie
+% "~" when the text length of `right' is under 3, otherwise by a space.
+FUNCTION {tie.or.space.connect}
+{ duplicate$ text.length$ #3 <
+ { "~" }
+ { " " }
+ if$
+ swap$ * *
+}
+
+% Stack in: description field-value (top). If the field value is
+% non-empty, warn "can't use both <description> fields in <cite key>";
+% otherwise just discard the description.
+FUNCTION {either.or.check}
+{ empty$
+ 'pop$
+ { "can't use both " swap$ * " fields in " * cite$ * warning$ }
+ if$
+}
+
+% Push "volume <volume>", followed by " of <emphasized series>" when a
+% series is given; push "" when volume is empty. When volume is present,
+% also warns if the number field is set as well.
+FUNCTION {format.bvolume}
+{ volume empty$
+ { "" }
+ { "volume" volume tie.or.space.connect
+ series empty$
+ 'skip$
+ { " of " * series emphasize * }
+ if$
+ "volume and number" number either.or.check
+ }
+ if$
+}
+
+% Only produces text when volume is empty (otherwise pushes ""):
+% - no number: the series, or "" if that is empty too;
+% - number: "number <n>" mid-sentence or "Number <n>" otherwise, followed
+% by " in <series>"; a missing series triggers a warning instead.
+FUNCTION {format.number.series}
+{ volume empty$
+ { number empty$
+ { series field.or.null }
+ { output.state mid.sentence =
+ { "number" }
+ { "Number" }
+ if$
+ number tie.or.space.connect
+ series empty$
+ { "there's a number but no series in " cite$ * warning$ }
+ { " in " * series * }
+ if$
+ }
+ if$
+ }
+ { "" }
+ if$
+}
+
+% Push "<edition> edition", with the edition converted by change.case$
+% mode "l" when mid-sentence and mode "t" otherwise; push "" when the
+% edition field is empty.
+FUNCTION {format.edition}
+{ edition empty$
+ { "" }
+ { output.state mid.sentence =
+ { edition "l" change.case$ " edition" * }
+ { edition "t" change.case$ " edition" * }
+ if$
+ }
+ if$
+}
+
+% Result flag of multi.page.check.
+INTEGERS { multiresult }
+
+% Stack in: a pages string. Stack out: 1 if the string contains any of
+% "-", "," or "+", else 0. The string is scanned one character at a time
+% (remainder kept in `t') and scanning stops at the first hit.
+FUNCTION {multi.page.check}
+{ 't :=
+ #0 'multiresult :=
+ { multiresult not
+ t empty$ not
+ and
+ }
+ { t #1 #1 substring$
+ duplicate$ "-" =
+ swap$ duplicate$ "," =
+ swap$ "+" =
+ or or
+ { #1 'multiresult := }
+ { t #2 global.max$ substring$ 't := }
+ if$
+ }
+ while$
+ multiresult
+}
+
+% Push "pages <n.dashified pages>" when multi.page.check reports several
+% pages, "page <pages>" otherwise, or "" when the pages field is empty.
+FUNCTION {format.pages}
+{ pages empty$
+ { "" }
+ { pages multi.page.check
+ { "pages" pages n.dashify tie.or.space.connect }
+ { "page" pages tie.or.space.connect }
+ if$
+ }
+ if$
+}
+
+% Build the journal locator "<volume>(<number>):<pages>".
+% - "(<number>)" is appended when number is set; a number without a
+% volume triggers a warning.
+% - When pages is set: if nothing has been built so far the result is
+% format.pages, otherwise ":<n.dashified pages>" is appended.
+FUNCTION {format.vol.num.pages}
+{ volume field.or.null
+ number empty$
+ 'skip$
+ { "(" number * ")" * *
+ volume empty$
+ { "there's a number but no volume in " cite$ * warning$ }
+ 'skip$
+ if$
+ }
+ if$
+ pages empty$
+ 'skip$
+ { duplicate$ empty$
+ { pop$ format.pages }
+ { ":" * pages n.dashify * }
+ if$
+ }
+ if$
+}
+
+% With no chapter, behaves as format.pages. Otherwise pushes
+% "<type or `chapter'> <chapter>" (type lower-cased via mode "l"),
+% followed by ", <format.pages>" when pages is set.
+FUNCTION {format.chapter.pages}
+{ chapter empty$
+ 'format.pages
+ { type empty$
+ { "chapter" }
+ { type "l" change.case$ }
+ if$
+ chapter tie.or.space.connect
+ pages empty$
+ 'skip$
+ { ", " * format.pages * }
+ if$
+ }
+ if$
+}
+
+% Push "In <emphasized booktitle>", with "<editors>, " inserted after
+% "In " when an editor is given; push "" when booktitle is empty.
+FUNCTION {format.in.ed.booktitle}
+{ booktitle empty$
+ { "" }
+ { editor empty$
+ { "In " booktitle emphasize * }
+ { "In " format.editors * ", " * booktitle emphasize * }
+ if$
+ }
+ if$
+}
+
+% Warn when author, title, howpublished, month, year and note are all
+% empty while key is non-empty.
+FUNCTION {empty.misc.check}
+{ author empty$ title empty$ howpublished empty$
+ month empty$ year empty$ note empty$
+ and and and and and
+ key empty$ not and
+ { "all relevant fields are empty in " cite$ * warning$ }
+ 'skip$
+ if$
+}
+
+% Stack in: a default thesis-type string. If the type field is set, the
+% default is replaced by the type converted with change.case$ mode "t".
+FUNCTION {format.thesis.type}
+{ type empty$
+ 'skip$
+ { pop$
+ type "t" change.case$
+ }
+ if$
+}
+
+% Push the report designation: the type field (default "Technical
+% Report"), joined to the number when one is given; without a number the
+% designation is converted with change.case$ mode "t" instead.
+FUNCTION {format.tr.number}
+{ type empty$
+ { "Technical Report" }
+ 'type
+ if$
+ number empty$
+ { "t" change.case$ }
+ { number tie.or.space.connect }
+ if$
+}
+
+% Push the cross-reference text for an article: "In <key>" if key is
+% set, else "In {\em <journal>\/}", else "" after a warning; then append
+% " \cite{<crossref>}".
+FUNCTION {format.article.crossref}
+{ key empty$
+ { journal empty$
+ { "need key or journal for " cite$ * " to crossref " * crossref *
+ warning$
+ ""
+ }
+ { "In {\em " journal * "\/}" * }
+ if$
+ }
+ { "In " key * }
+ if$
+ " \cite{" * crossref * "}" *
+}
+
+% Push a short editor reference built from the first editor formatted as
+% "{vv~}{ll}":
+% more than two editors -> " et~al." appended;
+% exactly two -> " and <second editor>", or " et~al." when the
+% second name is the literal "others";
+% one editor -> nothing appended.
+FUNCTION {format.crossref.editor}
+{ editor #1 "{vv~}{ll}" format.name$
+ editor num.names$ duplicate$
+ #2 >
+ { pop$ " et~al." * }
+ { #2 <
+ 'skip$
+ { editor #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" =
+ { " et~al." * }
+ { " and " * editor #2 "{vv~}{ll}" format.name$ * }
+ if$
+ }
+ if$
+ }
+ if$
+}
+
+% Push the cross-reference text for a book or inbook entry.
+% Prefix: "Volume <volume> of ", or "In " (with a warning) when volume is
+% empty. Then, if editor is empty or equal to author, append the key,
+% else the emphasized series, else nothing after a warning; otherwise
+% append format.crossref.editor. Finally append " \cite{<crossref>}".
+FUNCTION {format.book.crossref}
+{ volume empty$
+ { "empty volume in " cite$ * "'s crossref of " * crossref * warning$
+ "In "
+ }
+ { "Volume" volume tie.or.space.connect
+ " of " *
+ }
+ if$
+ editor empty$
+ editor field.or.null author field.or.null =
+ or
+ { key empty$
+ { series empty$
+ { "need editor, key, or series for " cite$ * " to crossref " *
+ crossref * warning$
+ "" *
+ }
+ { "{\em " * series * "\/}" * }
+ if$
+ }
+ { key * }
+ if$
+ }
+ { format.crossref.editor * }
+ if$
+ " \cite{" * crossref * "}" *
+}
+
+% Push the cross-reference text for incollection/inproceedings entries.
+% If editor is empty or equal to author: "In <key>", else
+% "In {\em <booktitle>\/}", else "" after a warning. Otherwise
+% "In <format.crossref.editor>". Finally append " \cite{<crossref>}".
+FUNCTION {format.incoll.inproc.crossref}
+{ editor empty$
+ editor field.or.null author field.or.null =
+ or
+ { key empty$
+ { booktitle empty$
+ { "need editor, key, or booktitle for " cite$ * " to crossref " *
+ crossref * warning$
+ ""
+ }
+ { "In {\em " booktitle * "\/}" * }
+ if$
+ }
+ { "In " key * }
+ if$
+ }
+ { "In " format.crossref.editor * }
+ if$
+ " \cite{" * crossref * "}" *
+}
+
+% Entry type `article'. Writes authors and title (warning if either is
+% empty); then, without a crossref, the emphasized journal (warning if
+% empty), volume/number/pages and date (warning if empty); with a
+% crossref, the cross-reference and pages. The note comes last.
+FUNCTION {article}
+{ output.bibitem
+ format.authors "author" output.check
+ new.block
+ format.title "title" output.check
+ new.block
+ crossref missing$
+ { journal emphasize "journal" output.check
+ format.vol.num.pages output
+ format.date "year" output.check
+ }
+ { format.article.crossref output.nonnull
+ format.pages output
+ }
+ if$
+ new.block
+ note output
+ fin.entry
+}
+
+% Entry type `book'. Leads with the editors when author is empty
+% (warning if those are empty too), otherwise with the authors; an entry
+% without crossref that sets both author and editor gets a warning.
+% Then the emphasized title, followed either by volume, number/series,
+% publisher (warning if empty) and address, or by the cross-reference.
+% Edition, date (warning if empty) and note come last.
+FUNCTION {book}
+{ output.bibitem
+ author empty$
+ { format.editors "author and editor" output.check }
+ { format.authors output.nonnull
+ crossref missing$
+ { "author and editor" editor either.or.check }
+ 'skip$
+ if$
+ }
+ if$
+ new.block
+ format.btitle "title" output.check
+ crossref missing$
+ { format.bvolume output
+ new.block
+ format.number.series output
+ new.sentence
+ publisher "publisher" output.check
+ address output
+ }
+ { new.block
+ format.book.crossref output.nonnull
+ }
+ if$
+ format.edition output
+ format.date "year" output.check
+ new.block
+ note output
+ fin.entry
+}
+
+% Entry type `booklet'. Writes authors, title (warning if empty),
+% howpublished, address, date and note. A new block is started before
+% howpublished/address only when at least one of them is non-empty.
+FUNCTION {booklet}
+{ output.bibitem
+ format.authors output
+ new.block
+ format.title "title" output.check
+ howpublished address new.block.checkb
+ howpublished output
+ address output
+ format.date output
+ new.block
+ note output
+ fin.entry
+}
+
+% Entry type `inbook'. Author/editor handling and title are as in `book'.
+% Without a crossref: volume, chapter/pages (warning if empty),
+% number/series, publisher (warning if empty) and address. With a
+% crossref: chapter/pages (warning if empty) and the cross-reference.
+% Edition, date (warning if empty) and note come last.
+FUNCTION {inbook}
+{ output.bibitem
+ author empty$
+ { format.editors "author and editor" output.check }
+ { format.authors output.nonnull
+ crossref missing$
+ { "author and editor" editor either.or.check }
+ 'skip$
+ if$
+ }
+ if$
+ new.block
+ format.btitle "title" output.check
+ crossref missing$
+ { format.bvolume output
+ format.chapter.pages "chapter and pages" output.check
+ new.block
+ format.number.series output
+ new.sentence
+ publisher "publisher" output.check
+ address output
+ }
+ { format.chapter.pages "chapter and pages" output.check
+ new.block
+ format.book.crossref output.nonnull
+ }
+ if$
+ format.edition output
+ format.date "year" output.check
+ new.block
+ note output
+ fin.entry
+}
+
+% Entry type `incollection'. Writes authors and title (warning if either
+% is empty). Without a crossref: "In <editors>, <booktitle>" (warning if
+% empty), volume, number/series, chapter/pages, publisher (warning if
+% empty), address, edition and date (warning if empty). With a crossref:
+% the cross-reference and chapter/pages. The note comes last.
+FUNCTION {incollection}
+{ output.bibitem
+ format.authors "author" output.check
+ new.block
+ format.title "title" output.check
+ new.block
+ crossref missing$
+ { format.in.ed.booktitle "booktitle" output.check
+ format.bvolume output
+ format.number.series output
+ format.chapter.pages output
+ new.sentence
+ publisher "publisher" output.check
+ address output
+ format.edition output
+ format.date "year" output.check
+ }
+ { format.incoll.inproc.crossref output.nonnull
+ format.chapter.pages output
+ }
+ if$
+ new.block
+ note output
+ fin.entry
+}
+
+% Entry type `inproceedings'. Writes authors and title (warning if either
+% is empty). Without a crossref: booktitle (warning if empty), volume,
+% number/series and pages, then
+% - no address: organization, publisher, date;
+% - address: address, date, then organization and publisher as a new
+% sentence.
+% With a crossref: the cross-reference and pages. The note comes last.
+FUNCTION {inproceedings}
+{ output.bibitem
+ format.authors "author" output.check
+ new.block
+ format.title "title" output.check
+ new.block
+ crossref missing$
+ { format.in.ed.booktitle "booktitle" output.check
+ format.bvolume output
+ format.number.series output
+ format.pages output
+ address empty$
+ { organization publisher new.sentence.checkb
+ organization output
+ publisher output
+ format.date "year" output.check
+ }
+ { address output.nonnull
+ format.date "year" output.check
+ new.sentence
+ organization output
+ publisher output
+ }
+ if$
+ }
+ { format.incoll.inproc.crossref output.nonnull
+ format.pages output
+ }
+ if$
+ new.block
+ note output
+ fin.entry
+}
+
+% Entry type `conference' is handled exactly like `inproceedings'.
+FUNCTION {conference} { inproceedings }
+
+% Entry type `manual'. With no author, the organization (plus address)
+% leads the entry when present; otherwise the authors lead. After the
+% emphasized title (warning if empty):
+% - no author, no organization: the address;
+% - no author, organization set: nothing more (already written above);
+% - author set: organization and address.
+% Edition, date and note come last.
+FUNCTION {manual}
+{ output.bibitem
+ author empty$
+ { organization empty$
+ 'skip$
+ { organization output.nonnull
+ address output
+ }
+ if$
+ }
+ { format.authors output.nonnull }
+ if$
+ new.block
+ format.btitle "title" output.check
+ author empty$
+ { organization empty$
+ { address new.block.checka
+ address output
+ }
+ 'skip$
+ if$
+ }
+ { organization address new.block.checkb
+ organization output
+ address output
+ }
+ if$
+ format.edition output
+ format.date output
+ new.block
+ note output
+ fin.entry
+}
+
+% Entry type `mastersthesis'. Writes authors, title, the thesis type
+% (default "Master's thesis", overridden by the type field), school,
+% address, date and note; author, title, school and year warn if empty.
+FUNCTION {mastersthesis}
+{ output.bibitem
+ format.authors "author" output.check
+ new.block
+ format.title "title" output.check
+ new.block
+ "Master's thesis" format.thesis.type output.nonnull
+ school "school" output.check
+ address output
+ format.date "year" output.check
+ new.block
+ note output
+ fin.entry
+}
+
+% Entry type `misc'. Writes authors, title, howpublished, date and note,
+% all optional, starting new blocks only when the following fields are
+% non-empty; finally runs empty.misc.check.
+FUNCTION {misc}
+{ output.bibitem
+ format.authors output
+ title howpublished new.block.checkb
+ format.title output
+ howpublished new.block.checka
+ howpublished output
+ format.date output
+ new.block
+ note output
+ fin.entry
+ empty.misc.check
+}
+
+% Entry type `phdthesis'. Same layout as `mastersthesis' except that the
+% title is emphasized and the default thesis type is "PhD thesis".
+FUNCTION {phdthesis}
+{ output.bibitem
+ format.authors "author" output.check
+ new.block
+ format.btitle "title" output.check
+ new.block
+ "PhD thesis" format.thesis.type output.nonnull
+ school "school" output.check
+ address output
+ format.date "year" output.check
+ new.block
+ note output
+ fin.entry
+}
+
+% Entry type `proceedings'. Leads with the editors, or with the
+% organization when editor is empty. After the emphasized title (warning
+% if empty), volume and number/series:
+% - no address: organization (only if there is an editor, since it was
+% otherwise written first), publisher, date;
+% - address: address, date, then organization (same condition) and
+% publisher as a new sentence.
+% The note comes last.
+FUNCTION {proceedings}
+{ output.bibitem
+ editor empty$
+ { organization output }
+ { format.editors output.nonnull }
+ if$
+ new.block
+ format.btitle "title" output.check
+ format.bvolume output
+ format.number.series output
+ address empty$
+ { editor empty$
+ { publisher new.sentence.checka }
+ { organization publisher new.sentence.checkb
+ organization output
+ }
+ if$
+ publisher output
+ format.date "year" output.check
+ }
+ { address output.nonnull
+ format.date "year" output.check
+ new.sentence
+ editor empty$
+ 'skip$
+ { organization output }
+ if$
+ publisher output
+ }
+ if$
+ new.block
+ note output
+ fin.entry
+}
+
+% Entry type `techreport'. Writes authors, title, the report designation
+% from format.tr.number, institution, address, date and note; author,
+% title, institution and year warn if empty.
+FUNCTION {techreport}
+{ output.bibitem
+ format.authors "author" output.check
+ new.block
+ format.title "title" output.check
+ new.block
+ format.tr.number output.nonnull
+ institution "institution" output.check
+ address output
+ format.date "year" output.check
+ new.block
+ note output
+ fin.entry
+}
+
+% Entry type `unpublished'. Writes authors, title and note (each warns if
+% empty), followed by the optional date.
+FUNCTION {unpublished}
+{ output.bibitem
+ format.authors "author" output.check
+ new.block
+ format.title "title" output.check
+ new.block
+ note "note" output.check
+ format.date output
+ fin.entry
+}
+
+% Entries of any other type are formatted as `misc'.
+FUNCTION {default.type} { misc }
+
+% Month macros: three-letter names expanding to the abbreviated month
+% (May, June and July are spelled out in full).
+MACRO {jan} {"Jan."}
+
+MACRO {feb} {"Feb."}
+
+MACRO {mar} {"Mar."}
+
+MACRO {apr} {"Apr."}
+
+MACRO {may} {"May"}
+
+MACRO {jun} {"June"}
+
+MACRO {jul} {"July"}
+
+MACRO {aug} {"Aug."}
+
+MACRO {sep} {"Sept."}
+
+MACRO {oct} {"Oct."}
+
+MACRO {nov} {"Nov."}
+
+MACRO {dec} {"Dec."}
+
+% Journal macros: short names expanding to abbreviated journal titles.
+MACRO {acmcs} {"ACM Comput. Surv."}
+
+MACRO {acta} {"Acta Inf."}
+
+MACRO {cacm} {"Commun. ACM"}
+
+MACRO {ibmjrd} {"IBM J. Res. Dev."}
+
+MACRO {ibmsj} {"IBM Syst.~J."}
+
+MACRO {ieeese} {"IEEE Trans. Softw. Eng."}
+
+MACRO {ieeetc} {"IEEE Trans. Comput."}
+
+MACRO {ieeetcad}
+ {"IEEE Trans. Comput.-Aided Design Integrated Circuits"}
+
+MACRO {ipl} {"Inf. Process. Lett."}
+
+MACRO {jacm} {"J.~ACM"}
+
+MACRO {jcss} {"J.~Comput. Syst. Sci."}
+
+MACRO {scp} {"Sci. Comput. Programming"}
+
+MACRO {sicomp} {"SIAM J. Comput."}
+
+MACRO {tocs} {"ACM Trans. Comput. Syst."}
+
+MACRO {tods} {"ACM Trans. Database Syst."}
+
+MACRO {tog} {"ACM Trans. Gr."}
+
+MACRO {toms} {"ACM Trans. Math. Softw."}
+
+MACRO {toois} {"ACM Trans. Office Inf. Syst."}
+
+MACRO {toplas} {"ACM Trans. Prog. Lang. Syst."}
+
+MACRO {tcs} {"Theoretical Comput. Sci."}
+
+% Read the database entries; everything below operates on the entry list.
+READ
+
+% Normalize a string for sorting: purify$ it, then lower-case it.
+FUNCTION {sortify}
+{ purify$
+ "l" change.case$
+}
+
+% Length argument of chop.word.
+INTEGERS { len }
+
+% Stack in: word length string (top). If the first `length' characters
+% of the string equal `word', push the rest of the string; otherwise push
+% the string unchanged.
+FUNCTION {chop.word}
+{ 's :=
+ 'len :=
+ s #1 len substring$ =
+ { s len #1 + global.max$ substring$ }
+ 's
+ if$
+}
+
+% Stack in: a name-list string. Stack out: its sort-key form.
+% Each name is formatted von-last-first-jr, passed through sortify and
+% appended to the result, with a blank separator between names. A final
+% name equal to the literal "others" is rendered as "et al".
+FUNCTION {sort.format.names}
+{ 's :=
+ #1 'nameptr :=
+ ""
+ s num.names$ 'numnames :=
+ numnames 'namesleft :=
+ { namesleft #0 > }
+ { nameptr #1 >
+ { " " * }
+ 'skip$
+ if$
+ s nameptr "{vv{ } }{ll{ }}{ f{ }}{ jj{ }}" format.name$ 't :=
+ nameptr numnames = t "others" = and
+ { "et al" * }
+ { t sortify * }
+ if$
+ nameptr #1 + 'nameptr :=
+ namesleft #1 - 'namesleft :=
+ }
+ while$
+}
+
+% Stack in: a title. Stack out: its sort-key form. A leading "The ",
+% then "An ", then "A " is chopped off (three chop.word calls, innermost
+% first), the result is sortified and truncated to global.max$.
+FUNCTION {sort.format.title}
+{ 't :=
+ "A " #2
+ "An " #3
+ "The " #4 t chop.word
+ chop.word
+ chop.word
+ sortify
+ #1 global.max$ substring$
+}
+
+% Push the name part of the sort key: the author list, else the
+% sortified key, else "" after a warning.
+FUNCTION {author.sort}
+{ author empty$
+ { key empty$
+ { "to sort, need author or key in " cite$ * warning$
+ ""
+ }
+ { key sortify }
+ if$
+ }
+ { author sort.format.names }
+ if$
+}
+
+% Push the name part of the sort key: the author list, else the editor
+% list, else the sortified key, else "" after a warning.
+FUNCTION {author.editor.sort}
+{ author empty$
+ { editor empty$
+ { key empty$
+ { "to sort, need author, editor, or key in " cite$ * warning$
+ ""
+ }
+ { key sortify }
+ if$
+ }
+ { editor sort.format.names }
+ if$
+ }
+ { author sort.format.names }
+ if$
+}
+
+% Push the name part of the sort key: the author list, else the
+% organization (leading "The " removed, sortified), else the sortified
+% key, else "" after a warning.
+FUNCTION {author.organization.sort}
+{ author empty$
+ { organization empty$
+ { key empty$
+ { "to sort, need author, organization, or key in " cite$ * warning$
+ ""
+ }
+ { key sortify }
+ if$
+ }
+ { "The " #4 organization chop.word sortify }
+ if$
+ }
+ { author sort.format.names }
+ if$
+}
+
+% Push the name part of the sort key: the editor list, else the
+% organization (leading "The " removed, sortified), else the sortified
+% key, else "" after a warning.
+FUNCTION {editor.organization.sort}
+{ editor empty$
+ { organization empty$
+ { key empty$
+ { "to sort, need editor, organization, or key in " cite$ * warning$
+ ""
+ }
+ { key sortify }
+ if$
+ }
+ { "The " #4 organization chop.word sortify }
+ if$
+ }
+ { editor sort.format.names }
+ if$
+}
+
+% Build sort.key$ for the current entry: a name part chosen by entry
+% type (book/inbook: author or editor; proceedings: editor or
+% organization; manual: author or organization; anything else: author),
+% then the sortified year, then the sort-formatted title, with blank
+% separators in between. The key is truncated to entry.max$.
+FUNCTION {presort}
+{ type$ "book" =
+ type$ "inbook" =
+ or
+ 'author.editor.sort
+ { type$ "proceedings" =
+ 'editor.organization.sort
+ { type$ "manual" =
+ 'author.organization.sort
+ 'author.sort
+ if$
+ }
+ if$
+ }
+ if$
+ " "
+ *
+ year field.or.null sortify
+ *
+ " "
+ *
+ title field.or.null
+ sort.format.title
+ *
+ #1 entry.max$ substring$
+ 'sort.key$ :=
+}
+
+% Compute a sort key for every entry, then sort the entry list by it.
+ITERATE {presort}
+
+SORT
+
+% State for numbering the entries and finding the widest numeric label.
+STRINGS { longest.label }
+
+INTEGERS { number.label longest.label.width }
+
+% Reset the label state: no longest label yet, numbering starts at 1.
+FUNCTION {initialize.longest.label}
+{ "" 'longest.label :=
+ #1 'number.label :=
+ #0 'longest.label.width :=
+}
+
+% Give the current entry the next number as its label, advance the
+% counter, and remember this label if it is wider (per width$) than any
+% seen so far.
+FUNCTION {longest.label.pass}
+{ number.label int.to.str$ 'label :=
+ number.label #1 + 'number.label :=
+ label width$ longest.label.width >
+ { label 'longest.label :=
+ label width$ 'longest.label.width :=
+ }
+ 'skip$
+ if$
+}
+
+% Number all entries (in sorted order) and find the widest label.
+EXECUTE {initialize.longest.label}
+
+ITERATE {longest.label.pass}
+
+% Write the bibliography preamble: the database @preamble text if any,
+% then "\begin{thebibliography}{<longest label>}" immediately followed
+% by "\setlength{\itemsep}{-1ex}\small" on the same output line.
+FUNCTION {begin.bib}
+{ preamble$ empty$
+ 'skip$
+ { preamble$ write$ newline$ }
+ if$
+ "\begin{thebibliography}{" longest.label *
+ "}\setlength{\itemsep}{-1ex}\small" * write$ newline$
+}
+
+% Emit the bibliography header, set up the output-state constants, then
+% format every entry by calling the function named after its type.
+EXECUTE {begin.bib}
+
+EXECUTE {init.state.consts}
+
+ITERATE {call.type$}
+
+% Close the bibliography environment.
+FUNCTION {end.bib}
+{ newline$
+ "\end{thebibliography}" write$ newline$
+}
+
+% Write the closing line once all entries have been output.
+EXECUTE {end.bib}
+
+% end of file ieee.bst
+% ---------------------------------------------------------------
--- /dev/null
+#FIG 3.2
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+6 150 1650 900 2025
+4 1 0 100 0 0 10 0.0000 0 135 735 525 1800 Unexpected\001
+4 1 0 100 0 0 10 0.0000 0 135 585 525 1995 Messages\001
+-6
+6 150 150 900 525
+4 1 0 100 0 0 10 0.0000 0 135 615 525 300 Preposted\001
+4 1 0 100 0 0 10 0.0000 0 105 525 525 495 Receives\001
+-6
+6 2550 4125 3150 4725
+4 1 0 100 0 0 10 0.0000 0 135 600 2850 4275 Length=0\001
+4 1 0 100 0 0 10 0.0000 0 105 540 2850 4470 Truncate\001
+4 1 0 100 0 0 10 0.0000 0 105 480 2850 4665 No Ack\001
+-6
+6 1050 1575 1950 1875
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 1050 1575 1950 1575 1950 1875 1050 1875 1050 1575
+4 1 0 100 0 0 10 0.0000 0 105 780 1500 1725 Match Short\001
+-6
+6 5400 1575 6300 2175
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 5400 1575 6300 1575 6300 2175 5400 2175 5400 1575
+4 1 0 100 0 0 10 0.0000 0 105 405 5850 1875 Buffer\001
+-6
+6 5400 2400 6300 3000
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 5400 2400 6300 2400 6300 3000 5400 3000 5400 2400
+4 1 0 100 0 0 10 0.0000 0 105 405 5850 2700 Buffer\001
+-6
+6 1050 2400 1950 2700
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 1050 2400 1950 2400 1950 2700 1050 2700 1050 2400
+4 1 0 100 0 0 10 0.0000 0 105 780 1500 2550 Match Short\001
+-6
+6 1050 825 1950 1125
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 1050 825 1950 825 1950 1125 1050 1125 1050 825
+4 1 0 100 0 0 10 0.0000 0 105 765 1500 975 Match None\001
+-6
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1500 1125 1500 1575
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 3225 2025 4050 3375
+2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2
+ 150 675 6600 675
+2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2
+ 150 1350 6600 1350
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 2400 4125 3300 4125 3300 4725 2400 4725 2400 4125
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 3225 4500 4050 3675
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 3225 1725 5400 1725
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 3225 2550 5400 2550
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 3225 2850 4050 3450
+2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1500 1800 1500 2400
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 2400 825 3300 825 3300 1275 2400 1275 2400 825
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1500 2625 1500 4125
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 1050 4125 1950 4125 1950 4425 1050 4425 1050 4125
+2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1500 300 1500 825
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1875 975 2400 975
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1875 1725 2400 1725
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1875 2550 2400 2550
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1875 4275 2400 4275
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 2400 1575 3300 1575 3300 2175 2400 2175 2400 1575
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 2400 2400 3300 2400 3300 3000 2400 3000 2400 2400
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 4050 3300 5250 3300 5250 3750 4050 3750 4050 3300
+4 1 0 100 0 0 10 0.0000 0 105 885 1500 150 Match Entries\001
+4 1 0 100 0 0 10 0.0000 0 135 1290 2850 150 Memory Descriptors\001
+4 1 0 100 0 0 10 0.0000 0 135 1065 5850 150 Memory Regions\001
+4 1 0 100 0 0 10 0.0000 0 135 825 4500 150 Event Queues\001
+4 1 0 100 0 0 10 0.0000 0 105 585 525 1050 RcvMark\001
+4 1 0 100 0 0 10 0.0000 0 105 330 2850 1102 None\001
+4 1 0 100 0 0 10 0.0000 0 135 705 1500 4275 Match Any\001
+4 1 0 50 0 0 10 0.0000 0 150 810 2850 1725 max_offset=\001
+4 1 0 50 0 0 10 0.0000 0 150 840 2850 1875 n - short_len\001
+4 1 0 50 0 0 10 0.0000 0 150 810 2850 2550 max_offset=\001
+4 1 0 50 0 0 10 0.0000 0 150 840 2850 2700 n - short_len\001
+4 1 0 50 0 0 10 0.0000 0 105 405 2850 2100 unlink\001
+4 1 0 50 0 0 10 0.0000 0 105 405 2850 2925 unlink\001
+4 1 0 100 0 0 10 0.0000 0 135 930 4650 3675 Message Queue\001
+4 1 0 100 0 0 10 0.0000 0 135 735 4650 3525 Unexpected\001
--- /dev/null
+#FIG 3.2
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 1350 900 1650 900 1650 1200 1350 1200 1350 900
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 1800 1350 2100 1350 2100 1650 1800 1650 1800 1350
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 2250 1800 2550 1800 2550 2100 2250 2100 2250 1800
+2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2
+ 4200 375 4200 2100
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 525 600 1125 600 1125 2100 525 2100 525 600
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 4425 1275 4875 1275 4875 1950 4425 1950 4425 1275
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 2550 1200 3150 1200 3150 1500 2550 1500 2550 1200
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 3000 1425 4425 1425
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 3600 825 3750 825 3750 1125 3600 1125 3600 825
+2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 2025 1425 2550 1425
+2 2 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5
+ 4425 750 4875 750 4875 1125 4425 1125 4425 750
+2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 3675 975 4425 975
+3 0 0 1 0 7 100 0 -1 0.000 0 1 0 2
+ 0 0 1.00 60.00 120.00
+ 825 1050 1350 1050
+ 0.000 0.000
+3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5
+ 0 0 1.00 60.00 120.00
+ 1500 1125 1500 1350 1500 1500 1650 1500 1800 1500
+ 0.000 1.000 1.000 1.000 0.000
+3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5
+ 0 0 1.00 60.00 120.00
+ 1950 1575 1950 1800 1950 1950 2100 1950 2250 1950
+ 0.000 1.000 1.000 1.000 0.000
+3 0 0 1 0 7 100 0 -1 0.000 0 0 0 2
+ 525 975 1125 975
+ 0.000 0.000
+3 0 0 1 0 7 100 0 -1 0.000 0 0 0 2
+ 525 1125 1125 1125
+ 0.000 0.000
+3 0 0 1 0 7 100 0 -1 0.000 0 1 0 7
+ 0 0 1.00 60.00 120.00
+ 3000 1275 3150 1275 3300 1275 3300 1125 3300 975 3450 975
+ 3600 975
+ 0.000 1.000 1.000 1.000 1.000 1.000 0.000
+4 0 0 100 0 0 10 0.0000 0 105 690 1275 750 Match List\001
+4 1 0 100 0 0 10 0.0000 0 105 780 825 525 Portal Table\001
+4 2 0 100 0 0 10 0.0000 0 135 825 4050 2025 Library Space\001
+4 0 0 100 0 0 10 0.0000 0 135 1110 4350 2175 Application Space\001
+4 1 0 100 0 0 10 0.0000 0 135 660 2850 1050 Descriptor\001
+4 1 0 100 0 0 10 0.0000 0 135 540 2850 825 Memory\001
+4 1 0 100 0 0 10 0.0000 0 135 765 3750 675 Event Queue\001
+4 1 0 100 0 0 10 0.0000 0 135 495 4650 675 Regions\001
+4 1 0 100 0 0 10 0.0000 0 135 540 4650 525 Memory\001
--- /dev/null
+@Article{ Cplant,
+ title = { {M}assively {P}arallel {C}omputing with
+ {C}ommodity {C}omponents },
+ author = { Ron Brightwell and David S. Greenberg and Arthur
+ B. Maccabe and Rolf Riesen },
+ journal = { Parallel Computing },
+ volume = { 26 },
+ month = { February },
+ pages = { 243-266 },
+ year = { 2000 }
+}
+
+@Manual{ Portals,
+ organization = { Sandia National Laboratories },
+ title = { {P}uma {P}ortals },
+ note = { http://www.cs.sandia.gov/puma/portals },
+ year = { 1997 }
+}
+
+@Techreport{ VIA,
+ title = { {V}irtual {I}nterface {A}rchitecture
+ {S}pecification {V}ersion 1.0 },
+ author = { {Compaq, Microsoft, and Intel} },
+ institution = { Compaq, Microsoft, and Intel },
+ month = { December },
+ year = { 1997 }
+}
+
+@Techreport{ ST,
+ title = { {I}nformation {T}echnology - {S}cheduled
+ {T}ransfer {P}rotocol - {W}orking {D}raft 2.0 },
+ author = { {Task Group of Technical Committee T11} },
+ institution = { Accredited Standards Committee NCITS },
+ month = { July },
+ year = { 1998 }
+}
+
+@Manual{ TFLOPS,
+ organization = { Sandia National Laboratories },
+ title = { ASCI Red },
+ note = { http://www.sandia.gov/ASCI/TFLOP },
+ year = { 1996 }
+}
+
+@Techreport{ GM,
+ title = { The {GM} {M}essage {P}assing {S}ystem },
+ author = { {Myricom, Inc.} },
+ institution = { {Myricom, Inc.} },
+ year = { 1997 },
+}
+
+@Article{ MPIstandard,
+ title = { {MPI}: {A} {M}essage-{P}assing {I}nterface standard },
+ author = { {Message Passing Interface Forum} },
+ journal = { The International Journal of Supercomputer Applications
+ and High Performance Computing },
+ volume = { 8 },
+ year = { 1994 }
+}
+
+@Inproceedings{ PumaOS,
+ author = "Lance Shuler and Chu Jong and Rolf Riesen and
+ David van Dresser and Arthur B. Maccabe and
+ Lee Ann Fisk and T. Mack Stallcup",
+ booktitle = "Proceedings of the 1995 Intel Supercomputer
+ User's Group Conference",
+ title = "The {P}uma Operating System for Massively Parallel Computers",
+ organization = "Intel Supercomputer User's Group",
+ year = 1995
+}
+
+@InProceedings{ SUNMOS,
+author = "Arthur B. Maccabe and Kevin S. McCurley and Rolf Riesen and
+ Stephen R. Wheat",
+title = "{SUNMOS} for the {Intel} {Paragon}: A Brief User's Guide",
+booktitle = "Proceedings of the {Intel} Supercomputer Users' Group. 1994
+ Annual North America Users' Conference.",
+year = 1994,
+pages = "245--251",
+month = "June",
+location = "ftp.cs.sandia.gov /pub/sunmos/papers/ISUG94-1.ps"
+}
+
+@InProceedings { PumaMPI,
+ title = { Design and Implementation of {MPI} on {P}uma Portals },
+ author = { Ron Brightwell and Lance Shuler },
+ booktitle = { Proceedings of the Second MPI Developer's Conference },
+ pages = { 18-25 },
+ month = { July },
+ year = { 1996 }
+}
+
+@Inproceedings{ FM2,
+ author = { Mario Lauria and Scott Pakin and Andrew Chien },
+ title = { {E}fficient {L}ayering for {H}igh {S}peed
+ {C}ommunication: {F}ast {M}essages 2.x },
+ Booktitle = { Proceedings of the IEEE International Symposium
+ on High Performance Distributed Computing },
+ year = { 1998 }
+}
+
+@Manual { CraySHMEM,
+ title = "SHMEM Technical Note for C, SG-2516 2.3",
+ organization = "Cray Research, Inc.",
+ month = "October",
+ year = 1994
+}
+
+@Manual { MPI2,
+ title = "{MPI}-2: {E}xtensions to the {M}essage-{P}assing {I}nterface",
+ organization = "Message Passing Interface Forum",
+ note = "http://www.mpi-forum.org/docs/mpi-20-html/mpi2-report.html",
+ month = "July",
+ year = 1997
+}
+
+@InProceedings { PMMPI,
+ title = { {The Design and Implementation of Zero Copy MPI Using
+ Commodity Hardware with a High Performance Network} },
+ author = { Francis O'Carroll and Hiroshi Tezuka and Atsushi Hori
+ and Yutaka Ishikawa },
+ booktitle = { Proceedings of the ICS },
+ year = { 1998 }
+}
--- /dev/null
+#LyX 1.2 created this file. For more info see http://www.lyx.org/
+\lyxformat 220
+\textclass report
+\begin_preamble
+\usepackage{fullpage}
+\renewenvironment{comment}%
+{\begin{quote}\textbf{Discussion}: \slshape}%
+{\end{quote}}
+\pagestyle{myheadings}
+\markboth{$Revision: 1.1.2.1 $\hfil$Date: 2003/05/19 04:25:30 $}%
+{$Date: 2003/05/19 04:25:30 $\hfil$Revision: 1.1.2.1 $}
+\end_preamble
+\language american
+\inputencoding auto
+\fontscheme pslatex
+\graphics default
+\paperfontsize 10
+\spacing single
+\papersize letterpaper
+\paperpackage a4
+\use_geometry 0
+\use_amsmath 0
+\use_natbib 0
+\use_numerical_citations 0
+\paperorientation portrait
+\secnumdepth 2
+\tocdepth 2
+\paragraph_separation indent
+\defskip medskip
+\quotes_language english
+\quotes_times 2
+\papercolumns 1
+\papersides 2
+\paperpagestyle headings
+
+\layout Title
+
+The Portals 3.2 Message Passing Interface
+\newline
+ Revision 1.1
+\layout Author
+
+Ron Brightwell
+\begin_inset Foot
+collapsed true
+
+\layout Standard
+
+R.
+ Brightwell and R.
+ Riesen are with the Scalable Computing Systems Department, Sandia National
+ Laboratories, P.O.
+ Box 5800, Albuquerque, NM\SpecialChar ~
+\SpecialChar ~
+87111-1110, bright@cs.sandia.gov, rolf@cs.sandia.gov.
+\end_inset
+
+, Arthur B.
+ Maccabe
+\begin_inset Foot
+collapsed true
+
+\layout Standard
+
+A.
+ B.
+ Maccabe is with the Computer Science Department, University of New Mexico,
+ Albuquerque, NM\SpecialChar ~
+\SpecialChar ~
+87131-1386, maccabe@cs.unm.edu.
+\end_inset
+
+, Rolf Riesen and Trammell Hudson
+\layout Abstract
+
+This report presents a specification for the Portals 3.2 message passing
+ interface.
+ Portals 3.2 is intended to allow scalable, high-performance network communicatio
+n between nodes of a parallel computing system.
+ Specifically, it is designed to support a parallel computing platform composed
+ of clusters of commodity workstations connected by a commodity system area
+ network fabric.
+ In addition, Portals 3.2 is well suited to massively parallel processing
+ and embedded systems.
+ Portals 3.2 represents an adaptation of the data movement layer developed
+ for massively parallel processing platforms, such as the 4500-node Intel
+ TeraFLOPS machine.
+
+\layout Standard
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+clearpage
+\backslash
+pagenumbering{roman}
+\backslash
+setcounter{page}{3}
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset LatexCommand \tableofcontents{}
+
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+cleardoublepage
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset FloatList figure
+
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+cleardoublepage
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset FloatList table
+
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+cleardoublepage
+\end_inset
+
+
+\layout Chapter*
+
+Summary of Changes for Revision 1.1
+\layout Enumerate
+
+Updated version number to 3.2 throughout the document
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sub:PtlGetId}
+
+\end_inset
+
+: added
+\family typewriter
+PTL_SEGV
+\family default
+ to error list for
+\shape italic
+PtlGetId
+\shape default
+.
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sec:meattach}
+
+\end_inset
+
+: added
+\family typewriter
+PTL_ML_TOOLONG
+\family default
+ to error list for
+\shape italic
+PtlMEAttach
+\shape default
+.
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sec:meunlink}
+
+\end_inset
+
+: removed text referring to a list of associated memory descriptors.
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sec:mdfree}
+
+\end_inset
+
+: added text to describe unlinking a free-floating memory descriptor.
+\layout Enumerate
+
+Table
+\begin_inset LatexCommand \ref{tab:types}
+
+\end_inset
+
+: added entry for
+\family typewriter
+ptl_seq_t
+\family default
+.
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sec:md-type}
+
+\end_inset
+
+:
+\begin_deeper
+\layout Enumerate
+
+added definition of
+\family typewriter
+max_offset
+\family default
+.
+\layout Enumerate
+
+added text to clarify
+\family typewriter
+PTL_MD_MANAGE_REMOTE
+\family default
+.
+\end_deeper
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sec:mdattach}
+
+\end_inset
+
+: modified text for
+\family typewriter
+unlink_op
+\family default
+.
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sec:niinit}
+
+\end_inset
+
+: added text to clarify multiple calls to
+\shape italic
+PtlNIInit
+\shape default
+.
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sec:mdattach}
+
+\end_inset
+
+: added text to clarify
+\family typewriter
+unlink_nofit
+\family default
+.
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sec:receiving}
+
+\end_inset
+
+: removed text indicating that an MD will reject a message if the associated
+ EQ is full.
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sec:mdfree}
+
+\end_inset
+
+: added
+\family typewriter
+PTL_MD_INUSE
+\family default
+ error code and text to indicate that only MDs with no pending operations
+ can be unlinked.
+\layout Enumerate
+
+Table
+\begin_inset LatexCommand \ref{tab:retcodes}
+
+\end_inset
+
+: added
+\family typewriter
+PTL_MD_INUSE
+\family default
+ return code.
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sec:event-type}
+
+\end_inset
+
+: added user id field, MD handle field, and NI specific failure field to
+ the
+\family typewriter
+ptl_event_t
+\family default
+ structure.
+\layout Enumerate
+
+Table
+\begin_inset LatexCommand \ref{tab:types}
+
+\end_inset
+
+: added
+\family typewriter
+ptl_ni_fail_t
+\family default
+.
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sec:event-type}
+
+\end_inset
+
+: added
+\family typewriter
+PTL_EVENT_UNLINK
+\family default
+ event type.
+\layout Enumerate
+
+Table
+\begin_inset LatexCommand \ref{tab:func}
+
+\end_inset
+
+: removed
+\shape slanted
+PtlTransId
+\shape default
+.
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sec:meattach}
+
+\end_inset
+
+, Section
+\begin_inset LatexCommand \ref{sec:meinsert}
+
+\end_inset
+
+, Section
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+: listed allowable constants with relevant fields.
+\layout Enumerate
+
+Table
+\begin_inset LatexCommand \ref{tab:func}
+
+\end_inset
+
+: added
+\shape italic
+PtlMEAttachAny
+\shape default
+ function.
+\layout Enumerate
+
+Table
+\begin_inset LatexCommand \ref{tab:retcodes}
+
+\end_inset
+
+: added
+\family typewriter
+PTL_PT_FULL
+\family default
+ return code for
+\shape italic
+PtlMEAttachAny
+\shape default
+.
+\layout Enumerate
+
+Table
+\begin_inset LatexCommand \ref{tab:oconsts}
+
+\end_inset
+
+: updated to reflect new event types.
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sec:id-type}
+
+\end_inset
+
+: added
+\family typewriter
+ptl_nid_t
+\family default
+,
+\family typewriter
+ptl_pid_t
+\family default
+, and
+\family typewriter
+ptl_uid_t
+\family default
+.
+\layout Chapter*
+
+Summary of Changes for Version 3.1
+\layout Section*
+
+Thread Issues
+\layout Standard
+
+The most significant change to the interface from version 3.0 to 3.1 involves
+ the clarification of how the interface interacts with multi-threaded applicatio
+ns.
+ We adopted a generic thread model in which processes define an address
+ space and threads share the address space.
+ Consideration of the API in the light of threads led to several clarifications
+ throughout the document:
+\layout Enumerate
+
+Glossary:
+\begin_deeper
+\layout Enumerate
+
+added a definition for
+\emph on
+thread
+\emph default
+,
+\layout Enumerate
+
+reworded the definition for
+\emph on
+process
+\emph default
+.
+
+\end_deeper
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:apiover}
+
+\end_inset
+
+: added section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:threads}
+
+\end_inset
+
+ to describe the multi-threading model used by the Portals API.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:ptlinit}
+
+\end_inset
+
+:
+\emph on
+PtlInit
+\emph default
+ must be called at least once and may be called any number of times.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:ptlfini}
+
+\end_inset
+
+:
+\emph on
+PtlFini
+\emph default
+ should be called once as the process is terminating and not as each thread
+ terminates.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:pid}
+
+\end_inset
+
+: Portals does not define thread ids.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:ni}
+
+\end_inset
+
+: network interfaces are associated with processes, not threads.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:niinit}
+
+\end_inset
+
+:
+\emph on
+PtlNIInit
+\emph default
+ must be called at least once and may be called any number of times.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:eqget}
+
+\end_inset
+
+:
+\emph on
+PtlEQGet
+\emph default
+ returns
+\family typewriter
+PTL_EQ_EMPTY
+\family default
+ if a thread is blocked on
+\emph on
+PtlEQWait
+\emph default
+.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:eqwait}
+
+\end_inset
+
+: waiting threads are awakened in FIFO order.
+
+\layout Standard
+
+Two functions,
+\emph on
+PtlNIBarrier
+\emph default
+ and
+\emph on
+PtlEQCount
+\emph default
+, were removed from the API.
+
+\emph on
+PtlNIBarrier
+\emph default
+ was defined to block the calling process until all of the processes in
+ the application group had invoked
+\emph on
+PtlNIBarrier
+\emph default
+.
+ We now consider this functionality, along with the concept of groups (see
+ the discussion under
+\begin_inset Quotes eld
+\end_inset
+
+other changes
+\begin_inset Quotes erd
+\end_inset
+
+), to be part of the runtime system, not part of the Portals API.
+
+\emph on
+PtlEQCount
+\emph default
+ was defined to return the number of events in an event queue.
+ Because external operations may lead to new events being added and other
+ threads may remove events, the value returned by
+\emph on
+PtlEQCount
+\emph default
+ would have to be a hint about the number of events in the event queue.
+\layout Section*
+
+Handling small, unexpected messages
+\layout Standard
+
+Another set of changes relates to handling small unexpected messages in
+ MPI.
+ In designing version 3.0, we assumed that each unexpected message would
+ be placed in a unique memory descriptor.
+ To avoid the need to process a long list of memory descriptors, we moved
+ the memory descriptors out of the match list and hung them off of a single
+ match list entry.
+ In this way, large unexpected messages would only encounter a single
+\begin_inset Quotes eld
+\end_inset
+
+short message
+\begin_inset Quotes erd
+\end_inset
+
+ match list entry before encountering the
+\begin_inset Quotes eld
+\end_inset
+
+long message
+\begin_inset Quotes erd
+\end_inset
+
+ match list entry.
+ Experience with this strategy identified resource management problems with
+ this approach.
+ In particular, a long sequence of very short (or zero length) messages
+ could quickly exhaust the memory descriptors constructed for handling unexpecte
+d messages.
+ Our new strategy involves the use of several very large memory descriptors
+ for small unexpected messages.
+ Consecutive unexpected messages will be written into the first of these
+ memory descriptors until the memory descriptor fills up.
+ When the first of the
+\begin_inset Quotes eld
+\end_inset
+
+small memory
+\begin_inset Quotes erd
+\end_inset
+
+ descriptors fills up, it will be unlinked and subsequent short messages
+ will be written into the next
+\begin_inset Quotes eld
+\end_inset
+
+short message
+\begin_inset Quotes erd
+\end_inset
+
+ memory descriptor.
+ In this case, a
+\begin_inset Quotes eld
+\end_inset
+
+short message
+\begin_inset Quotes erd
+\end_inset
+
+ memory descriptor will be declared full when it does not have sufficient
+ space for the largest small unexpected message.
+\layout Standard
+
+This led to two significant changes.
+ First, each match list entry now has a single memory descriptor rather
+ than a list of memory descriptors.
+ Second, in addition to exceeding the operation threshold, a memory descriptor
+ can be unlinked when the local offset exceeds a specified value.
+ These changes have led to several changes in this document:
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{subsec:paddress}
+
+\end_inset
+
+:
+\begin_deeper
+\layout Enumerate
+
+removed references to the memory descriptor list,
+\layout Enumerate
+
+changed the portals address translation description to indicate that unlinking
+ a memory descriptor implies unlinking the associated match list entry--match
+ list entries can no longer be unlinked independently from the memory descriptor.
+
+\end_deeper
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:meattach}
+
+\end_inset
+
+:
+\begin_deeper
+\layout Enumerate
+
+removed unlink from argument list,
+\layout Enumerate
+
+removed description of
+\family typewriter
+ptl_unlink
+\family default
+ type,
+\layout Enumerate
+
+changed wording of the error condition when the Portal table index already
+ has an associated match list.
+
+\end_deeper
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:meinsert}
+
+\end_inset
+
+: removed unlink from argument list.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:md-type}
+
+\end_inset
+
+: added
+\family typewriter
+max_offset
+\family default
+.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:mdattach}
+
+\end_inset
+
+:
+\begin_deeper
+\layout Enumerate
+
+added description of
+\family typewriter
+ptl_unlink
+\family default
+ type,
+\layout Enumerate
+
+removed reference to memory descriptor lists,
+\layout Enumerate
+
+changed wording of the error condition when match list entry already has
+ an associated memory descriptor,
+\layout Enumerate
+
+changed the description of the
+\family typewriter
+unlink
+\family default
+ argument.
+
+\end_deeper
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:md}
+
+\end_inset
+
+: removed
+\family typewriter
+PtlMDInsert
+\family default
+ operation.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:mdbind}
+
+\end_inset
+
+: removed references to memory descriptor list.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:mdfree}
+
+\end_inset
+
+: removed reference to memory descriptor list.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:summary}
+
+\end_inset
+
+: removed references to PtlMDInsert.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:semantics}
+
+\end_inset
+
+: removed reference to memory descriptor list.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:exmpi}
+
+\end_inset
+
+: revised the MPI example to reflect the changes to the interface.
+
+\layout Standard
+
+Several changes have been made to improve the general documentation of the
+ interface.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:handle-type}
+
+\end_inset
+
+: documented the special value
+\family typewriter
+PTL_EQ_NONE
+\family default
+.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:id-type}
+
+\end_inset
+
+: documented the special value
+\family typewriter
+PTL_ID_ANY
+\family default
+.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:mdbind}
+
+\end_inset
+
+: documented the return value
+\family typewriter
+PTL_INV_EQ
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:mdupdate}
+
+\end_inset
+
+: clarified the description of the
+\emph on
+PtlMDUpdate
+\emph default
+ function.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:implvals}
+
+\end_inset
+
+: introduced a new section to document the implementation defined values.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:summary}
+
+\end_inset
+
+: modified Table\SpecialChar ~
+
+\begin_inset LatexCommand \ref{tab:oconsts}
+
+\end_inset
+
+ to indicate where each constant is introduced and where it is used.
+
+\layout Section*
+
+Other changes
+\layout Subsection*
+
+Implementation defined limits (Section
+\begin_inset LatexCommand \ref{sec:niinit}
+
+\end_inset
+
+)
+\layout Standard
+
+The earlier version provided implementation defined limits for the maximum
+ number of match entries, the maximum number of memory descriptors, etc.
+ Rather than spanning the entire implementation, these limits are now associated
+ with individual network interfaces.
+\layout Subsection*
+
+Added User Ids (Section
+\begin_inset LatexCommand \ref{sec:uid}
+
+\end_inset
+
+)
+\layout Standard
+
+Group Ids had been used to simplify access control entries.
+ In particular, a process could allow access for all of the processes in
+ a group.
+ User Ids have been introduced to regain this functionality.
+ We use user ids to fill this role.
+\layout Subsection*
+
+Removed Group Ids and Rank Ids (Section
+\begin_inset LatexCommand \ref{sec:pid}
+
+\end_inset
+
+)
+\layout Standard
+
+The earlier version of Portals had two forms for addressing processes: <node
+ id, process id> and <group id, rank id>.
+ A process group was defined as the collection of processes created during
+ application launch.
+ Each process in the group was given a unique rank id in the range 0 to
+
+\begin_inset Formula $n-1$
+\end_inset
+
+ where
+\begin_inset Formula $n$
+\end_inset
+
+ was the number of processes in the group.
+ We removed groups because they are better handled in the runtime system.
+\layout Subsection*
+
+Match lists (Section
+\begin_inset LatexCommand \ref{sec:meattach}
+
+\end_inset
+
+)
+\layout Standard
+
+It is no longer illegal to have an existing match entry when calling PtlMEAttach.
+ A position argument was added to the list of arguments supplied to
+\emph on
+PtlMEAttach
+\emph default
+ to specify whether the new match entry is prepended or appended to the
+ existing list.
+ If there is no existing match list, the position argument is ignored.
+\layout Subsection*
+
+Unlinking Memory Descriptors (Section
+\begin_inset LatexCommand \ref{sec:md}
+
+\end_inset
+
+)
+\layout Standard
+
+Previously, a memory descriptor could be unlinked if the offset exceeded
+ a threshold upon the completion of an operation.
+ In this version, the unlinking is delayed until there is a matching operation
+ which requires more memory than is currently available in the descriptor.
+ In addition to changes in that section, this led to a revision of Figure\SpecialChar ~
+
+\begin_inset LatexCommand \ref{fig:flow}
+
+\end_inset
+
+.
+\layout Subsection*
+
+Split Phase Operations and Events (Section
+\begin_inset LatexCommand \ref{sec:eq}
+
+\end_inset
+
+)
+\layout Standard
+
+Previously, there were five types of events:
+\family typewriter
+PTL_EVENT_PUT
+\family default
+,
+\family typewriter
+PTL_EVENT_GET
+\family default
+,
+\family typewriter
+PTL_EVENT_REPLY
+\family default
+,
+\family typewriter
+PTL_EVENT_SENT
+\family default
+, and
+\family typewriter
+PTL_EVENT_ACK.
+
+\family default
+The first four of these reflected the completion of potentially long operations.
+ We have introduced new event types to reflect the fact that long operations
+ have a distinct starting point and a distinct completion point.
+ Moreover, the completion may be successful or unsuccessful.
+\layout Standard
+
+In addition to providing a mechanism for reporting failure to higher levels
+ of software, this split provides an opportunity for improved ordering
+ semantics.
+ Previously, if one process initiated two operations (e.g., two put operations)
+ on a remote process, these operations were guaranteed to complete in the
+ same order that they were initiated.
+ Now, we only guarantee that the initiation events are delivered in the
+ same order.
+ In particular, the operations do not need to complete in the order that
+ they were initiated.
+\layout Subsection*
+
+Well known process ids (Section 
+\begin_inset LatexCommand \ref{sec:niinit}
+
+\end_inset
+
+)
+\layout Standard
+
+To support the notion of
+\begin_inset Quotes eld
+\end_inset
+
+well known process ids,
+\begin_inset Quotes erd
+\end_inset
+
+ we added a process id argument to the arguments for PtlNIInit.
+\layout Chapter*
+
+Glossary
+\layout Description
+
+API Application Programming Interface.
+ A definition of the functions and semantics provided by a library of functions.
+
+\layout Description
+
+Initiator A
+\emph on
+process
+\emph default
+ that initiates a message operation.
+
+\layout Description
+
+Message An application-defined unit of data that is exchanged between
+\emph on
+processes
+\emph default
+.
+
+\layout Description
+
+Message\SpecialChar ~
+Operation Either a put operation, which writes data, or a get operation,
+ which reads data.
+
+\layout Description
+
+Network A network provides point-to-point communication between
+\emph on
+nodes
+\emph default
+.
+ Internally, a network may provide multiple routes between endpoints (to
+ improve fault tolerance or to improve performance characteristics); however,
+ multiple paths will not be exposed outside of the network.
+
+\layout Description
+
+Node A node is an endpoint in a
+\emph on
+network
+\emph default
+.
+ Nodes provide processing capabilities and memory.
+ A node may provide multiple processors (an SMP node) or it may act as a
+
+\emph on
+gateway
+\emph default
+ between networks.
+
+\layout Description
+
+Process A context of execution.
+ A process defines a virtual memory (VM) context.
+ This context is not shared with other processes.
+ Several threads may share the VM context defined by a process.
+
+\layout Description
+
+Target A
+\emph on
+process
+\emph default
+ that is acted upon by a message operation.
+
+\layout Description
+
+Thread A context of execution that shares a VM context with other threads.
+
+\layout Standard
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+cleardoublepage
+\layout Standard
+
+\backslash
+setcounter{page}{1}
+\backslash
+pagenumbering{arabic}
+\end_inset
+
+
+\layout Chapter
+
+Introduction
+\begin_inset LatexCommand \label{sec:intro}
+
+\end_inset
+
+
+\layout Section
+
+Overview
+\layout Standard
+
+This document describes an application programming interface for message
+ passing between nodes in a system area network.
+ The goal of this interface is to improve the scalability and performance
+ of network communication by defining the functions and semantics of message
+ passing required for scaling a parallel computing system to ten thousand
+ nodes.
+ This goal is achieved by providing an interface that will allow a quality
+ implementation to take advantage of the inherently scalable design of Portals.
+\layout Standard
+
+This document is divided into several sections:
+\layout Description
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:intro}
+
+\end_inset
+
+---Introduction This section describes the purpose and scope of the Portals
+ API.
+
+\layout Description
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:apiover}
+
+\end_inset
+
+---An\SpecialChar ~
+Overview\SpecialChar ~
+of\SpecialChar ~
+the\SpecialChar ~
+Portals\SpecialChar ~
+3.2\SpecialChar ~
+API This section gives a brief overview of the
+ Portals API.
+ The goal is to introduce the key concepts and terminology used in the descripti
+on of the API.
+
+\layout Description
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:api}
+
+\end_inset
+
+---The\SpecialChar ~
+Portals\SpecialChar ~
+3.2\SpecialChar ~
+API This section describes the functions and semantics of
+ the Portals application programming interface.
+
+\layout Description
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:semantics}
+
+\end_inset
+
+---The\SpecialChar ~
+Semantics\SpecialChar ~
+of\SpecialChar ~
+Message\SpecialChar ~
+Transmission This section describes the semantics
+ of message transmission.
+ In particular, it covers the information transmitted in each type of message and
+ the processing of incoming messages.
+
+\layout Description
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:examples}
+
+\end_inset
+
+---Examples This section presents several examples intended to illustrate
+ the use of the Portals API.
+
+\layout Section
+
+Purpose
+\layout Standard
+
+Existing message passing technologies available for commodity cluster networking
+ hardware do not meet the scalability goals required by the Cplant\SpecialChar ~
+
+\begin_inset LatexCommand \cite{Cplant}
+
+\end_inset
+
+ project at Sandia National Laboratories.
+ The goal of the Cplant project is to construct a commodity cluster that
+ can scale to the order of ten thousand nodes.
+ This number greatly exceeds the capacity for which existing message passing
+ technologies have been designed and implemented.
+\layout Standard
+
+In addition to the scalability requirements of the network, these technologies
+ must also be able to support a scalable implementation of the Message Passing
+ Interface (MPI)\SpecialChar ~
+
+\begin_inset LatexCommand \cite{MPIstandard}
+
+\end_inset
+
+ standard, which has become the
+\shape italic
+de facto
+\shape default
+ standard for parallel scientific computing.
+ While MPI does not impose any scalability limitations, existing message
+ passing technologies do not provide the functionality needed to allow implement
+ations of MPI to meet the scalability requirements of Cplant.
+\layout Standard
+
+The following are properties of a network architecture that do not impose
+ any inherent scalability limitations:
+\layout Itemize
+
+Connectionless - Many connection-oriented architectures, such as VIA\SpecialChar ~
+
+\begin_inset LatexCommand \cite{VIA}
+
+\end_inset
+
+ and TCP/IP sockets, have limitations on the number of peer connections
+ that can be established.
+
+\layout Itemize
+
+Network independence - Many communication systems depend on the host processor
+ to perform operations in order for messages in the network to be consumed.
+ Message consumption from the network should not be dependent on host processor
+ activity, such as the operating system scheduler or user-level thread scheduler.
+
+\layout Itemize
+
+User-level flow control - Many communication systems manage flow control
+ internally to avoid depleting resources, which can significantly impact
+ performance as the number of communicating processes increases.
+
+\layout Itemize
+
+OS Bypass - High performance network communication should not involve memory
+ copies into or out of a kernel-managed protocol stack.
+
+\layout Standard
+
+The following are properties of a network architecture that do not impose
+ scalability limitations for an implementation of MPI:
+\layout Itemize
+
+Receiver-managed - Sender-managed message passing implementations require
+ a persistent block of memory to be available for every process, requiring
+ memory resources to increase with job size and requiring user-level flow
+ control mechanisms to manage these resources.
+
+\layout Itemize
+
+User-level Bypass - While OS Bypass is necessary for high performance, it
+ alone is not sufficient to support the Progress Rule of MPI asynchronous
+ operations.
+
+\layout Itemize
+
+Unexpected messages - Few communication systems have support for receiving
+ messages for which there is no prior notification.
+ Support for these types of messages is necessary to avoid flow control
+ and protocol overhead.
+
+\layout Section
+
+Background
+\layout Standard
+
+Portals was originally designed for and implemented on the nCube machine
+ as part of the SUNMOS (Sandia/UNM OS)\SpecialChar ~
+
+\begin_inset LatexCommand \cite{SUNMOS}
+
+\end_inset
+
+ and Puma\SpecialChar ~
+
+\begin_inset LatexCommand \cite{PumaOS}
+
+\end_inset
+
+ lightweight kernel development projects.
+ Portals went through two design phases, the latter of which is used on
+ the 4500-node Intel TeraFLOPS machine\SpecialChar ~
+
+\begin_inset LatexCommand \cite{TFLOPS}
+
+\end_inset
+
+.
+ Portals have been very successful in meeting the needs of such a large
+ machine, not only as a layer for a high-performance MPI implementation\SpecialChar ~
+
+\begin_inset LatexCommand \cite{PumaMPI}
+
+\end_inset
+
+, but also for implementing the scalable run-time environment and parallel
+ I/O capabilities of the machine.
+\layout Standard
+
+The second generation Portals implementation was designed to take full advantage
+ of the hardware architecture of large MPP machines.
+ However, efforts to implement this same design on commodity cluster technology
+ identified several limitations, due to the differences in network hardware
+ as well as to shortcomings in the design of Portals.
+\layout Section
+
+Scalability
+\layout Standard
+
+The primary goal in the design of Portals is scalability.
+ Portals are designed specifically for an implementation capable of supporting
+ a parallel job running on tens of thousands of nodes.
+ Performance is critical only in terms of scalability.
+ That is, the level of message passing performance is characterized by how
+ far it allows an application to scale and not by how it performs in micro-bench
+marks (e.g., a two node bandwidth or latency test).
+\layout Standard
+
+The Portals API is designed to allow for scalability, not to guarantee it.
+ Portals cannot overcome the shortcomings of a poorly designed application
+ program.
+ Applications that have inherent scalability limitations, either through
+ design or implementation, will not be transformed by Portals into scalable
+ applications.
+ Scalability must be addressed at all levels.
+ Portals do not inhibit scalability, but do not guarantee it either.
+\layout Standard
+
+To support scalability, the Portals interface maintains a minimal amount
+ of state.
+ Portals provide reliable, ordered delivery of messages between pairs of
+ processes.
+ They are connectionless: a process is not required to explicitly establish
+ a point-to-point connection with another process in order to communicate.
+ Moreover, all buffers used in the transmission of messages are maintained
+ in user space.
+ The target process determines how to respond to incoming messages, and
+ messages for which there are no buffers are discarded.
+\layout Section
+
+Communication Model
+\layout Standard
+
+Portals combine the characteristics of both one-sided and two-sided communication.
+ They define a
+\begin_inset Quotes eld
+\end_inset
+
+matching put
+\begin_inset Quotes erd
+\end_inset
+
+ operation and a
+\begin_inset Quotes eld
+\end_inset
+
+matching get
+\begin_inset Quotes erd
+\end_inset
+
+ operation.
+ The destination of a put (or send) is not an explicit address; instead,
+ each message contains a set of match bits that allow the receiver to determine
+ where incoming messages should be placed.
+ This flexibility allows Portals to support both traditional one-sided operation
+s and two-sided send/receive operations.
+\layout Standard
+
+Portals allow the target to determine whether incoming messages are acceptable.
+ A target process can choose to accept message operations from any specific
+ process or can choose to ignore message operations from any specific process.
+\layout Section
+
+Zero Copy, OS Bypass and Application Bypass
+\layout Standard
+
+In traditional system architectures, network packets arrive at the network
+ interface card (NIC), are passed through one or more protocol layers in
+ the operating system, and eventually copied into the address space of the
+ application.
+ As network bandwidth began to approach memory copy rates, reduction of
+ memory copies became a critical concern.
+ This concern led to the development of zero-copy message passing protocols
+ in which message copies are eliminated or pipelined to avoid the loss of
+ bandwidth.
+\layout Standard
+
+A typical zero-copy protocol has the NIC generate an interrupt for the CPU
+ when a message arrives from the network.
+ The interrupt handler then controls the transfer of the incoming message
+ into the address space of the appropriate application.
+ The interrupt latency, the time from the initiation of an interrupt until
+ the interrupt handler is running, is fairly significant.
+ To avoid this cost, some modern NICs have processors that can be programmed
+ to implement part of a message passing protocol.
+ Given a properly designed protocol, it is possible to program the NIC to
+ control the transfer of incoming messages, without needing to interrupt
+ the CPU.
+ Because this strategy does not need to involve the OS on every message
+ transfer, it is frequently called
+\begin_inset Quotes eld
+\end_inset
+
+OS Bypass.
+\begin_inset Quotes erd
+\end_inset
+
+ ST\SpecialChar ~
+
+\begin_inset LatexCommand \cite{ST}
+
+\end_inset
+
+, VIA\SpecialChar ~
+
+\begin_inset LatexCommand \cite{VIA}
+
+\end_inset
+
+, FM\SpecialChar ~
+
+\begin_inset LatexCommand \cite{FM2}
+
+\end_inset
+
+, GM\SpecialChar ~
+
+\begin_inset LatexCommand \cite{GM}
+
+\end_inset
+
+, and Portals are examples of OS Bypass protocols.
+\layout Standard
+
+Many protocols that support OS Bypass still require that the application
+ actively participate in the protocol to ensure progress.
+ As an example, the long message protocol of PM requires that the application
+ receive and reply to a request to put or get a long message.
+ This complicates the runtime environment, requiring a thread to process
+ incoming requests, and significantly increases the latency required to
+ initiate a long message protocol.
+ The Portals message passing protocol does not require activity on the part
+ of the application to ensure progress.
+ We use the term
+\begin_inset Quotes eld
+\end_inset
+
+Application Bypass
+\begin_inset Quotes erd
+\end_inset
+
+ to refer to this aspect of the Portals protocol.
+\layout Section
+
+Faults
+\layout Standard
+
+Given the number of components that we are dealing with and the fact that
+ we are interested in supporting applications that run for very long times,
+ failures are inevitable.
+ The Portals API recognizes that the underlying transport may not be able
+ to successfully complete an operation once it has been initiated.
+ This is reflected in the fact that the Portals API reports three types
+ of events: events indicating the initiation of an operation, events indicating
+ the successful completion of an operation, and events indicating the unsuccessf
+ul completion of an operation.
+ Every initiation event is eventually followed by a successful completion
+ event or an unsuccessful completion event.
+\layout Standard
+
+Between the time an operation is started and the time that the operation
+ completes (successfully or unsuccessfully), any memory associated with
+ the operation should be considered volatile.
+ That is, the memory may be changed in unpredictable ways while the operation
+ is progressing.
+ Once the operation completes, the memory associated with the operation
+ will not be subject to further modification (from this operation).
+ Notice that unsuccessful operations may alter memory in an essentially
+ unpredictable fashion.
+\layout Chapter
+
+An Overview of the Portals API
+\begin_inset LatexCommand \label{sec:apiover}
+
+\end_inset
+
+
+\layout Standard
+
+In this section, we give a conceptual overview of the Portals API.
+ The goal is to provide a context for understanding the detailed description
+ of the API presented in the next section.
+\layout Section
+
+Data Movement
+\begin_inset LatexCommand \label{sec:dmsemantics}
+
+\end_inset
+
+
+\layout Standard
+
+A Portal represents an opening in the address space of a process.
+ Other processes can use a Portal to read (get) or write (put) the memory
+ associated with the portal.
+ Every data movement operation involves two processes, the
+\series bold
+initiator
+\series default
+ and the
+\series bold
+target
+\series default
+.
+ The initiator is the process that initiates the data movement operation.
+ The target is the process that responds to the operation by either accepting
+ the data for a put operation, or replying with the data for a get operation.
+\layout Standard
+
+In this discussion, activities attributed to a process may refer to activities
+ that are actually performed by the process or
+\emph on
+on behalf of the process
+\emph default
+.
+ The inclusiveness of our terminology is important in the context of
+\emph on
+application bypass
+\emph default
+.
+ In particular, when we note that the target sends a reply in the case of
+ a get operation, it is possible that the reply will be generated by another
+ component in the system, bypassing the application.
+\layout Standard
+
+Figures\SpecialChar ~
+
+\begin_inset LatexCommand \ref{fig:put}
+
+\end_inset
+
+ and
+\begin_inset LatexCommand \ref{fig:get}
+
+\end_inset
+
+ present graphical interpretations of the Portal data movement operations:
+ put and get.
+ In the case of a put operation, the initiator sends a put request message
+ containing the data to the target.
+ The target translates the Portal addressing information in the request
+ using its local Portal structures.
+ When the request has been processed, the target optionally sends an acknowledge
+ment message.
+\layout Standard
+
+
+\begin_inset Float figure
+placement htbp
+wide false
+collapsed false
+
+\layout Standard
+\align center
+
+\begin_inset Graphics FormatVersion 1
+ filename put.eps
+ display color
+ size_type 0
+ rotateOrigin center
+ lyxsize_type 1
+ lyxwidth 218pt
+ lyxheight 119pt
+\end_inset
+
+
+\layout Caption
+
+Portal Put (Send)
+\begin_inset LatexCommand \label{fig:put}
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Standard
+
+In the case of a get operation, the initiator sends a get request to the
+ target.
+ As with the put operation, the target translates the Portal addressing
+ information in the request using its local Portal structures.
+ Once it has translated the Portal addressing information, the target sends
+ a reply that includes the requested data.
+\layout Standard
+
+
+\begin_inset Float figure
+placement htbp
+wide false
+collapsed false
+
+\layout Standard
+\align center
+
+\begin_inset Graphics FormatVersion 1
+ filename get.eps
+ display color
+ size_type 0
+ rotateOrigin center
+ lyxsize_type 1
+ lyxwidth 218pt
+ lyxheight 119pt
+\end_inset
+
+
+\layout Caption
+
+Portal Get
+\begin_inset LatexCommand \label{fig:get}
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Standard
+
+We should note that Portal address translations are only performed on nodes
+ that respond to operations initiated by other nodes.
+ Acknowledgements and replies to get operations bypass the portals address
+ translation structures.
+\layout Section
+
+Portal Addressing
+\begin_inset LatexCommand \label{subsec:paddress}
+
+\end_inset
+
+
+\layout Standard
+
+One-sided data movement models (e.g., shmem\SpecialChar ~
+
+\begin_inset LatexCommand \cite{CraySHMEM}
+
+\end_inset
+
+, ST\SpecialChar ~
+
+\begin_inset LatexCommand \cite{ST}
+
+\end_inset
+
+, MPI-2\SpecialChar ~
+
+\begin_inset LatexCommand \cite{MPI2}
+
+\end_inset
+
+) typically use a triple to address memory on a remote node.
+ This triple consists of a process id, memory buffer id, and offset.
+ The process id identifies the target process, the memory buffer id specifies
+ the region of memory to be used for the operation, and the offset specifies
+ an offset within the memory buffer.
+\layout Standard
+
+In addition to the standard address components (process id, memory buffer
+ id, and offset), a Portal address includes a set of match bits.
+ This addressing model is appropriate for supporting one-sided operations
+ as well as traditional two-sided message passing operations.
+ Specifically, the Portals API provides the flexibility needed for an efficient
+ implementation of MPI-1, which defines two-sided operations with one-sided
+ completion semantics.
+\layout Standard
+
+Figure\SpecialChar ~
+
+\begin_inset LatexCommand \ref{fig:portals}
+
+\end_inset
+
+ presents a graphical representation of the structures used by a target
+ in the interpretation of a Portal address.
+ The process id is used to route the message to the appropriate node and
+ is not reflected in this diagram.
+ The memory buffer id, called the
+\series bold
+portal id
+\series default
+, is used as an index into the Portal table.
+ Each element of the Portal table identifies a match list.
+ Each element of the match list specifies two bit patterns: a set of
+\begin_inset Quotes eld
+\end_inset
+
+don't care
+\begin_inset Quotes erd
+\end_inset
+
+ bits, and a set of
+\begin_inset Quotes eld
+\end_inset
+
+must match
+\begin_inset Quotes erd
+\end_inset
+
+ bits.
+ In addition to the two sets of match bits, each match list element has
+ at most one memory descriptor.
+ Each memory descriptor identifies a memory region and an optional event
+ queue.
+ The memory region specifies the memory to be used in the operation and
+ the event queue is used to record information about these operations.
+\layout Standard
+
+
+\begin_inset Float figure
+placement htbp
+wide false
+collapsed false
+
+\layout Standard
+\align center
+
+\begin_inset Graphics FormatVersion 1
+ filename portals.eps
+ display color
+ size_type 0
+ rotateOrigin center
+ lyxsize_type 1
+ lyxwidth 305pt
+ lyxheight 106pt
+\end_inset
+
+
+\layout Caption
+
+Portal Addressing Structures
+\begin_inset LatexCommand \label{fig:portals}
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Standard
+
+Figure\SpecialChar ~
+
+\begin_inset LatexCommand \ref{fig:flow}
+
+\end_inset
+
+ illustrates the steps involved in translating a Portal address, starting
+ from the first element in a match list.
+ If the match criteria specified in the match list entry are met and the
+ memory descriptor accepts the operation
+\begin_inset Foot
+collapsed true
+
+\layout Standard
+
+Memory descriptors can reject operations because a threshold has been exceeded
+ or because the memory region does not have sufficient space, see Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:md}
+
+\end_inset
+
+
+\end_inset
+
+, the operation (put or get) is performed using the memory region specified
+ in the memory descriptor.
+ If the memory descriptor specifies that it is to be unlinked when a threshold
+ has been exceeded, the match list entry is removed from the match list
+ and the resources associated with the memory descriptor and match list
+ entry are reclaimed.
+ Finally, if there is an event queue specified in the memory descriptor,
+ the operation is logged in the event queue.
+\layout Standard
+
+
+\begin_inset Float figure
+placement htbp
+wide false
+collapsed false
+
+\layout Standard
+\align center
+
+\begin_inset Graphics FormatVersion 1
+ filename flow_new.eps
+ display color
+ size_type 0
+ rotateOrigin center
+ lyxsize_type 1
+ lyxwidth 447pt
+ lyxheight 282pt
+\end_inset
+
+
+\layout Caption
+
+Portals Address Translation
+\begin_inset LatexCommand \label{fig:flow}
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Standard
+
+If the match criteria specified in the match list entry are not met, or
+ there is no memory descriptor associated with the match list entry, or
+ the memory descriptor associated with the match list entry rejects the
+ operation, the address translation continues with the next match list entry.
+ If the end of the match list has been reached, the address translation
+ is aborted and the incoming request is discarded.
+\layout Section
+
+Access Control
+\layout Standard
+
+A process can control access to its portals using an access control list.
+ Each entry in the access control list specifies a process id and a Portal
+ table index.
+ The access control list is actually an array of entries.
+ Each incoming request includes an index into the access control list (i.e.,
+ a
+\begin_inset Quotes eld
+\end_inset
+
+cookie
+\begin_inset Quotes erd
+\end_inset
+
+ or hint).
+ If the id of the process issuing the request doesn't match the id specified
+ in the access control list entry or the Portal table index specified in
+ the request doesn't match the Portal table index specified in the access
+ control list entry, the request is rejected.
+ Process identifiers and Portal table indexes may include wild card values
+ to increase the flexibility of this mechanism.
+
+\layout Standard
+
+Two aspects of this design merit further discussion.
+ First, the model assumes that the information in a message header, the
+ sender's id in particular, is trustworthy.
+ In most contexts, we assume that the entity that constructs the header
+ is trustworthy; however, using cryptographic techniques, we could easily
+ devise a protocol that would ensure the authenticity of the sender.
+\layout Standard
+
+Second, because the access check is performed by the receiver, it is possible
+ that a malicious process will generate thousands of messages that will
+ be denied by the receiver.
+ This could saturate the network and/or the receiver, resulting in a
+\emph on
+denial of service
+\emph default
+ attack.
+ Moving the check to the sender using capabilities would remove the potential
+ for this form of attack.
+ However, the solution introduces the complexities of capability management
+ (exchange of capabilities, revocation, protections, etc.).
+\layout Section
+
+Multi-threaded Applications
+\begin_inset LatexCommand \label{sec:threads}
+
+\end_inset
+
+
+\layout Standard
+
+The Portals API supports a generic view of multi-threaded applications.
+ From the perspective of the Portals API, an application program is defined
+ by a set of processes.
+ Each process defines a unique address space.
+ The Portals API defines access to this address space from other processes
+ (using portals addressing and the data movement operations).
+ A process may have one or more
+\emph on
+threads
+\emph default
+ executing in its address space.
+
+\layout Standard
+
+With the exception of
+\emph on
+PtlEQWait
+\emph default
+, every function in the Portals API is non-blocking and atomic with respect
+ to both other threads and external operations that result from data movement
+ operations.
+ While individual operations are atomic, sequences of these operations may
+ be interleaved between different threads and with external operations.
+ The Portals API does not provide any mechanisms to control this interleaving.
+ It is expected that these mechanisms will be provided by the API used to
+ create threads.
+\layout Chapter
+
+The Portals API
+\begin_inset LatexCommand \label{sec:api}
+
+\end_inset
+
+
+\layout Section
+
+Naming Conventions
+\begin_inset LatexCommand \label{sec:conv}
+
+\end_inset
+
+
+\layout Standard
+
+The Portals API defines two types of entities: functions and types.
+ Functions always start with 
+\emph on
+Ptl
+\emph default
+ and use mixed upper and lower case.
+ When used in the body of this report, function names appear in italic face,
+ e.g.,
+\emph on
+PtlInit
+\emph default
+.
+ The functions associated with an object type will have names that start
+ with
+\emph on
+Ptl
+\emph default
+, followed by the two letter object type code shown in Table\SpecialChar ~
+
+\begin_inset LatexCommand \ref{tab:objcodes}
+
+\end_inset
+
+.
+ As an example, the function
+\emph on
+PtlEQAlloc
+\emph default
+ allocates resources for an event queue.
+\layout Standard
+
+
+\begin_inset Float table
+placement htbp
+wide false
+collapsed false
+
+\layout Caption
+
+Object Type Codes
+\begin_inset LatexCommand \label{tab:objcodes}
+
+\end_inset
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+medskip
+\newline
+
+\end_inset
+
+
+\layout Standard
+\align center
+
+\size small
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="5" columns="3">
+<features firstHeadEmpty="true">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<row bottomline="true">
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\emph on
+xx
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ Name
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ Section
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+EQ
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ Event Queue
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:eq}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ MD
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ Memory Descriptor
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:md}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ ME
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ Match list Entry
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:me}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ NI
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ Network Interface
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ni}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Standard
+
+Type names use lower case with underscores to separate words.
+ Each type name starts with
+\family typewriter
+ptl
+\family default
+_ and ends with
+\family typewriter
+_t
+\family default
+.
+ When used in the body of this report, type names appear in a fixed font,
+ e.g.,
+\family typewriter
+ptl_match_bits_t
+\family default
+.
+\layout Standard
+
+Names for constants use upper case with underscores to separate words.
+ Each constant name starts with
+\family typewriter
+PTL_
+\family default
+.
+ When used in the body of this report, constant names appear in a fixed font,
+ e.g.,
+\family typewriter
+PTL_OK
+\family default
+.
+\layout Section
+
+Base Types
+\layout Standard
+
+The Portals API defines a variety of base types.
+ These types represent a simple renaming of the base types provided by the
+ C programming language.
+ In most cases these new type names have been introduced to improve type
+ safety and to avoid issues arising from differences in representation sizes
+ (e.g., 16-bit or 32-bit integers).
+\layout Subsection
+
+Sizes
+\begin_inset LatexCommand \label{sec:size-t}
+
+\end_inset
+
+
+\layout Standard
+
+The type
+\family typewriter
+ptl_size_t
+\family default
+ is an unsigned 64-bit integral type used for representing sizes.
+\layout Subsection
+
+Handles
+\begin_inset LatexCommand \label{sec:handle-type}
+
+\end_inset
+
+
+\layout Standard
+
+Objects maintained by the API are accessed through handles.
+ Handle types have names of the form
+\family typewriter
+ptl_handle_
+\emph on
+xx
+\emph default
+_t
+\family default
+, where
+\emph on
+xx
+\emph default
+ is one of the two letter object type codes shown in Table\SpecialChar ~
+
+\begin_inset LatexCommand \ref{tab:objcodes}
+
+\end_inset
+
+.
+ For example, the type
+\family typewriter
+ptl_handle_ni_t
+\family default
+ is used for network interface handles.
+\layout Standard
+
+Each type of object is given a unique handle type to enhance type checking.
+ The type,
+\family typewriter
+ptl_handle_any_t
+\family default
+, can be used when a generic handle is needed.
+ Every handle value can be converted into a value of type
+\family typewriter
+ptl_handle_any_t
+\family default
+ without loss of information.
+\layout Standard
+
+Handles are not simple values.
+ Every portals object is associated with a specific network interface and
+ an identifier for this interface (along with an object identifier) is part
+ of the handle for the object.
+\layout Standard
+
+The special value
+\family typewriter
+PTL_EQ_NONE
+\family default
+, of type
+\family typewriter
+ptl_handle_eq_t
+\family default
+, is used to indicate the absence of an event queue.
+ See sections
+\begin_inset LatexCommand \ref{sec:mdfree}
+
+\end_inset
+
+ and\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:mdupdate}
+
+\end_inset
+
+ for uses of this value.
+\layout Subsection
+
+Indexes
+\begin_inset LatexCommand \label{sec:index-type}
+
+\end_inset
+
+
+\layout Standard
+
+The types
+\family typewriter
+ptl_pt_index_t
+\family default
+ and
+\family typewriter
+ptl_ac_index_t
+\family default
+ are integral types used for representing Portal table indexes and access
+ control table indexes, respectively.
+ See section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:niinit}
+
+\end_inset
+
+ for limits on values of these types.
+\layout Subsection
+
+Match Bits
+\begin_inset LatexCommand \label{sec:mb-type}
+
+\end_inset
+
+
+\layout Standard
+
+The type
+\family typewriter
+ptl_match_bits_t
+\family default
+ is capable of holding unsigned 64-bit integer values.
+\layout Subsection
+
+Network Interfaces
+\begin_inset LatexCommand \label{sec:ni-type}
+
+\end_inset
+
+
+\layout Standard
+
+The type
+\family typewriter
+ptl_interface_t
+\family default
+ is an integral type used for identifying different network interfaces.
+ Users will need to consult the local documentation to determine appropriate
+ values for the interfaces available.
+ The special value
+\family typewriter
+PTL_IFACE_DEFAULT
+\family default
+ identifies the default interface.
+\layout Subsection
+
+Identifiers
+\begin_inset LatexCommand \label{sec:id-type}
+
+\end_inset
+
+
+\layout Standard
+
+The type
+\family typewriter
+ptl_nid_t
+\family default
+ is an integral type used for representing node ids
+\family typewriter
+, ptl_pid_t
+\family default
+ is an integral type for representing process ids, and
+\family typewriter
+ptl_uid_t
+\family default
+is an integral type for representing user ids.
+\layout Standard
+
+The special value 
+\family typewriter
+PTL_PID_ANY
+\family default
+ matches any process identifier, PTL_NID_ANY matches any node identifier,
+ and
+\family typewriter
+PTL_UID_ANY
+\family default
+ matches any user identifier.
+ See sections
+\begin_inset LatexCommand \ref{sec:meattach}
+
+\end_inset
+
+ and\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:acentry}
+
+\end_inset
+
+ for uses of these values.
+\layout Subsection
+
+Status Registers
+\begin_inset LatexCommand \label{sec:stat-type}
+
+\end_inset
+
+
+\layout Standard
+
+Each network interface maintains an array of status registers that can be
+ accessed using the
+\family typewriter
+PtlNIStatus
+\family default
+ function (see Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:nistatus}
+
+\end_inset
+
+).
+ The type
+\family typewriter
+ptl_sr_index_t
+\family default
+ defines the types of indexes that can be used to access the status registers.
+ The only index defined for all implementations is
+\family typewriter
+PTL_SR_DROP_COUNT
+\family default
+ which identifies the status register that counts the dropped requests for
+ the interface.
+ Other indexes (and registers) may be defined by the implementation.
+\layout Standard
+
+The type
+\family typewriter
+ptl_sr_value_t
+\family default
+ defines the types of values held in status registers.
+ This is a signed integer type.
+ The size is implementation dependent, but must be at least 32 bits.
+\layout Section
+
+Initialization and Cleanup
+\begin_inset LatexCommand \label{sec:init}
+
+\end_inset
+
+
+\layout Standard
+
+The Portals API includes a function,
+\emph on
+PtlInit
+\emph default
+, to initialize the library and a function,
+\emph on
+PtlFini
+\emph default
+, to clean up after the application is done using the library.
+\layout Subsection
+
+PtlInit
+\begin_inset LatexCommand \label{sec:ptlinit}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlInit( int *max_interfaces );
+\layout Standard
+\noindent
+The
+\emph on
+PtlInit
+\emph default
+ function initializes the Portals library.
+ PtlInit must be called at least once by a process before any thread makes
+ a Portals function call, but may be safely called more than once.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_FAIL Indicates an error during initialization.
+
+\layout Description
+
+PTL_SEGV Indicates that
+\family typewriter
+max_interfaces
+\family default
+ is not a legal address.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="1" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="5in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+max_interfaces
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold the maximum number of interfaces
+ that can be initialized.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Subsection
+
+PtlFini
+\begin_inset LatexCommand \label{sec:ptlfini}
+
+\end_inset
+
+
+\layout LyX-Code
+
+void PtlFini( void );
+\layout Standard
+\noindent
+The
+\emph on
+PtlFini
+\emph default
+ function cleans up after the Portals library is no longer needed by a process.
+ After this function is called, calls to any of the functions defined by
+ the Portals API or use of the structures set up by the Portals API will
+ result in undefined behavior.
+ This function should be called once and only once during termination by
+ a process.
+ Typically, this function will be called in the exit sequence of a process.
+ Individual threads should not call PtlFini when they terminate.
+\layout Section
+
+Network Interfaces
+\begin_inset LatexCommand \label{sec:ni}
+
+\end_inset
+
+
+\layout Standard
+
+The Portals API supports the use of multiple network interfaces.
+ However, each interface is treated as an independent entity.
+ Combining interfaces (e.g.,
+\begin_inset Quotes eld
+\end_inset
+
+bonding
+\begin_inset Quotes erd
+\end_inset
+
+ to create a higher bandwidth connection) must be implemented by the application
+ or embedded in the underlying network.
+ Interfaces are treated as independent entities to make it easier to cache
+ information on individual network interface cards.
+\layout Standard
+
+Once initialized, each interface provides a Portal table, an access control
+ table, and a collection of status registers.
+ See Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:me}
+
+\end_inset
+
+ for a discussion of updating Portal table entries using the
+\emph on
+PtlMEAttach
+\emph default
+ function.
+ See Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:ac}
+
+\end_inset
+
+ for a discussion of the initialization and updating of entries in the access
+ control table.
+ See Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:nistatus}
+
+\end_inset
+
+ for a discussion of the
+\emph on
+PtlNIStatus
+\emph default
+ function which can be used to determine the value of a status register.
+\layout Standard
+
+Every other type of Portal object (e.g., memory descriptor, event queue, or
+ match list entry) is associated with a specific network interface.
+ The association to a network interface is established when the object is
+ created and is encoded in the handle for the object.
+\layout Standard
+
+Each network interface is initialized and shut down independently.
+ The initialization routine,
+\emph on
+PtlNIInit
+\emph default
+, returns a handle for an interface object which is used in all subsequent
+ Portal operations.
+ The
+\emph on
+PtlNIFini
+\emph default
+ function is used to shut down an interface and release any resources that
+ are associated with the interface.
+ Network interface handles are associated with processes, not threads.
+ All threads in a process share all of the network interface handles.
+\layout Standard
+
+The Portals API also defines the
+\emph on
+PtlNIStatus
+\emph default
+ function to query the status registers for a network interface, the
+\emph on
+PtlNIDist
+\emph default
+ function to determine the
+\begin_inset Quotes eld
+\end_inset
+
+distance
+\begin_inset Quotes erd
+\end_inset
+
+ to another process, and the
+\emph on
+PtlNIHandle
+\emph default
+ function to determine the network interface that an object is associated
+ with.
+\layout Subsection
+
+PtlNIInit
+\begin_inset LatexCommand \label{sec:niinit}
+
+\end_inset
+
+
+\layout LyX-Code
+
+typedef struct {
+\newline
+ int max_match_entries;
+\newline
+ int max_mem_descriptors;
+\newline
+ int max_event_queues;
+\newline
+ ptl_ac_index_t max_atable_index;
+\newline
+ ptl_pt_index_t max_ptable_index;
+\newline
+} ptl_ni_limits_t;
+\newline
+
+\newline
+int PtlNIInit( ptl_interface_t interface,
+\newline
+ ptl_pid_t pid,
+\newline
+ ptl_ni_limits_t* desired,
+\newline
+ ptl_ni_limits_t* actual,
+\newline
+ ptl_handle_ni_t* handle );
+\layout Standard
+
+Values of type
+\family typewriter
+ptl_ni_limits_t
+\family default
+ include the following members:
+\layout Description
+
+max_match_entries Maximum number of match entries that can be allocated
+ at any one time.
+\layout Description
+
+max_mem_descriptors Maximum number of memory descriptors that can be allocated
+ at any one time.
+\layout Description
+
+max_event_queues Maximum number of event queues that can be allocated at
+ any one time.
+\layout Description
+
+max_atable_index Largest access control table index for this interface,
+ valid indexes range from zero to
+\family typewriter
+max_atable_index
+\family default
+, inclusive.
+\layout Description
+
+max_ptable_index Largest Portal table index for this interface, valid indexes
+ range from zero to
+\family typewriter
+max_ptable_index
+\family default
+, inclusive.
+\layout Standard
+\noindent
+The
+\emph on
+PtlNIInit
+\emph default
+ function is used to initialize the Portals API for a network interface.
+ This function must be called at least once by each process before any other
+ operations that apply to the interface by any process or thread.
+ For subsequent calls to
+\shape italic
+PtlNIInit
+\shape default
+ from within the same process (either by different threads or the same thread),
+ the desired limits will be ignored and the call will return the existing
+ NI handle.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INIT_DUP Indicates a duplicate initialization of
+\family typewriter
+interface
+\family default
+.
+
+\layout Description
+
+PTL_INIT_INV Indicates that
+\family typewriter
+interface
+\family default
+ is not a valid network interface.
+
+\layout Description
+
+PTL_NOSPACE Indicates that there is insufficient memory to initialize the
+ interface.
+
+\layout Description
+
+PTL_INV_PROC Indicates that
+\family typewriter
+pid
+\family default
+ is not a valid process id.
+\layout Description
+
+PTL_SEGV Indicates that
+\family typewriter
+actual
+\family default
+ or
+\family typewriter
+ handle
+\family default
+ is not a legal address.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="5" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+interface
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Identifies the network interface to be initialized.
+ (See section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:ni-type}
+
+\end_inset
+
+ for a discussion of values used to identify network interfaces.)
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+pid
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Identifies the desired process id (for well known process ids).
+ The value
+\family typewriter
+PTL_PID_ANY
+\family default
+ may be used to have the process id assigned by the underlying library.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+desired
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+If non-NULL, points to a structure that holds the desired limits.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+actual
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, the location pointed to by actual will hold the actual
+ limits.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+handle
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold a handle for the interface.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Comment
+
+The use of desired is implementation dependent.
+ In particular, an implementation may choose to ignore this argument.
+\layout Subsection
+
+PtlNIFini
+\begin_inset LatexCommand \label{sec:nifini}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlNIFini( ptl_handle_ni_t interface );
+\layout Standard
+\noindent
+The
+\emph on
+PtlNIFini
+\emph default
+ function is used to release the resources allocated for a network interface.
+ Once the
+\emph on
+PtlNIFini
+\emph default
+ operation has been started, the results of pending API operations (e.g.,
+ operations initiated by another thread) for this interface are undefined.
+ Similarly, the effects of incoming operations (puts and gets) or return
+ values (acknowledgements and replies) for this interface are undefined.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_NI Indicates that
+\family typewriter
+interface
+\family default
+ is not a valid network interface handle.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="1" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+interface
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+A handle for the interface to shut down.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Subsection
+
+PtlNIStatus
+\begin_inset LatexCommand \label{sec:nistatus}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlNIStatus( ptl_handle_ni_t interface,
+\newline
+ ptl_sr_index_t status_register,
+\newline
+ ptl_sr_value_t* status );
+\layout Standard
+\noindent
+The
+\emph on
+PtlNIStatus
+\emph default
+ function returns the value of a status register for the specified interface.
+ (See section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:stat-type}
+
+\end_inset
+
+ for more information on status register indexes and status register values.)
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_NI Indicates that
+\family typewriter
+interface
+\family default
+ is not a valid network interface handle.
+
+\layout Description
+
+PTL_INV_SR_INDX Indicates that
+\family typewriter
+status_register
+\family default
+ is not a valid status register.
+
+\layout Description
+
+PTL_SEGV Indicates that
+\family typewriter
+status
+\family default
+ is not a legal address.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="3" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+interface
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the interface to use.
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+status_register
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+An index for the status register to read.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+status
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold the current value of the status
+ register.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Comment
+
+The only status register that must be defined is a drop count register (
+\family typewriter
+PTL_SR_DROP_COUNT
+\family default
+).
+ Implementations may define additional status registers.
+ Identifiers for the indexes associated with these registers should start
+ with the prefix
+\family typewriter
+PTL_SR_
+\family default
+.
+\layout Subsection
+
+PtlNIDist
+\layout LyX-Code
+
+int PtlNIDist( ptl_handle_ni_t interface,
+\newline
+ ptl_process_id_t process,
+\newline
+ unsigned long* distance );
+\layout Standard
+\noindent
+The
+\emph on
+PtlNIDist
+\emph default
+ function returns the distance to another process using the specified interface.
+ Distances are only defined relative to an interface.
+ Distance comparisons between different interfaces on the same process may
+ be meaningless.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_NI Indicates that
+\family typewriter
+interface
+\family default
+ is not a valid network interface handle.
+
+\layout Description
+
+PTL_INV_PROC Indicates that
+\family typewriter
+process
+\family default
+ is not a valid process identifier.
+
+\layout Description
+
+PTL_SEGV Indicates that
+\family typewriter
+distance
+\family default
+ is not a legal address.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="3" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+interface
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the interface to use.
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+process
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+An identifier for the process whose distance is being requested.
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+distance
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold the distance to the remote
+ process.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Comment
+
+This function should return a static measure of distance.
+ Examples include minimum latency, the inverse of available bandwidth, or
+ the number of switches between the two endpoints.
+\layout Subsection
+
+PtlNIHandle
+\layout LyX-Code
+
+int PtlNIHandle( ptl_handle_any_t handle,
+\newline
+ ptl_handle_ni_t* interface );
+\layout Standard
+\noindent
+The
+\emph on
+PtlNIHandle
+\emph default
+ function returns a handle for the network interface with which the object
+ identified by
+\family typewriter
+handle
+\family default
+ is associated.
+ If the object identified by
+\family typewriter
+handle
+\family default
+ is a network interface, this function returns the same value it is passed.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_HANDLE Indicates that
+\family typewriter
+handle
+\family default
+ is not a valid handle.
+
+\layout Description
+
+PTL_SEGV Indicates that
+\family typewriter
+interface
+\family default
+ is not a legal address.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="2" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+handle
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the object.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+interface
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold a handle for the network interface
+ associated with
+\family typewriter
+handle
+\family default
+.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Comment
+
+Every handle should encode the network interface and the object id relative
+ to this handle.
+ Both are presumably encoded using integer values.
+\layout Section
+
+User Identification
+\begin_inset LatexCommand \label{sec:uid}
+
+\end_inset
+
+
+\layout Standard
+
+Every process runs on behalf of a user.
+
+\layout Subsection
+
+PtlGetUid
+\layout LyX-Code
+
+int PtlGetUid( ptl_handle_ni_t ni_handle,
+\newline
+ ptl_uid_t* uid );
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_INV_NI Indicates that
+\family typewriter
+ni_handle
+\family default
+ is not a valid network interface handle.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_SEGV Indicates that
+\family typewriter
+uid
+\family default
+ is not a legal address.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="2" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="5in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ni_handle
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A network interface handle.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+uid
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold the user id for the calling
+ process.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Comment
+
+Note that user identifiers are dependent on the network interface(s).
+ In particular, if a node has multiple interfaces, a process may have multiple
+ user identifiers.
+\layout Section
+
+Process Identification
+\begin_inset LatexCommand \label{sec:pid}
+
+\end_inset
+
+
+\layout Standard
+
+Processes that use the Portals API can be identified using a node id and
+ process id.
+ Every node accessible through a network interface has a unique node identifier
+ and every process running on a node has a unique process identifier.
+ As such, any process in the computing system can be identified by its node
+ id and process id.
+
+\layout Standard
+
+The Portals API defines a type,
+\family typewriter
+ptl_process_id_t
+\family default
+ for representing process ids and a function,
+\emph on
+PtlGetId
+\emph default
+, which can be used to obtain the id of the current process.
+\layout Comment
+
+The Portals API does not include thread identifiers.
+ Messages are delivered to processes (address spaces) not threads (contexts
+ of execution).
+\layout Subsection
+
+The Process Id Type
+\begin_inset LatexCommand \label{sec:pid-type}
+
+\end_inset
+
+
+\layout LyX-Code
+
+typedef struct {
+\newline
+ ptl_nid_t nid; /* node id */
+\newline
+ ptl_pid_t pid; /* process id */
+\newline
+} ptl_process_id_t;
+\layout Standard
+\noindent
+The
+\family typewriter
+ptl_process_id_t
+\family default
+ type uses two identifiers to represent a process id: a node id and a process
+ id.
+
+\layout Subsection
+
+PtlGetId
+\begin_inset LatexCommand \label{sub:PtlGetId}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlGetId( ptl_handle_ni_t ni_handle,
+\newline
+ ptl_process_id_t* id );
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_INV_NI Indicates that
+\family typewriter
+ni_handle
+\family default
+ is not a valid network interface handle.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_SEGV Indicates that
+\family typewriter
+id
+\family default
+ is not a legal address.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="2" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="5in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ni_handle
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A network interface handle.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+id
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold the id for the calling process.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Comment
+
+Note that process identifiers are dependent on the network interface(s).
+ In particular, if a node has multiple interfaces, it may have multiple
+ node identifiers.
+\layout Section
+
+Match List Entries and Match Lists
+\begin_inset LatexCommand \label{sec:me}
+
+\end_inset
+
+
+\layout Standard
+
+A match list is a chain of match list entries.
+ Each match list entry includes a memory descriptor and a set of match criteria.
+ The match criteria can be used to reject incoming requests based on process
+ id or the match bits provided in the request.
+ A match list is created using the
+\emph on
+PtlMEAttach
+\emph default
+ or
+\shape italic
+PtlMEAttachAny
+\shape default
+ functions, which create a match list consisting of a single match list
+ entry, attach the match list to the specified Portal index, and return
+ a handle for the match list entry.
+ Match entries can be dynamically inserted and removed from a match list
+ using the
+\emph on
+PtlMEInsert
+\emph default
+ and
+\emph on
+PtlMEUnlink
+\emph default
+ functions.
+\layout Subsection
+
+PtlMEAttach
+\begin_inset LatexCommand \label{sec:meattach}
+
+\end_inset
+
+
+\layout LyX-Code
+
+typedef enum { PTL_RETAIN, PTL_UNLINK } ptl_unlink_t;
+\newline
+
+\layout LyX-Code
+
+typedef enum { PTL_INS_BEFORE, PTL_INS_AFTER } ptl_ins_pos_t;
+\newline
+
+\layout LyX-Code
+
+int PtlMEAttach( ptl_handle_ni_t interface,
+\newline
+ ptl_pt_index_t index,
+\newline
+ ptl_process_id_t matchid,
+\newline
+ ptl_match_bits_t match_bits,
+\newline
+ ptl_match_bits_t ignorebits,
+\newline
+ ptl_unlink_t unlink,
+\newline
+ ptl_ins_pos_t position,
+\newline
+ ptl_handle_me_t* handle );
+\layout Standard
+\noindent
+Values of the type
+\family typewriter
+ptl_ins_pos_t
+\family default
+ are used to control where a new item is inserted.
+ The value
+\family typewriter
+PTL_INS_BEFORE
+\family default
+ is used to insert the new item before the current item or before the head
+ of the list.
+ The value
+\family typewriter
+PTL_INS_AFTER
+\family default
+ is used to insert the new item after the current item or after the last
+ item in the list.
+
+\layout Standard
+
+The
+\emph on
+PtlMEAttach
+\emph default
+ function creates a match list consisting of a single entry and attaches
+ this list to the Portal table for
+\family typewriter
+interface
+\family default
+.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_INV_NI Indicates that
+\family typewriter
+interface
+\family default
+ is not a valid network interface handle.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_PTINDEX Indicates that
+\family typewriter
+index
+\family default
+ is not a valid Portal table index.
+
+\layout Description
+
+PTL_INV_PROC Indicates that
+\family typewriter
+matchid
+\family default
+ is not a valid process identifier.
+
+\layout Description
+
+PTL_NOSPACE Indicates that there is insufficient memory to allocate the
+ match list entry.
+
+\layout Description
+
+PTL_ML_TOOLONG Indicates that the resulting match list is too long.
+ The maximum length for a match list is defined by the interface.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="7" columns="3">
+<features>
+<column alignment="left" valignment="top" width="0.8in">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.75in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+interface
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the interface to use.
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+index
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+The Portal table index where the match list should be attached.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+matchid
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Specifies the match criteria for the process id of the requestor.
+ The constants
+\family typewriter
+PTL_PID_ANY
+\family default
+ and
+\family typewriter
+PTL_NID_ANY
+\family default
+ can be used to wildcard either of the ids in the
+\family typewriter
+ptl_process_id_t
+\family default
+ structure.
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+match_bits, ignorebits
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Specify the match criteria to apply to the match bits in the incoming request.
+ The
+\family typewriter
+ignorebits
+\family default
+ are used to mask out insignificant bits in the incoming match bits.
+ The resulting bits are then compared to the match list entry's match
+ bits to determine if the incoming request meets the match criteria.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+unlink
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Indicates the match list entry should be unlinked when the last memory descripto
+r associated with this match list entry is unlinked.
+ (Note, the check for unlinking a match entry only occurs when a memory
+ descriptor is unlinked.)
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+position
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Indicates whether the new match entry should be prepended or appended to
+ the existing match list.
+ If there is no existing list, this argument is ignored and the new match
+ entry becomes the only entry in the list.
+ Allowed constants:
+\family typewriter
+PTL_INS_BEFORE
+\family default
+,
+\family typewriter
+PTL_INS_AFTER
+\family default
+.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+handle
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold a handle for the newly created
+ match list entry.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Subsection
+
+PtlMEAttachAny
+\begin_inset LatexCommand \label{sec:attachany}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlMEAttachAny( ptl_handle_ni_t interface,
+\newline
+ ptl_pt_index_t *index,
+\newline
+ ptl_process_id_t matchid,
+\newline
+ ptl_match_bits_t match_bits,
+\newline
+ ptl_match_bits_t ignorebits,
+\newline
+ ptl_unlink_t unlink,
+\newline
+ ptl_handle_me_t* handle );
+\layout Standard
+
+The
+\emph on
+PtlMEAttachAny
+\emph default
+ function creates a match list consisting of a single entry and attaches
+ this list to an unused Portal table entry for
+\family typewriter
+interface
+\family default
+.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_INV_NI Indicates that
+\family typewriter
+interface
+\family default
+ is not a valid network interface handle.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_PROC Indicates that
+\family typewriter
+matchid
+\family default
+ is not a valid process identifier.
+
+\layout Description
+
+PTL_NOSPACE Indicates that there is insufficient memory to allocate the
+ match list entry.
+
+\layout Description
+
+PTL_PT_FULL Indicates that there are no free entries in the Portal table.
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="4" columns="3">
+<features>
+<column alignment="left" valignment="top" width="0.8in">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.75in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+interface
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the interface to use.
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+index
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold the Portal index where the
+ match list has been attached.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+matchid, match_bits, ignorebits, unlink
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+See the discussion for
+\shape italic
+PtlMEAttach
+\shape default
+.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+handle
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold a handle for the newly created
+ match list entry.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Subsection
+
+PtlMEInsert
+\begin_inset LatexCommand \label{sec:meinsert}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlMEInsert( ptl_handle_me_t current,
+\newline
+ ptl_process_id_t matchid,
+\newline
+ ptl_match_bits_t match_bits,
+\newline
+ ptl_match_bits_t ignorebits,
+\newline
+ ptl_ins_pos_t position,
+\newline
+ ptl_handle_me_t* handle );
+\layout Standard
+
+The
+\emph on
+PtlMEInsert
+\emph default
+ function creates a new match list entry and inserts this entry into the
+ match list containing
+\family typewriter
+current
+\family default
+.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_PROC Indicates that
+\family typewriter
+matchid
+\family default
+ is not a valid process identifier.
+
+\layout Description
+
+PTL_INV_ME Indicates that
+\family typewriter
+current
+\family default
+ is not a valid match entry handle.
+
+\layout Description
+
+PTL_ML_TOOLONG Indicates that the resulting match list is too long.
+ The maximum length for a match list is defined by the interface.
+
+\layout Description
+
+PTL_NOSPACE Indicates that there is insufficient memory to allocate the
+ match entry.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="4" columns="3">
+<features>
+<column alignment="left" valignment="top" width="0.8in">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="left" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+current
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for a match entry.
+ The new match entry will be inserted immediately before or immediately
+ after this match entry.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+matchid
+\family default
+,
+\family typewriter
+match_bits
+\family default
+,
+\family typewriter
+ignorebits
+\family default
+,
+\family typewriter
+unlink
+\family default
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+See the discussion for
+\emph on
+PtlMEAttach
+\emph default
+.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+position
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Indicates whether the new match entry should be inserted before or after
+ the
+\family typewriter
+current
+\family default
+ entry.
+ Allowed constants:
+\family typewriter
+PTL_INS_BEFORE
+\family default
+,
+\family typewriter
+PTL_INS_AFTER
+\family default
+.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+handle
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+See the discussion for
+\emph on
+PtlMEAttach
+\emph default
+.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Subsection
+
+PtlMEUnlink
+\begin_inset LatexCommand \label{sec:meunlink}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlMEUnlink( ptl_handle_me_t entry );
+\layout Standard
+\noindent
+The
+\emph on
+PtlMEUnlink
+\emph default
+ function can be used to unlink a match entry from a match list.
+ This operation also releases any resources associated with the match entry
+ (including the associated memory descriptor).
+ It is an error to use the match entry handle after calling
+\emph on
+PtlMEUnlink
+\emph default
+.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_ME Indicates that
+\family typewriter
+entry
+\family default
+ is not a valid match entry handle.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="1" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+entry
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+A handle for the match entry to be unlinked.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Section
+
+Memory Descriptors
+\begin_inset LatexCommand \label{sec:md}
+
+\end_inset
+
+
+\layout Standard
+
+A memory descriptor contains information about a region of an application
+ process' memory and an event queue where information about the operations
+ performed on the memory descriptor is recorded.
+ The Portals API provides two operations to create memory descriptors:
+\emph on
+PtlMDAttach
+\emph default
+, and
+\emph on
+PtlMDBind
+\emph default
+; an operation to update a memory descriptor,
+\emph on
+PtlMDUpdate
+\emph default
+; and an operation to unlink and release the resources associated with a
+ memory descriptor,
+\emph on
+PtlMDUnlink
+\emph default
+.
+\layout Subsection
+
+The Memory Descriptor Type
+\begin_inset LatexCommand \label{sec:md-type}
+
+\end_inset
+
+
+\layout LyX-Code
+
+typedef struct {
+\newline
+ void* start;
+\newline
+ ptl_size_t length;
+\newline
+ int threshold;
+\newline
+ unsigned int max_offset;
+\newline
+ unsigned int options;
+\newline
+ void* user_ptr;
+\newline
+ ptl_handle_eq_t eventq;
+\newline
+} ptl_md_t;
+\layout Standard
+\noindent
+The
+\family typewriter
+ptl_md_t
+\family default
+ type defines the application view of a memory descriptor.
+ Values of this type are used to initialize and update the memory descriptors.
+\layout Subsubsection
+
+Members
+\layout Description
+
+start,\SpecialChar ~
+length Specify the memory region associated with the memory descriptor.
+ The
+\family typewriter
+start
+\family default
+ member specifies the starting address for the memory region and the
+\family typewriter
+length
+\family default
+ member specifies the length of the region.
+ The
+\family typewriter
+start
+\family default
+ member can be NULL provided that the
+\family typewriter
+length
+\family default
+ member is zero.
+ (Zero length buffers are useful to record events.) There are no alignment
+ restrictions on the starting address or the length of the region; although,
+ unaligned messages may be slower (i.e., lower bandwidth and/or longer latency)
+ on some implementations.
+
+\layout Description
+
+threshold Specifies the maximum number of operations that can be performed
+ on the memory descriptor.
+ An operation is any action that could possibly generate an event (see Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+ for the different types of events).
+ In the usual case, the threshold value is decremented for each operation
+ on the memory descriptor.
+ When the threshold value is zero, the memory descriptor is
+\emph on
+inactive
+\emph default
+, and does not respond to operations.
+ A memory descriptor can have an initial threshold value of zero to allow
+ for manipulation of an inactive memory descriptor by the local process.
+ A threshold value of
+\family typewriter
+PTL_MD_THRESH_INF
+\family default
+ indicates that there is no bound on the number of operations that may be
+ applied to a memory descriptor.
+ Note that local operations (e.g.,
+\emph on
+PtlMDUpdate
+\emph default
+) are not applied to the threshold count.
+
+\layout Description
+
+max_offset Specifies the maximum local offset of a memory descriptor.
+ When the local offset of a memory descriptor exceeds this maximum, the
+ memory descriptor becomes
+\shape italic
+inactive
+\shape default
+ and does not respond to further operations.
+\layout Description
+
+options Specifies the behavior of the memory descriptor.
+ There are five options that can be selected: enable put operations (yes
+ or no), enable get operations (yes or no), offset management (local or
+ remote), message truncation (yes or no), and acknowledgement (yes or no).
+ Values for this argument can be constructed using a bitwise or of the following
+ values:
+\begin_deeper
+\begin_deeper
+\layout Description
+
+PTL_MD_OP_PUT Specifies that the memory descriptor will respond to
+\emph on
+put
+\emph default
+ operations.
+ By default, memory descriptors reject
+\emph on
+put
+\emph default
+ operations.
+
+\layout Description
+
+PTL_MD_OP_GET Specifies that the memory descriptor will respond to
+\emph on
+get
+\emph default
+ operations.
+ By default, memory descriptors reject
+\emph on
+get
+\emph default
+ operations.
+
+\layout Description
+
+PTL_MD_MANAGE_REMOTE Specifies that the offset used in accessing the memory
+ region is provided by the incoming request.
+ By default, the offset is maintained locally.
+ When the offset is maintained locally, the offset is incremented by the
+ length of the request so that the next operation (put and/or get) will
+ access the next part of the memory region.
+\layout Description
+
+PTL_MD_TRUNCATE Specifies that the length provided in the incoming request
+ can be reduced to match the memory available in the region.
+ (The memory available in a memory region is determined by subtracting the
+ offset from the length of the memory region.) By default, if the length
+ in the incoming operation is greater than the amount of memory available,
+ the operation is rejected.
+
+\layout Description
+
+PTL_MD_ACK_DISABLE Specifies that an acknowledgement should
+\emph on
+not
+\emph default
+ be sent for incoming
+\emph on
+put
+\emph default
+ operations, even if requested.
+ By default, acknowledgements are sent for
+\emph on
+put
+\emph default
+ operations that request an acknowledgement.
+ Acknowledgements are never sent for
+\emph on
+get
+\emph default
+ operations.
+ The value sent in the reply serves as an implicit acknowledgement.
+
+\end_deeper
+\layout Standard
+
+
+\series bold
+Note
+\series default
+: It is not considered an error to have a memory descriptor that does not
+ respond to either
+\emph on
+put
+\emph default
+ or
+\emph on
+get
+\emph default
+ operations: Every memory descriptor responds to
+\emph on
+reply
+\emph default
+ operations.
+ Nor is it considered an error to have a memory descriptor that responds
+ to both
+\emph on
+put
+\emph default
+ and
+\emph on
+get
+\emph default
+ operations.
+
+\end_deeper
+\layout Description
+
+user_ptr A user-specified value that is associated with the memory descriptor.
+ The value does not need to be a pointer, but must fit in the space used
+ by a pointer.
+ This value (along with other values) is recorded in events associated with
+ operations on this memory descriptor.
+\begin_inset Foot
+collapsed true
+
+\layout Standard
+
+Tying the memory descriptor to a user-defined value can be useful when multiple
+ memory descriptors share the same event queue or when the memory descriptor
+ needs to be associated with a data structure maintained by the application.
+ For example, an MPI implementation can set the
+\family typewriter
+user_ptr
+\family default
+ argument to the value of an MPI Request.
+ This direct association allows for processing of memory descriptors by
+ the MPI implementation without a table lookup or a search for the appropriate
+ MPI Request.
+\end_inset
+
+
+\layout Description
+
+eventq A handle for the event queue used to log the operations performed
+ on the memory region.
+ If this argument is
+\family typewriter
+PTL_EQ_NONE
+\family default
+, operations performed on this memory descriptor are not logged.
+
+\layout Subsection
+
+PtlMDAttach
+\begin_inset LatexCommand \label{sec:mdattach}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlMDAttach( ptl_handle_me_t match,
+\newline
+ ptl_md_t mem_desc,
+\newline
+ ptl_unlink_t unlink_op,
+\newline
+ ptl_unlink_t unlink_nofit,
+\newline
+ ptl_handle_md_t* handle );
+\layout Standard
+\noindent
+Values of the type
+\family typewriter
+ptl_unlink_t
+\family default
+ are used to control whether an item is unlinked from a list.
+ The value
+\family typewriter
+PTL_UNLINK
+\family default
+ enables unlinking.
+ The value
+\family typewriter
+PTL_RETAIN
+\family default
+ disables unlinking.
+\layout Standard
+
+The
+\emph on
+PtlMDAttach
+\emph default
+ operation is used to create a memory descriptor and attach it to a match
+ list entry.
+ An error code is returned if this match list entry already has an associated
+ memory descriptor.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INUSE Indicates that
+\family typewriter
+match
+\family default
+ already has a memory descriptor attached.
+
+\layout Description
+
+PTL_INV_ME Indicates that
+\family typewriter
+match
+\family default
+ is not a valid match entry handle.
+
+\layout Description
+
+PTL_ILL_MD Indicates that
+\family typewriter
+mem_desc
+\family default
+ is not a legal memory descriptor.
+ This may happen because the memory region defined in
+\family typewriter
+mem_desc
+\family default
+ is invalid or because the network interface associated with the
+\family typewriter
+eventq
+\family default
+ in
+\family typewriter
+mem_desc
+\family default
+ is not the same as the network interface associated with
+\family typewriter
+match
+\family default
+.
+
+\layout Description
+
+PTL_NOSPACE Indicates that there is insufficient memory to allocate the
+ memory descriptor.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="5" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+match
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the match entry that the memory descriptor will be associated
+ with.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+mem_desc
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Provides initial values for the application visible parts of a memory descriptor.
+ Other than its use for initialization, there is no linkage between this
+ structure and the memory descriptor maintained by the API.
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+unlink_op
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A flag to indicate whether the memory descriptor is unlinked when it becomes
+ inactive, either because the operation threshold drops to zero or because
+ the maximum offset has been exceeded.
+ (Note, the check for unlinking a memory descriptor only occurs after
+ the completion of a successful operation.
+ If the threshold is set to zero during initialization or using
+\emph on
+PtlMDUpdate
+\emph default
+, the memory descriptor is
+\series bold
+not
+\series default
+ unlinked.)
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+unlink_nofit
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A flag to indicate whether the memory descriptor is unlinked when the space
+ remaining in the memory descriptor is not sufficient for a matching operation.
+ If an incoming message arrives at a memory descriptor that does
+ not have sufficient space and the
+\series bold
+PTL_MD_TRUNCATE
+\series default
+ option is not specified, the memory descriptor will be unlinked.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+handle
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold a handle for the newly created
+ memory descriptor.
+ The
+\family typewriter
+handle
+\family default
+ argument can be NULL, in which case the handle will not be returned.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Subsection
+
+PtlMDBind
+\begin_inset LatexCommand \label{sec:mdbind}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlMDBind( ptl_handle_ni_t interface,
+\newline
+ ptl_md_t mem_desc,
+\newline
+ ptl_handle_md_t* handle );
+\layout Standard
+\noindent
+The
+\emph on
+PtlMDBind
+\emph default
+ operation is used to create a
+\begin_inset Quotes eld
+\end_inset
+
+free floating
+\begin_inset Quotes erd
+\end_inset
+
+ memory descriptor, i.e., a memory descriptor that is not associated with
+ a match list entry.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_NI Indicates that
+\family typewriter
+interface
+\family default
+ is not a valid network interface handle.
+
+\layout Description
+
+PTL_ILL_MD Indicates that
+\family typewriter
+mem_desc
+\family default
+ is not a legal memory descriptor.
+ This may happen because the memory region defined in
+\family typewriter
+mem_desc
+\family default
+ is invalid or because the network interface associated with the
+\family typewriter
+eventq
+\family default
+ in
+\family typewriter
+mem_desc
+\family default
+ is not the same as the network interface,
+\family typewriter
+interface
+\family default
+.
+
+\layout Description
+
+PTL_INV_EQ Indicates that the event queue associated with
+\family typewriter
+mem_desc
+\family default
+ is not valid.
+
+\layout Description
+
+PTL_NOSPACE Indicates that there is insufficient memory to allocate the
+ memory descriptor.
+
+\layout Description
+
+PTL_SEGV Indicates that
+\family typewriter
+handle
+\family default
+ is not a legal address.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="3" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+interface
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the network interface with which the memory descriptor will
+ be associated.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+mem_desc
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Provides initial values for the application visible parts of a memory descriptor.
+ Other than its use for initialization, there is no linkage between this
+ structure and the memory descriptor maintained by the API.
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+handle
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold a handle for the newly created
+ memory descriptor.
+ The
+\family typewriter
+handle
+\family default
+ argument must be a valid address and cannot be NULL.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Subsection
+
+PtlMDUnlink
+\begin_inset LatexCommand \label{sec:mdfree}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlMDUnlink( ptl_handle_md_t mem_desc );
+\layout Standard
+\noindent
+The
+\emph on
+PtlMDUnlink
+\emph default
+ function unlinks the memory descriptor from any match list entry it may
+ be linked to and releases the resources associated with a memory descriptor.
+ (This function does not free the memory region associated with the memory
+ descriptor.) This function also releases the resources associated with a
+ floating memory descriptor.
+ Only memory descriptors with no pending operations may be unlinked.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_MD Indicates that
+\family typewriter
+mem_desc
+\family default
+ is not a valid memory descriptor handle.
+\layout Description
+
+PTL_MD_INUSE Indicates that
+\family typewriter
+mem_desc
+\family default
+ has pending operations and cannot be unlinked.
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="1" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+mem_desc
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the memory descriptor to be released.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Subsection
+
+PtlMDUpdate
+\begin_inset LatexCommand \label{sec:mdupdate}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlMDUpdate( ptl_handle_md_t mem_desc,
+\newline
+ ptl_md_t* old_md,
+\newline
+ ptl_md_t* new_md,
+\newline
+ ptl_handle_eq_t testq );
+\layout Standard
+\noindent
+The
+\emph on
+PtlMDUpdate
+\emph default
+ function provides a conditional, atomic update operation for memory descriptors.
+ The memory descriptor identified by
+\family typewriter
+mem_desc
+\family default
+ is only updated if the event queue identified by
+\family typewriter
+testq
+\family default
+ is empty.
+ The intent is to only enable updates to the memory descriptor when no new
+ messages have arrived since the last time the queue was checked.
+ See section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:exmpi}
+
+\end_inset
+
+ for an example of how this function can be used.
+\layout Standard
+
+If
+\family typewriter
+new_md
+\family default
+ is not NULL, the memory descriptor identified by mem_desc will be updated
+ to reflect the values in the structure pointed to by
+\family typewriter
+new_md
+\family default
+ if
+\family typewriter
+testq
+\family default
+ has the value
+\family typewriter
+PTL_EQ_NONE
+\family default
+ or if the event queue identified by
+\family typewriter
+testq
+\family default
+ is empty.
+ If
+\family typewriter
+old_md
+\family default
+ is not NULL, the current value of the memory descriptor identified by
+\family typewriter
+mem_desc
+\family default
+ is recorded in the location identified by
+\family typewriter
+old_md
+\family default
+.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_NOUPDATE Indicates that the update was not performed because
+\family typewriter
+testq
+\family default
+ was not empty.
+
+\layout Description
+
+PTL_INV_MD Indicates that
+\family typewriter
+mem_desc
+\family default
+ is not a valid memory descriptor handle.
+
+\layout Description
+
+PTL_ILL_MD Indicates that the value pointed to by
+\family typewriter
+new_md
+\family default
+ is not a legal memory descriptor (e.g., the memory region specified by the
+ memory descriptor may be invalid).
+
+\layout Description
+
+PTL_INV_EQ Indicates that
+\family typewriter
+testq
+\family default
+ is not a valid event queue handle.
+
+\layout Description
+
+PTL_SEGV Indicates that
+\family typewriter
+new_md
+\family default
+ or
+\family typewriter
+old_md
+\family default
+ is not a legal address.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="4" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+mem_desc
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the memory descriptor to update.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+old_md
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+If
+\family typewriter
+old_md
+\family default
+ is not the value
+\family typewriter
+NULL
+\family default
+, the current value of the memory descriptor will be stored in the location
+ identified by
+\family typewriter
+old_md
+\family default 
+.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+new_md
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+If
+\family typewriter
+new_md
+\family default
+ is not the value
+\family typewriter
+NULL
+\family default
+, this argument provides the new values for the memory descriptor, if the
+ update is performed.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+testq
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for an event queue used to predicate the update.
+ If
+\family typewriter
+testq
+\family default
+ is equal to
+\family typewriter
+PTL_EQ_NONE
+\family default
+, the update is performed unconditionally.
+ Otherwise, the update is performed if and only if
+\family typewriter
+testq
+\family default
+ is empty.
+ If the update is not performed, the function returns the value
+\family typewriter
+PTL_NOUPDATE
+\family default
+.
+ (Note, the
+\family typewriter
+testq
+\family default
+ argument does not need to be the same as the event queue associated with
+ the memory descriptor.)
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Standard
+
+The conditional update can be used to ensure that the memory descriptor
+ has not changed between the time it was examined and the time it is updated.
+ In particular, it is needed to support an MPI implementation where the
+ activity of searching an unexpected message queue and posting a receive
+ must be atomic.
+\layout Section
+
+Events and Event Queues
+\begin_inset LatexCommand \label{sec:eq}
+
+\end_inset
+
+
+\layout Standard
+
+Event queues are used to log operations performed on memory descriptors.
+ They can also be used to hold acknowledgements for completed
+\emph on
+put
+\emph default
+ operations and to note when the data specified in a
+\emph on
+put
+\emph default
+ operation has been sent (i.e., when it is safe to reuse the buffer that holds
+ this data).
+ Multiple memory descriptors can share a single event queue.
+\layout Standard
+
+In addition to the
+\family typewriter
+ptl_handle_eq_t
+\family default
+ type, the Portals API defines two types associated with events: The
+\family typewriter
+
+\newline
+ptl_event_kind_t
+\family default
+ type defines the kinds of events that can be stored in an event queue.
+ The
+\family typewriter
+ptl_event_t
+\family default
+ type defines a structure that holds the information associated with an
+ event.
+\layout Standard
+
+The Portals API also provides four functions for dealing with event queues:
+ The
+\emph on
+PtlEQAlloc
+\emph default
+ function is used to allocate the API resources needed for an event queue,
+ the
+\emph on
+PtlEQFree
+\emph default
+ function is used to release these resources, the
+\emph on
+PtlEQGet
+\emph default
+ function can be used to get the next event from an event queue, and the
+
+\emph on
+PtlEQWait
+\emph default
+ function can be used to block a process (or thread) until an event queue
+ has at least one event.
+\layout Subsection
+
+Kinds of Events
+\begin_inset LatexCommand \label{sec:ek-type}
+
+\end_inset
+
+
+\layout LyX-Code
+
+typedef enum {
+\newline
+ PTL_EVENT_GET_START, PTL_EVENT_GET_END, PTL_EVENT_GET_FAIL,
+\newline
+ PTL_EVENT_PUT_START, PTL_EVENT_PUT_END, PTL_EVENT_PUT_FAIL,
+\newline
+ PTL_EVENT_REPLY_START, PTL_EVENT_REPLY_END, PTL_EVENT_REPLY_FAIL,
+\newline
+ PTL_EVENT_SEND_START, PTL_EVENT_SEND_END, PTL_EVENT_SEND_FAIL,
+\newline
+ PTL_EVENT_ACK,
+\newline
+ PTL_EVENT_UNLINK
+\newline
+} ptl_event_kind_t;
+\layout Standard
+\noindent
+The Portals API defines fourteen types of events that can be logged in an
+ event queue:
+\layout Description
+
+PTL_EVENT_GET_START A remote
+\emph on
+get
+\emph default
+ operation has been started on the memory descriptor.
+ The memory region associated with this descriptor should not be altered
+ until the corresponding END or FAIL event is logged.
+\layout Description
+
+PTL_EVENT_GET_END A previously initiated
+\emph on
+get
+\emph default
+ operation completed successfully.
+ This event is logged after the reply has been sent by the local node.
+ As such, the process could free the memory descriptor once it sees this
+ event.
+
+\layout Description
+
+PTL_EVENT_GET_FAIL A previously initiated
+\emph on
+get
+\emph default
+ operation completed unsuccessfully.
+ This event is logged after the reply has been sent by the local node.
+ As such, the process could free the memory descriptor once it sees this
+ event.
+
+\layout Description
+
+PTL_EVENT_PUT_START A remote
+\emph on
+put
+\emph default
+ operation has been started on the memory descriptor.
+ The memory region associated with this descriptor should be considered
+ volatile until the corresponding END or FAIL event is logged.
+\layout Description
+
+PTL_EVENT_PUT_END A previously initiated
+\emph on
+put
+\emph default
+ operation completed successfully.
+ The underlying layers will not alter the memory (on behalf of this operation)
+ once this event has been logged.
+
+\layout Description
+
+PTL_EVENT_PUT_FAIL A previously initiated
+\emph on
+put
+\emph default
+ operation completed unsuccessfully.
+ The underlying layers will not alter the memory (on behalf of this operation)
+ once this event has been logged.
+
+\layout Description
+
+PTL_EVENT_REPLY_START A
+\emph on
+reply
+\emph default
+ operation has been started on the memory descriptor.
+
+\layout Description
+
+PTL_EVENT_REPLY_END A previously initiated
+\emph on
+reply
+\emph default
+ operation has completed successfully.
+ This event is logged after the data (if any) from the reply has been written
+ into the memory descriptor.
+
+\layout Description
+
+PTL_EVENT_REPLY_FAIL A previously initiated
+\emph on
+reply
+\emph default
+ operation has completed unsuccessfully.
+ This event is logged after the data (if any) from the reply has been written
+ into the memory descriptor.
+
+\layout Description
+
+PTL_EVENT_ACK An
+\emph on
+acknowledgement
+\emph default
+ was received.
+ This event is logged when the acknowledgement is received.
+\layout Description
+
+PTL_EVENT_SEND_START An outgoing
+\emph on
+send
+\emph default
+ operation has been started.
+ The memory region associated with this descriptor should not be altered
+ until the corresponding END or FAIL event is logged.
+\layout Description
+
+PTL_EVENT_SEND_END A previously initiated
+\emph on
+send
+\emph default
+ operation has completed successfully.
+ This event is logged after the entire buffer has been sent and it is safe
+ for the application to reuse the buffer.
+
+\layout Description
+
+PTL_EVENT_SEND_FAIL A previously initiated
+\emph on
+send
+\emph default
+ operation has completed unsuccessfully.
+ The process can safely manipulate the memory or free the memory descriptor
+ once it sees this event.
+\layout Description
+
+PTL_EVENT_UNLINK A memory descriptor associated with this event queue has
+ been automatically unlinked.
+ This event is not generated when a memory descriptor is explicitly unlinked
+ by calling
+\shape italic
+PtlMDUnlink
+\shape default
+.
+ This event does not decrement the threshold count.
+\layout Subsection
+
+Event Ordering
+\layout Standard
+
+The Portals API guarantees that when a process initiates two operations
+ on a remote process, the operations will be initiated on the remote process
+ in the same order that they were initiated on the original process.
+ As an example, if process A initiates two 
+\emph on
+put
+\emph default
+ operations,
+\emph on
+x
+\emph default
+ and
+\emph on
+y
+\emph default
+, on process B, the Portals API guarantees that process A will receive the
+
+\family typewriter
+PTL_EVENT_SEND_START
+\family default
+ events for
+\emph on
+x
+\emph default
+ and
+\emph on
+y
+\emph default
+ in the same order that process B receives the
+\family typewriter
+PTL_EVENT_PUT_START
+\family default
+ events for
+\emph on
+x
+\emph default
+ and
+\emph on
+y
+\emph default
+.
+ Notice that the API does not guarantee that the start events will be delivered
+ in the same order that process A initiated the
+\emph on
+x
+\emph default
+ and
+\emph on
+y
+\emph default
+ operations.
+ If process A needs to ensure the ordering of these operations, it should
+ include code to wait for the initiation of
+\emph on
+x
+\emph default
+ before it initiates
+\emph on
+y
+\emph default
+.
+\layout Subsection
+
+Failure Notification
+\layout Standard
+
+Operations may fail to complete successfully; however, unless the node itself
+ fails, every operation that is started will eventually complete.
+ While an operation is in progress, the memory associated with the operation
+ should not be viewed (in the case of a put or a reply) or altered (in the
+ case of a send or get).
+ Operation completion, whether successful or unsuccessful, is final.
+ That is, when an operation completes, the memory associated with the operation
+ will no longer be read or altered by the operation.
+ A network interface can use the
+\family typewriter
+ptl_ni_fail_t
+\family default
+ to define more specific information regarding the failure of the operation
+ and record this information in the
+\family typewriter
+ni_fail_type
+\family default
+ field of the event.
+\layout Subsection
+
+The Event Type
+\begin_inset LatexCommand \label{sec:event-type}
+
+\end_inset
+
+
+\layout LyX-Code
+
+typedef struct {
+\newline
+ ptl_event_kind_t type;
+\newline
+ ptl_process_id_t initiator;
+\newline
+ ptl_uid_t uid;
+\layout LyX-Code
+
+ ptl_pt_index_t portal;
+\newline
+ ptl_match_bits_t match_bits;
+\newline
+ ptl_size_t rlength;
+\newline
+ ptl_size_t mlength;
+\newline
+ ptl_size_t offset;
+\newline
+ ptl_handle_md_t md_handle;
+\newline
+ ptl_md_t mem_desc;
+\newline
+ ptl_hdr_data_t hdr_data;
+\newline
+ ptl_seq_t link;
+\newline
+ ptl_ni_fail_t ni_fail_type;
+\newline
+ volatile ptl_seq_t sequence;
+\newline
+} ptl_event_t;
+\layout Standard
+\noindent
+An event structure includes the following members:
+\layout Description
+
+type Indicates the type of the event.
+
+\layout Description
+
+initiator The id of the initiator.
+
+\layout Description
+
+portal The Portal table index specified in the request.
+
+\layout Description
+
+match_bits A copy of the match bits specified in the request.
+ See section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:me}
+
+\end_inset
+
+ for more information on match bits.
+
+\layout Description
+
+rlength The length (in bytes) specified in the request.
+
+\layout Description
+
+mlength The length (in bytes) of the data that was manipulated by the operation.
+ For truncated operations, the manipulated length will be the number of
+ bytes specified by the memory descriptor (possibly with an offset) operation.
+ For all other operations, the manipulated length will be the length of
+ the requested operation.
+
+\layout Description
+
+offset Is the displacement (in bytes) into the memory region that the operation
+ used.
+ The offset can be determined by the operation (see Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:datamovement}
+
+\end_inset
+
+) for a remote managed memory descriptor, or by the local memory descriptor
+ (see Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:md}
+
+\end_inset
+
+).
+
+\layout Description
+
+md_handle Is the handle to the memory descriptor associated with the event.
+\layout Description
+
+mem_desc Is the state of the memory descriptor immediately after the event
+ has been processed.
+
+\layout Description
+
+hdr_data 64 bits of out-of-band user data (see Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+).
+
+\layout Description
+
+link The
+\emph on
+link
+\emph default
+ member is used to link
+\family typewriter
+START
+\family default
+ events with the
+\family typewriter
+END
+\family default
+ or
+\family typewriter
+FAIL
+\family default
+ event that signifies completion of the operation.
+ The
+\emph on
+link
+\emph default
+ member will be the same for the two events associated with an operation.
+ The link member is also used to link an
+\family typewriter
+UNLINK
+\family default
+ event with the event that caused the memory descriptor to be unlinked.
+\layout Description
+
+sequence The sequence number for this event.
+ Sequence numbers are unique to each event.
+\layout Comment
+
+The
+\emph on
+sequence
+\emph default
+ member is the last member and is volatile to support SMP implementations.
+ When an event structure is filled in, the
+\emph on
+sequence
+\emph default
+ member should be written after all other members have been updated.
+ Moreover, a memory barrier should be inserted between the updating of other
+ members and the updating of the
+\emph on
+sequence
+\emph default
+ member.
+\layout Subsection
+
+PtlEQAlloc
+\begin_inset LatexCommand \label{sec:eqalloc}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlEQAlloc( ptl_handle_ni_t interface,
+\newline
+ ptl_size_t count,
+\newline
+ ptl_handle_eq_t* handle );
+\layout Standard
+\noindent
+The
+\emph on
+PtlEQAlloc
+\emph default
+ function is used to build an event queue.
+
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_NI Indicates that
+\family typewriter
+interface
+\family default
+ is not a valid network interface handle.
+
+\layout Description
+
+PTL_NOSPACE Indicates that there is insufficient memory to allocate the
+ event queue.
+
+\layout Description
+
+PTL_SEGV Indicates that
+\family typewriter
+handle
+\family default
+ is not a legal address.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="3" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+interface
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the interface with which the event queue will be associated.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+count
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+The number of events that can be stored in the event queue.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+handle
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold a handle for the newly created
+ event queue.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Subsection
+
+PtlEQFree
+\begin_inset LatexCommand \label{sec:eqfree}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlEQFree( ptl_handle_eq_t eventq );
+\layout Standard
+\noindent
+The
+\emph on
+PtlEQFree
+\emph default
+ function releases the resources associated with an event queue.
+ It is up to the user to ensure that no memory descriptors are associated
+ with the event queue once it is freed.
+
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_EQ Indicates that
+\family typewriter
+eventq
+\family default
+ is not a valid event queue handle.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="1" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+eventq
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+A handle for the event queue to be released.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Subsection
+
+PtlEQGet
+\begin_inset LatexCommand \label{sec:eqget}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlEQGet( ptl_handle_eq_t eventq,
+\newline
+ ptl_event_t* event );
+\layout Standard
+\noindent
+The
+\emph on
+PtlEQGet
+\emph default
+ function is a nonblocking function that can be used to get the next event
+ in an event queue.
+ The event is removed from the queue.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_EQ_DROPPED Indicates success (i.e., an event is returned) and that at
+ least one event between this event and the last event obtained (using
+\emph on
+PtlEQGet
+\emph default
+ or
+\emph on
+PtlEQWait
+\emph default
+) from this event queue has been dropped due to limited space in the event
+ queue.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_EQ_EMPTY Indicates that
+\family typewriter
+eventq
+\family default
+ is empty or another thread is waiting on
+\emph on
+PtlEQWait
+\emph default
+.
+
+\layout Description
+
+PTL_INV_EQ Indicates that
+\family typewriter
+eventq
+\family default
+ is not a valid event queue handle.
+
+\layout Description
+
+PTL_SEGV Indicates that
+\family typewriter
+event
+\family default
+ is not a legal address.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="2" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.5in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+eventq
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the event queue.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+event
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold the values associated with
+ the next event in the event queue.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Subsection
+
+PtlEQWait
+\begin_inset LatexCommand \label{sec:eqwait}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlEQWait( ptl_handle_eq_t eventq,
+\newline
+ ptl_event_t* event );
+\layout Standard
+\noindent
+The
+\emph on
+PtlEQWait
+\emph default
+ function can be used to block the calling process (thread) until there
+ is an event in an event queue.
+ This function also returns the next event in the event queue and removes
+ this event from the queue.
+ This is the only blocking operation in the Portals 3.2 API.
+ In the event that multiple threads are waiting on the same event queue,
+ PtlEQWait is guaranteed to wake exactly one thread, but the order in which
+ they are awakened is not specified.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_EQ_DROPPED Indicates success (i.e., an event is returned) and that at
+ least one event between this event and the last event obtained (using
+\emph on
+PtlEQGet
+\emph default
+ or
+\emph on
+PtlEQWait
+\emph default
+) from this event queue has been dropped due to limited space in the event
+ queue.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_EQ Indicates that
+\family typewriter
+eventq
+\family default
+ is not a valid event queue handle.
+
+\layout Description
+
+PTL_SEGV Indicates that
+\family typewriter
+event
+\family default
+ is not a legal address.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+\noindent
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="2" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+eventq
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the event queue to wait on.
+ The calling process (thread) will be blocked until
+\family typewriter
+eventq
+\family default
+ is not empty.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+event
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold the values associated with
+ the next event in the event queue.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Section
+
+The Access Control Table
+\begin_inset LatexCommand \label{sec:ac}
+
+\end_inset
+
+
+\layout Standard
+
+Processes can use the access control table to control which processes are
+ allowed to perform operations on Portal table entries.
+ Each communication interface has a Portal table and an access control table.
+ The access control table for the default interface contains an entry at
+ index zero that allows all processes with the same user id to communicate.
+ Entries in the access control table can be manipulated using the
+\emph on
+PtlACEntry
+\emph default
+ function.
+\layout Subsection
+
+PtlACEntry
+\begin_inset LatexCommand \label{sec:acentry}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlACEntry( ptl_handle_ni_t interface,
+\newline
+ ptl_ac_index_t index,
+\newline
+ ptl_process_id_t matchid,
+\newline
+ ptl_uid_t user_id,
+\newline
+ ptl_pt_index_t portal );
+\layout Standard
+\noindent
+The
+\emph on
+PtlACEntry
+\emph default
+ function can be used to update an entry in the access control table for
+ an interface.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_NI Indicates that
+\family typewriter
+interface
+\family default
+ is not a valid network interface handle.
+
+\layout Description
+
+PTL_AC_INV_INDEX Indicates that
+\family typewriter
+index
+\family default
+ is not a valid access control table index.
+
+\layout Description
+
+PTL_INV_PROC Indicates that
+\family typewriter
+matchid
+\family default
+ is not a valid process identifier.
+
+\layout Description
+
+PTL_PT_INV_INDEX Indicates that
+\family typewriter
+portal
+\family default
+ is not a valid Portal table index.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="5" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+interface
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Identifies the interface to use.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+index
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+The index of the entry in the access control table to update.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+matchid
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Identifies the process(es) that are allowed to perform operations.
+ The constants
+\family typewriter
+PTL_PID_ANY
+\family default
+ and
+\family typewriter
+PTL_NID_ANY
+\family default
+ can be used to wildcard either of the ids in the
+\family typewriter
+ptl_process_id_t
+\family default
+ structure.
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+user_id
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Identifies the user that is allowed to perform operations.
+ The value
+\family typewriter
+PTL_UID_ANY
+\family default
+ can be used to wildcard the user.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+portal
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Identifies the Portal index(es) that can be used.
+ The value
+\family typewriter
+PTL_PT_INDEX_ANY
+\family default
+ can be used to wildcard the Portal index.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Section
+
+Data Movement Operations
+\begin_inset LatexCommand \label{sec:datamovement}
+
+\end_inset
+
+
+\layout Standard
+
+The Portals API provides two data movement operations:
+\emph on
+PtlPut
+\emph default
+ and
+\emph on
+PtlGet
+\emph default
+.
+\layout Subsection
+
+PtlPut
+\begin_inset LatexCommand \label{sec:put}
+
+\end_inset
+
+
+\layout LyX-Code
+
+typedef enum { PTL_ACK_REQ, PTL_NOACK_REQ } ptl_ack_req_t;
+\newline
+
+\newline
+int PtlPut( ptl_handle_md_t mem_desc,
+\newline
+ ptl_ack_req_t ack_req,
+\newline
+ ptl_process_id_t target,
+\newline
+ ptl_pt_index_t portal,
+\newline
+ ptl_ac_index_t cookie,
+\newline
+ ptl_match_bits_t match_bits,
+\newline
+ ptl_size_t offset,
+\newline
+ ptl_hdr_data_t hdr_data );
+\layout Standard
+\noindent
+Values of the type
+\family typewriter
+ptl_ack_req_t
+\family default
+ are used to control whether an acknowledgement should be sent when the
+ operation completes (i.e., when the data has been written to a memory descriptor
+ of the
+\family typewriter
+target
+\family default
+ process).
+ The value
+\family typewriter
+PTL_ACK_REQ
+\family default
+ requests an acknowledgement; the value
+\family typewriter
+PTL_NOACK_REQ
+\family default
+ requests that no acknowledgement should be generated.
+\layout Standard
+
+The
+\emph on
+PtlPut
+\emph default
+ function initiates an asynchronous put operation.
+ There are several events associated with a put operation: initiation of
+ the send on the local node (
+\family typewriter
+PTL_EVENT_SEND_START
+\family default
+), completion of the send on the local node (
+\family typewriter
+PTL_EVENT_SEND_END
+\family default
+ or
+\family typewriter
+PTL_EVENT_SEND_FAIL
+\family default
+), and, when the send completes successfully, the receipt of an acknowledgement
+ (
+\family typewriter
+PTL_EVENT_ACK
+\family default
+) indicating that the operation was accepted by the target.
+ These events will be logged in the event queue associated with the memory
+ descriptor (
+\family typewriter
+mem_desc
+\family default
+) used in the put operation.
+ Using a memory descriptor that does not have an associated event queue
+ results in these events being discarded.
+ In this case, the application must have another mechanism (e.g., a higher
+ level protocol) for determining when it is safe to modify the memory region
+ associated with the memory descriptor.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_MD Indicates that
+\family typewriter
+mem_desc
+\family default
+ is not a valid memory descriptor.
+
+\layout Description
+
+PTL_INV_PROC Indicates that
+\family typewriter
+target
+\family default
+ is not a valid process id.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="8" columns="3">
+<features>
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+mem_desc
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the memory descriptor that describes the memory to be sent.
+ If the memory descriptor has an event queue associated with it, it will
+ be used to record events when the message has been sent (PTL_EVENT_SEND_START,
+ PTL_EVENT_SEND_END).
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ack_req
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Controls whether an acknowledgement event is requested.
+ Acknowledgements are only sent when they are requested by the initiating
+ process
+\series bold
+and
+\series default
+ the memory descriptor has an event queue
+\series bold
+and
+\series default
+ the target memory descriptor enables them.
+ Allowed constants:
+\family typewriter
+PTL_ACK_REQ
+\family default
+,
+\family typewriter
+PTL_NOACK_REQ
+\family default
+.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+target
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A process id for the target process.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+portal
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+The index in the remote Portal table.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+cookie
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+The index into the access control table of the target process.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+match_bits
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+The match bits to use for message selection at the target process.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+offset
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+The offset into the target memory descriptor (only used when the target
+ memory descriptor has the
+\family typewriter
+PTL_MD_MANAGE_REMOTE
+\family default
+ option set).
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+hdr_data
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+64 bits of user data that can be included in the message header.
+ This data is written to an event queue entry at the target if an event
+ queue is present on the matching memory descriptor.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Subsection
+
+PtlGet
+\begin_inset LatexCommand \label{sec:get}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlGet( ptl_handle_md_t mem_desc,
+\newline
+ ptl_process_id_t target,
+\newline
+ ptl_pt_index_t portal,
+\newline
+ ptl_ac_index_t cookie,
+\newline
+ ptl_match_bits_t match_bits,
+\newline
+ ptl_size_t offset );
+\layout Standard
+\noindent
+The
+\emph on
+PtlGet
+\emph default
+ function initiates a remote read operation.
+ There are two event pairs associated with a get operation: when the data
+ is sent from the remote node, a
+\family typewriter
+PTL_EVENT_GET_{START|END}
+\family default
+ event pair is registered on the remote node; and when the data is returned
+ from the remote node a
+\family typewriter
+PTL_EVENT_REPLY_{START|END}
+\family default
+ event pair is registered on the local node.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_MD Indicates that
+\family typewriter
+mem_desc
+\family default
+ is not a valid memory descriptor.
+
+\layout Description
+
+PTL_INV_PROC Indicates that
+\family typewriter
+target
+\family default
+ is not a valid process id.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="6" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+mem_desc
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the memory descriptor that describes the memory into which
+ the requested data will be received.
+ The memory descriptor can have an event queue associated with it to record
+ events, such as when the message receive has started (
+\family typewriter
+PTL_EVENT_REPLY
+\family default
+_
+\family typewriter
+START
+\family default
+).
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+target
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A process id for the target process.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+portal
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+The index in the remote Portal table.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+cookie
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+The index into the access control table of the target process.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+match_bits
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+The match bits to use for message selection at the target process.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+offset
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+The offset into the target memory descriptor (only used when the target
+ memory descriptor has the
+\family typewriter
+PTL_MD_MANAGE_REMOTE
+\family default
+ option set).
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Section
+
+Summary
+\layout Standard
+
+
+\begin_inset LatexCommand \label{sec:summary}
+
+\end_inset
+
+ We conclude this section by summarizing the names introduced by the Portals
+ 3.2 API.
+ We start by summarizing the names of the types introduced by the API.
+ This is followed by a summary of the functions introduced by the API.
+ This, in turn, is followed by a summary of the function return codes.
+ Finally, we conclude with a summary of the other constant values introduced
+ by the API.
+\layout Standard
+
+Table\SpecialChar ~
+
+\begin_inset LatexCommand \ref{tab:types}
+
+\end_inset
+
+ presents a summary of the types defined by the Portals API.
+ The first column in this table gives the type name, the second column gives
+ a brief description of the type, the third column identifies the section
+ where the type is defined, and the fourth column lists the functions that
+ have arguments of this type.
+\layout Standard
+
+
+\begin_inset Float table
+placement htbp
+wide false
+collapsed false
+
+\layout Caption
+
+Types Defined by the Portals 3.2 API
+\begin_inset LatexCommand \label{tab:types}
+
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+medskip
+\end_inset
+
+
+\layout Standard
+\noindent
+
+\size small
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="25" columns="4">
+<features firstHeadEmpty="true">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="2in">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="2.2in">
+<row bottomline="true">
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+ Name
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+ Meaning
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+ Sect
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+ Functions
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_ac_index_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+indexes for an access control table
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:index-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlACEntry, PtlPut, PtlGet
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_ack_req_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+acknowledgement request types
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlPut
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+kinds of events
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlGet
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+information about events
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:event-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlEQGet, PtlEQWait
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_seq_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+event sequence number
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:event-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+PtlEQGet, PtlEQWait
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_handle_any_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+handles for any object
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:handle-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlNIHandle
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_handle_eq_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+handles for event queues
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:handle-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlEQAlloc, PtlEQFree, PtlEQGet, PtlEQWait, PtlMDUpdate
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_handle_md_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+handles for memory descriptors
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:handle-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlMDAlloc, PtlMDUnlink, PtlMDUpdate, PtlMEAttach, PtlMEAttachAny, PtlMEInsert,
+ PtlPut, PtlGet
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_handle_me_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+handles for match entries
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:handle-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlMEUnlink
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_handle_ni_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+handles for network interfaces
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:handle-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlNIInit, PtlNIFini, PtlNIStatus, PtlNIDist, PtlEQAlloc, PtlACEntry, PtlPut,
+ PtlGet
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_nid_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+node identifiers
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:id-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+ PtlGetId, PtlACEntry
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_pid_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+process identifier
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:id-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+PtlGetId, PtlACEntry
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_uid_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+user identifier
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:id-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+PtlGetUid, PtlACEntry
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_ins_pos_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+insertion position (before or after)
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:meattach}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlMEAttach, PtlMEAttachAny, PtlMEInsert
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_interface_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+identifiers for network interfaces
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ni-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlNIInit
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_match_bits_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+match (and ignore) bits
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:mb-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlPut, PtlGet
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_md_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+memory descriptors
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:md-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlMDAttach, PtlMDUpdate
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_ni_fail_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+network interface-specific failures
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:eq}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+PtlEQGet, PtlEQWait
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_process_id_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+process identifiers
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:pid-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlGetId, PtlNIDist, PtlMEAttach, PtlMEAttachAny, PtlACEntry, PtlPut, PtlGet
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_pt_index_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+indexes for Portal tables
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:index-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlMEAttach, PtlMEAttachAny, PtlACEntry
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_size_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+sizes
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:size-t}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlEQAlloc, PtlPut, PtlGet
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_sr_index_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+indexes for status registers
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:stat-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlNIStatus
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_sr_value_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+values in status registers
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:stat-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlNIStatus
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_unlink_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+unlink options
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:meattach}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlMDAttach
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Standard
+
+Table\SpecialChar ~
+
+\begin_inset LatexCommand \ref{tab:func}
+
+\end_inset
+
+ presents a summary of the functions defined by the Portals API.
+ The first column in this table gives the name for the function, the second
+ column gives a brief description of the operation implemented by the function,
+ and the third column identifies the section where the function is defined.
+\layout Standard
+
+
+\begin_inset Float table
+placement htbp
+wide false
+collapsed false
+
+\layout Caption
+
+Functions Defined by the Portals 3.2 API
+\begin_inset LatexCommand \label{tab:func}
+
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+medskip
+\end_inset
+
+
+\layout Standard
+\align center
+
+\size small
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="24" columns="3">
+<features firstHeadEmpty="true">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<row bottomline="true">
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+Name
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ Operation
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ Section
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+PtlACEntry
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ update an entry in an access control table
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ac}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlEQAlloc
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ create an event queue
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:eq}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlEQGet
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ get the next event from an event queue
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:eq}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlEQFree
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ release the resources for an event queue
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:eq}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlEQWait
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ wait for a new event in an event queue
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:eq}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlFini
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ shut down the Portals API
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:init}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlGet
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ perform a get operation
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:datamovement}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlGetId
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ get the id for the current process
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:pid}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlInit
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ initialize the Portals API
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:init}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlMDAttach
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ create a memory descriptor and attach it to a match entry
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:md}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlMDBind
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ create a free-floating memory descriptor
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:mdbind}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlMDUnlink
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ remove a memory descriptor from a list and release its resources
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:md}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlMDUpdate
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ update a memory descriptor
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:md}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlMEAttach
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+create a match entry and attach it to a Portal table
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:me}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+PtlMEAttachAny
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+create a match entry and attach it to a free Portal table entry
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:attachany}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlMEInsert
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ create a match entry and insert it in a list
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:me}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlMEUnlink
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ remove a match entry from a list and release its resources
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:me}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlNIDist
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ get the distance to another process
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ni}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlNIFini
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ shut down a network interface
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ni}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlNIHandle
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ get the network interface handle for an object
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ni}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlNIInit
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ initialize a network interface
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ni}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlNIStatus
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ read a network interface status register
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ni}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlPut
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ perform a put operation
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:datamovement}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Standard
+
+Table\SpecialChar ~
+
+\begin_inset LatexCommand \ref{tab:retcodes}
+
+\end_inset
+
+ summarizes the return codes used by functions defined by the Portals API.
+ All of these constants are integer values.
+ The first column of this table gives the symbolic name for the constant,
+ the second column gives a brief description of the value, and the third
+ column identifies the functions that can return this value.
+\layout Standard
+
+
+\begin_inset Float table
+placement htbp
+wide false
+collapsed false
+
+\layout Caption
+
+Function Return Codes for the Portals 3.2 API
+\begin_inset LatexCommand \label{tab:retcodes}
+
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+medskip
+\end_inset
+
+
+\layout Standard
+\align center
+
+\size small
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="27" columns="3">
+<features>
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="2.6in">
+<row bottomline="true">
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Name
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Meaning
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Functions
+\series default
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_AC_INV_INDEX
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+invalid access control table index
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+ PtlACEntry
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EQ_DROPPED
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+at least one event has been dropped
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+ PtlEQGet, PtlEQWait
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EQ_EMPTY
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+no events available in an event queue
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+ PtlEQGet
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_FAIL
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+error during initialization or cleanup
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+ PtlInit, PtlFini
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_ILL_MD
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+illegal memory descriptor values
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlMDAttach, PtlMDBind, PtlMDUpdate
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INIT_DUP
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+duplicate initialization of an interface
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlNIInit
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INIT_INV
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+initialization of an invalid interface
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlNIInit
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INUSE
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+the ME already has an MD
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlMDAttach
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INV_ASIZE
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+invalid access control table size
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlNIInit
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INV_EQ
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+invalid event queue handle
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlMDUpdate, PtlEQFree, PtlEQGet
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INV_HANDLE
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+invalid handle
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlNIHandle
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INV_MD
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+invalid memory descriptor handle
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlMDUnlink, PtlMDUpdate
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INV_ME
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+invalid match entry handle
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlMDAttach
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INV_NI
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+invalid network interface handle
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlNIDist, PtlNIFini, PtlMDBind, PtlEQAlloc
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INV_PROC
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+invalid process identifier
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlNIInit, PtlNIDist, PtlMEAttach, PtlMEInsert, PtlACEntry, PtlPut, PtlGet
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INV_PTINDEX
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+invalid Portal table index
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+ PtlMEAttach
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INV_REG
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+invalid status register
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+ PtlNIStatus
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INV_SR_INDX
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+invalid status register index
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+ PtlNIStatus
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_ML_TOOLONG
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+match list too long
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+ PtlMEAttach, PtlMEInsert
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_MD_INUSE
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+MD has pending operations
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+PtlMDUnlink
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_NOINIT
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+uninitialized API
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\emph on
+all
+\emph default
+, except PtlInit
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_NOSPACE
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+insufficient memory
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlNIInit, PtlMDAttach, PtlMDBind, PtlEQAlloc, PtlMEAttach, PtlMEInsert
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_NOUPDATE
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ no update was performed
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+ PtlMDUpdate
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_PT_FULL
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+Portal table is full
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+PtlMEAttachAny
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_OK
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ success
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\emph on
+all
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_SEGV
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+addressing violation
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlNIInit, PtlNIStatus, PtlNIDist, PtlNIHandle, PtlMDBind, PtlMDUpdate,
+ PtlEQAlloc, PtlEQGet, PtlEQWait
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Standard
+
+Table\SpecialChar ~
+
+\begin_inset LatexCommand \ref{tab:oconsts}
+
+\end_inset
+
+ summarizes the remaining constant values introduced by the Portals API.
+ The first column in this table presents the symbolic name for the constant,
+ the second column gives a brief description of the value, the third column
+ identifies the type for the value, the fourth column identifies the section
+ in which the value is introduced, and the fifth column identifies the other
+ sections in which the value is mentioned.
+\layout Standard
+
+
+\begin_inset Float table
+placement htbp
+wide false
+collapsed false
+
+\layout Caption
+
+Other Constants Defined by the Portals 3.2 API
+\begin_inset LatexCommand \label{tab:oconsts}
+
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+medskip
+\end_inset
+
+
+\layout Standard
+\align center
+
+\size small
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="36" columns="5">
+<features>
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<row bottomline="true">
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Name
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Meaning
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Base type
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Intr.
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Ref.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_ACK_REQ
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+request an acknowledgement
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_ack_req_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EQ_NONE
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+a NULL event queue handle
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_handle_eq_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:handle-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:md}
+
+\end_inset
+
+,
+\begin_inset LatexCommand \ref{sec:mdupdate}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_GET_START
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+get event start
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:get}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_GET_END
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+get event end
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:get}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_GET_FAIL
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+get event fail
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:get}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_PUT_START
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+put event start
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_PUT_END
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+put event end
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_PUT_FAIL
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+put event fail
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_REPLY_START
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+reply event start
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:get}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_REPLY_END
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+reply event end
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:get}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_REPLY_FAIL
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+reply event fail
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:get}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_ACK_START
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+acknowledgement event start
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_ACK_END
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+acknowledgement event end
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_ACK_FAIL
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+acknowledgement event fail
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_SEND_START
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+send event start
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_SEND_END
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+send event end
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_SEND_FAIL
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+send event fail
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_UNLINK
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+unlink event
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:md-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_PID_ANY
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+wildcard for process id fields
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_pid_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:id-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:meattach}
+
+\end_inset
+
+,
+\begin_inset LatexCommand \ref{sec:acentry}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_NID_ANY
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+wildcard for node id fields
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_nid_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:id-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:meattach}
+
+\end_inset
+
+,
+\begin_inset LatexCommand \ref{sec:acentry}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_UID_ANY
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+wildcard for user id
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_uid_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:id-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:meattach}
+
+\end_inset
+
+,
+\begin_inset LatexCommand \ref{sec:acentry}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_IFACE_DEFAULT
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+default interface
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_interface_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ni-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INS_AFTER
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+insert after
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_ins_pos_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:meinsert}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INS_BEFORE
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+insert before
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_ins_pos_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:meinsert}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_MD_ACK_DISABLE
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+a flag to disable acknowledgements
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+int
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:md-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_MD_MANAGE_REMOTE
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+a flag to enable the use of remote offsets
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+int
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:md-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+,
+\begin_inset LatexCommand \ref{sec:get}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_MD_OP_GET
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+a flag to enable get operations
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+int
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:md-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_MD_OP_PUT
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+a flag to enable put operations
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+int
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:md-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_MD_THRESH_INF
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+infinite threshold for a memory descriptor
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+int
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:md-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_MD_TRUNCATE
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+a flag to enable truncation of a request
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+int
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:md-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_NOACK_REQ
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+request no acknowledgement
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_ack_req_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_PT_INDEX_ANY
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+wildcard for Portal indexes
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_pt_index_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:acentry}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_RETAIN
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+disable unlinking
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_unlink_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:mdattach}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_SR_DROP_COUNT
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+index for the dropped count register
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_sr_index_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:stat-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:nistatus}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_UNLINK
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+enable unlinking
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_unlink_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:mdattach}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Chapter
+
+The Semantics of Message Transmission
+\begin_inset LatexCommand \label{sec:semantics}
+
+\end_inset
+
+
+\layout Standard
+
+The portals API uses four types of messages: put requests, acknowledgements,
+ get requests, and replies.
+ In this chapter, we describe the information passed on the wire for each
+ type of message.
+ We also describe how this information is used to process incoming messages.
+\layout Section
+
+Sending Messages
+\layout Standard
+
+Table\SpecialChar ~
+
+\begin_inset LatexCommand \ref{tab:put-wire}
+
+\end_inset
+
+ summarizes the information that is transmitted for a put request.
+ The first column provides a descriptive name for the information, the second
+ column provides the type for this information, the third column identifies
+ the source of the information, and the fourth column provides additional
+ notes.
+ Most information that is transmitted is obtained directly from the
+\emph on
+PtlPut
+\emph default
+ operation.
+ Notice that the handle for the memory descriptor used in the
+\emph on
+PtlPut
+\emph default
+ operation is transmitted even though this value cannot be interpreted by
+ the target.
+ A value of anything other than
+\family typewriter
+PTL_MD_NONE
+\family default
+ is interpreted as a request for an acknowledgement.
+\layout Standard
+
+
+\begin_inset Float table
+placement htbp
+wide false
+collapsed false
+
+\layout Caption
+
+Information Passed in a Put Request
+\begin_inset LatexCommand \label{tab:put-wire}
+
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+medskip
+\end_inset
+
+
+\layout Standard
+\align center
+
+\size small
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="12" columns="4">
+<features firstHeadEmpty="true">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<row bottomline="true">
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Information
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Type
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+\emph on
+PtlPut
+\emph default
+ arg
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Notes
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+operation
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+int
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+indicates a put request
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+initiator
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_process_id_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+local information
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+user
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_uid_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+local information
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+target
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_process_id_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+target
+\family default
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+portal index
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_pt_index_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+portal
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+cookie
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_ac_index_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+cookie
+\family default
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+match bits
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_match_bits_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+match_bits
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+offset
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_size_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+offset
+\family default
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+memory desc
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_handle_md_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+mem_desc
+\family default
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+no ack if
+\family typewriter
+PTL_MD_NONE
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+length
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_size_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+mem_desc
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+length
+\family default
+ member
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+data
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family roman
+\emph on
+bytes
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+mem_desc
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+start
+\family default
+ and
+\family typewriter
+length
+\family default
+ members
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Standard
+
+Table\SpecialChar ~
+
+\begin_inset LatexCommand \ref{tab:ack-wire}
+
+\end_inset
+
+ summarizes the information transmitted in an acknowledgement.
+ Most of the information is simply echoed from the put request.
+ Notice that the initiator and target are obtained directly from the put
+ request, but are swapped in generating the acknowledgement.
+ The only new piece of information in the acknowledgement is the manipulated
+ length, which is determined as the put request is satisfied.
+\layout Standard
+
+
+\begin_inset Float table
+placement htbp
+wide false
+collapsed false
+
+\layout Caption
+
+Information Passed in an Acknowledgement
+\begin_inset LatexCommand \label{tab:ack-wire}
+
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+medskip
+\end_inset
+
+
+\layout Standard
+\align center
+
+\size small
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="10" columns="4">
+<features firstHeadEmpty="true">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<row bottomline="true">
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Information
+\series default
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Type
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Put Information
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Notes
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+operation
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+int
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ indicates an acknowledgement
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ initiator
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_process_id_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ target
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ target
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_process_id_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ initiator
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ portal index
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_pt_index_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ portal index
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ echo
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ match bits
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_match_bits_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ match bits
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ echo
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ offset
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_size_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ offset
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ echo
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ memory desc
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ ptl_handle_md_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ memory desc
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ echo
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ requested length
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ ptl_size_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ length
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ echo
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ manipulated length
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ ptl_size_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ obtained from the operation
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Standard
+
+Table\SpecialChar ~
+
+\begin_inset LatexCommand \ref{tab:get-wire}
+
+\end_inset
+
+ summarizes the information that is transmitted for a get request.
+ Like the information transmitted in a put request, most of the information
+ transmitted in a get request is obtained directly from the
+\emph on
+PtlGet
+\emph default
+ operation.
+ Unlike put requests, get requests do not include the event queue handle.
+ In this case, the reply is generated whenever the operation succeeds and
+ the memory descriptor must not be unlinked until the reply is received.
+ As such, there is no advantage to explicitly sending the event queue handle.
+\layout Standard
+
+
+\begin_inset Float table
+placement htbp
+wide false
+collapsed false
+
+\layout Caption
+
+Information Passed in a Get Request
+\begin_inset LatexCommand \label{tab:get-wire}
+
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+medskip
+\end_inset
+
+
+\layout Standard
+\align center
+
+\size small
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="11" columns="4">
+<features firstHeadEmpty="true">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<row bottomline="true">
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Information
+\series default
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Type
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+\emph on
+PtlGet
+\emph default
+ argument
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Notes
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+operation
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+int
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+indicates a get operation
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+initiator
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_process_id_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+local information
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+user
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_uid_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+local information
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+target
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_process_id_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+target
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+portal index
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_pt_index_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+portal
+\family default
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+cookie
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_ac_index_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+cookie
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+match bits
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_match_bits_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+match_bits
+\family default
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+offset
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_size_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+offset
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+memory desc
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_handle_md_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+mem_desc
+\family default
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+length
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_size_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+mem_desc
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+length
+\family default
+ member
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Standard
+
+Table\SpecialChar ~
+
+\begin_inset LatexCommand \ref{tab:reply-wire}
+
+\end_inset
+
+ summarizes the information transmitted in a reply.
+ Like an acknowledgement, most of the information is simply echoed from
+ the get request.
+ The initiator and target are obtained directly from the get request, but
+ are swapped in generating the reply.
+ The only new pieces of information in the reply are the manipulated length
+ and the data, which are determined as the get request is satisfied.
+\layout Standard
+
+
+\begin_inset Float table
+placement htbp
+wide false
+collapsed false
+
+\layout Caption
+
+Information Passed in a Reply
+\begin_inset LatexCommand \label{tab:reply-wire}
+
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+medskip
+\end_inset
+
+
+\layout Standard
+\align center
+
+\size small
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="11" columns="4">
+<features firstHeadEmpty="true">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<row bottomline="true">
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Information
+\series default
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Type
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Get Information
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Notes
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+operation
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+int
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+indicates a reply
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+initiator
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_process_id_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+target
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+target
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_process_id_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+initiator
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+portal index
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_pt_index_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+portal index
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+echo
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+match bits
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_match_bits_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+match bits
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+echo
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+offset
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_size_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+offset
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+echo
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+memory desc
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_handle_md_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+memory desc
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+echo
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+requested length
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_size_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+length
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+echo
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+manipulated length
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_size_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+obtained from the operation
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+data
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\emph on
+bytes
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+obtained from the operation
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Section
+
+Receiving Messages
+\begin_inset LatexCommand \label{sec:receiving}
+
+\end_inset
+
+
+\layout Standard
+
+When an incoming message arrives on a network interface, the communication
+ system first checks that the target process identified in the request is
+ a valid process that has initialized the network interface (i.e., that the
+ target process has a valid Portal table).
+ If this test fails, the communication system discards the message and increment
+s the dropped message count for the interface.
+ The remainder of the processing depends on the type of the incoming message.
+ Put and get messages are subject to access control checks and translation
+ (searching a match list), while acknowledgement and reply messages bypass
+ the access control checks and the translation step.
+\layout Standard
+
+Acknowledgement messages include a handle for the memory descriptor used
+ in the original
+\emph on
+PtlPut
+\emph default
+ operation.
+ This memory descriptor will identify the event queue where the event should
+ be recorded.
+ Upon receipt of an acknowledgement, the runtime system only needs to confirm
+ that the memory descriptor and event queue still exist and that there is
+ space for another event.
+ Should any of these conditions fail, the message is simply discarded
+ and the dropped message count for the interface is incremented.
+ Otherwise, the system builds an acknowledgement event from the information
+ in the acknowledgement message and adds it to the event queue.
+\layout Standard
+
+Reception of reply messages is also relatively straightforward.
+ Each reply message includes a handle for a memory descriptor.
+ If this descriptor exists, it is used to receive the message.
+ A reply message will be dropped if the memory descriptor identified in
+ the request doesn't exist.
+ In this case, the dropped message count for the interface is
+ incremented.
+ This is the only reason for dropping reply messages.
+ Every memory descriptor accepts and truncates incoming reply messages,
+ eliminating the other potential reasons for rejecting a reply message.
+\layout Standard
+
+The critical step in processing an incoming put or get request involves
+ mapping the request to a memory descriptor.
+ This step starts by using the Portal index in the incoming request to identify
+ a list of match entries.
+ This list of match entries is searched in order until a match entry is
+ found whose match criteria matches the match bits in the incoming request
+ and whose memory descriptor accepts the request.
+\layout Standard
+
+Because acknowledgement and reply messages are generated in response to requests
+ made by the process receiving these messages, the checks performed by the
+ runtime system for acknowledgements and replies are minimal.
+ In contrast, put and get messages are generated by remote processes and
+ the checks performed for these messages are more extensive.
+ Incoming put or get messages may be rejected because:
+\layout Itemize
+
+the Portal index supplied in the request is not valid;
+\layout Itemize
+
+the cookie supplied in the request is not a valid access control entry;
+
+\layout Itemize
+
+the access control entry identified by the cookie does not match the identifier
+ of the requesting process;
+\layout Itemize
+
+the access control entry identified by the cookie does not
+ match the Portal index supplied in the request; or
+\layout Itemize
+
+the match bits supplied in the request do not match any of the match entries
+ with a memory descriptor that accepts the request.
+
+\layout Standard
+
+In all cases, if the message is rejected, the incoming message is discarded
+ and the dropped message count for the interface is incremented.
+\layout Standard
+
+A memory descriptor may reject an incoming request for any of the following
+ reasons:
+\layout Itemize
+
+the
+\family typewriter
+PTL_MD_PUT
+\family default
+ or
+\family typewriter
+PTL_MD_GET
+\family default
+ option has not been enabled and the operation is put or get, respectively;
+
+\layout Itemize
+
+the length specified in the request is too long for the memory descriptor
+ and the
+\family typewriter
+PTL_MD_TRUNCATE
+\family default
+ option has not been enabled.
+\layout Chapter
+
+Examples
+\begin_inset LatexCommand \label{sec:examples}
+
+\end_inset
+
+
+\layout Comment
+
+The examples presented in this chapter have not been updated to reflect
+ the current API.
+\layout Standard
+
+In this section we present several examples to illustrate expected usage
+ patterns for the Portals 3.2 API.
+ The first example describes how to implement parallel servers using the
+ features of the Portals 3.2 API.
+ This example covers the access control list and the use of remote managed
+ offsets.
+ The second example presents an approach to dealing with dropped requests.
+ This example covers aspects of match lists and memory descriptors.
+ The final example covers message reception in MPI.
+ This example illustrates more sophisticated uses of matching and a procedure
+ to update a memory descriptor.
+\layout Section
+
+Parallel File Servers
+\begin_inset LatexCommand \label{sec:expfs}
+
+\end_inset
+
+
+\layout Standard
+
+Figure\SpecialChar ~
+
+\begin_inset LatexCommand \ref{fig:file}
+
+\end_inset
+
+ illustrates the logical structure of a parallel file server.
+ In this case, the parallel server consists of four servers that stripe
+ application data across four disks.
+ We would like to present applications with the illusion that the file server
+ is a single entity.
+ We will assume that all of the processes that constitute the parallel server
+ have the same user id.
+\layout Standard
+
+
+\begin_inset Float figure
+placement htbp
+wide false
+collapsed false
+
+\layout Standard
+\align center
+
+\begin_inset Graphics FormatVersion 1
+ filename file.eps
+ display color
+ size_type 0
+ rotateOrigin center
+ lyxsize_type 1
+ lyxwidth 196pt
+ lyxheight 147pt
+\end_inset
+
+
+\layout Caption
+
+Parallel File Server
+\begin_inset LatexCommand \label{fig:file}
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Standard
+
+When an application establishes a connection to the parallel file server,
+ it will allocate a Portal and access control list entry for communicating
+ with the server.
+ The access control list entry will include the Portal and match any process
+ in the parallel file server, so all of the file server processes will
+ have access to the portal.
+ The Portal information and access control entry will be sent to the file
+ server at this time.
+ If the application and server need to have multiple, concurrent I/O operations,
+ they can use additional portals or match entries to keep the operations
+ from interfering with one another.
+\layout Standard
+
+When an application initiates an I/O operation, it first builds a memory
+ descriptor that describes the memory region involved in the operation.
+ This memory descriptor will enable the appropriate operation (put for read
+ operations and get for write operations) and enable the use of remote offsets
+ (this lets the servers decide where their data should be placed in the
+ memory region).
+ After creating the memory descriptor and linking it into the appropriate
+ Portal entry, the application sends a read or write request (using
+\emph on
+PtlPut
+\emph default
+) to one of the file server processes.
+ The file server processes can then use put or get operations with the appropria
+te offsets to fill or retrieve the contents of the application's buffer.
+ To know when the operation has completed, the application can add an event
+ queue to the memory descriptor and add up the lengths of the remote operations
+ until the sum is the size of the requested I/O operation.
+\layout Section
+
+Dealing with Dropped Requests
+\begin_inset LatexCommand \label{sec:exdrop}
+
+\end_inset
+
+
+\layout Standard
+
+If a process does not anticipate unexpected requests, they will be discarded.
+ Applications using the Portals API can query the dropped count for the
+ interface to determine the number of requests that have been dropped (see
+ Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:nistatus}
+
+\end_inset
+
+).
+ While this approach minimizes resource consumption, it does not provide
+ information that might be critical in debugging the implementation of a
+ higher level protocol.
+\layout Standard
+
+To keep track of more information about dropped requests, we use a memory
+ descriptor that truncates each incoming request to zero bytes and logs
+ the
+\begin_inset Quotes eld
+\end_inset
+
+dropped
+\begin_inset Quotes erd
+\end_inset
+
+ operations in an event queue.
+ Note that the operations are not dropped in the Portals sense, because
+ the operation succeeds.
+\layout Standard
+
+The following code fragment illustrates an implementation of this approach.
+ In this case, we assume that a thread is launched to execute the function
+
+\family typewriter
+watch_drop
+\family default
+.
+ This code starts by building an event queue to log truncated operations
+ and a memory descriptor to truncate the incoming requests.
+ This example only captures
+\begin_inset Quotes eld
+\end_inset
+
+dropped
+\begin_inset Quotes erd
+\end_inset
+
+ requests for a single portal.
+ In a more realistic situation, the memory descriptor would be appended
+ to the match list for every portal.
+ We also assume that the thread is capable of keeping up with the
+\begin_inset Quotes eld
+\end_inset
+
+dropped
+\begin_inset Quotes erd
+\end_inset
+
+ requests.
+ If this is not the case, we could use a finite threshold on the memory
+ descriptor to capture the first few dropped requests.
+\layout LyX-Code
+
+
+\size small
+#include <stdio.h>
+\newline
+#include <stdlib.h>
+\newline
+#include <portals.h>
+\newline
+
+\newline
+#define DROP_SIZE 32 /* number of dropped requests to track */
+\newline
+
+\newline
+int watch_drop( ptl_handle_ni_t ni, ptl_pt_index_t index ) {
+\newline
+ ptl_handle_eq_t drop_events;
+\newline
+ ptl_event_t event;
+\newline
+ ptl_handle_md_t drop_em;
+\newline
+ ptl_md_t drop_desc;
+\newline
+ ptl_process_id_t any_proc;
+\newline
+ ptl_handle_me_t match_any;
+\newline
+
+\newline
+ /* create the event queue */
+\newline
+ if( PtlEQAlloc(ni, DROP_SIZE, &drop_events) != PTL_OK ) {
+\newline
+ fprintf( stderr, "Couldn't create the event queue
+\backslash
+n" );
+\newline
+ exit( 1 );
+\newline
+ }
+\newline
+
+\newline
+ /* build a match entry */
+\newline
+ any_proc.nid = PTL_ID_ANY;
+\newline
+ any_proc.pid = PTL_ID_ANY;
+\newline
+ PtlMEAttach( index, any_proc, 0, ~(ptl_match_bits_t)0, PTL_RETAIN,
+\newline
+ &match_any );
+\newline
+
+\newline
+ /* create the memory descriptor */
+\newline
+ drop_desc.start = NULL;
+\newline
+ drop_desc.length = 0;
+\newline
+ drop_desc.threshold = PTL_MD_THRESH_INF;
+\newline
+ drop_desc.options = PTL_MD_OP_PUT | PTL_MD_OP_GET | PTL_MD_TRUNCATE;
+\newline
+ drop_desc.user_ptr = NULL;
+\newline
+ drop_desc.eventq = drop_events;
+\newline
+ if( PtlMDAttach(match_any, drop_desc, &drop_em) != PTL_OK ) {
+\newline
+ fprintf( stderr, "Couldn't create the memory descriptor
+\backslash
+n" );
+\newline
+ exit( 1 );
+\newline
+ }
+\newline
+
+\newline
+ /* watch for "dropped" requests */
+\newline
+ while( 1 ) {
+\newline
+ if( PtlEQWait( drop_events, &event ) != PTL_OK ) break;
+\newline
+ fprintf( stderr, "Dropped request from gid = %d, rid = %d
+\backslash
+n", event.initiator.gid,
+ event.initiator.rid );
+\newline
+ }
+\newline
+}
+\layout Section
+
+Message Transmission in MPI
+\begin_inset LatexCommand \label{sec:exmpi}
+
+\end_inset
+
+
+\layout Standard
+
+We conclude this section with a fairly extensive example that describes
+ an approach to implementing message transmission for MPI.
+ Like many MPI implementations, we distinguish two message transmission
+ protocols: a short message protocol and a long message protocol.
+ We use the constant
+\family typewriter
+MPI_LONG_LENGTH
+\family default
+ to determine the size of a long message.
+\layout Standard
+
+For small messages, the sender simply sends the message and presumes that
+ the message will be received (i.e., the receiver has allocated a memory region
+ to receive the message body).
+ For large messages, the sender also sends the message, but does not presume
+ that the message body will be saved.
+ Instead, the sender builds a memory descriptor for the message and enables
+ get operations on this descriptor.
+ If the target does not save the body of the message, it will record an
+ event for the put operation.
+ When the process later issues a matching MPI receive, it will perform a
+ get operation to retrieve the body of the message.
+\layout Standard
+
+To facilitate receive side matching based on the protocol, we use the most
+ significant bit in the match bits to indicate the protocol: 1 for long
+ messages and 0 for short messages.
+\layout Standard
+
+The following code presents a function that implements the send side of
+ the protocol.
+ The global variable
+\family typewriter
+EndGet
+\family default
+ is the last match entry attached to the Portal index used for posting long
+ messages.
+ This entry does not match any incoming requests (i.e., the memory descriptor
+ rejects all get operations) and is built during initialization of the MPI
+ library.
+ The other global variable,
+\family typewriter
+MPI_NI
+\family default
+, is a handle for the network interface used by the MPI implementation.
+\layout LyX-Code
+
+
+\size small
+extern ptl_handle_me_t EndGet;
+\newline
+extern ptl_handle_ni_t MPI_NI;
+\newline
+
+\newline
+void MPIsend( void *buf, ptl_size_t len, void *data, ptl_handle_eq_t eventq,
+\newline
+ ptl_process_id target, ptl_match_bits_t match )
+\newline
+{
+\newline
+ ptl_handle_md_t send_handle;
+\newline
+ ptl_md_t mem_desc;
+\newline
+ ptl_ack_req_t want_ack;
+\newline
+
+\newline
+ mem_desc.start = buf;
+\newline
+ mem_desc.length = len;
+\newline
+ mem_desc.threshold = 1;
+\newline
+ mem_desc.options = PTL_MD_GET_OP;
+\newline
+ mem_desc.user_ptr = data;
+\newline
+ mem_desc.eventq = eventq;
+\newline
+
+\newline
+ if( len >= MPI_LONG_LENGTH ) {
+\newline
+ ptl_handle_me_t me_handle;
+\newline
+
+\newline
+ /* add a match entry to the end of the get list */
+\newline
+ PtlMEInsert( target, match, 0, PTL_UNLINK, PTL_INS_BEFORE, EndGet,
+ &me_handle );
+\newline
+ PtlMDAttach( me_handle, mem_desc, PTL_UNLINK, NULL );
+\newline
+
+\newline
+ /* we want an ack for long messages */
+\newline
+ want_ack = PTL_ACK_REQ;
+\newline
+
+\newline
+ /* set the protocol bit to indicate that this is a long message
+ */
+\newline
+ match |= 1<<63;
+\newline
+ } else {
+\newline
+ /* we don't want an ack for short messages */
+\newline
+ want_ack = PTL_NOACK_REQ;
+\newline
+
+\newline
+ /* set the protocol bit to indicate that this is a short message
+ */
+\newline
+ match &= ~(1<<63);
+\newline
+ }
+\newline
+
+\newline
+ /* create a memory descriptor and send it */
+\newline
+ PtlMDBind( MPI_NI, mem_desc, &send_handle );
+\newline
+ PtlPut( send_handle, want_ack, target, MPI_SEND_PINDEX, MPI_AINDEX, match,
+ 0 );
+\newline
+}
+\layout Standard
+
+The
+\emph on
+MPIsend
+\emph default
+ function returns as soon as the message has been scheduled for transmission.
+ The event queue argument,
+\family typewriter
+eventq
+\family default
+, can be used to determine the disposition of the message.
+ Assuming that
+\family typewriter
+eventq
+\family default
+ is not
+\family typewriter
+PTL_EQ_NONE
+\family default
+, a
+\family typewriter
+PTL_EVENT_SENT
+\family default
+ event will be recorded for each message as the message is transmitted.
+ For small messages, this is the only event that will be recorded in
+\family typewriter
+eventq
+\family default
+.
+ In contrast, long messages include an explicit request for an acknowledgement.
+ If the
+\family typewriter
+target
+\family default
+ process has posted a matching receive, the acknowledgement will be sent
+ as the message is received.
+ If a matching receive has not been posted, the message will be discarded
+ and no acknowledgement will be sent.
+ When the
+\family typewriter
+target
+\family default
+ process later issues a matching receive, the receive will be translated
+ into a get operation and a
+\family typewriter
+PTL_EVENT_GET
+\family default
+ event will be recorded in
+\family typewriter
+eventq
+\family default
+.
+\layout Standard
+
+Figure\SpecialChar ~
+
+\begin_inset LatexCommand \ref{fig:mpi}
+
+\end_inset
+
+ illustrates the organization of the match list used for receiving MPI messages.
+ The initial entries (not shown in this figure) would be used to match the
+ MPI receives that have been preposted by the application.
+ The preposted receives are followed by a match entry,
+\emph on
+RcvMark
+\emph default
+, that marks the boundary between preposted receives and the memory descriptors
+ used for
+\begin_inset Quotes eld
+\end_inset
+
+unexpected
+\begin_inset Quotes erd
+\end_inset
+
+ messages.
+ The
+\emph on
+RcvMark
+\emph default
+ entry is followed by a small collection of match entries that match unexpected
+
+\begin_inset Quotes eld
+\end_inset
+
+short
+\begin_inset Quotes erd
+\end_inset
+
+ messages, i.e., messages that have a 0 in the most significant bit of their
+ match bits.
+ The memory descriptors associated with these match entries will append
+ the incoming message to the associated memory descriptor and record an
+ event in an event queue for unexpected messages.
+ The unexpected short message matching entries are followed by a match entry
+ that will match messages that were not matched by the preceding match entries,
+ i.e., the unexpected long messages.
+ The memory descriptor associated with this match entry truncates the message
+ body and records an event in the event queue for unexpected messages.
+ Note that all of the memory descriptors used for unexpected messages share
+ a common event queue.
+ This makes it possible to process the unexpected messages in the order
+ in which they arrived, regardless of which memory descriptor accepted them.
+\layout Standard
+
+
+\begin_inset Float figure
+placement htbp
+wide false
+collapsed false
+
+\layout Standard
+\align center
+
+\begin_inset Graphics FormatVersion 1
+ filename mpi.eps
+ display color
+ size_type 0
+ rotateOrigin center
+ lyxsize_type 1
+ lyxwidth 389pt
+ lyxheight 284pt
+\end_inset
+
+
+\layout Caption
+
+Message Reception in MPI
+\begin_inset LatexCommand \label{fig:mpi}
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Standard
+
+When the local MPI process posts an MPI receive, we must first search the
+ events in the unexpected message queue to see if a matching message has already
+ arrived.
+ If no matching message is found, a match entry for the receive is inserted
+ before the
+\emph on
+RcvMark
+\emph default
+ entry--after the match entries for all of the previously posted receives
+ and before the match entries for the unexpected messages.
+ This ensures that preposted receives are matched in the order that they
+ were posted (a requirement of MPI).
+
+\layout Standard
+
+While this strategy respects the temporal semantics of MPI, it introduces
+ a race condition: a matching message might arrive after the events in the
+ unexpected message queue have been searched, but before the match entry
+ for the receive has been inserted in the match list.
+
+\layout Standard
+
+To avoid this race condition we start by setting the
+\family typewriter
+threshold
+\family default
+ of the memory descriptor to 0, making the descriptor inactive.
+ We then insert the match entry into the match list and proceed to search
+ the events in the unexpected message queue.
+ A matching message that arrives as we are searching the unexpected message
+ queue will not be accepted by the memory descriptor and, if not matched
+ by an earlier match list element, will add an event to the unexpected message
+ queue.
+ After searching the events in the unexpected message queue, we update the
+ memory descriptor, setting the threshold to 1 to activate the memory descriptor.
+ This update is predicated on the condition that the unexpected message
+ queue is empty.
+ We repeat the process of searching the unexpected message queue until the
+ update succeeds.
+\layout Standard
+
+The following code fragment illustrates this approach.
+ Because events must be removed from the unexpected message queue to be
+ examined, this code fragment assumes the existence of a user managed event
+ list,
+\family typewriter
+Rcvd
+\family default
+, for the events that have already been removed from the unexpected message
+ queue.
+ In an effort to keep the example focused on the basic protocol, we have
+ omitted the code that would be needed to manage the memory descriptors
+ used for unexpected short messages.
+ In particular, we simply leave messages in these descriptors until they
+ are received by the application.
+ In a robust implementation, we would introduce code to ensure that short
+ unexpected messages are removed from these memory descriptors so that they
+ can be re-used.
+\layout LyX-Code
+
+
+\size small
+extern ptl_handle_eq_t UnexpQueue;
+\newline
+extern ptl_handle_me_t RcvMark;
+\newline
+extern ptl_handle_me_t ShortMatch;
+\newline
+
+\newline
+typedef struct event_list_tag {
+\newline
+ ptl_event_t event;
+\newline
+ struct event_list_tag* next;
+\newline
+} event_list;
+\newline
+
+\newline
+extern event_list Rcvd;
+\newline
+
+\newline
+void AppendRcvd( ptl_event_t event )
+\newline
+{
+\newline
+ /* append an event onto the Rcvd list */
+\newline
+}
+\newline
+
+\newline
+int SearchRcvd( void *buf, ptl_size_t len, ptl_process_id_t sender, ptl_match_bi
+ts_t match,
+\newline
+ ptl_match_bits_t ignore, ptl_event_t *event )
+\newline
+{
+\newline
+ /* Search the Rcvd event queue, looking for a message that matches the
+ requested message.
+\newline
+ * If one is found, remove the event from the Rcvd list and return it.
+ */
+\newline
+}
+\newline
+
+\newline
+typedef enum { RECEIVED, POSTED } receive_state;
+\newline
+
+\newline
+receive_state CopyMsg( void *buf, ptl_size_t &length, ptl_event_t event,
+ ptl_md_t md_buf )
+\newline
+{
+\newline
+ ptl_md_t md_buf;
+\newline
+ ptl_handle_me_t me_handle;
+\newline
+
+\newline
+ if( event.rlength >= MPI_LONG_LENGTH ) {
+\newline
+ PtlMDBind( MPI_NI, md_buf, &md_handle );
+\newline
+ PtlGet( event.initiator, MPI_GET_PINDEX, 0, event.match_bits, MPI_AINDEX,
+ md_handle );
+\newline
+ return POSTED;
+\newline
+ } else {
+\newline
+ /* copy the message */
+\newline
+ if( event.mlength < *length ) *length = event.mlength;
+\newline
+ memcpy( buf, (char*)event.md_desc.start+event.offset, *length );
+\newline
+ return RECEIVED;
+\newline
+ }
+\newline
+}
+\newline
+
+\newline
+receive_state MPIreceive( void *buf, ptl_size_t &len, void *MPI_data, ptl_handle
+_eq_t eventq,
+\newline
+ ptl_process_id_t sender, ptl_match_bits_t match,
+ ptl_match_bits_t ignore )
+\newline
+{
+\newline
+ ptl_md_t md_buf;
+\newline
+ ptl_handle_md_t md_handle;
+\newline
+ ptl_handle_me_t me_handle;
+\newline
+ ptl_event_t event;
+\newline
+
+\newline
+ /* build a memory descriptor for the receive */
+\newline
+ md_buf.start = buf;
+\newline
+ md_buf.length = *len;
+\newline
+ md_buf.threshold = 0; /* temporarily disabled */
+\newline
+ md_buf.options = PTL_MD_PUT_OP;
+\newline
+ md_buf.user_ptr = MPI_data;
+\newline
+ md_buf.eventq = eventq;
+\newline
+
+\newline
+ /* see if we have already received the message */
+\newline
+ if( SearchRcvd(buf, len, sender, match, ignore, &event) )
+\newline
+ return CopyMsg( buf, len, event, md_buf );
+\newline
+
+\newline
+ /* create the match entry and attach the memory descriptor */
+\newline
+ PtlMEInsert(sender, match, ignore, PTL_UNLINK, PTL_INS_BEFORE, RcvMark,
+ &me_handle);
+\newline
+ PtlMDAttach( me_handle, md_buf, PTL_UNLINK, &md_handle );
+\newline
+
+\newline
+ md_buf.threshold = 1;
+\newline
+ do
+\newline
+ if( PtlEQGet( UnexpQueue, &event ) != PTL_EQ_EMPTY ) {
+\newline
+ if( MPIMatch(event, match, ignore, sender) ) {
+\newline
+ return CopyMsg( buf, len, (char*)event.md_desc.start+event.offset,
+ md_buf );
+\newline
+ } else {
+\newline
+ AppendRcvd( event );
+\newline
+ }
+\newline
+ }
+\newline
+ while( PtlMDUpdate(md_handle, NULL, &md_buf, unexp_queue) == PTL_NOUPDATE
+ );
+\newline
+ return POSTED;
+\newline
+}
+\layout Chapter*
+
+Acknowledgments
+\layout Standard
+
+Several people have contributed to the philosophy, design, and implementation
+ of the Portals message passing architecture as it has evolved.
+ We acknowledge the following people for their contributions: Al Audette,
+ Lee Ann Fisk, David Greenberg, Tramm Hudson, Gabi Istrail, Chu Jong, Mike
+ Levenhagen, Jim Otto, Mark Sears, Lance Shuler, Mack Stallcup, Jeff VanDyke,
+ Dave van Dresser, Lee Ward, and Stephen Wheat.
+
+\layout Standard
+
+
+\begin_inset LatexCommand \BibTeX[ieee]{portals3}
+
+\end_inset
+
+
+\the_end
--- /dev/null
+#FIG 3.2
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+6 1350 900 2175 1200
+4 0 0 100 0 0 10 0.0000 0 105 825 1350 1200 Transmission\001
+4 0 0 100 0 0 10 0.0000 0 105 285 1620 1050 Data\001
+-6
+2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 2700 1275 2700 1725
+2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 900 525 2700 1200
+2 2 0 1 0 7 100 0 -1 3.000 0 0 7 0 0 5
+ 0 300 1200 300 1200 2250 0 2250 0 300
+2 2 0 1 0 7 100 0 -1 3.000 0 0 7 0 0 5
+ 2400 300 3600 300 3600 2250 2400 2250 2400 300
+2 1 1 1 0 7 100 0 -1 4.000 0 0 7 1 0 2
+ 0 0 1.00 60.00 120.00
+ 2699 1788 899 1938
+4 0 0 100 0 0 10 0.0000 0 105 720 2775 1650 Translation\001
+4 1 0 100 0 0 10 0.0000 0 135 555 1800 2025 Optional\001
+4 1 0 100 0 0 10 0.0000 0 135 1170 1800 2175 Acknowledgement\001
+4 0 0 100 0 0 10 0.0000 0 105 405 2850 1500 Portal\001
+4 1 0 100 0 0 10 0.0000 0 135 405 3000 525 Target\001
+4 1 0 100 0 0 10 0.0000 0 105 540 600 525 Initiator\001
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+SUBDIRS = portals linux
+EXTRA_DIST = config.h.in
+include $(top_srcdir)/Rules
--- /dev/null
+/* ../include/config.h.in. Generated automatically from configure.in by autoheader. */
+
+/* Define if you have the readline library (-lreadline). */
+#undef HAVE_LIBREADLINE
+
+/* Name of package */
+#undef PACKAGE
+
+/* Version number of package */
+#undef VERSION
+
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+include $(top_srcdir)/Rules
+
+linuxincludedir = $(includedir)/linux
+
+linuxinclude_HEADERS=kp30.h portals_lib.h
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _KP30_INCLUDED
+#define _KP30_INCLUDED
+
+
+#define PORTAL_DEBUG
+
+#ifndef offsetof /* fallback: only defined when no offsetof macro already exists */
+# define offsetof(typ,memb) ((int)((char *)&(((typ *)0)->memb))) /* offset of memb within typ, taken from the address of the member of a null typ pointer; NOTE(review): the result is cast to int */
+#endif
+
+#define LOWEST_BIT_SET(x) ((x) & ~((x) - 1)) /* keeps only the least-significant set bit of x (yields 0 when x is 0); x is evaluated twice */
+
+#ifndef CONFIG_SMP
+# define smp_processor_id() 0
+#endif
+
+/*
+ * Debugging
+ */
+extern unsigned int portal_subsystem_debug;
+extern unsigned int portal_stack;
+extern unsigned int portal_debug;
+extern unsigned int portal_printk;
+/* Debugging subsystems (8 bit ID)
+ *
+ * If you add debug subsystem #32, you need to send email to phil, because
+ * you're going to break kernel subsystem debug filtering. */
+#define S_UNDEFINED (0 << 24)
+#define S_MDC (1 << 24)
+#define S_MDS (2 << 24)
+#define S_OSC (3 << 24)
+#define S_OST (4 << 24)
+#define S_CLASS (5 << 24)
+#define S_OBDFS (6 << 24) /* obsolete */
+#define S_LLITE (7 << 24)
+#define S_RPC (8 << 24)
+#define S_EXT2OBD (9 << 24) /* obsolete */
+#define S_PORTALS (10 << 24)
+#define S_SOCKNAL (11 << 24)
+#define S_QSWNAL (12 << 24)
+#define S_PINGER (13 << 24)
+#define S_FILTER (14 << 24)
+#define S_TRACE (15 << 24) /* obsolete */
+#define S_ECHO (16 << 24)
+#define S_LDLM (17 << 24)
+#define S_LOV (18 << 24)
+#define S_GMNAL (19 << 24)
+#define S_PTLROUTER (20 << 24)
+#define S_COBD (21 << 24)
+#define S_PTLBD (22 << 24)
+#define S_LOG (23 << 24)
+
+/* If you change these values, please keep portals/linux/utils/debug.c
+ * up to date! */
+
+/* Debugging masks (24 bits, non-overlapping) */
+#define D_TRACE (1 << 0) /* ENTRY/EXIT markers */
+#define D_INODE (1 << 1)
+#define D_SUPER (1 << 2)
+#define D_EXT2 (1 << 3) /* anything from ext2_debug */
+#define D_MALLOC (1 << 4) /* print malloc, free information */
+#define D_CACHE (1 << 5) /* cache-related items */
+#define D_INFO (1 << 6) /* general information */
+#define D_IOCTL (1 << 7) /* ioctl related information */
+#define D_BLOCKS (1 << 8) /* ext2 block allocation */
+#define D_NET (1 << 9) /* network communications */
+#define D_WARNING (1 << 10)
+#define D_BUFFS (1 << 11)
+#define D_OTHER (1 << 12)
+#define D_DENTRY (1 << 13)
+#define D_PORTALS (1 << 14) /* ENTRY/EXIT markers */
+#define D_PAGE (1 << 15) /* bulk page handling */
+#define D_DLMTRACE (1 << 16)
+#define D_ERROR (1 << 17) /* CERROR(...) == CDEBUG (D_ERROR, ...) */
+#define D_EMERG (1 << 18) /* CEMERG(...) == CDEBUG (D_EMERG, ...) */
+#define D_HA (1 << 19) /* recovery and failover */
+#define D_RPCTRACE (1 << 20) /* for distributed debugging */
+#define D_VFSTRACE (1 << 21)
+
+/* Default stack size assumed when the platform headers do not supply one. */
+#ifndef THREAD_SIZE
+#define THREAD_SIZE 8192
+#endif
+/*
+ * CDEBUG_STACK(var): stack position figure handed to the debug log.
+ *
+ * ia64 variant: offset of the local variable 'var' within its
+ * THREAD_SIZE-sized region. The address is converted to unsigned long
+ * first, because a bitwise AND cannot be applied to a pointer operand.
+ *
+ * Other architectures: THREAD_SIZE minus the offset of the current frame
+ * address within its THREAD_SIZE-sized region ('var' is not used).
+ */
+#ifdef __arch_ia64__
+#define CDEBUG_STACK(var) ((unsigned long)&(var) & (THREAD_SIZE - 1))
+#else
+#define CDEBUG_STACK(var) (THREAD_SIZE -                                      \
+                           ((unsigned long)__builtin_frame_address(0) &      \
+                            (THREAD_SIZE - 1)))
+#endif
+
+/*
+ * CHECK_STACK(stack): in kernel builds, log a D_ERROR message through
+ * portals_debug_msg() when 'stack' is larger than three quarters of
+ * THREAD_SIZE and also larger than the current portal_stack value.
+ * portal_stack is raised to 'stack' as a side effect of evaluating the
+ * last message argument. 'stack' is expanded several times, so it must be
+ * free of side effects. Outside the kernel the macro is an empty statement.
+ */
+#ifdef __KERNEL__
+#define CHECK_STACK(stack) \
+ do { \
+ if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) \
+ portals_debug_msg(DEBUG_SUBSYSTEM, D_ERROR, \
+ __FILE__, __FUNCTION__, __LINE__, \
+ (stack), \
+ "maximum lustre stack %u\n", \
+ portal_stack = (stack)); \
+ } while (0)
+#else
+#define CHECK_STACK(stack) do{}while(0)
+#endif
+
+/*
+ * CDEBUG(mask, format, ...): conditionally emit a debug message.
+ *
+ * The message is passed to portals_debug_msg() when any of these holds:
+ *   - mask is zero;
+ *   - mask contains D_ERROR or D_EMERG;
+ *   - mask intersects portal_debug AND the bit selected by
+ *     (DEBUG_SUBSYSTEM >> 24) is set in portal_subsystem_debug.
+ * CHECK_STACK() is run on every invocation, whether or not a message is
+ * emitted.
+ *
+ * The including file must define DEBUG_SUBSYSTEM. The expansion declares
+ * locals named 'stack' and 'match'; arguments that mention caller
+ * variables with those names will see the macro's locals instead.
+ */
+#define CDEBUG(mask, format, a...) \
+do { \
+ unsigned long stack = CDEBUG_STACK(stack); \
+ int match = 0; \
+ \
+ CHECK_STACK(stack); \
+ if (!(mask)) \
+ match = 1; \
+ else if ((mask) & (D_ERROR | D_EMERG)) \
+ match = 1; \
+ else if (portal_debug & (mask) && \
+ portal_subsystem_debug & (1 << (DEBUG_SUBSYSTEM >> 24))) \
+ match = 1; \
+ if (match) \
+ portals_debug_msg(DEBUG_SUBSYSTEM, mask, \
+ __FILE__, __FUNCTION__, __LINE__, \
+ stack, format , ## a); \
+} while (0)
+
+#define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a)
+#define CERROR(format, a...) CDEBUG(D_ERROR, format, ## a)
+#define CEMERG(format, a...) CDEBUG(D_EMERG, format, ## a)
+
+/*
+ * GOTO(label, rc): trace the exit value, then jump to 'label'.
+ *
+ * 'rc' is evaluated exactly once and converted to long; it is logged at
+ * D_TRACE as unsigned, signed and hexadecimal. The hexadecimal argument is
+ * passed as unsigned long so that it matches the %lx conversion.
+ */
+#define GOTO(label, rc)                                                       \
+do {                                                                          \
+        long GOTO__ret = (long)(rc);                                          \
+        CDEBUG(D_TRACE,"Process leaving via %s (rc=%lu : %ld : %lx)\n",      \
+               #label, (unsigned long)GOTO__ret, (signed long)GOTO__ret,     \
+               (unsigned long)GOTO__ret);                                     \
+        goto label;                                                           \
+} while (0)
+
+/*
+ * RETURN(rc): trace the return value, then return it from the enclosing
+ * function.
+ *
+ * 'rc' is evaluated exactly once and returned with its own type; a copy
+ * converted to long is logged at D_TRACE as unsigned, signed and
+ * hexadecimal. The hexadecimal argument is passed as unsigned long so that
+ * it matches the %lx conversion.
+ */
+#define RETURN(rc)                                                            \
+do {                                                                          \
+        typeof(rc) RETURN__ret = (rc);                                        \
+        long tmp = (long)RETURN__ret;                                         \
+        CDEBUG(D_TRACE, "Process leaving (rc=%lu : %ld : %lx)\n",            \
+               (unsigned long)tmp, (signed long)tmp,                          \
+               (unsigned long)tmp);                                           \
+        return RETURN__ret;                                                   \
+} while (0)
+
+#define ENTRY \
+do { \
+ CDEBUG(D_TRACE, "Process entered\n"); \
+} while (0)
+
+#define EXIT \
+do { \
+ CDEBUG(D_TRACE, "Process leaving\n"); \
+} while(0)
+
+
+#ifdef __KERNEL__
+# include <linux/vmalloc.h>
+# include <linux/time.h>
+# include <linux/slab.h>
+# include <linux/interrupt.h>
+# include <linux/highmem.h>
+# include <linux/module.h>
+# include <linux/version.h>
+# include <portals/lib-nal.h>
+# include <linux/smp_lock.h>
+# include <asm/atomic.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+#define schedule_work schedule_task
+#define prepare_work(wq,cb,cbdata) \
+do { \
+ INIT_TQUEUE((wq), 0, 0); \
+ PREPARE_TQUEUE((wq), (cb), (cbdata)); \
+} while (0)
+
+#define ll_invalidate_inode_pages invalidate_inode_pages
+#define PageUptodate Page_Uptodate
+#define our_recalc_sigpending(current) recalc_sigpending(current)
+#define num_online_cpus() smp_num_cpus
+static inline void our_cond_resched(void)
+{
+ if (current->need_resched)
+ schedule ();
+}
+
+#else
+
+#define prepare_work(wq,cb,cbdata) \
+do { \
+ INIT_WORK((wq), (void *)(cb), (void *)(cbdata)); \
+} while (0)
+#define ll_invalidate_inode_pages(inode) invalidate_inode_pages((inode)->i_mapping)
+#define wait_on_page wait_on_page_locked
+#define our_recalc_sigpending(current) recalc_sigpending()
+#define strtok(a,b) strpbrk(a, b)
+static inline void our_cond_resched(void)
+{
+ cond_resched();
+}
+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */
+
+#ifdef PORTAL_DEBUG
+extern void kportal_assertion_failed(char *expr,char *file,char *func,int line);
+#define LASSERT(e) ((e) ? 0 : kportal_assertion_failed( #e , __FILE__, \
+ __FUNCTION__, __LINE__))
+#else
+#define LASSERT(e)
+#endif
+
+#ifdef __arch_um__
+#define LBUG() \
+do { \
+ CEMERG("LBUG - trying to dump log to /tmp/lustre-log\n"); \
+ portals_debug_dumplog(); \
+ portals_run_lbug_upcall(__FILE__, __FUNCTION__, __LINE__); \
+ panic("LBUG"); \
+} while (0)
+#else
+#define LBUG() \
+do { \
+ CEMERG("LBUG\n"); \
+ portals_debug_dumplog(); \
+ portals_run_lbug_upcall(__FILE__, __FUNCTION__, __LINE__); \
+ set_task_state(current, TASK_UNINTERRUPTIBLE); \
+ schedule(); \
+} while (0)
+#endif /* __arch_um__ */
+
+/*
+ * Memory
+ */
+#ifdef PORTAL_DEBUG
+extern atomic_t portal_kmemory;
+
+# define portal_kmem_inc(ptr, size) \
+do { \
+ atomic_add(size, &portal_kmemory); \
+} while (0)
+
+# define portal_kmem_dec(ptr, size) do { \
+ atomic_sub(size, &portal_kmemory); \
+} while (0)
+
+#else
+# define portal_kmem_inc(ptr, size) do {} while (0)
+# define portal_kmem_dec(ptr, size) do {} while (0)
+#endif /* PORTAL_DEBUG */
+
+#define PORTAL_VMALLOC_SIZE 16384
+
+/*
+ * PORTAL_ALLOC(ptr, size): allocate 'size' bytes and store the address in
+ * 'ptr'.
+ *
+ * Requests larger than PORTAL_VMALLOC_SIZE use vmalloc(); all others use
+ * kmalloc(GFP_KERNEL). The macro asserts that it is not called from
+ * interrupt context. On success the memory is zero-filled and accounted via
+ * portal_kmem_inc(); on failure 'ptr' is NULL and an error is logged. The
+ * D_MALLOC trace line is emitted in both cases.
+ *
+ * The expansion declares a local named 's', so neither argument may refer
+ * to a caller variable of that name.
+ */
+#define PORTAL_ALLOC(ptr, size) \
+do { \
+ long s = size; \
+ LASSERT (!in_interrupt()); \
+ if (s > PORTAL_VMALLOC_SIZE) \
+ (ptr) = vmalloc(s); \
+ else \
+ (ptr) = kmalloc(s, GFP_KERNEL); \
+ if ((ptr) == NULL) \
+ CERROR("PORTALS: out of memory at %s:%d (tried to alloc" \
+ " '" #ptr "' = %ld)\n", __FILE__, __LINE__, s); \
+ else { \
+ portal_kmem_inc((ptr), s); \
+ memset((ptr), 0, s); \
+ } \
+ CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %ld at %p (tot %d).\n", \
+ s, (ptr), atomic_read (&portal_kmemory)); \
+} while (0)
+
+/*
+ * PORTAL_FREE(ptr, size): release memory obtained with PORTAL_ALLOC().
+ *
+ * 'size' selects the release routine with the same threshold used when
+ * allocating (vfree() above PORTAL_VMALLOC_SIZE, kfree() otherwise), so it
+ * has to be the size that was passed to PORTAL_ALLOC(). A NULL 'ptr' is
+ * logged as an error and nothing is released or accounted. 'ptr' itself is
+ * left unchanged; its value is only printed in the D_MALLOC trace line.
+ *
+ * The expansion declares a local named 's', so neither argument may refer
+ * to a caller variable of that name.
+ */
+#define PORTAL_FREE(ptr, size) \
+do { \
+ long s = (size); \
+ if ((ptr) == NULL) { \
+ CERROR("PORTALS: free NULL '" #ptr "' (%ld bytes) at " \
+ "%s:%d\n", s, __FILE__, __LINE__); \
+ break; \
+ } \
+ if (s > PORTAL_VMALLOC_SIZE) \
+ vfree(ptr); \
+ else \
+ kfree(ptr); \
+ portal_kmem_dec((ptr), s); \
+ CDEBUG(D_MALLOC, "kfreed '" #ptr "': %ld at %p (tot %d).\n", \
+ s, (ptr), atomic_read (&portal_kmemory)); \
+} while (0)
+
+#define PORTAL_SLAB_ALLOC(ptr, slab, size) \
+do { \
+ long s = (size); \
+ LASSERT (!in_interrupt()); \
+ (ptr) = kmem_cache_alloc((slab), SLAB_KERNEL); \
+ if ((ptr) == NULL) { \
+ CERROR("PORTALS: out of memory at %s:%d (tried to alloc" \
+ " '" #ptr "' from slab '" #slab "')\n", __FILE__, \
+ __LINE__); \
+ } else { \
+ portal_kmem_inc((ptr), s); \
+ memset((ptr), 0, s); \
+ } \
+ CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %ld at %p (tot %d).\n", \
+ s, (ptr), atomic_read (&portal_kmemory)); \
+} while (0)
+
+#define PORTAL_SLAB_FREE(ptr, slab, size) \
+do { \
+ long s = (size); \
+ if ((ptr) == NULL) { \
+ CERROR("PORTALS: free NULL '" #ptr "' (%ld bytes) at " \
+ "%s:%d\n", s, __FILE__, __LINE__); \
+ break; \
+ } \
+ memset((ptr), 0x5a, s); \
+ kmem_cache_free((slab), ptr); \
+ portal_kmem_dec((ptr), s); \
+ CDEBUG(D_MALLOC, "kfreed '" #ptr "': %ld at %p (tot %d).\n", \
+ s, (ptr), atomic_read (&portal_kmemory)); \
+} while (0)
+
+/* ------------------------------------------------------------------- */
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+
+#define PORTAL_SYMBOL_REGISTER(x) inter_module_register(#x, THIS_MODULE, &x)
+#define PORTAL_SYMBOL_UNREGISTER(x) inter_module_unregister(#x)
+
+#define PORTAL_SYMBOL_GET(x) ((typeof(&x))inter_module_get(#x))
+#define PORTAL_SYMBOL_PUT(x) inter_module_put(#x)
+
+#define PORTAL_MODULE_USE MOD_INC_USE_COUNT
+#define PORTAL_MODULE_UNUSE MOD_DEC_USE_COUNT
+#else
+
+#define PORTAL_SYMBOL_REGISTER(x)
+#define PORTAL_SYMBOL_UNREGISTER(x)
+
+#define PORTAL_SYMBOL_GET(x) symbol_get(x)
+#define PORTAL_SYMBOL_PUT(x) symbol_put(x)
+
+#define PORTAL_MODULE_USE try_module_get(THIS_MODULE)
+#define PORTAL_MODULE_UNUSE module_put(THIS_MODULE)
+
+#endif
+
+/******************************************************************************/
+/* Kernel Portals Router interface */
+
+typedef void (*kpr_fwd_callback_t)(void *arg, int error); // completion callback
+
+/* space for routing targets to stash "stuff" in a forwarded packet */
+typedef union {
+ long long _alignment;
+ void *_space[16]; /* scale with CPU arch */
+} kprfd_scratch_t;
+
+/* Kernel Portals Routing Forwarded message Descriptor */
+typedef struct {
+ struct list_head kprfd_list; /* stash in queues (routing target can use) */
+ ptl_nid_t kprfd_target_nid; /* final destination NID */
+ ptl_nid_t kprfd_gateway_nid; /* gateway NID */
+ int kprfd_nob; /* # message bytes (including header) */
+ int kprfd_niov; /* # message frags (including header) */
+ struct iovec *kprfd_iov; /* message fragments */
+ void *kprfd_router_arg; // originating NAL's router arg
+ kpr_fwd_callback_t kprfd_callback; /* completion callback */
+ void *kprfd_callback_arg; /* completion callback arg */
+ kprfd_scratch_t kprfd_scratch; // scratchpad for routing targets
+} kpr_fwd_desc_t;
+
+typedef void (*kpr_fwd_t)(void *arg, kpr_fwd_desc_t *fwd);
+
+/* NAL's routing interface (Kernel Portals Routing Nal Interface) */
+typedef const struct {
+ int kprni_nalid; /* NAL's id */
+ void *kprni_arg; /* Arg to pass when calling into NAL */
+ kpr_fwd_t kprni_fwd; /* NAL's forwarding entrypoint */
+} kpr_nal_interface_t;
+
+/* Router's routing interface (Kernel Portals Routing Router Interface) */
+typedef const struct {
+ /* register the calling NAL with the router and get back the handle for
+ * subsequent calls */
+ int (*kprri_register) (kpr_nal_interface_t *nal_interface,
+ void **router_arg);
+
+ /* ask the router to find a gateway that forwards to 'nid' and is a peer
+ * of the calling NAL */
+ int (*kprri_lookup) (void *router_arg, ptl_nid_t nid,
+ ptl_nid_t *gateway_nid);
+
+ /* hand a packet over to the router for forwarding */
+ kpr_fwd_t kprri_fwd_start;
+
+ /* hand a packet back to the router for completion */
+ void (*kprri_fwd_done) (void *router_arg, kpr_fwd_desc_t *fwd,
+ int error);
+
+ /* the calling NAL is shutting down */
+ void (*kprri_shutdown) (void *router_arg);
+
+ /* deregister the calling NAL with the router */
+ void (*kprri_deregister) (void *router_arg);
+
+} kpr_router_interface_t;
+
+/* Convenient struct for NAL to stash router interface/args */
+typedef struct {
+ kpr_router_interface_t *kpr_interface;
+ void *kpr_arg;
+} kpr_router_t;
+
+/* Router's control interface (Kernel Portals Routing Control Interface) */
+typedef const struct {
+ int (*kprci_add_route)(int gateway_nal, ptl_nid_t gateway_nid,
+ ptl_nid_t lo_nid, ptl_nid_t hi_nid);
+ int (*kprci_del_route)(ptl_nid_t nid);
+ int (*kprci_get_route)(int index, int *gateway_nal,
+ ptl_nid_t *gateway, ptl_nid_t *lo_nid,
+ ptl_nid_t *hi_nid);
+} kpr_control_interface_t;
+
+extern kpr_control_interface_t kpr_control_interface;
+extern kpr_router_interface_t kpr_router_interface;
+
+/*
+ * Register a NAL with the router.
+ *
+ * Looks up the router interface symbol, calls its kprri_register() entry
+ * with 'nalif', and stores the handle it hands back in router->kpr_arg.
+ *
+ * Returns -ENOENT when the router interface symbol is not available,
+ * otherwise the result of kprri_register(). After a failed registration
+ * router->kpr_interface is NULL, so kpr_routing() reports false.
+ */
+static inline int
+kpr_register (kpr_router_t *router, kpr_nal_interface_t *nalif)
+{
+ int rc;
+
+ router->kpr_interface = PORTAL_SYMBOL_GET (kpr_router_interface);
+ if (router->kpr_interface == NULL)
+ return (-ENOENT);
+
+ rc = (router->kpr_interface)->kprri_register (nalif, &router->kpr_arg);
+ if (rc != 0)
+ router->kpr_interface = NULL;
+
+ /* The symbol reference is dropped on success as well, while the
+ * interface pointer stays cached in 'router'.
+ * NOTE(review): presumably kprri_register() keeps the router pinned by
+ * other means -- confirm. */
+ PORTAL_SYMBOL_PUT (kpr_router_interface);
+ return (rc);
+}
+
+/* Tell whether 'router' currently holds a router interface: 1 when
+ * kpr_interface is set, 0 when it is NULL. */
+static inline int
+kpr_routing (kpr_router_t *router)
+{
+        if (router->kpr_interface == NULL)
+                return 0;
+
+        return 1;
+}
+
+/*
+ * Ask the router for a gateway towards 'nid'.
+ *
+ * Returns -EHOSTUNREACH when no router interface is held; otherwise the
+ * result of the router's kprri_lookup() entry, which receives
+ * 'gateway_nid' as its output argument.
+ */
+static inline int
+kpr_lookup (kpr_router_t *router, ptl_nid_t nid, ptl_nid_t *gateway_nid)
+{
+        kpr_router_interface_t *rif;
+
+        if (kpr_routing (router) == 0)
+                return -EHOSTUNREACH;
+
+        rif = router->kpr_interface;
+        return rif->kprri_lookup (router->kpr_arg, nid, gateway_nid);
+}
+
+/*
+ * Fill in the caller-supplied fields of a forwarding descriptor.
+ *
+ * Both the target and the gateway NID start out as 'nid'. The list link,
+ * router argument and scratch area of 'fwd' are not touched here.
+ */
+static inline void
+kpr_fwd_init (kpr_fwd_desc_t *fwd, ptl_nid_t nid,
+              int nob, int niov, struct iovec *iov,
+              kpr_fwd_callback_t callback, void *callback_arg)
+{
+        /* message payload */
+        fwd->kprfd_iov = iov;
+        fwd->kprfd_niov = niov;
+        fwd->kprfd_nob = nob;
+
+        /* addressing */
+        fwd->kprfd_gateway_nid = nid;
+        fwd->kprfd_target_nid = nid;
+
+        /* completion notification */
+        fwd->kprfd_callback_arg = callback_arg;
+        fwd->kprfd_callback = callback;
+}
+
+/*
+ * Hand 'fwd' to the router for forwarding. When no router interface is
+ * held, the descriptor's completion callback is invoked immediately with
+ * -EHOSTUNREACH instead.
+ */
+static inline void
+kpr_fwd_start (kpr_router_t *router, kpr_fwd_desc_t *fwd)
+{
+        if (kpr_routing (router)) {
+                router->kpr_interface->kprri_fwd_start (router->kpr_arg, fwd);
+                return;
+        }
+
+        fwd->kprfd_callback (fwd->kprfd_callback_arg, -EHOSTUNREACH);
+}
+
+/*
+ * Hand a forwarded packet back to the router for completion with status
+ * 'error'. A router interface must be held; this is asserted.
+ */
+static inline void
+kpr_fwd_done (kpr_router_t *router, kpr_fwd_desc_t *fwd, int error)
+{
+        kpr_router_interface_t *rif;
+
+        LASSERT (kpr_routing (router));
+
+        rif = router->kpr_interface;
+        rif->kprri_fwd_done (router->kpr_arg, fwd, error);
+}
+
+/* Tell the router that the calling NAL is shutting down; does nothing
+ * when no router interface is held. */
+static inline void
+kpr_shutdown (kpr_router_t *router)
+{
+        if (!kpr_routing (router))
+                return;
+
+        router->kpr_interface->kprri_shutdown (router->kpr_arg);
+}
+
+/*
+ * Deregister the calling NAL from the router and forget the interface, so
+ * that kpr_routing() reports false afterwards. Does nothing when no router
+ * interface is held.
+ */
+static inline void
+kpr_deregister (kpr_router_t *router)
+{
+        if (kpr_routing (router)) {
+                router->kpr_interface->kprri_deregister (router->kpr_arg);
+                router->kpr_interface = NULL;
+        }
+}
+
+/******************************************************************************/
+
+#ifdef PORTALS_PROFILING
+#define prof_enum(FOO) PROF__##FOO
+enum {
+ prof_enum(our_recvmsg),
+ prof_enum(our_sendmsg),
+ prof_enum(socknal_recv),
+ prof_enum(lib_parse),
+ prof_enum(conn_list_walk),
+ prof_enum(memcpy),
+ prof_enum(lib_finalize),
+ prof_enum(pingcli_time),
+ prof_enum(gmnal_send),
+ prof_enum(gmnal_recv),
+ MAX_PROFS
+};
+
+struct prof_ent {
+ char *str;
+ /* hrmph. wrap-tastic. */
+ u32 starts;
+ u32 finishes;
+ cycles_t total_cycles;
+ cycles_t start;
+ cycles_t end;
+};
+
+extern struct prof_ent prof_ents[MAX_PROFS];
+
+#define PROF_START(FOO) \
+ do { \
+ struct prof_ent *pe = &prof_ents[PROF__##FOO]; \
+ pe->starts++; \
+ pe->start = get_cycles(); \
+ } while (0)
+
+#define PROF_FINISH(FOO) \
+ do { \
+ struct prof_ent *pe = &prof_ents[PROF__##FOO]; \
+ pe->finishes++; \
+ pe->end = get_cycles(); \
+ pe->total_cycles += (pe->end - pe->start); \
+ } while (0)
+#else /* !PORTALS_PROFILING */
+#define PROF_START(FOO) do {} while(0)
+#define PROF_FINISH(FOO) do {} while(0)
+#endif /* PORTALS_PROFILING */
+
+/* debug.c */
+void portals_run_lbug_upcall(char * file, char *fn, int line);
+void portals_debug_dumplog(void);
+int portals_debug_init(unsigned long bufsize);
+int portals_debug_cleanup(void);
+int portals_debug_clear_buffer(void);
+int portals_debug_mark_buffer(char *text);
+int portals_debug_set_daemon(unsigned int cmd, unsigned int length,
+ char *file, unsigned int size);
+__s32 portals_debug_copy_to_user(char *buf, unsigned long len);
+#if (__GNUC__)
+/* Use the special GNU C __attribute__ hack to have the compiler check the
+ * printf style argument string against the actual argument count and
+ * types.
+ */
+#ifdef printf
+# warning printf has been defined as a macro...
+# undef printf
+#endif
+void portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
+ unsigned long stack, const char *format, ...)
+ __attribute__ ((format (printf, 7, 8)));
+#else
+void portals_debug_msg (int subsys, int mask, char *file, char *fn,
+ int line, unsigned long stack,
+ const char *format, ...);
+#endif /* __GNUC__ */
+void portals_debug_set_level(unsigned int debug_level);
+
+# define fprintf(a, format, b...) CDEBUG(D_OTHER, format , ## b)
+# define printf(format, b...) CDEBUG(D_OTHER, format , ## b)
+# define time(a) CURRENT_TIME
+
+extern void kportal_daemonize (char *name);
+extern void kportal_blockallsigs (void);
+
+#else /* !__KERNEL__ */
+# include <stdio.h>
+# include <stdlib.h>
+#ifndef __CYGWIN__
+# include <stdint.h>
+#endif
+# include <unistd.h>
+# include <time.h>
+# include <asm/types.h>
+# ifndef DEBUG_SUBSYSTEM
+# define DEBUG_SUBSYSTEM S_UNDEFINED
+# endif
+# ifdef PORTAL_DEBUG
+# undef NDEBUG
+# include <assert.h>
+# define LASSERT(e) assert(e)
+# else
+# define LASSERT(e)
+# endif
+/*
+ * Userspace stand-ins for the kernel facilities used by shared code.
+ *
+ * None of the expansions ends in a semicolon, so each macro behaves as one
+ * statement once the caller adds its own ';' and can be used in an
+ * unbraced if/else. PORTAL_ALLOC returns zero-filled memory, matching the
+ * kernel variant; 'ptr' is NULL on failure. PORTAL_FREE ignores its size
+ * argument. portals_debug_msg prints to stdout, prefixed with subsystem,
+ * mask, time, location, pid and stack figure.
+ */
+# define printk(format, args...) printf (format, ## args)
+# define PORTAL_ALLOC(ptr, size) do { (ptr) = calloc(1, (size)); } while (0)
+# define PORTAL_FREE(a, b) do { free(a); } while (0)
+# define portals_debug_msg(subsys, mask, file, fn, line, stack, format, a...) \
+        printf ("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format,                \
+                (subsys) >> 24, (mask), (unsigned long)time(0), file, fn,     \
+                line, getpid() , stack, ## a)
+#endif
+
+#ifndef CURRENT_TIME
+# define CURRENT_TIME time(0)
+#endif
+
+#include <linux/portals_lib.h>
+
+/*
+ * USER LEVEL STUFF BELOW
+ */
+
+#define PORTAL_IOCTL_VERSION 0x00010007
+#define PING_SYNC 0
+#define PING_ASYNC 1
+
+struct portal_ioctl_data {
+ __u32 ioc_len;
+ __u32 ioc_version;
+ __u64 ioc_nid;
+ __u64 ioc_nid2;
+ __u64 ioc_nid3;
+ __u32 ioc_count;
+ __u32 ioc_nal;
+ __u32 ioc_nal_cmd;
+ __u32 ioc_fd;
+ __u32 ioc_id;
+
+ __u32 ioc_flags;
+ __u32 ioc_size;
+
+ __u32 ioc_wait;
+ __u32 ioc_timeout;
+ __u32 ioc_misc;
+
+ __u32 ioc_inllen1;
+ char *ioc_inlbuf1;
+ __u32 ioc_inllen2;
+ char *ioc_inlbuf2;
+
+ __u32 ioc_plen1; /* buffers in userspace */
+ char *ioc_pbuf1;
+ __u32 ioc_plen2; /* buffers in userspace */
+ char *ioc_pbuf2;
+
+ char ioc_bulk[0];
+};
+
+struct portal_ioctl_hdr {
+ __u32 ioc_len;
+ __u32 ioc_version;
+};
+
+struct portals_debug_ioctl_data
+{
+ struct portal_ioctl_hdr hdr;
+ unsigned int subs;
+ unsigned int debug;
+};
+
+#define PORTAL_IOC_INIT(data) \
+do { \
+ memset(&data, 0, sizeof(data)); \
+ data.ioc_version = PORTAL_IOCTL_VERSION; \
+ data.ioc_len = sizeof(data); \
+} while (0)
+
+/* FIXME check conflict with lustre_lib.h */
+#define PTL_IOC_DEBUG_MASK _IOWR('f', 250, long)
+
+/* Packed length of an ioctl request: the fixed structure followed by both
+ * inline buffers, each rounded up by size_round(). */
+static inline int portal_ioctl_packlen(struct portal_ioctl_data *data)
+{
+        return (int)sizeof(*data) +
+               size_round(data->ioc_inllen1) +
+               size_round(data->ioc_inllen2);
+}
+
+/*
+ * Sanity-check a packed ioctl request.
+ *
+ * Checks run in a fixed order and stop at the first failure, which is
+ * logged with CERROR: length limits, pointer/length pairing for the inline
+ * and user buffers, agreement between ioc_len and the computed packed
+ * length, and NUL termination of each non-empty inline buffer inside
+ * ioc_bulk.
+ *
+ * Returns 1 when the request is malformed, 0 when it passes every check.
+ */
+static inline int portal_ioctl_is_invalid(struct portal_ioctl_data *data)
+{
+        int invalid = 1;
+
+        if (data->ioc_len > (1<<30))
+                CERROR ("PORTALS ioctl: ioc_len larger than 1<<30\n");
+        else if (data->ioc_inllen1 > (1<<30))
+                CERROR ("PORTALS ioctl: ioc_inllen1 larger than 1<<30\n");
+        else if (data->ioc_inllen2 > (1<<30))
+                CERROR ("PORTALS ioctl: ioc_inllen2 larger than 1<<30\n");
+        else if (data->ioc_inlbuf1 && !data->ioc_inllen1)
+                CERROR ("PORTALS ioctl: inlbuf1 pointer but 0 length\n");
+        else if (data->ioc_inlbuf2 && !data->ioc_inllen2)
+                CERROR ("PORTALS ioctl: inlbuf2 pointer but 0 length\n");
+        else if (data->ioc_pbuf1 && !data->ioc_plen1)
+                CERROR ("PORTALS ioctl: pbuf1 pointer but 0 length\n");
+        else if (data->ioc_pbuf2 && !data->ioc_plen2)
+                CERROR ("PORTALS ioctl: pbuf2 pointer but 0 length\n");
+        else if (data->ioc_plen1 && !data->ioc_pbuf1)
+                CERROR ("PORTALS ioctl: plen1 nonzero but no pbuf1 pointer\n");
+        else if (data->ioc_plen2 && !data->ioc_pbuf2)
+                CERROR ("PORTALS ioctl: plen2 nonzero but no pbuf2 pointer\n");
+        else if (portal_ioctl_packlen(data) != data->ioc_len )
+                CERROR ("PORTALS ioctl: packlen != ioc_len\n");
+        else if (data->ioc_inllen1 &&
+                 data->ioc_bulk[data->ioc_inllen1 - 1] != '\0')
+                CERROR ("PORTALS ioctl: inlbuf1 not 0 terminated\n");
+        else if (data->ioc_inllen2 &&
+                 data->ioc_bulk[size_round(data->ioc_inllen1) +
+                                data->ioc_inllen2 - 1] != '\0')
+                CERROR ("PORTALS ioctl: inlbuf2 not 0 terminated\n");
+        else
+                invalid = 0;
+
+        return invalid;
+}
+
+#ifndef __KERNEL__
+/*
+ * Pack 'data' and its inline buffers into one contiguous request.
+ *
+ * data - request to pack; its ioc_len and ioc_version are filled in here
+ * pbuf - in/out: destination buffer. When *pbuf is NULL a buffer of
+ *        ioc_len bytes is obtained with malloc() and stored in *pbuf;
+ *        it remains set even when the function later fails.
+ * max  - capacity of a caller-supplied *pbuf (not consulted when the
+ *        buffer is allocated here)
+ *
+ * Returns 0 on success, 1 when the request does not fit, allocation fails,
+ * or the packed result does not pass portal_ioctl_is_invalid().
+ */
+static inline int portal_ioctl_pack(struct portal_ioctl_data *data, char **pbuf,
+                                    int max)
+{
+        struct portal_ioctl_data *packed;
+        char *cursor;
+        int packed_len = portal_ioctl_packlen(data);
+
+        data->ioc_len = packed_len;
+        data->ioc_version = PORTAL_IOCTL_VERSION;
+
+        if (*pbuf != NULL) {
+                if (packed_len > max)
+                        return 1;
+        } else {
+                *pbuf = malloc(data->ioc_len);
+                if (*pbuf == NULL)
+                        return 1;
+        }
+
+        packed = (struct portal_ioctl_data *)*pbuf;
+        memcpy(*pbuf, data, sizeof(*data));
+
+        /* inline buffers follow the fixed part, each padded by LOGL */
+        cursor = packed->ioc_bulk;
+        if (data->ioc_inlbuf1)
+                LOGL(data->ioc_inlbuf1, data->ioc_inllen1, cursor);
+        if (data->ioc_inlbuf2)
+                LOGL(data->ioc_inlbuf2, data->ioc_inllen2, cursor);
+
+        return portal_ioctl_is_invalid(packed) ? 1 : 0;
+}
+#else
+#include <asm/uaccess.h>
+
+/*
+ * Copy an ioctl request from user space into the kernel buffer [buf, end).
+ *
+ * buf - kernel buffer receiving the request; MUST be at least the size of
+ *       struct portal_ioctl_hdr
+ * end - first byte past the kernel buffer
+ * arg - user-space address of the request
+ *
+ * The header is fetched first so that the version and the advertised
+ * length can be validated before the whole request is copied. On success
+ * the inline buffer pointers are redirected into the copied ioc_bulk area.
+ *
+ * Returns 0 on success, -EFAULT when user memory cannot be read, -EINVAL
+ * when the request is malformed.
+ */
+static inline int portal_ioctl_getdata(char *buf, char *end, void *arg)
+{
+        struct portal_ioctl_hdr *hdr;
+        struct portal_ioctl_data *data;
+        __u32 len;
+        ENTRY;
+
+        hdr = (struct portal_ioctl_hdr *)buf;
+        data = (struct portal_ioctl_data *)buf;
+
+        /* copy_from_user() reports the number of bytes it could not copy,
+         * which is not an errno; turn any shortfall into -EFAULT. */
+        if (copy_from_user(buf, (void *)arg, sizeof(*hdr)))
+                RETURN(-EFAULT);
+
+        if (hdr->ioc_version != PORTAL_IOCTL_VERSION) {
+                CERROR ("PORTALS: version mismatch kernel vs application\n");
+                RETURN(-EINVAL);
+        }
+
+        /* Compare lengths rather than pointers: 'buf + ioc_len' can wrap
+         * for a huge user-supplied length and slip past a pointer test. */
+        len = hdr->ioc_len;
+        if (end < buf || len >= (unsigned long)(end - buf)) {
+                CERROR ("PORTALS: user buffer exceeds kernel buffer\n");
+                RETURN(-EINVAL);
+        }
+
+        if (len < sizeof(struct portal_ioctl_data)) {
+                CERROR ("PORTALS: user buffer too small for ioctl\n");
+                RETURN(-EINVAL);
+        }
+
+        if (copy_from_user(buf, (void *)arg, len))
+                RETURN(-EFAULT);
+
+        /* The second copy overwrote the header; user space may have
+         * changed the length in between, so insist that it still matches
+         * the value that was bounds-checked above. */
+        if (data->ioc_len != len) {
+                CERROR ("PORTALS: ioctl length changed during copy\n");
+                RETURN(-EINVAL);
+        }
+
+        if (portal_ioctl_is_invalid(data)) {
+                CERROR ("PORTALS: ioctl not correctly formatted\n");
+                RETURN(-EINVAL);
+        }
+
+        if (data->ioc_inllen1)
+                data->ioc_inlbuf1 = &data->ioc_bulk[0];
+
+        if (data->ioc_inllen2)
+                data->ioc_inlbuf2 = &data->ioc_bulk[0] +
+                                    size_round(data->ioc_inllen1);
+
+        RETURN(0);
+}
+#endif
+
+/* ioctls for manipulating snapshots 30- */
+#define IOC_PORTAL_TYPE 'e'
+#define IOC_PORTAL_MIN_NR 30
+
+#define IOC_PORTAL_PING _IOWR('e', 30, long)
+#define IOC_PORTAL_GET_DEBUG _IOWR('e', 31, long)
+#define IOC_PORTAL_CLEAR_DEBUG _IOWR('e', 32, long)
+#define IOC_PORTAL_MARK_DEBUG _IOWR('e', 33, long)
+#define IOC_PORTAL_PANIC _IOWR('e', 34, long)
+#define IOC_PORTAL_ADD_ROUTE _IOWR('e', 35, long)
+#define IOC_PORTAL_DEL_ROUTE _IOWR('e', 36, long)
+#define IOC_PORTAL_GET_ROUTE _IOWR('e', 37, long)
+#define IOC_PORTAL_NAL_CMD _IOWR('e', 38, long)
+#define IOC_PORTAL_GET_NID _IOWR('e', 39, long)
+#define IOC_PORTAL_FAIL_NID _IOWR('e', 40, long)
+#define IOC_PORTAL_SET_DAEMON _IOWR('e', 41, long)
+
+#define IOC_PORTAL_MAX_NR 41
+
+enum {
+ QSWNAL = 1,
+ SOCKNAL,
+ GMNAL,
+ TOENAL,
+ TCPNAL,
+ SCIMACNAL,
+ NAL_ENUM_END_MARKER
+};
+
+#ifdef __KERNEL__
+extern ptl_handle_ni_t kqswnal_ni;
+extern ptl_handle_ni_t ksocknal_ni;
+extern ptl_handle_ni_t ktoenal_ni;
+extern ptl_handle_ni_t kgmnal_ni;
+extern ptl_handle_ni_t kscimacnal_ni;
+#endif
+
+#define NAL_MAX_NR (NAL_ENUM_END_MARKER - 1)
+
+#define NAL_CMD_REGISTER_PEER_FD 100
+#define NAL_CMD_CLOSE_CONNECTION 101
+#define NAL_CMD_REGISTER_MYNID 102
+#define NAL_CMD_PUSH_CONNECTION 103
+
+enum {
+ DEBUG_DAEMON_START = 1,
+ DEBUG_DAEMON_STOP = 2,
+ DEBUG_DAEMON_PAUSE = 3,
+ DEBUG_DAEMON_CONTINUE = 4,
+};
+
+/* XXX remove to lustre ASAP */
+struct lustre_peer {
+ ptl_nid_t peer_nid;
+ ptl_handle_ni_t peer_ni;
+};
+
+/* module.c */
+typedef int (*nal_cmd_handler_t)(struct portal_ioctl_data *, void * private);
+int kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private);
+int kportal_nal_unregister(int nal);
+
+ptl_handle_ni_t *kportal_get_ni (int nal);
+void kportal_put_ni (int nal);
+
+#ifdef __CYGWIN__
+#ifndef BITS_PER_LONG
+#if (~0UL) == 0xffffffffUL
+#define BITS_PER_LONG 32
+#else
+#define BITS_PER_LONG 64
+#endif
+#endif
+#endif
+
+#if (BITS_PER_LONG == 32 || __WORDSIZE == 32)
+# define LPU64 "%Lu"
+# define LPD64 "%Ld"
+# define LPX64 "%#Lx"
+# define LPSZ "%u"
+# define LPSSZ "%d"
+#endif
+#if (BITS_PER_LONG == 64 || __WORDSIZE == 64)
+# define LPU64 "%lu"
+# define LPD64 "%ld"
+# define LPX64 "%#lx"
+# define LPSZ "%lu"
+# define LPSSZ "%ld"
+#endif
+#ifndef LPU64
+# error "No word size defined"
+#endif
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines.
+ *
+ */
+
+#ifndef _PORTALS_LIB_H
+#define _PORTALS_LIB_H
+
+#ifndef __KERNEL__
+# include <string.h>
+#else
+# include <asm/types.h>
+#endif
+
+#undef MIN
+#define MIN(a,b) (((a)<(b)) ? (a): (b))
+#undef MAX
+#define MAX(a,b) (((a)>(b)) ? (a): (b))
+/* Substitute an empty string for a NULL string pointer. The expansion is
+ * fully parenthesized so it can be used inside larger expressions; 'ptr'
+ * is expanded twice and must be free of side effects. */
+#define MKSTR(ptr) ((ptr) ? (ptr) : "")
+
+/* Round 'val' up to the next multiple of 8 (multiples of 8 are returned
+ * unchanged). */
+static inline int size_round (int val)
+{
+        const int align_mask = 0x7;
+
+        return (val + align_mask) & ~align_mask;
+}
+
+/* Like size_round(), but with room for one extra byte: returns 0 for 0,
+ * otherwise 'val' + 1 rounded up to a multiple of 8. */
+static inline int size_round0(int val)
+{
+        return val ? ((val + 8) & ~0x7) : 0;
+}
+
+/* Space needed to store string 'fset' including its terminating NUL,
+ * rounded up by size_round(). The string is only read, so it is taken as
+ * a pointer to const. */
+static inline size_t round_strlen(const char *fset)
+{
+        return size_round(strlen(fset) + 1);
+}
+
+#ifdef __KERNEL__
+/* Kernel-side strdup(): returns a kmalloc(GFP_KERNEL) copy of 'str'
+ * including its terminating NUL, or NULL when the allocation fails. */
+static inline char *strdup(const char *str)
+{
+        int nbytes = strlen(str) + 1;
+        char *copy = kmalloc(nbytes, GFP_KERNEL);
+
+        if (copy == NULL)
+                return NULL;
+
+        memcpy(copy, str, nbytes);
+        return copy;
+}
+#endif
+
+#ifdef __KERNEL__
+# define NTOH__u32(var) le32_to_cpu(var)
+# define NTOH__u64(var) le64_to_cpu(var)
+# define HTON__u32(var) cpu_to_le32(var)
+# define HTON__u64(var) cpu_to_le64(var)
+#else
+# define expansion_u64(var) \
+ ({ __u64 ret; \
+ switch (sizeof(var)) { \
+ case 8: (ret) = (var); break; \
+ case 4: (ret) = (__u32)(var); break; \
+ case 2: (ret) = (__u16)(var); break; \
+ case 1: (ret) = (__u8)(var); break; \
+ }; \
+ (ret); \
+ })
+# define NTOH__u32(var) (var)
+# define NTOH__u64(var) (expansion_u64(var))
+# define HTON__u32(var) (var)
+# define HTON__u64(var) (expansion_u64(var))
+#endif
+
+/*
+ * copy sizeof(type) bytes from pointer to var and move ptr forward.
+ * return -EFAULT from the enclosing function if that would read beyond
+ * end; the bound is checked before the value is read, so nothing past
+ * 'end' is ever dereferenced ('var' and 'ptr' are left untouched then).
+ */
+#define UNLOGV(var,type,ptr,end)                                              \
+do {                                                                          \
+        if ((ptr) + sizeof(type) > (end))                                     \
+                return -EFAULT;                                               \
+        var = *(type *)(ptr);                                                 \
+        ptr += sizeof(type);                                                  \
+} while (0)
+
+/* the following two macros convert to little endian */
+/* type MUST be __u32 or __u64 */
+/*
+ * Like UNLOGV, but the value is passed through NTOH<type>() on the way
+ * out. The bound is checked before the value is read, so nothing past
+ * 'end' is ever dereferenced; on failure the enclosing function returns
+ * -EFAULT with 'var' and 'ptr' untouched.
+ */
+#define LUNLOGV(var,type,ptr,end)                                             \
+do {                                                                          \
+        if ((ptr) + sizeof(type) > (end))                                     \
+                return -EFAULT;                                               \
+        var = NTOH##type(*(type *)(ptr));                                     \
+        ptr += sizeof(type);                                                  \
+} while (0)
+
+/* now log values */
+#define LOGV(var,type,ptr) \
+do { \
+ *((type *)ptr) = var; \
+ ptr += sizeof(type); \
+} while (0)
+
+/* and in network order */
+#define LLOGV(var,type,ptr) \
+do { \
+ *((type *)ptr) = HTON##type(var); \
+ ptr += sizeof(type); \
+} while (0)
+
+
+/*
+ * set var to point at (type *)ptr, move ptr forward with sizeof(type)
+ * return from function with EFAULT if ptr goes beyond end
+ */
+#define UNLOGP(var,type,ptr,end) \
+do { \
+ var = (type *)ptr; \
+ ptr += sizeof(type); \
+ if (ptr > end ) \
+ return -EFAULT; \
+} while (0)
+
+#define LOGP(var,type,ptr) \
+do { \
+ memcpy(ptr, var, sizeof(type)); \
+ ptr += sizeof(type); \
+} while (0)
+
+/*
+ * set var to point at (char *)ptr, move ptr forward by size_round(len);
+ * return from function with EFAULT if ptr goes beyond end
+ */
+#define UNLOGL(var,type,len,ptr,end) \
+do { \
+ var = (type *)ptr; \
+ ptr += size_round(len * sizeof(type)); \
+ if (ptr > end ) \
+ return -EFAULT; \
+} while (0)
+
+/*
+ * Like UNLOGL, and additionally require the unpacked region to end in a
+ * NUL byte; otherwise return -EFAULT from the enclosing function.
+ *
+ * The terminator is located from 'var' (the start of the region) and the
+ * region's byte length, so the check agrees with the amount UNLOGL
+ * advanced by for any element type. For char elements this is the same
+ * byte the previous ptr-relative computation examined.
+ */
+#define UNLOGL0(var,type,len,ptr,end)                                         \
+do {                                                                          \
+        UNLOGL(var,type,len,ptr,end);                                         \
+        if (((const char *)(var))[(long)((len) * sizeof(type)) - 1] != '\0')  \
+                return -EFAULT;                                               \
+} while (0)
+
+#define LOGL(var,len,ptr) \
+do { \
+ if (var) \
+ memcpy((char *)ptr, (const char *)var, len); \
+ ptr += size_round(len); \
+} while (0)
+
+#define LOGU(var,len,ptr) \
+do { \
+ if (var) \
+ memcpy((char *)var, (const char *)ptr, len); \
+ ptr += size_round(len); \
+} while (0)
+
+#define LOGL0(var,len,ptr) \
+do { \
+ if (!len) \
+ break; \
+ memcpy((char *)ptr, (const char *)var, len); \
+ *((char *)(ptr) + len) = 0; \
+ ptr += size_round(len + 1); \
+} while (0)
+
+#endif /* _PORTALS_LIB_H */
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+SUBDIRS = base
+include $(top_srcdir)/Rules
+
+pkginclude_HEADERS=api-support.h api.h arg-blocks.h defines.h errno.h internal.h lib-dispatch.h lib-nal.h lib-p30.h lib-types.h myrnal.h nal.h p30.h ppid.h ptlctl.h stringtab.h types.h nalids.h list.h bridge.h ipmap.h procbridge.h lltrace.h
+
--- /dev/null
+# define DEBUG_SUBSYSTEM S_PORTALS
+# define PORTAL_DEBUG
+
+#ifndef __KERNEL__
+# include <stdio.h>
+# include <stdlib.h>
+# include <unistd.h>
+# include <time.h>
+
+/* Lots of POSIX dependencies to support PtlEQWait_timeout */
+# include <signal.h>
+# include <setjmp.h>
+# include <time.h>
+#endif
+
+#include <portals/types.h>
+#include <linux/kp30.h>
+#include <portals/p30.h>
+
+#include <portals/internal.h>
+#include <portals/nal.h>
+#include <portals/arg-blocks.h>
+
+/* Hack for 2.4.18 macro name collision */
+#ifdef yield
+#undef yield
+#endif
--- /dev/null
+#ifndef P30_API_H
+#define P30_API_H
+
+#include <portals/types.h>
+
+#ifndef PTL_NO_WRAP
+int PtlInit(void);
+int PtlInitialized(void);
+void PtlFini(void);
+
+int PtlNIInit(ptl_interface_t interface, ptl_pt_index_t ptl_size_in,
+ ptl_ac_index_t acl_size_in, ptl_pid_t requested_pid,
+ ptl_handle_ni_t * interface_out);
+
+int PtlNIInitialized(ptl_interface_t);
+
+int PtlNIFini(ptl_handle_ni_t interface_in);
+
+#endif
+
+int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id);
+
+
+/*
+ * Network interfaces
+ */
+
+#ifndef PTL_NO_WRAP
+int PtlNIBarrier(ptl_handle_ni_t interface_in);
+#endif
+
+int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in,
+ ptl_sr_value_t * status_out);
+
+int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in,
+ unsigned long *distance_out);
+
+#ifndef PTL_NO_WRAP
+int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * interface_out);
+#endif
+
+
+/*
+ * PtlNIDebug:
+ *
+ * This is not an official Portals 3 API call. It is provided
+ * by the reference implementation to allow the maintainers an
+ * easy way to turn on and off debugging information in the
+ * library. Do not use it in code that is not intended for use
+ * with any version other than the portable reference library.
+ */
+unsigned int PtlNIDebug(ptl_handle_ni_t ni, unsigned int mask_in);
+
+/*
+ * PtlFailNid
+ *
+ * Not an official Portals 3 API call. It provides a way of simulating
+ * communications failures to all (nid == PTL_NID_ANY), or specific peers
+ * (via multiple calls), either until further notice (threshold == -1), or
+ * for a specific number of messages. Passing a threshold of zero, "heals"
+ * the given peer.
+ */
+int PtlFailNid (ptl_handle_ni_t ni, ptl_nid_t nid, unsigned int threshold);
+
+
+/*
+ * Match entries
+ */
+
+int PtlMEAttach(ptl_handle_ni_t interface_in, ptl_pt_index_t index_in,
+ ptl_process_id_t match_id_in, ptl_match_bits_t match_bits_in,
+ ptl_match_bits_t ignore_bits_in, ptl_unlink_t unlink_in,
+ ptl_ins_pos_t pos_in, ptl_handle_me_t * handle_out);
+
+int PtlMEInsert(ptl_handle_me_t current_in, ptl_process_id_t match_id_in,
+ ptl_match_bits_t match_bits_in, ptl_match_bits_t ignore_bits_in,
+ ptl_unlink_t unlink_in, ptl_ins_pos_t position_in,
+ ptl_handle_me_t * handle_out);
+
+int PtlMEUnlink(ptl_handle_me_t current_in);
+
+int PtlMEUnlinkList(ptl_handle_me_t current_in);
+
+int PtlTblDump(ptl_handle_ni_t ni, int index_in);
+int PtlMEDump(ptl_handle_me_t current_in);
+
+
+
+/*
+ * Memory descriptors
+ */
+
+#ifndef PTL_NO_WRAP
+int PtlMDAttach(ptl_handle_me_t current_in, ptl_md_t md_in,
+ ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out);
+
+int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in,
+ ptl_handle_md_t * handle_out);
+
+int PtlMDUnlink(ptl_handle_md_t md_in);
+
+int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t * old_inout,
+ ptl_md_t * new_inout, ptl_handle_eq_t testq_in);
+
+#endif
+
+/* These should not be called by users */
+int PtlMDUpdate_internal(ptl_handle_md_t md_in, ptl_md_t * old_inout,
+ ptl_md_t * new_inout, ptl_handle_eq_t testq_in,
+ ptl_seq_t sequence_in);
+
+
+
+
+/*
+ * Event queues
+ */
+#ifndef PTL_NO_WRAP
+
+/* These should be called by users */
+int PtlEQAlloc(ptl_handle_ni_t ni_in, ptl_size_t count_in,
+ int (*callback) (ptl_event_t * event),
+ ptl_handle_eq_t * handle_out);
+int PtlEQFree(ptl_handle_eq_t eventq_in);
+
+int PtlEQCount(ptl_handle_eq_t eventq_in, ptl_size_t * count_out);
+
+int PtlEQGet(ptl_handle_eq_t eventq_in, ptl_event_t * event_out);
+
+
+int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t * event_out);
+
+int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out,
+ int timeout);
+#endif
+
+/*
+ * Access Control Table
+ */
+int PtlACEntry(ptl_handle_ni_t ni_in, ptl_ac_index_t index_in,
+ ptl_process_id_t match_id_in, ptl_pt_index_t portal_in);
+
+
+/*
+ * Data movement
+ */
+
+int PtlPut(ptl_handle_md_t md_in, ptl_ack_req_t ack_req_in,
+ ptl_process_id_t target_in, ptl_pt_index_t portal_in,
+ ptl_ac_index_t cookie_in, ptl_match_bits_t match_bits_in,
+ ptl_size_t offset_in, ptl_hdr_data_t hdr_data_in);
+
+int PtlGet(ptl_handle_md_t md_in, ptl_process_id_t target_in,
+ ptl_pt_index_t portal_in, ptl_ac_index_t cookie_in,
+ ptl_match_bits_t match_bits_in, ptl_size_t offset_in);
+
+
+
+#endif
--- /dev/null
+#ifndef PTL_BLOCKS_H
+#define PTL_BLOCKS_H
+
+/*
+ * blocks.h
+ *
+ * Argument block types for the Portals 3.0 library
+ * Generated by idl
+ *
+ */
+
+#include <portals/types.h>
+
+/* put LIB_MAX_DISPATCH last here -- these must match the
+ assignments to the dispatch table in lib-p30/dispatch.c */
+/* Indices 7 and 12 are left unused: their defines are commented out below.
+ * LIB_MAX_DISPATCH equals the highest index in use (PTL_FAILNID). */
+#define PTL_GETID 1
+#define PTL_NISTATUS 2
+#define PTL_NIDIST 3
+#define PTL_NIDEBUG 4
+#define PTL_MEATTACH 5
+#define PTL_MEINSERT 6
+// #define PTL_MEPREPEND 7
+#define PTL_MEUNLINK 8
+#define PTL_TBLDUMP 9
+#define PTL_MEDUMP 10
+#define PTL_MDATTACH 11
+// #define PTL_MDINSERT 12
+#define PTL_MDBIND 13
+#define PTL_MDUPDATE 14
+#define PTL_MDUNLINK 15
+#define PTL_EQALLOC 16
+#define PTL_EQFREE 17
+#define PTL_ACENTRY 18
+#define PTL_PUT 19
+#define PTL_GET 20
+#define PTL_FAILNID 21
+#define LIB_MAX_DISPATCH 21
+
+/* Argument block for the PTL_FAILNID dispatch entry: interface handle,
+ * peer nid and message threshold. */
+typedef struct PtlFailNid_in {
+ ptl_handle_ni_t interface;
+ ptl_nid_t nid;
+ unsigned int threshold;
+} PtlFailNid_in;
+
+/* Return block for PTL_FAILNID: carries only the return code. */
+typedef struct PtlFailNid_out {
+ int rc;
+} PtlFailNid_out;
+
+typedef struct PtlGetId_in {
+ ptl_handle_ni_t handle_in;
+} PtlGetId_in;
+
+typedef struct PtlGetId_out {
+ int rc;
+ ptl_process_id_t id_out;
+} PtlGetId_out;
+
+typedef struct PtlNIStatus_in {
+ ptl_handle_ni_t interface_in;
+ ptl_sr_index_t register_in;
+} PtlNIStatus_in;
+
+typedef struct PtlNIStatus_out {
+ int rc;
+ ptl_sr_value_t status_out;
+} PtlNIStatus_out;
+
+
+typedef struct PtlNIDist_in {
+ ptl_handle_ni_t interface_in;
+ ptl_process_id_t process_in;
+} PtlNIDist_in;
+
+typedef struct PtlNIDist_out {
+ int rc;
+ unsigned long distance_out;
+} PtlNIDist_out;
+
+
+/* Argument block for the PTL_NIDEBUG dispatch entry: only the debug mask
+ * is carried; there is no interface handle field in this block. */
+typedef struct PtlNIDebug_in {
+ unsigned int mask_in;
+} PtlNIDebug_in;
+
+/* Return block for PTL_NIDEBUG.  Note rc is unsigned here, unlike the
+ * int rc of the other return blocks. */
+typedef struct PtlNIDebug_out {
+ unsigned int rc;
+} PtlNIDebug_out;
+
+
+typedef struct PtlMEAttach_in {
+ ptl_handle_ni_t interface_in;
+ ptl_pt_index_t index_in;
+ ptl_ins_pos_t position_in;
+ ptl_process_id_t match_id_in;
+ ptl_match_bits_t match_bits_in;
+ ptl_match_bits_t ignore_bits_in;
+ ptl_unlink_t unlink_in;
+} PtlMEAttach_in;
+
+typedef struct PtlMEAttach_out {
+ int rc;
+ ptl_handle_me_t handle_out;
+} PtlMEAttach_out;
+
+
+typedef struct PtlMEInsert_in {
+ ptl_handle_me_t current_in;
+ ptl_process_id_t match_id_in;
+ ptl_match_bits_t match_bits_in;
+ ptl_match_bits_t ignore_bits_in;
+ ptl_unlink_t unlink_in;
+ ptl_ins_pos_t position_in;
+} PtlMEInsert_in;
+
+typedef struct PtlMEInsert_out {
+ int rc;
+ ptl_handle_me_t handle_out;
+} PtlMEInsert_out;
+
+/* Argument block for the PTL_MEUNLINK dispatch entry.
+ * NOTE(review): what unlink_in selects is not visible here; confirm its
+ * meaning against the API-side caller that fills this block. */
+typedef struct PtlMEUnlink_in {
+ ptl_handle_me_t current_in;
+ ptl_unlink_t unlink_in;
+} PtlMEUnlink_in;
+
+/* Return block for PTL_MEUNLINK: carries only the return code. */
+typedef struct PtlMEUnlink_out {
+ int rc;
+} PtlMEUnlink_out;
+
+
+typedef struct PtlTblDump_in {
+ int index_in;
+} PtlTblDump_in;
+
+typedef struct PtlTblDump_out {
+ int rc;
+} PtlTblDump_out;
+
+
+typedef struct PtlMEDump_in {
+ ptl_handle_me_t current_in;
+} PtlMEDump_in;
+
+typedef struct PtlMEDump_out {
+ int rc;
+} PtlMEDump_out;
+
+
+typedef struct PtlMDAttach_in {
+ ptl_handle_me_t me_in;
+ ptl_handle_eq_t eq_in;
+ ptl_md_t md_in;
+ ptl_unlink_t unlink_in;
+} PtlMDAttach_in;
+
+typedef struct PtlMDAttach_out {
+ int rc;
+ ptl_handle_md_t handle_out;
+} PtlMDAttach_out;
+
+
+typedef struct PtlMDBind_in {
+ ptl_handle_ni_t ni_in;
+ ptl_handle_eq_t eq_in;
+ ptl_md_t md_in;
+} PtlMDBind_in;
+
+typedef struct PtlMDBind_out {
+ int rc;
+ ptl_handle_md_t handle_out;
+} PtlMDBind_out;
+
+
+/* Argument block for the PTL_MDUPDATE dispatch entry.  Both memory
+ * descriptors are carried by value, each paired with a *_valid flag.
+ * NOTE(review): presumably the flags record whether the caller supplied
+ * the corresponding descriptor at all -- confirm against the caller. */
+typedef struct PtlMDUpdate_internal_in {
+ ptl_handle_md_t md_in;
+ ptl_handle_eq_t testq_in;
+ ptl_seq_t sequence_in;
+
+ ptl_md_t old_inout;
+ int old_inout_valid;
+ ptl_md_t new_inout;
+ int new_inout_valid;
+} PtlMDUpdate_internal_in;
+
+/* Return block for PTL_MDUPDATE: return code plus both descriptors,
+ * again by value. */
+typedef struct PtlMDUpdate_internal_out {
+ int rc;
+ ptl_md_t old_inout;
+ ptl_md_t new_inout;
+} PtlMDUpdate_internal_out;
+
+
+typedef struct PtlMDUnlink_in {
+ ptl_handle_md_t md_in;
+} PtlMDUnlink_in;
+
+typedef struct PtlMDUnlink_out {
+ int rc;
+ ptl_md_t status_out;
+} PtlMDUnlink_out;
+
+
+typedef struct PtlEQAlloc_in {
+ ptl_handle_ni_t ni_in;
+ ptl_size_t count_in;
+ void *base_in;
+ int len_in;
+ int (*callback_in) (ptl_event_t * event);
+} PtlEQAlloc_in;
+
+typedef struct PtlEQAlloc_out {
+ int rc;
+ ptl_handle_eq_t handle_out;
+} PtlEQAlloc_out;
+
+
+typedef struct PtlEQFree_in {
+ ptl_handle_eq_t eventq_in;
+} PtlEQFree_in;
+
+typedef struct PtlEQFree_out {
+ int rc;
+} PtlEQFree_out;
+
+
+typedef struct PtlACEntry_in {
+ ptl_handle_ni_t ni_in;
+ ptl_ac_index_t index_in;
+ ptl_process_id_t match_id_in;
+ ptl_pt_index_t portal_in;
+} PtlACEntry_in;
+
+typedef struct PtlACEntry_out {
+ int rc;
+} PtlACEntry_out;
+
+
+typedef struct PtlPut_in {
+ ptl_handle_md_t md_in;
+ ptl_ack_req_t ack_req_in;
+ ptl_process_id_t target_in;
+ ptl_pt_index_t portal_in;
+ ptl_ac_index_t cookie_in;
+ ptl_match_bits_t match_bits_in;
+ ptl_size_t offset_in;
+ ptl_hdr_data_t hdr_data_in;
+} PtlPut_in;
+
+typedef struct PtlPut_out {
+ int rc;
+} PtlPut_out;
+
+
+typedef struct PtlGet_in {
+ ptl_handle_md_t md_in;
+ ptl_process_id_t target_in;
+ ptl_pt_index_t portal_in;
+ ptl_ac_index_t cookie_in;
+ ptl_match_bits_t match_bits_in;
+ ptl_size_t offset_in;
+} PtlGet_in;
+
+typedef struct PtlGet_out {
+ int rc;
+} PtlGet_out;
+
+
+#endif
--- /dev/null
+/*
+** $Id: defines.h,v 1.1.2.1 2003/05/19 04:25:31 braam Exp $
+**
+** This file contains definitions that are used throughout the cplant code.
+*/
+
+#ifndef CPLANT_H
+#define CPLANT_H
+
+#define TITLE(fname,zmig)
+
+
+/*
+** TRUE and FALSE
+*/
+#undef TRUE
+#define TRUE (1)
+#undef FALSE
+#define FALSE (0)
+
+
+/*
+** Return codes from functions
+*/
+#undef OK
+#define OK (0)
+#undef ERROR
+#define ERROR (-1)
+
+
+
+/*
+** Simple max()/min() macros for arithmetic types.  The whole expansion is
+** parenthesised so the macros compose correctly inside larger expressions
+** (e.g. 2 * MAX(a, b)).  Each argument may still be evaluated twice, so do
+** not pass expressions with side effects.
+*/
+#ifndef MAX
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#endif /* MAX */
+
+#ifndef MIN
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#endif /* MIN */
+
+/*
+** The rest is from the old qkdefs.h
+*/
+
+#ifndef __linux__
+#define __inline__
+#endif
+
+#ifndef NULL
+#define NULL ((void *)0)
+#endif
+
+#ifndef __osf__
+#define PRIVATE static
+#define PUBLIC
+#endif
+
+#ifndef __osf__
+typedef unsigned char uchar;
+#endif
+
+/* Sized integer aliases.  INT8 is plain char, so its signedness follows
+ * the compiler's choice for char.  LONG32/ULONG32 are plain long despite
+ * the suffix, so they share the width caveat noted for LONG below. */
+typedef char CHAR;
+typedef unsigned char UCHAR;
+typedef char INT8;
+typedef unsigned char UINT8;
+typedef short int INT16;
+typedef unsigned short int UINT16;
+typedef int INT32;
+typedef unsigned int UINT32;
+typedef long LONG32;
+typedef unsigned long ULONG32;
+
+/* long may be 32 or 64, so we can't really append the size to the definition */
+typedef long LONG;
+typedef unsigned long ULONG;
+
+#ifdef __alpha__
+typedef long int_t;
+#ifndef __osf__
+typedef unsigned long uint_t;
+#endif
+#endif
+
+#ifdef __i386__
+typedef int int_t;
+typedef unsigned int uint_t;
+#endif
+
+typedef float FLOAT32;
+typedef double FLOAT64;
+typedef void VOID;
+typedef INT32 BOOLEAN;
+typedef void (*FCN_PTR)(void);
+
+/* Fallback off64_t.  NOTE: #ifndef only tests for a preprocessor macro
+ * named off64_t; it cannot detect an off64_t typedef already provided
+ * by a system header. */
+#ifndef off64_t
+
+/* long on alpha/ia64, long long elsewhere */
+#if defined (__alpha__) || defined (__ia64__)
+typedef long off64_t;
+#else
+typedef long long off64_t;
+#endif
+
+#endif
+
+/*
+** Process related typedefs
+*/
+typedef UINT16 PID_TYPE; /* Type of Local process ID */
+typedef UINT16 NID_TYPE; /* Type of Physical node ID */
+typedef UINT16 GID_TYPE; /* Type of Group ID */
+typedef UINT16 RANK_TYPE; /* Type of Logical rank/process within a group */
+
+
+
+#endif /* CPLANT_H */
--- /dev/null
+#ifndef _P30_ERRNO_H_
+#define _P30_ERRNO_H_
+
+/*
+ * include/portals/errno.h
+ *
+ * Shared error number lists
+ */
+
+/* Portals return codes.  Values run contiguously from PTL_OK (0) up to
+ * PTL_MAX_ERRNO. */
+/* If you change these, you must update the string table in api-errno.c */
+typedef enum {
+ PTL_OK = 0,
+ PTL_SEGV = 1,
+
+ PTL_NOSPACE = 2,
+ PTL_INUSE = 3,
+ PTL_VAL_FAILED = 4,
+
+ PTL_NAL_FAILED = 5,
+ PTL_NOINIT = 6,
+ PTL_INIT_DUP = 7,
+ PTL_INIT_INV = 8,
+ PTL_AC_INV_INDEX = 9,
+
+ PTL_INV_ASIZE = 10,
+ PTL_INV_HANDLE = 11,
+ PTL_INV_MD = 12,
+ PTL_INV_ME = 13,
+ PTL_INV_NI = 14,
+/* If you change these, you must update the string table in api-errno.c */
+ PTL_ILL_MD = 15,
+ PTL_INV_PROC = 16,
+ PTL_INV_PSIZE = 17,
+ PTL_INV_PTINDEX = 18,
+ PTL_INV_REG = 19,
+
+ PTL_INV_SR_INDX = 20,
+ PTL_ML_TOOLONG = 21,
+ PTL_ADDR_UNKNOWN = 22,
+ PTL_INV_EQ = 23,
+ PTL_EQ_DROPPED = 24,
+
+ PTL_EQ_EMPTY = 25,
+ PTL_NOUPDATE = 26,
+ PTL_FAIL = 27,
+ PTL_NOT_IMPLEMENTED = 28,
+ PTL_NO_ACK = 29,
+
+ PTL_IOV_TOO_MANY = 30,
+ PTL_IOV_TOO_SMALL = 31,
+
+ PTL_EQ_INUSE = 32,
+ PTL_MD_INUSE = 33,
+
+ /* Alias for the highest code above (currently PTL_MD_INUSE); keep it
+  * in step when adding new codes. */
+ PTL_MAX_ERRNO = 33
+} ptl_err_t;
+/* If you change these, you must update the string table in api-errno.c */
+
+extern const char *ptl_err_str[];
+
+#endif
--- /dev/null
+/*
+** $Id: internal.h,v 1.1.2.1 2003/05/19 04:25:31 braam Exp $
+*/
+#ifndef _P30_INTERNAL_H_
+#define _P30_INTERNAL_H_
+
+/*
+ * p30/internal.h
+ *
+ * Internals for the API level library that are not needed
+ * by the user application
+ */
+
+#include <portals/p30.h>
+
+extern int ptl_init; /* Has the library been initialized? */
+
+extern int ptl_ni_init(void);
+extern int ptl_me_init(void);
+extern int ptl_md_init(void);
+extern int ptl_eq_init(void);
+
+extern int ptl_me_ni_init(nal_t * nal);
+extern int ptl_md_ni_init(nal_t * nal);
+extern int ptl_eq_ni_init(nal_t * nal);
+
+extern void ptl_ni_fini(void);
+extern void ptl_me_fini(void);
+extern void ptl_md_fini(void);
+extern void ptl_eq_fini(void);
+
+extern void ptl_me_ni_fini(nal_t * nal);
+extern void ptl_md_ni_fini(nal_t * nal);
+extern void ptl_eq_ni_fini(nal_t * nal);
+
+/*
+ * EQ handles are a little weird.  On the "user" side, the cookie
+ * is just a pointer to a queue of events in shared memory.  Its
+ * cb_eq_handle is the "real" handle which we pass when we
+ * call do_forward().
+ */
+static inline ptl_eq_t *
+ptl_handle2usereq (ptl_handle_eq_t *handle)
+{
+        unsigned long addr = (unsigned long)handle->cookie;
+
+        return ((ptl_eq_t *)addr);
+}
+
+#endif
--- /dev/null
+#ifndef PTL_DISPATCH_H
+#define PTL_DISPATCH_H
+
+/*
+ * include/dispatch.h
+ *
+ * Dispatch table header and externs for remote side
+ * operations
+ *
+ * Generated by idl
+ *
+ */
+
+#include <portals/lib-p30.h>
+#include <portals/arg-blocks.h>
+
+extern int do_PtlGetId(nal_cb_t * nal, void *private, void *args, void *ret);
+extern int do_PtlNIStatus(nal_cb_t * nal, void *private, void *args, void *ret);
+extern int do_PtlNIDist(nal_cb_t * nal, void *private, void *args, void *ret);
+extern int do_PtlNIDebug(nal_cb_t * nal, void *private, void *args, void *ret);
+extern int do_PtlMEAttach(nal_cb_t * nal, void *private, void *args, void *ret);
+extern int do_PtlMEInsert(nal_cb_t * nal, void *private, void *args, void *ret);
+extern int do_PtlMEPrepend(nal_cb_t * nal, void *private, void *args,
+ void *ret);
+extern int do_PtlMEUnlink(nal_cb_t * nal, void *private, void *args, void *ret);
+extern int do_PtlTblDump(nal_cb_t * nal, void *private, void *args, void *ret);
+extern int do_PtlMEDump(nal_cb_t * nal, void *private, void *args, void *ret);
+extern int do_PtlMDAttach(nal_cb_t * nal, void *private, void *args,
+ void *ret);
+extern int do_PtlMDBind(nal_cb_t * nal, void *private, void *args,
+ void *ret);
+extern int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *args,
+ void *ret);
+extern int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *args,
+ void *ret);
+extern int do_PtlEQAlloc_internal(nal_cb_t * nal, void *private, void *args,
+ void *ret);
+extern int do_PtlEQFree_internal(nal_cb_t * nal, void *private, void *args,
+ void *ret);
+extern int do_PtlACEntry(nal_cb_t * nal, void *private, void *args, void *ret);
+extern int do_PtlPut(nal_cb_t * nal, void *private, void *args, void *ret);
+extern int do_PtlGet(nal_cb_t * nal, void *private, void *args, void *ret);
+extern int do_PtlFailNid (nal_cb_t *nal, void *private, void *args, void *ret);
+
+extern char *dispatch_name(int index);
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lib-p30.h
+ *
+ * Top level include for library side routines
+ */
+
+#ifndef _LIB_P30_H_
+#define _LIB_P30_H_
+
+#ifdef __KERNEL__
+# include <asm/page.h>
+# include <linux/string.h>
+#else
+# include <portals/list.h>
+# include <string.h>
+#endif
+#include <portals/types.h>
+#include <linux/kp30.h>
+#include <portals/p30.h>
+#include <portals/errno.h>
+#include <portals/lib-types.h>
+#include <portals/lib-nal.h>
+#include <portals/lib-dispatch.h>
+
+/* Returns non-zero iff both cookies of *wh equal those of
+ * PTL_WIRE_HANDLE_NONE. */
+static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh)
+{
+        if (wh->wh_interface_cookie != PTL_WIRE_HANDLE_NONE.wh_interface_cookie)
+                return (0);
+
+        return (wh->wh_object_cookie == PTL_WIRE_HANDLE_NONE.wh_object_cookie);
+}
+
+/*
+ * state_lock()/state_unlock(): bracket accesses to the library state.
+ * In the kernel they call the NAL's cb_cli/cb_sti callbacks with a pointer
+ * to the caller's flags word; in user space they only emit debug output.
+ *
+ * Both macros are wrapped in do { } while (0) so that each use behaves as
+ * a single statement (safe in unbraced if/else bodies), and the macro
+ * arguments are parenthesised.  Note that nal is evaluated more than once.
+ */
+#ifdef __KERNEL__
+#define state_lock(nal,flagsp)                          \
+do {                                                    \
+        CDEBUG(D_PORTALS, "taking state lock\n");       \
+        (nal)->cb_cli((nal), (flagsp));                 \
+} while (0)
+
+#define state_unlock(nal,flagsp)                        \
+do {                                                    \
+        CDEBUG(D_PORTALS, "releasing state lock\n");    \
+        (nal)->cb_sti((nal), (flagsp));                 \
+} while (0)
+#else
+/* not needed in user space until we thread there */
+#define state_lock(nal,flagsp)                          \
+do {                                                    \
+        CDEBUG(D_PORTALS, "taking state lock\n");       \
+        CDEBUG(D_PORTALS, "%p:%p\n", (nal), (flagsp));  \
+} while (0)
+
+#define state_unlock(nal,flagsp)                        \
+do {                                                    \
+        CDEBUG(D_PORTALS, "releasing state lock\n");    \
+        CDEBUG(D_PORTALS, "%p:%p\n", (nal), (flagsp));  \
+} while (0)
+#endif /* __KERNEL__ */
+
+#ifndef PTL_USE_SLAB_CACHE
+
+#define MAX_MES 2048
+#define MAX_MDS 2048
+#define MAX_MSGS 2048 /* Outstanding messages */
+#define MAX_EQS 512
+
+extern int lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int nobj, int objsize);
+extern void lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl);
+
+/* Detach the first object on the freelist and return a pointer to its
+ * contents, or NULL when the list is empty.
+ * ALWAYS called with statelock held. */
+static inline void *
+lib_freelist_alloc (lib_freelist_t *fl)
+{
+        lib_freeobj_t *fo;
+
+        if (!list_empty (&fl->fl_list)) {
+                fo = list_entry (fl->fl_list.next, lib_freeobj_t, fo_list);
+                list_del (&fo->fo_list);
+                return ((void *)&fo->fo_contents);
+        }
+
+        return (NULL);
+}
+
+/* Recover the lib_freeobj_t that wraps obj and put it back at the head
+ * of the freelist.  ALWAYS called with statelock held. */
+static inline void
+lib_freelist_free (lib_freelist_t *fl, void *obj)
+{
+        lib_freeobj_t *fo;
+
+        fo = list_entry (obj, lib_freeobj_t, fo_contents);
+        list_add (&fo->fo_list, &fl->fl_list);
+}
+
+
+/* Take a lib_eq_t from this NI's EQ freelist; the result is whatever
+ * lib_freelist_alloc() returns.  NEVER called with statelock held: the
+ * lock is taken and released here. */
+static inline lib_eq_t *
+lib_eq_alloc (nal_cb_t *nal)
+{
+        unsigned long  irq_flags;
+        void          *obj;
+
+        state_lock (nal, &irq_flags);
+        obj = lib_freelist_alloc (&nal->ni.ni_free_eqs);
+        state_unlock (nal, &irq_flags);
+
+        return ((lib_eq_t *)obj);
+}
+
+/* Give eq back to this NI's EQ freelist.
+ * ALWAYS called with statelock held. */
+static inline void
+lib_eq_free (nal_cb_t *nal, lib_eq_t *eq)
+{
+        lib_freelist_t *freelist = &nal->ni.ni_free_eqs;
+
+        lib_freelist_free (freelist, eq);
+}
+
+/* Take a lib_md_t from this NI's MD freelist; the result is whatever
+ * lib_freelist_alloc() returns.  NEVER called with statelock held: the
+ * lock is taken and released here. */
+static inline lib_md_t *
+lib_md_alloc (nal_cb_t *nal)
+{
+        unsigned long  irq_flags;
+        void          *obj;
+
+        state_lock (nal, &irq_flags);
+        obj = lib_freelist_alloc (&nal->ni.ni_free_mds);
+        state_unlock (nal, &irq_flags);
+
+        return ((lib_md_t *)obj);
+}
+
+/* Give md back to this NI's MD freelist.
+ * ALWAYS called with statelock held. */
+static inline void
+lib_md_free (nal_cb_t *nal, lib_md_t *md)
+{
+        lib_freelist_t *freelist = &nal->ni.ni_free_mds;
+
+        lib_freelist_free (freelist, md);
+}
+
+/* Take a lib_me_t from this NI's ME freelist; the result is whatever
+ * lib_freelist_alloc() returns.  NEVER called with statelock held: the
+ * lock is taken and released here. */
+static inline lib_me_t *
+lib_me_alloc (nal_cb_t *nal)
+{
+        unsigned long  irq_flags;
+        void          *obj;
+
+        state_lock (nal, &irq_flags);
+        obj = lib_freelist_alloc (&nal->ni.ni_free_mes);
+        state_unlock (nal, &irq_flags);
+
+        return ((lib_me_t *)obj);
+}
+
+/* Give me back to this NI's ME freelist.
+ * ALWAYS called with statelock held. */
+static inline void
+lib_me_free (nal_cb_t *nal, lib_me_t *me)
+{
+        lib_freelist_t *freelist = &nal->ni.ni_free_mes;
+
+        lib_freelist_free (freelist, me);
+}
+
+/* Take a lib_msg_t from this NI's message freelist.  Unlike the EQ/MD/ME
+ * allocators this does not touch the state lock itself:
+ * ALWAYS called with statelock held. */
+static inline lib_msg_t *
+lib_msg_alloc (nal_cb_t *nal)
+{
+        void *obj = lib_freelist_alloc (&nal->ni.ni_free_msgs);
+
+        return ((lib_msg_t *)obj);
+}
+
+/* Give msg back to this NI's message freelist.
+ * ALWAYS called with statelock held. */
+static inline void
+lib_msg_free (nal_cb_t *nal, lib_msg_t *msg)
+{
+        lib_freelist_t *freelist = &nal->ni.ni_free_msgs;
+
+        lib_freelist_free (freelist, msg);
+}
+
+#else
+
+extern kmem_cache_t *ptl_md_slab;
+extern kmem_cache_t *ptl_msg_slab;
+extern kmem_cache_t *ptl_me_slab;
+extern kmem_cache_t *ptl_eq_slab;
+extern atomic_t md_in_use_count;
+extern atomic_t msg_in_use_count;
+extern atomic_t me_in_use_count;
+extern atomic_t eq_in_use_count;
+
+/* Slab variant: allocate a lib_eq_t from ptl_eq_slab with GFP_KERNEL and
+ * count it in eq_in_use_count on success.  Returns NULL on failure.
+ * NEVER called with statelock held. */
+static inline lib_eq_t *
+lib_eq_alloc (nal_cb_t *nal)
+{
+        lib_eq_t *new_eq;
+
+        new_eq = kmem_cache_alloc(ptl_eq_slab, GFP_KERNEL);
+        if (new_eq != NULL)
+                atomic_inc (&eq_in_use_count);
+
+        return (new_eq);
+}
+
+/* Slab variant: drop the in-use count, then return eq to ptl_eq_slab.
+ * ALWAYS called with statelock held. */
+static inline void
+lib_eq_free (nal_cb_t *nal, lib_eq_t *eq)
+{
+        atomic_dec (&eq_in_use_count);
+        kmem_cache_free (ptl_eq_slab, eq);
+}
+
+/* Slab variant: allocate a lib_md_t from ptl_md_slab with GFP_KERNEL and
+ * count it in md_in_use_count on success.  Returns NULL on failure.
+ * NEVER called with statelock held. */
+static inline lib_md_t *
+lib_md_alloc (nal_cb_t *nal)
+{
+        lib_md_t *new_md;
+
+        new_md = kmem_cache_alloc(ptl_md_slab, GFP_KERNEL);
+        if (new_md != NULL)
+                atomic_inc (&md_in_use_count);
+
+        return (new_md);
+}
+
+/* Slab variant: drop the in-use count, then return md to ptl_md_slab.
+ * ALWAYS called with statelock held. */
+static inline void
+lib_md_free (nal_cb_t *nal, lib_md_t *md)
+{
+        atomic_dec (&md_in_use_count);
+        kmem_cache_free (ptl_md_slab, md);
+}
+
+/* Slab variant: allocate a lib_me_t from ptl_me_slab with GFP_KERNEL and
+ * count it in me_in_use_count on success.  Returns NULL on failure.
+ * NEVER called with statelock held. */
+static inline lib_me_t *
+lib_me_alloc (nal_cb_t *nal)
+{
+        lib_me_t *new_me;
+
+        new_me = kmem_cache_alloc(ptl_me_slab, GFP_KERNEL);
+        if (new_me != NULL)
+                atomic_inc (&me_in_use_count);
+
+        return (new_me);
+}
+
+/* Slab variant: drop the in-use count, then return me to ptl_me_slab.
+ * ALWAYS called with statelock held. */
+static inline void
+lib_me_free(nal_cb_t *nal, lib_me_t *me)
+{
+        atomic_dec (&me_in_use_count);
+        kmem_cache_free (ptl_me_slab, me);
+}
+
+/* Slab variant: allocate a lib_msg_t from ptl_msg_slab and count it in
+ * msg_in_use_count on success.  Returns NULL on failure.  Uses GFP_ATOMIC
+ * rather than GFP_KERNEL: ALWAYS called with statelock held. */
+static inline lib_msg_t *
+lib_msg_alloc(nal_cb_t *nal)
+{
+        lib_msg_t *new_msg;
+
+        new_msg = kmem_cache_alloc(ptl_msg_slab, GFP_ATOMIC);
+        if (new_msg != NULL)
+                atomic_inc (&msg_in_use_count);
+
+        return (new_msg);
+}
+
+/* Slab variant: drop the in-use count, then return msg to ptl_msg_slab.
+ * ALWAYS called with statelock held. */
+static inline void
+lib_msg_free(nal_cb_t *nal, lib_msg_t *msg)
+{
+        atomic_dec (&msg_in_use_count);
+        kmem_cache_free (ptl_msg_slab, msg);
+}
+#endif
+
+extern lib_handle_t *lib_lookup_cookie (nal_cb_t *nal, __u64 cookie);
+extern void lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh);
+extern void lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh);
+
+/* Store the cookie of the EQ's embedded library handle into *handle. */
+static inline void
+ptl_eq2handle (ptl_handle_eq_t *handle, lib_eq_t *eq)
+{
+        lib_handle_t *lh = &eq->eq_lh;
+
+        handle->cookie = lh->lh_cookie;
+}
+
+/* Look up the handle's cookie and return the lib_eq_t that embeds the
+ * matching library handle, or NULL if the lookup finds nothing.
+ * ALWAYS called with statelock held. */
+static inline lib_eq_t *
+ptl_handle2eq (ptl_handle_eq_t *handle, nal_cb_t *nal)
+{
+        lib_handle_t *found;
+
+        found = lib_lookup_cookie (nal, handle->cookie);
+        if (found != NULL)
+                return (lh_entry (found, lib_eq_t, eq_lh));
+
+        return (NULL);
+}
+
+/* Store the cookie of the MD's embedded library handle into *handle. */
+static inline void
+ptl_md2handle (ptl_handle_md_t *handle, lib_md_t *md)
+{
+        lib_handle_t *lh = &md->md_lh;
+
+        handle->cookie = lh->lh_cookie;
+}
+
+/* Look up the handle's cookie and return the lib_md_t that embeds the
+ * matching library handle, or NULL if the lookup finds nothing.
+ * ALWAYS called with statelock held. */
+static inline lib_md_t *
+ptl_handle2md (ptl_handle_md_t *handle, nal_cb_t *nal)
+{
+        lib_handle_t *found;
+
+        found = lib_lookup_cookie (nal, handle->cookie);
+        if (found != NULL)
+                return (lh_entry (found, lib_md_t, md_lh));
+
+        return (NULL);
+}
+
+/* Resolve a wire handle to a lib_md_t.  Returns NULL when the handle's
+ * interface cookie differs from this NI's, or when the object cookie
+ * lookup finds nothing.  ALWAYS called with statelock held. */
+static inline lib_md_t *
+ptl_wire_handle2md (ptl_handle_wire_t *wh, nal_cb_t *nal)
+{
+        lib_handle_t *found;
+
+        if (wh->wh_interface_cookie == nal->ni.ni_interface_cookie) {
+                found = lib_lookup_cookie (nal, wh->wh_object_cookie);
+                if (found != NULL)
+                        return (lh_entry (found, lib_md_t, md_lh));
+        }
+
+        return (NULL);
+}
+
+/* Store the cookie of the ME's embedded library handle into *handle. */
+static inline void
+ptl_me2handle (ptl_handle_me_t *handle, lib_me_t *me)
+{
+        lib_handle_t *lh = &me->me_lh;
+
+        handle->cookie = lh->lh_cookie;
+}
+
+/* Look up the handle's cookie and return the lib_me_t that embeds the
+ * matching library handle, or NULL if the lookup finds nothing.
+ * ALWAYS called with statelock held. */
+static inline lib_me_t *
+ptl_handle2me (ptl_handle_me_t *handle, nal_cb_t *nal)
+{
+        lib_handle_t *found;
+
+        found = lib_lookup_cookie (nal, handle->cookie);
+        if (found != NULL)
+                return (lh_entry (found, lib_me_t, me_lh));
+
+        return (NULL);
+}
+
+extern int lib_init(nal_cb_t * cb, ptl_nid_t nid, ptl_pid_t pid, int gsize,
+ ptl_pt_index_t tbl_size, ptl_ac_index_t ac_size);
+extern int lib_fini(nal_cb_t * cb);
+extern void lib_dispatch(nal_cb_t * cb, void *private, int index,
+ void *arg_block, void *ret_block);
+extern char *dispatch_name(int index);
+
+/*
+ * When the NAL detects an incoming message, it should call
+ * lib_parse() to decode it. The NAL callbacks will be handed
+ * the private cookie as a way for the NAL to maintain state
+ * about which transaction is being processed. An extra parameter,
+ * lib_cookie, will contain the necessary information for
+ * finalizing the message.
+ *
+ * After it has finished handling the message, it should
+ * call lib_finalize() with the lib_cookie parameter.
+ * Callbacks will be made to write events, send acks or
+ * replies and so on.
+ */
+extern int lib_parse(nal_cb_t * nal, ptl_hdr_t * hdr, void *private);
+extern int lib_finalize(nal_cb_t * nal, void *private, lib_msg_t * msg);
+extern void print_hdr(nal_cb_t * nal, ptl_hdr_t * hdr);
+
+/* struct iovec helpers.  The flat-buffer parameter of lib_copy_buf2iov is
+ * named src (it was misnamed dest), matching lib_copy_buf2kiov.
+ * NOTE(review): the copy direction is inferred from the function names
+ * (iov2buf / buf2iov); confirm against the definitions. */
+extern ptl_size_t lib_iov_nob (int niov, struct iovec *iov);
+extern void lib_copy_iov2buf (char *dest, int niov, struct iovec *iov, ptl_size_t len);
+extern void lib_copy_buf2iov (int niov, struct iovec *iov, char *src, ptl_size_t len);
+
+extern ptl_size_t lib_kiov_nob (int niov, ptl_kiov_t *iov);
+extern void lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *iov, ptl_size_t len);
+extern void lib_copy_buf2kiov (int niov, ptl_kiov_t *iov, char *src, ptl_size_t len);
+
+extern void lib_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
+ ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen);
+extern int lib_send (nal_cb_t *nal, void *private, lib_msg_t *msg,
+ ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+ lib_md_t *md, ptl_size_t offset, ptl_size_t len);
+
+extern void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md_in,
+ ptl_md_t * md_out);
+extern void lib_md_unlink(nal_cb_t * nal, lib_md_t * md_in);
+extern void lib_me_unlink(nal_cb_t * nal, lib_me_t * me_in);
+#endif
--- /dev/null
+#ifndef _LIB_NAL_H_
+#define _LIB_NAL_H_
+
+/*
+ * nal.h
+ *
+ * Library side headers that define the abstraction layer's
+ * responsibilities and interfaces
+ */
+
+#include <portals/lib-types.h>
+
+/*
+ * Library-side NAL descriptor: the per-interface library state, a NAL
+ * private pointer, and the table of callbacks through which the library
+ * reaches the NAL.
+ */
+struct nal_cb_t {
+        /*
+         * Per interface portal table, access control table
+         * and NAL private data field;
+         */
+        lib_ni_t ni;
+        void *nal_data;
+        /*
+         * send: Sends a preformatted header and user data to a
+         * specified remote process.
+         * Can overwrite iov.
+         */
+        int (*cb_send) (nal_cb_t * nal, void *private, lib_msg_t * cookie,
+                        ptl_hdr_t * hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+                        unsigned int niov, struct iovec *iov, size_t mlen);
+
+        /* as send, but with a set of page fragments (NULL if not supported) */
+        int (*cb_send_pages) (nal_cb_t * nal, void *private, lib_msg_t * cookie,
+                              ptl_hdr_t * hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+                              unsigned int niov, ptl_kiov_t *iov, size_t mlen);
+        /*
+         * recv: Receives an incoming message from a remote process
+         * Type of iov depends on options. Can overwrite iov.
+         */
+        int (*cb_recv) (nal_cb_t * nal, void *private, lib_msg_t * cookie,
+                        unsigned int niov, struct iovec *iov, size_t mlen,
+                        size_t rlen);
+
+        /* as recv, but with a set of page fragments (NULL if not supported) */
+        int (*cb_recv_pages) (nal_cb_t * nal, void *private, lib_msg_t * cookie,
+                              unsigned int niov, ptl_kiov_t *iov, size_t mlen,
+                              size_t rlen);
+        /*
+         * read: Reads a block of data from a specified user address
+         */
+        int (*cb_read) (nal_cb_t * nal, void *private, void *dst_addr,
+                        user_ptr src_addr, size_t len);
+
+        /*
+         * write: Writes a block of data into a specified user address
+         */
+        int (*cb_write) (nal_cb_t * nal, void *private, user_ptr dst_addr,
+                         void *src_addr, size_t len);
+
+        /*
+         * callback: Calls an event callback
+         */
+        int (*cb_callback) (nal_cb_t * nal, void *private, lib_eq_t *eq,
+                            ptl_event_t *ev);
+
+        /*
+         * malloc: Acquire a block of memory in a system independent
+         * fashion.
+         */
+        void *(*cb_malloc) (nal_cb_t * nal, size_t len);
+
+        /* free: counterpart of cb_malloc; takes the buffer and its length */
+        void (*cb_free) (nal_cb_t * nal, void *buf, size_t len);
+
+        /*
+         * (un)map: Tell the NAL about some memory it will access.
+         * *addrkey passed to cb_unmap() is what cb_map() set it to.
+         * type of *iov depends on options.
+         * Set to NULL if not required.
+         */
+        int (*cb_map) (nal_cb_t * nal, unsigned int niov, struct iovec *iov,
+                       void **addrkey);
+        void (*cb_unmap) (nal_cb_t * nal, unsigned int niov, struct iovec *iov,
+                          void **addrkey);
+
+        /* as (un)map, but with a set of page fragments */
+        int (*cb_map_pages) (nal_cb_t * nal, unsigned int niov, ptl_kiov_t *iov,
+                             void **addrkey);
+        void (*cb_unmap_pages) (nal_cb_t * nal, unsigned int niov, ptl_kiov_t *iov,
+                                void **addrkey);
+
+        /* printf-style output routed through the NAL */
+        void (*cb_printf) (nal_cb_t * nal, const char *fmt, ...);
+
+        /* Turn interrupts off (begin of protected area) */
+        void (*cb_cli) (nal_cb_t * nal, unsigned long *flags);
+
+        /* Turn interrupts on (end of protected area) */
+        void (*cb_sti) (nal_cb_t * nal, unsigned long *flags);
+
+        /*
+         * Calculate a network "distance" to given node
+         */
+        int (*cb_dist) (nal_cb_t * nal, ptl_nid_t nid, unsigned long *dist);
+};
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lib-p30.h
+ *
+ * Top level include for library side routines
+ */
+
+#ifndef _LIB_P30_H_
+#define _LIB_P30_H_
+
+#ifdef __KERNEL__
+# include <asm/page.h>
+# include <linux/string.h>
+#else
+# include <portals/list.h>
+# include <string.h>
+#endif
+#include <portals/types.h>
+#include <linux/kp30.h>
+#include <portals/p30.h>
+#include <portals/errno.h>
+#include <portals/lib-types.h>
+#include <portals/lib-nal.h>
+#include <portals/lib-dispatch.h>
+
+/* Returns non-zero iff both cookies of *wh equal those of
+ * PTL_WIRE_HANDLE_NONE. */
+static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh)
+{
+        if (wh->wh_interface_cookie != PTL_WIRE_HANDLE_NONE.wh_interface_cookie)
+                return (0);
+
+        return (wh->wh_object_cookie == PTL_WIRE_HANDLE_NONE.wh_object_cookie);
+}
+
+/*
+ * state_lock()/state_unlock(): bracket accesses to the library state.
+ * In the kernel they call the NAL's cb_cli/cb_sti callbacks with a pointer
+ * to the caller's flags word; in user space they only emit debug output.
+ *
+ * Both macros are wrapped in do { } while (0) so that each use behaves as
+ * a single statement (safe in unbraced if/else bodies), and the macro
+ * arguments are parenthesised.  Note that nal is evaluated more than once.
+ */
+#ifdef __KERNEL__
+#define state_lock(nal,flagsp)                          \
+do {                                                    \
+        CDEBUG(D_PORTALS, "taking state lock\n");       \
+        (nal)->cb_cli((nal), (flagsp));                 \
+} while (0)
+
+#define state_unlock(nal,flagsp)                        \
+do {                                                    \
+        CDEBUG(D_PORTALS, "releasing state lock\n");    \
+        (nal)->cb_sti((nal), (flagsp));                 \
+} while (0)
+#else
+/* not needed in user space until we thread there */
+#define state_lock(nal,flagsp)                          \
+do {                                                    \
+        CDEBUG(D_PORTALS, "taking state lock\n");       \
+        CDEBUG(D_PORTALS, "%p:%p\n", (nal), (flagsp));  \
+} while (0)
+
+#define state_unlock(nal,flagsp)                        \
+do {                                                    \
+        CDEBUG(D_PORTALS, "releasing state lock\n");    \
+        CDEBUG(D_PORTALS, "%p:%p\n", (nal), (flagsp));  \
+} while (0)
+#endif /* __KERNEL__ */
+
+#ifndef PTL_USE_SLAB_CACHE
+
+#define MAX_MES 2048
+#define MAX_MDS 2048
+#define MAX_MSGS 2048 /* Outstanding messages */
+#define MAX_EQS 512
+
+extern int lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int nobj, int objsize);
+extern void lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl);
+
+/* Detach the first object on the freelist and return a pointer to its
+ * contents, or NULL when the list is empty.
+ * ALWAYS called with statelock held. */
+static inline void *
+lib_freelist_alloc (lib_freelist_t *fl)
+{
+        lib_freeobj_t *fo;
+
+        if (!list_empty (&fl->fl_list)) {
+                fo = list_entry (fl->fl_list.next, lib_freeobj_t, fo_list);
+                list_del (&fo->fo_list);
+                return ((void *)&fo->fo_contents);
+        }
+
+        return (NULL);
+}
+
+/* Recover the lib_freeobj_t that wraps obj and put it back at the head
+ * of the freelist.  ALWAYS called with statelock held. */
+static inline void
+lib_freelist_free (lib_freelist_t *fl, void *obj)
+{
+        lib_freeobj_t *fo;
+
+        fo = list_entry (obj, lib_freeobj_t, fo_contents);
+        list_add (&fo->fo_list, &fl->fl_list);
+}
+
+
+/* Take a lib_eq_t from this NI's EQ freelist; the result is whatever
+ * lib_freelist_alloc() returns.  NEVER called with statelock held: the
+ * lock is taken and released here. */
+static inline lib_eq_t *
+lib_eq_alloc (nal_cb_t *nal)
+{
+        unsigned long  irq_flags;
+        void          *obj;
+
+        state_lock (nal, &irq_flags);
+        obj = lib_freelist_alloc (&nal->ni.ni_free_eqs);
+        state_unlock (nal, &irq_flags);
+
+        return ((lib_eq_t *)obj);
+}
+
+/* Give eq back to this NI's EQ freelist.
+ * ALWAYS called with statelock held. */
+static inline void
+lib_eq_free (nal_cb_t *nal, lib_eq_t *eq)
+{
+        lib_freelist_t *freelist = &nal->ni.ni_free_eqs;
+
+        lib_freelist_free (freelist, eq);
+}
+
+/* Take a lib_md_t from this NI's MD freelist; the result is whatever
+ * lib_freelist_alloc() returns.  NEVER called with statelock held: the
+ * lock is taken and released here. */
+static inline lib_md_t *
+lib_md_alloc (nal_cb_t *nal)
+{
+        unsigned long  irq_flags;
+        void          *obj;
+
+        state_lock (nal, &irq_flags);
+        obj = lib_freelist_alloc (&nal->ni.ni_free_mds);
+        state_unlock (nal, &irq_flags);
+
+        return ((lib_md_t *)obj);
+}
+
+/* Give md back to this NI's MD freelist.
+ * ALWAYS called with statelock held. */
+static inline void
+lib_md_free (nal_cb_t *nal, lib_md_t *md)
+{
+        lib_freelist_t *freelist = &nal->ni.ni_free_mds;
+
+        lib_freelist_free (freelist, md);
+}
+
+/* Take a lib_me_t from this NI's ME freelist; the result is whatever
+ * lib_freelist_alloc() returns.  NEVER called with statelock held: the
+ * lock is taken and released here. */
+static inline lib_me_t *
+lib_me_alloc (nal_cb_t *nal)
+{
+        unsigned long  irq_flags;
+        void          *obj;
+
+        state_lock (nal, &irq_flags);
+        obj = lib_freelist_alloc (&nal->ni.ni_free_mes);
+        state_unlock (nal, &irq_flags);
+
+        return ((lib_me_t *)obj);
+}
+
+/* Give me back to this NI's ME freelist.
+ * ALWAYS called with statelock held. */
+static inline void
+lib_me_free (nal_cb_t *nal, lib_me_t *me)
+{
+        lib_freelist_t *freelist = &nal->ni.ni_free_mes;
+
+        lib_freelist_free (freelist, me);
+}
+
+/* Take a lib_msg_t from this NI's message freelist.  Unlike the EQ/MD/ME
+ * allocators this does not touch the state lock itself:
+ * ALWAYS called with statelock held. */
+static inline lib_msg_t *
+lib_msg_alloc (nal_cb_t *nal)
+{
+        void *obj = lib_freelist_alloc (&nal->ni.ni_free_msgs);
+
+        return ((lib_msg_t *)obj);
+}
+
+/* Give msg back to this NI's message freelist.
+ * ALWAYS called with statelock held. */
+static inline void
+lib_msg_free (nal_cb_t *nal, lib_msg_t *msg)
+{
+        lib_freelist_t *freelist = &nal->ni.ni_free_msgs;
+
+        lib_freelist_free (freelist, msg);
+}
+
+#else
+
+extern kmem_cache_t *ptl_md_slab;
+extern kmem_cache_t *ptl_msg_slab;
+extern kmem_cache_t *ptl_me_slab;
+extern kmem_cache_t *ptl_eq_slab;
+extern atomic_t md_in_use_count;
+extern atomic_t msg_in_use_count;
+extern atomic_t me_in_use_count;
+extern atomic_t eq_in_use_count;
+
+/* Slab variant: allocate a lib_eq_t from ptl_eq_slab with GFP_KERNEL and
+ * count it in eq_in_use_count on success.  Returns NULL on failure.
+ * NEVER called with statelock held. */
+static inline lib_eq_t *
+lib_eq_alloc (nal_cb_t *nal)
+{
+        lib_eq_t *new_eq;
+
+        new_eq = kmem_cache_alloc(ptl_eq_slab, GFP_KERNEL);
+        if (new_eq != NULL)
+                atomic_inc (&eq_in_use_count);
+
+        return (new_eq);
+}
+
+/* Slab variant: drop the in-use count, then return eq to ptl_eq_slab.
+ * ALWAYS called with statelock held. */
+static inline void
+lib_eq_free (nal_cb_t *nal, lib_eq_t *eq)
+{
+        atomic_dec (&eq_in_use_count);
+        kmem_cache_free (ptl_eq_slab, eq);
+}
+
+/* Slab variant: allocate a lib_md_t from ptl_md_slab with GFP_KERNEL and
+ * count it in md_in_use_count on success.  Returns NULL on failure.
+ * NEVER called with statelock held. */
+static inline lib_md_t *
+lib_md_alloc (nal_cb_t *nal)
+{
+        lib_md_t *new_md;
+
+        new_md = kmem_cache_alloc(ptl_md_slab, GFP_KERNEL);
+        if (new_md != NULL)
+                atomic_inc (&md_in_use_count);
+
+        return (new_md);
+}
+
+/* Slab variant: drop the in-use count, then return md to ptl_md_slab.
+ * ALWAYS called with statelock held. */
+static inline void
+lib_md_free (nal_cb_t *nal, lib_md_t *md)
+{
+        atomic_dec (&md_in_use_count);
+        kmem_cache_free (ptl_md_slab, md);
+}
+
+/*
+ * Allocate a match entry from ptl_me_slab with GFP_KERNEL.
+ * NEVER called with the statelock held.
+ *
+ * Returns NULL when the slab allocation fails; otherwise increments
+ * me_in_use_count and returns the new object.
+ */
+static inline lib_me_t *
+lib_me_alloc (nal_cb_t *nal)
+{
+        lib_me_t *new_me = kmem_cache_alloc(ptl_me_slab, GFP_KERNEL);
+
+        if (new_me != NULL)
+                atomic_inc (&me_in_use_count);
+
+        return new_me;
+}
+
+/*
+ * Release a match entry: decrement me_in_use_count, then return the
+ * object to ptl_me_slab.
+ * ALWAYS called with the statelock held.
+ */
+static inline void
+lib_me_free (nal_cb_t *nal, lib_me_t *me)
+{
+        atomic_dec (&me_in_use_count);
+        kmem_cache_free (ptl_me_slab, me);
+}
+
+/*
+ * Allocate a message descriptor from ptl_msg_slab with GFP_ATOMIC.
+ * ALWAYS called with the statelock held.
+ *
+ * Returns NULL when the slab allocation fails; otherwise increments
+ * msg_in_use_count and returns the new object.
+ */
+static inline lib_msg_t *
+lib_msg_alloc (nal_cb_t *nal)
+{
+        lib_msg_t *new_msg = kmem_cache_alloc(ptl_msg_slab, GFP_ATOMIC);
+
+        if (new_msg != NULL)
+                atomic_inc (&msg_in_use_count);
+
+        return new_msg;
+}
+
+/*
+ * Release a message descriptor: decrement msg_in_use_count, then return
+ * the object to ptl_msg_slab.
+ * ALWAYS called with the statelock held.
+ */
+static inline void
+lib_msg_free (nal_cb_t *nal, lib_msg_t *msg)
+{
+        atomic_dec (&msg_in_use_count);
+        kmem_cache_free (ptl_msg_slab, msg);
+}
+#endif
+
+/* Lib handle management.  lib_lookup_cookie() maps a cookie to its
+ * lib_handle_t; the ptl_handle2*() helpers below treat a NULL return as
+ * "no such object".
+ * NOTE(review): presumably initialise/invalidate add and remove a handle
+ * from the interface's handle hash table - confirm in the implementation. */
+extern lib_handle_t *lib_lookup_cookie (nal_cb_t *nal, __u64 cookie);
+extern void lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh);
+extern void lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh);
+
+/* Store the lib handle cookie of event queue 'eq' into '*handle'. */
+static inline void
+ptl_eq2handle (ptl_handle_eq_t *handle, lib_eq_t *eq)
+{
+        lib_handle_t *lh = &eq->eq_lh;
+
+        handle->cookie = lh->lh_cookie;
+}
+
+/*
+ * Resolve an EQ handle to its event queue by looking up the handle's
+ * cookie on this interface.  Returns NULL if the cookie is not found.
+ * ALWAYS called with the statelock held.
+ */
+static inline lib_eq_t *
+ptl_handle2eq (ptl_handle_eq_t *handle, nal_cb_t *nal)
+{
+        lib_handle_t *found = lib_lookup_cookie (nal, handle->cookie);
+
+        if (found != NULL)
+                return lh_entry (found, lib_eq_t, eq_lh);
+
+        return NULL;
+}
+
+/* Store the lib handle cookie of memory descriptor 'md' into '*handle'. */
+static inline void
+ptl_md2handle (ptl_handle_md_t *handle, lib_md_t *md)
+{
+        lib_handle_t *lh = &md->md_lh;
+
+        handle->cookie = lh->lh_cookie;
+}
+
+/*
+ * Resolve an MD handle to its memory descriptor by looking up the
+ * handle's cookie on this interface.  Returns NULL if the cookie is not
+ * found.  ALWAYS called with the statelock held.
+ */
+static inline lib_md_t *
+ptl_handle2md (ptl_handle_md_t *handle, nal_cb_t *nal)
+{
+        lib_handle_t *found = lib_lookup_cookie (nal, handle->cookie);
+
+        if (found != NULL)
+                return lh_entry (found, lib_md_t, md_lh);
+
+        return NULL;
+}
+
+/*
+ * Resolve a wire handle to a memory descriptor.
+ *
+ * The handle's interface cookie must equal this interface's
+ * ni_interface_cookie; only then is the object cookie looked up.
+ * Returns NULL on an interface cookie mismatch or when the object cookie
+ * is not found.  ALWAYS called with the statelock held.
+ */
+static inline lib_md_t *
+ptl_wire_handle2md (ptl_handle_wire_t *wh, nal_cb_t *nal)
+{
+        lib_handle_t *found = NULL;
+
+        if (wh->wh_interface_cookie == nal->ni.ni_interface_cookie)
+                found = lib_lookup_cookie (nal, wh->wh_object_cookie);
+
+        if (found == NULL)
+                return NULL;
+
+        return lh_entry (found, lib_md_t, md_lh);
+}
+
+/* Store the lib handle cookie of match entry 'me' into '*handle'. */
+static inline void
+ptl_me2handle (ptl_handle_me_t *handle, lib_me_t *me)
+{
+        lib_handle_t *lh = &me->me_lh;
+
+        handle->cookie = lh->lh_cookie;
+}
+
+/*
+ * Resolve an ME handle to its match entry by looking up the handle's
+ * cookie on this interface.  Returns NULL if the cookie is not found.
+ * ALWAYS called with the statelock held.
+ */
+static inline lib_me_t *
+ptl_handle2me (ptl_handle_me_t *handle, nal_cb_t *nal)
+{
+        lib_handle_t *found = lib_lookup_cookie (nal, handle->cookie);
+
+        if (found != NULL)
+                return lh_entry (found, lib_me_t, me_lh);
+
+        return NULL;
+}
+
+/* Library set-up/tear-down and API dispatch entry points (defined
+ * elsewhere in the library). */
+extern int lib_init(nal_cb_t * cb, ptl_nid_t nid, ptl_pid_t pid, int gsize,
+ ptl_pt_index_t tbl_size, ptl_ac_index_t ac_size);
+extern int lib_fini(nal_cb_t * cb);
+extern void lib_dispatch(nal_cb_t * cb, void *private, int index,
+ void *arg_block, void *ret_block);
+extern char *dispatch_name(int index);
+
+/*
+ * When the NAL detects an incoming message, it should call
+ * lib_parse() to decode it.  The NAL callbacks will be handed
+ * the private cookie as a way for the NAL to maintain state
+ * about which transaction is being processed.  An extra parameter,
+ * lib_cookie, will contain the necessary information for
+ * finalizing the message.
+ *
+ * After it has finished handling the message, it should
+ * call lib_finalize() with the lib_cookie parameter.
+ * Callbacks will be made to write events, send acks or
+ * replies and so on.
+ *
+ * NOTE(review): none of the prototypes below has a parameter named
+ * 'lib_cookie'; presumably it is the lib_msg_t passed to lib_finalize() -
+ * confirm against the implementation.
+ */
+extern int lib_parse(nal_cb_t * nal, ptl_hdr_t * hdr, void *private);
+extern int lib_finalize(nal_cb_t * nal, void *private, lib_msg_t * msg);
+extern void print_hdr(nal_cb_t * nal, ptl_hdr_t * hdr);
+
+/* Helpers for 'struct iovec' fragment lists.
+ * NOTE(review): the buffer argument of lib_copy_buf2iov() is named 'dest'
+ * here but 'src' in lib_copy_buf2kiov(); presumably it is the source
+ * buffer in both - confirm. */
+extern ptl_size_t lib_iov_nob (int niov, struct iovec *iov);
+extern void lib_copy_iov2buf (char *dest, int niov, struct iovec *iov, ptl_size_t len);
+extern void lib_copy_buf2iov (int niov, struct iovec *iov, char *dest, ptl_size_t len);
+
+/* The same helpers for ptl_kiov_t fragment lists. */
+extern ptl_size_t lib_kiov_nob (int niov, ptl_kiov_t *iov);
+extern void lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *iov, ptl_size_t len);
+extern void lib_copy_buf2kiov (int niov, ptl_kiov_t *iov, char *src, ptl_size_t len);
+
+/* Receive into / send from a memory descriptor at the given offset. */
+extern void lib_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
+ ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen);
+extern int lib_send (nal_cb_t *nal, void *private, lib_msg_t *msg,
+ ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+ lib_md_t *md, ptl_size_t offset, ptl_size_t len);
+
+/* MD/ME bookkeeping helpers shared across the library. */
+extern void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md_in,
+ ptl_md_t * md_out);
+extern void lib_md_unlink(nal_cb_t * nal, lib_md_t * md_in);
+extern void lib_me_unlink(nal_cb_t * nal, lib_me_t * me_in);
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * p30/lib-types.h
+ *
+ * Types used by the library side routines that do not need to be
+ * exposed to the user application
+ */
+
+#ifndef _LIB_TYPES_H_
+#define _LIB_TYPES_H_
+
+#include <portals/types.h>
+#ifdef __KERNEL__
+# define PTL_USE_SLAB_CACHE
+# include <linux/uio.h>
+# include <linux/smp_lock.h>
+# include <linux/types.h>
+#else
+# include <sys/types.h>
+#endif
+
+/* struct nal_cb_t is defined in lib-nal.h */
+typedef struct nal_cb_t nal_cb_t;
+
+/* Plain byte pointer; used below as the type of lib_md_t's 'start' member. */
+typedef char *user_ptr;
+/* Forward typedefs for the library-side structures defined later in this
+ * file, so that they can refer to one another by pointer. */
+typedef struct lib_msg_t lib_msg_t;
+typedef struct lib_ptl_t lib_ptl_t;
+typedef struct lib_ac_t lib_ac_t;
+typedef struct lib_me_t lib_me_t;
+typedef struct lib_md_t lib_md_t;
+typedef struct lib_eq_t lib_eq_t;
+
+/* The wire handle's interface cookie only matches one network interface in
+ * one epoch (i.e. new cookie when the interface restarts or the node
+ * reboots). The object cookie only matches one object on that interface
+ * during that object's lifetime (i.e. no cookie re-use). */
+typedef struct {
+ __u64 wh_interface_cookie; /* compared against ni_interface_cookie */
+ __u64 wh_object_cookie; /* looked up with lib_lookup_cookie() */
+} ptl_handle_wire_t;
+
+/* byte-flip insensitive!  Both cookies are initialised from -1, i.e. all
+ * bits set, so the value is the same in either byte order. */
+#define PTL_WIRE_HANDLE_NONE \
+((const ptl_handle_wire_t) {.wh_interface_cookie = -1, .wh_object_cookie = -1})
+
+/* Message types; ptl_hdr_t carries one of these in its 'type' member.
+ * PTL_MSG_ACK is explicitly 0 and the others follow consecutively. */
+typedef enum {
+ PTL_MSG_ACK = 0,
+ PTL_MSG_PUT,
+ PTL_MSG_GET,
+ PTL_MSG_REPLY,
+ PTL_MSG_HELLO, /* header-only handshake; see ptl_magicversion_t */
+} ptl_msg_type_t;
+
+/* Each of these structs should start with an odd number of
+ * __u32, or the compiler could add its own padding and confuse
+ * everyone.
+ *
+ * Also, "length" needs to be at offset 28 of each struct.
+ */
+/* The 'ack' arm of the msg union in ptl_hdr_t. */
+typedef struct ptl_ack {
+ ptl_size_t mlength;
+ ptl_handle_wire_t dst_wmd;
+ ptl_match_bits_t match_bits;
+ ptl_size_t length; /* common length (0 for acks) moving out RSN */
+} ptl_ack_t;
+
+/* The 'put' arm of the msg union in ptl_hdr_t.  The members up to and
+ * including 'length' mirror the layout of ptl_ack_t so that 'length' stays
+ * at the common offset. */
+typedef struct ptl_put {
+ ptl_pt_index_t ptl_index;
+ ptl_handle_wire_t ack_wmd;
+ ptl_match_bits_t match_bits;
+ ptl_size_t length; /* common length moving out RSN */
+ ptl_size_t offset;
+ ptl_hdr_data_t hdr_data;
+} ptl_put_t;
+
+/* The 'get' arm of the msg union in ptl_hdr_t.  'length' must stay at the
+ * same offset as in the other message structs. */
+typedef struct ptl_get {
+ ptl_pt_index_t ptl_index;
+ ptl_handle_wire_t return_wmd;
+ ptl_match_bits_t match_bits;
+ ptl_size_t length; /* common length (0 for gets) moving out RSN */
+ ptl_size_t src_offset;
+ ptl_size_t return_offset; /* unused: going RSN */
+ ptl_size_t sink_length;
+} ptl_get_t;
+
+/* The 'reply' arm of the msg union in ptl_hdr_t.  The unused members keep
+ * 'length' at the same offset as in the other message structs. */
+typedef struct ptl_reply {
+ __u32 unused1; /* unused fields going RSN */
+ ptl_handle_wire_t dst_wmd;
+ ptl_size_t dst_offset; /* unused: going RSN */
+ __u32 unused2;
+ ptl_size_t length; /* common length moving out RSN */
+} ptl_reply_t;
+
+/* Message header: source/destination ids, the message type, and a union
+ * of the type-specific parts defined above. */
+typedef struct {
+ ptl_nid_t dest_nid;
+ ptl_nid_t src_nid;
+ ptl_pid_t dest_pid;
+ ptl_pid_t src_pid;
+ __u32 type; /* ptl_msg_type_t */
+ union {
+ ptl_ack_t ack;
+ ptl_put_t put;
+ ptl_get_t get;
+ ptl_reply_t reply;
+ } msg;
+} ptl_hdr_t;
+
+/* All length fields in individual unions at same offset */
+/* LASSERT for same in lib-move.c */
+/* The length is therefore read through the 'ack' arm regardless of the
+ * message type. */
+#define PTL_HDR_LENGTH(h) ((h)->msg.ack.length)
+
+/* A HELLO message contains the portals magic number and protocol version
+ * code in the header's dest_nid, the peer's NID in the src_nid, and
+ * PTL_MSG_HELLO in the type field. All other fields are zero (including
+ * PTL_HDR_LENGTH; i.e. no payload).
+ * This is for use by byte-stream NALs (e.g. TCP/IP) to check the peer is
+ * running the same protocol and to find out its NID, so that hosts with
+ * multiple IP interfaces can have a single NID. These NALs should exchange
+ * HELLO messages when a connection is first established. */
+typedef struct {
+ __u32 magic; /* PORTALS_PROTO_MAGIC */
+ __u16 version_major; /* increment on incompatible change */
+ __u16 version_minor; /* increment on compatible change */
+} ptl_magicversion_t;
+
+/* Value expected in the 'magic' member above. */
+#define PORTALS_PROTO_MAGIC 0xeebc0ded
+
+/* Current protocol version (major.minor), i.e. 0.1. */
+#define PORTALS_PROTO_VERSION_MAJOR 0
+#define PORTALS_PROTO_VERSION_MINOR 1
+
+/* Per-interface statistics; embedded in lib_ni_t as 'counters'.
+ * NOTE(review): judging by the names these are message counts/byte totals
+ * for receive, send and drop plus message allocation figures - confirm
+ * where they are updated. */
+typedef struct {
+ long recv_count, recv_length, send_count, send_length, drop_count,
+ drop_length, msgs_alloc, msgs_max;
+} lib_counters_t;
+
+/* temporary expedient: limit number of entries in discontiguous MDs */
+/* PTL_MD_MAX_IOV sizes the iov/kiov arrays in lib_msg_t and lib_md_t:
+ * 64 entries when PTL_LARGE_MTU is non-zero, otherwise 16 (an undefined
+ * PTL_LARGE_MTU evaluates as 0 in #if). */
+#if PTL_LARGE_MTU
+# define PTL_MD_MAX_IOV 64
+#else
+# define PTL_MD_MAX_IOV 16
+#endif
+
+/* Library-side state for one message.  Instances come from
+ * lib_msg_alloc() and are released with lib_msg_free(). */
+struct lib_msg_t {
+ struct list_head msg_list; /* presumably links into ni_active_msgs - confirm */
+ int send_ack;
+ lib_md_t *md;
+ ptl_nid_t nid;
+ ptl_pid_t pid;
+ ptl_event_t ev;
+ ptl_handle_wire_t ack_wmd;
+ /* fragment list, as either iovec or kiov entries */
+ union {
+ struct iovec iov[PTL_MD_MAX_IOV];
+ ptl_kiov_t kiov[PTL_MD_MAX_IOV];
+ } msg_iov;
+};
+
+/* Table descriptor: a size plus a pointer to list heads; embedded in
+ * lib_ni_t as 'tbl'.
+ * NOTE(review): presumably 'tbl' is an array of 'size' list heads, one
+ * per portal index - confirm in lib_init(). */
+struct lib_ptl_t {
+ ptl_pt_index_t size;
+ struct list_head *tbl;
+};
+
+/* Embedded in lib_ni_t as 'ac'; holds only a next-free index.
+ * NOTE(review): presumably access-control table state - confirm. */
+struct lib_ac_t {
+ int next_free;
+};
+
+/* Handle header embedded in lib_eq_t, lib_me_t and lib_md_t (as eq_lh,
+ * me_lh and md_lh); lh_entry() maps it back to the containing object. */
+typedef struct {
+ struct list_head lh_hash_chain; /* presumably chains into ni_lh_hash_table - confirm */
+ __u64 lh_cookie; /* value copied into/looked up from API handles */
+} lib_handle_t;
+
+/* Given a pointer to the lib_handle_t 'member' embedded in a 'type',
+ * return a pointer to the containing 'type' by subtracting the member's
+ * offset (same construction as list_entry()). */
+#define lh_entry(ptr, type, member) \
+ ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+
+/* Library-side event queue.  Allocated with lib_eq_alloc() and resolved
+ * from API handles through eq_lh (see ptl_handle2eq()). */
+struct lib_eq_t {
+ struct list_head eq_list; /* presumably links into ni_active_eqs - confirm */
+ lib_handle_t eq_lh; /* handle header; cookie exported by ptl_eq2handle() */
+ ptl_seq_t sequence;
+ ptl_size_t size;
+ ptl_event_t *base;
+ int eq_refcount;
+ int (*event_callback) (ptl_event_t * event);
+ void *eq_addrkey;
+};
+
+/* Library-side match entry.  Allocated with lib_me_alloc() and resolved
+ * from API handles through me_lh (see ptl_handle2me()). */
+struct lib_me_t {
+ struct list_head me_list;
+ lib_handle_t me_lh; /* handle header; cookie exported by ptl_me2handle() */
+ ptl_process_id_t match_id;
+ ptl_match_bits_t match_bits, ignore_bits;
+ ptl_unlink_t unlink;
+ lib_md_t *md; /* lib_md_t has the reverse 'me' pointer */
+};
+
+/* Library-side memory descriptor.  Allocated with lib_md_alloc() and
+ * resolved from API or wire handles through md_lh (see ptl_handle2md()
+ * and ptl_wire_handle2md()). */
+struct lib_md_t {
+ struct list_head md_list; /* presumably links into ni_active_mds - confirm */
+ lib_handle_t md_lh; /* handle header; cookie exported by ptl_md2handle() */
+ lib_me_t *me; /* lib_me_t has the reverse 'md' pointer */
+ user_ptr start;
+ ptl_size_t offset;
+ ptl_size_t length;
+ ptl_size_t max_size;
+ int threshold;
+ int pending;
+ ptl_unlink_t unlink;
+ unsigned int options;
+ unsigned int md_flags; /* presumably PTL_MD_FLAG_* bits below - confirm */
+ void *user_ptr;
+ lib_eq_t *eq;
+ void *md_addrkey;
+ unsigned int md_niov; /* # frags */
+ /* fragment list, as either iovec or kiov entries */
+ union {
+ struct iovec iov[PTL_MD_MAX_IOV];
+ ptl_kiov_t kiov[PTL_MD_MAX_IOV];
+ } md_iov;
+};
+
+/* Single-bit flag values (bit 0 and bit 1). */
+#define PTL_MD_FLAG_UNLINK (1 << 0)
+#define PTL_MD_FLAG_AUTO_UNLINKED (1 << 1)
+
+/* Freelist allocator types, only compiled when the slab cache is not in
+ * use (PTL_USE_SLAB_CACHE is defined above for __KERNEL__ builds). */
+#ifndef PTL_USE_SLAB_CACHE
+/* One freelist: a preallocated array of objects plus the list of those
+ * currently free. */
+typedef struct
+{
+ void *fl_objs; /* single contiguous array of objects */
+ int fl_nobjs; /* the number of them */
+ int fl_objsize; /* the size (including overhead) of each of them */
+ struct list_head fl_list; /* where they are enqueued */
+} lib_freelist_t;
+
+/* Header of one freelist object. */
+typedef struct
+{
+ struct list_head fo_list; /* enqueue on fl_list */
+ void *fo_contents; /* aligned contents */
+} lib_freeobj_t;
+#endif
+
+/* Failure-injection record for one peer. */
+typedef struct {
+ /* info about peers we are trying to fail */
+ struct list_head tp_list; /* stash in ni.ni_test_peers */
+ ptl_nid_t tp_nid; /* matching nid */
+ unsigned int tp_threshold; /* # failures to simulate */
+} lib_test_peer_t;
+
+/* Per-interface library state; reached as 'nal->ni' by the helpers in
+ * lib-p30.h. */
+typedef struct {
+ int up;
+ int refcnt;
+ ptl_nid_t nid;
+ ptl_pid_t pid;
+ int num_nodes;
+ unsigned int debug;
+ lib_ptl_t tbl;
+ lib_ac_t ac;
+ lib_counters_t counters;
+
+ int ni_lh_hash_size; /* size of lib handle hash table */
+ struct list_head *ni_lh_hash_table; /* all extant lib handles, this interface */
+ __u64 ni_next_object_cookie; /* cookie generator */
+ __u64 ni_interface_cookie; /* uniquely identifies this ni in this epoch */
+
+ struct list_head ni_test_peers; /* list of lib_test_peer_t */
+
+ /* Freelists used by the lib_*_alloc()/lib_*_free() helpers when the
+ * slab cache is not in use. */
+#ifndef PTL_USE_SLAB_CACHE
+ lib_freelist_t ni_free_mes;
+ lib_freelist_t ni_free_msgs;
+ lib_freelist_t ni_free_mds;
+ lib_freelist_t ni_free_eqs;
+#endif
+ struct list_head ni_active_msgs;
+ struct list_head ni_active_mds;
+ struct list_head ni_active_eqs;
+} lib_ni_t;
+
+#endif
--- /dev/null
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
+
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+/* No-op prefetch hint used by the list iteration macros in this file: it
+ * evaluates its argument and discards the result.  The argument is
+ * parenthesized so that compound expressions such as prefetch(p + 1)
+ * expand correctly. */
+#define prefetch(a) ((void)(a))
+
+/* Link node of a circular doubly linked list.  It is embedded in the
+ * structure being listed; list_entry() maps it back to that structure.
+ * An empty list is a head whose next and prev point to itself. */
+struct list_head {
+ struct list_head *next, *prev;
+};
+
+/* Static initializer for an empty list: both links point at 'name'. */
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+/* Define a list head variable 'name', initialised as an empty list. */
+#define LIST_HEAD(name) \
+ struct list_head name = LIST_HEAD_INIT(name)
+
+/* Run-time initializer: make the head at 'ptr' an empty list.
+ * 'ptr' is evaluated more than once. */
+#define INIT_LIST_HEAD(ptr) do { \
+ (ptr)->next = (ptr); (ptr)->prev = (ptr); \
+} while (0)
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ *
+ * @new:  entry to insert
+ * @prev: entry that will precede @new
+ * @next: entry that will follow @new
+ */
+static inline void __list_add(struct list_head * new,
+ struct list_head * prev,
+ struct list_head * next)
+{
+ next->prev = new;
+ new->next = next;
+ new->prev = prev;
+ prev->next = new;
+}
+
+/**
+ * list_add - insert an entry right after a list head
+ * @new: entry to insert
+ * @head: list head that will precede @new
+ *
+ * @new becomes the first element of the list, which gives
+ * stack (LIFO) behaviour.
+ */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+        struct list_head *first = head->next;
+
+        __list_add(new, head, first);
+}
+
+/**
+ * list_add_tail - insert an entry right before a list head
+ * @new: entry to insert
+ * @head: list head that will follow @new
+ *
+ * @new becomes the last element of the list, which gives
+ * queue (FIFO) behaviour.
+ */
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+        struct list_head *last = head->prev;
+
+        __list_add(new, last, head);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ *
+ * The removed entry itself is not touched.
+ */
+static inline void __list_del(struct list_head * prev, struct list_head * next)
+{
+ next->prev = prev;
+ prev->next = next;
+}
+
+/**
+ * list_del - unlink an entry from its list
+ * @entry: the element to remove
+ *
+ * Only the neighbours are updated; @entry keeps its old links, so
+ * list_empty() on @entry does not return true afterwards.
+ */
+static inline void list_del(struct list_head *entry)
+{
+        struct list_head *before = entry->prev;
+        struct list_head *after = entry->next;
+
+        __list_del(before, after);
+}
+
+/**
+ * list_del_init - unlink an entry and make it an empty list
+ * @entry: the element to remove
+ *
+ * After the call @entry points to itself in both directions.
+ */
+static inline void list_del_init(struct list_head *entry)
+{
+        struct list_head *before = entry->prev;
+        struct list_head *after = entry->next;
+
+        __list_del(before, after);
+
+        entry->next = entry;
+        entry->prev = entry;
+}
+
+/**
+ * list_move - unlink an entry and re-insert it right after another head
+ * @list: the entry to move
+ * @head: the head that will precede the entry
+ */
+static inline void list_move(struct list_head *list, struct list_head *head)
+{
+        /* unlink first, so head->next is read from the updated list */
+        __list_del(list->prev, list->next);
+        __list_add(list, head, head->next);
+}
+
+/**
+ * list_move_tail - unlink an entry and re-insert it right before another head
+ * @list: the entry to move
+ * @head: the head that will follow the entry
+ */
+static inline void list_move_tail(struct list_head *list,
+                                  struct list_head *head)
+{
+        /* unlink first, so head->prev is read from the updated list */
+        __list_del(list->prev, list->next);
+        __list_add(list, head->prev, head);
+}
+
+/**
+ * list_empty - test whether a list has no entries
+ * @head: the list to test
+ *
+ * Returns non-zero when @head's next link points back at @head.
+ */
+static inline int list_empty(struct list_head *head)
+{
+        struct list_head *first = head->next;
+
+        return first == head;
+}
+
+/*
+ * Link every entry of @list in between @head and @head's old first
+ * entry.  @list must not be empty, and @list's own links are left
+ * unchanged (see list_splice_init() for the variant that resets them).
+ */
+static inline void __list_splice(struct list_head *list,
+ struct list_head *head)
+{
+ struct list_head *first = list->next;
+ struct list_head *last = list->prev;
+ struct list_head *at = head->next;
+
+ first->prev = head;
+ head->next = first;
+
+ last->next = at;
+ at->prev = last;
+}
+
+/**
+ * list_splice - insert all entries of one list after another head
+ * @list: the list whose entries are added
+ * @head: the place to add them in the first list
+ *
+ * Does nothing when @list is empty.
+ */
+static inline void list_splice(struct list_head *list, struct list_head *head)
+{
+        if (list_empty(list))
+                return;
+
+        __list_splice(list, head);
+}
+
+/**
+ * list_splice_init - splice a list and leave the source list empty
+ * @list: the list whose entries are added
+ * @head: the place to add them in the first list
+ *
+ * Does nothing when @list is empty; otherwise @list is reinitialised
+ * after its entries have been moved.
+ */
+static inline void list_splice_init(struct list_head *list,
+                                    struct list_head *head)
+{
+        if (list_empty(list))
+                return;
+
+        __list_splice(list, head);
+        INIT_LIST_HEAD(list);
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr: the &struct list_head pointer.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Computed by subtracting the offset of @member within @type from @ptr.
+ */
+#define list_entry(ptr, type, member) \
+ ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+
+/**
+ * list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop counter.
+ * @head: the head for your list.
+ *
+ * The next link is read from @pos after each iteration, so the body must
+ * not remove @pos; use list_for_each_safe() for that.
+ */
+#define list_for_each(pos, head) \
+ for (pos = (head)->next, prefetch(pos->next); pos != (head); \
+ pos = pos->next, prefetch(pos->next))
+
+/**
+ * list_for_each_prev - iterate over a list in reverse order
+ * @pos: the &struct list_head to use as a loop counter.
+ * @head: the head for your list.
+ *
+ * The prev link is read from @pos after each iteration, so the body must
+ * not remove @pos.
+ */
+#define list_for_each_prev(pos, head) \
+ for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \
+ pos = pos->prev, prefetch(pos->prev))
+
+/**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos: the &struct list_head to use as a loop counter.
+ * @n: another &struct list_head to use as temporary storage
+ * @head: the head for your list.
+ *
+ * @n holds the successor of @pos, fetched before the body runs, so the
+ * body may unlink @pos.
+ */
+#define list_for_each_safe(pos, n, head) \
+ for (pos = (head)->next, n = pos->next; pos != (head); \
+ pos = n, n = pos->next)
+
+#endif
+
+/* Defined only if no earlier header has already provided it. */
+#ifndef list_for_each_entry
+/**
+ * list_for_each_entry - iterate over list of given type
+ * @pos: the type * to use as a loop counter.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Relies on the typeof extension to recover the container type from @pos.
+ */
+#define list_for_each_entry(pos, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member), \
+ prefetch(pos->member.next); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.next, typeof(*pos), member), \
+ prefetch(pos->member.next))
+#endif
+
+/* Defined only if no earlier header has already provided it. */
+#ifndef list_for_each_entry_safe
+/**
+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @pos: the type * to use as a loop counter.
+ * @n: the &struct list_head to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Note that @n is a struct list_head pointer (the next link), not a
+ * pointer to the containing type.
+ */
+#define list_for_each_entry_safe(pos, n, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member), \
+ n = pos->member.next; \
+ &pos->member != (head); \
+ pos = list_entry(n, typeof(*pos), member), \
+ n = pos->member.next)
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Compile with:
+ * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl
+ */
+#ifndef __LTRACE_H_
+#define __LTRACE_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <portals/types.h>
+#include <portals/ptlctl.h>
+#include <linux/kp30.h>
+#include <linux/limits.h>
+#include <asm/page.h>
+#include <linux/version.h>
+
+/*
+ * Issue the ptlctl "debug_kernel" command for @fname with a trailing "1"
+ * argument, echoing the command to stderr first.
+ * Returns the result of jt_dbg_debug_kernel().
+ */
+static inline int ltrace_write_file(char* fname)
+{
+        char *cmd[3] = { "debug_kernel", fname, "1" };
+
+        fprintf(stderr, "[ptlctl] %s %s %s\n", cmd[0], cmd[1], cmd[2]);
+
+        return jt_dbg_debug_kernel(3, cmd);
+}
+
+/*
+ * Issue the ptlctl "clear" command, echoing it to stderr first.
+ * Returns the result of jt_dbg_clear_debug_buf().
+ */
+static inline int ltrace_clear()
+{
+        char *cmd[1] = { "clear" };
+
+        fprintf(stderr, "[ptlctl] %s\n", cmd[0]);
+
+        return jt_dbg_clear_debug_buf(1, cmd);
+}
+
+/*
+ * Issue the ptlctl "mark" command with the text "====<indent_level>=<text>"
+ * (truncated to PATH_MAX bytes including the terminator).
+ * Returns the result of jt_dbg_mark_debug_buf().
+ */
+static inline int ltrace_mark(int indent_level, char* text)
+{
+        char  marker[PATH_MAX];
+        char *cmd[2];
+
+        snprintf(marker, sizeof(marker), "====%d=%s", indent_level, text);
+
+        cmd[0] = "mark";
+        cmd[1] = marker;
+
+        return jt_dbg_mark_debug_buf(2, cmd);
+}
+
+/*
+ * Issue the ptlctl "list applymasks" command, echoing it to stderr first.
+ * Returns the result of jt_dbg_list().
+ */
+static inline int ltrace_applymasks()
+{
+        char *cmd[2] = { "list", "applymasks" };
+
+        fprintf(stderr, "[ptlctl] %s %s\n", cmd[0], cmd[1]);
+
+        return jt_dbg_list(2, cmd);
+}
+
+
+/*
+ * Issue the ptlctl "filter" command for @subsys_or_mask.
+ * Returns the result of jt_dbg_filter().
+ */
+static inline int ltrace_filter(char* subsys_or_mask)
+{
+        char *cmd[2] = { "filter", subsys_or_mask };
+
+        return jt_dbg_filter(2, cmd);
+}
+
+/*
+ * Issue the ptlctl "show" command for @subsys_or_mask.
+ * Returns the result of jt_dbg_show().
+ */
+static inline int ltrace_show(char* subsys_or_mask)
+{
+        char *cmd[2] = { "show", subsys_or_mask };
+
+        return jt_dbg_show(2, cmd);
+}
+
+/*
+ * Set up tracing: initialise the debug interface, register the portals
+ * ioctl device (when PORTALS_DEV_ID is defined), then apply a fixed
+ * sequence of filter/show commands followed by "list applymasks".
+ *
+ * Returns the result of register_ioc_dev(), or 0 when PORTALS_DEV_ID is
+ * not defined.  The results of the individual filter/show commands are
+ * ignored.
+ */
+static inline int ltrace_start()
+{
+        /* filtered before "show all_types" */
+        char *before_show[] = { "class", "socknal", "qswnal", "gmnal",
+                                "portals" };
+        /* filtered after "show all_types" */
+        char *after_show[] = { "trace", "malloc", "net", "page", "other",
+                               "info" };
+        unsigned int i;
+        int rc = 0;
+
+        dbg_initialize(0, NULL);
+#ifdef PORTALS_DEV_ID
+        rc = register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH);
+#endif
+        for (i = 0; i < sizeof(before_show) / sizeof(before_show[0]); i++)
+                ltrace_filter(before_show[i]);
+
+        ltrace_show("all_types");
+
+        for (i = 0; i < sizeof(after_show) / sizeof(after_show[0]); i++)
+                ltrace_filter(after_show[i]);
+
+        ltrace_applymasks();
+
+        return rc;
+}
+
+
+/*
+ * Unregister the portals ioctl device that ltrace_start() registered.
+ * Compiles to an empty function when PORTALS_DEV_ID is not defined.
+ */
+static inline void ltrace_stop()
+{
+#ifdef PORTALS_DEV_ID
+        unregister_ioc_dev(PORTALS_DEV_ID);
+#endif
+}
+
+/*
+ * Guess whether we are running on a real host rather than under UML by
+ * checking for /dev/ubd.
+ *
+ * Return values:
+ *   0  /dev/ubd exists (running under UML)
+ *   1  /dev/ubd does not exist (running on host), or the lookup failed
+ *      for another reason; in that case an error is printed to stderr
+ *      and the host is assumed.
+ */
+static inline int not_uml()
+{
+        struct stat sb;
+
+        if (stat("/dev/ubd", &sb) == 0)
+                return 0;
+
+        if (errno == ENOENT)
+                return 1;
+
+        fprintf(stderr, "Cannot stat /dev/ubd: %s\n", strerror(errno));
+        return 1; /* Assume host */
+}
+
+#define LTRACE_MAX_NOB   256
+/*
+ * Append a process listing to @fname.
+ *
+ * Builds the shell command
+ *     ps --no-headers -eo "<prefix> %p %c" >> <fname>
+ * where <prefix> imitates a debug log line header, and runs it with
+ * system().
+ *
+ * Each piece is formatted into the space that is still free in the
+ * command buffer, and the result is checked; if the command does not fit
+ * in LTRACE_MAX_NOB bytes, an error is printed to stderr and nothing is
+ * executed.
+ *
+ * NOTE: @fname is interpolated unquoted into a shell command line, so it
+ * must not come from an untrusted source.
+ */
+static inline void ltrace_add_processnames(char* fname)
+{
+        char            cmdbuf[LTRACE_MAX_NOB];
+        struct timeval  tv;
+        struct timezone tz;
+        int             nob;
+        int             rc;
+        int             underuml = !not_uml();
+
+        gettimeofday(&tv, &tz);
+
+        nob = snprintf(cmdbuf, sizeof(cmdbuf), "ps --no-headers -eo \"");
+        if (nob < 0 || nob >= (int)sizeof(cmdbuf))
+                goto too_long;
+
+        /* Careful - these format strings need to match the CDEBUG
+         * formats in portals/linux/debug.c EXACTLY
+         */
+        rc = snprintf(cmdbuf + nob, sizeof(cmdbuf) - nob,
+                      "%02x:%06x:%d:%lu.%06lu ",
+                      S_RPC >> 24, D_VFSTRACE, 0,
+                      (unsigned long)tv.tv_sec, (unsigned long)tv.tv_usec);
+        if (rc < 0 || rc >= (int)sizeof(cmdbuf) - nob)
+                goto too_long;
+        nob += rc;
+
+        if (underuml && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))) {
+                rc = snprintf(cmdbuf + nob, sizeof(cmdbuf) - nob,
+                              "(%s:%d:%s() %d | %d+%lu): ",
+                              "lltrace.h", __LINE__, __FUNCTION__, 0, 0, 0UL);
+        }
+        else {
+                rc = snprintf(cmdbuf + nob, sizeof(cmdbuf) - nob,
+                              "(%s:%d:%s() %d+%lu): ",
+                              "lltrace.h", __LINE__, __FUNCTION__, 0, 0UL);
+        }
+        if (rc < 0 || rc >= (int)sizeof(cmdbuf) - nob)
+                goto too_long;
+        nob += rc;
+
+        rc = snprintf(cmdbuf + nob, sizeof(cmdbuf) - nob,
+                      " %%p %%c\" >> %s", fname);
+        if (rc < 0 || rc >= (int)sizeof(cmdbuf) - nob)
+                goto too_long;
+
+        system(cmdbuf);
+        return;
+
+too_long:
+        fprintf(stderr, "ltrace_add_processnames: command too long for %s\n",
+                fname);
+}
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _P30_H_
+#define _P30_H_
+
+/*
+ * p30.h
+ *
+ * User application interface file
+ */
+
+#if defined (__KERNEL__)
+#include <linux/uio.h>
+#include <linux/types.h>
+#else
+#include <sys/types.h>
+#include <sys/uio.h>
+#endif
+
+#include <portals/types.h>
+#include <portals/nal.h>
+#include <portals/api.h>
+#include <portals/errno.h>
+#include <portals/nalids.h>
+
+/* Global state shared with libraries and test codes: initialisation
+ * flags, the interface handles and per-interface timeouts.  Only declared
+ * here; the definitions live elsewhere. */
+extern int __p30_initialized; /* for libraries & test codes */
+extern int __p30_myr_initialized; /* that don't know if p30 */
+extern int __p30_ip_initialized; /* had been initialized yet */
+extern ptl_handle_ni_t __myr_ni_handle, __ip_ni_handle;
+
+extern int __p30_myr_timeout; /* in seconds, for PtlNIBarrier, */
+extern int __p30_ip_timeout; /* PtlReduce_all, & PtlBroadcast_all */
+
+/*
+ * Debugging flags reserved for the Portals reference library.
+ * These are not part of the API as described in the SAND report
+ * but are for the use of the maintainers of the reference implementation.
+ *
+ * It is not expected that the real implementations will export
+ * this functionality.
+ */
+#define PTL_DEBUG_NONE 0ul
+/* NOTE(review): this mask covers bits 0-11, although only bits 0-9 are
+ * named below. */
+#define PTL_DEBUG_ALL (0x0FFFul) /* Only the Portals flags */
+
+/* Single-bit mask with bit x set, as an unsigned long. */
+#define __bit(x) ((unsigned long) 1<<(x))
+#define PTL_DEBUG_PUT __bit(0)
+#define PTL_DEBUG_GET __bit(1)
+#define PTL_DEBUG_REPLY __bit(2)
+#define PTL_DEBUG_ACK __bit(3)
+#define PTL_DEBUG_DROP __bit(4)
+#define PTL_DEBUG_REQUEST __bit(5)
+#define PTL_DEBUG_DELIVERY __bit(6)
+#define PTL_DEBUG_UNLINK __bit(7)
+#define PTL_DEBUG_THRESHOLD __bit(8)
+#define PTL_DEBUG_API __bit(9)
+
+/*
+ * These eight are reserved for the NAL to define
+ * It should probably give them better names...
+ *
+ * NOTE(review): PTL_DEBUG_NI_ALL is 0xF000 (bits 12-15), but the eight NI
+ * flags below occupy bits 24-31, so masking with PTL_DEBUG_NI_ALL selects
+ * none of them.  Presumably the mask was meant to be 0xFF000000 - confirm
+ * before changing it, since callers may depend on the current value.
+ */
+#define PTL_DEBUG_NI_ALL (0xF000ul) /* Only the NAL flags */
+#define PTL_DEBUG_NI0 __bit(24)
+#define PTL_DEBUG_NI1 __bit(25)
+#define PTL_DEBUG_NI2 __bit(26)
+#define PTL_DEBUG_NI3 __bit(27)
+#define PTL_DEBUG_NI4 __bit(28)
+#define PTL_DEBUG_NI5 __bit(29)
+#define PTL_DEBUG_NI6 __bit(30)
+#define PTL_DEBUG_NI7 __bit(31)
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * header for libptlctl.a
+ */
+#ifndef _PTLCTL_H_
+#define _PTLCTL_H_
+
+/* Device ids and the matching device node paths, in the form taken by
+ * register_ioc_dev(dev_id, dev_name) declared below. */
+#define PORTALS_DEV_ID 0
+#define PORTALS_DEV_PATH "/dev/portals"
+#define OBD_DEV_ID 1
+#define OBD_DEV_PATH "/dev/obd"
+
+/* String conversion helpers for NAL names and NIDs.
+ * NOTE(review): presumably ptl_parse_nid() stores the parsed value in
+ * *nidp and ptl_nid2str() formats into and returns 'buffer' - confirm in
+ * the implementation. */
+int ptl_name2nal(char *str);
+int ptl_parse_nid (ptl_nid_t *nidp, char *str);
+char * ptl_nid2str (char *buffer, ptl_nid_t nid);
+
+/* Portals control commands.  Each takes an (argc, argv) pair and returns
+ * an int.
+ * NOTE(review): presumably each jt_ptl_* function implements the ptlctl
+ * command named by its suffix - confirm in the implementation. */
+int ptl_initialize(int argc, char **argv);
+int jt_ptl_network(int argc, char **argv);
+int jt_ptl_connect(int argc, char **argv);
+int jt_ptl_disconnect(int argc, char **argv);
+int jt_ptl_push_connection(int argc, char **argv);
+int jt_ptl_ping(int argc, char **argv);
+int jt_ptl_mynid(int argc, char **argv);
+int jt_ptl_add_uuid(int argc, char **argv);
+int jt_ptl_add_uuid_old(int argc, char **argv); /* backwards compatibility */
+int jt_ptl_close_uuid(int argc, char **argv);
+int jt_ptl_del_uuid(int argc, char **argv);
+int jt_ptl_rxmem (int argc, char **argv);
+int jt_ptl_txmem (int argc, char **argv);
+int jt_ptl_nagle (int argc, char **argv);
+int jt_ptl_add_route (int argc, char **argv);
+int jt_ptl_del_route (int argc, char **argv);
+int jt_ptl_print_routes (int argc, char **argv);
+int jt_ptl_fail_nid (int argc, char **argv);
+
+/* Debug control commands, again taking an (argc, argv) pair where argv[0]
+ * is the command name (see the callers in lltrace.h, e.g. "filter",
+ * "show", "list", "mark", "clear", "debug_kernel"). */
+int dbg_initialize(int argc, char **argv);
+int jt_dbg_filter(int argc, char **argv);
+int jt_dbg_show(int argc, char **argv);
+int jt_dbg_list(int argc, char **argv);
+int jt_dbg_debug_kernel(int argc, char **argv);
+int jt_dbg_debug_daemon(int argc, char **argv);
+int jt_dbg_debug_file(int argc, char **argv);
+int jt_dbg_clear_debug_buf(int argc, char **argv);
+int jt_dbg_mark_debug_buf(int argc, char **argv);
+int jt_dbg_modules(int argc, char **argv);
+int jt_dbg_panic(int argc, char **argv);
+
+/* l_ioctl.c */
+/* ioctl device registry and dump helpers.  Devices are referred to by the
+ * small integer ids defined at the top of this header. */
+int register_ioc_dev(int dev_id, const char * dev_name);
+void unregister_ioc_dev(int dev_id);
+int set_ioctl_dump(char * file);
+int l_ioctl(int dev_id, int opc, void *buf);
+/* 'ioc_func' has the same signature as l_ioctl(). */
+int parse_dump(char * dump_file, int (*ioc_func)(int dev_id, int opc, void *));
+int jt_ioc_dump(int argc, char **argv);
+
+#endif
--- /dev/null
+/*
+** $Id: myrnal.h,v 1.1.2.1 2003/05/19 04:25:31 braam Exp $
+*/
+
+#ifndef MYRNAL_H
+#define MYRNAL_H
+
+/* Size limits.
+ * NOTE(review): presumably MAX_ARGS_LEN/MAX_RET_LEN bound the args_len and
+ * ret_len members of myrnal_forward_t below - confirm. */
+#define MAX_ARGS_LEN (256)
+#define MAX_RET_LEN (128)
+#define MYRNAL_MAX_ACL_SIZE (64)
+#define MYRNAL_MAX_PTL_SIZE (64)
+
+/* Command class codes.
+ * NOTE(review): presumably values for the p3cmd member of
+ * myrnal_forward_t - confirm. */
+#define P3CMD (100)
+#define P3SYSCALL (200)
+#define P3REGISTER (300)
+
+/* Anonymous enum with a single constant, PTL_MLOCKALL (value 0). */
+enum { PTL_MLOCKALL };
+
+/* Argument block describing one forwarded call: an argument buffer, a
+ * return buffer and a command code.
+ * NOTE(review): size_t is used here although this header includes
+ * nothing itself; presumably the including file provides it - confirm. */
+typedef struct {
+ void *args; /* argument buffer */
+ size_t args_len; /* its length */
+ void *ret; /* return buffer */
+ size_t ret_len; /* its length */
+ int p3cmd;
+} myrnal_forward_t;
+
+#endif /* MYRNAL_H */
--- /dev/null
+/*
+** $Id: nal.h,v 1.1.2.1 2003/05/19 04:25:31 braam Exp $
+*/
+#ifndef _NAL_H_
+#define _NAL_H_
+
+/*
+ * p30/nal.h
+ *
+ * The API side NAL declarations
+ */
+
+#include <portals/types.h>
+
+/* Remove any 'yield' macro that is already defined; struct nal_t below
+ * has a member named 'yield', which such a macro would rewrite. */
+#ifdef yield
+#undef yield
+#endif
+
+typedef struct nal_t nal_t;
+
+/* API-side NAL object: per-interface data plus a table of operations.
+ * Every operation receives the nal_t itself as its first argument. */
+struct nal_t {
+ ptl_ni_t ni;
+ int refct;
+ void *nal_data;
+ int *timeout; /* for libp30api users */
+ /* Forward API call 'index' with its argument and return buffers. */
+ int (*forward) (nal_t * nal, int index, /* Function ID */
+ void *args, size_t arg_len, void *ret, size_t ret_len);
+
+ int (*shutdown) (nal_t * nal, int interface);
+
+ /* Takes a base address and an extent. */
+ int (*validate) (nal_t * nal, void *base, size_t extent);
+
+ void (*yield) (nal_t * nal);
+
+ /* Lock/unlock pair; 'flags' is passed by pointer to both. */
+ void (*lock) (nal_t * nal, unsigned long *flags);
+
+ void (*unlock) (nal_t * nal, unsigned long *flags);
+};
+
+/* Function type of an interface constructor: returns a nal_t pointer.
+ * PTL_IFACE_IP and PTL_IFACE_MYR below are two such functions. */
+typedef nal_t *(ptl_interface_t) (int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid);
+extern nal_t *PTL_IFACE_IP(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid);
+extern nal_t *PTL_IFACE_MYR(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid);
+
+/* Map a handle to a nal_t pointer. */
+extern nal_t *ptl_hndl2nal(ptl_handle_any_t * any);
+
+/* Default interface constructor; can be overridden by defining
+ * PTL_IFACE_DEFAULT before including this header. */
+#ifndef PTL_IFACE_DEFAULT
+#define PTL_IFACE_DEFAULT (PTL_IFACE_IP)
+#endif
+
+#endif
--- /dev/null
+/* Numeric interface ids.
+ * NOTE(review): PTL_IFACE_MAX is one more than the largest id defined
+ * here; presumably it is used as an upper bound/array size - confirm.
+ * This header has no include guard; repeated inclusion only repeats
+ * identical macro definitions. */
+#define PTL_IFACE_TCP 1
+#define PTL_IFACE_ER 2
+#define PTL_IFACE_SS 3
+#define PTL_IFACE_MAX 4
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _P30_H_
+#define _P30_H_
+
+/*
+ * p30.h
+ *
+ * User application interface file
+ */
+
+#if defined (__KERNEL__)
+#include <linux/uio.h>
+#include <linux/types.h>
+#else
+#include <sys/types.h>
+#include <sys/uio.h>
+#endif
+
+#include <portals/types.h>
+#include <portals/nal.h>
+#include <portals/api.h>
+#include <portals/errno.h>
+#include <portals/nalids.h>
+
+/* Global state shared with libraries and test codes: initialisation
+ * flags, the interface handles and per-interface timeouts.  Only declared
+ * here; the definitions live elsewhere. */
+extern int __p30_initialized; /* for libraries & test codes */
+extern int __p30_myr_initialized; /* that don't know if p30 */
+extern int __p30_ip_initialized; /* had been initialized yet */
+extern ptl_handle_ni_t __myr_ni_handle, __ip_ni_handle;
+
+extern int __p30_myr_timeout; /* in seconds, for PtlNIBarrier, */
+extern int __p30_ip_timeout; /* PtlReduce_all, & PtlBroadcast_all */
+
+/*
+ * Debugging flags reserved for the Portals reference library.
+ * These are not part of the API as described in the SAND report
+ * but are for the use of the maintainers of the reference implementation.
+ *
+ * It is not expected that the real implementations will export
+ * this functionality.
+ */
+#define PTL_DEBUG_NONE 0ul
+/* NOTE(review): this mask covers bits 0-11, although only bits 0-9 are
+ * named below. */
+#define PTL_DEBUG_ALL (0x0FFFul) /* Only the Portals flags */
+
+/* Single-bit mask with bit x set, as an unsigned long. */
+#define __bit(x) ((unsigned long) 1<<(x))
+#define PTL_DEBUG_PUT __bit(0)
+#define PTL_DEBUG_GET __bit(1)
+#define PTL_DEBUG_REPLY __bit(2)
+#define PTL_DEBUG_ACK __bit(3)
+#define PTL_DEBUG_DROP __bit(4)
+#define PTL_DEBUG_REQUEST __bit(5)
+#define PTL_DEBUG_DELIVERY __bit(6)
+#define PTL_DEBUG_UNLINK __bit(7)
+#define PTL_DEBUG_THRESHOLD __bit(8)
+#define PTL_DEBUG_API __bit(9)
+
+/*
+ * These eight are reserved for the NAL to define
+ * It should probably give them better names...
+ *
+ * NOTE(review): PTL_DEBUG_NI_ALL is 0xF000 (bits 12-15), but the eight NI
+ * flags below occupy bits 24-31, so masking with PTL_DEBUG_NI_ALL selects
+ * none of them.  Presumably the mask was meant to be 0xFF000000 - confirm
+ * before changing it, since callers may depend on the current value.
+ */
+#define PTL_DEBUG_NI_ALL (0xF000ul) /* Only the NAL flags */
+#define PTL_DEBUG_NI0 __bit(24)
+#define PTL_DEBUG_NI1 __bit(25)
+#define PTL_DEBUG_NI2 __bit(26)
+#define PTL_DEBUG_NI3 __bit(27)
+#define PTL_DEBUG_NI4 __bit(28)
+#define PTL_DEBUG_NI5 __bit(29)
+#define PTL_DEBUG_NI6 __bit(30)
+#define PTL_DEBUG_NI7 __bit(31)
+
+#endif
--- /dev/null
+/*
+ * TITLE(ppid_h, "@(#) $Id: ppid.h,v 1.1.2.1 2003/05/19 04:25:31 braam Exp $");
+ */
+
+#ifndef _INCppidh_
+#define _INCppidh_
+
+#include "defines.h"
+// #include "idtypes.h"
+
+
+/* Upper bound for ppids.  This needs to fit into 16 bits, so the maximum
+ * possible value is 65535.  Having it "large" can help with debugging
+ * process accounting, but there are reasons for making it somewhat
+ * smaller than the maximum - e.g. arrays indexed by ppid need storage
+ * proportional to it. */
+#define MAX_PPID 1000
+
+/* Upper bound for gids; this needs to fit into 16 bits as well. */
+#define MAX_GID 1000
+
+/*
+ * Fixed ids occupy 1..MAX_FIXED_*; the floating range starts right after.
+ * The derived values are parenthesised so that they expand correctly inside
+ * larger expressions (e.g. "2 * PPID_FLOATING" or "NUM_PTL_TASKS * size").
+ */
+#define MAX_FIXED_PPID 100
+#define MAX_FIXED_GID 100
+#define PPID_FLOATING (MAX_FIXED_PPID+1) /* Floating area starts here */
+#define GID_FLOATING (MAX_FIXED_GID+1) /* Floating area starts here */
+#define NUM_PTL_TASKS (MAX_FIXED_PPID+80) /* Maximum no. portals tasks */
+
+#define PPID_AUTO 0
+
+/* Minimum PPID is 1 */
+#define PPID_BEBOPD 1 /* bebopd */
+#define GID_BEBOPD 1 /* bebopd */
+
+#define PPID_PCT 2 /* pct */
+#define GID_PCT 2 /* pct */
+
+#define PPID_FYOD 3 /* fyod */
+#define GID_FYOD 3 /* fyod */
+
+#define PPID_GDBWRAP 11 /* portals proxy for gdb */
+#define GID_GDBWRAP 11 /* portals proxy for gdb */
+
+#define PPID_TEST 15 /* for portals tests */
+#define GID_TEST 15
+
+#define GID_YOD 5 /* yod */
+#define GID_PINGD 6 /* pingd */
+#define GID_BT 7 /* bt */
+#define GID_PTLTEST 8 /* ptltest */
+#define GID_CGDB 9 /* cgdb */
+#define GID_TVDSVR 10 /* start-tvdsvr */
+
+#endif /* _INCppidh_ */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * header for libptlctl.a
+ */
+#ifndef _PTLCTL_H_
+#define _PTLCTL_H_
+
+/* Numeric device identifiers, each paired with a device-node path.
+ * NOTE(review): presumably these are the dev_id / dev_name values given to
+ * register_ioc_dev() and l_ioctl() declared below -- confirm in l_ioctl.c. */
+#define PORTALS_DEV_ID 0
+#define PORTALS_DEV_PATH "/dev/portals"
+#define OBD_DEV_ID 1
+#define OBD_DEV_PATH "/dev/obd"
+
+int ptl_name2nal(char *str);
+int ptl_parse_nid (ptl_nid_t *nidp, char *str);
+char * ptl_nid2str (char *buffer, ptl_nid_t nid);
+
+int ptl_initialize(int argc, char **argv);
+int jt_ptl_network(int argc, char **argv);
+int jt_ptl_connect(int argc, char **argv);
+int jt_ptl_disconnect(int argc, char **argv);
+int jt_ptl_push_connection(int argc, char **argv);
+int jt_ptl_ping(int argc, char **argv);
+int jt_ptl_mynid(int argc, char **argv);
+int jt_ptl_add_uuid(int argc, char **argv);
+int jt_ptl_add_uuid_old(int argc, char **argv); /* backwards compatibility */
+int jt_ptl_close_uuid(int argc, char **argv);
+int jt_ptl_del_uuid(int argc, char **argv);
+int jt_ptl_rxmem (int argc, char **argv);
+int jt_ptl_txmem (int argc, char **argv);
+int jt_ptl_nagle (int argc, char **argv);
+int jt_ptl_add_route (int argc, char **argv);
+int jt_ptl_del_route (int argc, char **argv);
+int jt_ptl_print_routes (int argc, char **argv);
+int jt_ptl_fail_nid (int argc, char **argv);
+
+int dbg_initialize(int argc, char **argv);
+int jt_dbg_filter(int argc, char **argv);
+int jt_dbg_show(int argc, char **argv);
+int jt_dbg_list(int argc, char **argv);
+int jt_dbg_debug_kernel(int argc, char **argv);
+int jt_dbg_debug_daemon(int argc, char **argv);
+int jt_dbg_debug_file(int argc, char **argv);
+int jt_dbg_clear_debug_buf(int argc, char **argv);
+int jt_dbg_mark_debug_buf(int argc, char **argv);
+int jt_dbg_modules(int argc, char **argv);
+int jt_dbg_panic(int argc, char **argv);
+
+/* l_ioctl.c */
+int register_ioc_dev(int dev_id, const char * dev_name);
+void unregister_ioc_dev(int dev_id);
+int set_ioctl_dump(char * file);
+int l_ioctl(int dev_id, int opc, void *buf);
+int parse_dump(char * dump_file, int (*ioc_func)(int dev_id, int opc, void *));
+int jt_ioc_dump(int argc, char **argv);
+
+#endif
--- /dev/null
+/*
+** $Id: stringtab.h,v 1.1.2.1 2003/05/19 04:25:31 braam Exp $
+*/
+/*
+ * stringtab.h
+ */
--- /dev/null
+#ifndef _P30_TYPES_H_
+#define _P30_TYPES_H_
+
+#ifdef __linux__
+#include <asm/types.h>
+#include <asm/timex.h>
+#else
+#include <sys/types.h>
+typedef u_int32_t __u32;
+typedef u_int64_t __u64;
+typedef unsigned long long cycles_t;
+static inline cycles_t get_cycles(void) { return 0; }
+#endif
+
+typedef __u64 ptl_nid_t;
+typedef __u32 ptl_pid_t;
+typedef __u32 ptl_pt_index_t;
+typedef __u32 ptl_ac_index_t;
+typedef __u64 ptl_match_bits_t;
+typedef __u64 ptl_hdr_data_t;
+typedef __u32 ptl_size_t;
+
+/* Generic object handle: an interface index plus a cookie naming one object
+ * on that interface.  Two handles are compared field by field in
+ * PtlHandleEqual(). */
+typedef struct {
+ unsigned long nal_idx; /* which network interface */
+ __u64 cookie; /* which thing on that interface */
+} ptl_handle_any_t;
+
+/* The per-object handle types are all plain aliases of the generic handle,
+ * so the compiler does not distinguish between them. */
+typedef ptl_handle_any_t ptl_handle_ni_t;
+typedef ptl_handle_any_t ptl_handle_eq_t;
+typedef ptl_handle_any_t ptl_handle_md_t;
+typedef ptl_handle_any_t ptl_handle_me_t;
+
+/* Sentinel "no handle" value: a compound literal with both fields set to -1,
+ * which becomes all-ones in the unsigned field types.  PTL_EQ_NONE is the
+ * same value under an event-queue specific name. */
+#define PTL_HANDLE_NONE \
+((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1})
+#define PTL_EQ_NONE PTL_HANDLE_NONE
+
+/* Return 1 when h1 and h2 carry the same interface index and the same
+ * cookie, 0 otherwise. */
+static inline int PtlHandleEqual (ptl_handle_any_t h1, ptl_handle_any_t h2)
+{
+        if (h1.nal_idx != h2.nal_idx)
+                return 0;
+
+        return (h1.cookie == h2.cookie);
+}
+
+#define PTL_NID_ANY ((ptl_nid_t) -1)
+#define PTL_PID_ANY ((ptl_pid_t) -1)
+
+typedef struct {
+ ptl_nid_t nid;
+ ptl_pid_t pid; /* node id / process id */
+} ptl_process_id_t;
+
+typedef enum {
+ PTL_RETAIN = 0,
+ PTL_UNLINK
+} ptl_unlink_t;
+
+typedef enum {
+ PTL_INS_BEFORE,
+ PTL_INS_AFTER
+} ptl_ins_pos_t;
+
+/* One fragment described by page rather than by virtual address.
+ * NOTE(review): presumably the element type of the vector used when
+ * PTL_MD_KIOV is set in ptl_md_t.options -- confirm against lib-move. */
+typedef struct {
+ struct page *kiov_page;
+ unsigned int kiov_len;
+ unsigned int kiov_offset;
+} ptl_kiov_t;
+
+/* Memory descriptor as seen by API users.
+ * 'options' is a bit mask built from the PTL_MD_* flags defined right after
+ * this struct.
+ * NOTE(review): the meanings noted on 'threshold', 'max_size' and 'niov'
+ * below are inferred from the flag names, not shown here -- confirm. */
+typedef struct {
+ void *start;
+ ptl_size_t length;
+ int threshold; /* presumably PTL_MD_THRESH_INF (-1) means no limit */
+ int max_size; /* presumably only used with PTL_MD_MAX_SIZE */
+ unsigned int options; /* PTL_MD_* flags */
+ void *user_ptr;
+ ptl_handle_eq_t eventq;
+ unsigned int niov; /* presumably entry count for PTL_MD_IOV / PTL_MD_KIOV */
+} ptl_md_t;
+
+/* Options for the MD structure */
+#define PTL_MD_OP_PUT (1 << 0)
+#define PTL_MD_OP_GET (1 << 1)
+#define PTL_MD_MANAGE_REMOTE (1 << 2)
+#define PTL_MD_AUTO_UNLINK (1 << 3)
+#define PTL_MD_TRUNCATE (1 << 4)
+#define PTL_MD_ACK_DISABLE (1 << 5)
+#define PTL_MD_IOV (1 << 6)
+#define PTL_MD_MAX_SIZE (1 << 7)
+#define PTL_MD_KIOV (1 << 8)
+
+#define PTL_MD_THRESH_INF (-1)
+
+typedef enum {
+ PTL_EVENT_GET,
+ PTL_EVENT_PUT,
+ PTL_EVENT_REPLY,
+ PTL_EVENT_ACK,
+ PTL_EVENT_SENT
+} ptl_event_kind_t;
+
+/* Event sequence numbers are unsigned PTL_SEQ_BASETYPE values.
+ * PTL_SEQ_GT(a,b) subtracts in the unsigned type, reinterprets the
+ * difference as signed and tests it for > 0, so "a is after b" presumably
+ * stays correct when the counter wraps around. */
+#define PTL_SEQ_BASETYPE long
+typedef unsigned PTL_SEQ_BASETYPE ptl_seq_t;
+#define PTL_SEQ_GT(a,b) (((signed PTL_SEQ_BASETYPE)((a) - (b))) > 0)
+
+/* One event record.  ptl_eq_t.base points at records of this type.
+ * 'sequence' is volatile, which suggests it is polled while another context
+ * writes it -- NOTE(review): confirm the producer/consumer protocol. */
+typedef struct {
+ ptl_event_kind_t type; /* one of PTL_EVENT_* */
+ ptl_process_id_t initiator;
+ ptl_pt_index_t portal;
+ ptl_match_bits_t match_bits;
+ ptl_size_t rlength, mlength, offset;
+ ptl_handle_me_t unlinked_me;
+ ptl_md_t mem_desc;
+ ptl_hdr_data_t hdr_data;
+ cycles_t arrival_time;
+ volatile ptl_seq_t sequence;
+} ptl_event_t;
+
+
+typedef enum {
+ PTL_ACK_REQ,
+ PTL_NOACK_REQ
+} ptl_ack_req_t;
+
+
+/* Event queue descriptor.
+ * NOTE(review): 'base' presumably addresses an array of 'size' event
+ * records and 'sequence' the next sequence number expected -- confirm
+ * against the event-queue implementation. */
+typedef struct {
+ volatile ptl_seq_t sequence;
+ ptl_size_t size;
+ ptl_event_t *base;
+ ptl_handle_any_t cb_eq_handle;
+} ptl_eq_t;
+
+typedef struct {
+ ptl_eq_t *eq;
+} ptl_ni_t;
+
+
+typedef struct {
+ int max_match_entries; /* max number of match entries */
+ int max_mem_descriptors; /* max number of memory descriptors */
+ int max_event_queues; /* max number of event queues */
+ int max_atable_index; /* maximum access control list table index */
+ int max_ptable_index; /* maximum portals table index */
+} ptl_ni_limits_t;
+
+/*
+ * Status registers
+ */
+typedef enum {
+ PTL_SR_DROP_COUNT,
+ PTL_SR_DROP_LENGTH,
+ PTL_SR_RECV_COUNT,
+ PTL_SR_RECV_LENGTH,
+ PTL_SR_SEND_COUNT,
+ PTL_SR_SEND_LENGTH,
+ PTL_SR_MSGS_MAX,
+} ptl_sr_index_t;
+
+typedef int ptl_sr_value_t;
+
+#endif
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+SUBDIRS= socknal toenal @QSWNAL@ @GMNAL@ @SCIMACNAL@
--- /dev/null
+include ../Kernelenv
+
+obj-y = socknal/
+# more coming...
\ No newline at end of file
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+include ../../Rules.linux
+
+MODULE = kgmnal
+modulenet_DATA = kgmnal.o
+EXTRA_PROGRAMS = kgmnal
+
+DEFS =
+kgmnal_SOURCES = gmnal.c gmnal_cb.c gmnal.h
--- /dev/null
+diff -ru gm-1.5.2.1_Linux/drivers/linux/gm/gm_arch.c gm-1.5.2.1_Linux-cfs/drivers/linux/gm/gm_arch.c
+--- gm-1.5.2.1_Linux/drivers/linux/gm/gm_arch.c Mon Jul 1 10:35:09 2002
++++ gm-1.5.2.1_Linux-cfs/drivers/linux/gm/gm_arch.c Thu Sep 19 14:19:38 2002
+@@ -30,6 +30,8 @@
+ *
+ ************************************************************************/
+
++#define EXPORT_SYMTAB
++
+ #include <linux/config.h>
+ #include <linux/module.h>
+
+@@ -4075,6 +4077,28 @@
+ return 0;
+ }
+
++EXPORT_SYMBOL(gm_blocking_receive_no_spin);
++EXPORT_SYMBOL(gm_close);
++EXPORT_SYMBOL(gm_dma_free);
++EXPORT_SYMBOL(gm_dma_malloc);
++EXPORT_SYMBOL(gm_drop_sends);
++EXPORT_SYMBOL(gm_finalize);
++EXPORT_SYMBOL(gm_get_node_id);
++EXPORT_SYMBOL(gm_init);
++EXPORT_SYMBOL(gm_initialize_alarm);
++EXPORT_SYMBOL(gm_max_node_id_in_use);
++EXPORT_SYMBOL(gm_min_size_for_length);
++EXPORT_SYMBOL(gm_num_receive_tokens);
++EXPORT_SYMBOL(gm_num_send_tokens);
++EXPORT_SYMBOL(gm_open);
++EXPORT_SYMBOL(gm_provide_receive_buffer);
++EXPORT_SYMBOL(gm_resume_sending);
++EXPORT_SYMBOL(gm_send_with_callback);
++EXPORT_SYMBOL(gm_set_acceptable_sizes);
++EXPORT_SYMBOL(gm_set_alarm);
++EXPORT_SYMBOL(gm_unknown);
++
++
+ /*
+ This file uses GM standard indentation.
+
+Only in gm-1.5.2.1_Linux-cfs/drivers/linux/gm: gm_arch.c~
+Only in gm-1.5.2.1_Linux-cfs/: trace
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _GMNAL_H
+#define _GMNAL_H
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/locks.h>
+#include <linux/unistd.h>
+#include <linux/init.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/list.h>
+#include <asm/uaccess.h>
+#include <asm/segment.h>
+
+#define DEBUG_SUBSYSTEM S_GMNAL
+
+#include <linux/kp30.h>
+#include <portals/p30.h>
+#include <portals/lib-p30.h>
+
+#include <gm.h>
+
+
+/*
+ * Myrinet GM NAL
+ */
+/* Two receive/transmit buffer classes, sized in pages.  MSG_LEN_* is the
+ * byte length, MSG_SIZE_* the value gm_min_size_for_length() returns for
+ * that length.  The lengths are parenthesised so they expand correctly
+ * inside larger expressions such as "(int)MSG_LEN_LARGE" or
+ * "x / MSG_LEN_SMALL". */
+#define NPAGES_LARGE 16
+#define NPAGES_SMALL 1
+#define MSG_LEN_LARGE (NPAGES_LARGE*PAGE_SIZE)
+#define MSG_LEN_SMALL (NPAGES_SMALL*PAGE_SIZE)
+#define MSG_SIZE_LARGE (gm_min_size_for_length(MSG_LEN_LARGE))
+#define MSG_SIZE_SMALL (gm_min_size_for_length(MSG_LEN_SMALL))
+
+#define TXMSGS 64 /* Number of Transmit Messages */
+#define ENVELOPES 8 /* Number of outstanding receive msgs */
+
+#define KGM_PORT_NUM 3
+#define KGM_HOSTNAME "kgmnal"
+
+
+/* Receive descriptor.  kgmnal_rx() fills one in for each received message
+ * and passes it to lib_parse() as the private argument; kgmnal_recv() later
+ * reads it back and re-posts the buffer with kgmnal_requeue_rx(). */
+typedef struct {
+ char *krx_buffer; /* received message, starting with the ptl_hdr_t */
+ unsigned long krx_len; /* bytes received, as reported by the GM event */
+ unsigned int krx_size; /* GM size value, reused when re-posting the buffer */
+ unsigned int krx_priority; /* GM priority, reused when re-posting the buffer */
+ struct list_head krx_item; /* linkage on kgmnal_data_t.kgm_list (kgm_add_recv) */
+} kgmnal_rx_t;
+
+
+/* Transmit descriptor: filled in by kgmnal_send(), passed to GM as the send
+ * callback context and consumed by kgmnal_txhandler()/kgmnal_tx_done(). */
+typedef struct {
+ nal_cb_t *ktx_nal; /* handed to lib_finalize() on completion */
+ void *ktx_private; /* handed to lib_finalize() on completion */
+ lib_msg_t *ktx_cookie; /* handed to lib_finalize() on completion */
+ char *ktx_buffer; /* DMA buffer holding header + payload; freed on completion */
+ size_t ktx_len; /* bytes in ktx_buffer */
+ unsigned long ktx_size; /* GM size value chosen for the message */
+ int ktx_ndx; /* not referenced by the visible gmnal sources */
+ unsigned int ktx_priority; /* GM priority used for the send */
+ unsigned int ktx_tgt_node; /* destination GM node (taken from the nid) */
+ unsigned int ktx_tgt_port_id; /* destination GM port */
+} kgmnal_tx_t;
+
+
+/* Global state of the GM NAL.  The single instance is kgmnal_data. */
+typedef struct {
+ char kgm_init; /* set to 1 at the end of kgmnal_initialize() */
+ char kgm_shuttingdown; /* set by recv_shutdown(); polled by the receive thread */
+ struct gm_port *kgm_port; /* port returned by gm_open() */
+ struct list_head kgm_list; /* kgmnal_rx_t descriptors added by kgm_add_recv() */
+ ptl_nid_t kgm_nid; /* local node id from gm_get_node_id() */
+ nal_cb_t *kgm_cb; /* points at kgmnal_lib */
+ /* NOTE(review): 'struct kgm_trans' is not defined in the visible sources;
+  * the field is allocated as TXMSGS * sizeof(kgmnal_tx_t) -- confirm type. */
+ struct kgm_trans *kgm_trans;
+ struct tq_struct kgm_ready_tq; /* not referenced by the visible gmnal sources */
+ spinlock_t kgm_dispatch_lock; /* taken by kgmnal_cli(), dropped by kgmnal_sti() */
+ spinlock_t kgm_update_lock; /* initialised only, in the visible sources */
+ spinlock_t kgm_send_lock; /* initialised only, in the visible sources */
+} kgmnal_data_t;
+
+int kgm_init(kgmnal_data_t *kgm_data);
+int kgmnal_recv_thread(void *);
+int gm_return_mynid(void);
+void kgmnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd);
+
+extern kgmnal_data_t kgmnal_data;
+extern nal_t kgmnal_api;
+extern nal_cb_t kgmnal_lib;
+
+#endif /* _GMNAL_H */
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Based on ksocknal and qswnal
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Robert Read <rread@datarithm.net>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* TODO
+ * preallocate send buffers, store on list
+ * put receive buffers on queue, handle with receive threads
+ * use routing
+ */
+
+#include "gmnal.h"
+
+extern kgmnal_rx_t *kgm_add_recv(kgmnal_data_t *,int);
+
+/* Allocate one transmit descriptor with PORTAL_ALLOC and return it.
+ * kgmnal_send() treats a NULL result as out-of-memory. */
+static kgmnal_tx_t *
+get_trans(void)
+{
+        kgmnal_tx_t *t;
+
+        PORTAL_ALLOC(t, sizeof(*t));
+
+        return t;
+}
+
+/* Give back a descriptor obtained from get_trans(). */
+static void
+put_trans(kgmnal_tx_t *t)
+{
+        PORTAL_FREE(t, sizeof(*t));
+}
+
+/*
+ * Decide whether 'nid' names a node reachable directly over GM.
+ *
+ * Returns non-zero when nid survives truncation to the unsigned int GM node
+ * id and is below the value reported by gm_max_node_id_in_use(); returns
+ * zero otherwise.
+ */
+int
+kgmnal_ispeer (ptl_nid_t nid)
+{
+        unsigned int gmnid = (unsigned int)nid;
+        /* Start from 0: if gm_max_node_id_in_use() does not fill in its
+         * output, the range test below fails cleanly instead of comparing
+         * against an uninitialised stack value. */
+        unsigned int nnids = 0;
+
+        gm_max_node_id_in_use(kgmnal_data.kgm_port, &nnids);
+
+        /* lost high bits on conversion => cannot be a GM node id */
+        if ((ptl_nid_t)gmnid != nid)
+                return 0;
+
+        return (gmnid < nnids);         /* it's in this machine */
+}
+
+/*
+ * LIB functions follow
+ *
+ */
+/* cb_read callback: copy 'len' bytes from src_addr to dst_addr with a plain
+ * memcpy().  'private' is unused.  Always returns 0. */
+static int
+kgmnal_read (nal_cb_t *nal, void *private, void *dst_addr, user_ptr src_addr,
+             size_t len)
+{
+        long nbytes = (long)len;
+
+        CDEBUG(D_NET, "0x%Lx: reading %ld bytes from %p -> %p\n",
+               nal->ni.nid, nbytes, src_addr, dst_addr );
+
+        memcpy( dst_addr, src_addr, len );
+
+        return 0;
+}
+
+/* cb_write callback: copy 'len' bytes from src_addr to dst_addr with a plain
+ * memcpy().  'private' is unused.  Always returns 0. */
+static int
+kgmnal_write(nal_cb_t *nal, void *private, user_ptr dst_addr, void *src_addr,
+             size_t len)
+{
+        long nbytes = (long)len;
+
+        CDEBUG(D_NET, "0x%Lx: writing %ld bytes from %p -> %p\n",
+               nal->ni.nid, nbytes, src_addr, dst_addr );
+
+        memcpy( dst_addr, src_addr, len );
+
+        return 0;
+}
+
+/* cb_malloc callback: allocate 'len' bytes through PORTAL_ALLOC and return
+ * the resulting pointer.  'nal' is unused. */
+static void *
+kgmnal_malloc(nal_cb_t *nal, size_t len)
+{
+        void *buf = NULL;
+
+        PORTAL_ALLOC(buf, len);
+
+        return buf;
+}
+
+/* cb_free callback: release a buffer of 'len' bytes through PORTAL_FREE.
+ * 'nal' is unused. */
+static void
+kgmnal_free(nal_cb_t *nal, void *buf, size_t len)
+{
+ PORTAL_FREE(buf, len);
+}
+
+/* cb_printf callback: when D_NET debugging is enabled in portal_debug,
+ * format the message into a 256-byte stack buffer (longer output is
+ * truncated by vsnprintf) and printk it prefixed with the current CPU id.
+ * Does nothing when D_NET is off. */
+static void
+kgmnal_printf(nal_cb_t *nal, const char *fmt, ...)
+{
+        char    text[256];
+        va_list args;
+
+        if (!(portal_debug & D_NET))
+                return;
+
+        va_start(args, fmt);
+        vsnprintf(text, sizeof(text), fmt, args);
+        va_end(args);
+
+        printk("CPUId: %d %s",smp_processor_id(), text);
+}
+
+
+/* cb_cli callback: take the NAL dispatch lock with interrupts disabled,
+ * saving the previous interrupt state in *flags. */
+static void
+kgmnal_cli(nal_cb_t *nal, unsigned long *flags)
+{
+        kgmnal_data_t *kgm = nal->nal_data;
+
+        spin_lock_irqsave(&kgm->kgm_dispatch_lock, *flags);
+}
+
+
+/* cb_sti callback: drop the NAL dispatch lock and restore the interrupt
+ * state saved in *flags by kgmnal_cli(). */
+static void
+kgmnal_sti(nal_cb_t *nal, unsigned long *flags)
+{
+        kgmnal_data_t *kgm = nal->nal_data;
+
+        spin_unlock_irqrestore(&kgm->kgm_dispatch_lock, *flags);
+}
+
+
+/* cb_dist callback: report 0 for the local nid and 1 for any other nid.
+ * Always returns 0. */
+static int
+kgmnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
+{
+        /* network distance doesn't mean much for this nal */
+        *dist = (nal->ni.nid == nid) ? 0 : 1;
+
+        return 0;
+}
+
+/* FIXME rmr: add routing code here */
+/* Complete a transmit: finalize the portals message, free the DMA buffer
+ * and release the descriptor.  'error' is currently ignored. */
+static void
+kgmnal_tx_done(kgmnal_tx_t *trans, int error)
+{
+        char *dma_buf = trans->ktx_buffer;
+
+        lib_finalize(trans->ktx_nal, trans->ktx_private, trans->ktx_cookie);
+
+        /* scrub the descriptor before its pieces are handed back */
+        trans->ktx_buffer = NULL;
+        trans->ktx_len = 0;
+
+        gm_dma_free(kgmnal_data.kgm_port, dma_buf);
+        put_trans(trans);
+}
+/* Printable names for the GM send status codes handled by
+ * kgmnal_txhandler(); all other slots stay NULL. */
+static char * gm_error_strings[GM_NUM_STATUS_CODES] = {
+        [GM_SUCCESS] = "GM_SUCCESS",
+        [GM_SEND_TIMED_OUT] = "GM_SEND_TIMED_OUT",
+        [GM_SEND_REJECTED] = "GM_SEND_REJECTED",
+        [GM_SEND_TARGET_PORT_CLOSED] = "GM_SEND_TARGET_PORT_CLOSED",
+        [GM_SEND_TARGET_NODE_UNREACHABLE] = "GM_SEND_TARGET_NODE_UNREACHABLE",
+        [GM_SEND_DROPPED] = "GM_SEND_DROPPED",
+        [GM_SEND_PORT_CLOSED] = "GM_SEND_PORT_CLOSED",
+};
+
+/* Map a GM status code to its name.  Codes outside the table, and codes
+ * without an entry, yield "Unknown error" instead of indexing past the
+ * array. */
+inline char * get_error(int status)
+{
+        if (status < 0 || status >= GM_NUM_STATUS_CODES ||
+            gm_error_strings[status] == NULL)
+                return "Unknown error";
+
+        return gm_error_strings[status];
+}
+
+/* Completion callback given to gm_resume_sending()/gm_drop_sends() by
+ * kgmnal_txhandler(); it only logs the context pointer and status. */
+static void
+kgmnal_errhandler(struct gm_port *p, void *context, gm_status_t status)
+{
+ CDEBUG(D_NET,"error callback: ktx %p status %d\n", context, status);
+}
+
+/*
+ * Send-completion callback registered through gm_send_with_callback().
+ * 'context' is the kgmnal_tx_t built by kgmnal_send().
+ *
+ * Any status other than GM_SUCCESS is logged and reported as -EIO.  Some
+ * statuses additionally call gm_resume_sending() or gm_drop_sends() for
+ * the destination.  The descriptor is always completed via
+ * kgmnal_tx_done().
+ */
+static void
+kgmnal_txhandler(struct gm_port *p, void *context, gm_status_t status)
+{
+        kgmnal_tx_t *ktx = (kgmnal_tx_t *)context;
+        int err = -EIO;         /* everything but GM_SUCCESS is an I/O error */
+
+        LASSERT (p != NULL);
+        LASSERT (ktx != NULL);
+
+        CDEBUG(D_NET,"ktx %p status %d nid 0x%x pid %d\n", ktx, status,
+               ktx->ktx_tgt_node, ktx->ktx_tgt_port_id);
+
+        switch((int)status) {
+        case GM_SUCCESS:                /* normal */
+                err = 0;
+                break;
+
+        case GM_SEND_TIMED_OUT:         /* application error */
+        case GM_SEND_REJECTED:          /* size of msg unacceptable */
+        case GM_SEND_TARGET_PORT_CLOSED:
+                CERROR("%s (%d):\n", get_error(status), status);
+                gm_resume_sending(kgmnal_data.kgm_port, ktx->ktx_priority,
+                                  ktx->ktx_tgt_node, ktx->ktx_tgt_port_id,
+                                  kgmnal_errhandler, NULL);
+                break;
+
+        case GM_SEND_TARGET_NODE_UNREACHABLE:
+        case GM_SEND_PORT_CLOSED:
+                CERROR("%s (%d):\n", get_error(status), status);
+                gm_drop_sends(kgmnal_data.kgm_port, ktx->ktx_priority,
+                              ktx->ktx_tgt_node, ktx->ktx_tgt_port_id,
+                              kgmnal_errhandler, NULL);
+                break;
+
+        case GM_SEND_DROPPED:
+                CERROR("%s (%d):\n", get_error(status), status);
+                break;
+
+        default:
+                CERROR("Unknown status: %d\n", status);
+                break;
+        }
+
+        kgmnal_tx_done(ktx, err);
+}
+
+/*
+ */
+
+/*
+ * cb_send callback: transmit one portals message over GM.
+ *
+ * GM has no vectored send, so the header and the payload described by
+ * iov/niov are coalesced into one freshly allocated DMA buffer.  A transmit
+ * descriptor records what kgmnal_txhandler() needs to finalize the message
+ * and free the buffer once GM reports completion.
+ *
+ * Returns 0 once the send has been queued, -ENOMEM when the descriptor or
+ * the DMA buffer cannot be allocated, and -1 when header + payload exceed
+ * MSG_LEN_LARGE.  On every failure path the descriptor is released again
+ * (it used to leak).
+ */
+static int
+kgmnal_send(nal_cb_t *nal,
+            void *private,
+            lib_msg_t *cookie,
+            ptl_hdr_t *hdr,
+            int type,
+            ptl_nid_t nid,
+            ptl_pid_t pid,
+            int options,
+            unsigned int niov,
+            lib_md_iov_t *iov,
+            size_t len)
+{
+        /*
+         * ipnal assumes that this is the private as passed to lib_dispatch..
+         * so do we :/
+         */
+        kgmnal_tx_t *ktx=NULL;
+        int rc=0;
+        void * buf;
+        int buf_len = sizeof(ptl_hdr_t) + len;
+        int buf_size = 0;
+
+        LASSERT ((options & PTL_MD_KIOV) == 0);
+
+        PROF_START(gmnal_send);
+
+        /* len is a size_t; cast so the argument matches the %d conversion */
+        CDEBUG(D_NET, "sending %d bytes from %p to nid: 0x%Lx pid %d\n",
+               (int)len, iov, nid, KGM_PORT_NUM);
+
+        /* save transaction info to trans for later finalize and cleanup */
+        ktx = get_trans();
+        if (ktx == NULL) {
+                rc = -ENOMEM;
+                goto send_exit;
+        }
+
+        /* GM doesn't support vectored write, so a buffer is needed to
+         * coalesce header and data.  Also, memory must be dma'able or
+         * registered with GM. */
+        if (buf_len <= MSG_LEN_SMALL) {
+                buf_size = MSG_SIZE_SMALL;
+        } else if (buf_len <= MSG_LEN_LARGE) {
+                buf_size = MSG_SIZE_LARGE;
+        } else {
+                printk("kgmnal:request exceeds TX MTU size (%d).\n",
+                       MSG_SIZE_LARGE);
+                rc = -1;
+                goto send_free_ktx;
+        }
+
+        buf = gm_dma_malloc(kgmnal_data.kgm_port, buf_len);
+        if (buf == NULL) {
+                rc = -ENOMEM;
+                goto send_free_ktx;
+        }
+        memcpy(buf, hdr, sizeof(ptl_hdr_t));
+
+        if (len != 0)
+                lib_copy_iov2buf(((char *)buf) + sizeof (ptl_hdr_t),
+                                 options, niov, iov, len);
+
+        ktx->ktx_nal = nal;
+        ktx->ktx_private = private;
+        ktx->ktx_cookie = cookie;
+        ktx->ktx_len = buf_len;
+        ktx->ktx_size = buf_size;
+        ktx->ktx_buffer = buf;
+        ktx->ktx_priority = GM_LOW_PRIORITY;
+        ktx->ktx_tgt_node = nid;
+        ktx->ktx_tgt_port_id = KGM_PORT_NUM;
+
+        CDEBUG(D_NET, "gm_send %d bytes (size %d) from %p to nid: 0x%Lx "
+               "pid %d pri %d\n", buf_len, buf_size, iov, nid, KGM_PORT_NUM,
+               GM_LOW_PRIORITY);
+
+        /* from here on the descriptor and buffer belong to the completion
+         * callback, which releases both */
+        gm_send_with_callback(kgmnal_data.kgm_port, buf, buf_size,
+                              buf_len, GM_LOW_PRIORITY,
+                              nid, KGM_PORT_NUM,
+                              kgmnal_txhandler, ktx);
+
+        PROF_FINISH(gmnal_send);
+        return rc;
+
+ send_free_ktx:
+        put_trans(ktx);
+ send_exit:
+        return rc;
+}
+/* Router forwarding hook (installed as kprni_fwd in the router interface
+ * table in gmnal.c).  Forwarding is not implemented: the request is only
+ * logged; 'arg' and 'fwd' are not touched. */
+void
+kgmnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
+{
+ CERROR ("forwarding not implemented\n");
+}
+
+/* Forwarding completion stub; it only logs that forwarding is unsupported.
+ * NOTE(review): this is a non-static symbol with a kqswnal_ prefix inside
+ * the GM NAL -- it looks like a copy from qswnal and may clash if both NALs
+ * are linked into one image; confirm and rename or make static. */
+void
+kqswnal_fwd_callback (void *arg, int error)
+{
+ CERROR ("forwarding not implemented\n");
+}
+
+
+/* Hand a receive buffer back to GM, using the size and priority recorded
+ * in the descriptor. */
+static inline void
+kgmnal_requeue_rx(kgmnal_rx_t *krx)
+{
+        struct gm_port *port = kgmnal_data.kgm_port;
+
+        gm_provide_receive_buffer(port, krx->krx_buffer,
+                                  krx->krx_size, krx->krx_priority);
+}
+
+/*
+ * Process one received portals packet.  Called from kgmnal_recv_thread()
+ * for each GM receive event.
+ *
+ * kgm  - NAL state (port used to re-post buffers)
+ * len  - number of bytes received
+ * size - GM size value of the buffer, needed to re-post it
+ * buf  - the receive buffer; starts with a ptl_hdr_t
+ * pri  - GM priority the buffer was received at
+ *
+ * Packets for the local nid go to lib_parse(); anything else is dropped
+ * and the buffer is re-posted.  Every log message ends in a newline, as
+ * the other CERROR calls in this file do.
+ */
+static void kgmnal_rx(kgmnal_data_t *kgm, unsigned long len, unsigned int size,
+                      void * buf, unsigned int pri)
+{
+        ptl_hdr_t *hdr = buf;
+        /* The descriptor lives on this stack frame, so it is only valid
+         * while lib_parse() runs (kgmnal_recv() re-posts the buffer through
+         * it).  NOTE(review): assumes lib_parse() finishes with it before
+         * returning -- confirm. */
+        kgmnal_rx_t krx;
+
+        CDEBUG(D_NET,"buf %p, len %ld\n", buf, len);
+
+        if ( len < sizeof( ptl_hdr_t ) ) {
+                /* XXX what's this for? */
+                if (kgm->kgm_shuttingdown)
+                        return;
+                CERROR("kgmnal: did not receive complete portal header, "
+                       "len= %ld\n", len);
+                gm_provide_receive_buffer(kgm->kgm_port, buf, size, pri);
+                return;
+        }
+
+        /* might want to use separate threads to handle receive */
+        krx.krx_buffer = buf;
+        krx.krx_len = len;
+        krx.krx_size = size;
+        krx.krx_priority = pri;
+
+        if ( hdr->dest_nid == kgmnal_lib.ni.nid ) {
+                PROF_START(lib_parse);
+                lib_parse(&kgmnal_lib, (ptl_hdr_t *)krx.krx_buffer, &krx);
+                PROF_FINISH(lib_parse);
+        } else if (kgmnal_ispeer(hdr->dest_nid)) {
+                /* should have gone direct to peer */
+                CERROR("dropping packet from 0x%llx to 0x%llx: target is "
+                       "a peer\n", hdr->src_nid, hdr->dest_nid);
+                kgmnal_requeue_rx(&krx);
+        } else {
+                /* forward to gateway */
+                CERROR("forwarding not implemented yet\n");
+                kgmnal_requeue_rx(&krx);
+        }
+
+        return;
+}
+
+
+/*
+ * cb_recv callback: deliver the payload of a received message.
+ *
+ * 'private' is the kgmnal_rx_t that kgmnal_rx() passed to lib_parse().
+ * mlen bytes following the portals header are copied into iov/niov, the
+ * message is finalized and the receive buffer is handed back to GM.
+ *
+ * Returns rlen.
+ */
+static int kgmnal_recv(nal_cb_t *nal,
+                       void *private,
+                       lib_msg_t *cookie,
+                       int options,
+                       unsigned int niov,
+                       lib_md_iov_t *iov,
+                       size_t mlen,
+                       size_t rlen)
+{
+        kgmnal_rx_t *krx = private;
+
+        LASSERT ((options & PTL_MD_KIOV) == 0);
+
+        /* mlen/rlen are size_t; cast so the arguments match %d */
+        CDEBUG(D_NET,"mlen=%d, rlen=%d\n", (int)mlen, (int)rlen);
+
+        /* What was actually received must be >= what sender claims to
+         * have sent.  This is an LASSERT, since lib-move doesn't
+         * check cb return code yet. */
+        LASSERT (krx->krx_len >= sizeof (ptl_hdr_t) + rlen);
+        LASSERT (mlen <= rlen);
+
+        PROF_START(gmnal_recv);
+
+        if(mlen != 0) {
+                PROF_START(memcpy);
+                lib_copy_buf2iov (options, niov, iov,
+                                  krx->krx_buffer + sizeof (ptl_hdr_t), mlen);
+                PROF_FINISH(memcpy);
+        }
+
+        PROF_START(lib_finalize);
+        lib_finalize(nal, private, cookie);
+        PROF_FINISH(lib_finalize);
+
+        kgmnal_requeue_rx(krx);
+
+        PROF_FINISH(gmnal_recv);
+
+        return rlen;
+}
+
+
+/* Alarm callback registered by recv_shutdown(); it only logs that it ran. */
+static void kgmnal_shutdown(void * none)
+{
+        CERROR("called\n");
+}
+
+/*
+ * Set terminate and use alarm to wake up the recv thread.
+ *
+ * The alarm object has static storage: it is registered with GM here but
+ * only fires later, after this function has returned, so it must not live
+ * on this stack frame.
+ */
+static void recv_shutdown(kgmnal_data_t *kgm)
+{
+        static gm_alarm_t alarm;
+
+        kgm->kgm_shuttingdown = 1;
+        gm_initialize_alarm(&alarm);
+        gm_set_alarm(kgm->kgm_port, &alarm, 1, kgmnal_shutdown, NULL);
+}
+
+/* Begin NAL shutdown.  At present this only flags shutdown and arms the
+ * wake-up alarm via recv_shutdown(); the steps listed in the body are not
+ * implemented yet.  Always returns 0. */
+int kgmnal_end(kgmnal_data_t *kgm)
+{
+
+ /* wait for sends to finish ? */
+ /* remove receive buffers */
+ /* shutdown receive thread */
+
+ recv_shutdown(kgm);
+
+ return 0;
+}
+
+/*
+ * Receiver kernel thread, started from kgmnal_initialize() with the NAL
+ * state as argument.
+ *
+ * Loops until kgm_shuttingdown is set or GM returns no event.  Receive
+ * events are handed to kgmnal_rx() with the matching priority, a bad-send
+ * event is only logged, and everything else goes to gm_unknown().
+ * Always returns 0.
+ */
+int kgmnal_recv_thread(void *arg)
+{
+        kgmnal_data_t *kgm = arg;
+
+        LASSERT(kgm != NULL);
+
+        kportal_daemonize("kgmnal_rx");
+
+        while (!kgm->kgm_shuttingdown) {
+                gm_recv_event_t *e;
+                int              type;
+                int              priority;
+
+                e = gm_blocking_receive_no_spin(kgm->kgm_port);
+                if (e == NULL) {
+                        CERROR("gm_blocking_receive returned NULL\n");
+                        break;
+                }
+
+                type = gm_ntohc(e->recv.type);
+
+                switch (type) {
+                case GM_HIGH_RECV_EVENT:
+                case GM_RECV_EVENT:
+                        priority = (type == GM_HIGH_RECV_EVENT) ?
+                                   GM_HIGH_PRIORITY : GM_LOW_PRIORITY;
+                        kgmnal_rx(kgm, gm_ntohl(e->recv.length),
+                                  gm_ntohc(e->recv.size),
+                                  gm_ntohp(e->recv.buffer), priority);
+                        break;
+
+                case GM_ALARM_EVENT:
+                        CERROR("received alarm");
+                        gm_unknown(kgm->kgm_port, e);
+                        break;
+
+                case GM_BAD_SEND_DETECTED_EVENT: /* ?? */
+                        CERROR("received bad send!\n");
+                        break;
+
+                default:
+                        gm_unknown(kgm->kgm_port, e);
+                        break;
+                }
+        }
+
+        CERROR("shuttting down.\n");
+        return 0;
+}
+
+/* Callback table connecting the portals library to this NAL: each cb_*
+ * slot is bound to the static function of the same purpose defined above.
+ * Uses the GNU "field:" initializer syntax.  kgmnal_initialize() assigns
+ * nal_data again at module load. */
+nal_cb_t kgmnal_lib = {
+ nal_data: &kgmnal_data, /* NAL private data */
+ cb_send: kgmnal_send,
+ cb_recv: kgmnal_recv,
+ cb_read: kgmnal_read,
+ cb_write: kgmnal_write,
+ cb_malloc: kgmnal_malloc,
+ cb_free: kgmnal_free,
+ cb_printf: kgmnal_printf,
+ cb_cli: kgmnal_cli,
+ cb_sti: kgmnal_sti,
+ cb_dist: kgmnal_dist
+};
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Based on ksocknal and qswnal
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Robert Read <rread@datarithm.net>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "gmnal.h"
+
+ptl_handle_ni_t kgmnal_ni;
+nal_t kgmnal_api;
+
+kgmnal_data_t kgmnal_data;
+int gmnal_debug = 0;
+
+/* Router interface description for the GM NAL: NAL id GMNAL, no argument,
+ * forwarding through kgmnal_fwd_packet().
+ * NOTE(review): the global is named kqswnal_router_interface, the same name
+ * qswnal.c defines, and nothing in the visible gmnal sources references it.
+ * It looks like a copy from qswnal and may clash if both NALs are linked
+ * into one image -- confirm and rename. */
+kpr_nal_interface_t kqswnal_router_interface = {
+ kprni_nalid: GMNAL,
+ kprni_arg: NULL,
+ kprni_fwd: kgmnal_fwd_packet,
+};
+
+/* API 'forward' hook: pass request 'id' with its argument and return
+ * blocks to lib_dispatch() on the GM callback table.  args_len and ret_len
+ * are not used.  Always returns PTL_OK. */
+static int kgmnal_forward(nal_t *nal,
+                          int id,
+                          void *args, size_t args_len,
+                          void *ret, size_t ret_len)
+{
+        kgmnal_data_t *k;
+        nal_cb_t      *nal_cb;
+
+        k = nal->nal_data;
+        nal_cb = k->kgm_cb;
+
+        /* there is exactly one GM NAL instance; make sure we were given it */
+        LASSERT (nal == &kgmnal_api);
+        LASSERT (k == &kgmnal_data);
+        LASSERT (nal_cb == &kgmnal_lib);
+
+        lib_dispatch(nal_cb, k, id, args, ret); /* nal needs k */
+
+        return PTL_OK;
+}
+
+/* API 'lock' hook: delegate to the library's cb_cli callback, which takes
+ * the dispatch lock and saves the interrupt state in *flags. */
+static void kgmnal_lock(nal_t *nal, unsigned long *flags)
+{
+        kgmnal_data_t *k;
+        nal_cb_t      *nal_cb;
+
+        k = nal->nal_data;
+        nal_cb = k->kgm_cb;
+
+        LASSERT (nal == &kgmnal_api);
+        LASSERT (k == &kgmnal_data);
+        LASSERT (nal_cb == &kgmnal_lib);
+
+        nal_cb->cb_cli(nal_cb, flags);
+}
+
+/* API 'unlock' hook: delegate to the library's cb_sti callback, which drops
+ * the dispatch lock and restores the interrupt state from *flags. */
+static void kgmnal_unlock(nal_t *nal, unsigned long *flags)
+{
+        kgmnal_data_t *k;
+        nal_cb_t      *nal_cb;
+
+        k = nal->nal_data;
+        nal_cb = k->kgm_cb;
+
+        LASSERT (nal == &kgmnal_api);
+        LASSERT (k == &kgmnal_data);
+        LASSERT (nal_cb == &kgmnal_lib);
+
+        nal_cb->cb_sti(nal_cb, flags);
+}
+
+/* API 'shutdown' hook: checks it was given the GM NAL and does nothing
+ * else.  'ni' is unused.  Always returns 0. */
+static int kgmnal_shutdown(nal_t *nal, int ni)
+{
+ LASSERT (nal == &kgmnal_api);
+ return 0;
+}
+
+/* API 'yield' hook: give up the CPU, but only when the scheduler has
+ * flagged the current task for rescheduling. */
+static void kgmnal_yield( nal_t *nal )
+{
+        LASSERT (nal == &kgmnal_api);
+
+        if (!current->need_resched)
+                return;
+
+        schedule();
+}
+
+/* Allocate a receive descriptor and link it at the front of
+ * data->kgm_list.  'ndx' is currently unused.  Returns the new descriptor,
+ * or NULL (after logging) when the allocation fails.  Only krx_item is set
+ * up here; the other fields are left as PORTAL_ALLOC returned them. */
+kgmnal_rx_t *kgm_add_recv(kgmnal_data_t *data,int ndx)
+{
+        kgmnal_rx_t *conn;
+
+        PORTAL_ALLOC(conn, sizeof(*conn));
+        if (conn == NULL) {
+                /* out of memory */
+                printk("kgm_add_recv: memory alloc failed\n");
+                return NULL;
+        }
+
+        list_add(&conn->krx_item, &data->kgm_list);
+
+        return conn;
+}
+
+/*
+ * NAL start-up hook handed to PtlNIInit() by kgmnal_initialize().
+ *
+ * Initialises the portals library for this NAL with the local nid, pid 0
+ * and the node count reported by GM, then returns the API object.
+ * 'interface' and 'requested_pid' are unused.
+ */
+static nal_t *kgmnal_init(int interface, ptl_pt_index_t ptl_size,
+                          ptl_ac_index_t ac_size, ptl_pid_t requested_pid)
+{
+        /* Start from 0 so a GM call that leaves its output untouched cannot
+         * feed an uninitialised stack value into lib_init(). */
+        unsigned int nnids = 0;
+
+        gm_max_node_id_in_use(kgmnal_data.kgm_port, &nnids);
+
+        CDEBUG(D_NET, "calling lib_init with nid 0x%Lx of %d\n",
+               kgmnal_data.kgm_nid, nnids);
+        lib_init(&kgmnal_lib, kgmnal_data.kgm_nid, 0, nnids,ptl_size, ac_size);
+        return &kgmnal_api;
+}
+
+/*
+ * Module exit: unregister the NI, shut the library down, close the GM port
+ * and free the transmit descriptor array and all receive descriptors.
+ *
+ * Fixes relative to the earlier version:
+ *  - "bsizeof" was a typo for sizeof in the descriptor array free;
+ *  - the receive list was walked with list_for_each() while entries were
+ *    unlinked and the cursor advanced a second time by hand, so entries
+ *    were skipped and leaked.  The list is now drained from the head until
+ *    it is empty.
+ */
+static void __exit
+kgmnal_finalize(void)
+{
+        PORTAL_SYMBOL_UNREGISTER (kgmnal_ni);
+        PtlNIFini(kgmnal_ni);
+        /* NOTE(review): lib_init() was given &kgmnal_lib; confirm that
+         * passing &kgmnal_api here is what lib_fini() expects. */
+        lib_fini(&kgmnal_api);
+
+        if (kgmnal_data.kgm_port) {
+                gm_close(kgmnal_data.kgm_port);
+        }
+
+        /* FIXME: free dma buffers */
+        /* FIXME: kill receiver thread */
+
+        PORTAL_FREE (kgmnal_data.kgm_trans, sizeof(kgmnal_tx_t)*TXMSGS);
+
+        while (!list_empty(&kgmnal_data.kgm_list)) {
+                kgmnal_rx_t *conn;
+
+                conn = list_entry(kgmnal_data.kgm_list.next,
+                                  kgmnal_rx_t, krx_item);
+                CDEBUG(D_IOCTL, "freeing conn %p\n",conn);
+                list_del(&conn->krx_item);
+                PORTAL_FREE(conn, sizeof(*conn));
+        }
+
+        CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read (&portal_kmemory));
+
+        return;
+}
+
+/*
+ * Module init: set up the API table and global state, open the GM port,
+ * post receive buffers, register the NAL with portals and start the
+ * receiver thread.
+ *
+ * Returns 0 on success.  On failure it returns -1 (descriptor allocation,
+ * gm_init or gm_open failed) or -ENOMEM (receive buffer allocation or
+ * PtlNIInit failed), as before.  Failure paths now unwind what was set up
+ * so far (GM port, GM library, descriptor array) instead of leaking it.
+ */
+static int __init
+kgmnal_initialize(void)
+{
+        int rc;
+        int ntok;
+        unsigned long sizemask;
+        unsigned int nid;
+
+        CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read (&portal_kmemory));
+
+        kgmnal_api.forward = kgmnal_forward;
+        kgmnal_api.shutdown = kgmnal_shutdown;
+        kgmnal_api.yield = kgmnal_yield;
+        kgmnal_api.validate = NULL;         /* our api validate is a NOOP */
+        kgmnal_api.lock= kgmnal_lock;
+        kgmnal_api.unlock= kgmnal_unlock;
+        kgmnal_api.nal_data = &kgmnal_data;
+
+        kgmnal_lib.nal_data = &kgmnal_data;
+
+        memset(&kgmnal_data, 0, sizeof(kgmnal_data));
+
+        INIT_LIST_HEAD(&kgmnal_data.kgm_list);
+        kgmnal_data.kgm_cb = &kgmnal_lib;
+
+        /* Allocate transmit descriptors */
+        PORTAL_ALLOC (kgmnal_data.kgm_trans, sizeof(kgmnal_tx_t)*TXMSGS);
+        if (kgmnal_data.kgm_trans==NULL) {
+                printk("kgmnal: init: failed to allocate transmit "
+                       "descriptors\n");
+                return -1;
+        }
+        memset(kgmnal_data.kgm_trans,-1,sizeof(kgmnal_tx_t)*(TXMSGS));
+
+        spin_lock_init(&kgmnal_data.kgm_dispatch_lock);
+        spin_lock_init(&kgmnal_data.kgm_update_lock);
+        spin_lock_init(&kgmnal_data.kgm_send_lock);
+
+        /* Do the receiver and xmtr allocation */
+
+        rc = gm_init();
+        if (rc != GM_SUCCESS) {
+                CERROR("gm_init failed: %d\n", rc);
+                rc = -1;
+                goto failed_trans;
+        }
+
+        rc = gm_open(&kgmnal_data.kgm_port, 0 , KGM_PORT_NUM, KGM_HOSTNAME,
+                     GM_API_VERSION_1_1);
+        if (rc != GM_SUCCESS) {
+                kgmnal_data.kgm_port = NULL;
+                CERROR("gm_open failed: %d\n", rc);
+                rc = -1;
+                goto failed_gm;
+        }
+        gm_get_node_id(kgmnal_data.kgm_port, &nid);
+        kgmnal_data.kgm_nid = nid;
+
+        /* Allocate 2 different sizes of buffers.  For now, use half
+           the tokens for each. */
+        ntok = gm_num_receive_tokens(kgmnal_data.kgm_port)/2;
+        CDEBUG(D_NET, "gmnal_init: creating %d large %d byte recv buffers\n",
+               ntok, (int)(MSG_LEN_LARGE));
+        while (ntok-- > 0) {
+                void * buffer = gm_dma_malloc(kgmnal_data.kgm_port,
+                                              MSG_LEN_LARGE);
+                if (buffer == NULL) {
+                        /* rc still holds GM_SUCCESS here, so don't print it */
+                        CERROR("gm_dma_malloc failed for large recv buffer\n");
+                        rc = -ENOMEM;
+                        goto failed_port;
+                }
+                CDEBUG(D_NET, " add buffer: port %p buf %p len %d size %d "
+                       "pri %d\n ", kgmnal_data.kgm_port, buffer,
+                       (int)(MSG_LEN_LARGE), MSG_SIZE_LARGE, GM_LOW_PRIORITY);
+
+                gm_provide_receive_buffer(kgmnal_data.kgm_port, buffer,
+                                          MSG_SIZE_LARGE, GM_LOW_PRIORITY);
+        }
+
+        ntok = gm_num_receive_tokens(kgmnal_data.kgm_port)/2;
+        CDEBUG(D_NET, "gmnal_init: creating %d small %d byte recv buffers\n",
+               ntok, (int)(MSG_LEN_SMALL));
+        while (ntok-- > 0) {
+                void * buffer = gm_dma_malloc(kgmnal_data.kgm_port,
+                                              MSG_LEN_SMALL);
+                if (buffer == NULL) {
+                        CERROR("gm_dma_malloc failed for small recv buffer\n");
+                        rc = -ENOMEM;
+                        goto failed_port;
+                }
+                CDEBUG(D_NET, " add buffer: port %p buf %p len %d size %d "
+                       "pri %d\n ", kgmnal_data.kgm_port, buffer,
+                       (int)(MSG_LEN_SMALL), MSG_SIZE_SMALL, GM_LOW_PRIORITY);
+
+                gm_provide_receive_buffer(kgmnal_data.kgm_port, buffer,
+                                          MSG_SIZE_SMALL, GM_LOW_PRIORITY);
+        }
+
+        /* build the mask in unsigned long, the type of sizemask */
+        sizemask = (1UL << MSG_SIZE_LARGE) | (1UL << MSG_SIZE_SMALL);
+        CDEBUG(D_NET, "gm_set_acceptable_sizes port %p pri %d mask 0x%lx\n",
+               kgmnal_data.kgm_port, GM_LOW_PRIORITY, sizemask);
+        gm_set_acceptable_sizes(kgmnal_data.kgm_port, GM_LOW_PRIORITY,
+                                sizemask);
+        gm_set_acceptable_sizes(kgmnal_data.kgm_port, GM_HIGH_PRIORITY, 0);
+
+        /* Initialize Network Interface */
+        rc = PtlNIInit(kgmnal_init, 32, 4, 0, &kgmnal_ni);
+        if (rc) {
+                CERROR("PtlNIInit failed %d\n", rc);
+                rc = -ENOMEM;
+                goto failed_port;
+        }
+
+        /* Start receiver thread */
+        /* NOTE(review): the kernel_thread() result is not checked; a failed
+         * spawn leaves the NAL registered with no receiver. */
+        kernel_thread(kgmnal_recv_thread, &kgmnal_data, 0);
+
+        PORTAL_SYMBOL_REGISTER(kgmnal_ni);
+
+        kgmnal_data.kgm_init = 1;
+
+        return 0;
+
+ failed_port:
+        /* FIXME: receive buffers already handed to GM are not tracked, so
+         * they cannot be returned with gm_dma_free() here. */
+        gm_close(kgmnal_data.kgm_port);
+        kgmnal_data.kgm_port = NULL;
+ failed_gm:
+        gm_finalize();
+ failed_trans:
+        PORTAL_FREE (kgmnal_data.kgm_trans, sizeof(kgmnal_tx_t)*TXMSGS);
+        kgmnal_data.kgm_trans = NULL;
+        return rc;
+}
+
+MODULE_AUTHOR("Robert Read <rread@datarithm.net>");
+MODULE_DESCRIPTION("Kernel Myrinet GM NAL v0.1");
+MODULE_LICENSE("GPL");
+
+module_init (kgmnal_initialize);
+module_exit (kgmnal_finalize);
+
+EXPORT_SYMBOL (kgmnal_ni);
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+include ../../Rules.linux
+
+MODULE = kqswnal
+modulenet_DATA = kqswnal.o
+EXTRA_PROGRAMS = kqswnal
+
+
+#CFLAGS:= @KCFLAGS@
+#CPPFLAGS:=@KCPPFLAGS@
+DEFS =
+kqswnal_SOURCES = qswnal.c qswnal_cb.c qswnal.h
--- /dev/null
+/*
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ *
+ * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
+ * W. Marcus Miller - Based on ksocknal
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include "qswnal.h"
+
+/* Handle passed to PtlNIInit() by kqswnal_initialise(); exported at the
+ * bottom of this file */
+ptl_handle_ni_t kqswnal_ni;
+/* API-side NAL object; its callbacks are filled in by kqswnal_initialise() */
+nal_t kqswnal_api;
+/* All state for this NAL; zeroed at the start of kqswnal_initialise() and
+ * again at the end of kqswnal_finalise() */
+kqswnal_data_t kqswnal_data;
+
+/* Description of this NAL handed to the router by kpr_register() in
+ * kqswnal_initialise(); kprni_fwd is the forwarding entry point */
+kpr_nal_interface_t kqswnal_router_interface = {
+ kprni_nalid: QSWNAL,
+ kprni_arg: NULL,
+ kprni_fwd: kqswnal_fwd_packet,
+};
+
+
+/*
+ * API -> library dispatch.  Passes request 'id' together with its
+ * argument and return blocks to lib_dispatch() and always reports
+ * PTL_OK.  'args_len' and 'ret_len' are not consulted.
+ */
+static int
+kqswnal_forward(nal_t *nal,
+                int id,
+                void *args, size_t args_len,
+                void *ret, size_t ret_len)
+{
+        kqswnal_data_t *k;
+        nal_cb_t       *nal_cb;
+
+        k = nal->nal_data;
+        nal_cb = k->kqn_cb;
+
+        /* only one instance of this NAL exists: check we were given it */
+        LASSERT (nal == &kqswnal_api);
+        LASSERT (k == &kqswnal_data);
+        LASSERT (nal_cb == &kqswnal_lib);
+
+        /* the library side wants 'k' as its private argument */
+        lib_dispatch(nal_cb, k, id, args, ret);
+
+        return (PTL_OK);
+}
+
+/*
+ * API-side lock: delegates to the library's cb_cli callback, which
+ * receives 'flags' to stash the saved interrupt state in.
+ */
+static void
+kqswnal_lock (nal_t *nal, unsigned long *flags)
+{
+        kqswnal_data_t *k;
+        nal_cb_t       *nal_cb;
+
+        k = nal->nal_data;
+        nal_cb = k->kqn_cb;
+
+        /* sanity: we must have been handed the one and only instance */
+        LASSERT (nal == &kqswnal_api);
+        LASSERT (k == &kqswnal_data);
+        LASSERT (nal_cb == &kqswnal_lib);
+
+        nal_cb->cb_cli(nal_cb,flags);
+}
+
+/*
+ * API-side unlock: delegates to the library's cb_sti callback, passing
+ * back the 'flags' that the matching kqswnal_lock() filled in.
+ */
+static void
+kqswnal_unlock(nal_t *nal, unsigned long *flags)
+{
+        kqswnal_data_t *k;
+        nal_cb_t       *nal_cb;
+
+        k = nal->nal_data;
+        nal_cb = k->kqn_cb;
+
+        /* sanity: we must have been handed the one and only instance */
+        LASSERT (nal == &kqswnal_api);
+        LASSERT (k == &kqswnal_data);
+        LASSERT (nal_cb == &kqswnal_lib);
+
+        nal_cb->cb_sti(nal_cb,flags);
+}
+
+/*
+ * API shutdown hook.  Only logs and checks that 'nal' is this NAL's API
+ * object; 'ni' is unused.  Always returns 0.
+ */
+static int
+kqswnal_shutdown(nal_t *nal, int ni)
+{
+        CDEBUG (D_NET, "shutdown\n");
+        LASSERT (nal == &kqswnal_api);
+
+        return 0;
+}
+
+/*
+ * API yield hook: give up the CPU, but only if the current task has
+ * been flagged as needing a reschedule.
+ */
+static void
+kqswnal_yield( nal_t *nal )
+{
+        CDEBUG (D_NET, "yield\n");
+
+        if (!current->need_resched)
+                return;
+
+        schedule();
+}
+
+/*
+ * NAL start-up callback handed to PtlNIInit().  Reads this node's id
+ * and the node count from the Elan device, initialises the library side
+ * with them and returns the API object.  'interface' and
+ * 'requested_pid' are not used.
+ */
+static nal_t *
+kqswnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
+             ptl_pid_t requested_pid)
+{
+        ptl_nid_t mynid;
+        int       nnids;
+
+        mynid = ep_nodeid (kqswnal_data.kqn_epdev);
+        nnids = ep_numnodes (kqswnal_data.kqn_epdev);
+
+        CDEBUG(D_NET, "calling lib_init with nid "LPX64" of %d\n", mynid,nnids);
+
+        lib_init(&kqswnal_lib, mynid, 0, nnids, ptl_size, ac_size);
+
+        return &kqswnal_api;
+}
+
+/*
+ * Tear down everything kqswnal_initialise() managed to set up.
+ *
+ * Used both as the module exit routine and as the common cleanup on
+ * every failure path of kqswnal_initialise().  How far initialisation
+ * got is recorded in kqswnal_data.kqn_init; the switch below undoes the
+ * later stages first and falls through to the earlier ones.  All the
+ * remaining cleanup is guarded by NULL checks (the state was zeroed at
+ * the start of initialisation), so it copes with partial set-up.
+ *
+ * Deliberately NOT marked __exit: it is called from __init code, and
+ * exit-section code may be discarded when the driver is built into the
+ * kernel rather than as a module.
+ *
+ * NOTE(review): on early failures kpr_shutdown()/kpr_deregister() run
+ * on a router handle that was never registered (only zeroed); this
+ * assumes the router tolerates that -- confirm.
+ */
+void
+kqswnal_finalise (void)
+{
+        switch (kqswnal_data.kqn_init)
+        {
+        default:
+                LASSERT (0);
+
+        case KQN_INIT_ALL:
+                PORTAL_SYMBOL_UNREGISTER (kqswnal_ni);
+                /* fall through */
+
+        case KQN_INIT_PTL:
+                PtlNIFini (kqswnal_ni);
+                lib_fini (&kqswnal_lib);
+                /* fall through */
+
+        case KQN_INIT_DATA:
+                break;
+
+        case KQN_INIT_NOTHING:
+                /* nothing was ever set up */
+                return;
+        }
+
+        /**********************************************************************/
+        /* Make router stop her calling me and fail any more call-ins */
+        kpr_shutdown (&kqswnal_data.kqn_router);
+
+        /**********************************************************************/
+        /* flag threads to terminate, wake them and wait for them to die */
+
+        kqswnal_data.kqn_shuttingdown = 1;
+        wake_up_all (&kqswnal_data.kqn_sched_waitq);
+
+        /* poll once a second until every scheduler thread has exited */
+        while (atomic_read (&kqswnal_data.kqn_nthreads) != 0) {
+                CDEBUG(D_NET, "waiting for %d threads to terminate\n",
+                       atomic_read (&kqswnal_data.kqn_nthreads));
+                set_current_state (TASK_UNINTERRUPTIBLE);
+                schedule_timeout (HZ);
+        }
+
+        /**********************************************************************/
+        /* close elan comms */
+
+        if (kqswnal_data.kqn_eprx_small != NULL)
+                ep_remove_large_rcvr (kqswnal_data.kqn_eprx_small);
+
+        if (kqswnal_data.kqn_eprx_large != NULL)
+                ep_remove_large_rcvr (kqswnal_data.kqn_eprx_large);
+
+        if (kqswnal_data.kqn_eptx != NULL)
+                ep_free_large_xmtr (kqswnal_data.kqn_eptx);
+
+        /**********************************************************************/
+        /* No more threads.  No more portals, router or comms callbacks!
+         * I control the horizontals and the verticals...
+         */
+
+        /**********************************************************************/
+        /* Complete any blocked forwarding packets with error
+         */
+
+        while (!list_empty (&kqswnal_data.kqn_idletxd_fwdq))
+        {
+                kpr_fwd_desc_t *fwd = list_entry (kqswnal_data.kqn_idletxd_fwdq.next,
+                                                  kpr_fwd_desc_t, kprfd_list);
+                list_del (&fwd->kprfd_list);
+                kpr_fwd_done (&kqswnal_data.kqn_router, fwd, -EHOSTUNREACH);
+        }
+
+        while (!list_empty (&kqswnal_data.kqn_delayedfwds))
+        {
+                kpr_fwd_desc_t *fwd = list_entry (kqswnal_data.kqn_delayedfwds.next,
+                                                  kpr_fwd_desc_t, kprfd_list);
+                list_del (&fwd->kprfd_list);
+                kpr_fwd_done (&kqswnal_data.kqn_router, fwd, -EHOSTUNREACH);
+        }
+
+        /**********************************************************************/
+        /* Wait for router to complete any packets I sent her
+         */
+
+        kpr_deregister (&kqswnal_data.kqn_router);
+
+
+        /**********************************************************************/
+        /* Unmap message buffers and free all descriptors and buffers
+         */
+
+        if (kqswnal_data.kqn_eprxdmahandle != NULL)
+        {
+                /* page count must match the reservation made at init */
+                elan3_dvma_unload(kqswnal_data.kqn_epdev->DmaState,
+                                  kqswnal_data.kqn_eprxdmahandle, 0,
+                                  KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL +
+                                  KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE);
+
+                elan3_dma_release(kqswnal_data.kqn_epdev->DmaState,
+                                  kqswnal_data.kqn_eprxdmahandle);
+        }
+
+        if (kqswnal_data.kqn_eptxdmahandle != NULL)
+        {
+                /* page count must match the reservation made at init */
+                elan3_dvma_unload(kqswnal_data.kqn_epdev->DmaState,
+                                  kqswnal_data.kqn_eptxdmahandle, 0,
+                                  KQSW_NTXMSGPAGES * (KQSW_NTXMSGS +
+                                                      KQSW_NNBLK_TXMSGS));
+
+                elan3_dma_release(kqswnal_data.kqn_epdev->DmaState,
+                                  kqswnal_data.kqn_eptxdmahandle);
+        }
+
+        if (kqswnal_data.kqn_txds != NULL)
+        {
+                int i;
+
+                /* the array was zeroed at init, so descriptors that never
+                 * got a buffer still have ktx_buffer == NULL */
+                for (i = 0; i < KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS; i++)
+                {
+                        kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];
+
+                        if (ktx->ktx_buffer != NULL)
+                                PORTAL_FREE(ktx->ktx_buffer,
+                                            KQSW_TX_BUFFER_SIZE);
+                }
+
+                PORTAL_FREE(kqswnal_data.kqn_txds,
+                            sizeof (kqswnal_tx_t) * (KQSW_NTXMSGS +
+                                                     KQSW_NNBLK_TXMSGS));
+        }
+
+        if (kqswnal_data.kqn_rxds != NULL)
+        {
+                int i;
+                int j;
+
+                /* descriptors init never reached have krx_npages == 0, and
+                 * pages that were never allocated are still NULL */
+                for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
+                {
+                        kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
+
+                        for (j = 0; j < krx->krx_npages; j++)
+                                if (krx->krx_pages[j] != NULL)
+                                        __free_page (krx->krx_pages[j]);
+                }
+
+                PORTAL_FREE(kqswnal_data.kqn_rxds,
+                            sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL +
+                                                    KQSW_NRXMSGS_LARGE));
+        }
+
+        /* resets flags, pointers to NULL etc */
+        memset(&kqswnal_data, 0, sizeof (kqswnal_data));
+
+        CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read(&portal_kmemory));
+
+        printk (KERN_INFO "Routing QSW NAL unloaded (final mem %d)\n",
+                atomic_read(&portal_kmemory));
+}
+
+/*
+ * Module initialisation.
+ *
+ * Sets up the API callbacks and global state, grabs Elan device 0 with
+ * one transmitter and two receivers (small/large), reserves Elan
+ * address space, allocates and pre-maps transmit and receive buffers,
+ * initialises the portals network interface, posts all receives, spawns
+ * one scheduler thread per CPU and finally tries to register with the
+ * router (failure to do so is not fatal).
+ *
+ * Progress is recorded in kqswnal_data.kqn_init so that
+ * kqswnal_finalise() can undo a partial set-up; every failure after the
+ * device lookup calls it before returning.
+ *
+ * Returns 0 on success, or a negative errno: -ENOMEM for every failure
+ * except thread creation, which returns kqswnal_thread_start()'s code.
+ */
+static int __init
+kqswnal_initialise (void)
+{
+        ELAN3_DMA_REQUEST dmareq;
+        int               rc;
+        int               i;
+        int               elan_page_idx;
+        int               pkmem = atomic_read(&portal_kmemory);
+
+        LASSERT (kqswnal_data.kqn_init == KQN_INIT_NOTHING);
+
+        CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&portal_kmemory));
+
+        kqswnal_api.forward  = kqswnal_forward;
+        kqswnal_api.shutdown = kqswnal_shutdown;
+        kqswnal_api.yield    = kqswnal_yield;
+        kqswnal_api.validate = NULL;            /* our api validate is a NOOP */
+        kqswnal_api.lock     = kqswnal_lock;
+        kqswnal_api.unlock   = kqswnal_unlock;
+        kqswnal_api.nal_data = &kqswnal_data;
+
+        kqswnal_lib.nal_data = &kqswnal_data;
+
+        /* ensure all pointers NULL etc */
+        memset (&kqswnal_data, 0, sizeof (kqswnal_data));
+
+        kqswnal_data.kqn_cb = &kqswnal_lib;
+
+        INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds);
+        INIT_LIST_HEAD (&kqswnal_data.kqn_nblk_idletxds);
+        spin_lock_init (&kqswnal_data.kqn_idletxd_lock);
+        init_waitqueue_head (&kqswnal_data.kqn_idletxd_waitq);
+        INIT_LIST_HEAD (&kqswnal_data.kqn_idletxd_fwdq);
+
+        INIT_LIST_HEAD (&kqswnal_data.kqn_delayedfwds);
+        INIT_LIST_HEAD (&kqswnal_data.kqn_delayedtxds);
+        INIT_LIST_HEAD (&kqswnal_data.kqn_readyrxds);
+
+        spin_lock_init (&kqswnal_data.kqn_sched_lock);
+        init_waitqueue_head (&kqswnal_data.kqn_sched_waitq);
+
+        spin_lock_init (&kqswnal_data.kqn_statelock);
+
+        /* pointers/lists/locks initialised */
+        kqswnal_data.kqn_init = KQN_INIT_DATA;
+
+        /**********************************************************************/
+        /* Find the first Elan device */
+
+        kqswnal_data.kqn_epdev = ep_device (0);
+        if (kqswnal_data.kqn_epdev == NULL)
+        {
+                CERROR ("Can't get elan device 0\n");
+                return (-ENOMEM);
+        }
+
+        /**********************************************************************/
+        /* Get the transmitter */
+
+        kqswnal_data.kqn_eptx = ep_alloc_large_xmtr (kqswnal_data.kqn_epdev);
+        if (kqswnal_data.kqn_eptx == NULL)
+        {
+                CERROR ("Can't allocate transmitter\n");
+                kqswnal_finalise ();
+                return (-ENOMEM);
+        }
+
+        /**********************************************************************/
+        /* Get the receivers */
+
+        kqswnal_data.kqn_eprx_small = ep_install_large_rcvr (kqswnal_data.kqn_epdev,
+                                                             EP_SVC_LARGE_PORTALS_SMALL,
+                                                             KQSW_EP_ENVELOPES_SMALL);
+        if (kqswnal_data.kqn_eprx_small == NULL)
+        {
+                CERROR ("Can't install small msg receiver\n");
+                kqswnal_finalise ();
+                return (-ENOMEM);
+        }
+
+        kqswnal_data.kqn_eprx_large = ep_install_large_rcvr (kqswnal_data.kqn_epdev,
+                                                             EP_SVC_LARGE_PORTALS_LARGE,
+                                                             KQSW_EP_ENVELOPES_LARGE);
+        if (kqswnal_data.kqn_eprx_large == NULL)
+        {
+                CERROR ("Can't install large msg receiver\n");
+                kqswnal_finalise ();
+                return (-ENOMEM);
+        }
+
+        /**********************************************************************/
+        /* Reserve Elan address space for transmit buffers */
+
+        dmareq.Waitfn   = DDI_DMA_SLEEP;
+        dmareq.ElanAddr = (E3_Addr) 0;
+        dmareq.Attr     = PTE_LOAD_LITTLE_ENDIAN;
+        dmareq.Perm     = ELAN_PERM_REMOTEREAD;
+
+        rc = elan3_dma_reserve(kqswnal_data.kqn_epdev->DmaState,
+                               KQSW_NTXMSGPAGES*(KQSW_NTXMSGS+KQSW_NNBLK_TXMSGS),
+                               &dmareq, &kqswnal_data.kqn_eptxdmahandle);
+        if (rc != DDI_SUCCESS)
+        {
+                /* this is the transmit reservation: say so */
+                CERROR ("Can't reserve tx dma space\n");
+                kqswnal_finalise ();
+                return (-ENOMEM);
+        }
+
+        /**********************************************************************/
+        /* Reserve Elan address space for receive buffers */
+
+        dmareq.Waitfn   = DDI_DMA_SLEEP;
+        dmareq.ElanAddr = (E3_Addr) 0;
+        dmareq.Attr     = PTE_LOAD_LITTLE_ENDIAN;
+        dmareq.Perm     = ELAN_PERM_REMOTEWRITE;
+
+        rc = elan3_dma_reserve (kqswnal_data.kqn_epdev->DmaState,
+                                KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL +
+                                KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE,
+                                &dmareq, &kqswnal_data.kqn_eprxdmahandle);
+        if (rc != DDI_SUCCESS)
+        {
+                CERROR ("Can't reserve rx dma space\n");
+                kqswnal_finalise ();
+                return (-ENOMEM);
+        }
+
+        /**********************************************************************/
+        /* Allocate/Initialise transmit descriptors */
+
+        PORTAL_ALLOC(kqswnal_data.kqn_txds,
+                     sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS));
+        if (kqswnal_data.kqn_txds == NULL)
+        {
+                kqswnal_finalise ();
+                return (-ENOMEM);
+        }
+
+        /* clear flags, null pointers etc */
+        memset(kqswnal_data.kqn_txds, 0,
+               sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS));
+        for (i = 0; i < (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS); i++)
+        {
+                int           premapped_pages;
+                kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];
+                int           basepage = i * KQSW_NTXMSGPAGES;
+
+                PORTAL_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE);
+                if (ktx->ktx_buffer == NULL)
+                {
+                        kqswnal_finalise ();
+                        return (-ENOMEM);
+                }
+
+                /* Map pre-allocated buffer NOW, to save latency on transmit */
+                premapped_pages = kqswnal_pages_spanned(ktx->ktx_buffer,
+                                                        KQSW_TX_BUFFER_SIZE);
+
+                elan3_dvma_kaddr_load (kqswnal_data.kqn_epdev->DmaState,
+                                       kqswnal_data.kqn_eptxdmahandle,
+                                       ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE,
+                                       basepage, &ktx->ktx_ebuffer);
+
+                ktx->ktx_basepage = basepage + premapped_pages; /* message mapping starts here */
+                ktx->ktx_npages = KQSW_NTXMSGPAGES - premapped_pages; /* for this many pages */
+
+                /* the first KQSW_NTXMSGS descriptors form the normal pool,
+                 * the rest the reserve for non-blocking senders */
+                if (i < KQSW_NTXMSGS)
+                        ktx->ktx_idle = &kqswnal_data.kqn_idletxds;
+                else
+                        ktx->ktx_idle = &kqswnal_data.kqn_nblk_idletxds;
+
+                list_add_tail (&ktx->ktx_list, ktx->ktx_idle);
+        }
+
+        /**********************************************************************/
+        /* Allocate/Initialise receive descriptors */
+
+        PORTAL_ALLOC (kqswnal_data.kqn_rxds,
+                      sizeof (kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE));
+        if (kqswnal_data.kqn_rxds == NULL)
+        {
+                kqswnal_finalise ();
+                return (-ENOMEM);
+        }
+
+        memset(kqswnal_data.kqn_rxds, 0, /* clear flags, null pointers etc */
+               sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL+KQSW_NRXMSGS_LARGE));
+
+        elan_page_idx = 0;
+        for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
+        {
+                E3_Addr       elanaddr;
+                int           j;
+                kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
+
+                /* small descriptors come first in the array */
+                if (i < KQSW_NRXMSGS_SMALL)
+                {
+                        krx->krx_npages = KQSW_NRXMSGPAGES_SMALL;
+                        krx->krx_eprx   = kqswnal_data.kqn_eprx_small;
+                }
+                else
+                {
+                        krx->krx_npages = KQSW_NRXMSGPAGES_LARGE;
+                        krx->krx_eprx   = kqswnal_data.kqn_eprx_large;
+                }
+
+                LASSERT (krx->krx_npages > 0);
+                for (j = 0; j < krx->krx_npages; j++)
+                {
+                        krx->krx_pages[j] = alloc_page (GFP_KERNEL);
+                        if (krx->krx_pages[j] == NULL)
+                        {
+                                kqswnal_finalise ();
+                                return (-ENOMEM);
+                        }
+
+                        LASSERT(page_address(krx->krx_pages[j]) != NULL);
+
+                        elan3_dvma_kaddr_load(kqswnal_data.kqn_epdev->DmaState,
+                                              kqswnal_data.kqn_eprxdmahandle,
+                                              page_address(krx->krx_pages[j]),
+                                              PAGE_SIZE, elan_page_idx,
+                                              &elanaddr);
+                        elan_page_idx++;
+
+                        /* the buffer's Elan address is that of its first page */
+                        if (j == 0)
+                                krx->krx_elanaddr = elanaddr;
+
+                        /* NB we assume a contiguous */
+                        LASSERT (elanaddr == krx->krx_elanaddr + j * PAGE_SIZE);
+                }
+        }
+        LASSERT (elan_page_idx ==
+                 (KQSW_NRXMSGS_SMALL * KQSW_NRXMSGPAGES_SMALL) +
+                 (KQSW_NRXMSGS_LARGE * KQSW_NRXMSGPAGES_LARGE));
+
+        /**********************************************************************/
+        /* Network interface ready to initialise */
+
+        rc = PtlNIInit(kqswnal_init, 32, 4, 0, &kqswnal_ni);
+        if (rc != 0)
+        {
+                CERROR ("PtlNIInit failed %d\n", rc);
+                kqswnal_finalise ();
+                return (-ENOMEM);
+        }
+
+        kqswnal_data.kqn_init = KQN_INIT_PTL;
+
+        /**********************************************************************/
+        /* Queue receives, now that it's OK to run their completion callbacks */
+
+        for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
+        {
+                kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
+
+                /* NB this enqueue can allocate/sleep (attr == 0) */
+                rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
+                                      krx->krx_elanaddr,
+                                      krx->krx_npages * PAGE_SIZE, 0);
+                if (rc != 0)
+                {
+                        CERROR ("failed ep_queue_receive %d\n", rc);
+                        kqswnal_finalise ();
+                        return (-ENOMEM);
+                }
+        }
+
+        /**********************************************************************/
+        /* Spawn scheduling threads */
+        for (i = 0; i < smp_num_cpus; i++)
+        {
+                rc = kqswnal_thread_start (kqswnal_scheduler, NULL);
+                if (rc != 0)
+                {
+                        CERROR ("failed to spawn scheduling thread: %d\n", rc);
+                        kqswnal_finalise ();
+                        return (rc);
+                }
+        }
+
+        /**********************************************************************/
+        /* Connect to the router */
+        rc = kpr_register (&kqswnal_data.kqn_router, &kqswnal_router_interface);
+        /* not fatal: we simply run without routing; only complain on failure */
+        if (rc != 0)
+                CDEBUG(D_NET, "Can't initialise routing interface (rc = %d): not routing\n",rc);
+
+        PORTAL_SYMBOL_REGISTER(kqswnal_ni);
+        kqswnal_data.kqn_init = KQN_INIT_ALL;
+
+        printk(KERN_INFO "Routing QSW NAL loaded on node %d of %d "
+               "(Routing %s, initial mem %d)\n",
+               ep_nodeid (kqswnal_data.kqn_epdev),
+               ep_numnodes (kqswnal_data.kqn_epdev),
+               kpr_routing (&kqswnal_data.kqn_router) ? "enabled" : "disabled",
+               pkmem);
+
+        return (0);
+}
+
+
+/* Module metadata for the Quadrics switch NAL */
+MODULE_AUTHOR("W. Marcus Miller <marcusm@llnl.gov>");
+MODULE_DESCRIPTION("Kernel Quadrics Switch NAL v1.00");
+MODULE_LICENSE("GPL");
+
+/* Module load/unload entry points */
+module_init (kqswnal_initialise);
+module_exit (kqswnal_finalise);
+
+/* kqswnal_ni is the handle passed to PtlNIInit() at load time; export it
+ * so that other modules can reference it */
+EXPORT_SYMBOL (kqswnal_ni);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines.
+ *
+ */
+
+#ifndef _QSWNAL_H
+#define _QSWNAL_H
+#define EXPORT_SYMTAB
+
+#ifdef PROPRIETARY_ELAN
+# include <qsw/kernel.h>
+#else
+# include <qsnet/kernel.h>
+#endif
+
+#undef printf /* nasty QSW #define */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <elan3/elanregs.h>
+#include <elan3/elandev.h>
+#include <elan3/elanvp.h>
+#include <elan3/elan3mmu.h>
+#include <elan3/elanctxt.h>
+#include <elan3/elandebug.h>
+#include <elan3/urom_addrs.h>
+#include <elan3/busops.h>
+#include <elan3/kcomm.h>
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/locks.h>
+#include <linux/unistd.h>
+#include <net/sock.h>
+#include <linux/uio.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/list.h>
+#include <asm/uaccess.h>
+#include <asm/segment.h>
+
+#define DEBUG_SUBSYSTEM S_QSWNAL
+
+#include <linux/kp30.h>
+#include <portals/p30.h>
+#include <portals/lib-p30.h>
+
+/* Compile-time switch: when non-zero, KQSW_HDR_SIZE includes room for two
+ * checksum words and the kqsw_csum() helper at the bottom of this header
+ * is built */
+#define KQSW_CHECKSUM 0
+#if KQSW_CHECKSUM
+typedef unsigned long kqsw_csum_t;
+#define KQSW_CSUM_SIZE (2 * sizeof (kqsw_csum_t))
+#else
+#define KQSW_CSUM_SIZE 0
+#endif
+/* bytes that precede the payload: portals header plus any checksum space */
+#define KQSW_HDR_SIZE (sizeof (ptl_hdr_t) + KQSW_CSUM_SIZE)
+
+/*
+ * Elan NAL
+ */
+#define EP_SVC_LARGE_PORTALS_SMALL (0x10) /* Portals over elan port number (small payloads) */
+#define EP_SVC_LARGE_PORTALS_LARGE (0x11) /* Portals over elan port number (large payloads) */
+/* NB small/large message sizes are GLOBAL constants */
+
+/*
+ * Performance Tuning defines
+ * NB no mention of PAGE_SIZE for interoperability
+ */
+#if PTL_LARGE_MTU
+# define KQSW_MAXPAYLOAD (256<<10) /* biggest message this NAL will cope with */
+#else
+# define KQSW_MAXPAYLOAD (64<<10) /* biggest message this NAL will cope with */
+#endif
+
+/* chosen so that header + small payload together come to exactly 4K */
+#define KQSW_SMALLPAYLOAD ((4<<10) - KQSW_HDR_SIZE) /* small/large ep receiver breakpoint */
+
+#define KQSW_TX_MAXCONTIG (1<<10) /* largest payload that gets made contiguous on transmit */
+
+/* sizes of the two transmit descriptor pools set up at initialisation */
+#define KQSW_NTXMSGS 8 /* # normal transmit messages */
+#define KQSW_NNBLK_TXMSGS 128 /* # reserved transmit messages if can't block */
+
+#define KQSW_NRXMSGS_LARGE 64 /* # large receive buffers */
+#define KQSW_EP_ENVELOPES_LARGE 128 /* # large ep envelopes */
+
+#define KQSW_NRXMSGS_SMALL 256 /* # small receive buffers */
+#define KQSW_EP_ENVELOPES_SMALL 2048 /* # small ep envelopes */
+
+#define KQSW_RESCHED 100 /* # busy loops that forces scheduler to yield */
+
+/*
+ * derived constants
+ *
+ * NB each explanatory comment below FOLLOWS the #define it describes.
+ * btopr() presumably rounds a byte count up to whole pages -- TODO confirm
+ * against the Elan headers.
+ */
+
+#define KQSW_TX_BUFFER_SIZE (KQSW_HDR_SIZE + KQSW_TX_MAXCONTIG)
+/* The pre-allocated tx buffer (hdr + small payload) */
+
+#define KQSW_NTXMSGPAGES (btopr(KQSW_TX_BUFFER_SIZE) + 1 + btopr(KQSW_MAXPAYLOAD) + 1)
+/* Reserve elan address space for pre-allocated and pre-mapped transmit
+ * buffer and a full payload too. Extra pages allow for page alignment */
+
+#define KQSW_NRXMSGPAGES_SMALL (btopr(KQSW_HDR_SIZE + KQSW_SMALLPAYLOAD))
+/* receive hdr/payload always contiguous and page aligned */
+#define KQSW_NRXMSGBYTES_SMALL (KQSW_NRXMSGPAGES_SMALL * PAGE_SIZE)
+
+#define KQSW_NRXMSGPAGES_LARGE (btopr(KQSW_HDR_SIZE + KQSW_MAXPAYLOAD))
+/* receive hdr/payload always contiguous and page aligned */
+#define KQSW_NRXMSGBYTES_LARGE (KQSW_NRXMSGPAGES_LARGE * PAGE_SIZE)
+/* biggest complete packet we can receive (or transmit) */
+
+
+/*
+ * Receive descriptor: one per receive buffer posted to an Elan receiver.
+ * The page and iovec arrays are sized for the large case; a small
+ * descriptor only uses the first krx_npages entries.
+ */
+typedef struct
+{
+ struct list_head krx_list; /* enqueue -> thread */
+ EP_RCVR *krx_eprx; /* port to post receives to */
+ EP_RXD *krx_rxd; /* receive descriptor (for repost) */
+ E3_Addr krx_elanaddr; /* Elan address of buffer (contiguous in elan vm) */
+ int krx_npages; /* # pages in receive buffer */
+ int krx_nob; /* Number Of Bytes received into buffer */
+ kpr_fwd_desc_t krx_fwd; /* embedded forwarding descriptor */
+ struct page *krx_pages[KQSW_NRXMSGPAGES_LARGE]; /* pages allocated */
+ struct iovec krx_iov[KQSW_NRXMSGPAGES_LARGE]; /* iovec for forwarding */
+} kqswnal_rx_t;
+
+/*
+ * Transmit descriptor.  Each one owns a pre-allocated, pre-mapped buffer
+ * of KQSW_TX_BUFFER_SIZE bytes (ktx_buffer/ktx_ebuffer) plus a window of
+ * reserved Elan pages (ktx_basepage, ktx_npages) into which the rest of
+ * a message is mapped for the duration of a send.
+ */
+typedef struct
+{
+ struct list_head ktx_list; /* enqueue idle/delayed */
+ struct list_head *ktx_idle; /* where to put when idle */
+ char ktx_state; /* What I'm doing */
+ uint32_t ktx_basepage; /* page offset in reserved elan tx vaddrs for mapping pages */
+ int ktx_npages; /* pages reserved for mapping messages */
+ int ktx_nmappedpages; /* # pages mapped for current message */
+ EP_IOVEC ktx_iov[EP_MAXFRAG]; /* msg frags (elan vaddrs) */
+ int ktx_niov; /* # message frags */
+ int ktx_port; /* destination ep port */
+ ptl_nid_t ktx_nid; /* destination node */
+ void *ktx_args[2]; /* completion passthru */
+ E3_Addr ktx_ebuffer; /* elan address of ktx_buffer */
+ char *ktx_buffer; /* pre-allocated contiguous buffer for hdr + small payloads */
+} kqswnal_tx_t;
+
+/* values of ktx_state */
+#define KTX_IDLE 0 /* MUST BE ZERO (so zeroed ktx is idle) */
+#define KTX_SENDING 1 /* local send */
+#define KTX_FORWARDING 2 /* routing a packet */
+
+/*
+ * Global NAL state; the single instance is kqswnal_data.  It is zeroed
+ * before use, so every pointer starts out NULL and kqn_init starts out
+ * as KQN_INIT_NOTHING.
+ */
+typedef struct
+{
+ char kqn_init; /* what's been initialised */
+ char kqn_shuttingdown; /* I'm trying to shut down */
+ atomic_t kqn_nthreads; /* # threads still running */
+
+ kqswnal_rx_t *kqn_rxds; /* all the receive descriptors */
+ kqswnal_tx_t *kqn_txds; /* all the transmit descriptors */
+
+ struct list_head kqn_idletxds; /* transmit descriptors free to use */
+ struct list_head kqn_nblk_idletxds; /* reserve of idle transmit descriptors for non-blocking sends */
+ spinlock_t kqn_idletxd_lock; /* serialise idle txd access */
+ wait_queue_head_t kqn_idletxd_waitq; /* sender blocks here waiting for idle txd */
+ struct list_head kqn_idletxd_fwdq; /* forwarded packets block here waiting for idle txd */
+
+ spinlock_t kqn_sched_lock; /* serialise packet schedulers */
+ wait_queue_head_t kqn_sched_waitq; /* scheduler blocks here */
+
+ struct list_head kqn_readyrxds; /* rxds full of data */
+ struct list_head kqn_delayedfwds; /* delayed forwards */
+ struct list_head kqn_delayedtxds; /* delayed transmits */
+
+ spinlock_t kqn_statelock; /* cb_cli/cb_sti */
+ nal_cb_t *kqn_cb; /* -> kqswnal_lib */
+ EP_DEV *kqn_epdev; /* elan device */
+ EP_XMTR *kqn_eptx; /* elan transmitter */
+ EP_RCVR *kqn_eprx_small; /* elan receiver (small messages) */
+ EP_RCVR *kqn_eprx_large; /* elan receiver (large messages) */
+ ELAN3_DMA_HANDLE *kqn_eptxdmahandle; /* elan reserved tx vaddrs */
+ ELAN3_DMA_HANDLE *kqn_eprxdmahandle; /* elan reserved rx vaddrs */
+ kpr_router_t kqn_router; /* connection to Kernel Portals Router module */
+} kqswnal_data_t;
+
+/* kqn_init state */
+/* values are ordered by how far initialisation has progressed */
+#define KQN_INIT_NOTHING 0 /* MUST BE ZERO so zeroed state is initialised OK */
+#define KQN_INIT_DATA 1
+#define KQN_INIT_PTL 2
+#define KQN_INIT_ALL 3
+
+/* Objects shared between the source files of this NAL */
+extern nal_cb_t kqswnal_lib;
+extern nal_t kqswnal_api;
+extern kqswnal_data_t kqswnal_data;
+
+/* Entry points that module initialisation and the inline helpers in this
+ * header refer to */
+extern int kqswnal_thread_start (int (*fn)(void *arg), void *arg);
+extern void kqswnal_rxhandler(EP_RXD *rxd);
+extern int kqswnal_scheduler (void *);
+extern void kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd);
+
+/*
+ * Repost a receive descriptor through its saved EP_RXD, offering the
+ * whole buffer (all krx_npages pages) with kqswnal_rxhandler as the
+ * completion handler.
+ */
+static inline void
+kqswnal_requeue_rx (kqswnal_rx_t *krx)
+{
+        ep_requeue_receive (krx->krx_rxd,
+                            kqswnal_rxhandler,
+                            krx,
+                            krx->krx_elanaddr,
+                            krx->krx_npages * PAGE_SIZE);
+}
+
+/*
+ * Number of pages touched by the 'nob' bytes starting at 'base',
+ * i.e. (index of the page holding the last byte) - (index of the page
+ * holding the first byte) + 1.
+ */
+static inline int
+kqswnal_pages_spanned (void *base, int nob)
+{
+        unsigned long start = (unsigned long)base;
+        unsigned long first_page;
+        unsigned long last_page;
+
+        first_page = start >> PAGE_SHIFT;
+        last_page = (start + (nob - 1)) >> PAGE_SHIFT;
+
+        /* can't wrap address space */
+        LASSERT (last_page >= first_page);
+
+        return (last_page - first_page + 1);
+}
+
+#if KQSW_CHECKSUM
+/*
+ * Fold the 'nob' bytes at 'base' into the running checksum 'sum' by
+ * simple byte-wise addition and return the new total.
+ */
+static inline kqsw_csum_t kqsw_csum (kqsw_csum_t sum, void *base, int nob)
+{
+        unsigned char *bytes = (unsigned char *)base;
+        int            i;
+
+        for (i = 0; i < nob; i++)
+                sum += bytes[i];
+
+        return (sum);
+}
+#endif
+
+#endif /* _QSWNAL_H */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ *
+ * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
+ * W. Marcus Miller - Based on ksocknal
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include "qswnal.h"
+
+/* Global atomic packet counters.  NOTE(review): judging by the names they
+ * track packets launched, transmitted and received; no update site is
+ * visible in this part of the file -- confirm where they are maintained. */
+atomic_t kqswnal_packets_launched;
+atomic_t kqswnal_packets_transmitted;
+atomic_t kqswnal_packets_received;
+
+
+/*
+ * LIB functions follow
+ *
+ */
+/*
+ * Library "read" callback: a plain memcpy() of 'len' bytes from
+ * 'src_addr' to 'dst_addr'.  'private' is unused.  Always returns 0.
+ */
+static int
+kqswnal_read(nal_cb_t *nal, void *private, void *dst_addr, user_ptr src_addr,
+             size_t len)
+{
+        CDEBUG (D_NET, LPX64": reading "LPSZ" bytes from %p -> %p\n",
+                nal->ni.nid, len, src_addr, dst_addr );
+
+        memcpy( dst_addr, src_addr, len );
+        return 0;
+}
+
+/*
+ * Library "write" callback: a plain memcpy() of 'len' bytes from
+ * 'src_addr' to 'dst_addr'.  'private' is unused.  Always returns 0.
+ */
+static int
+kqswnal_write(nal_cb_t *nal, void *private, user_ptr dst_addr, void *src_addr,
+              size_t len)
+{
+        CDEBUG (D_NET, LPX64": writing "LPSZ" bytes from %p -> %p\n",
+                nal->ni.nid, len, src_addr, dst_addr );
+
+        memcpy( dst_addr, src_addr, len );
+        return 0;
+}
+
+/*
+ * Library allocation callback: returns whatever PORTAL_ALLOC() produced
+ * for a request of 'len' bytes.
+ */
+static void *
+kqswnal_malloc(nal_cb_t *nal, size_t len)
+{
+        void *buf;
+
+        PORTAL_ALLOC(buf, len);
+
+        return buf;
+}
+
+/*
+ * Library free callback: releases 'buf' through PORTAL_FREE(); 'len' is
+ * the size that was originally requested.
+ */
+static void
+kqswnal_free(nal_cb_t *nal, void *buf, size_t len)
+{
+        PORTAL_FREE(buf, len);
+}
+
+/*
+ * Library printf callback: formats into a fixed 256-byte stack buffer
+ * (longer output is truncated) and emits the result through CDEBUG at
+ * D_NET level.
+ */
+static void
+kqswnal_printf (nal_cb_t * nal, const char *fmt, ...)
+{
+        char    text[256];
+        va_list args;
+
+        va_start (args, fmt);
+        vsnprintf (text, sizeof (text), fmt, args);
+        va_end (args);
+
+        /* belt and braces: force termination whatever vsnprintf did */
+        text[sizeof (text) - 1] = 0;
+
+        CDEBUG (D_NET, "%s", text);
+}
+
+
+/*
+ * Library "disable interrupts" callback: takes the NAL state lock with
+ * spin_lock_irqsave(), storing the saved interrupt state in *flags.
+ */
+static void
+kqswnal_cli(nal_cb_t *nal, unsigned long *flags)
+{
+        kqswnal_data_t *state;
+
+        state = nal->nal_data;
+        spin_lock_irqsave(&state->kqn_statelock, *flags);
+}
+
+
+/*
+ * Library "enable interrupts" callback: drops the NAL state lock with
+ * spin_unlock_irqrestore(), restoring the interrupt state from *flags.
+ */
+static void
+kqswnal_sti(nal_cb_t *nal, unsigned long *flags)
+{
+        kqswnal_data_t *state;
+
+        state = nal->nal_data;
+        spin_unlock_irqrestore(&state->kqn_statelock, *flags);
+}
+
+
+/*
+ * Library distance callback.  Distance is not meaningful on this
+ * network, so report 0 for our own nid and 1 for anything else.
+ * Always returns 0.
+ */
+static int
+kqswnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
+{
+        if (nid == nal->ni.nid)
+                *dist = 0;
+        else
+                *dist = 1;
+
+        return 0;
+}
+
+/*
+ * Is 'nid' a node we can reach directly?  True (1) when 'nid' fits in an
+ * unsigned int without losing bits and is below the number of nodes the
+ * Elan device reports; false (0) otherwise.
+ */
+int
+kqswnal_ispeer (ptl_nid_t nid)
+{
+        unsigned int elanid = (unsigned int)nid;
+
+        /* high bits were lost in the conversion: not an Elan id */
+        if ((ptl_nid_t)elanid != nid)
+                return 0;
+
+        /* in this machine? */
+        return (elanid < ep_numnodes (kqswnal_data.kqn_epdev));
+}
+
+/*
+ * Release the temporary Elan mappings of a transmit descriptor: unloads
+ * ktx_nmappedpages pages starting at ktx_basepage from the tx DMA
+ * handle and resets the count.  Does nothing if no pages are mapped.
+ */
+void
+kqswnal_unmap_tx (kqswnal_tx_t *ktx)
+{
+        if (ktx->ktx_nmappedpages != 0) {
+                CDEBUG (D_NET, "%p[%d] unloading pages %d for %d\n",
+                        ktx, ktx->ktx_niov, ktx->ktx_basepage, ktx->ktx_nmappedpages);
+
+                /* mapped range must lie within this descriptor's window
+                 * and within the reserved handle */
+                LASSERT (ktx->ktx_nmappedpages <= ktx->ktx_npages);
+                LASSERT (ktx->ktx_basepage + ktx->ktx_nmappedpages <=
+                         kqswnal_data.kqn_eptxdmahandle->NumDvmaPages);
+
+                elan3_dvma_unload(kqswnal_data.kqn_epdev->DmaState,
+                                  kqswnal_data.kqn_eptxdmahandle,
+                                  ktx->ktx_basepage, ktx->ktx_nmappedpages);
+
+                ktx->ktx_nmappedpages = 0;
+        }
+}
+
+/*
+ * Map the page fragments kiov[0..niov-1], totalling 'nob' bytes, into the
+ * Elan address window reserved for 'ktx', appending to any fragments and
+ * pages the descriptor already has (ktx_niov, ktx_nmappedpages).
+ *
+ * Each kiov entry consumes exactly one reserved page slot.  A fragment
+ * whose Elan address directly follows the previous one is merged into it
+ * instead of using a new ktx_iov slot.
+ *
+ * Returns 0 on success, or -EMSGSIZE if the message needs more pages than
+ * ktx_npages or more fragments than ktx_iov can hold.  On failure
+ * ktx_nmappedpages still counts the pages loaded so far, which is what
+ * kqswnal_unmap_tx() uses to unload them; ktx_niov is only updated on
+ * success.
+ */
+int
+kqswnal_map_tx_kiov (kqswnal_tx_t *ktx, int nob, int niov, ptl_kiov_t *kiov)
+{
+ int nfrags = ktx->ktx_niov;
+ const int maxfrags = sizeof (ktx->ktx_iov)/sizeof (ktx->ktx_iov[0]);
+ int nmapped = ktx->ktx_nmappedpages;
+ int maxmapped = ktx->ktx_npages;
+ /* first free page slot in this descriptor's window */
+ uint32_t basepage = ktx->ktx_basepage + nmapped;
+ char *ptr;
+
+ LASSERT (nmapped <= maxmapped);
+ LASSERT (nfrags <= maxfrags);
+ LASSERT (niov > 0);
+ LASSERT (nob > 0);
+
+ do {
+ int fraglen = kiov->kiov_len;
+
+ /* nob exactly spans the iovs */
+ LASSERT (fraglen <= nob);
+ /* each frag fits in a page */
+ LASSERT (kiov->kiov_offset + kiov->kiov_len <= PAGE_SIZE);
+
+ /* check BEFORE loading; ktx_nmappedpages is not touched on failure */
+ nmapped++;
+ if (nmapped > maxmapped) {
+ CERROR("Can't map message in %d pages (max %d)\n",
+ nmapped, maxmapped);
+ return (-EMSGSIZE);
+ }
+
+ /* the load below writes ktx_iov[nfrags], so a slot must be free */
+ if (nfrags == maxfrags) {
+ CERROR("Message too fragmented in Elan VM (max %d frags)\n",
+ maxfrags);
+ return (-EMSGSIZE);
+ }
+
+ /* XXX this is really crap, but we'll have to kmap until
+ * EKC has a page (rather than vaddr) mapping interface */
+
+ ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset;
+
+ CDEBUG(D_NET,
+ "%p[%d] loading %p for %d, page %d, %d total\n",
+ ktx, nfrags, ptr, fraglen, basepage, nmapped);
+
+ elan3_dvma_kaddr_load (kqswnal_data.kqn_epdev->DmaState,
+ kqswnal_data.kqn_eptxdmahandle,
+ ptr, fraglen,
+ basepage, &ktx->ktx_iov[nfrags].Base);
+
+ kunmap (kiov->kiov_page);
+
+ /* keep in loop for failure case */
+ ktx->ktx_nmappedpages = nmapped;
+
+ if (nfrags > 0 && /* previous frag mapped */
+ ktx->ktx_iov[nfrags].Base == /* contiguous with this one */
+ (ktx->ktx_iov[nfrags-1].Base + ktx->ktx_iov[nfrags-1].Len))
+ /* just extend previous */
+ ktx->ktx_iov[nfrags - 1].Len += fraglen;
+ else {
+ ktx->ktx_iov[nfrags].Len = fraglen;
+ nfrags++; /* new frag */
+ }
+
+ basepage++;
+ kiov++;
+ niov--;
+ nob -= fraglen;
+
+ /* iov must not run out before end of data */
+ LASSERT (nob == 0 || niov > 0);
+
+ } while (nob > 0);
+
+ /* success: publish the new fragment count */
+ ktx->ktx_niov = nfrags;
+ CDEBUG (D_NET, "%p got %d frags over %d pages\n",
+ ktx, ktx->ktx_niov, ktx->ktx_nmappedpages);
+
+ return (0);
+}
+
+/*
+ * Map the virtually addressed fragments iov[0..niov-1], totalling 'nob'
+ * bytes, into the Elan address window reserved for 'ktx', appending to
+ * any fragments and pages the descriptor already has (ktx_niov,
+ * ktx_nmappedpages).
+ *
+ * Each iov entry consumes as many reserved page slots as its buffer
+ * spans.  A fragment whose Elan address directly follows the previous
+ * one is merged into it instead of using a new ktx_iov slot.
+ *
+ * Returns 0 on success, or -EMSGSIZE if the message needs more pages than
+ * ktx_npages or more fragments than ktx_iov can hold.  On failure
+ * ktx_nmappedpages still counts the pages loaded so far, which is what
+ * kqswnal_unmap_tx() uses to unload them; ktx_niov is only updated on
+ * success.
+ */
+int
+kqswnal_map_tx_iov (kqswnal_tx_t *ktx, int nob, int niov, struct iovec *iov)
+{
+ int nfrags = ktx->ktx_niov;
+ const int maxfrags = sizeof (ktx->ktx_iov)/sizeof (ktx->ktx_iov[0]);
+ int nmapped = ktx->ktx_nmappedpages;
+ int maxmapped = ktx->ktx_npages;
+ /* first free page slot in this descriptor's window */
+ uint32_t basepage = ktx->ktx_basepage + nmapped;
+
+ LASSERT (nmapped <= maxmapped);
+ LASSERT (nfrags <= maxfrags);
+ LASSERT (niov > 0);
+ LASSERT (nob > 0);
+
+ do {
+ int fraglen = iov->iov_len;
+ long npages = kqswnal_pages_spanned (iov->iov_base, fraglen);
+
+ /* nob exactly spans the iovs */
+ LASSERT (fraglen <= nob);
+
+ /* check BEFORE loading; ktx_nmappedpages is not touched on failure */
+ nmapped += npages;
+ if (nmapped > maxmapped) {
+ CERROR("Can't map message in %d pages (max %d)\n",
+ nmapped, maxmapped);
+ return (-EMSGSIZE);
+ }
+
+ /* the load below writes ktx_iov[nfrags], so a slot must be free */
+ if (nfrags == maxfrags) {
+ CERROR("Message too fragmented in Elan VM (max %d frags)\n",
+ maxfrags);
+ return (-EMSGSIZE);
+ }
+
+ CDEBUG(D_NET,
+ "%p[%d] loading %p for %d, pages %d for %ld, %d total\n",
+ ktx, nfrags, iov->iov_base, fraglen, basepage, npages,
+ nmapped);
+
+ elan3_dvma_kaddr_load (kqswnal_data.kqn_epdev->DmaState,
+ kqswnal_data.kqn_eptxdmahandle,
+ iov->iov_base, fraglen,
+ basepage, &ktx->ktx_iov[nfrags].Base);
+ /* keep in loop for failure case */
+ ktx->ktx_nmappedpages = nmapped;
+
+ if (nfrags > 0 && /* previous frag mapped */
+ ktx->ktx_iov[nfrags].Base == /* contiguous with this one */
+ (ktx->ktx_iov[nfrags-1].Base + ktx->ktx_iov[nfrags-1].Len))
+ /* just extend previous */
+ ktx->ktx_iov[nfrags - 1].Len += fraglen;
+ else {
+ ktx->ktx_iov[nfrags].Len = fraglen;
+ nfrags++; /* new frag */
+ }
+
+ basepage += npages;
+ iov++;
+ niov--;
+ nob -= fraglen;
+
+ /* iov must not run out before end of data */
+ LASSERT (nob == 0 || niov > 0);
+
+ } while (nob > 0);
+
+ /* success: publish the new fragment count */
+ ktx->ktx_niov = nfrags;
+ CDEBUG (D_NET, "%p got %d frags over %d pages\n",
+ ktx, ktx->ktx_niov, ktx->ktx_nmappedpages);
+
+ return (0);
+}
+
+/*
+ * Release 'ktx': drop its temporary mappings, mark it idle and put it back on
+ * the idle list it belongs to (ktx_idle).  When a descriptor is returned to
+ * the normal pool, one forwarding descriptor queued on kqn_idletxd_fwdq (if
+ * any) is moved to kqn_delayedfwds for the scheduler, and any process
+ * sleeping on kqn_idletxd_waitq is woken.
+ */
+void
+kqswnal_put_idle_tx (kqswnal_tx_t *ktx)
+{
+        struct list_head *pool = ktx->ktx_idle;
+        kpr_fwd_desc_t   *blocked_fwd = NULL;
+        unsigned long     flags;
+
+        /* release temporary mappings */
+        kqswnal_unmap_tx (ktx);
+        ktx->ktx_state = KTX_IDLE;
+
+        spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags);
+
+        list_add (&ktx->ktx_list, pool);
+
+        /* descriptors reserved for non-blocking tx never wake waiters */
+        if (pool != &kqswnal_data.kqn_nblk_idletxds) {
+                /* a forwarded packet blocked for a tx descriptor? */
+                if (!list_empty (&kqswnal_data.kqn_idletxd_fwdq)) {
+                        CDEBUG(D_NET,"wakeup fwd\n");
+
+                        blocked_fwd = list_entry (kqswnal_data.kqn_idletxd_fwdq.next,
+                                                  kpr_fwd_desc_t, kprfd_list);
+                        list_del (&blocked_fwd->kprfd_list);
+                }
+
+                /* a local sender blocked for a tx descriptor? */
+                if (waitqueue_active (&kqswnal_data.kqn_idletxd_waitq)) {
+                        CDEBUG(D_NET,"wakeup process\n");
+                        wake_up (&kqswnal_data.kqn_idletxd_waitq);
+                }
+        }
+
+        spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
+
+        if (blocked_fwd == NULL)
+                return;
+
+        /* hand the blocked packet to the scheduler to be forwarded again */
+        spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
+
+        list_add_tail (&blocked_fwd->kprfd_list, &kqswnal_data.kqn_delayedfwds);
+        if (waitqueue_active (&kqswnal_data.kqn_sched_waitq))
+                wake_up (&kqswnal_data.kqn_sched_waitq);
+
+        spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
+}
+
+/*
+ * Get an idle transmit descriptor.
+ *
+ * A descriptor from the "normal" pool (kqn_idletxds) is returned if one is
+ * free.  Otherwise:
+ *  - if 'fwd' is non-NULL it is queued on kqn_idletxd_fwdq and NULL is
+ *    returned;
+ *  - else if 'may_block' is zero, a descriptor is taken from the reserved
+ *    non-blocking pool (kqn_nblk_idletxds), or NULL is returned if that pool
+ *    is empty too;
+ *  - else the caller sleeps until the normal pool is non-empty and retries.
+ *
+ * Every 'break' leaves the loop with kqn_idletxd_lock held; it is dropped
+ * once after the loop.
+ */
+kqswnal_tx_t *
+kqswnal_get_idle_tx (kpr_fwd_desc_t *fwd, int may_block)
+{
+ unsigned long flags;
+ kqswnal_tx_t *ktx = NULL;
+
+ for (;;) {
+ spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags);
+
+ /* "normal" descriptor is free */
+ if (!list_empty (&kqswnal_data.kqn_idletxds)) {
+ ktx = list_entry (kqswnal_data.kqn_idletxds.next,
+ kqswnal_tx_t, ktx_list);
+ list_del (&ktx->ktx_list);
+ break;
+ }
+
+ /* "normal" descriptor pool is empty */
+
+ if (fwd != NULL) { /* forwarded packet => queue for idle txd */
+ CDEBUG (D_NET, "blocked fwd [%p]\n", fwd);
+ list_add_tail (&fwd->kprfd_list,
+ &kqswnal_data.kqn_idletxd_fwdq);
+ break;
+ }
+
+ /* doing a local transmit */
+ if (!may_block) {
+ if (list_empty (&kqswnal_data.kqn_nblk_idletxds)) {
+ CERROR ("intr tx desc pool exhausted\n");
+ break;
+ }
+
+ ktx = list_entry (kqswnal_data.kqn_nblk_idletxds.next,
+ kqswnal_tx_t, ktx_list);
+ list_del (&ktx->ktx_list);
+ break;
+ }
+
+ /* block for idle tx */
+
+ /* must not sleep holding the spinlock; the list is re-checked
+ * under the lock at the top of the loop after waking */
+ spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
+
+ CDEBUG (D_NET, "blocking for tx desc\n");
+ wait_event (kqswnal_data.kqn_idletxd_waitq,
+ !list_empty (&kqswnal_data.kqn_idletxds));
+ }
+
+ spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
+
+ /* Idle descs can't have any mapped (as opposed to pre-mapped) pages */
+ LASSERT (ktx == NULL || ktx->ktx_nmappedpages == 0);
+ return (ktx);
+}
+
+/*
+ * Complete a transmit: notify whoever originated the packet (the router for
+ * a forwarded packet, the portals library for a locally sourced one) and
+ * return 'ktx' to its idle pool.  'error' is only passed on to the router;
+ * lib_finalize() is called without it.
+ */
+void
+kqswnal_tx_done (kqswnal_tx_t *ktx, int error)
+{
+        if (ktx->ktx_state == KTX_FORWARDING) {
+                /* router asked me to forward this packet */
+                kpr_fwd_done (&kqswnal_data.kqn_router,
+                              (kpr_fwd_desc_t *)ktx->ktx_args[0], error);
+        } else if (ktx->ktx_state == KTX_SENDING) {
+                /* packet sourced locally */
+                lib_finalize (&kqswnal_lib, ktx->ktx_args[0],
+                              (lib_msg_t *)ktx->ktx_args[1]);
+        } else {
+                /* no other state may complete a transmit */
+                LASSERT (0);
+        }
+
+        kqswnal_put_idle_tx (ktx);
+}
+
+/*
+ * Transmit completion callback passed to ep_transmit_large(): 'arg' is the
+ * kqswnal_tx_t that was launched.  Counts successful transmits, converts any
+ * failure status to -EIO and completes the descriptor.
+ */
+static void
+kqswnal_txhandler(EP_TXD *txd, void *arg, int status)
+{
+        kqswnal_tx_t *ktx = (kqswnal_tx_t *)arg;
+
+        LASSERT (txd != NULL);
+        LASSERT (ktx != NULL);
+
+        CDEBUG(D_NET, "txd %p, arg %p status %d\n", txd, arg, status);
+
+        if (status == EP_SUCCESS) {
+                atomic_inc (&kqswnal_packets_transmitted);
+        } else {
+                CERROR ("kqswnal: Transmit failed with %d\n", status);
+                status = -EIO;
+        }
+
+        kqswnal_tx_done (ktx, status);
+}
+
+/*
+ * Start transmitting 'ktx' with ep_transmit_large(); kqswnal_txhandler() is
+ * registered as the completion callback.
+ *
+ * Returns 0 if the transmit was started, or if it could not be started for
+ * lack of memory (ENOMEM), in which case 'ktx' is queued on kqn_delayedtxds
+ * for the scheduler to retry.  Any other non-zero return from
+ * ep_transmit_large() is passed straight back and the caller still owns
+ * 'ktx'.
+ */
+int
+kqswnal_launch (kqswnal_tx_t *ktx)
+{
+        /* Don't block for transmit descriptor if we're in interrupt context */
+        int           attr = in_interrupt() ? (EP_NO_SLEEP | EP_NO_ALLOC) : 0;
+        int           rc = ep_transmit_large(kqswnal_data.kqn_eptx, ktx->ktx_nid,
+                                             ktx->ktx_port, attr, kqswnal_txhandler,
+                                             ktx, ktx->ktx_iov, ktx->ktx_niov);
+        /* spin_lock_irqsave() saves the interrupt state in an unsigned long */
+        unsigned long flags;
+
+        if (rc == 0)
+                atomic_inc (&kqswnal_packets_launched);
+
+        /* NB the out-of-memory code tested here is the positive ENOMEM */
+        if (rc != ENOMEM)
+                return (rc);
+
+        /* can't allocate ep txd => queue for later */
+
+        LASSERT (in_interrupt()); /* not called by thread (not looping) */
+
+        spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
+
+        list_add_tail (&ktx->ktx_list, &kqswnal_data.kqn_delayedtxds);
+        if (waitqueue_active (&kqswnal_data.kqn_sched_waitq))
+                wake_up (&kqswnal_data.kqn_sched_waitq);
+
+        spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
+
+        return (0);
+}
+
+
+/*
+ * Return a printable name for the message type of portals header 'hdr'.
+ * The type field is converted with NTOH__u32() before it is examined, the
+ * same way kqswnal_cerror_hdr() treats it.
+ */
+static char *
+hdr_type_string (ptl_hdr_t *hdr)
+{
+        switch (NTOH__u32(hdr->type)) {
+        case PTL_MSG_ACK:
+                return ("ACK");
+        case PTL_MSG_PUT:
+                return ("PUT");
+        case PTL_MSG_GET:
+                return ("GET");
+        case PTL_MSG_REPLY:
+                return ("REPLY");
+        default:
+                return ("<UNKNOWN>");
+        }
+}
+
+/*
+ * Dump portals header 'hdr' to the error log: the common source/destination
+ * fields first, then the fields specific to its message type.  Numeric
+ * fields are converted with NTOH__u32()/NTOH__u64() before printing; the
+ * wire-handle cookies and the PUT header data are printed as stored.
+ */
+static void
+kqswnal_cerror_hdr(ptl_hdr_t * hdr)
+{
+        char *type_str = hdr_type_string (hdr);
+
+        CERROR("P3 Header at %p of type %s\n", hdr, type_str);
+        CERROR(" From nid/pid "LPU64"/%u\n", NTOH__u64(hdr->src_nid),
+               NTOH__u32(hdr->src_pid));
+        CERROR(" To nid/pid "LPU64"/%u\n", NTOH__u64(hdr->dest_nid),
+               NTOH__u32(hdr->dest_pid));
+
+        switch (NTOH__u32(hdr->type)) {
+        case PTL_MSG_PUT:
+                CERROR(" Ptl index %d, ack md "LPX64"."LPX64", "
+                       "match bits "LPX64"\n",
+                       NTOH__u32 (hdr->msg.put.ptl_index),
+                       hdr->msg.put.ack_wmd.wh_interface_cookie,
+                       hdr->msg.put.ack_wmd.wh_object_cookie,
+                       NTOH__u64 (hdr->msg.put.match_bits));
+                CERROR(" Length %d, offset %d, hdr data "LPX64"\n",
+                       NTOH__u32(PTL_HDR_LENGTH(hdr)),
+                       NTOH__u32(hdr->msg.put.offset),
+                       hdr->msg.put.hdr_data);
+                break;
+
+        case PTL_MSG_GET:
+                /* match bits are byte-swapped exactly as for PUT above */
+                CERROR(" Ptl index %d, return md "LPX64"."LPX64", "
+                       "match bits "LPX64"\n",
+                       NTOH__u32 (hdr->msg.get.ptl_index),
+                       hdr->msg.get.return_wmd.wh_interface_cookie,
+                       hdr->msg.get.return_wmd.wh_object_cookie,
+                       NTOH__u64 (hdr->msg.get.match_bits));
+                CERROR(" Length %d, src offset %d\n",
+                       NTOH__u32 (hdr->msg.get.sink_length),
+                       NTOH__u32 (hdr->msg.get.src_offset));
+                break;
+
+        case PTL_MSG_ACK:
+                CERROR(" dst md "LPX64"."LPX64", manipulated length %d\n",
+                       hdr->msg.ack.dst_wmd.wh_interface_cookie,
+                       hdr->msg.ack.dst_wmd.wh_object_cookie,
+                       NTOH__u32 (hdr->msg.ack.mlength));
+                break;
+
+        case PTL_MSG_REPLY:
+                CERROR(" dst md "LPX64"."LPX64", length %d\n",
+                       hdr->msg.reply.dst_wmd.wh_interface_cookie,
+                       hdr->msg.reply.dst_wmd.wh_object_cookie,
+                       NTOH__u32 (PTL_HDR_LENGTH(hdr)));
+                break;
+
+        default:
+                /* unknown type: only the common fields can be printed */
+                break;
+        }
+
+} /* end of kqswnal_cerror_hdr() */
+
+/*
+ * Common send path behind kqswnal_send() and kqswnal_send_pages().
+ *
+ * Sends portals header 'hdr' plus 'payload_nob' bytes of payload to 'nid'
+ * (or to the gateway the router names if 'nid' is not a direct peer).  The
+ * payload is described either by 'payload_iov' (virtual addresses) or by
+ * 'payload_kiov' (pages), never both.  Payloads of at most
+ * KQSW_TX_MAXCONTIG bytes are copied behind the header in the descriptor's
+ * pre-mapped buffer; larger ones are mapped in place.
+ *
+ * Returns 0 if the message was launched (lib_finalize() then happens on
+ * transmit completion) or -1 on failure.  Every failure path calls
+ * lib_finalize() itself and gives back any descriptor it had obtained.
+ */
+static int
+kqswnal_sendmsg (nal_cb_t *nal,
+                 void *private,
+                 lib_msg_t *cookie,
+                 ptl_hdr_t *hdr,
+                 int type,
+                 ptl_nid_t nid,
+                 ptl_pid_t pid,
+                 unsigned int payload_niov,
+                 struct iovec *payload_iov,
+                 ptl_kiov_t *payload_kiov,
+                 size_t payload_nob)
+{
+        kqswnal_tx_t *ktx;
+        int rc;
+        ptl_nid_t gatewaynid;
+#if KQSW_CHECKSUM
+        int i;
+        kqsw_csum_t csum;
+        int sumnob;
+#endif
+
+        /* NB, the return code from this procedure is ignored.
+         * If we can't send, we must still complete with lib_finalize().
+         * We'll have to wait for 3.2 to return an error event.
+         */
+
+        CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid: "LPX64
+               " pid %u\n", payload_nob, payload_niov, nid, pid);
+
+        LASSERT (payload_nob == 0 || payload_niov > 0);
+        LASSERT (payload_niov <= PTL_MD_MAX_IOV);
+
+        /* It must be OK to kmap() if required */
+        LASSERT (payload_kiov == NULL || !in_interrupt ());
+        /* payload is either all vaddrs or all pages */
+        LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
+
+        if (payload_nob > KQSW_MAXPAYLOAD) {
+                CERROR ("request exceeds MTU size "LPSZ" (max %u).\n",
+                        payload_nob, KQSW_MAXPAYLOAD);
+                lib_finalize (&kqswnal_lib, private, cookie);
+                return (-1);
+        }
+
+        if (!kqswnal_ispeer (nid)) { /* Can't send direct: find gateway? */
+                rc = kpr_lookup (&kqswnal_data.kqn_router, nid, &gatewaynid);
+                if (rc != 0) {
+                        CERROR("Can't route to "LPX64": router error %d\n",
+                               nid, rc);
+                        lib_finalize (&kqswnal_lib, private, cookie);
+                        return (-1);
+                }
+                if (!kqswnal_ispeer (gatewaynid)) {
+                        CERROR("Bad gateway "LPX64" for "LPX64"\n",
+                               gatewaynid, nid);
+                        lib_finalize (&kqswnal_lib, private, cookie);
+                        return (-1);
+                }
+                nid = gatewaynid;
+        }
+
+        /* I may not block for a transmit descriptor if I might block the
+         * receiver, or an interrupt handler. */
+        ktx = kqswnal_get_idle_tx(NULL, !(type == PTL_MSG_ACK ||
+                                          type == PTL_MSG_REPLY ||
+                                          in_interrupt()));
+        if (ktx == NULL) {
+                kqswnal_cerror_hdr (hdr);
+                lib_finalize (&kqswnal_lib, private, cookie);
+                /* no descriptor: must not fall through and dereference it */
+                return (-1);
+        }
+
+        memcpy (ktx->ktx_buffer, hdr, sizeof (*hdr)); /* copy hdr from caller's stack */
+
+#if KQSW_CHECKSUM
+        csum = kqsw_csum (0, (char *)hdr, sizeof (*hdr));
+        memcpy (ktx->ktx_buffer + sizeof (*hdr), &csum, sizeof (csum));
+        for (csum = 0, i = 0, sumnob = payload_nob; sumnob > 0; i++) {
+                if (payload_kiov != NULL) {
+                        ptl_kiov_t *kiov = &payload_kiov[i];
+                        char *addr = ((char *)kmap (kiov->kiov_page)) +
+                                     kiov->kiov_offset;
+
+                        csum = kqsw_csum (csum, addr, MIN (sumnob, kiov->kiov_len));
+                        /* balance the kmap() above */
+                        kunmap (kiov->kiov_page);
+                        sumnob -= kiov->kiov_len;
+                } else {
+                        struct iovec *iov = &payload_iov[i];
+
+                        csum = kqsw_csum (csum, iov->iov_base, MIN (sumnob, iov->iov_len));
+                        sumnob -= iov->iov_len;
+                }
+        }
+        memcpy(ktx->ktx_buffer +sizeof(*hdr) +sizeof(csum), &csum,sizeof(csum));
+#endif
+
+        /* Set up first frag from pre-mapped buffer (it's at least the
+         * portals header) */
+        ktx->ktx_iov[0].Base = ktx->ktx_ebuffer;
+        ktx->ktx_iov[0].Len = KQSW_HDR_SIZE;
+        ktx->ktx_niov = 1;
+
+        if (payload_nob > 0) { /* got some payload (something more to do) */
+                /* make a single contiguous message? */
+                if (payload_nob <= KQSW_TX_MAXCONTIG) {
+                        /* copy payload to ktx_buffer, immediately after hdr */
+                        if (payload_kiov != NULL)
+                                lib_copy_kiov2buf (ktx->ktx_buffer + KQSW_HDR_SIZE,
+                                                   payload_niov, payload_kiov, payload_nob);
+                        else
+                                lib_copy_iov2buf (ktx->ktx_buffer + KQSW_HDR_SIZE,
+                                                  payload_niov, payload_iov, payload_nob);
+                        /* first frag includes payload */
+                        ktx->ktx_iov[0].Len += payload_nob;
+                } else {
+                        if (payload_kiov != NULL)
+                                rc = kqswnal_map_tx_kiov (ktx, payload_nob,
+                                                          payload_niov, payload_kiov);
+                        else
+                                rc = kqswnal_map_tx_iov (ktx, payload_nob,
+                                                         payload_niov, payload_iov);
+                        if (rc != 0) {
+                                kqswnal_put_idle_tx (ktx);
+                                lib_finalize (&kqswnal_lib, private, cookie);
+                                return (-1);
+                        }
+                }
+        }
+
+        ktx->ktx_port = (payload_nob <= KQSW_SMALLPAYLOAD) ?
+                        EP_SVC_LARGE_PORTALS_SMALL : EP_SVC_LARGE_PORTALS_LARGE;
+        ktx->ktx_nid = nid;
+        ktx->ktx_state = KTX_SENDING; /* => lib_finalize() on completion */
+        ktx->ktx_args[0] = private;
+        ktx->ktx_args[1] = cookie;
+
+        rc = kqswnal_launch (ktx);
+        if (rc != 0) { /* failed? */
+                CERROR ("Failed to send packet to "LPX64": %d\n", nid, rc);
+                /* the transmit never started, so no completion callback will
+                 * release the descriptor: give it back here */
+                kqswnal_put_idle_tx (ktx);
+                lib_finalize (&kqswnal_lib, private, cookie);
+                return (-1);
+        }
+
+        CDEBUG(D_NET, "send to "LPSZ" bytes to "LPX64"\n", payload_nob, nid);
+        return (0);
+}
+
+/*
+ * Send callback for payloads described by virtual-address fragments
+ * (installed as cb_send in kqswnal_lib): forwards to kqswnal_sendmsg() with
+ * no page fragments.
+ */
+static int
+kqswnal_send (nal_cb_t *nal,
+              void *private,
+              lib_msg_t *cookie,
+              ptl_hdr_t *hdr,
+              int type,
+              ptl_nid_t nid,
+              ptl_pid_t pid,
+              unsigned int payload_niov,
+              struct iovec *payload_iov,
+              size_t payload_nob)
+{
+        int rc;
+
+        rc = kqswnal_sendmsg (nal, private, cookie, hdr, type, nid, pid,
+                              payload_niov, payload_iov, NULL, payload_nob);
+        return rc;
+}
+
+/*
+ * Send callback for payloads described by page fragments (installed as
+ * cb_send_pages in kqswnal_lib): forwards to kqswnal_sendmsg() with no
+ * virtual-address fragments.
+ */
+static int
+kqswnal_send_pages (nal_cb_t *nal,
+                    void *private,
+                    lib_msg_t *cookie,
+                    ptl_hdr_t *hdr,
+                    int type,
+                    ptl_nid_t nid,
+                    ptl_pid_t pid,
+                    unsigned int payload_niov,
+                    ptl_kiov_t *payload_kiov,
+                    size_t payload_nob)
+{
+        int rc;
+
+        rc = kqswnal_sendmsg (nal, private, cookie, hdr, type, nid, pid,
+                              payload_niov, NULL, payload_kiov, payload_nob);
+        return rc;
+}
+
+/* When non-zero, kqswnal_fwd_packet() copies even single-fragment packets
+ * into the descriptor's pre-mapped buffer (provided they fit) instead of
+ * mapping them in place. */
+int kqswnal_fwd_copy_contig = 0;
+
+/*
+ * Forward packet 'fwd' on behalf of the router.  'arg' is unused.
+ *
+ * If no transmit descriptor is free, get_idle_tx queues 'fwd' and this
+ * returns; the packet is retried when a descriptor is released.  Otherwise
+ * the packet is either copied into the descriptor's pre-mapped buffer or
+ * mapped in place, then launched.  Any failure releases the descriptor and
+ * completes 'fwd' immediately with the error.
+ */
+void
+kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
+{
+        struct iovec *frags  = fwd->kprfd_iov;
+        int           nfrags = fwd->kprfd_niov;
+        int           nbytes = fwd->kprfd_nob;
+        ptl_nid_t     nid    = fwd->kprfd_gateway_nid;
+        kqswnal_tx_t *ktx;
+        int           rc;
+
+#if KQSW_CHECKSUM
+        CERROR ("checksums for forwarded packets not implemented\n");
+        LBUG ();
+#endif
+        /* The router wants this NAL to forward a packet */
+        CDEBUG (D_NET, "forwarding [%p] to "LPX64", %d frags %d bytes\n",
+                fwd, nid, nfrags, nbytes);
+
+        LASSERT (nfrags > 0);
+
+        ktx = kqswnal_get_idle_tx (fwd, FALSE);
+        if (ktx == NULL) {
+                /* can't get txd right now;
+                 * fwd will be scheduled when tx desc freed */
+                return;
+        }
+
+        /* gateway is me => target is final dest */
+        if (nid == kqswnal_lib.ni.nid)
+                nid = fwd->kprfd_target_nid;
+
+        if (!kqswnal_ispeer (nid)) {
+                CERROR("Can't forward [%p] to "LPX64": not a peer\n", fwd, nid);
+                rc = -EHOSTUNREACH;
+                goto failed;
+        }
+
+        if (nbytes > KQSW_NRXMSGBYTES_LARGE) {
+                CERROR ("Can't forward [%p] to "LPX64
+                        ": size %d bigger than max packet size %ld\n",
+                        fwd, nid, nbytes, (long)KQSW_NRXMSGBYTES_LARGE);
+                rc = -EMSGSIZE;
+                goto failed;
+        }
+
+        if ((kqswnal_fwd_copy_contig || nfrags > 1) &&
+            nbytes <= KQSW_TX_BUFFER_SIZE) {
+                /* send from ktx's pre-allocated/mapped contiguous buffer */
+                lib_copy_iov2buf (ktx->ktx_buffer, nfrags, frags, nbytes);
+                ktx->ktx_iov[0].Base = ktx->ktx_ebuffer; /* already mapped */
+                ktx->ktx_iov[0].Len  = nbytes;
+                ktx->ktx_niov        = 1;
+        } else {
+                /* zero copy: no frags mapped yet */
+                ktx->ktx_niov = 0;
+                rc = kqswnal_map_tx_iov (ktx, nbytes, nfrags, frags);
+                if (rc != 0)
+                        goto failed;
+        }
+
+        if (nbytes <= (sizeof (ptl_hdr_t) + KQSW_SMALLPAYLOAD))
+                ktx->ktx_port = EP_SVC_LARGE_PORTALS_SMALL;
+        else
+                ktx->ktx_port = EP_SVC_LARGE_PORTALS_LARGE;
+        ktx->ktx_nid     = nid;
+        ktx->ktx_state   = KTX_FORWARDING; /* kpr_put_packet() on completion */
+        ktx->ktx_args[0] = fwd;
+
+        rc = kqswnal_launch (ktx);
+        if (rc == 0)
+                return;
+
+ failed:
+        LASSERT (rc != 0);
+        CERROR ("Failed to forward [%p] to "LPX64": %d\n", fwd, nid, rc);
+
+        kqswnal_put_idle_tx (ktx);
+        /* complete now (with failure) */
+        kpr_fwd_done (&kqswnal_data.kqn_router, fwd, rc);
+}
+
+/*
+ * Completion callback given to kpr_fwd_init(): the router is done with the
+ * packet held in receive descriptor 'arg'.  Logs a routing failure, if any,
+ * and requeues the receive descriptor.
+ */
+void
+kqswnal_fwd_callback (void *arg, int error)
+{
+        kqswnal_rx_t *krx = (kqswnal_rx_t *)arg;
+
+        if (error != 0) {
+                /* the portals header sits at the start of the first page */
+                ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_pages[0]);
+
+                CERROR("Failed to route packet from "LPX64" to "LPX64": %d\n",
+                       NTOH__u64(hdr->src_nid), NTOH__u64(hdr->dest_nid),error);
+        }
+
+        kqswnal_requeue_rx (krx);
+}
+
+/*
+ * Process a received packet.  A packet addressed to this node is handed to
+ * lib_parse().  A packet addressed to a peer is dropped, since it should have
+ * been sent to that peer directly.  Anything else is passed to the router
+ * for forwarding, one iovec entry per receive page.
+ */
+void
+kqswnal_rx (kqswnal_rx_t *krx)
+{
+        ptl_hdr_t *hdr = (ptl_hdr_t *) page_address (krx->krx_pages[0]);
+        ptl_nid_t  dest_nid = NTOH__u64 (hdr->dest_nid);
+        int        remaining;
+        int        nfrags;
+
+        if (dest_nid == kqswnal_lib.ni.nid) {
+                /* It's for me :)
+                 * NB krx requeued when lib_parse() calls back kqswnal_recv */
+                lib_parse (&kqswnal_lib, hdr, krx);
+                return;
+        }
+
+#if KQSW_CHECKSUM
+        CERROR ("checksums for forwarded packets not implemented\n");
+        LBUG ();
+#endif
+        if (kqswnal_ispeer (dest_nid)) {
+                /* should have gone direct to peer */
+                CERROR("dropping packet from "LPX64" for "LPX64
+                       ": target is peer\n", NTOH__u64(hdr->src_nid), dest_nid);
+                kqswnal_requeue_rx (krx);
+                return;
+        }
+
+        /* NB forwarding may destroy iov; rebuild every time */
+        remaining = krx->krx_nob;
+        nfrags = 0;
+        while (remaining > 0) {
+                LASSERT (nfrags < krx->krx_npages);
+                krx->krx_iov[nfrags].iov_base = page_address(krx->krx_pages[nfrags]);
+                krx->krx_iov[nfrags].iov_len  = MIN(PAGE_SIZE, remaining);
+
+                remaining -= PAGE_SIZE;
+                nfrags++;
+        }
+
+        kpr_fwd_init (&krx->krx_fwd, dest_nid,
+                      krx->krx_nob, nfrags, krx->krx_iov,
+                      kqswnal_fwd_callback, krx);
+
+        kpr_fwd_start (&kqswnal_data.kqn_router, &krx->krx_fwd);
+}
+
+/*
+ * Receive Interrupt Handler: posts to schedulers.
+ *
+ * Records the receive descriptor and byte count in the kqswnal_rx_t attached
+ * to 'rxd'.  A failed receive, or one too short to hold a portals header, is
+ * logged and requeued (or silently abandoned while shutting down).  A good
+ * receive is queued on kqn_readyrxds and the scheduler is woken.
+ */
+void
+kqswnal_rxhandler(EP_RXD *rxd)
+{
+        /* spin_lock_irqsave() saves the interrupt state in an unsigned long */
+        unsigned long flags;
+        int           nob = ep_rxd_len (rxd);
+        int           status = ep_rxd_status (rxd);
+        kqswnal_rx_t *krx = (kqswnal_rx_t *)ep_rxd_arg (rxd);
+
+        CDEBUG(D_NET, "kqswnal_rxhandler: rxd %p, krx %p, nob %d, status %d\n",
+               rxd, krx, nob, status);
+
+        LASSERT (krx != NULL);
+
+        krx->krx_rxd = rxd;
+        krx->krx_nob = nob;
+
+        /* must receive a whole header to be able to parse.
+         * NB compare as signed: converting a negative 'nob' to size_t would
+         * make it look huge and slip past this check */
+        if (status != EP_SUCCESS || nob < (int) sizeof (ptl_hdr_t))
+        {
+                /* receives complete with failure when receiver is removed */
+                if (kqswnal_data.kqn_shuttingdown)
+                        return;
+
+                CERROR("receive status failed with status %d nob %d\n",
+                       status, nob);
+                kqswnal_requeue_rx (krx);
+                return;
+        }
+
+        atomic_inc (&kqswnal_packets_received);
+
+        spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
+
+        list_add_tail (&krx->krx_list, &kqswnal_data.kqn_readyrxds);
+        if (waitqueue_active (&kqswnal_data.kqn_sched_waitq))
+                wake_up (&kqswnal_data.kqn_sched_waitq);
+
+        spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
+}
+
+#if KQSW_CHECKSUM
+/*
+ * Log a checksum mismatch on the packet held by 'krx'.  'ishdr' selects
+ * whether the header or the payload checksum failed.  The common header
+ * fields are printed first, then the ACK/PUT specific fields; GET and REPLY
+ * details are not decoded.
+ */
+void
+kqswnal_csum_error (kqswnal_rx_t *krx, int ishdr)
+{
+        ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_pages[0]);
+
+        CERROR ("%s checksum mismatch %p: dnid "LPX64", snid "LPX64
+                ", dpid %d, spid %d, type %d\n",
+                ishdr ? "Header" : "Payload", krx,
+                NTOH__u64(hdr->dest_nid), NTOH__u64(hdr->src_nid),
+                NTOH__u32(hdr->dest_pid), NTOH__u32(hdr->src_pid),
+                NTOH__u32(hdr->type));
+
+        switch (NTOH__u32 (hdr->type))
+        {
+        case PTL_MSG_ACK:
+                /* wire-handle fields named as in kqswnal_cerror_hdr() */
+                CERROR("ACK: mlen %d dmd "LPX64"."LPX64" match "LPX64
+                       " len %u\n",
+                       NTOH__u32(hdr->msg.ack.mlength),
+                       hdr->msg.ack.dst_wmd.wh_interface_cookie,
+                       hdr->msg.ack.dst_wmd.wh_object_cookie,
+                       NTOH__u64(hdr->msg.ack.match_bits),
+                       NTOH__u32(hdr->msg.ack.length));
+                break;
+        case PTL_MSG_PUT:
+                CERROR("PUT: ptl %d amd "LPX64"."LPX64" match "LPX64
+                       " len %u off %u data "LPX64"\n",
+                       NTOH__u32(hdr->msg.put.ptl_index),
+                       hdr->msg.put.ack_wmd.wh_interface_cookie,
+                       hdr->msg.put.ack_wmd.wh_object_cookie,
+                       NTOH__u64(hdr->msg.put.match_bits),
+                       NTOH__u32(hdr->msg.put.length),
+                       NTOH__u32(hdr->msg.put.offset),
+                       hdr->msg.put.hdr_data);
+                break;
+        case PTL_MSG_GET:
+                CERROR ("GET: <>\n");
+                break;
+        case PTL_MSG_REPLY:
+                CERROR ("REPLY: <>\n");
+                break;
+        default:
+                CERROR ("TYPE?: <>\n");
+        }
+}
+#endif
+
+/*
+ * Common receive path behind kqswnal_recv() and kqswnal_recv_pages().
+ *
+ * Copies 'mlen' bytes of payload out of the receive pages of 'private' (a
+ * kqswnal_rx_t), starting KQSW_HDR_SIZE bytes into the first page, into the
+ * destination described by either 'iov' (virtual addresses) or 'kiov'
+ * (pages, kmap()ed one at a time), never both.  The destination fragments
+ * are expected to total exactly 'mlen' bytes.
+ *
+ * Always finalizes 'cookie', requeues the receive descriptor and returns
+ * 'rlen'.  With KQSW_CHECKSUM the header and payload checksums sent with
+ * the packet are verified and mismatches logged.
+ */
+static int
+kqswnal_recvmsg (nal_cb_t *nal,
+ void *private,
+ lib_msg_t *cookie,
+ unsigned int niov,
+ struct iovec *iov,
+ ptl_kiov_t *kiov,
+ size_t mlen,
+ size_t rlen)
+{
+ kqswnal_rx_t *krx = (kqswnal_rx_t *)private;
+ int page;
+ char *page_ptr;
+ int page_nob;
+ char *iov_ptr;
+ int iov_nob;
+ int frag;
+#if KQSW_CHECKSUM
+ kqsw_csum_t senders_csum;
+ kqsw_csum_t payload_csum = 0;
+ kqsw_csum_t hdr_csum = kqsw_csum(0, page_address(krx->krx_pages[0]),
+ sizeof(ptl_hdr_t));
+ size_t csum_len = mlen;
+ int csum_frags = 0;
+ int csum_nob = 0;
+ static atomic_t csum_counter;
+ int csum_verbose = (atomic_read(&csum_counter)%1000001) == 0;
+
+ atomic_inc (&csum_counter);
+
+ /* the sender's header checksum directly follows the portals header */
+ memcpy (&senders_csum, ((char *)page_address (krx->krx_pages[0])) +
+ sizeof (ptl_hdr_t), sizeof (kqsw_csum_t));
+ if (senders_csum != hdr_csum)
+ kqswnal_csum_error (krx, 1);
+#endif
+ CDEBUG(D_NET,"kqswnal_recv, mlen="LPSZ", rlen="LPSZ"\n", mlen, rlen);
+
+ /* What was actually received must be >= payload.
+ * This is an LASSERT, as lib_finalize() doesn't have a completion status. */
+ LASSERT (krx->krx_nob >= KQSW_HDR_SIZE + mlen);
+ LASSERT (mlen <= rlen);
+
+ /* It must be OK to kmap() if required */
+ LASSERT (kiov == NULL || !in_interrupt ());
+ /* Either all pages or all vaddrs */
+ LASSERT (!(kiov != NULL && iov != NULL));
+
+ if (mlen != 0)
+ {
+ /* source cursor: payload starts after the header in page 0 */
+ page = 0;
+ page_ptr = ((char *) page_address(krx->krx_pages[0])) +
+ KQSW_HDR_SIZE;
+ page_nob = PAGE_SIZE - KQSW_HDR_SIZE;
+
+ /* destination cursor: first fragment of iov or kiov */
+ LASSERT (niov > 0);
+ if (kiov != NULL) {
+ iov_ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset;
+ iov_nob = kiov->kiov_len;
+ } else {
+ iov_ptr = iov->iov_base;
+ iov_nob = iov->iov_len;
+ }
+
+ for (;;)
+ {
+ /* We expect the iov to exactly match mlen */
+ LASSERT (iov_nob <= mlen);
+
+ /* copy as much as both the current source page and the
+ * current destination fragment allow */
+ frag = MIN (page_nob, iov_nob);
+ memcpy (iov_ptr, page_ptr, frag);
+#if KQSW_CHECKSUM
+ payload_csum = kqsw_csum (payload_csum, iov_ptr, frag);
+ csum_nob += frag;
+ csum_frags++;
+#endif
+ mlen -= frag;
+ if (mlen == 0)
+ break;
+
+ /* advance the source cursor, moving to the next page
+ * when this one is exhausted */
+ page_nob -= frag;
+ if (page_nob != 0)
+ page_ptr += frag;
+ else
+ {
+ page++;
+ LASSERT (page < krx->krx_npages);
+ page_ptr = page_address(krx->krx_pages[page]);
+ page_nob = PAGE_SIZE;
+ }
+
+ /* advance the destination cursor, moving to the next
+ * fragment when this one is full */
+ iov_nob -= frag;
+ if (iov_nob != 0)
+ iov_ptr += frag;
+ else if (kiov != NULL) {
+ kunmap (kiov->kiov_page);
+ kiov++;
+ niov--;
+ LASSERT (niov > 0);
+ iov_ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset;
+ iov_nob = kiov->kiov_len;
+ } else {
+ iov++;
+ niov--;
+ LASSERT (niov > 0);
+ iov_ptr = iov->iov_base;
+ iov_nob = iov->iov_len;
+ }
+ }
+
+ /* the last destination page is still mapped on loop exit */
+ if (kiov != NULL)
+ kunmap (kiov->kiov_page);
+ }
+
+#if KQSW_CHECKSUM
+ /* the sender's payload checksum follows its header checksum */
+ memcpy (&senders_csum, ((char *)page_address (krx->krx_pages[0])) +
+ sizeof(ptl_hdr_t) + sizeof(kqsw_csum_t), sizeof(kqsw_csum_t));
+
+ /* csum_len is the mlen passed in; the payload checksum is only
+ * compared when it equals rlen */
+ if (csum_len != rlen)
+ CERROR("Unable to checksum data in user's buffer\n");
+ else if (senders_csum != payload_csum)
+ kqswnal_csum_error (krx, 0);
+
+ if (csum_verbose)
+ CERROR("hdr csum %lx, payload_csum %lx, csum_frags %d, "
+ "csum_nob %d\n",
+ hdr_csum, payload_csum, csum_frags, csum_nob);
+#endif
+ lib_finalize(nal, private, cookie);
+
+ kqswnal_requeue_rx (krx);
+
+ return (rlen);
+}
+
+/*
+ * Receive callback for destinations described by virtual-address fragments
+ * (installed as cb_recv in kqswnal_lib): forwards to kqswnal_recvmsg() with
+ * no page fragments.
+ */
+static int
+kqswnal_recv(nal_cb_t *nal,
+             void *private,
+             lib_msg_t *cookie,
+             unsigned int niov,
+             struct iovec *iov,
+             size_t mlen,
+             size_t rlen)
+{
+        int rc;
+
+        rc = kqswnal_recvmsg (nal, private, cookie, niov, iov, NULL, mlen, rlen);
+        return rc;
+}
+
+/*
+ * Receive callback for destinations described by page fragments (installed
+ * as cb_recv_pages in kqswnal_lib): forwards to kqswnal_recvmsg() with no
+ * virtual-address fragments.
+ */
+static int
+kqswnal_recv_pages (nal_cb_t *nal,
+                    void *private,
+                    lib_msg_t *cookie,
+                    unsigned int niov,
+                    ptl_kiov_t *kiov,
+                    size_t mlen,
+                    size_t rlen)
+{
+        int rc;
+
+        rc = kqswnal_recvmsg (nal, private, cookie, niov, NULL, kiov, mlen, rlen);
+        return rc;
+}
+
+/*
+ * Start a kernel thread running fn(arg).  On success the NAL's thread count
+ * (kqn_nthreads) is incremented and 0 is returned; otherwise the negative
+ * value returned by kernel_thread() is passed back.
+ */
+int
+kqswnal_thread_start (int (*fn)(void *arg), void *arg)
+{
+        long pid;
+
+        pid = kernel_thread (fn, arg, 0);
+        if (pid >= 0) {
+                atomic_inc (&kqswnal_data.kqn_nthreads);
+                return 0;
+        }
+
+        return (int)pid;
+}
+
+/* Called by a NAL thread as it exits: undoes the kqn_nthreads increment made
+ * by kqswnal_thread_start(). */
+void
+kqswnal_thread_fini (void)
+{
+        atomic_dec (&kqswnal_data.kqn_nthreads);
+}
+
+/*
+ * Scheduler thread body.  Until kqn_shuttingdown is set it repeatedly
+ * services, one item per pass from each queue:
+ *   - kqn_readyrxds:   received packets, handed to kqswnal_rx();
+ *   - kqn_delayedtxds: transmits that could not be launched earlier;
+ *   - kqn_delayedfwds: forwards that were waiting for a tx descriptor.
+ * kqn_sched_lock protects the queues and is dropped around every call that
+ * does real work.  The thread sleeps when all queues are empty and offers to
+ * reschedule after KQSW_RESCHED consecutive busy passes.
+ *
+ * 'arg' is unused.  Returns 0 when the thread exits.
+ */
+int
+kqswnal_scheduler (void *arg)
+{
+        kqswnal_rx_t   *krx;
+        kqswnal_tx_t   *ktx;
+        kpr_fwd_desc_t *fwd;
+        /* spin_lock_irqsave() saves the interrupt state in an unsigned long */
+        unsigned long   flags;
+        int             rc;
+        int             counter = 0;
+        int             did_something;
+
+        kportal_daemonize ("kqswnal_sched");
+        kportal_blockallsigs ();
+
+        spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
+
+        while (!kqswnal_data.kqn_shuttingdown)
+        {
+                did_something = FALSE;
+
+                if (!list_empty (&kqswnal_data.kqn_readyrxds))
+                {
+                        krx = list_entry(kqswnal_data.kqn_readyrxds.next,
+                                         kqswnal_rx_t, krx_list);
+                        list_del (&krx->krx_list);
+                        spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
+                                               flags);
+
+                        kqswnal_rx (krx);
+
+                        did_something = TRUE;
+                        spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags);
+                }
+
+                if (!list_empty (&kqswnal_data.kqn_delayedtxds))
+                {
+                        ktx = list_entry(kqswnal_data.kqn_delayedtxds.next,
+                                         kqswnal_tx_t, ktx_list);
+                        list_del (&ktx->ktx_list);
+                        spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
+                                               flags);
+
+                        rc = kqswnal_launch (ktx);
+                        if (rc != 0) /* failed: ktx_nid down? */
+                        {
+                                CERROR("Failed delayed transmit to "LPX64
+                                       ": %d\n", ktx->ktx_nid, rc);
+                                kqswnal_tx_done (ktx, rc);
+                        }
+
+                        did_something = TRUE;
+                        spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
+                }
+
+                if (!list_empty (&kqswnal_data.kqn_delayedfwds))
+                {
+                        fwd = list_entry (kqswnal_data.kqn_delayedfwds.next, kpr_fwd_desc_t, kprfd_list);
+                        list_del (&fwd->kprfd_list);
+                        spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
+
+                        kqswnal_fwd_packet (NULL, fwd);
+
+                        did_something = TRUE;
+                        spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
+                }
+
+                /* nothing to do or hogging CPU */
+                if (!did_something || counter++ == KQSW_RESCHED) {
+                        spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
+                                               flags);
+
+                        counter = 0;
+
+                        if (!did_something) {
+                                /* sleep until shutdown or any queue has work */
+                                rc = wait_event_interruptible (kqswnal_data.kqn_sched_waitq,
+                                                               kqswnal_data.kqn_shuttingdown ||
+                                                               !list_empty(&kqswnal_data.kqn_readyrxds) ||
+                                                               !list_empty(&kqswnal_data.kqn_delayedtxds) ||
+                                                               !list_empty(&kqswnal_data.kqn_delayedfwds));
+                                LASSERT (rc == 0);
+                        } else if (current->need_resched)
+                                schedule ();
+
+                        spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
+                }
+        }
+
+        spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
+
+        kqswnal_thread_fini ();
+        return (0);
+}
+
+/* Callback table through which the portals library drives this NAL */
+nal_cb_t kqswnal_lib =
+{
+        .nal_data      = &kqswnal_data,         /* NAL private data */
+        .cb_send       = kqswnal_send,
+        .cb_send_pages = kqswnal_send_pages,
+        .cb_recv       = kqswnal_recv,
+        .cb_recv_pages = kqswnal_recv_pages,
+        .cb_read       = kqswnal_read,
+        .cb_write      = kqswnal_write,
+        .cb_malloc     = kqswnal_malloc,
+        .cb_free       = kqswnal_free,
+        .cb_printf     = kqswnal_printf,
+        .cb_cli        = kqswnal_cli,
+        .cb_sti        = kqswnal_sti,
+        .cb_dist       = kqswnal_dist
+};
--- /dev/null
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+include ../../Rules.linux
+
+MODULE = kscimacnal
+modulenet_DATA = kscimacnal.o
+EXTRA_PROGRAMS = kscimacnal
+
+DEFS =
+kscimacnal_SOURCES = scimacnal.c scimacnal_cb.c scimacnal.h
--- /dev/null
+
+scimacnal - A NAL for the Scali ScaMAC midlayer.
+
+The ScaMAC midlayer is a simplified API to the SCI high performance
+interconnect.
+
+In order to use this NAL you'll need to tune scimac to use larger buffers.
+See scimac.conf in this directory for an example.
+
+Overall performance and stability aren't great, but this can be attributed
+to the scimac driver, which apparently still needs some development.
+
+TODO:
+Routing isn't yet implemented.
--- /dev/null
+# Configuration file for the scimac driver - lustre friendly settings
+#
+
+# The maximal number of message headers to use in the system.
+scimac_max_no_hdrs = 32
+
+# The maximal number of eager buffers to use in the system.
+scimac_max_no_ebufs = 8
+
+# The maximal size in bytes of each eager buffer.
+scimac_max_ebuf_size = 65536
+
+# Enable use of a kernel thread to defer reception of packets.
+# Default is to use a tasklet (sw interrupt).
+scimac_use_ulevel_recv = 1
+
+# The maximal number of packets queued for transfer per path at any one time.
+scimac_max_send_queuelen = 2000
+
+# The packet retransmit time in milliseconds.
+# The time that elapses between an attempt to send a packet and its retransmission.
+scimac_pkt_rexmit_time = 200
+
+# The packet's maximal retransmit time in milliseconds.
+# The total time during which sending a packet is attempted before it is dropped.
+scimac_max_rexmit_time = 5000
+
+# The lowest valid node identifier in the system.
+scimac_min_nodeid_number = 0x100
+
+# The largest valid node identifier in the system.
+scimac_max_nodeid_number = 0xff00
+
+# The incremental nodeid step in the system.
+scimac_nodeid_increment = 0x100
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:cindent:
+ *
+ * Copyright (C) 2003 High Performance Computing Center North (HPC2N)
+ * Author: Niklas Edmundsson <nikke@hpc2n.umu.se>
+
+ * Based on gmnal, which is based on ksocknal and qswnal
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+
+#include "scimacnal.h"
+
+ptl_handle_ni_t kscimacnal_ni;
+nal_t kscimacnal_api;
+
+kscimacnal_data_t kscimacnal_data;
+
+/* Describes this NAL to the portals router: its NAL id and the function
+ * named as its packet-forwarding entry point. */
+kpr_nal_interface_t kscimacnal_router_interface = {
+        .kprni_nalid = SCIMACNAL,
+        .kprni_arg   = NULL,
+        .kprni_fwd   = kscimacnal_fwd_packet,
+};
+
+
+/*
+ * API 'forward' method: pass request 'id' with its argument and return
+ * buffers to lib_dispatch() on this NAL's library side.  Always returns
+ * PTL_OK; 'args_len' and 'ret_len' are not used.
+ */
+static int kscimacnal_forward(nal_t *nal,
+                              int id,
+                              void *args, size_t args_len,
+                              void *ret, size_t ret_len)
+{
+        kscimacnal_data_t *data = nal->nal_data;
+        nal_cb_t          *cb = data->ksci_cb;
+
+        /* there is exactly one instance of each of these */
+        LASSERT (nal == &kscimacnal_api);
+        LASSERT (data == &kscimacnal_data);
+        LASSERT (cb == &kscimacnal_lib);
+
+        /* nal needs ksci */
+        lib_dispatch(cb, data, id, args, ret);
+
+        return PTL_OK;
+}
+
+
+/* API 'lock' method: delegates to the library side's cb_cli callback,
+ * passing 'flags' through. */
+static void kscimacnal_lock(nal_t *nal, unsigned long *flags)
+{
+        kscimacnal_data_t *data = nal->nal_data;
+        nal_cb_t          *cb = data->ksci_cb;
+
+        LASSERT (nal == &kscimacnal_api);
+        LASSERT (data == &kscimacnal_data);
+        LASSERT (cb == &kscimacnal_lib);
+
+        cb->cb_cli(cb,flags);
+}
+
+
+/* API 'unlock' method: delegates to the library side's cb_sti callback,
+ * passing 'flags' through. */
+static void kscimacnal_unlock(nal_t *nal, unsigned long *flags)
+{
+        kscimacnal_data_t *data = nal->nal_data;
+        nal_cb_t          *cb = data->ksci_cb;
+
+        LASSERT (nal == &kscimacnal_api);
+        LASSERT (data == &kscimacnal_data);
+        LASSERT (cb == &kscimacnal_lib);
+
+        cb->cb_sti(cb,flags);
+}
+
+
+/* API 'shutdown' method: nothing to tear down here; only checks that it was
+ * called on this NAL and reports success.  'ni' is unused. */
+static int kscimacnal_shutdown(nal_t *nal, int ni)
+{
+        LASSERT (nal == &kscimacnal_api);
+
+        return 0;
+}
+
+
+/* API 'yield' method: give up the CPU, but only if a reschedule is
+ * already pending for the current task. */
+static void kscimacnal_yield( nal_t *nal )
+{
+        LASSERT (nal == &kscimacnal_api);
+
+        if (!current->need_resched)
+                return;
+
+        schedule();
+}
+
+
+/*
+ * NAL init function handed to PtlNIInit(): initialises the library side with
+ * this node's id and a fixed node count, then returns the API handle.
+ * 'interface' and 'requested_pid' are not used.
+ */
+static nal_t *kscimacnal_init(int interface, ptl_pt_index_t ptl_size,
+                              ptl_ac_index_t ac_size, ptl_pid_t requested_pid)
+{
+        /* FIXME: Need ScaMac function to get #nodes */
+        int nnids = 512;
+
+        CDEBUG(D_NET, "calling lib_init with nid 0x%Lx nnids %d\n",
+               kscimacnal_data.ksci_nid, nnids);
+
+        lib_init(&kscimacnal_lib, kscimacnal_data.ksci_nid, 0, nnids,
+                 ptl_size, ac_size);
+
+        return &kscimacnal_api;
+}
+
+
+/*
+ * Called by kernel at module unload time.
+ *
+ * Flags the NAL as shutting down, withdraws the exported NI symbol, shuts
+ * down the portals interface and library, and finally releases the ScaMAC
+ * handle.
+ */
+static void __exit
+kscimacnal_finalize(void)
+{
+        /* FIXME: How should the shutdown procedure really look? */
+        kscimacnal_data.ksci_shuttingdown = 1;
+
+        PORTAL_SYMBOL_UNREGISTER(kscimacnal_ni);
+
+        PtlNIFini(kscimacnal_ni);
+        lib_fini(&kscimacnal_lib);
+
+        mac_finish(kscimacnal_data.ksci_machandle);
+
+        CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read (&portal_kmemory));
+}
+
+
+/*
+ * Called by kernel at module insertion time.
+ *
+ * Fills in the API method table, resets the NAL state, attaches to ScaMAC
+ * adapter 0, checks that its MTU is large enough, reads the node id and
+ * initialises the portals network interface.
+ *
+ * Returns 0 on success or a negative errno:
+ *   -ENODEV  mac_init() failed;
+ *   -EINVAL  the scimac MTU is smaller than SCIMACNAL_MTU;
+ *   -EIO     the physical address could not be read;
+ *   -ENOMEM  PtlNIInit() failed.
+ * The ScaMAC handle is released on every failure after it was obtained.
+ */
+static int __init
+kscimacnal_initialize(void)
+{
+        int           rc;
+        unsigned long nid = 0;
+        mac_handle_t *machandle = NULL;
+
+        CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read (&portal_kmemory));
+
+        kscimacnal_api.forward  = kscimacnal_forward;
+        kscimacnal_api.shutdown = kscimacnal_shutdown;
+        kscimacnal_api.yield    = kscimacnal_yield;
+        kscimacnal_api.validate = NULL; /* our api validate is a NOOP */
+        kscimacnal_api.lock     = kscimacnal_lock;
+        kscimacnal_api.unlock   = kscimacnal_unlock;
+        kscimacnal_api.nal_data = &kscimacnal_data;
+
+        kscimacnal_lib.nal_data = &kscimacnal_data;
+
+        memset(&kscimacnal_data, 0, sizeof(kscimacnal_data));
+
+        kscimacnal_data.ksci_cb = &kscimacnal_lib;
+
+        /* We're not using this, but cli/sti callbacks does... ??? */
+        spin_lock_init(&kscimacnal_data.ksci_dispatch_lock);
+
+        /* FIXME: We only support one adapter for now */
+        machandle = mac_init(0, MAC_SAPID_LUSTRE, kscimacnal_rx,
+                             &kscimacnal_data);
+        if (machandle == NULL) {
+                CERROR("mac_init() failed\n");
+                return -ENODEV;
+        }
+
+        kscimacnal_data.ksci_machandle = machandle;
+
+        /* Make sure the scimac MTU is tuned */
+        if (mac_get_mtusize(machandle) < SCIMACNAL_MTU) {
+                CERROR("scimac mtu of %ld smaller than SCIMACNAL MTU of %d\n",
+                       mac_get_mtusize(machandle), SCIMACNAL_MTU);
+                CERROR("Consult README.scimacnal for more information\n");
+                rc = -EINVAL;
+                goto failed;
+        }
+
+        /* Get the node ID */
+        /* mac_get_physaddrlen() is a function instead of define, sigh */
+        LASSERT(mac_get_physaddrlen(machandle) <= sizeof(nid));
+        if (mac_get_physaddr(machandle, (mac_physaddr_t *) &nid)) {
+                CERROR("mac_get_physaddr() failed\n");
+                rc = -EIO;
+                goto failed;
+        }
+        nid = ntohl(nid);
+        kscimacnal_data.ksci_nid = nid;
+
+        /* Initialize Network Interface */
+        /* FIXME: What do the magic numbers mean? Documentation anyone? */
+        rc = PtlNIInit(kscimacnal_init, 32, 4, 0, &kscimacnal_ni);
+        if (rc) {
+                CERROR("PtlNIInit failed %d\n", rc);
+                rc = -ENOMEM;
+                goto failed;
+        }
+
+        PORTAL_SYMBOL_REGISTER(kscimacnal_ni);
+
+        /* We're done now, it's OK for the RX callback to do stuff */
+        kscimacnal_data.ksci_init = 1;
+
+        return 0;
+
+ failed:
+        /* single cleanup path: give the adapter back to ScaMAC */
+        mac_finish(machandle);
+        return rc;
+}
+
+
+/* Module metadata. */
+MODULE_AUTHOR("Niklas Edmundsson <nikke@hpc2n.umu.se>");
+MODULE_DESCRIPTION("Kernel Scali ScaMAC SCI NAL v0.0");
+MODULE_LICENSE("GPL");
+
+/* Entry points: kscimacnal_initialize runs at module load,
+ * kscimacnal_finalize at module unload. */
+module_init (kscimacnal_initialize);
+module_exit (kscimacnal_finalize);
+
+/* Export the handle filled in by PtlNIInit() so other modules can use it. */
+EXPORT_SYMBOL(kscimacnal_ni);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:cindent:
+ *
+ * Copyright (C) 2003 High Performance Computing Center North (HPC2N)
+ * Author: Niklas Edmundsson <nikke@hpc2n.umu.se>
+ */
+
+
+#ifndef _SCIMACNAL_H
+#define _SCIMACNAL_H
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/locks.h>
+#include <linux/unistd.h>
+#include <linux/init.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/list.h>
+#include <asm/uaccess.h>
+#include <asm/segment.h>
+#include <asm/page.h> /* For PAGE_SIZE */
+
+#define DEBUG_SUBSYSTEM S_UNDEFINED
+
+#include <linux/kp30.h>
+#include <portals/p30.h>
+#include <portals/lib-p30.h>
+
+#include <scamac.h>
+
+#ifndef MAC_SAPID_LUSTRE
+#define MAC_SAPID_LUSTRE MAC_SAPID_TEST1
+#endif /* MAC_SAPID_LUSTRE */
+
+#define SCIMACNAL_MTU 65536
+/* FIXME: What is really the MTU of lustre? */
+#if PTL_MD_MAX_IOV*PAGE_SIZE > SCIMACNAL_MTU
+#error Max MTU of ScaMAC is 64k, PTL_MD_MAX_IOV*PAGE_SIZE is bigger.
+#endif
+
+/* Receive descriptor: a snapshot of the arguments ScaMAC passed to
+ * kscimacnal_rx().  It is handed to lib_parse() as the 'private' pointer
+ * and comes back to kscimacnal_recv(), which reads the payload from 'msg'. */
+typedef struct {
+ mac_handle_t *handle; /* handle argument of the RX callback */
+ mac_mblk_t *msg; /* the received message */
+ mac_msg_type_t type; /* message type reported by ScaMAC */
+ void *userdata; /* userdata argument of the RX callback */
+} kscimacnal_rx_t;
+
+
+/* Transmit descriptor: allocated per message in kscimacnal_send() and
+ * released in kscimacnal_txrelease(), which uses the first three fields
+ * as the arguments to lib_finalize(). */
+typedef struct {
+ nal_cb_t *ktx_nal; /* NAL the message was sent on */
+ void *ktx_private; /* 'private' argument of the send call */
+ lib_msg_t *ktx_cookie; /* lib message to finalize on completion */
+ ptl_hdr_t ktx_hdr; /* private copy of the header; the caller's lives on its stack */
+} kscimacnal_tx_t;
+
+
+/* Global state of the scimac NAL (single instance: kscimacnal_data). */
+typedef struct {
+ char ksci_init; /* set to 1 once initialisation is complete; RX drops messages before that */
+ char ksci_shuttingdown; /* when non-zero the RX callback drops incoming messages */
+ ptl_nid_t ksci_nid; /* our node id, derived from the ScaMAC physical address */
+ nal_cb_t *ksci_cb; /* points at kscimacnal_lib */
+ spinlock_t ksci_dispatch_lock; /* taken/released by the cb_cli/cb_sti callbacks */
+ mac_handle_t *ksci_machandle; /* handle returned by mac_init() */
+} kscimacnal_data_t;
+
+extern kscimacnal_data_t kscimacnal_data;
+extern nal_t kscimacnal_api;
+extern nal_cb_t kscimacnal_lib;
+
+void kscimacnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd);
+void kscimacnal_rx(mac_handle_t *handle, mac_mblk_t *msg, mac_msg_type_t type, void *userdata);
+
+
+#endif /* _SCIMACNAL_H */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:cindent:
+ *
+ * Copyright (C) 2003 High Performance Computing Center North (HPC2N)
+ * Author: Niklas Edmundsson <nikke@hpc2n.umu.se>
+
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include "scimacnal.h"
+
+/* cb_read callback: copy 'len' bytes from src_addr to dst_addr with a plain
+ * memcpy and log the transfer at D_NET.  'private' is unused.
+ * Always returns 0. */
+static int
+kscimacnal_read (nal_cb_t *nal, void *private,
+                 void *dst_addr, user_ptr src_addr, size_t len)
+{
+        long nbytes = (long)len;
+
+        CDEBUG(D_NET, "0x%Lx: reading %ld bytes from %p -> %p\n",
+               nal->ni.nid, nbytes, src_addr, dst_addr);
+
+        memcpy(dst_addr, src_addr, len);
+
+        return 0;
+}
+
+
+/* cb_write callback: mirror image of the read callback.  Copies 'len'
+ * bytes from src_addr to dst_addr with memcpy and logs at D_NET.
+ * 'private' is unused.  Always returns 0. */
+static int
+kscimacnal_write(nal_cb_t *nal, void *private,
+                 user_ptr dst_addr, void *src_addr, size_t len)
+{
+        long nbytes = (long)len;
+
+        CDEBUG(D_NET, "0x%Lx: writing %ld bytes from %p -> %p\n",
+               nal->ni.nid, nbytes, src_addr, dst_addr);
+
+        memcpy(dst_addr, src_addr, len);
+
+        return 0;
+}
+
+
+/* cb_malloc callback: allocate 'len' bytes through PORTAL_ALLOC and return
+ * whatever pointer the macro produced.  'nal' is unused. */
+static void *
+kscimacnal_malloc(nal_cb_t *nal, size_t len)
+{
+        void *mem;
+
+        PORTAL_ALLOC(mem, len);
+
+        return mem;
+}
+
+
+/* cb_free callback: release a buffer of 'len' bytes through PORTAL_FREE.
+ * 'nal' is unused. */
+static void
+kscimacnal_free(nal_cb_t *nal, void *buf, size_t len)
+{
+        PORTAL_FREE(buf, len);
+}
+
+
+/* cb_printf callback: when D_NET debugging is enabled in portal_debug,
+ * format the message into a 256-byte stack buffer (vsnprintf truncates
+ * anything longer) and printk it prefixed with the current CPU id.
+ * Does nothing otherwise. */
+static void
+kscimacnal_printf(nal_cb_t *nal, const char *fmt, ...)
+{
+        char    text[256];
+        va_list args;
+
+        if (!(portal_debug & D_NET))
+                return;
+
+        va_start(args, fmt);
+        vsnprintf(text, sizeof(text), fmt, args);
+        va_end(args);
+
+        printk("CPUId: %d %s",smp_processor_id(), text);
+}
+
+
+/* cb_cli callback: take the NAL dispatch spinlock with interrupts
+ * disabled, saving the previous interrupt state in *flags. */
+static void
+kscimacnal_cli(nal_cb_t *nal, unsigned long *flags)
+{
+        kscimacnal_data_t *ksci = nal->nal_data;
+
+        spin_lock_irqsave(&ksci->ksci_dispatch_lock, *flags);
+}
+
+
+/* cb_sti callback: drop the NAL dispatch spinlock and restore the
+ * interrupt state previously saved in *flags by the cli callback. */
+static void
+kscimacnal_sti(nal_cb_t *nal, unsigned long *flags)
+{
+        kscimacnal_data_t *ksci = nal->nal_data;
+
+        spin_unlock_irqrestore(&ksci->ksci_dispatch_lock, *flags);
+}
+
+
+/* cb_dist callback: report the "distance" to 'nid' in *dist.
+ * Only two values are produced: 0 for our own nid, 1 for anything else.
+ * FIXME (from the original author): the real distance depends on routing
+ * and is not easy to obtain.  Always returns 0. */
+static int
+kscimacnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
+{
+        *dist = (nal->ni.nid == nid) ? 0 : 1;
+
+        return 0;
+}
+
+
+static
+char * get_mac_error(mac_status_t status)
+{
+ switch(status) {
+ case MAC_MSG_STAT_OK:
+ return "MAC_MSG_STAT_OK";
+ case MAC_MSG_STAT_FREED:
+ return "MAC_MSG_STAT_FREED";
+ case MAC_MSG_STAT_ABORTED:
+ return "MAC_MSG_STAT_ABORTED";
+ case MAC_MSG_STAT_TIMEDOUT:
+ return "MAC_MSG_STAT_TIMEDOUT";
+ case MAC_MSG_STAT_NODEUNREACH:
+ return "MAC_MSG_STAT_NODEUNREACH";
+ case MAC_MSG_STAT_NETDOWN:
+ return "MAC_MSG_STAT_NETDOWN";
+ case MAC_MSG_STAT_RESET:
+ return "MAC_MSG_STAT_RESET";
+ case MAC_MSG_STAT_INITFAILED:
+ return "MAC_MSG_STAT_INITFAILED";
+ case MAC_MSG_STAT_SYNCFAILED:
+ return "MAC_MSG_STAT_SYNCFAILED";
+ case MAC_MSG_STAT_BADPROTO:
+ return "MAC_MSG_STAT_BADPROTO";
+ case MAC_MSG_STAT_NOBUFSPACE:
+ return "MAC_MSG_STAT_NOBUFSPACE";
+ case MAC_MSG_STAT_CONGESTION:
+ return "MAC_MSG_STAT_CONGESTION";
+ case MAC_MSG_STAT_OTHER:
+ return "MAC_MSG_STAT_OTHER";
+ default:
+ return "Unknown error";
+ }
+}
+
+
+/* FIXME add routing code here ? */
+
+/* Release callback registered on the header mblk in kscimacnal_send();
+ * called by ScaMac when transmission is complete (ie. message is released).
+ *
+ * msg     - the message being released (unused here)
+ * status  - ScaMAC completion status; anything but MAC_MSG_STAT_OK is logged
+ * context - the kscimacnal_tx_t allocated by kscimacnal_send()
+ *
+ * The portals message is finalized regardless of status, because
+ * lib_finalize() as called here has no way to pass an error on, and the
+ * tx descriptor is then freed. */
+static void
+kscimacnal_txrelease(mac_mblk_t *msg, mac_msg_status_t status, void *context)
+{
+        kscimacnal_tx_t *ktx = (kscimacnal_tx_t *)context;
+
+        LASSERT (ktx != NULL);
+
+        /* Euh, there is no feedback when transmission fails?! */
+        if (status != MAC_MSG_STAT_OK)
+                CERROR("%s (%d):\n", get_mac_error(status), status);
+
+        lib_finalize(ktx->ktx_nal, ktx->ktx_private, ktx->ktx_cookie);
+
+        PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t)));
+}
+
+
+/* Called by portals when it wants to send a message.
+ * Since ScaMAC has its own TX thread we don't bother setting up our own.
+ *
+ * Builds a ScaMAC message whose first mblk references a private copy of
+ * the portals header and whose following mblks reference the payload iov
+ * fragments directly (no payload copy), then hands it to mac_send().
+ *
+ * Returns 0 on success, -EINVAL when header + payload exceed the ScaMAC
+ * MTU, -ENOMEM when a descriptor or mblk cannot be allocated, or the
+ * non-zero code returned by mac_send().
+ *
+ * NOTE(review): the error paths call mac_free_msg() on a message whose
+ * first mblk carries kscimacnal_txrelease as release callback, and then
+ * free ktx themselves.  If ScaMAC invokes the callback from
+ * mac_free_msg(), ktx would be finalized/freed twice - confirm against
+ * the ScaMAC documentation. */
+static int
+kscimacnal_send(nal_cb_t        *nal,
+                void            *private,
+                lib_msg_t       *cookie,
+                ptl_hdr_t       *hdr,
+                int              type,
+                ptl_nid_t        nid,
+                ptl_pid_t        pid,
+                unsigned int     payload_niov,
+                struct iovec    *payload_iov,
+                size_t           payload_len)
+{
+        kscimacnal_tx_t    *ktx=NULL;
+        kscimacnal_data_t  *ksci = nal->nal_data;
+        int                 rc=0;
+        int                 buf_len = sizeof(ptl_hdr_t) + payload_len;
+        mac_mblk_t         *msg=NULL, *lastblk, *newblk;
+        unsigned long       physaddr;
+
+        /* payload_len is a size_t: print it as long, like the read/write
+         * callbacks do, instead of passing it to a %d conversion */
+        CDEBUG(D_NET, "sending %ld bytes from %p to nid 0x%Lx niov: %u\n",
+               (long)payload_len, payload_iov, nid, payload_niov);
+
+        LASSERT(ksci != NULL);
+
+        LASSERT(hdr != NULL);
+
+        /* Do real check if we can send this */
+        if (buf_len > mac_get_mtusize(ksci->ksci_machandle)) {
+                CERROR("kscimacnal:request exceeds TX MTU size (%ld).\n",
+                       mac_get_mtusize(ksci->ksci_machandle));
+                return -EINVAL;
+        }
+
+        /* save transaction info for later finalize and cleanup */
+        PORTAL_ALLOC(ktx, (sizeof(kscimacnal_tx_t)));
+        if (!ktx) {
+                return -ENOMEM;
+        }
+
+        /* *SIGH* hdr is a stack variable in the calling function, so we
+         * need to copy it to a buffer. Zerocopy magic (or is it just
+         * deferred memcpy?) is annoying sometimes. */
+        memcpy(&ktx->ktx_hdr, hdr, sizeof(ptl_hdr_t));
+
+        /* First, put the header in the main message mblk */
+        msg = mac_alloc_mblk(&ktx->ktx_hdr, sizeof(ptl_hdr_t),
+                             kscimacnal_txrelease, ktx);
+        if (!msg) {
+                PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t)));
+                return -ENOMEM;
+        }
+        mac_put_mblk(msg, sizeof(ptl_hdr_t));
+        lastblk=msg;
+
+        /* Allocate additional mblks for each iov as needed.
+         * Essentially lib_copy_iov2buf with a twist or two */
+        while (payload_len > 0)
+        {
+                ptl_size_t nob;
+
+                LASSERT (payload_niov > 0);
+
+                nob = MIN (payload_iov->iov_len, payload_len);
+
+                /* We don't need a callback on the additional mblks, since
+                 * all release callbacks seems to be called when the entire
+                 * message has been sent */
+                newblk=mac_alloc_mblk(payload_iov->iov_base, nob, NULL, NULL);
+                if(!newblk) {
+                        mac_free_msg(msg);
+                        PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t)));
+                        return -ENOMEM;
+                }
+                mac_put_mblk(newblk, nob);
+                mac_link_mblk(lastblk, newblk);
+                lastblk=newblk;
+
+                payload_len -= nob;
+                payload_niov--;
+                payload_iov++;
+        }
+
+        /* everything kscimacnal_txrelease needs to finalize the message */
+        ktx->ktx_nal = nal;
+        ktx->ktx_private = private;
+        ktx->ktx_cookie = cookie;
+
+        CDEBUG(D_NET, "mac_send %d bytes to nid: 0x%Lx\n", buf_len, nid);
+
+        /* the nid was derived from the physical address with ntohl() at
+         * init time; convert it back for the wire */
+        physaddr = htonl(nid);
+
+        if((rc=mac_send(ksci->ksci_machandle, msg,
+                        (mac_physaddr_t *) &physaddr))) {
+                CERROR("kscimacnal: mac_send() failed, rc=%d\n", rc);
+                mac_free_msg(msg);
+                PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t)));
+                return rc;
+        }
+
+        return 0;
+}
+
+
+/* Forwarding entry point: packet forwarding is not supported by this NAL,
+ * so the request is only reported as an error. */
+void
+kscimacnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
+{
+        CERROR ("forwarding not implemented\n");
+}
+
+
+/* Process a received portals packet.
+ * Called by the ScaMac RX thread when a packet is received.
+ *
+ * handle, msg, type - as delivered by ScaMAC
+ * userdata          - the kscimacnal_data_t registered with mac_init()
+ *
+ * Messages that arrive before initialisation completed, during shutdown,
+ * or with a control/other type are dropped.  Otherwise the portals header
+ * is pulled from the first unread mblk and, when addressed to us, given
+ * to lib_parse().  The message is freed on every path before returning. */
+void
+kscimacnal_rx(mac_handle_t *handle, mac_mblk_t *msg, mac_msg_type_t type,
+              void *userdata)
+{
+        ptl_hdr_t          *hdr = NULL;
+        kscimacnal_rx_t     krx;
+        mac_size_t          size;
+        kscimacnal_data_t  *ksci = userdata;
+
+        LASSERT(ksci != NULL);
+
+        if ( !ksci->ksci_init || ksci->ksci_shuttingdown ||
+             type == MAC_MSG_TYPE_CTRL || type == MAC_MSG_TYPE_OTHER ) {
+                /* We're not interested in messages not for us, ignore */
+                mac_free_msg(msg);
+                return;
+        }
+
+        size = mac_msg_size(msg);
+
+        CDEBUG(D_NET,"msg %p type %d, size %ld bytes (%ld mblks)\n",
+               msg, type, size, mac_msg_mblks(msg));
+
+        if( size < sizeof( ptl_hdr_t ) ) {
+                /* Stay quiet if shutdown started since the check above,
+                 * but still release the message: returning without
+                 * freeing it would leak it. */
+                if (!ksci->ksci_shuttingdown)
+                        CERROR("kscimacnal: did not receive complete portal header,"
+                               "size= %ld\n", size);
+                mac_free_msg(msg);
+                return;
+        }
+
+        /* Provide everything we know */
+        krx.handle = handle;
+        krx.msg = msg;
+        krx.type = type;
+        krx.userdata = userdata;
+
+        /* mac_msg_next returns the next mblk with unread data */
+        hdr = mac_get_mblk(mac_msg_next(msg), sizeof(ptl_hdr_t) );
+
+        if(!hdr) {
+                CERROR("kscimacnal: no data block in message %p\n", msg);
+                mac_free_msg(msg);
+                return;
+        }
+
+        if ( hdr->dest_nid == kscimacnal_lib.ni.nid ) {
+                PROF_START(lib_parse);
+                /* sets wanted_len, iovs etc and calls our callback */
+                lib_parse(&kscimacnal_lib, hdr, &krx);
+                PROF_FINISH(lib_parse);
+#if 0 /* FIXME: Is it possible to detect this? */
+        } else if (kgmnal_ispeer(hdr->dest_nid)) {
+                /* should have gone direct to peer */
+                CERROR("dropping packet from 0x%llx to 0x%llx:"
+                       "target is a peer\n",
+                       hdr->src_nid, hdr->dest_nid);
+                kgmnal_requeue_rx(&krx);
+#endif /* if 0 FIXME */
+        } else {
+                /* forward to gateway */
+                CERROR("forwarding not implemented, mynid=0x%llx dest=0x%llx\n",
+                       kscimacnal_lib.ni.nid, hdr->dest_nid);
+        }
+
+        mac_free_msg(msg);
+
+        CDEBUG(D_NET, "msg %p: Done\n", msg);
+}
+
+
+/* Called by portals to process a received packet (cb_recv callback).
+ *
+ * private - the kscimacnal_rx_t set up by kscimacnal_rx()
+ * cookie  - lib message to finalize once the payload is delivered
+ * niov/iov - destination fragments
+ * mlen    - number of payload bytes to deliver into iov
+ * rlen    - number of payload bytes the sender claims to have sent
+ *
+ * Copies at most mlen bytes from the message's unread mblks into the iov
+ * array, then finalizes the message.  Returns rlen. */
+static int kscimacnal_recv(nal_cb_t     *nal,
+                           void         *private,
+                           lib_msg_t    *cookie,
+                           unsigned int  niov,
+                           struct iovec *iov,
+                           size_t        mlen,
+                           size_t        rlen)
+{
+        kscimacnal_rx_t *krx = private;
+        mac_mblk_t      *mblk;
+        void            *src;
+        mac_size_t       pkt_len;
+        ptl_size_t       iovused=0;
+        size_t           remaining = mlen; /* bytes still to deliver */
+
+        LASSERT (krx != NULL);
+        LASSERT (krx->msg != NULL);
+
+        /* size_t values are printed as long, not passed to %d */
+        CDEBUG(D_NET,"msg %p: mlen=%ld, rlen=%ld, niov=%u\n",
+               krx->msg, (long)mlen, (long)rlen, niov);
+
+        /* What was actually received must be >= what sender claims to have
+         * sent. This is an LASSERT, since lib-move doesn't check cb return
+         * code yet. Also, rlen seems to be negative when mlen==0 so don't
+         * assert on that.
+         */
+        LASSERT (mlen==0 || mac_msg_size(krx->msg) >= sizeof(ptl_hdr_t)+rlen);
+        LASSERT (mlen==0 || mlen <= rlen);
+
+        PROF_START(memcpy);
+
+        /* mac_msg_next returns next mblk with unread data (ie. can
+         * be same mblk */
+        while (remaining != 0 && (mblk = mac_msg_next(krx->msg))) {
+                pkt_len = mac_mblk_len(mblk);
+                src = mac_get_mblk(mblk, pkt_len); /* Next unread block */
+
+                CDEBUG(D_NET,"msg %p: mblk: %p pkt_len: %ld src: %p\n",
+                       krx->msg, mblk, pkt_len, src);
+
+                LASSERT(src != NULL);
+
+                /* Never deliver more than mlen bytes: the message may hold
+                 * more payload than the receiver asked for (mlen < rlen),
+                 * and the iov array is only guaranteed to hold mlen. */
+                if (pkt_len > remaining)
+                        pkt_len = remaining;
+                remaining -= pkt_len;
+
+                /* Essentially lib_copy_buf2iov but with continuation support,
+                 * we "gracefully" thrash the argument vars ;) */
+                while (pkt_len > 0) {
+                        ptl_size_t nob;
+
+                        LASSERT (niov > 0);
+
+                        LASSERT(iovused < iov->iov_len);
+
+                        nob = MIN (iov->iov_len-iovused, pkt_len);
+                        CDEBUG(D_NET, "iovbase: %p iovlen: %ld src: %p nob: %ld "
+                               "iovused: %ld\n",
+                               iov->iov_base, (long)iov->iov_len,
+                               src, (long)nob, (long)iovused);
+
+                        memcpy (iov->iov_base+iovused, src, nob);
+                        pkt_len -= nob;
+                        src += nob;
+
+                        if(nob+iovused < iov->iov_len) {
+                                /* We didn't use all of the iov */
+                                iovused+=nob;
+                        }
+                        else {
+                                /* fragment is full, move to the next one */
+                                niov--;
+                                iov++;
+                                iovused=0;
+                        }
+                }
+        }
+        PROF_FINISH(memcpy);
+
+        CDEBUG(D_NET, "Calling lib_finalize.\n");
+
+        PROF_START(lib_finalize);
+        lib_finalize(nal, private, cookie);
+        PROF_FINISH(lib_finalize);
+
+        CDEBUG(D_NET, "Done.\n");
+
+        return rlen;
+}
+
+
+/* Callback table handed to the portals library for this NAL.  The page
+ * based send/recv variants are left NULL; everything else maps to the
+ * static callbacks defined in this file. */
+nal_cb_t kscimacnal_lib = {
+ nal_data: &kscimacnal_data, /* NAL private data */
+ cb_send: kscimacnal_send,
+ cb_send_pages: NULL, /* Ignore for now */
+ cb_recv: kscimacnal_recv,
+ cb_recv_pages: NULL,
+ cb_read: kscimacnal_read,
+ cb_write: kscimacnal_write,
+ cb_malloc: kscimacnal_malloc,
+ cb_free: kscimacnal_free,
+ cb_printf: kscimacnal_printf,
+ cb_cli: kscimacnal_cli,
+ cb_sti: kscimacnal_sti,
+ cb_dist: kscimacnal_dist
+};
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+include ../../Rules.linux
+
+MODULE = ksocknal
+modulenet_DATA = ksocknal.o
+EXTRA_PROGRAMS = ksocknal
+
+DEFS =
+ksocknal_SOURCES = socknal.c socknal_cb.c socknal.h
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+include ../../Kernelenv
+
+obj-y += ksocknal.o
+ksocknal-objs := socknal.o socknal_cb.o
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Author: Zach Brown <zab@zabbo.net>
+ * Author: Peter J. Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "socknal.h"
+
+/* Handle registered/unregistered under the name ksocknal_ni via
+ * PORTAL_SYMBOL_(UN)REGISTER and finalized with PtlNIFini() at unload. */
+ptl_handle_ni_t ksocknal_ni;
+/* API-side function table; filled in by ksocknal_module_init(). */
+static nal_t ksocknal_api;
+/* Global socknal state.  Non-static on 2.5+ kernels only.
+ * NOTE(review): presumably other compilation units need it there - confirm. */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+ksock_nal_data_t ksocknal_data;
+#else
+static ksock_nal_data_t ksocknal_data;
+#endif
+
+/* Description of this NAL for the portals router: NAL id, the argument
+ * passed back on callbacks, and the forwarding entry point. */
+kpr_nal_interface_t ksocknal_router_interface = {
+ kprni_nalid: SOCKNAL,
+ kprni_arg: &ksocknal_data,
+ kprni_fwd: ksocknal_fwd_packet,
+};
+
+
+/* nal_t 'forward' hook: pass an API request straight to lib_dispatch()
+ * on the library-side callback table.  The socknal data is passed as the
+ * private argument because ksocknal_send needs it.  args_len and ret_len
+ * are unused.  Always returns PTL_OK. */
+int
+ksocknal_api_forward(nal_t *nal, int id, void *args, size_t args_len,
+                     void *ret, size_t ret_len)
+{
+        ksock_nal_data_t *ksnd = nal->nal_data;
+
+        lib_dispatch(ksnd->ksnd_nal_cb, ksnd, id, args, ret);
+
+        return PTL_OK;
+}
+
+/* nal_t 'shutdown' hook: close every connection by asking
+ * ksocknal_close_sock() for nid 0, and return its result.
+ * 'nal' and 'ni' are unused. */
+int
+ksocknal_api_shutdown(nal_t *nal, int ni)
+{
+        int rc;
+
+        CDEBUG (D_NET, "closing all connections\n");
+
+        rc = ksocknal_close_sock(0);    /* nid 0 == all sockets */
+        return rc;
+}
+
+/* nal_t 'yield' hook: give up the CPU via our_cond_resched().
+ * 'nal' is unused. */
+void
+ksocknal_api_yield(nal_t *nal)
+{
+        our_cond_resched();
+}
+
+/* nal_t 'lock' hook: delegate to the library callback table's cb_cli,
+ * which stores the saved state in *flags. */
+void
+ksocknal_api_lock(nal_t *nal, unsigned long *flags)
+{
+        ksock_nal_data_t *ksnd = nal->nal_data;
+        nal_cb_t         *cb = ksnd->ksnd_nal_cb;
+
+        cb->cb_cli(cb, flags);
+}
+
+/* nal_t 'unlock' hook: delegate to the library callback table's cb_sti,
+ * handing it the state saved in *flags by the lock hook. */
+void
+ksocknal_api_unlock(nal_t *nal, unsigned long *flags)
+{
+        ksock_nal_data_t *ksnd = nal->nal_data;
+        nal_cb_t         *cb = ksnd->ksnd_nal_cb;
+
+        cb->cb_sti(cb, flags);
+}
+
+/* NAL init entry point: run lib_init() on the library callback table
+ * using the currently stored nid and the caller's table sizes, then
+ * return the API-side nal_t.  'interface' and 'requested_pid' are unused;
+ * the literal 0 and 10 are passed to lib_init() as in the original code
+ * (their meaning is not visible here). */
+nal_t *
+ksocknal_init(int interface, ptl_pt_index_t ptl_size,
+              ptl_ac_index_t ac_size, ptl_pid_t requested_pid)
+{
+        nal_t *api = &ksocknal_api;
+
+        CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n",
+               ksocknal_data.ksnd_mynid);
+
+        lib_init(&ksocknal_lib, ksocknal_data.ksnd_mynid, 0, 10,
+                 ptl_size, ac_size);
+
+        return (api);
+}
+
+/*
+ * EXTRA functions follow
+ */
+
+/* Kernels before 2.5 get a local SOCKET_I() that reaches the socket
+ * embedded in the inode union. */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+#define SOCKET_I(inode) (&(inode)->u.socket_i)
+#endif
+
+/* Map an inode to its struct socket using SOCKET_I(). */
+static __inline__ struct socket *
+socki_lookup(struct inode *inode)
+{
+        struct socket *sock = SOCKET_I(inode);
+
+        return sock;
+}
+
+/* Record 'nid' as this node's id, both in the socknal data and in the
+ * library NI.  Always returns 0.
+ *
+ * FIXME (from the original author): this is needed because lib_init() is
+ * called at module insertion time, before 'mynid' is available; lib_init
+ * sets the NAL's nid, which is used to tell other nodes where packets
+ * come from.  Not a very graceful solution. */
+int
+ksocknal_set_mynid(ptl_nid_t nid)
+{
+        CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n",
+               nid, ksocknal_lib.ni.nid);
+
+        ksocknal_lib.ni.nid = nid;
+        ksocknal_data.ksnd_mynid = nid;
+
+        return (0);
+}
+
+/* Bind interrupt 'irq' to CPU 'cpu' by running a shell command that
+ * writes the bitmask (1 << cpu) to /proc/irq/<irq>/smp_affinity through
+ * call_usermodehelper().  Compiled to a no-op unless CONFIG_SMP is
+ * defined and CPU_AFFINITY is non-zero.
+ *
+ * FIXME (from the original author): find a better method of setting IRQ
+ * affinity. */
+void
+ksocknal_bind_irq (unsigned int irq, int cpu)
+{
+#if (defined(CONFIG_SMP) && CPU_AFFINITY)
+        char  *envp[] = {"HOME=/",
+                         "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
+                         NULL};
+        char   shell_cmd[64];
+        char  *argv[] = {"/bin/sh", "-c", shell_cmd, NULL};
+        int    cpu_mask = 1 << cpu;
+
+        snprintf (shell_cmd, sizeof (shell_cmd),
+                  "echo %d > /proc/irq/%u/smp_affinity", cpu_mask, irq);
+
+        printk (KERN_INFO "Binding irq %u to CPU %d with cmd: %s\n",
+                irq, cpu, shell_cmd);
+
+        call_usermodehelper (argv[0], argv, envp);
+#endif
+}
+
+/* Register the socket behind file descriptor 'fd' as the connection to
+ * peer 'nid'.
+ *
+ * nid      - peer node id stored in the new connection
+ * fd       - file descriptor (in the calling process) of the socket
+ * bind_irq - non-zero to bind the NIC's interrupt to the chosen
+ *            scheduler's CPU (only done once per irq)
+ *
+ * On success the file reference taken with fget() is kept in the
+ * connection (released again in ksocknal_close_conn()), the connection is
+ * put on the global socket list, the socket callbacks are redirected to
+ * this NAL, and the module use count is raised.  Returns 0 on success,
+ * -EINVAL for a bad fd/socket, -ENOMEM if the connection cannot be
+ * allocated.  Must not be called from interrupt context. */
+int
+ksocknal_add_sock (ptl_nid_t nid, int fd, int bind_irq)
+{
+        unsigned long      flags;
+        ksock_conn_t      *conn;
+        struct file       *file = NULL;
+        struct socket     *sock = NULL;
+        ksock_sched_t     *sched = NULL;
+        unsigned int       irq = 0;
+        struct net_device *dev = NULL;
+        int                ret;
+        int                idx;
+        ENTRY;
+
+        LASSERT (!in_interrupt());
+
+        file = fget(fd);
+        if (file == NULL)
+                RETURN(-EINVAL);
+
+        ret = -EINVAL;
+        sock = socki_lookup(file->f_dentry->d_inode);
+        if (sock == NULL)
+                GOTO(error, ret);
+
+        ret = -ENOMEM;
+        PORTAL_ALLOC(conn, sizeof(*conn));
+        if (!conn)
+                GOTO(error, ret);
+
+        /* zero the whole structure for consistency; sizeof (conn) would
+         * only cover the size of the pointer */
+        memset (conn, 0, sizeof (*conn));
+
+        conn->ksnc_file = file;
+        conn->ksnc_sock = sock;
+        /* remember the original callbacks so they can be restored when
+         * the connection is closed */
+        conn->ksnc_saved_data_ready = sock->sk->data_ready;
+        conn->ksnc_saved_write_space = sock->sk->write_space;
+        conn->ksnc_peernid = nid;
+        atomic_set (&conn->ksnc_refcount, 1);   /* 1 ref for socklist */
+
+        conn->ksnc_rx_ready = 0;
+        conn->ksnc_rx_scheduled = 0;
+        ksocknal_new_packet (conn, 0);
+
+        INIT_LIST_HEAD (&conn->ksnc_tx_queue);
+        conn->ksnc_tx_ready = 0;
+        conn->ksnc_tx_scheduled = 0;
+
+#warning check it is OK to derefence sk->dst_cache->dev like this...
+        lock_sock (conn->ksnc_sock->sk);
+
+        /* find the irq of the device the socket routes through, if any;
+         * irq stays 0 when it cannot be determined */
+        if (conn->ksnc_sock->sk->dst_cache != NULL) {
+                dev = conn->ksnc_sock->sk->dst_cache->dev;
+                if (dev != NULL) {
+                        irq = dev->irq;
+                        if (irq >= NR_IRQS) {
+                                CERROR ("Unexpected IRQ %x\n", irq);
+                                irq = 0;
+                        }
+                }
+        }
+
+        release_sock (conn->ksnc_sock->sk);
+
+        write_lock_irqsave (&ksocknal_data.ksnd_socklist_lock, flags);
+
+        if (irq == 0 ||
+            ksocknal_data.ksnd_irq_info[irq] == SOCKNAL_IRQ_UNASSIGNED) {
+                /* This is a software NIC, or we haven't associated it with
+                 * a CPU yet */
+
+                /* Choose the CPU with the fewest connections */
+                sched = ksocknal_data.ksnd_schedulers;
+                for (idx = 1; idx < SOCKNAL_N_SCHED; idx++)
+                        if (sched->kss_nconns >
+                            ksocknal_data.ksnd_schedulers[idx].kss_nconns)
+                                sched = &ksocknal_data.ksnd_schedulers[idx];
+
+                if (irq != 0) {                 /* Hardware NIC */
+                        /* Remember which scheduler we chose */
+                        idx = sched - ksocknal_data.ksnd_schedulers;
+
+                        LASSERT (idx < SOCKNAL_IRQ_SCHED_MASK);
+
+                        if (bind_irq)       /* remember if we will bind below */
+                                idx |= SOCKNAL_IRQ_BOUND;
+
+                        ksocknal_data.ksnd_irq_info[irq] = idx;
+                }
+        } else {
+                /* This is a hardware NIC, associated with a CPU */
+                idx = ksocknal_data.ksnd_irq_info[irq];
+
+                /* Don't bind again if we've bound already */
+                if ((idx & SOCKNAL_IRQ_BOUND) != 0)
+                        bind_irq = 0;
+
+                sched = &ksocknal_data.ksnd_schedulers[idx & SOCKNAL_IRQ_SCHED_MASK];
+        }
+
+        sched->kss_nconns++;
+        conn->ksnc_scheduler = sched;
+
+        list_add(&conn->ksnc_list, &ksocknal_data.ksnd_socklist);
+
+        write_unlock_irqrestore (&ksocknal_data.ksnd_socklist_lock, flags);
+
+        if (bind_irq &&                         /* irq binding required */
+            irq != 0)                           /* hardware NIC */
+                ksocknal_bind_irq (irq, sched - ksocknal_data.ksnd_schedulers);
+
+        /* NOW it's safe to get called back when socket is ready... */
+        sock->sk->user_data = conn;
+        sock->sk->data_ready = ksocknal_data_ready;
+        sock->sk->write_space = ksocknal_write_space;
+
+        /* ...which I call right now to get things going */
+        ksocknal_data_ready (sock->sk, 0);
+        ksocknal_write_space (sock->sk);
+
+        CDEBUG(D_IOCTL, "conn [%p] registered for nid "LPX64"\n",
+               conn, conn->ksnc_peernid);
+
+        /* Can't unload while connection active */
+        PORTAL_MODULE_USE;
+        RETURN(0);
+
+error:
+        /* drop the reference taken by fget() */
+        fput(file);
+        return (ret);
+}
+
+/* Close the connection to 'nid'.
+ * Passing in a zero nid will close all connections.
+ *
+ * The matching connection(s) are first moved from the global socket list
+ * to a private list under the socklist write lock; each one then has its
+ * original socket callbacks restored, its user_data cleared, its
+ * scheduler's connection count dropped and the socklist reference
+ * released.
+ *
+ * Returns 0 on success, or -ENOENT when a specific nid was requested and
+ * no connection to it exists.  Must not be called from interrupt
+ * context. */
+int
+ksocknal_close_sock(ptl_nid_t nid)
+{
+        unsigned long     flags;   /* irqsave state must be unsigned long */
+        ksock_conn_t     *conn;
+        LIST_HEAD        (death_row);
+        struct list_head *tmp;
+
+        LASSERT (!in_interrupt());
+        write_lock_irqsave (&ksocknal_data.ksnd_socklist_lock, flags);
+
+        if (nid == 0) {                         /* close ALL connections */
+                /* insert 'death row' into the socket list... */
+                list_add (&death_row, &ksocknal_data.ksnd_socklist);
+                /* ...extract and reinitialise the socket list itself... */
+                list_del_init (&ksocknal_data.ksnd_socklist);
+                /* ...and voila, death row is the proud owner of all conns */
+        } else {
+                list_for_each (tmp, &ksocknal_data.ksnd_socklist) {
+
+                        conn = list_entry (tmp, ksock_conn_t, ksnc_list);
+
+                        if (conn->ksnc_peernid == nid) {
+                                list_del (&conn->ksnc_list);
+                                list_add (&conn->ksnc_list, &death_row);
+                                /* stop at the first match; the list was
+                                 * modified so iteration must not continue */
+                                break;
+                        }
+                }
+        }
+
+        write_unlock_irqrestore (&ksocknal_data.ksnd_socklist_lock, flags);
+
+        if (nid && list_empty (&death_row))
+                return (-ENOENT);
+
+        while (!list_empty (&death_row)) {
+                conn = list_entry (death_row.next, ksock_conn_t, ksnc_list);
+                list_del (&conn->ksnc_list);
+
+                /* NB I _have_ to restore the callback, rather than storing
+                 * a noop, since the socket could survive past this module
+                 * being unloaded!! */
+                conn->ksnc_sock->sk->data_ready = conn->ksnc_saved_data_ready;
+                conn->ksnc_sock->sk->write_space = conn->ksnc_saved_write_space;
+
+                /* OK; no more callbacks, but they could be in progress now,
+                 * so wait for them to complete... */
+                write_lock_irqsave (&ksocknal_data.ksnd_socklist_lock, flags);
+
+                /* ...however if I get the lock before a callback gets it,
+                 * this will make them noop
+                 */
+                conn->ksnc_sock->sk->user_data = NULL;
+
+                /* And drop the scheduler's connection count while I've got
+                 * the exclusive lock */
+                conn->ksnc_scheduler->kss_nconns--;
+
+                write_unlock_irqrestore(&ksocknal_data.ksnd_socklist_lock,
+                                        flags);
+
+                ksocknal_put_conn (conn);       /* drop ref for ksnd_socklist */
+        }
+
+        return (0);
+}
+
+/* Return the TCP option block of a socket.  Kernels before 2.5 keep it
+ * inside struct sock (tp_pinfo.af_tcp); later kernels reach it by
+ * treating the sock as a struct tcp_sock and taking its 'tcp' member. */
+struct tcp_opt *sock2tcp_opt(struct sock *sk)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+        return &(sk->tp_pinfo.af_tcp);
+#else
+        struct tcp_sock *tsk = (struct tcp_sock *)sk;
+
+        return &tsk->tcp;
+#endif
+}
+
+/* "Push" a connection: temporarily force tp->nonagle to 1, issue a
+ * TCP_NODELAY setsockopt on the socket, then put the saved nonagle value
+ * back.  NOTE(review): presumably the setsockopt call is what makes TCP
+ * send any queued data immediately - confirm against the kernel's TCP
+ * code.  The setsockopt call is asserted to succeed. */
+void
+ksocknal_push_conn (ksock_conn_t *conn)
+{
+ struct sock *sk = conn->ksnc_sock->sk;
+ struct tcp_opt *tp = sock2tcp_opt(sk);
+ int nonagle;
+ int val = 1;
+ int rc;
+ mm_segment_t oldmm;
+
+ /* save the current nonagle setting and force it on, under the socket lock */
+ lock_sock (sk);
+ nonagle = tp->nonagle;
+ tp->nonagle = 1;
+ release_sock (sk);
+
+ /* &val is a kernel address, so widen the address limit around the call */
+ oldmm = get_fs ();
+ set_fs (KERNEL_DS);
+
+ rc = sk->prot->setsockopt (sk, SOL_TCP, TCP_NODELAY,
+ (char *)&val, sizeof (val));
+ LASSERT (rc == 0);
+
+ set_fs (oldmm);
+
+ /* restore the setting saved above */
+ lock_sock (sk);
+ tp->nonagle = nonagle;
+ release_sock (sk);
+}
+
+/* Push the connection to 'nid'; passing in a zero nid pushes all
+ * connections.  Returns 0, or -ENOENT when a specific nid has no
+ * connection. */
+int
+ksocknal_push_sock (ptl_nid_t nid)
+{
+        ksock_conn_t     *conn;
+        struct list_head *pos;
+        int               target;
+        int               seen;
+
+        if (nid != 0) {
+                conn = ksocknal_get_conn (nid);
+                if (conn == NULL)
+                        return (-ENOENT);
+
+                ksocknal_push_conn (conn);
+                ksocknal_put_conn (conn);
+                return (0);
+        }
+
+        /* NB we can't remove connections from the socket list so we have to
+         * cope with them being removed from under us: on every pass the
+         * list is re-walked under the read lock up to position 'target',
+         * and that entry is referenced before the lock is dropped.
+         */
+        target = 0;
+        for (;;) {
+                conn = NULL;
+                seen = 0;
+
+                read_lock (&ksocknal_data.ksnd_socklist_lock);
+
+                list_for_each (pos, &ksocknal_data.ksnd_socklist) {
+                        if (seen++ != target)
+                                continue;
+
+                        conn = list_entry(pos, ksock_conn_t, ksnc_list);
+                        atomic_inc (&conn->ksnc_refcount); // take a ref
+                        break;
+                }
+
+                read_unlock (&ksocknal_data.ksnd_socklist_lock);
+
+                /* ran off the end of the list: all done */
+                if (conn == NULL)
+                        break;
+
+                ksocknal_push_conn (conn);
+                ksocknal_put_conn (conn);
+                target++;
+        }
+
+        return (0);
+}
+
+/* Look up the connection to peer 'nid' on the global socket list.
+ * On a match the connection's refcount is incremented on behalf of the
+ * caller (while the read lock is still held) and the connection is
+ * returned; otherwise NULL is returned. */
+ksock_conn_t *
+ksocknal_get_conn (ptl_nid_t nid)
+{
+        struct list_head *pos;
+        ksock_conn_t     *candidate;
+        ksock_conn_t     *found = NULL;
+
+        PROF_START(conn_list_walk);
+
+        read_lock (&ksocknal_data.ksnd_socklist_lock);
+
+        list_for_each(pos, &ksocknal_data.ksnd_socklist) {
+                candidate = list_entry(pos, ksock_conn_t, ksnc_list);
+
+                if (candidate->ksnc_peernid != nid)
+                        continue;
+
+                /* caller is referencing */
+                atomic_inc (&candidate->ksnc_refcount);
+                found = candidate;
+                break;
+        }
+
+        read_unlock (&ksocknal_data.ksnd_socklist_lock);
+
+        if (found != NULL)
+                CDEBUG(D_NET, "got conn [%p] -> "LPX64" (%d)\n",
+                       found, nid, atomic_read (&found->ksnc_refcount));
+        else
+                CDEBUG(D_NET, "No connection found when looking for nid "LPX64"\n",
+                       nid);
+
+        PROF_FINISH(conn_list_walk);
+        return (found);
+}
+
+/* Final teardown of a connection: drop the file reference held in
+ * ksnc_file, free the connection structure and release the module use
+ * count.  'conn' must not be used afterwards. */
+void
+ksocknal_close_conn (ksock_conn_t *conn)
+{
+        struct file *sock_file = conn->ksnc_file;
+
+        CDEBUG (D_NET, "connection [%p] closed \n", conn);
+
+        fput (sock_file);
+        PORTAL_FREE (conn, sizeof (*conn));
+
+        /* One less connection keeping us hanging on */
+        PORTAL_MODULE_UNUSE;
+}
+
+/* Dispose of a connection whose refcount has reached zero (asserted,
+ * together with the socket callbacks having been restored and user_data
+ * cleared).  Outside interrupt context the connection is closed
+ * directly; in interrupt context it is queued on the reaper list and the
+ * reaper is woken instead. */
+void
+_ksocknal_put_conn (ksock_conn_t *conn)
+{
+        unsigned long flags;
+
+        CDEBUG (D_NET, "connection [%p] handed the black spot\n", conn);
+
+        /* "But what is the black spot, captain?" I asked.
+         * "That's a summons, mate..." */
+
+        LASSERT (atomic_read (&conn->ksnc_refcount) == 0);
+        LASSERT (conn->ksnc_sock->sk->data_ready != ksocknal_data_ready);
+        LASSERT (conn->ksnc_sock->sk->write_space != ksocknal_write_space);
+        LASSERT (conn->ksnc_sock->sk->user_data == NULL);
+        LASSERT (!conn->ksnc_rx_scheduled);
+
+        if (in_interrupt()) {
+                /* defer the close to the reaper */
+                spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags);
+
+                list_add (&conn->ksnc_list, &ksocknal_data.ksnd_reaper_list);
+                wake_up (&ksocknal_data.ksnd_reaper_waitq);
+
+                spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags);
+                return;
+        }
+
+        ksocknal_close_conn (conn);
+}
+
+/* Dispatch a NAL ioctl command to its handler and return the handler's
+ * result; unrecognised commands yield -EINVAL.  'private' is unused. */
+int
+ksocknal_cmd(struct portal_ioctl_data * data, void * private)
+{
+        LASSERT (data != NULL);
+
+        switch(data->ioc_nal_cmd) {
+        case NAL_CMD_REGISTER_PEER_FD:
+                return ksocknal_add_sock(data->ioc_nid, data->ioc_fd,
+                                         data->ioc_flags);
+
+        case NAL_CMD_CLOSE_CONNECTION:
+                return ksocknal_close_sock(data->ioc_nid);
+
+        case NAL_CMD_REGISTER_MYNID:
+                return ksocknal_set_mynid (data->ioc_nid);
+
+        case NAL_CMD_PUSH_CONNECTION:
+                return ksocknal_push_sock (data->ioc_nid);
+        }
+
+        return -EINVAL;
+}
+
+/* Free the buffers hanging off ksocknal_data: the forwarding message
+ * buffers (including every non-NULL page they own), the ltx descriptor
+ * array and the scheduler array.  Each array is skipped when its pointer
+ * is NULL. */
+void
+ksocknal_free_buffers (void)
+{
+        if (ksocknal_data.ksnd_fmbs != NULL) {
+                ksock_fmb_t *fmbs = (ksock_fmb_t *)ksocknal_data.ksnd_fmbs;
+                int          which;
+                int          pg;
+
+                for (which = 0;
+                     which < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS);
+                     which++) {
+                        ksock_fmb_t *fmb = &fmbs[which];
+
+                        for (pg = 0; pg < fmb->fmb_npages; pg++) {
+                                if (fmb->fmb_pages[pg] == NULL)
+                                        continue;
+                                __free_page (fmb->fmb_pages[pg]);
+                        }
+                }
+
+                PORTAL_FREE (ksocknal_data.ksnd_fmbs,
+                             sizeof (ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS +
+                                                     SOCKNAL_LARGE_FWD_NMSGS));
+        }
+
+        if (ksocknal_data.ksnd_ltxs != NULL) {
+                PORTAL_FREE (ksocknal_data.ksnd_ltxs,
+                             sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS +
+                                                     SOCKNAL_NNBLK_LTXS));
+        }
+
+        if (ksocknal_data.ksnd_schedulers != NULL) {
+                PORTAL_FREE (ksocknal_data.ksnd_schedulers,
+                             sizeof (ksock_sched_t) * SOCKNAL_N_SCHED);
+        }
+}
+
+/* Module teardown.  Undoes initialisation in reverse order, starting
+ * from whatever stage ksnd_init says was reached; each case falls
+ * through to the cleanup of the earlier stages.  Also called from
+ * ksocknal_module_init() to unwind a partially completed init. */
+void __exit
+ksocknal_module_fini (void)
+{
+        int   i;
+
+        CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
+               atomic_read (&portal_kmemory));
+
+        switch (ksocknal_data.ksnd_init) {
+        default:
+                LASSERT (0);
+
+        case SOCKNAL_INIT_ALL:
+                kportal_nal_unregister(SOCKNAL);
+                PORTAL_SYMBOL_UNREGISTER (ksocknal_ni);
+                /* fall through */
+
+        case SOCKNAL_INIT_PTL:
+                PtlNIFini(ksocknal_ni);
+                lib_fini(&ksocknal_lib);
+                /* fall through */
+
+        case SOCKNAL_INIT_DATA:
+                /* Module refcount only gets to zero when all connections
+                 * have been closed so all lists must be empty */
+                LASSERT (list_empty (&ksocknal_data.ksnd_socklist));
+                LASSERT (list_empty (&ksocknal_data.ksnd_reaper_list));
+                LASSERT (list_empty (&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns));
+                LASSERT (list_empty (&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns));
+
+                if (ksocknal_data.ksnd_schedulers != NULL)
+                        for (i = 0; i < SOCKNAL_N_SCHED; i++) {
+                                ksock_sched_t *kss =
+                                        &ksocknal_data.ksnd_schedulers[i];
+
+                                LASSERT (list_empty (&kss->kss_tx_conns));
+                                LASSERT (list_empty (&kss->kss_rx_conns));
+                                LASSERT (kss->kss_nconns == 0);
+                        }
+
+                /* stop router calling me */
+                kpr_shutdown (&ksocknal_data.ksnd_router);
+
+                /* flag threads to terminate; wake and wait for them to die */
+                ksocknal_data.ksnd_shuttingdown = 1;
+                wake_up_all (&ksocknal_data.ksnd_reaper_waitq);
+
+                /* same NULL guard as the sanity checks above: the scheduler
+                 * array may not have been allocated */
+                if (ksocknal_data.ksnd_schedulers != NULL)
+                        for (i = 0; i < SOCKNAL_N_SCHED; i++)
+                                wake_up_all(&ksocknal_data.ksnd_schedulers[i].kss_waitq);
+
+                while (atomic_read (&ksocknal_data.ksnd_nthreads) != 0) {
+                        CDEBUG (D_NET, "waitinf for %d threads to terminate\n",
+                                atomic_read (&ksocknal_data.ksnd_nthreads));
+                        /* poll once a second until every thread has exited */
+                        set_current_state (TASK_UNINTERRUPTIBLE);
+                        schedule_timeout (HZ);
+                }
+
+                kpr_deregister (&ksocknal_data.ksnd_router);
+
+                ksocknal_free_buffers();
+                /* fall through */
+
+        case SOCKNAL_INIT_NOTHING:
+                break;
+        }
+
+        CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
+               atomic_read (&portal_kmemory));
+
+        printk(KERN_INFO "Routing socket NAL unloaded (final mem %d)\n",
+               atomic_read(&portal_kmemory));
+}
+
+
+/*
+ * Module entry point: bring the socket NAL up.
+ *
+ * Fills in the API/lib dispatch tables, initialises ksocknal_data (lists,
+ * locks, wait queues), allocates the per-scheduler state and the pool of
+ * local transmit descriptors, calls PtlNIInit(), starts SOCKNAL_N_SCHED
+ * scheduler threads plus the reaper, then registers with the router and
+ * the command interface.  Forwarding buffers are only allocated when
+ * router registration succeeds.
+ *
+ * ksocknal_data.ksnd_init is advanced as each stage completes.  Every
+ * failure after the scheduler array has been allocated unwinds through
+ * ksocknal_module_fini().
+ *
+ * Returns 0 on success, -ENOMEM if an allocation fails, otherwise the
+ * error code returned by the step that failed.
+ */
+int __init
+ksocknal_module_init (void)
+{
+        int pkmem = atomic_read(&portal_kmemory);
+        int rc;
+        int i;
+        int j;
+
+        /* packet descriptor must fit in a router descriptor's scratchpad */
+        LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t));
+
+        LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
+
+        ksocknal_api.forward = ksocknal_api_forward;
+        ksocknal_api.shutdown = ksocknal_api_shutdown;
+        ksocknal_api.yield = ksocknal_api_yield;
+        ksocknal_api.validate = NULL; /* our api validate is a NOOP */
+        ksocknal_api.lock = ksocknal_api_lock;
+        ksocknal_api.unlock = ksocknal_api_unlock;
+        ksocknal_api.nal_data = &ksocknal_data;
+
+        ksocknal_lib.nal_data = &ksocknal_data;
+
+        memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */
+
+        INIT_LIST_HEAD(&ksocknal_data.ksnd_socklist);
+        rwlock_init(&ksocknal_data.ksnd_socklist_lock);
+
+        ksocknal_data.ksnd_nal_cb = &ksocknal_lib;
+        spin_lock_init (&ksocknal_data.ksnd_nal_cb_lock);
+
+        spin_lock_init(&ksocknal_data.ksnd_small_fmp.fmp_lock);
+        INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_idle_fmbs);
+        INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns);
+
+        spin_lock_init(&ksocknal_data.ksnd_large_fmp.fmp_lock);
+        INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs);
+        INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns);
+
+        spin_lock_init(&ksocknal_data.ksnd_idle_ltx_lock);
+        INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_nblk_ltx_list);
+        INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_ltx_list);
+        init_waitqueue_head(&ksocknal_data.ksnd_idle_ltx_waitq);
+
+        spin_lock_init (&ksocknal_data.ksnd_reaper_lock);
+        INIT_LIST_HEAD (&ksocknal_data.ksnd_reaper_list);
+        init_waitqueue_head(&ksocknal_data.ksnd_reaper_waitq);
+
+        /* every irq starts out with no scheduler assigned */
+        memset (&ksocknal_data.ksnd_irq_info, SOCKNAL_IRQ_UNASSIGNED,
+                sizeof (ksocknal_data.ksnd_irq_info));
+
+        /* flag lists/ptrs/locks initialised */
+        ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA;
+
+        PORTAL_ALLOC(ksocknal_data.ksnd_schedulers,
+                     sizeof(ksock_sched_t) * SOCKNAL_N_SCHED);
+        if (ksocknal_data.ksnd_schedulers == NULL)
+                RETURN(-ENOMEM);
+
+        for (i = 0; i < SOCKNAL_N_SCHED; i++) {
+                ksock_sched_t *kss = &ksocknal_data.ksnd_schedulers[i];
+
+                spin_lock_init (&kss->kss_lock);
+                INIT_LIST_HEAD (&kss->kss_rx_conns);
+                INIT_LIST_HEAD (&kss->kss_tx_conns);
+#if SOCKNAL_ZC
+                INIT_LIST_HEAD (&kss->kss_zctxdone_list);
+#endif
+                init_waitqueue_head (&kss->kss_waitq);
+        }
+
+        CERROR ("ltx "LPSZ", total "LPSZ"\n", sizeof (ksock_ltx_t),
+                sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
+
+        PORTAL_ALLOC(ksocknal_data.ksnd_ltxs,
+                     sizeof(ksock_ltx_t) * (SOCKNAL_NLTXS +SOCKNAL_NNBLK_LTXS));
+        if (ksocknal_data.ksnd_ltxs == NULL) {
+                ksocknal_module_fini ();
+                return (-ENOMEM);
+        }
+
+        /* Deterministic bugs please */
+        memset (ksocknal_data.ksnd_ltxs, 0xeb,
+                sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
+
+        /* The first SOCKNAL_NLTXS descriptors live on the normal idle list,
+         * the rest on the list reserved for callers that may not block. */
+        for (i = 0; i < SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS; i++) {
+                ksock_ltx_t *ltx = &((ksock_ltx_t *)ksocknal_data.ksnd_ltxs)[i];
+
+                ltx->ltx_idle = i < SOCKNAL_NLTXS ?
+                                &ksocknal_data.ksnd_idle_ltx_list :
+                                &ksocknal_data.ksnd_idle_nblk_ltx_list;
+                list_add (&(ltx->ltx_tx.tx_list), ltx->ltx_idle);
+        }
+
+        rc = PtlNIInit(ksocknal_init, 32, 4, 0, &ksocknal_ni);
+        if (rc != 0) {
+                CERROR("ksocknal: PtlNIInit failed: error %d\n", rc);
+                ksocknal_module_fini ();
+                RETURN (rc);
+        }
+        PtlNIDebug(ksocknal_ni, ~0);
+
+        ksocknal_data.ksnd_init = SOCKNAL_INIT_PTL; /* flag PtlNIInit() called */
+
+        for (i = 0; i < SOCKNAL_N_SCHED; i++) {
+                rc = ksocknal_thread_start (ksocknal_scheduler,
+                                            &ksocknal_data.ksnd_schedulers[i]);
+                if (rc != 0) {
+                        CERROR("Can't spawn socknal scheduler[%d]: %d\n",
+                               i, rc);
+                        ksocknal_module_fini ();
+                        RETURN (rc);
+                }
+        }
+
+        rc = ksocknal_thread_start (ksocknal_reaper, NULL);
+        if (rc != 0) {
+                CERROR("Can't spawn socknal reaper: %d\n", rc);
+                ksocknal_module_fini ();
+                RETURN (rc);
+        }
+
+        /* Failing to register with the router is not fatal: we just don't
+         * route, and so need no forwarding buffers. */
+        rc = kpr_register(&ksocknal_data.ksnd_router,
+                          &ksocknal_router_interface);
+        if (rc != 0) {
+                CDEBUG(D_NET, "Can't initialise routing interface "
+                       "(rc = %d): not routing\n", rc);
+        } else {
+                /* Only allocate forwarding buffers if I'm on a gateway */
+
+                PORTAL_ALLOC(ksocknal_data.ksnd_fmbs,
+                             sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS +
+                                                    SOCKNAL_LARGE_FWD_NMSGS));
+                if (ksocknal_data.ksnd_fmbs == NULL) {
+                        ksocknal_module_fini ();
+                        RETURN(-ENOMEM);
+                }
+
+                /* NULL out buffer pointers etc */
+                memset(ksocknal_data.ksnd_fmbs, 0,
+                       sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS +
+                                              SOCKNAL_LARGE_FWD_NMSGS));
+
+                for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS +
+                                 SOCKNAL_LARGE_FWD_NMSGS); i++) {
+                        ksock_fmb_t *fmb =
+                                &((ksock_fmb_t *)ksocknal_data.ksnd_fmbs)[i];
+
+                        if (i < SOCKNAL_SMALL_FWD_NMSGS) {
+                                fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES;
+                                fmb->fmb_pool = &ksocknal_data.ksnd_small_fmp;
+                        } else {
+                                fmb->fmb_npages = SOCKNAL_LARGE_FWD_PAGES;
+                                fmb->fmb_pool = &ksocknal_data.ksnd_large_fmp;
+                        }
+
+                        LASSERT (fmb->fmb_npages > 0);
+                        for (j = 0; j < fmb->fmb_npages; j++) {
+                                fmb->fmb_pages[j] = alloc_page (GFP_KERNEL);
+
+                                if (fmb->fmb_pages[j] == NULL) {
+                                        ksocknal_module_fini ();
+                                        return (-ENOMEM);
+                                }
+
+                                LASSERT(page_address (fmb->fmb_pages[j]) !=
+                                        NULL);
+                        }
+
+                        list_add(&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs);
+                }
+        }
+
+        rc = kportal_nal_register(SOCKNAL, &ksocknal_cmd, NULL);
+        if (rc != 0) {
+                CERROR ("Can't initialise command interface (rc = %d)\n", rc);
+                ksocknal_module_fini ();
+                return (rc);
+        }
+
+        PORTAL_SYMBOL_REGISTER(ksocknal_ni);
+
+        /* flag everything initialised */
+        ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
+
+        printk(KERN_INFO "Routing socket NAL loaded (Routing %s, initial "
+               "mem %d)\n",
+               kpr_routing (&ksocknal_data.ksnd_router) ?
+               "enabled" : "disabled", pkmem);
+
+        return (0);
+}
+
+/* Module metadata, load/unload hooks and the exported network interface
+ * handle (ksocknal_ni). */
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("Kernel TCP Socket NAL v0.01");
+MODULE_LICENSE("GPL");
+
+module_init(ksocknal_module_init);
+module_exit(ksocknal_module_fini);
+
+EXPORT_SYMBOL (ksocknal_ni);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Author: Zach Brown <zab@zabbo.net>
+ * Author: Peter J. Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#define DEBUG_PORTAL_ALLOC
+#define EXPORT_SYMTAB
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/smp_lock.h>
+#include <linux/unistd.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <linux/uio.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/list.h>
+#include <linux/kmod.h>
+#include <asm/uaccess.h>
+#include <asm/segment.h>
+
+#define DEBUG_SUBSYSTEM S_SOCKNAL
+
+#include <linux/kp30.h>
+#include <portals/p30.h>
+#include <portals/lib-p30.h>
+
+/* Sizing constants for the socket NAL.
+ * NB SOCKNAL_N_SCHED expands to a function call, so it is evaluated at
+ * every use rather than being a compile-time constant. */
+#define SOCKNAL_N_SCHED num_online_cpus() /* # socknal schedulers */
+
+#if PTL_LARGE_MTU
+# define SOCKNAL_MAX_FWD_PAYLOAD (256<<10) /* biggest payload I can forward */
+#else
+# define SOCKNAL_MAX_FWD_PAYLOAD (64<<10) /* biggest payload I can forward */
+#endif
+
+#define SOCKNAL_NLTXS 128 /* # normal transmit messages */
+#define SOCKNAL_NNBLK_LTXS 128 /* # transmit messages reserved if can't block */
+
+#define SOCKNAL_SMALL_FWD_NMSGS 128 /* # small messages I can be forwarding at any time */
+#define SOCKNAL_LARGE_FWD_NMSGS 64 /* # large messages I can be forwarding at any time */
+
+#define SOCKNAL_SMALL_FWD_PAGES 1 /* # pages in a small message fwd buffer */
+
+/* enough whole pages to hold a portals header plus the biggest payload */
+#define SOCKNAL_LARGE_FWD_PAGES (PAGE_ALIGN (sizeof (ptl_hdr_t) + SOCKNAL_MAX_FWD_PAYLOAD) >> PAGE_SHIFT)
+ /* # pages in a large message fwd buffer */
+
+#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */
+
+/* 80% of the socket's send buffer size (sk->sndbuf) */
+#define SOCKNAL_TX_LOW_WATER(sk) (((sk)->sndbuf*8)/10)
+
+/* A pool of forwarding message buffers (ksock_fmb_t).  ksock_nal_data_t
+ * embeds two of these: ksnd_small_fmp and ksnd_large_fmp. */
+typedef struct /* pool of forwarding buffers */
+{
+ spinlock_t fmp_lock; /* serialise */
+ struct list_head fmp_idle_fmbs; /* buffers waiting for a connection */
+ struct list_head fmp_blocked_conns; /* connections waiting for a buffer */
+} ksock_fmb_pool_t;
+
+
+/* State owned by one scheduler thread; ksock_nal_data_t::ksnd_schedulers
+ * points at an array of SOCKNAL_N_SCHED of these. */
+typedef struct /* per scheduler state */
+{
+ spinlock_t kss_lock; /* serialise */
+ struct list_head kss_rx_conns; /* conn waiting to be read */
+ struct list_head kss_tx_conns; /* conn waiting to be written */
+#if SOCKNAL_ZC
+ struct list_head kss_zctxdone_list; /* completed ZC transmits */
+#endif
+ wait_queue_head_t kss_waitq; /* where scheduler sleeps */
+ int kss_nconns; /* # connections assigned to this scheduler */
+} ksock_sched_t;
+
+/* All global state of the socket NAL; the single instance is the extern
+ * 'ksocknal_data' declared at the end of this header. */
+typedef struct {
+ int ksnd_init; /* initialisation state */
+
+ struct list_head ksnd_socklist; /* all my connections */
+ rwlock_t ksnd_socklist_lock; /* stabilise add/find/remove */
+
+ ptl_nid_t ksnd_mynid;
+ nal_cb_t *ksnd_nal_cb;
+ spinlock_t ksnd_nal_cb_lock; /* lib cli/sti lock */
+
+ atomic_t ksnd_nthreads; /* # live threads */
+ int ksnd_shuttingdown; /* tell threads to exit */
+ ksock_sched_t *ksnd_schedulers; /* scheduler state */
+
+ kpr_router_t ksnd_router; /* THE router */
+
+ void *ksnd_fmbs; /* all the pre-allocated FMBs */
+ ksock_fmb_pool_t ksnd_small_fmp; /* small message forwarding buffers */
+ ksock_fmb_pool_t ksnd_large_fmp; /* large message forwarding buffers */
+
+ void *ksnd_ltxs; /* all the pre-allocated LTXs */
+ spinlock_t ksnd_idle_ltx_lock; /* serialise ltx alloc/free */
+ struct list_head ksnd_idle_ltx_list; /* where to get an idle LTX */
+ struct list_head ksnd_idle_nblk_ltx_list; /* where to get an idle LTX if you can't block */
+ wait_queue_head_t ksnd_idle_ltx_waitq; /* where to block for an idle LTX */
+
+ struct list_head ksnd_reaper_list; /* conn waiting to be reaped */
+ wait_queue_head_t ksnd_reaper_waitq; /* reaper sleeps here */
+ spinlock_t ksnd_reaper_lock; /* serialise */
+ unsigned char ksnd_irq_info[NR_IRQS]; /* irq->scheduler lookup */
+} ksock_nal_data_t;
+
+/* Values of ksock_nal_data_t::ksnd_init, in the order initialisation
+ * reaches them. */
+#define SOCKNAL_INIT_NOTHING 0
+#define SOCKNAL_INIT_DATA 1
+#define SOCKNAL_INIT_PTL 2
+#define SOCKNAL_INIT_ALL 3
+
+/* Encoding of each ksnd_irq_info[] byte */
+#define SOCKNAL_IRQ_BOUND 0x80 /* flag we _did_ bind already */
+#define SOCKNAL_IRQ_SCHED_MASK 0x7f /* we assume < 127 CPUs */
+#define SOCKNAL_IRQ_UNASSIGNED 0xff /* flag unassigned */
+
+/* A packet just assembled for transmission is represented by 1 or more
+ * struct iovec fragments and 0 or more ptl_kiov_t fragments. Forwarded
+ * messages, or messages from an MD with PTL_MD_KIOV _not_ set have 0
+ * ptl_kiov_t fragments. Messages from an MD with PTL_MD_KIOV set, have 1
+ * struct iovec fragment (the header) and up to PTL_MD_MAX_IOV ptl_kiov_t
+ * fragments.
+ *
+ * On the receive side, initially 1 struct iovec fragment is posted for
+ * receive (the header). Once the header has been received, if the message
+ * requires forwarding or will be received into mapped memory, up to
+ * PTL_MD_MAX_IOV struct iovec fragments describe the target memory.
+ * Otherwise up to PTL_MD_MAX_IOV ptl_kiov_t fragments are used.
+ */
+
+/* Descriptor for one outgoing packet.  tx_nob, tx_iov/tx_niov and
+ * tx_kiov/tx_nkiov are consumed in place as fragments are sent. */
+typedef struct /* transmit packet */
+{
+ struct list_head tx_list; /* queue on conn for transmission etc */
+ char tx_isfwd; /* forwarding / sourced here */
+ int tx_nob; /* # packet bytes */
+ int tx_niov; /* # packet iovec frags */
+ struct iovec *tx_iov; /* packet iovec frags */
+ int tx_nkiov; /* # packet page frags */
+ ptl_kiov_t *tx_kiov; /* packet page frags */
+#if SOCKNAL_ZC
+ ksock_sched_t *tx_sched; /* who to wake on callback */
+ zccd_t tx_zccd; /* zero copy callback descriptor */
+#endif
+} ksock_tx_t;
+
+/* Recover the enclosing ksock_tx_t from a pointer to its tx_zccd member
+ * (a member that only exists when SOCKNAL_ZC is set). */
+#define KSOCK_ZCCD_2_TX(ptr) list_entry (ptr, ksock_tx_t, tx_zccd)
+/* network zero copy callback descriptor embedded in ksock_tx_t */
+
+/* space for the tx frag descriptors: hdr is always 1 iovec
+ * and payload is up to PTL_MD_MAX_IOV frags of either type. */
+typedef struct
+{
+ struct iovec hdr;
+ union {
+ struct iovec iov[PTL_MD_MAX_IOV];
+ ptl_kiov_t kiov[PTL_MD_MAX_IOV];
+ } payload;
+} ksock_txiovspace_t;
+
+/* A transmit descriptor for a packet sourced locally: wraps a ksock_tx_t
+ * together with the storage for its header and fragment descriptors. */
+typedef struct /* locally transmitted packet */
+{
+ ksock_tx_t ltx_tx; /* send info */
+ struct list_head *ltx_idle; /* where to put when idle */
+ void *ltx_private; /* lib_finalize() callback arg */
+ void *ltx_cookie; /* lib_finalize() callback arg */
+ ksock_txiovspace_t ltx_iov_space; /* where to stash frag descriptors */
+ ptl_hdr_t ltx_hdr; /* buffer for packet header */
+} ksock_ltx_t;
+
+/* Map a ksock_tx_t pointer back to the structure it is embedded in; which
+ * macro applies depends on whether the packet is forwarded or local. */
+#define KSOCK_TX_2_KPR_FWD_DESC(ptr) list_entry ((kprfd_scratch_t *)ptr, kpr_fwd_desc_t, kprfd_scratch)
+/* forwarded packets (router->socknal) embedded in kpr_fwd_desc_t::kprfd_scratch */
+
+#define KSOCK_TX_2_KSOCK_LTX(ptr) list_entry (ptr, ksock_ltx_t, ltx_tx)
+/* local packets (lib->socknal) embedded in ksock_ltx_t::ltx_tx */
+
+/* NB list_entry() is used here as convenient macro for calculating a
+ * pointer to a struct from the address of a member.
+ */
+
+/* One forwarding buffer.  The page and iovec arrays are sized for the
+ * large case; fmb_npages says how many pages this buffer actually has. */
+typedef struct /* Kernel portals Socket Forwarding message buffer */
+{ /* (socknal->router) */
+ struct list_head fmb_list; /* queue idle */
+ kpr_fwd_desc_t fmb_fwd; /* router's descriptor */
+ int fmb_npages; /* # pages allocated */
+ ksock_fmb_pool_t *fmb_pool; /* owning pool */
+ struct page *fmb_pages[SOCKNAL_LARGE_FWD_PAGES];
+ struct iovec fmb_iov[SOCKNAL_LARGE_FWD_PAGES];
+} ksock_fmb_t;
+
+/* space for the rx frag descriptors; we either read a single contiguous
+ * header, or PTL_MD_MAX_IOV frags of payload of either type.
+ * The two arrays share storage, so only one kind is live at a time. */
+typedef union {
+ struct iovec iov[PTL_MD_MAX_IOV];
+ ptl_kiov_t kiov[PTL_MD_MAX_IOV];
+} ksock_rxiovspace_t;
+
+/* Receive states (presumably the values of ksock_conn_t::ksnc_rx_state,
+ * "what is being read" -- confirm against the receive path) */
+#define SOCKNAL_RX_HEADER 1 /* reading header */
+#define SOCKNAL_RX_BODY 2 /* reading body (to deliver here) */
+#define SOCKNAL_RX_BODY_FWD 3 /* reading body (to forward) */
+#define SOCKNAL_RX_SLOP 4 /* skipping body */
+#define SOCKNAL_RX_GET_FMB 5 /* scheduled for forwarding */
+#define SOCKNAL_RX_FMB_SLEEP 6 /* blocked waiting for a fwd desc */
+
+/* One connection to a peer: the socket, its saved callbacks, and separate
+ * reader and writer state.  Reference counted via ksnc_refcount; see
+ * ksocknal_put_conn(). */
+typedef struct
+{
+ struct list_head ksnc_list; /* stash on global socket list */
+ struct file *ksnc_file; /* socket filp */
+ struct socket *ksnc_sock; /* actual socket */
+ void *ksnc_saved_data_ready; /* socket's original data_ready() callback */
+ void *ksnc_saved_write_space; /* socket's original write_space() callback */
+ ptl_nid_t ksnc_peernid; /* who's on the other end */
+ atomic_t ksnc_refcount; /* # users */
+ ksock_sched_t *ksnc_scheduler; /* who schedules this connection */
+
+ /* READER */
+ struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */
+ volatile int ksnc_rx_ready; /* data ready to read */
+ int ksnc_rx_scheduled; /* being progressed */
+ int ksnc_rx_state; /* what is being read */
+ int ksnc_rx_nob_left; /* # bytes to next hdr/body */
+ int ksnc_rx_nob_wanted; /* bytes actually wanted */
+ int ksnc_rx_niov; /* # iovec frags */
+ struct iovec *ksnc_rx_iov; /* the iovec frags */
+ int ksnc_rx_nkiov; /* # page frags */
+ ptl_kiov_t *ksnc_rx_kiov; /* the page frags */
+ ksock_rxiovspace_t ksnc_rx_iov_space; /* space for frag descriptors */
+ void *ksnc_cookie; /* rx lib_finalize passthru arg */
+ ptl_hdr_t ksnc_hdr; /* where I read headers into */
+
+ /* WRITER */
+ struct list_head ksnc_tx_list; /* where I enq waiting for output space */
+ struct list_head ksnc_tx_queue; /* packets waiting to be sent */
+ volatile int ksnc_tx_ready; /* write space */
+ int ksnc_tx_scheduled; /* being progressed */
+
+} ksock_conn_t;
+
+/* Connection management entry points (defined outside this header) */
+extern int ksocknal_add_sock (ptl_nid_t nid, int fd, int client);
+extern int ksocknal_close_sock(ptl_nid_t nid);
+extern int ksocknal_set_mynid(ptl_nid_t nid);
+extern int ksocknal_push_sock(ptl_nid_t nid);
+extern ksock_conn_t *ksocknal_get_conn (ptl_nid_t nid);
+extern void _ksocknal_put_conn (ksock_conn_t *conn);
+extern void ksocknal_close_conn (ksock_conn_t *conn);
+
+/*
+ * Drop one reference on 'conn'.  Whoever drops the last reference hands
+ * the connection to _ksocknal_put_conn().
+ */
+static inline void
+ksocknal_put_conn (ksock_conn_t *conn)
+{
+        CDEBUG (D_OTHER, "putting conn[%p] -> "LPX64" (%d)\n",
+                conn, conn->ksnc_peernid, atomic_read (&conn->ksnc_refcount));
+
+        /* still referenced by somebody else? */
+        if (!atomic_dec_and_test (&conn->ksnc_refcount))
+                return;
+
+        _ksocknal_put_conn (conn);
+}
+
+/* Thread bodies, packet handling and socket callbacks */
+extern int ksocknal_thread_start (int (*fn)(void *arg), void *arg);
+extern int ksocknal_new_packet (ksock_conn_t *conn, int skip);
+extern void ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd);
+extern int ksocknal_scheduler (void *arg);
+extern int ksocknal_reaper (void *arg);
+extern void ksocknal_data_ready(struct sock *sk, int n);
+extern void ksocknal_write_space(struct sock *sk);
+
+
+/* The lib callback table and the NAL's global state */
+extern nal_cb_t ksocknal_lib;
+extern ksock_nal_data_t ksocknal_data;
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Author: Zach Brown <zab@zabbo.net>
+ * Author: Peter J. Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "socknal.h"
+
+/* Packet counters */
+atomic_t ksocknal_packets_received;
+atomic_t ksocknal_packets_launched;
+atomic_t ksocknal_packets_being_sent;
+
+#if SOCKNAL_ZC
+/* Zero-copy controls: the send paths only attempt zero-copy when
+ * ksocknal_do_zc is set and the piece to send is at least
+ * ksocknal_zc_min_frag bytes. */
+int ksocknal_do_zc = 1;
+int ksocknal_zc_min_frag = 2048;
+#endif
+
+/*
+ * LIB functions follow
+ *
+ */
+/*
+ * "read" callback: copy 'len' bytes from 'src_addr' into 'dst_addr'.
+ * 'private' is not used.  Always returns 0.
+ */
+int
+ksocknal_read(nal_cb_t *nal, void *private, void *dst_addr,
+              user_ptr src_addr, size_t len)
+{
+        CDEBUG(D_NET, LPX64": reading %ld bytes from %p -> %p\n",
+               nal->ni.nid, (long)len, src_addr, dst_addr);
+
+        memcpy(dst_addr, src_addr, len);
+
+        return (0);
+}
+
+/*
+ * "write" callback: copy 'len' bytes from 'src_addr' into 'dst_addr'.
+ * 'private' is not used.  Always returns 0.
+ */
+int
+ksocknal_write(nal_cb_t *nal, void *private, user_ptr dst_addr,
+               void *src_addr, size_t len)
+{
+        CDEBUG(D_NET, LPX64": writing %ld bytes from %p -> %p\n",
+               nal->ni.nid, (long)len, src_addr, dst_addr);
+
+        memcpy(dst_addr, src_addr, len);
+
+        return (0);
+}
+
+/*
+ * Event callback: pass 'ev' to the event queue's handler, if it has one.
+ * Always returns 0.
+ */
+int
+ksocknal_callback (nal_cb_t * nal, void *private, lib_eq_t *eq,
+                   ptl_event_t *ev)
+{
+        CDEBUG(D_NET, LPX64": callback eq %p ev %p\n",
+               nal->ni.nid, eq, ev);
+
+        /* no handler registered => nothing to deliver */
+        if (eq->event_callback == NULL)
+                return (0);
+
+        eq->event_callback(ev);
+        return (0);
+}
+
+/*
+ * "malloc" callback: allocate 'len' bytes with PORTAL_ALLOC and zero
+ * them.  Returns NULL if the allocation fails.
+ */
+void *
+ksocknal_malloc(nal_cb_t *nal, size_t len)
+{
+        void *buf;
+
+        PORTAL_ALLOC(buf, len);
+        if (buf == NULL)
+                return (NULL);
+
+        memset(buf, 0, len);
+        return (buf);
+}
+
+/* "free" callback: release the 'len' byte buffer 'buf' with PORTAL_FREE. */
+void
+ksocknal_free(nal_cb_t *nal, void *buf, size_t len)
+{
+        PORTAL_FREE(buf, len);
+}
+
+/*
+ * "printf" callback: format the message into a bounded stack buffer and
+ * emit it through CDEBUG at D_NET.  Output longer than the buffer is
+ * truncated.
+ */
+void
+ksocknal_printf(nal_cb_t *nal, const char *fmt, ...)
+{
+        char    msg[256];
+        va_list ap;
+
+        va_start (ap, fmt);
+        vsnprintf (msg, sizeof (msg), fmt, ap);  /* bounded by the buffer */
+        va_end (ap);
+
+        /* belt and braces: guarantee termination whatever vsnprintf did */
+        msg[sizeof (msg) - 1] = 0;
+
+        CDEBUG (D_NET, "%s", msg);
+}
+
+/*
+ * "cli" callback: take the NAL's callback spinlock.  'flags' is not
+ * touched, since a plain spin_lock() is used.
+ */
+void
+ksocknal_cli(nal_cb_t *nal, unsigned long *flags)
+{
+        ksock_nal_data_t *data;
+
+        data = nal->nal_data;
+        spin_lock(&data->ksnd_nal_cb_lock);
+}
+
+/*
+ * "sti" callback: release the NAL's callback spinlock taken by
+ * ksocknal_cli().  'flags' is not touched.
+ */
+void
+ksocknal_sti(nal_cb_t *nal, unsigned long *flags)
+{
+        ksock_nal_data_t *data = nal->nal_data;
+
+        spin_unlock(&data->ksnd_nal_cb_lock);
+}
+
+/*
+ * "dist" callback: report the distance to 'nid' in '*dist' -- 0 for this
+ * node's own nid, 1 for anything else.  Always returns 0.
+ */
+int
+ksocknal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
+{
+        /* I would guess that if ksocknal_get_conn(nid) == NULL,
+           and we're not routing, then 'nid' is very distant :) */
+        *dist = (nal->ni.nid == nid) ? 0 : 1;
+
+        return (0);
+}
+
+/*
+ * Take an idle local transmit descriptor off its idle list.
+ *
+ * may_block != 0: sleep until the normal idle list has a descriptor.
+ * may_block == 0: never sleep; if the normal list is empty fall back to
+ *                 the list reserved for non-blocking callers, and return
+ *                 NULL if that is empty too.
+ */
+ksock_ltx_t *
+ksocknal_get_ltx (int may_block)
+{
+        unsigned long  flags;   /* spin_lock_irqsave() saves into an unsigned long */
+        ksock_ltx_t   *ltx = NULL;
+
+        for (;;) {
+                spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags);
+
+                if (!list_empty (&ksocknal_data.ksnd_idle_ltx_list)) {
+                        ltx = list_entry(ksocknal_data.ksnd_idle_ltx_list.next,
+                                         ksock_ltx_t, ltx_tx.tx_list);
+                        list_del (&(ltx->ltx_tx.tx_list));
+                        break;
+                }
+
+                if (!may_block) {
+                        if (!list_empty(&ksocknal_data.ksnd_idle_nblk_ltx_list)) {
+                                ltx = list_entry(ksocknal_data.ksnd_idle_nblk_ltx_list.next,
+                                                 ksock_ltx_t, ltx_tx.tx_list);
+                                list_del (&(ltx->ltx_tx.tx_list));
+                        }
+                        break;
+                }
+
+                spin_unlock_irqrestore(&ksocknal_data.ksnd_idle_ltx_lock,
+                                       flags);
+
+                /* Sleep unlocked; the list is re-checked under the lock at
+                 * the top of the loop in case someone else got there first. */
+                wait_event (ksocknal_data.ksnd_idle_ltx_waitq,
+                            !list_empty (&ksocknal_data.ksnd_idle_ltx_list));
+        }
+
+        /* both 'break's above leave the loop with the lock still held */
+        spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags);
+
+        return (ltx);
+}
+
+#if SOCKNAL_ZC
+/*
+ * Find the struct page behind the kernel virtual address 'vaddr'.
+ * Addresses in the vmalloc range (and, with CONFIG_HIGHMEM, the pkmap
+ * range) go through vmalloc_to_page(); everything else through
+ * virt_to_page().  Returns NULL if no valid page is found.
+ */
+struct page *
+ksocknal_kvaddr_to_page (unsigned long vaddr)
+{
+        struct page *page;
+
+        if (vaddr >= VMALLOC_START && vaddr < VMALLOC_END) {
+                page = vmalloc_to_page ((void *)vaddr);
+        }
+#if CONFIG_HIGHMEM
+        else if (vaddr >= PKMAP_BASE &&
+                 vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) {
+                /* in 2.4 this just walks the page tables */
+                page = vmalloc_to_page ((void *)vaddr);
+        }
+#endif
+        else {
+                page = virt_to_page (vaddr);
+        }
+
+        if (page != NULL && VALID_PAGE (page))
+                return (page);
+
+        return (NULL);
+}
+#endif
+
+/*
+ * Try to send the first iovec fragment of 'tx' on 'sock' without blocking.
+ *
+ * 'more' is non-zero if the caller has further data to follow; it is also
+ * forced on when 'tx' has more iovec fragments after this one.
+ *
+ * Returns:
+ *   1        the whole fragment went; tx_iov/tx_niov have been advanced
+ *   -EAGAIN  only part of the fragment went; the fragment is trimmed in place
+ *   <= 0     whatever the underlying send returned when nothing was sent
+ * tx_nob is reduced by the number of bytes sent.
+ */
+int
+ksocknal_send_iov (struct socket *sock, ksock_tx_t *tx, int more)
+{
+ struct iovec *iov = tx->tx_iov;
+ int fragsize = iov->iov_len;
+ unsigned long vaddr = (unsigned long)iov->iov_base;
+#if SOCKNAL_ZC
+ /* zero-copy sends never cross a page boundary */
+ int offset = vaddr & (PAGE_SIZE - 1);
+ int zcsize = MIN (fragsize, PAGE_SIZE - offset);
+ struct page *page;
+#endif
+ int rc;
+
+ /* NB we can't trust socket ops to either consume our iovs
+ * or leave them alone, so we only send 1 frag at a time. */
+ LASSERT (fragsize <= tx->tx_nob);
+ LASSERT (tx->tx_niov > 0);
+ more |= (tx->tx_niov > 1);
+
+#if SOCKNAL_ZC
+ /* Zero-copy only if enabled, the route has scatter/gather and checksum
+ * capability bits set, the piece is big enough and its page can be found */
+ if (ksocknal_do_zc &&
+ (sock->sk->route_caps & NETIF_F_SG) &&
+ (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) &&
+ zcsize >= ksocknal_zc_min_frag &&
+ (page = ksocknal_kvaddr_to_page (vaddr)) != NULL) {
+
+ CDEBUG(D_NET, "vaddr %p, page %p->%p + offset %x for %d\n",
+ (void *)vaddr, page, page_address(page), offset, zcsize);
+
+ more |= (zcsize < fragsize);
+
+ /* NOTE(review): only zcsize bytes are offered here, so when the
+ * fragment spans pages even a fully accepted send takes the
+ * 'rc < fragsize' branch below and returns -EAGAIN -- confirm
+ * that is intended. */
+ rc = tcp_sendpage_zccd(sock, page, offset, zcsize,
+ more ? (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT,
+ &tx->tx_zccd);
+ } else
+#endif
+ {
+ /* NB don't pass tx's iov; sendmsg may or may not update it */
+ struct iovec fragiov = { .iov_base = (void *)vaddr,
+ .iov_len = fragsize};
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = &fragiov,
+ .msg_iovlen = 1,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = more ? (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT
+ };
+ mm_segment_t oldmm = get_fs();
+
+ /* the buffer is a kernel address: widen the address limit for the call */
+ set_fs (KERNEL_DS);
+ rc = sock->sk->prot->sendmsg(sock->sk, &msg, fragsize);
+ set_fs (oldmm);
+ }
+
+ if (rc <= 0)
+ return (rc);
+
+ tx->tx_nob -= rc;
+
+ if (rc < fragsize) {
+ /* didn't send whole frag */
+ iov->iov_base = (void *)(vaddr + rc);
+ iov->iov_len = fragsize - rc;
+ return (-EAGAIN);
+ }
+
+ /* everything went */
+ LASSERT (rc == fragsize);
+ tx->tx_iov++;
+ tx->tx_niov--;
+ return (1);
+}
+
+/*
+ * Try to send the first page fragment of 'tx' on 'sock' without blocking.
+ *
+ * 'more' is non-zero if the caller has further data to follow; it is also
+ * forced on when 'tx' has more page fragments after this one.
+ *
+ * Returns:
+ *   1        the whole fragment went; tx_kiov/tx_nkiov have been advanced
+ *   -EAGAIN  only part of the fragment went; the fragment is trimmed in place
+ *   <= 0     whatever the underlying send returned when nothing was sent
+ * tx_nob is reduced by the number of bytes sent.
+ */
+int
+ksocknal_send_kiov (struct socket *sock, ksock_tx_t *tx, int more)
+{
+ ptl_kiov_t *kiov = tx->tx_kiov;
+ int fragsize = kiov->kiov_len;
+ struct page *page = kiov->kiov_page;
+ int offset = kiov->kiov_offset;
+ int rc;
+
+ /* NB we can't trust socket ops to either consume our iovs
+ * or leave them alone, so we only send 1 frag at a time. */
+ LASSERT (fragsize <= tx->tx_nob);
+ LASSERT (offset + fragsize <= PAGE_SIZE);
+ LASSERT (tx->tx_nkiov > 0);
+ more |= (tx->tx_nkiov > 1);
+
+#if SOCKNAL_ZC
+ /* Zero-copy only if enabled, the route has scatter/gather and checksum
+ * capability bits set, and the fragment is big enough */
+ if (ksocknal_do_zc &&
+ (sock->sk->route_caps & NETIF_F_SG) &&
+ (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) &&
+ fragsize >= ksocknal_zc_min_frag) {
+
+ CDEBUG(D_NET, "page %p + offset %x for %d\n",
+ page, offset, fragsize);
+
+ rc = tcp_sendpage_zccd(sock, page, offset, fragsize,
+ more ? (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT,
+ &tx->tx_zccd);
+ } else
+#endif
+ {
+ /* copy path: map the page for the duration of the send */
+ char *addr = ((char *)kmap (page)) + offset;
+ struct iovec fragiov = {.iov_base = addr,
+ .iov_len = fragsize};
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = &fragiov,
+ .msg_iovlen = 1,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = more ? (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT
+ };
+ mm_segment_t oldmm = get_fs();
+
+ /* the buffer is a kernel address: widen the address limit for the call */
+ set_fs (KERNEL_DS);
+ rc = sock->sk->prot->sendmsg(sock->sk, &msg, fragsize);
+ set_fs (oldmm);
+ kunmap (page);
+ }
+
+ if (rc <= 0)
+ return (rc);
+
+ tx->tx_nob -= rc;
+
+ if (rc < fragsize) {
+ /* didn't send whole frag */
+ kiov->kiov_offset = offset + rc;
+ kiov->kiov_len = fragsize - rc;
+ return (-EAGAIN);
+ }
+
+ /* everything went */
+ LASSERT (rc == fragsize);
+ tx->tx_kiov++;
+ tx->tx_nkiov--;
+ return (1);
+}
+
+/*
+ * Push as much of 'tx' down 'sock' as will go without blocking, one
+ * fragment at a time: iovec fragments first, then page fragments.
+ *
+ * Returns 0 if everything went, or if the socket simply stopped accepting
+ * data (the caller tells the two apart by checking tx->tx_nob).  An error
+ * from the very first fragment is returned as is.
+ */
+int
+ksocknal_sendmsg (struct socket *sock, ksock_tx_t *tx, int more)
+{
+        int progress = 0;
+        int rc;
+        ENTRY;
+
+        LASSERT (!in_interrupt());
+
+        do {
+                if (tx->tx_niov == 0)
+                        rc = ksocknal_send_kiov (sock, tx, more);
+                else
+                        rc = ksocknal_send_iov (sock, tx,
+                                                more || tx->tx_nkiov != 0);
+
+                /* Interpret a zero rc the same as -EAGAIN (Adaptech TOE) */
+                if (rc <= 0) {                  /* error or partial send */
+                        if (progress || rc == -EAGAIN)
+                                RETURN (0);
+                        RETURN (rc);
+                }
+
+                progress = 1;
+        } while (tx->tx_nob != 0);
+
+        RETURN (0);                             /* sent everything */
+}
+
+/*
+ * Try to fill the first iovec fragment of conn's receive descriptor
+ * without blocking.
+ *
+ * Returns:
+ *   1        the fragment was filled; ksnc_rx_iov/ksnc_rx_niov advanced
+ *   -EAGAIN  only part of the fragment arrived; it is trimmed in place
+ *   <= 0     whatever sock_recvmsg() returned when nothing was read
+ * ksnc_rx_nob_wanted and ksnc_rx_nob_left are reduced by the bytes read.
+ */
+int
+ksocknal_recv_iov (ksock_conn_t *conn)
+{
+ struct iovec *iov = conn->ksnc_rx_iov;
+ int fragsize = iov->iov_len;
+ unsigned long vaddr = (unsigned long)iov->iov_base;
+ /* receive via a private copy so conn's own iovec is never modified
+ * by the socket layer */
+ struct iovec fragiov = { .iov_base = (void *)vaddr,
+ .iov_len = fragsize};
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = &fragiov,
+ .msg_iovlen = 1,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = 0
+ };
+ mm_segment_t oldmm = get_fs();
+ int rc;
+
+ /* NB we can't trust socket ops to either consume our iovs
+ * or leave them alone, so we only receive 1 frag at a time. */
+ LASSERT (conn->ksnc_rx_niov > 0);
+ LASSERT (fragsize <= conn->ksnc_rx_nob_wanted);
+
+ /* the buffer is a kernel address: widen the address limit for the call */
+ set_fs (KERNEL_DS);
+ rc = sock_recvmsg (conn->ksnc_sock, &msg, fragsize, MSG_DONTWAIT);
+ /* NB this is just a boolean............................^ */
+ set_fs (oldmm);
+
+ if (rc <= 0)
+ return (rc);
+
+ conn->ksnc_rx_nob_wanted -= rc;
+ conn->ksnc_rx_nob_left -= rc;
+
+ if (rc < fragsize) {
+ /* short read: remember where to carry on next time */
+ iov->iov_base = (void *)(vaddr + rc);
+ iov->iov_len = fragsize - rc;
+ return (-EAGAIN);
+ }
+
+ LASSERT (rc == fragsize);
+ conn->ksnc_rx_iov++;
+ conn->ksnc_rx_niov--;
+ return (1);
+}
+
+/*
+ * Try to fill the first page fragment of conn's receive descriptor
+ * without blocking.  The page is kmap()ed for the duration of the call.
+ *
+ * Returns:
+ *   1        the fragment was filled; ksnc_rx_kiov/ksnc_rx_nkiov advanced
+ *   -EAGAIN  only part of the fragment arrived; it is trimmed in place
+ *   <= 0     whatever sock_recvmsg() returned when nothing was read
+ * ksnc_rx_nob_wanted and ksnc_rx_nob_left are reduced by the bytes read.
+ */
+int
+ksocknal_recv_kiov (ksock_conn_t *conn)
+{
+ ptl_kiov_t *kiov = conn->ksnc_rx_kiov;
+ struct page *page = kiov->kiov_page;
+ int offset = kiov->kiov_offset;
+ int fragsize = kiov->kiov_len;
+ /* NB the page is mapped here, before the assertions below run */
+ unsigned long vaddr = ((unsigned long)kmap (page)) + offset;
+ struct iovec fragiov = { .iov_base = (void *)vaddr,
+ .iov_len = fragsize};
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = &fragiov,
+ .msg_iovlen = 1,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = 0
+ };
+ mm_segment_t oldmm = get_fs();
+ int rc;
+
+ /* NB we can't trust socket ops to either consume our iovs
+ * or leave them alone, so we only receive 1 frag at a time. */
+ LASSERT (fragsize <= conn->ksnc_rx_nob_wanted);
+ LASSERT (conn->ksnc_rx_nkiov > 0);
+ LASSERT (offset + fragsize <= PAGE_SIZE);
+
+ /* the buffer is a kernel address: widen the address limit for the call */
+ set_fs (KERNEL_DS);
+ rc = sock_recvmsg (conn->ksnc_sock, &msg, fragsize, MSG_DONTWAIT);
+ /* NB this is just a boolean............................^ */
+ set_fs (oldmm);
+ kunmap (page);
+
+ if (rc <= 0)
+ return (rc);
+
+ conn->ksnc_rx_nob_wanted -= rc;
+ conn->ksnc_rx_nob_left -= rc;
+
+ if (rc < fragsize) {
+ /* short read: remember where to carry on next time */
+ kiov->kiov_offset = offset + rc;
+ kiov->kiov_len = fragsize - rc;
+ return (-EAGAIN);
+ }
+
+ LASSERT (rc == fragsize);
+ conn->ksnc_rx_kiov++;
+ conn->ksnc_rx_nkiov--;
+ return (1);
+}
+
+/*
+ * Read as much as is available, without blocking, into conn's receive
+ * descriptor, one fragment at a time: iovec fragments first, then page
+ * fragments.
+ *
+ * CAVEAT EMPTOR: unlike ksocknal_sendmsg(), success is > 0 here.
+ * Returns:
+ *   1     everything wanted has arrived, or progress was made / the
+ *         socket just has no more data right now (check
+ *         conn->ksnc_rx_nob_wanted to tell which)
+ *   0     EOF before anything was read in this call
+ *   < 0   error before anything was read in this call
+ */
+int
+ksocknal_recvmsg (ksock_conn_t *conn)
+{
+        int rc;
+        int got_some = 0;
+        ENTRY;
+
+        LASSERT (!in_interrupt ());
+
+        for (;;) {
+                LASSERT (conn->ksnc_rx_nob_wanted > 0);
+
+                if (conn->ksnc_rx_niov != 0)
+                        rc = ksocknal_recv_iov (conn);
+                else
+                        rc = ksocknal_recv_kiov (conn);
+
+                /* CAVEAT EMPTOR: we return...
+                 * <= 0 for error (0 == EOF) and > 0 for success (unlike sendmsg()) */
+
+                if (rc <= 0)            /* error/EOF or partial receive */
+                        RETURN ((got_some || rc == -EAGAIN) ? 1 : rc);
+
+                if (conn->ksnc_rx_nob_wanted == 0)
+                        RETURN (1);
+
+                /* A whole fragment arrived: from here on an error or EOF is
+                 * reported as progress, mirroring 'sent_some' in
+                 * ksocknal_sendmsg(); a persistent condition shows up again
+                 * on the next call. */
+                got_some = 1;
+        }
+}
+
+#if SOCKNAL_ZC
+/*
+ * Zero-copy completion callback for the descriptor embedded in a
+ * ksock_tx_t.  The tx is queued on its scheduler's kss_zctxdone_list and
+ * the scheduler is woken; the cleanup itself is left to the scheduler.
+ */
+void
+ksocknal_zc_callback (zccd_t *zcd)
+{
+        ksock_tx_t    *tx = KSOCK_ZCCD_2_TX(zcd);
+        ksock_sched_t *sched;
+        unsigned long  flags;
+        ENTRY;
+
+        sched = tx->tx_sched;
+
+        /* Schedule tx for cleanup (can't do it now due to lock conflicts) */
+        spin_lock_irqsave (&sched->kss_lock, flags);
+
+        list_add_tail (&tx->tx_list, &sched->kss_zctxdone_list);
+
+        if (waitqueue_active (&sched->kss_waitq)) {
+                wake_up (&sched->kss_waitq);
+        }
+
+        spin_unlock_irqrestore (&sched->kss_lock, flags);
+        EXIT;
+}
+#endif
+
+/*
+ * Finish with a transmit descriptor.
+ *
+ * A forwarded packet is handed back to the router with kpr_fwd_done().
+ * A locally sourced packet is finalized with lib_finalize() and its
+ * ksock_ltx_t is returned to the idle list it came from, waking anyone
+ * blocked waiting for a normal descriptor.
+ *
+ * Also decrements ksocknal_packets_being_sent.
+ */
+void
+ksocknal_tx_done (ksock_tx_t *tx)
+{
+        unsigned long  flags;   /* spin_lock_irqsave() saves into an unsigned long */
+        ksock_ltx_t   *ltx;
+        ENTRY;
+
+        atomic_dec (&ksocknal_packets_being_sent);
+
+        if (tx->tx_isfwd) {             /* was a forwarded packet? */
+                kpr_fwd_done (&ksocknal_data.ksnd_router,
+                              KSOCK_TX_2_KPR_FWD_DESC (tx), 0);
+                EXIT;
+                return;
+        }
+
+        /* local send */
+        ltx = KSOCK_TX_2_KSOCK_LTX (tx);
+
+        lib_finalize (&ksocknal_lib, ltx->ltx_private, ltx->ltx_cookie);
+
+        spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags);
+
+        list_add_tail (&(ltx->ltx_tx.tx_list), ltx->ltx_idle);
+
+        /* normal tx desc => wakeup anyone blocking for one */
+        if (ltx->ltx_idle == &ksocknal_data.ksnd_idle_ltx_list &&
+            waitqueue_active (&ksocknal_data.ksnd_idle_ltx_waitq))
+                wake_up (&ksocknal_data.ksnd_idle_ltx_waitq);
+
+        spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags);
+        EXIT;
+}
+
+/*
+ * Progress transmission on the first connection queued on
+ * sched->kss_tx_conns.
+ *
+ * Called with sched->kss_lock held and the saved irq state in *irq_flags.
+ * The lock is dropped while the socket is written to and is held again on
+ * return.
+ *
+ * The packet at the head of the connection's tx queue is sent as far as it
+ * will go.  If it completes it is finished off (or, with zero-copy pages
+ * still in flight, left for the zccd callback); otherwise it goes back on
+ * the head of the queue.  The connection stays scheduled only while it is
+ * both writable and has packets queued.
+ */
+void
+ksocknal_process_transmit (ksock_sched_t *sched, long *irq_flags)
+{
+ ksock_conn_t *conn;
+ ksock_tx_t *tx;
+ int rc;
+
+ LASSERT (!list_empty (&sched->kss_tx_conns));
+ conn = list_entry(sched->kss_tx_conns.next, ksock_conn_t, ksnc_tx_list);
+ list_del (&conn->ksnc_tx_list);
+
+ LASSERT (conn->ksnc_tx_scheduled);
+ LASSERT (conn->ksnc_tx_ready);
+ LASSERT (!list_empty (&conn->ksnc_tx_queue));
+ tx = list_entry (conn->ksnc_tx_queue.next, ksock_tx_t, tx_list);
+ /* assume transmit will complete now, so dequeue while I've got lock */
+ list_del (&tx->tx_list);
+
+ spin_unlock_irqrestore (&sched->kss_lock, *irq_flags);
+
+ LASSERT (tx->tx_nob > 0);
+
+ conn->ksnc_tx_ready = 0;/* write_space may race with me and set ready */
+ mb(); /* => clear BEFORE trying to write */
+
+ rc = ksocknal_sendmsg (conn->ksnc_sock, tx,
+ !list_empty (&conn->ksnc_tx_queue)); /* more to come? */
+
+ CDEBUG (D_NET, "send(%d) %d\n", tx->tx_nob, rc);
+
+ if (rc != 0) {
+#warning FIXME: handle socket errors properly
+ CERROR("Error socknal send(%d) %p: %d\n", tx->tx_nob, conn, rc);
+ /* kid on for now the whole packet went.
+ * NB when we handle the error better, we'll still need to
+ * block for zccd completion.
+ */
+ tx->tx_nob = 0;
+ }
+
+ if (tx->tx_nob == 0) /* nothing left to send */
+ {
+ /* everything went; assume more can go, so prevent write_space locking */
+ conn->ksnc_tx_ready = 1;
+
+ ksocknal_put_conn (conn); /* release packet's ref */
+ /* balanced by the decrement at the top of ksocknal_tx_done() */
+ atomic_inc (&ksocknal_packets_being_sent);
+#if SOCKNAL_ZC
+ if (atomic_read (&tx->tx_zccd.zccd_count) != 1) {
+ /* zccd skbufs are still in-flight. Release my
+ * initial ref on zccd, so callback can occur */
+ zccd_put (&tx->tx_zccd);
+ } else
+#endif
+ ksocknal_tx_done (tx);
+
+ spin_lock_irqsave (&sched->kss_lock, *irq_flags);
+ } else {
+ spin_lock_irqsave (&sched->kss_lock, *irq_flags);
+
+ /* back onto HEAD of tx_queue */
+ list_add (&tx->tx_list, &conn->ksnc_tx_queue);
+ }
+
+ /* lock is held again from here on */
+ if (!conn->ksnc_tx_ready || /* no space to write now */
+ list_empty (&conn->ksnc_tx_queue)) {/* nothing to write */
+ conn->ksnc_tx_scheduled = 0; /* not being scheduled */
+ ksocknal_put_conn (conn); /* release scheduler's ref */
+ } else /* let scheduler call me again */
+ list_add_tail (&conn->ksnc_tx_list, &sched->kss_tx_conns);
+}
+
+/*
+ * Queue 'tx' for transmission on 'conn'.
+ *
+ * The packet is appended to the connection's tx queue.  If the connection
+ * is writable and not already scheduled, it is put on its scheduler's tx
+ * list with an extra reference and the scheduler is woken.
+ */
+void
+ksocknal_launch_packet (ksock_conn_t *conn, ksock_tx_t *tx)
+{
+        ksock_sched_t *sched = conn->ksnc_scheduler;
+        unsigned long  flags;
+
+        /* Ensure the frags we've been given EXACTLY match the number of
+         * bytes we want to send.  Many TCP/IP stacks disregard any total
+         * size parameters passed to them and just look at the frags.
+         *
+         * We always expect at least 1 mapped fragment containing the
+         * complete portals header.
+         */
+        LASSERT (lib_iov_nob (tx->tx_niov, tx->tx_iov) +
+                 lib_kiov_nob (tx->tx_nkiov, tx->tx_kiov) == tx->tx_nob);
+        LASSERT (tx->tx_niov >= 1);
+        LASSERT (tx->tx_iov[0].iov_len >= sizeof (ptl_hdr_t));
+
+        CDEBUG (D_NET, "type %d, nob %d niov %d nkiov %d\n",
+                ((ptl_hdr_t *)tx->tx_iov[0].iov_base)->type, tx->tx_nob,
+                tx->tx_niov, tx->tx_nkiov);
+
+#if SOCKNAL_ZC
+        /* NB zccd_init() sets 1 ref on zccd, so the callback can only occur
+         * after I've released this ref */
+        zccd_init (&tx->tx_zccd, ksocknal_zc_callback);
+        tx->tx_sched = sched;
+#endif
+
+        spin_lock_irqsave (&sched->kss_lock, flags);
+
+        list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue);
+
+        if (conn->ksnc_tx_ready &&              /* able to send */
+            !conn->ksnc_tx_scheduled) {         /* not scheduled to send */
+                list_add_tail (&conn->ksnc_tx_list, &sched->kss_tx_conns);
+                conn->ksnc_tx_scheduled = 1;
+
+                /* extra ref for scheduler */
+                atomic_inc (&conn->ksnc_refcount);
+
+                if (waitqueue_active (&sched->kss_waitq)) {
+                        wake_up (&sched->kss_waitq);
+                }
+        }
+
+        spin_unlock_irqrestore (&sched->kss_lock, flags);
+
+        atomic_inc (&ksocknal_packets_launched);
+}
+
+/* Find the connection on which a packet for 'nid' should be sent.
+ *
+ * A direct connection to 'nid' is preferred; failing that the router is
+ * asked for a gateway and the gateway's connection is used instead.
+ *
+ * Returns the connection found by ksocknal_get_conn() (callers in this file
+ * release it with ksocknal_put_conn()), or NULL after logging the reason.
+ */
+ksock_conn_t *
+ksocknal_send_target (ptl_nid_t nid)
+{
+        ksock_conn_t *target;
+        ptl_nid_t     gw_nid;
+        int           err;
+
+        /* Direct peer?  Nothing more to do. */
+        target = ksocknal_get_conn (nid);
+        if (target != NULL)
+                return (target);
+
+        /* It's not a peer; try to find a gateway */
+        err = kpr_lookup (&ksocknal_data.ksnd_router, nid, &gw_nid);
+        if (err != 0) {
+                CERROR("Can't route to "LPX64": router error %d\n",
+                       nid, err);
+                return (NULL);
+        }
+
+        target = ksocknal_get_conn (gw_nid);
+        if (target == NULL) {
+                CERROR ("Can't route to "LPX64": gateway "LPX64
+                        " is not a peer\n", nid, gw_nid);
+        }
+
+        return (target);
+}
+
+/* Get a local transmit descriptor and prime it to send just the portals
+ * header; callers append any payload fragments afterwards.
+ *
+ * nal:     unused here.
+ * private: stored in the descriptor (ltx_private) for later completion.
+ * cookie:  stored in the descriptor (ltx_cookie) for later completion.
+ * hdr:     header to send; it is copied into the descriptor.
+ * type:    message type; ACKs and REPLYs (and any call made in interrupt
+ *          context) ask ksocknal_get_ltx() with a zero argument, see the
+ *          comment in the body.
+ *
+ * Returns the descriptor, or NULL (after logging) if none was available.
+ */
+ksock_ltx_t *
+ksocknal_setup_hdr (nal_cb_t *nal, void *private, lib_msg_t *cookie,
+                    ptl_hdr_t *hdr, int type)
+{
+        ksock_ltx_t *ltx;
+
+        /* I may not block for a transmit descriptor if I might block the
+         * receiver, or an interrupt handler. */
+        ltx = ksocknal_get_ltx (!(type == PTL_MSG_ACK ||
+                                  type == PTL_MSG_REPLY ||
+                                  in_interrupt ()));
+        if (ltx == NULL) {
+                CERROR ("Can't allocate tx desc\n");
+                return (NULL);
+        }
+
+        /* Init local send packet (storage for hdr, finalize() args) */
+        ltx->ltx_hdr = *hdr;
+        ltx->ltx_private = private;
+        ltx->ltx_cookie = cookie;
+
+        /* Init common ltx_tx */
+        ltx->ltx_tx.tx_isfwd = 0;
+        ltx->ltx_tx.tx_nob = sizeof (*hdr);
+
+        /* We always have 1 mapped frag for the header.  The address-of
+         * operands are parenthesised on purpose: the bare "ampersand + ltx"
+         * spelling was previously corrupted in transit. */
+        ltx->ltx_tx.tx_niov = 1;
+        ltx->ltx_tx.tx_iov = &(ltx->ltx_iov_space.hdr);
+        ltx->ltx_tx.tx_iov[0].iov_base = &(ltx->ltx_hdr);
+        ltx->ltx_tx.tx_iov[0].iov_len = sizeof (ltx->ltx_hdr);
+
+        /* no page fragments yet */
+        ltx->ltx_tx.tx_kiov = NULL;
+        ltx->ltx_tx.tx_nkiov = 0;
+
+        return (ltx);
+}
+
+/* cb_send handler: send a portals header plus a payload described by
+ * mapped (struct iovec) fragments to 'nid'.
+ *
+ * On failure to find a connection or a transmit descriptor the message is
+ * completed with lib_finalize() and -1 is returned; otherwise the packet is
+ * queued with ksocknal_launch_packet() and 0 is returned.
+ */
+int
+ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie,
+               ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+               unsigned int payload_niov, struct iovec *payload_iov, size_t payload_len)
+{
+        ksock_ltx_t  *ltx;
+        ksock_conn_t *conn;
+
+        /* NB 'private' is different depending on what we're sending.
+         * Just ignore it until we can rely on it
+         *
+         * Also, the return code from this procedure is ignored.
+         * If we can't send, we must still complete with lib_finalize().
+         * We'll have to wait for 3.2 to return an error event.
+         */
+
+        CDEBUG(D_NET,
+               "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64" pid %d\n",
+               payload_len, payload_niov, nid, pid);
+
+        conn = ksocknal_send_target (nid);
+        if (conn == NULL) {
+                lib_finalize (&ksocknal_lib, private, cookie);
+                return (-1);
+        }
+
+        ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type);
+        if (ltx == NULL) {
+                ksocknal_put_conn (conn);       /* drop ref from send_target */
+                lib_finalize (&ksocknal_lib, private, cookie);
+                return (-1);
+        }
+
+        /* append the payload_iovs to the one pointing at the header */
+        LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0);
+        LASSERT (payload_niov <= PTL_MD_MAX_IOV);
+
+        memcpy (ltx->ltx_tx.tx_iov + 1, payload_iov,
+                payload_niov * sizeof (*payload_iov));
+        ltx->ltx_tx.tx_niov = 1 + payload_niov;
+        ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len;
+
+        /* address-of operand parenthesised: the bare spelling was
+         * previously corrupted in transit */
+        ksocknal_launch_packet (conn, &(ltx->ltx_tx));
+        return (0);
+}
+
+/* cb_send_pages handler: send a portals header plus a payload described by
+ * page (ptl_kiov_t) fragments to 'nid'.
+ *
+ * Returns 0 once the packet has been queued with ksocknal_launch_packet(),
+ * or -1 if no connection or transmit descriptor could be obtained.
+ *
+ * NOTE(review): unlike ksocknal_send(), the failure paths here do not call
+ * lib_finalize(); confirm whether that is intentional.
+ */
+int
+ksocknal_send_pages (nal_cb_t *nal, void *private, lib_msg_t *cookie,
+                     ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+                     unsigned int payload_niov, ptl_kiov_t *payload_iov, size_t payload_len)
+{
+        ksock_ltx_t  *ltx;
+        ksock_conn_t *conn;
+
+        /* NB 'private' is different depending on what we're sending.
+         * Just ignore it until we can rely on it */
+
+        CDEBUG(D_NET,
+               "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64" pid %d\n",
+               payload_len, payload_niov, nid, pid);
+
+        conn = ksocknal_send_target (nid);
+        if (conn == NULL)
+                return (-1);
+
+        ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type);
+        if (ltx == NULL) {
+                ksocknal_put_conn (conn);       /* drop ref from send_target */
+                return (-1);
+        }
+
+        LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0);
+        LASSERT (payload_niov <= PTL_MD_MAX_IOV);
+
+        /* header stays in the single mapped frag; payload goes in as pages */
+        ltx->ltx_tx.tx_kiov = ltx->ltx_iov_space.payload.kiov;
+        memcpy (ltx->ltx_tx.tx_kiov, payload_iov,
+                payload_niov * sizeof (*payload_iov));
+        ltx->ltx_tx.tx_nkiov = payload_niov;
+        ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len;
+
+        /* address-of operand parenthesised: the bare spelling was
+         * previously corrupted in transit */
+        ksocknal_launch_packet (conn, &(ltx->ltx_tx));
+        return (0);
+}
+
+/* Router callback: transmit the forwarding descriptor 'fwd' on the
+ * connection to its next hop.
+ *
+ * The next hop is fwd's gateway nid, unless that is this node's own nid, in
+ * which case the packet goes straight to fwd's target nid.  If there is no
+ * connection to the next hop the forward is completed with -EHOSTUNREACH.
+ * The transmit descriptor lives in fwd's scratch area.  'arg' is unused.
+ */
+void
+ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
+{
+        ksock_tx_t   *tx = (ksock_tx_t *)&fwd->kprfd_scratch;
+        ptl_nid_t     next_hop = fwd->kprfd_gateway_nid;
+        ksock_conn_t *conn;
+
+        CDEBUG (D_NET, "Forwarding [%p] -> "LPX64" ("LPX64"))\n", fwd,
+                fwd->kprfd_gateway_nid, fwd->kprfd_target_nid);
+
+        /* I'm the gateway; must be the last hop */
+        if (next_hop == ksocknal_lib.ni.nid)
+                next_hop = fwd->kprfd_target_nid;
+
+        conn = ksocknal_get_conn (next_hop);
+        if (conn == NULL) {
+                CERROR ("[%p] fwd to "LPX64" isn't a peer\n", fwd, next_hop);
+                kpr_fwd_done (&ksocknal_data.ksnd_router, fwd, -EHOSTUNREACH);
+                return;
+        }
+
+        /* This forward has now got a ref on conn */
+
+        /* Describe the router's buffer: mapped frags only, no pages */
+        tx->tx_isfwd = 1;                       /* This is a forwarding packet */
+        tx->tx_iov   = fwd->kprfd_iov;
+        tx->tx_niov  = fwd->kprfd_niov;
+        tx->tx_kiov  = NULL;
+        tx->tx_nkiov = 0;
+        tx->tx_nob   = fwd->kprfd_nob;
+
+        ksocknal_launch_packet (conn, tx);
+}
+
+/* Spawn a kernel thread running fn(arg) and count it in
+ * ksocknal_data.ksnd_nthreads.
+ *
+ * Returns 0 on success, or the negative value kernel_thread() returned.
+ */
+int
+ksocknal_thread_start (int (*fn)(void *arg), void *arg)
+{
+        long pid;
+
+        pid = kernel_thread (fn, arg, 0);
+        if (pid >= 0) {
+                /* only successfully started threads are counted */
+                atomic_inc (&ksocknal_data.ksnd_nthreads);
+                return (0);
+        }
+
+        return ((int)pid);
+}
+
+/* Called by a thread on its way out: undo the ksnd_nthreads increment
+ * made by ksocknal_thread_start(). */
+void
+ksocknal_thread_fini (void)
+{
+        atomic_dec (&ksocknal_data.ksnd_nthreads);
+}
+
+/* Completion callback for a forwarded message buffer (fmb).
+ *
+ * arg:   the ksock_fmb_t that was forwarded.
+ * error: 0 on success, otherwise the reason routing failed (logged).
+ *
+ * The fmb goes back on its pool's idle list.  If a connection was blocked
+ * waiting for a buffer from this pool, the first one is taken off the
+ * blocked list, moved to state SOCKNAL_RX_GET_FMB and put on its
+ * scheduler's rx list, and the scheduler is woken.
+ */
+void
+ksocknal_fmb_callback (void *arg, int error)
+{
+        ksock_fmb_t       *fmb = (ksock_fmb_t *)arg;
+        ksock_fmb_pool_t  *fmp = fmb->fmb_pool;
+        ptl_hdr_t         *hdr = (ptl_hdr_t *) page_address(fmb->fmb_pages[0]);
+        ksock_conn_t      *conn = NULL;
+        ksock_sched_t     *sched;
+        unsigned long      flags;       /* irqsave flags must be unsigned long */
+
+        if (error != 0)
+                CERROR("Failed to route packet from "LPX64" to "LPX64": %d\n",
+                       NTOH__u64(hdr->src_nid), NTOH__u64(hdr->dest_nid),
+                       error);
+        else
+                CDEBUG (D_NET, "routed packet from "LPX64" to "LPX64": OK\n",
+                        NTOH__u64 (hdr->src_nid), NTOH__u64 (hdr->dest_nid));
+
+        spin_lock_irqsave (&fmp->fmp_lock, flags);
+
+        /* buffer is free again */
+        list_add (&fmb->fmb_list, &fmp->fmp_idle_fmbs);
+
+        /* anyone waiting for it? */
+        if (!list_empty (&fmp->fmp_blocked_conns)) {
+                conn = list_entry (fmp->fmp_blocked_conns.next,
+                                   ksock_conn_t, ksnc_rx_list);
+                list_del (&conn->ksnc_rx_list);
+        }
+
+        spin_unlock_irqrestore (&fmp->fmp_lock, flags);
+
+        if (conn == NULL)
+                return;
+
+        CDEBUG (D_NET, "Scheduling conn %p\n", conn);
+        LASSERT (conn->ksnc_rx_scheduled);
+        LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_FMB_SLEEP);
+
+        /* have the conn retry getting a buffer when it is next scheduled */
+        conn->ksnc_rx_state = SOCKNAL_RX_GET_FMB;
+
+        sched = conn->ksnc_scheduler;
+
+        spin_lock_irqsave (&sched->kss_lock, flags);
+
+        list_add_tail (&conn->ksnc_rx_list, &sched->kss_rx_conns);
+
+        if (waitqueue_active (&sched->kss_waitq))
+                wake_up (&sched->kss_waitq);
+
+        spin_unlock_irqrestore (&sched->kss_lock, flags);
+}
+
+/* Try to get an idle forwarding buffer big enough for the packet 'conn'
+ * is receiving (header plus conn->ksnc_rx_nob_left bytes of payload).
+ *
+ * Packets of up to SOCKNAL_SMALL_FWD_PAGES pages use the small pool, the
+ * rest use the large pool.
+ *
+ * Returns the buffer, or NULL if the pool had none idle; in that case the
+ * conn has been put in state SOCKNAL_RX_FMB_SLEEP and queued on the pool's
+ * blocked list.
+ */
+ksock_fmb_t *
+ksocknal_get_idle_fmb (ksock_conn_t *conn)
+{
+        int                payload_nob = conn->ksnc_rx_nob_left;
+        int                packet_nob = sizeof (ptl_hdr_t) + payload_nob;
+        unsigned long      flags;       /* irqsave flags must be unsigned long */
+        ksock_fmb_pool_t  *pool;
+        ksock_fmb_t       *fmb;
+
+        LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
+        LASSERT (ksocknal_data.ksnd_fmbs != NULL);
+
+        if (packet_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE)
+                pool = &ksocknal_data.ksnd_small_fmp;
+        else
+                pool = &ksocknal_data.ksnd_large_fmp;
+
+        spin_lock_irqsave (&pool->fmp_lock, flags);
+
+        if (!list_empty (&pool->fmp_idle_fmbs)) {
+                fmb = list_entry(pool->fmp_idle_fmbs.next,
+                                 ksock_fmb_t, fmb_list);
+                list_del (&fmb->fmb_list);
+                spin_unlock_irqrestore (&pool->fmp_lock, flags);
+
+                return (fmb);
+        }
+
+        /* deschedule until fmb free */
+
+        conn->ksnc_rx_state = SOCKNAL_RX_FMB_SLEEP;
+
+        list_add_tail (&conn->ksnc_rx_list,
+                       &pool->fmp_blocked_conns);
+
+        spin_unlock_irqrestore (&pool->fmp_lock, flags);
+        return (NULL);
+}
+
+
+/* Prepare forwarding buffer 'fmb' to carry the packet whose header 'conn'
+ * has just received.
+ *
+ * The header is copied into the first page of the buffer.  With no payload
+ * the packet is handed to the router at once, the conn is reset for the
+ * next packet and 1 is returned.  Otherwise the buffer's iov is laid out
+ * over as many pages as the packet needs, the conn's receive iov is pointed
+ * at the same memory (skipping the header), the conn moves to state
+ * SOCKNAL_RX_BODY_FWD and 0 is returned.
+ */
+int
+ksocknal_init_fmb (ksock_conn_t *conn, ksock_fmb_t *fmb)
+{
+ int payload_nob = conn->ksnc_rx_nob_left;
+ int packet_nob = sizeof (ptl_hdr_t) + payload_nob;
+ ptl_nid_t dest_nid = NTOH__u64 (conn->ksnc_hdr.dest_nid);
+ int niov; /* at least the header */
+ int nob;
+
+ LASSERT (conn->ksnc_rx_scheduled);
+ LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
+ LASSERT (conn->ksnc_rx_nob_wanted == conn->ksnc_rx_nob_left);
+ LASSERT (payload_nob >= 0);
+ LASSERT (packet_nob <= fmb->fmb_npages * PAGE_SIZE);
+ LASSERT (sizeof (ptl_hdr_t) < PAGE_SIZE);
+
+ /* Got a forwarding buffer; copy the header we just read into the
+ * forwarding buffer. If there's payload start reading it
+ * into the buffer, otherwise the forwarding buffer can be kicked
+ * off immediately.
+ *
+ * NB fmb->fmb_iov spans the WHOLE packet.
+ * conn->ksnc_rx_iov spans just the payload.
+ */
+
+ fmb->fmb_iov[0].iov_base = page_address (fmb->fmb_pages[0]);
+
+ /* copy header */
+ memcpy (fmb->fmb_iov[0].iov_base, &conn->ksnc_hdr, sizeof (ptl_hdr_t));
+
+ if (payload_nob == 0) { /* got complete packet already */
+ atomic_inc (&ksocknal_packets_received);
+
+ CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (immediate)\n",
+ conn, NTOH__u64 (conn->ksnc_hdr.src_nid),
+ dest_nid, packet_nob);
+
+ fmb->fmb_iov[0].iov_len = sizeof (ptl_hdr_t);
+
+ kpr_fwd_init (&fmb->fmb_fwd, dest_nid,
+ packet_nob, 1, fmb->fmb_iov,
+ ksocknal_fmb_callback, fmb);
+
+ /* forward it now */
+ kpr_fwd_start (&ksocknal_data.ksnd_router, &fmb->fmb_fwd);
+
+ ksocknal_new_packet (conn, 0); /* on to next packet */
+ return (1);
+ }
+
+ /* first frag is the start of page 0 (header included) */
+ niov = 1;
+ if (packet_nob <= PAGE_SIZE) { /* whole packet fits in first page */
+ fmb->fmb_iov[0].iov_len = packet_nob;
+ } else {
+ fmb->fmb_iov[0].iov_len = PAGE_SIZE;
+ nob = packet_nob - PAGE_SIZE;
+
+ /* one frag per further page; the last may be partial */
+ do {
+ LASSERT (niov < fmb->fmb_npages);
+ fmb->fmb_iov[niov].iov_base =
+ page_address (fmb->fmb_pages[niov]);
+ fmb->fmb_iov[niov].iov_len = MIN (PAGE_SIZE, nob);
+ nob -= PAGE_SIZE;
+ niov++;
+ } while (nob > 0);
+ }
+
+ kpr_fwd_init (&fmb->fmb_fwd, dest_nid,
+ packet_nob, niov, fmb->fmb_iov,
+ ksocknal_fmb_callback, fmb);
+
+ /* stash router's descriptor ready for call to kpr_fwd_start */
+ conn->ksnc_cookie = &fmb->fmb_fwd;
+
+ conn->ksnc_rx_state = SOCKNAL_RX_BODY_FWD; /* read in the payload */
+
+ /* payload is desc's iov-ed buffer, but skipping the hdr */
+ LASSERT (niov <= sizeof (conn->ksnc_rx_iov_space) /
+ sizeof (struct iovec));
+
+ conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space;
+ conn->ksnc_rx_iov[0].iov_base =
+ (void *)(((unsigned long)fmb->fmb_iov[0].iov_base) +
+ sizeof (ptl_hdr_t));
+ conn->ksnc_rx_iov[0].iov_len =
+ fmb->fmb_iov[0].iov_len - sizeof (ptl_hdr_t);
+
+ /* remaining frags are shared unchanged with the fmb's iov */
+ if (niov > 1)
+ memcpy(&conn->ksnc_rx_iov[1], &fmb->fmb_iov[1],
+ (niov - 1) * sizeof (struct iovec));
+
+ conn->ksnc_rx_niov = niov;
+
+ CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d reading body\n", conn,
+ NTOH__u64 (conn->ksnc_hdr.src_nid), dest_nid, payload_nob);
+ return (0);
+}
+
+/* Decide what to do with a packet whose header says it is not for this
+ * node.
+ *
+ * The packet is dropped (its body is skipped via ksocknal_new_packet()) if
+ * its length is corrupt, if this node isn't forwarding, if the payload is
+ * bigger than SOCKNAL_MAX_FWD_PAYLOAD, or if the destination is a direct
+ * peer.  Otherwise the conn moves to state SOCKNAL_RX_GET_FMB with the
+ * payload size stashed in ksnc_rx_nob_left / ksnc_rx_nob_wanted.
+ *
+ * Header nids are converted with NTOH__u64 once, up front, and the converted
+ * values are used for both the peer lookup and every log message, matching
+ * how this file treats header nids elsewhere.
+ */
+void
+ksocknal_fwd_parse (ksock_conn_t *conn)
+{
+        ksock_conn_t *conn2;
+        ptl_nid_t     src_nid = NTOH__u64 (conn->ksnc_hdr.src_nid);
+        ptl_nid_t     dest_nid = NTOH__u64 (conn->ksnc_hdr.dest_nid);
+        int           body_len = NTOH__u32 (PTL_HDR_LENGTH(&conn->ksnc_hdr));
+
+        CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d parsing header\n", conn,
+                src_nid, dest_nid, conn->ksnc_rx_nob_left);
+
+        LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER);
+        LASSERT (conn->ksnc_rx_scheduled);
+
+        if (body_len < 0) {                     /* length corrupt (overflow) */
+                CERROR("dropping packet from "LPX64" for "LPX64": packet "
+                       "size %d illegal\n", src_nid, dest_nid, body_len);
+                ksocknal_new_packet (conn, 0);  /* on to new packet */
+                return;
+        }
+
+        if (ksocknal_data.ksnd_fmbs == NULL) {  /* not forwarding */
+                CERROR("dropping packet from "LPX64" for "LPX64": not "
+                       "forwarding\n", src_nid, dest_nid);
+                /* on to new packet (skip this one's body) */
+                ksocknal_new_packet (conn, body_len);
+                return;
+        }
+
+        if (body_len > SOCKNAL_MAX_FWD_PAYLOAD) { /* too big to forward */
+                CERROR ("dropping packet from "LPX64" for "LPX64
+                        ": packet size %d too big\n", src_nid,
+                        dest_nid, body_len);
+                /* on to new packet (skip this one's body) */
+                ksocknal_new_packet (conn, body_len);
+                return;
+        }
+
+        /* should have gone direct */
+        conn2 = ksocknal_get_conn (dest_nid);
+        if (conn2 != NULL) {
+                CERROR ("dropping packet from "LPX64" for "LPX64
+                        ": target is a peer\n", src_nid, dest_nid);
+                ksocknal_put_conn (conn2);      /* drop ref from get above */
+
+                /* on to next packet (skip this one's body) */
+                ksocknal_new_packet (conn, body_len);
+                return;
+        }
+
+        conn->ksnc_rx_state = SOCKNAL_RX_GET_FMB;       /* Getting FMB now */
+        conn->ksnc_rx_nob_left = body_len;              /* stash packet size */
+        conn->ksnc_rx_nob_wanted = body_len;            /* (no slop) */
+}
+
+/* Set 'conn' up for whatever comes next on the wire.
+ *
+ * nob_to_skip == 0: the stream is at a packet boundary, so get ready to
+ * read a fresh header into conn->ksnc_hdr; returns 1.
+ *
+ * Otherwise: move to state SOCKNAL_RX_SLOP and aim as many receive frags as
+ * the conn's iov space allows at a shared throw-away buffer; returns 0.
+ * ksnc_rx_nob_left keeps the full amount to skip, so the caller can come
+ * back here if one pass could not cover all of it.
+ */
+int
+ksocknal_new_packet (ksock_conn_t *conn, int nob_to_skip)
+{
+        static char ksocknal_slop_buffer[4096];
+
+        int chunk;
+        int nfrags = 0;
+        int total = 0;
+
+        /* Common to both cases: mapped frags in the conn's own iov space,
+         * no page frags */
+        conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space;
+        conn->ksnc_rx_kiov = NULL;
+        conn->ksnc_rx_nkiov = 0;
+
+        if (nob_to_skip == 0) {         /* right at next packet boundary now */
+                conn->ksnc_rx_state = SOCKNAL_RX_HEADER;
+                conn->ksnc_rx_nob_wanted = sizeof (ptl_hdr_t);
+                conn->ksnc_rx_nob_left = sizeof (ptl_hdr_t);
+
+                conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_hdr;
+                conn->ksnc_rx_iov[0].iov_len = sizeof (ptl_hdr_t);
+                conn->ksnc_rx_niov = 1;
+                return (1);
+        }
+
+        /* Set up to skip as much as possible now.  If there's more left
+         * (ran out of iov entries) we'll get called again */
+        conn->ksnc_rx_state = SOCKNAL_RX_SLOP;
+        conn->ksnc_rx_nob_left = nob_to_skip;
+
+        for (;;) {
+                chunk = MIN (nob_to_skip, sizeof (ksocknal_slop_buffer));
+
+                /* every frag lands in the same junk buffer */
+                conn->ksnc_rx_iov[nfrags].iov_base = ksocknal_slop_buffer;
+                conn->ksnc_rx_iov[nfrags].iov_len = chunk;
+                nfrags++;
+                total += chunk;
+                nob_to_skip -= chunk;
+
+                if (nob_to_skip == 0)
+                        break;
+                /* mustn't overflow conn's rx iov */
+                if (!(nfrags < sizeof(conn->ksnc_rx_iov_space) / sizeof (struct iovec)))
+                        break;
+        }
+
+        conn->ksnc_rx_niov = nfrags;
+        conn->ksnc_rx_nob_wanted = total;
+        return (0);
+}
+
+/* Progress the receive side of the first connection on sched's rx list.
+ *
+ * Called with sched->kss_lock held (irq state in *irq_flags); the lock is
+ * dropped while the connection is worked on and is held again on return.
+ *
+ * One call reads as much as it can for the conn's current state (header,
+ * body for this node, body being forwarded, or slop being skipped) and
+ * advances the state machine.  On exit the conn is either requeued on the
+ * scheduler (more data believed ready) or descheduled and the scheduler's
+ * ref dropped; a conn that has to wait for a forwarding buffer returns
+ * early, parked on the buffer pool instead.
+ *
+ * NOTE(review): irq_flags is declared 'long *' but is handed to
+ * spin_lock_irqsave(), and ksocknal_scheduler() passes the address of an
+ * unsigned long; the prototype is left as is because it is shared with
+ * code outside this block.
+ */
+void
+ksocknal_process_receive (ksock_sched_t *sched, long *irq_flags)
+{
+        ksock_conn_t *conn;
+        ksock_fmb_t  *fmb;
+        int           rc;
+
+        /* NB: sched->kss_lock held */
+
+        LASSERT (!list_empty (&sched->kss_rx_conns));
+        conn = list_entry(sched->kss_rx_conns.next, ksock_conn_t, ksnc_rx_list);
+        list_del (&conn->ksnc_rx_list);
+
+        spin_unlock_irqrestore (&sched->kss_lock, *irq_flags);
+
+        CDEBUG(D_NET, "sched %p conn %p\n", sched, conn);
+        LASSERT (atomic_read (&conn->ksnc_refcount) > 0);
+        LASSERT (conn->ksnc_rx_scheduled);
+        LASSERT (conn->ksnc_rx_ready);
+
+        /* doesn't need a forwarding buffer */
+        if (conn->ksnc_rx_state != SOCKNAL_RX_GET_FMB)
+                goto try_read;
+
+ get_fmb:
+        fmb = ksocknal_get_idle_fmb (conn);
+        if (fmb == NULL) {      /* conn descheduled waiting for idle fmb */
+                spin_lock_irqsave (&sched->kss_lock, *irq_flags);
+                return;
+        }
+
+        if (ksocknal_init_fmb (conn, fmb)) /* packet forwarded ? */
+                goto out;               /* come back later for next packet */
+
+ try_read:
+        /* NB: sched lock NOT held */
+        LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER ||
+                 conn->ksnc_rx_state == SOCKNAL_RX_BODY ||
+                 conn->ksnc_rx_state == SOCKNAL_RX_BODY_FWD ||
+                 conn->ksnc_rx_state == SOCKNAL_RX_SLOP);
+
+        LASSERT (conn->ksnc_rx_nob_wanted > 0);
+
+        conn->ksnc_rx_ready = 0;/* data ready may race with me and set ready */
+        mb();                   /* => clear BEFORE trying to read */
+
+        rc = ksocknal_recvmsg(conn);
+
+        if (rc == 0)
+                goto out;
+        if (rc < 0) {
+#warning FIXME: handle socket errors properly
+                CERROR ("Error socknal read %p: %d\n", conn, rc);
+                goto out;
+        }
+
+        if (conn->ksnc_rx_nob_wanted != 0)      /* short read */
+                goto out;                       /* try again later */
+
+        /* got all I wanted, assume there's more - prevent data_ready locking */
+        conn->ksnc_rx_ready = 1;
+
+        switch (conn->ksnc_rx_state) {
+        case SOCKNAL_RX_HEADER:
+                /* It's not for me */
+                if (conn->ksnc_hdr.type != PTL_MSG_HELLO &&
+                    NTOH__u64(conn->ksnc_hdr.dest_nid) != ksocknal_lib.ni.nid) {
+                        ksocknal_fwd_parse (conn);
+                        switch (conn->ksnc_rx_state) {
+                        case SOCKNAL_RX_HEADER: /* skipped (zero payload) */
+                                goto out;       /* => come back later */
+                        case SOCKNAL_RX_SLOP:   /* skipping packet's body */
+                                goto try_read;  /* => go read it */
+                        case SOCKNAL_RX_GET_FMB: /* forwarding */
+                                goto get_fmb;   /* => go get a fwd msg buffer */
+                        default:
+                                LBUG ();
+                        }
+                        /* Not Reached */
+                }
+
+                PROF_START(lib_parse);
+                /* sets wanted_len, iovs etc */
+                lib_parse(&ksocknal_lib, &conn->ksnc_hdr, conn);
+                PROF_FINISH(lib_parse);
+
+                if (conn->ksnc_rx_nob_wanted != 0) { /* need to get payload? */
+                        conn->ksnc_rx_state = SOCKNAL_RX_BODY;
+                        goto try_read;          /* go read the payload */
+                }
+                /* Fall through (completed packet for me) */
+
+        case SOCKNAL_RX_BODY:
+                atomic_inc (&ksocknal_packets_received);
+                /* packet is done now */
+                lib_finalize(&ksocknal_lib, NULL, conn->ksnc_cookie);
+                /* Fall through */
+
+        case SOCKNAL_RX_SLOP:
+                /* starting new packet? */
+                if (ksocknal_new_packet (conn, conn->ksnc_rx_nob_left))
+                        goto out;       /* come back later */
+                goto try_read;          /* try to finish reading slop now */
+
+        case SOCKNAL_RX_BODY_FWD:
+                CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (got body)\n",
+                        conn, NTOH__u64 (conn->ksnc_hdr.src_nid),
+                        NTOH__u64 (conn->ksnc_hdr.dest_nid),
+                        conn->ksnc_rx_nob_left);
+
+                atomic_inc (&ksocknal_packets_received);
+
+                /* ksocknal_init_fmb() put router desc. in conn->ksnc_cookie */
+                kpr_fwd_start (&ksocknal_data.ksnd_router,
+                               (kpr_fwd_desc_t *)conn->ksnc_cookie);
+
+                /* no slop in forwarded packets */
+                LASSERT (conn->ksnc_rx_nob_left == 0);
+
+                ksocknal_new_packet (conn, 0);  /* on to next packet */
+                goto out;                       /* (later) */
+
+        default:
+                /* a label may not end a compound statement, hence the
+                 * explicit break; control still reaches the LBUG below */
+                break;
+        }
+
+        /* Not Reached */
+        LBUG ();
+
+ out:
+        spin_lock_irqsave (&sched->kss_lock, *irq_flags);
+
+        /* no data there to read? */
+        if (!conn->ksnc_rx_ready) {
+                /* let socket callback schedule again */
+                conn->ksnc_rx_scheduled = 0;
+                ksocknal_put_conn (conn);       /* release scheduler's ref */
+        } else                          /* let scheduler call me again */
+                list_add_tail (&conn->ksnc_rx_list, &sched->kss_rx_conns);
+}
+
+/* cb_recv handler: tell the connection ('private') where to put an incoming
+ * payload described by mapped (struct iovec) fragments.
+ *
+ * mlen bytes are wanted out of the rlen bytes on the wire; the caller's iov
+ * is copied into the conn's own iov space and 'msg' is remembered as the
+ * conn's cookie.  Returns rlen.
+ */
+int
+ksocknal_recv (nal_cb_t *nal, void *private, lib_msg_t *msg,
+               unsigned int niov, struct iovec *iov, size_t mlen, size_t rlen)
+{
+        ksock_conn_t *conn = (ksock_conn_t *)private;
+
+        LASSERT (mlen <= rlen);
+        LASSERT (niov <= PTL_MD_MAX_IOV);
+
+        /* mapped frags only: keep a private copy of the caller's iov */
+        conn->ksnc_rx_iov = conn->ksnc_rx_iov_space.iov;
+        memcpy (conn->ksnc_rx_iov, iov, niov * sizeof (*iov));
+        conn->ksnc_rx_niov = niov;
+        conn->ksnc_rx_kiov = NULL;
+        conn->ksnc_rx_nkiov = 0;
+
+        /* byte counts and the message to complete later */
+        conn->ksnc_rx_nob_left = rlen;
+        conn->ksnc_rx_nob_wanted = mlen;
+        conn->ksnc_cookie = msg;
+
+        /* the frags must add up to exactly what is wanted */
+        LASSERT (mlen ==
+                 lib_iov_nob (conn->ksnc_rx_niov, conn->ksnc_rx_iov) +
+                 lib_kiov_nob (conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov));
+
+        return (rlen);
+}
+
+/* cb_recv_pages handler: tell the connection ('private') where to put an
+ * incoming payload described by page (ptl_kiov_t) fragments.
+ *
+ * mlen bytes are wanted out of the rlen bytes on the wire; the caller's
+ * kiov is copied into the conn's own iov space and 'msg' is remembered as
+ * the conn's cookie.  Returns rlen.
+ */
+int
+ksocknal_recv_pages (nal_cb_t *nal, void *private, lib_msg_t *msg,
+                     unsigned int niov, ptl_kiov_t *kiov, size_t mlen, size_t rlen)
+{
+        ksock_conn_t *conn = (ksock_conn_t *)private;
+
+        LASSERT (mlen <= rlen);
+        LASSERT (niov <= PTL_MD_MAX_IOV);
+
+        /* page frags only: keep a private copy of the caller's kiov */
+        conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov;
+        memcpy (conn->ksnc_rx_kiov, kiov, niov * sizeof (*kiov));
+        conn->ksnc_rx_nkiov = niov;
+        conn->ksnc_rx_iov = NULL;
+        conn->ksnc_rx_niov = 0;
+
+        /* byte counts and the message to complete later */
+        conn->ksnc_rx_nob_left = rlen;
+        conn->ksnc_rx_nob_wanted = mlen;
+        conn->ksnc_cookie = msg;
+
+        /* the frags must add up to exactly what is wanted */
+        LASSERT (mlen ==
+                 lib_iov_nob (conn->ksnc_rx_niov, conn->ksnc_rx_iov) +
+                 lib_kiov_nob (conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov));
+
+        return (rlen);
+}
+
+/* Main loop of a scheduler thread.
+ *
+ * arg is the thread's ksock_sched_t.  Until ksnd_shuttingdown is set the
+ * loop services, in turn, one connection with data to receive, one
+ * connection with packets to transmit and (with SOCKNAL_ZC) one completed
+ * zero-copy transmit.  With nothing to do it sleeps on kss_waitq; after
+ * SOCKNAL_RESCHED busy iterations it offers to yield the CPU.
+ * Returns 0 when shutting down.
+ */
+int ksocknal_scheduler (void *arg)
+{
+ ksock_sched_t *sched = (ksock_sched_t *)arg;
+ unsigned long flags;
+ int rc;
+ int nloops = 0;
+ /* index of this scheduler within ksnd_schedulers; used to name the thread */
+ int id = sched - ksocknal_data.ksnd_schedulers;
+ char name[16];
+#if (CONFIG_SMP && CPU_AFFINITY)
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ int cpu = cpu_logical_map(id % num_online_cpus());
+#else
+#warning "Take care of architecure specific logical APIC map"
+ int cpu = 1; /* Have to change later. */
+#endif /* LINUX_VERSION_CODE */
+
+ set_cpus_allowed (current, 1 << cpu);
+ id = cpu;
+#endif /* CONFIG_SMP && CPU_AFFINITY */
+
+ snprintf (name, sizeof (name),"ksocknald[%d]", id);
+ kportal_daemonize (name);
+ kportal_blockallsigs ();
+
+ /* kss_lock is held at the top of every iteration */
+ spin_lock_irqsave (&sched->kss_lock, flags);
+
+ while (!ksocknal_data.ksnd_shuttingdown) {
+ int did_something = 0;
+
+ /* Ensure I progress everything semi-fairly */
+
+ if (!list_empty (&sched->kss_rx_conns)) {
+ did_something = 1;
+ /* drops & regains kss_lock */
+ ksocknal_process_receive (sched, &flags);
+ }
+
+ if (!list_empty (&sched->kss_tx_conns)) {
+ did_something = 1;
+ /* drops and regains kss_lock */
+ ksocknal_process_transmit (sched, &flags);
+ }
+#if SOCKNAL_ZC
+ if (!list_empty (&sched->kss_zctxdone_list)) {
+ ksock_tx_t *tx =
+ list_entry(sched->kss_zctxdone_list.next,
+ ksock_tx_t, tx_list);
+ did_something = 1;
+
+ list_del (&tx->tx_list);
+ /* complete the tx without holding kss_lock */
+ spin_unlock_irqrestore (&sched->kss_lock, flags);
+
+ ksocknal_tx_done (tx);
+
+ spin_lock_irqsave (&sched->kss_lock, flags);
+ }
+#endif
+ if (!did_something || /* nothing to do */
+ ++nloops == SOCKNAL_RESCHED) { /* hogging CPU? */
+ spin_unlock_irqrestore (&sched->kss_lock, flags);
+
+ nloops = 0;
+
+ if (!did_something) { /* wait for something to do */
+#if SOCKNAL_ZC
+ rc = wait_event_interruptible (sched->kss_waitq,
+ ksocknal_data.ksnd_shuttingdown ||
+ !list_empty(&sched->kss_rx_conns) ||
+ !list_empty(&sched->kss_tx_conns) ||
+ !list_empty(&sched->kss_zctxdone_list));
+#else
+ rc = wait_event_interruptible (sched->kss_waitq,
+ ksocknal_data.ksnd_shuttingdown ||
+ !list_empty(&sched->kss_rx_conns) ||
+ !list_empty(&sched->kss_tx_conns));
+#endif
+ /* signals were blocked above, so the wait is expected to return 0 */
+ LASSERT (rc == 0);
+ } else
+ our_cond_resched();
+
+ spin_lock_irqsave (&sched->kss_lock, flags);
+ }
+ }
+
+ spin_unlock_irqrestore (&sched->kss_lock, flags);
+ ksocknal_thread_fini ();
+ return (0);
+}
+
+/* Socket data_ready callback.
+ *
+ * If the socket still belongs to a connection (sk->user_data) and that
+ * connection was not already flagged rx_ready, flag it and, unless it is
+ * already scheduled, queue it on its scheduler's rx list (with an extra ref
+ * for the scheduler) and wake the scheduler.  If the connection has gone
+ * (user_data is NULL) the socket's current data_ready handler is called
+ * instead.
+ */
+void
+ksocknal_data_ready (struct sock *sk, int n)
+{
+ unsigned long flags;
+ ksock_conn_t *conn;
+ ksock_sched_t *sched;
+ ENTRY;
+
+ /* interleave correctly with closing sockets... */
+ read_lock (&ksocknal_data.ksnd_socklist_lock);
+
+ conn = sk->user_data;
+ if (conn == NULL) { /* raced with ksocknal_close_sock */
+ /* the handler must have been changed already, or this would recurse */
+ LASSERT (sk->data_ready != &ksocknal_data_ready);
+ sk->data_ready (sk, n);
+ } else if (!conn->ksnc_rx_ready) { /* new news */
+ /* Set ASAP in case of concurrent calls to me */
+ conn->ksnc_rx_ready = 1;
+
+ sched = conn->ksnc_scheduler;
+
+ spin_lock_irqsave (&sched->kss_lock, flags);
+
+ /* Set again (process_receive may have cleared while I blocked for the lock) */
+ conn->ksnc_rx_ready = 1;
+
+ if (!conn->ksnc_rx_scheduled) { /* not being progressed */
+ list_add_tail(&conn->ksnc_rx_list,
+ &sched->kss_rx_conns);
+ conn->ksnc_rx_scheduled = 1;
+ /* extra ref for scheduler */
+ atomic_inc (&conn->ksnc_refcount);
+
+ if (waitqueue_active (&sched->kss_waitq))
+ wake_up (&sched->kss_waitq);
+ }
+
+ spin_unlock_irqrestore (&sched->kss_lock, flags);
+ }
+
+ read_unlock (&ksocknal_data.ksnd_socklist_lock);
+
+ EXIT;
+}
+
+/* Socket write_space callback.
+ *
+ * Once the socket has at least SOCKNAL_TX_LOW_WATER(sk) bytes of write
+ * space, SOCK_NOSPACE is cleared and, if the connection was not already
+ * flagged tx_ready, it is flagged and - when it has packets queued and is
+ * not already scheduled - queued on its scheduler's tx list (with an extra
+ * ref for the scheduler) and the scheduler is woken.  If the connection
+ * has gone (user_data is NULL) the socket's current write_space handler is
+ * called instead.
+ */
+void
+ksocknal_write_space (struct sock *sk)
+{
+ unsigned long flags;
+ ksock_conn_t *conn;
+ ksock_sched_t *sched;
+
+ /* interleave correctly with closing sockets... */
+ read_lock (&ksocknal_data.ksnd_socklist_lock);
+
+ conn = sk->user_data;
+
+ /* the three %s describe the conn's state; all are empty if conn is gone */
+ CDEBUG(D_NET, "sk %p wspace %d low water %d conn %p%s%s%s\n",
+ sk, tcp_wspace(sk), SOCKNAL_TX_LOW_WATER(sk), conn,
+ (conn == NULL) ? "" : (test_bit (0, &conn->ksnc_tx_ready) ?
+ " ready" : " blocked"),
+ (conn == NULL) ? "" : (conn->ksnc_tx_scheduled ?
+ " scheduled" : " idle"),
+ (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ?
+ " empty" : " queued"));
+
+ if (conn == NULL) { /* raced with ksocknal_close_sock */
+ /* the handler must have been changed already, or this would recurse */
+ LASSERT (sk->write_space != &ksocknal_write_space);
+ sk->write_space (sk);
+ } else if (tcp_wspace(sk) >= SOCKNAL_TX_LOW_WATER(sk)) { /* got enough space */
+ clear_bit (SOCK_NOSPACE, &sk->socket->flags);
+
+ if (!conn->ksnc_tx_ready) { /* new news */
+ /* Set ASAP in case of concurrent calls to me */
+ conn->ksnc_tx_ready = 1;
+
+ sched = conn->ksnc_scheduler;
+
+ spin_lock_irqsave (&sched->kss_lock, flags);
+
+ /* Set again (process_transmit may have
+ cleared while I blocked for the lock) */
+ conn->ksnc_tx_ready = 1;
+
+ if (!conn->ksnc_tx_scheduled && // not being progressed
+ !list_empty(&conn->ksnc_tx_queue)){//packets to send
+ list_add_tail (&conn->ksnc_tx_list,
+ &sched->kss_tx_conns);
+ conn->ksnc_tx_scheduled = 1;
+ /* extra ref for scheduler */
+ atomic_inc (&conn->ksnc_refcount);
+
+ if (waitqueue_active (&sched->kss_waitq))
+ wake_up (&sched->kss_waitq);
+ }
+
+ spin_unlock_irqrestore (&sched->kss_lock, flags);
+ }
+ }
+
+ read_unlock (&ksocknal_data.ksnd_socklist_lock);
+}
+
+/* Main loop of the reaper thread.
+ *
+ * Until ksnd_shuttingdown is set, connections are taken one at a time off
+ * ksnd_reaper_list (under ksnd_reaper_lock) and handed to
+ * ksocknal_close_conn(); with the list empty the thread sleeps on
+ * ksnd_reaper_waitq.  'arg' is unused.  Returns 0 when shutting down.
+ */
+int
+ksocknal_reaper (void *arg)
+{
+ unsigned long flags;
+ ksock_conn_t *conn;
+ int rc;
+
+ kportal_daemonize ("ksocknal_reaper");
+ kportal_blockallsigs ();
+
+ while (!ksocknal_data.ksnd_shuttingdown) {
+ spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags);
+
+ if (list_empty (&ksocknal_data.ksnd_reaper_list)) {
+ conn = NULL;
+ } else {
+ conn = list_entry (ksocknal_data.ksnd_reaper_list.next,
+ ksock_conn_t, ksnc_list);
+ list_del (&conn->ksnc_list);
+ }
+
+ spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags);
+
+ /* close outside the lock */
+ if (conn != NULL)
+ ksocknal_close_conn (conn);
+ else {
+ rc = wait_event_interruptible (ksocknal_data.ksnd_reaper_waitq,
+ ksocknal_data.ksnd_shuttingdown ||
+ !list_empty(&ksocknal_data.ksnd_reaper_list));
+ /* signals were blocked above, so the wait is expected to return 0 */
+ LASSERT (rc == 0);
+ }
+ }
+
+ ksocknal_thread_fini ();
+ return (0);
+}
+
+/* This NAL's callback table: the object passed to lib_parse() and
+ * lib_finalize() in this file.  Each cb_* slot is bound to the ksocknal_*
+ * routine of the matching name; nal_data points at the NAL's private data.
+ */
+nal_cb_t ksocknal_lib = {
+ nal_data: &ksocknal_data, /* NAL private data */
+ cb_send: ksocknal_send,
+ cb_send_pages: ksocknal_send_pages,
+ cb_recv: ksocknal_recv,
+ cb_recv_pages: ksocknal_recv_pages,
+ cb_read: ksocknal_read,
+ cb_write: ksocknal_write,
+ cb_callback: ksocknal_callback,
+ cb_malloc: ksocknal_malloc,
+ cb_free: ksocknal_free,
+ cb_printf: ksocknal_printf,
+ cb_cli: ksocknal_cli,
+ cb_sti: ksocknal_sti,
+ cb_dist: ksocknal_dist
+};
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+# Shared module-building rules (presumably where the $(MODULE).o link rule
+# comes from - confirm against Rules.linux)
+include ../../Rules.linux
+
+# Name of the module built in this directory
+MODULE = ktoenal
+modulenet_DATA = ktoenal.o
+EXTRA_PROGRAMS = ktoenal
+
+# No extra preprocessor definitions
+DEFS =
+ktoenal_SOURCES = toenal.c toenal_cb.c toenal.h
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Author: Zach Brown <zab@zabbo.net>
+ * Author: Peter J. Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ * Author: Kedar Sovani <kedar@calsoftinc.com>
+ * Author: Amey Inamdar <amey@calsoftinc.com>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+#include <linux/poll.h>
+#include "toenal.h"
+
+ptl_handle_ni_t ktoenal_ni;
+static nal_t ktoenal_api;
+static ksock_nal_data_t ktoenal_data;
+
+/*
+ksocknal_interface_t ktoenal_interface = {
+ ksni_add_sock: ktoenal_add_sock,
+ ksni_close_sock: ktoenal_close_sock,
+ ksni_set_mynid: ktoenal_set_mynid,
+};
+*/
+
+/* Description of this NAL for the router: its NAL id, the argument to hand
+ * back (the NAL's private data) and the routine that forwards a packet. */
+kpr_nal_interface_t ktoenal_router_interface = {
+ kprni_nalid: TOENAL,
+ kprni_arg: &ktoenal_data,
+ kprni_fwd: ktoenal_fwd_packet,
+};
+
+
+/* API-side forward: pass request 'id' with its argument and result blocks
+ * to the portals library via lib_dispatch(), using the NAL's private data
+ * as the 'private' argument.  args_len and ret_len are unused.
+ * Always returns PTL_OK.
+ */
+int
+ktoenal_api_forward(nal_t *nal, int id, void *args, size_t args_len,
+                    void *ret, size_t ret_len)
+{
+        ksock_nal_data_t *kdata = nal->nal_data;
+        nal_cb_t         *cb = kdata->ksnd_nal_cb;
+
+        /* ktoenal_send needs the NAL data */
+        lib_dispatch(cb, kdata, id, args, ret);
+
+        return PTL_OK;
+}
+
+/* API-side shutdown: close every connection.  Returns whatever
+ * ktoenal_close_sock() returns for the "all connections" nid of 0.
+ * 'nal' and 'ni' are unused.
+ */
+int
+ktoenal_api_shutdown(nal_t *nal, int ni)
+{
+        int rc;
+
+        CDEBUG (D_NET, "closing all connections\n");
+
+        rc = ktoenal_close_sock(0);     /* nid 0 == close all sockets */
+        return rc;
+}
+
+/* API-side yield: give the CPU away if a reschedule is due.
+ * 'nal' is unused. */
+void
+ktoenal_api_yield(nal_t *nal)
+{
+        our_cond_resched();
+}
+
+/* API-side lock: delegate to the library-side cb_cli callback of the NAL
+ * behind 'nal', passing 'flags' through. */
+void
+ktoenal_api_lock(nal_t *nal, unsigned long *flags)
+{
+        ksock_nal_data_t *kdata = nal->nal_data;
+        nal_cb_t         *cb = kdata->ksnd_nal_cb;
+
+        cb->cb_cli(cb, flags);
+}
+
+/* API-side unlock: delegate to the library-side cb_sti callback of the NAL
+ * behind 'nal', passing 'flags' through. */
+void
+ktoenal_api_unlock(nal_t *nal, unsigned long *flags)
+{
+        ksock_nal_data_t *kdata = nal->nal_data;
+        nal_cb_t         *cb = kdata->ksnd_nal_cb;
+
+        cb->cb_sti(cb, flags);
+}
+
+/* NAL initialisation entry point: initialise the portals library for this
+ * NAL with the currently configured nid and the given table sizes, then
+ * return the NAL's API object.  'interface' and 'requested_pid' are unused.
+ */
+nal_t *
+ktoenal_init(int interface, ptl_pt_index_t ptl_size,
+             ptl_ac_index_t ac_size, ptl_pid_t requested_pid)
+{
+        CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n",
+               ktoenal_data.ksnd_mynid);
+
+        lib_init(&ktoenal_lib, ktoenal_data.ksnd_mynid, 0, 10,
+                 ptl_size, ac_size);
+
+        return (&ktoenal_api);
+}
+
+/*
+ * EXTRA functions follow
+ */
+/* Kernels before 2.5 get a local SOCKET_I(): the socket embedded in the
+ * inode's union. */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+#define SOCKET_I(inode) (&(inode)->u.socket_i)
+#endif
+
+/* Map an inode to its struct socket via SOCKET_I(). */
+static __inline__ struct socket *
+socki_lookup(struct inode *inode)
+{
+        struct socket *sock = SOCKET_I(inode);
+
+        return sock;
+}
+
+/* Set this node's nid, both in the NAL's private data and in the library's
+ * network interface record.  Always returns 0.
+ */
+int
+ktoenal_set_mynid(ptl_nid_t nid)
+{
+        /* FIXME: we have to do this because we call lib_init() at module
+         * insertion time, which is before we have 'mynid' available.  lib_init
+         * sets the NAL's nid, which it uses to tell other nodes where packets
+         * are coming from.  This is not a very graceful solution to this
+         * problem. */
+
+        CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n", nid, ktoenal_lib.ni.nid);
+
+        ktoenal_lib.ni.nid = nid;
+        ktoenal_data.ksnd_mynid = nid;
+
+        return (0);
+}
+
+/* Register the socket behind file descriptor 'fd' as the connection to
+ * peer 'nid'.
+ *
+ * A new conn is allocated, initialised (1 ref, held by the socket list),
+ * added to ksnd_socklist, and the poll thread is woken so that it picks up
+ * the new socket.  On success the file reference taken here is kept in the
+ * conn and the module use count is raised.
+ *
+ * Returns 0 on success, -EINVAL if 'fd' cannot be resolved to a socket, or
+ * -ENOMEM if the conn cannot be allocated; on failure the file reference is
+ * dropped again.
+ */
+int
+ktoenal_add_sock (ptl_nid_t nid, int fd)
+{
+        unsigned long      flags;
+        ksock_conn_t      *conn;
+        struct file       *file = NULL;
+        struct socket     *sock = NULL;
+        int                ret;
+        ENTRY;
+
+        file = fget(fd);
+        if (file == NULL)
+                RETURN(-EINVAL);
+
+        ret = -EINVAL;
+        sock = socki_lookup(file->f_dentry->d_inode);
+        if (sock == NULL)
+                GOTO(error, ret);
+
+        ret = -ENOMEM;
+        PORTAL_ALLOC(conn, sizeof(*conn));
+        if (!conn)
+                GOTO(error, ret);
+
+        /* zero for consistency; the size is that of the whole conn, not of
+         * the pointer to it */
+        memset (conn, 0, sizeof (*conn));
+        file->f_flags |= O_NONBLOCK;    /* Does this have any conflicts */
+        conn->ksnc_file = file;
+        conn->ksnc_sock = sock;
+        conn->ksnc_peernid = nid;
+        atomic_set (&conn->ksnc_refcount, 1);   /* 1 ref for socklist */
+
+        /* receive side: idle, expecting a header */
+        conn->ksnc_rx_ready = 0;
+        conn->ksnc_rx_scheduled = 0;
+        ktoenal_new_packet (conn, 0);
+
+        /* transmit side: idle, nothing queued */
+        INIT_LIST_HEAD (&conn->ksnc_tx_queue);
+        conn->ksnc_tx_ready = 0;
+        conn->ksnc_tx_scheduled = 0;
+
+        LASSERT (!in_interrupt());
+        write_lock_irqsave (&ktoenal_data.ksnd_socklist_lock, flags);
+
+        list_add(&conn->ksnc_list, &ktoenal_data.ksnd_socklist);
+        write_unlock_irqrestore (&ktoenal_data.ksnd_socklist_lock, flags);
+
+        /* kick both directions once for the new conn */
+        ktoenal_data_ready(conn);
+        ktoenal_write_space(conn);
+
+        ktoenal_data.ksnd_slistchange = 1;
+        wake_up_process(ktoenal_data.ksnd_pollthread_tsk);
+        /* Schedule pollthread so that it will poll
+         * for newly created socket
+         */
+
+
+        CDEBUG(D_IOCTL, "conn [%p] registered for nid "LPX64"\n",
+               conn, conn->ksnc_peernid);
+
+        /* Can't unload while connection active */
+        PORTAL_MODULE_USE;
+        RETURN(0);
+
+error:
+        fput(file);
+        return (ret);
+}
+
+/*
+ * Close the connection to peer 'nid'.  Passing in a zero nid will close
+ * all connections.
+ *
+ * Victims are moved from the global socket list onto a private list while
+ * the socklist lock is held; the socklist's reference on each of them is
+ * dropped only after the lock has been released.  For a non-zero nid only
+ * the first matching connection is removed.
+ *
+ * Returns 0 if at least one connection was closed, -ENOENT otherwise.
+ */
+int
+ktoenal_close_sock(ptl_nid_t nid)
+{
+        unsigned long      flags;       /* irqsave state must be unsigned long */
+        ksock_conn_t      *conn;
+        LIST_HEAD         (death_row);
+        struct list_head  *tmp;
+
+        LASSERT (!in_interrupt());
+        write_lock_irqsave (&ktoenal_data.ksnd_socklist_lock, flags);
+
+        if (nid == 0) {                         /* close ALL connections */
+                /* insert 'death row' into the socket list... */
+                list_add (&death_row, &ktoenal_data.ksnd_socklist);
+                /* ...extract and reinitialise the socket list itself... */
+                list_del_init (&ktoenal_data.ksnd_socklist);
+                /* ...and voila, death row is the proud owner of all conns */
+        } else {
+                list_for_each (tmp, &ktoenal_data.ksnd_socklist) {
+                        conn = list_entry (tmp, ksock_conn_t, ksnc_list);
+
+                        if (conn->ksnc_peernid == nid) {
+                                /* unlinking mid-walk is OK: the walk stops here */
+                                list_del (&conn->ksnc_list);
+                                list_add (&conn->ksnc_list, &death_row);
+                                break;
+                        }
+                }
+        }
+
+        write_unlock_irqrestore (&ktoenal_data.ksnd_socklist_lock, flags);
+
+        if (list_empty (&death_row))
+                return (-ENOENT);
+
+        do {
+                conn = list_entry (death_row.next, ksock_conn_t, ksnc_list);
+                list_del (&conn->ksnc_list);
+                ktoenal_put_conn (conn);        /* drop ref for ksnd_socklist */
+        } while (!list_empty (&death_row));
+
+        /* tell the poll thread that the socket list has changed */
+        ktoenal_data.ksnd_slistchange = 1;
+        wake_up_process(ktoenal_data.ksnd_pollthread_tsk);
+
+        return (0);
+}
+
+
+/*
+ * Find the connection to peer 'nid' on the global socket list.
+ *
+ * On success the connection's refcount has been incremented on behalf of
+ * the caller (while the socklist read lock was held).  Returns NULL when
+ * there is no connection to 'nid'.
+ */
+ksock_conn_t *
+ktoenal_get_conn (ptl_nid_t nid)
+{
+        struct list_head  *pos;
+        ksock_conn_t      *match = NULL;
+
+        PROF_START(conn_list_walk);
+
+        read_lock (&ktoenal_data.ksnd_socklist_lock);
+
+        list_for_each(pos, &ktoenal_data.ksnd_socklist) {
+                ksock_conn_t *candidate = list_entry(pos, ksock_conn_t, ksnc_list);
+
+                if (candidate->ksnc_peernid != nid)
+                        continue;
+
+                /* caller is referencing */
+                atomic_inc (&candidate->ksnc_refcount);
+                match = candidate;
+                break;
+        }
+
+        read_unlock (&ktoenal_data.ksnd_socklist_lock);
+
+        if (match != NULL) {
+                CDEBUG(D_NET, "got conn [%p] -> "LPX64" (%d)\n",
+                       match, nid, atomic_read (&match->ksnc_refcount));
+        } else {
+                CDEBUG(D_NET, "No connection found when looking for nid "LPX64"\n", nid);
+        }
+
+        PROF_FINISH(conn_list_walk);
+        return (match);
+}
+
+/*
+ * Final teardown of a connection: release its file reference, free the
+ * descriptor and drop the module use count taken when it was registered.
+ */
+void
+ktoenal_close_conn (ksock_conn_t *conn)
+{
+        struct file *filp;
+
+        CDEBUG (D_NET, "connection [%p] closed \n", conn);
+
+        filp = conn->ksnc_file;
+        fput (filp);
+
+        PORTAL_FREE (conn, sizeof (*conn));
+
+        /* One less connection keeping us hanging on */
+        PORTAL_MODULE_UNUSE;
+}
+
+/*
+ * Dispose of a connection whose refcount has reached zero.
+ *
+ * Outside interrupt context the connection is closed immediately;
+ * in interrupt context it is queued on the reaper list and the reaper
+ * is woken instead.
+ */
+void
+_ktoenal_put_conn (ksock_conn_t *conn)
+{
+        unsigned long flags;
+
+        CDEBUG (D_NET, "connection [%p] handed the black spot\n", conn);
+
+        /* "But what is the black spot, captain?" I asked.
+         * "That's a summons, mate..." */
+
+        LASSERT (atomic_read (&conn->ksnc_refcount) == 0);
+        LASSERT (!conn->ksnc_rx_scheduled);
+
+        if (in_interrupt()) {
+                /* defer the close to the reaper */
+                spin_lock_irqsave (&ktoenal_data.ksnd_reaper_lock, flags);
+
+                list_add (&conn->ksnc_list, &ktoenal_data.ksnd_reaper_list);
+                wake_up (&ktoenal_data.ksnd_reaper_waitq);
+
+                spin_unlock_irqrestore (&ktoenal_data.ksnd_reaper_lock, flags);
+                return;
+        }
+
+        ktoenal_close_conn (conn);
+}
+
+/*
+ * Release the pre-allocated forwarding message buffers (including every
+ * page attached to them) and the pre-allocated transmit descriptors.
+ * Either array may be absent (NULL), in which case it is skipped.
+ */
+void
+ktoenal_free_buffers (void)
+{
+        ksock_fmb_t  *fmbs = (ksock_fmb_t *)ktoenal_data.ksnd_fmbs;
+        int           m;
+        int           p;
+
+        if (fmbs != NULL) {
+                for (m = 0; m < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); m++) {
+                        for (p = 0; p < fmbs[m].fmb_npages; p++) {
+                                /* slots stay NULL if allocation stopped part-way */
+                                if (fmbs[m].fmb_pages[p] != NULL)
+                                        __free_page (fmbs[m].fmb_pages[p]);
+                        }
+                }
+
+                PORTAL_FREE (ktoenal_data.ksnd_fmbs,
+                             sizeof (ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS));
+        }
+
+        if (ktoenal_data.ksnd_ltxs != NULL) {
+                PORTAL_FREE (ktoenal_data.ksnd_ltxs,
+                             sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
+        }
+}
+
+/*
+ * NAL command dispatcher (registered with kportal_nal_register()).
+ *
+ * Handles peer fd registration, connection close and NID registration;
+ * any other command yields -EINVAL.  'private' is unused.
+ */
+int
+ktoenal_cmd(struct portal_ioctl_data * data, void * private)
+{
+        LASSERT (data != NULL);
+
+        switch(data->ioc_nal_cmd) {
+        case NAL_CMD_REGISTER_PEER_FD:
+                return ktoenal_add_sock(data->ioc_nid, data->ioc_fd);
+
+        case NAL_CMD_CLOSE_CONNECTION:
+                return ktoenal_close_sock(data->ioc_nid);
+
+        case NAL_CMD_REGISTER_MYNID:
+                return ktoenal_set_mynid (data->ioc_nid);
+
+        default:
+                return -EINVAL;         /* unrecognised command */
+        }
+}
+
+
+/*
+ * Module cleanup.  Also called by ktoenal_module_init() to unwind a
+ * partially completed initialisation.
+ *
+ * ksnd_init records how far initialisation got; the switch enters at that
+ * stage and falls through all earlier stages, undoing them in reverse
+ * order.
+ *
+ * NOTE(review): this function is marked __exit yet is called from the
+ * __init path -- confirm that is safe for non-modular builds.
+ */
+void __exit
+ktoenal_module_fini (void)
+{
+        CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
+               atomic_read (&portal_kmemory));
+
+        switch (ktoenal_data.ksnd_init) {
+        default:
+                LASSERT (0);
+
+        case SOCKNAL_INIT_ALL:
+                kportal_nal_unregister(TOENAL);
+                PORTAL_SYMBOL_UNREGISTER (ktoenal_ni);
+                /* fall through */
+
+        case SOCKNAL_INIT_PTL:
+                PtlNIFini(ktoenal_ni);
+                lib_fini(&ktoenal_lib);
+                /* fall through */
+
+        case SOCKNAL_INIT_DATA:
+                /* Module refcount only gets to zero when all connections
+                 * have been closed so all lists must be empty */
+                LASSERT (list_empty (&ktoenal_data.ksnd_socklist));
+                LASSERT (list_empty (&ktoenal_data.ksnd_reaper_list));
+                LASSERT (list_empty (&ktoenal_data.ksnd_rx_conns));
+                LASSERT (list_empty (&ktoenal_data.ksnd_tx_conns));
+                LASSERT (list_empty (&ktoenal_data.ksnd_small_fmp.fmp_blocked_conns));
+                LASSERT (list_empty (&ktoenal_data.ksnd_large_fmp.fmp_blocked_conns));
+
+                kpr_shutdown (&ktoenal_data.ksnd_router); /* stop router calling me */
+
+                /* flag threads to terminate; wake and wait for them to die */
+                ktoenal_data.ksnd_shuttingdown = 1;
+                wake_up_all (&ktoenal_data.ksnd_reaper_waitq);
+                wake_up_all (&ktoenal_data.ksnd_sched_waitq);
+
+                /* The task pointer is zeroed by module_init's memset and is
+                 * still NULL when init fails before the poll thread exists
+                 * (it is presumably set by the poll thread itself -- not
+                 * visible here), so don't hand NULL to wake_up_process(). */
+                if (ktoenal_data.ksnd_pollthread_tsk != NULL)
+                        wake_up_process(ktoenal_data.ksnd_pollthread_tsk);
+
+                while (atomic_read (&ktoenal_data.ksnd_nthreads) != 0) {
+                        CDEBUG (D_NET, "waitinf for %d threads to terminate\n",
+                                atomic_read (&ktoenal_data.ksnd_nthreads));
+                        set_current_state (TASK_UNINTERRUPTIBLE);
+                        schedule_timeout (HZ);
+                }
+
+                kpr_deregister (&ktoenal_data.ksnd_router);
+
+                ktoenal_free_buffers();
+                /* fall through */
+
+        case SOCKNAL_INIT_NOTHING:
+                break;
+        }
+
+        CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
+               atomic_read (&portal_kmemory));
+
+        printk(KERN_INFO "Routing socket NAL unloaded (final mem %d)\n",
+               atomic_read(&portal_kmemory));
+}
+
+/*
+ * Module initialisation.
+ *
+ * Stages (recorded in ktoenal_data.ksnd_init so that
+ * ktoenal_module_fini() can unwind a partial initialisation):
+ *   SOCKNAL_INIT_DATA - lists, locks and wait queues initialised
+ *   SOCKNAL_INIT_PTL  - PtlNIInit() has succeeded
+ *   SOCKNAL_INIT_ALL  - threads started, command interface registered
+ *
+ * Failure to register with the router is tolerated (the NAL simply does
+ * not route); any other failure unwinds and returns the error.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int __init
+ktoenal_module_init (void)
+{
+        int   pkmem = atomic_read(&portal_kmemory);
+        int   rc;
+        int   i;
+        int   j;
+
+        /* packet descriptor must fit in a router descriptor's scratchpad */
+        LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t));
+
+        LASSERT (ktoenal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
+
+        ktoenal_api.forward  = ktoenal_api_forward;
+        ktoenal_api.shutdown = ktoenal_api_shutdown;
+        ktoenal_api.yield    = ktoenal_api_yield;
+        ktoenal_api.validate = NULL;            /* our api validate is a NOOP */
+        ktoenal_api.lock     = ktoenal_api_lock;
+        ktoenal_api.unlock   = ktoenal_api_unlock;
+        ktoenal_api.nal_data = &ktoenal_data;
+
+        ktoenal_lib.nal_data = &ktoenal_data;
+
+        memset (&ktoenal_data, 0, sizeof (ktoenal_data)); /* zero pointers */
+
+        INIT_LIST_HEAD(&ktoenal_data.ksnd_socklist);
+        rwlock_init(&ktoenal_data.ksnd_socklist_lock);
+
+        ktoenal_data.ksnd_nal_cb = &ktoenal_lib;
+        spin_lock_init (&ktoenal_data.ksnd_nal_cb_lock);
+
+        spin_lock_init (&ktoenal_data.ksnd_sched_lock);
+
+        init_waitqueue_head (&ktoenal_data.ksnd_sched_waitq);
+
+        INIT_LIST_HEAD (&ktoenal_data.ksnd_rx_conns);
+        INIT_LIST_HEAD (&ktoenal_data.ksnd_tx_conns);
+
+        INIT_LIST_HEAD(&ktoenal_data.ksnd_small_fmp.fmp_idle_fmbs);
+        INIT_LIST_HEAD(&ktoenal_data.ksnd_small_fmp.fmp_blocked_conns);
+        INIT_LIST_HEAD(&ktoenal_data.ksnd_large_fmp.fmp_idle_fmbs);
+        INIT_LIST_HEAD(&ktoenal_data.ksnd_large_fmp.fmp_blocked_conns);
+
+        INIT_LIST_HEAD(&ktoenal_data.ksnd_idle_nblk_ltx_list);
+        INIT_LIST_HEAD(&ktoenal_data.ksnd_idle_ltx_list);
+        init_waitqueue_head(&ktoenal_data.ksnd_idle_ltx_waitq);
+
+        INIT_LIST_HEAD (&ktoenal_data.ksnd_reaper_list);
+        init_waitqueue_head(&ktoenal_data.ksnd_reaper_waitq);
+        spin_lock_init (&ktoenal_data.ksnd_reaper_lock);
+
+        ktoenal_data.ksnd_init = SOCKNAL_INIT_DATA; /* flag lists/ptrs/locks initialised */
+
+        PORTAL_ALLOC(ktoenal_data.ksnd_fmbs,
+                     sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS));
+        if (ktoenal_data.ksnd_fmbs == NULL)
+                RETURN(-ENOMEM);
+
+        /* NULL out buffer pointers etc */
+        memset(ktoenal_data.ksnd_fmbs, 0,
+               sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS));
+
+        for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); i++) {
+                ksock_fmb_t *fmb = &((ksock_fmb_t *)ktoenal_data.ksnd_fmbs)[i];
+
+                /* the first SOCKNAL_SMALL_FWD_NMSGS buffers are "small" */
+                if (i < SOCKNAL_SMALL_FWD_NMSGS) {
+                        fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES;
+                        fmb->fmb_pool = &ktoenal_data.ksnd_small_fmp;
+                } else {
+                        fmb->fmb_npages = SOCKNAL_LARGE_FWD_PAGES;
+                        fmb->fmb_pool = &ktoenal_data.ksnd_large_fmp;
+                }
+
+                LASSERT (fmb->fmb_npages > 0);
+                for (j = 0; j < fmb->fmb_npages; j++) {
+                        fmb->fmb_pages[j] = alloc_page (GFP_KERNEL);
+
+                        if (fmb->fmb_pages[j] == NULL) {
+                                ktoenal_module_fini ();
+                                return (-ENOMEM);
+                        }
+
+                        LASSERT (page_address (fmb->fmb_pages[j]) != NULL);
+                }
+
+                list_add (&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs);
+        }
+
+        PORTAL_ALLOC(ktoenal_data.ksnd_ltxs,
+                     sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
+        if (ktoenal_data.ksnd_ltxs == NULL) {
+                ktoenal_module_fini ();
+                return (-ENOMEM);
+        }
+
+        /* Deterministic bugs please */
+        memset (ktoenal_data.ksnd_ltxs, 0xeb,
+                sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
+
+        for (i = 0; i < SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS; i++) {
+                ksock_ltx_t *ltx = &((ksock_ltx_t *)ktoenal_data.ksnd_ltxs)[i];
+
+                /* the first SOCKNAL_NLTXS descriptors are the normal ones;
+                 * the remainder are reserved for callers that can't block */
+                ltx->ltx_idle = i < SOCKNAL_NLTXS ?
+                                &ktoenal_data.ksnd_idle_ltx_list :
+                                &ktoenal_data.ksnd_idle_nblk_ltx_list;
+                list_add (&ltx->ltx_tx.tx_list, ltx->ltx_idle);
+        }
+
+        rc = PtlNIInit(ktoenal_init, 32, 4, 0, &ktoenal_ni);
+        if (rc != 0) {
+                CERROR("ktoenal: PtlNIInit failed: error %d\n", rc);
+                ktoenal_module_fini ();
+                RETURN (rc);
+        }
+        PtlNIDebug(ktoenal_ni, ~0);
+
+        ktoenal_data.ksnd_init = SOCKNAL_INIT_PTL; /* flag PtlNIInit() called */
+
+        ktoenal_data.ksnd_slistchange = 1;
+        for (i = 0; i < TOENAL_N_SCHED; i++) {
+                rc = ktoenal_thread_start (ktoenal_scheduler, NULL);
+                if (rc != 0) {
+                        CERROR("Can't spawn socknal scheduler[%d]: %d\n", i, rc);
+                        ktoenal_module_fini ();
+                        RETURN (rc);
+                }
+        }
+
+        rc = ktoenal_thread_start (ktoenal_reaper, NULL);
+        if (rc != 0) {
+                CERROR("Can't spawn socknal reaper: %d\n", rc);
+                ktoenal_module_fini ();
+                RETURN (rc);
+        }
+
+        rc = ktoenal_thread_start (ktoenal_pollthread, NULL);
+        if (rc != 0) {
+                CERROR("Can't spawn socknal pollthread: %d\n", rc);
+                ktoenal_module_fini ();
+                RETURN (rc);
+        }
+
+        /* routing is optional: carry on without it */
+        rc = kpr_register(&ktoenal_data.ksnd_router,
+                          &ktoenal_router_interface);
+        if (rc != 0)
+                CDEBUG (D_NET, "Can't initialise routing interface (rc = %d): not routing\n", rc);
+
+        /* The command interface is the only way peers and our NID get
+         * registered, and module_fini unregisters it unconditionally once
+         * SOCKNAL_INIT_ALL is flagged, so failure here must abort the load. */
+        rc = kportal_nal_register(TOENAL, &ktoenal_cmd, NULL);
+        if (rc != 0) {
+                CERROR("Can't initialise command interface (rc = %d)\n",
+                       rc);
+                ktoenal_module_fini ();
+                RETURN (rc);
+        }
+
+        PORTAL_SYMBOL_REGISTER(ktoenal_ni);
+
+        /* flag everything initialised */
+        ktoenal_data.ksnd_init = SOCKNAL_INIT_ALL;
+
+        printk(KERN_INFO"Routing TOE NAL loaded (Routing %s, initial mem %d)\n",
+               kpr_routing(&ktoenal_data.ksnd_router) ? "enabled" : "disabled",
+               pkmem);
+
+        return (0);
+}
+
+/* Module metadata.
+ * NOTE(review): the description still reads "TCP Socket NAL" although this
+ * module registers itself as TOENAL -- confirm whether that is intended. */
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("Kernel TCP Socket NAL v0.01");
+MODULE_LICENSE("GPL");
+
+/* module load / unload entry points */
+module_init(ktoenal_module_init);
+module_exit(ktoenal_module_fini);
+
+/* the network interface handle filled in by PtlNIInit() at module init */
+EXPORT_SYMBOL (ktoenal_ni);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Author: Zach Brown <zab@zabbo.net>
+ * Author: Peter J. Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ * Author: Kedar Sovani <kedar@calsoftinc.com>
+ * Author: Amey Inamdar <amey@calsoftinc.com>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#define DEBUG_PORTAL_ALLOC
+#define EXPORT_SYMTAB
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/smp_lock.h>
+#include <linux/unistd.h>
+#include <net/tcp.h>
+#include <linux/uio.h>
+#include <linux/sched.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/list.h>
+#include <asm/uaccess.h>
+#include <asm/segment.h>
+
+#define DEBUG_SUBSYSTEM S_SOCKNAL
+
+#include <linux/kp30.h>
+#include <portals/p30.h>
+#include <portals/lib-p30.h>
+
+/* Compile-time sizing of the NAL's pre-allocated resources */
+#define SOCKNAL_MAX_FWD_PAYLOAD (64<<10) /* biggest payload I can forward */
+
+#define SOCKNAL_NLTXS 128 /* # normal transmit messages */
+#define SOCKNAL_NNBLK_LTXS 128 /* # transmit messages reserved if can't block */
+
+#define SOCKNAL_SMALL_FWD_NMSGS 128 /* # small messages I can be forwarding at any time */
+#define SOCKNAL_LARGE_FWD_NMSGS 32 /* # large messages I can be forwarding at any time */
+
+#define SOCKNAL_SMALL_FWD_PAGES 1 /* # pages in a small message fwd buffer */
+
+#define SOCKNAL_LARGE_FWD_PAGES (PAGE_ALIGN (sizeof (ptl_hdr_t) + SOCKNAL_MAX_FWD_PAYLOAD) >> PAGE_SHIFT)
+ /* # pages in a large message fwd buffer */
+
+#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */
+
+/* 80% of the socket's send buffer size ((sk)->sndbuf) */
+#define SOCKNAL_TX_LOW_WATER(sk) (((sk)->sndbuf*8)/10)
+
+/* # scheduler threads started at module init */
+#define TOENAL_N_SCHED 1
+
+/* One pool exists per buffer size class (small / large); see
+ * ksnd_small_fmp and ksnd_large_fmp in ksock_nal_data_t. */
+typedef struct /* pool of forwarding buffers */
+{
+ struct list_head fmp_idle_fmbs; /* buffers waiting for a connection */
+ struct list_head fmp_blocked_conns; /* connections waiting for a buffer */
+} ksock_fmb_pool_t;
+
+/* Global state of the NAL; the single instance is ktoenal_data. */
+typedef struct {
+ int ksnd_init; /* initialisation state */
+
+ struct list_head ksnd_socklist; /* all my connections */
+ rwlock_t ksnd_socklist_lock; /* stabilise add/find/remove */
+
+
+ ptl_nid_t ksnd_mynid; /* this node's NID */
+ nal_cb_t *ksnd_nal_cb; /* the lib NAL (set to &ktoenal_lib at init) */
+ spinlock_t ksnd_nal_cb_lock; /* lib cli/sti lock */
+
+ atomic_t ksnd_nthreads; /* # live threads */
+ int ksnd_shuttingdown; /* tell threads to exit */
+
+ kpr_router_t ksnd_router; /* THE router */
+
+ spinlock_t ksnd_sched_lock; /* serialise packet scheduling */
+ wait_queue_head_t ksnd_sched_waitq; /* where scheduler(s) wait */
+
+ struct list_head ksnd_rx_conns; /* conn waiting to be read */
+ struct list_head ksnd_tx_conns; /* conn waiting to be written */
+
+ void *ksnd_fmbs; /* all the pre-allocated FMBs */
+ ksock_fmb_pool_t ksnd_small_fmp; /* small message forwarding buffers */
+ ksock_fmb_pool_t ksnd_large_fmp; /* large message forwarding buffers */
+
+ void *ksnd_ltxs; /* all the pre-allocated LTXs */
+ struct list_head ksnd_idle_ltx_list; /* where to get an idle LTX */
+ struct list_head ksnd_idle_nblk_ltx_list; /* where to get an idle LTX if you can't block */
+ wait_queue_head_t ksnd_idle_ltx_waitq; /* where to block for an idle LTX */
+
+ struct list_head ksnd_reaper_list; /* conn waiting to be reaped */
+ wait_queue_head_t ksnd_reaper_waitq; /* reaper sleeps here */
+ spinlock_t ksnd_reaper_lock; /* serialise */
+
+ struct task_struct *ksnd_pollthread_tsk;/* task_struct for the poll thread */
+ poll_table ksnd_pwait; /* poll wait table for the socket */
+ int ksnd_slistchange; /* informs the pollthread that
+ * the socklist has changed */
+} ksock_nal_data_t;
+
+/* Values of ksock_nal_data_t::ksnd_init, in the order they are reached
+ * during module initialisation; module cleanup unwinds from the recorded
+ * state downwards. */
+#define SOCKNAL_INIT_NOTHING 0 /* nothing set up yet */
+#define SOCKNAL_INIT_DATA 1 /* lists/ptrs/locks initialised */
+#define SOCKNAL_INIT_PTL 2 /* PtlNIInit() called */
+#define SOCKNAL_INIT_ALL 3 /* everything initialised */
+
+/* Common part of every packet queued for transmission.  It is embedded
+ * either in a ksock_ltx_t (local send) or in a router forwarding
+ * descriptor's scratchpad (forwarded packet); tx_isfwd says which. */
+typedef struct /* transmit packet */
+{
+ struct list_head tx_list; /* queue on conn for transmission etc */
+ char tx_isfwd; /* forwarding / sourced here */
+ int tx_nob; /* # packet bytes */
+ int tx_niov; /* # packet frags */
+ struct iovec *tx_iov; /* packet frags */
+} ksock_tx_t;
+
+/* Descriptor for a packet sent from this node: carries its own copy of
+ * the header and an iovec with room for the header plus PTL_MD_MAX_IOV
+ * payload fragments. */
+typedef struct /* locally transmitted packet */
+{
+ ksock_tx_t ltx_tx; /* send info */
+ struct list_head *ltx_idle; /* where to put when idle */
+ void *ltx_private; /* lib_finalize() callback arg */
+ void *ltx_cookie; /* lib_finalize() callback arg */
+ struct iovec ltx_iov[1 + PTL_MD_MAX_IOV]; /* msg frags */
+ ptl_hdr_t ltx_hdr; /* buffer for packet header */
+} ksock_ltx_t;
+
+/* Recover the enclosing descriptor from a ksock_tx_t pointer */
+#define KSOCK_TX_2_KPR_FWD_DESC(ptr) list_entry (ptr, kpr_fwd_desc_t, kprfd_scratch)
+/* forwarded packets (router->socknal) embedded in kpr_fwd_desc_t::kprfd_scratch */
+
+#define KSOCK_TX_2_KSOCK_LTX(ptr) list_entry (ptr, ksock_ltx_t, ltx_tx)
+/* local packets (lib->socknal) embedded in ksock_ltx_t::ltx_tx */
+
+/* NB list_entry() is used here as a convenient macro for calculating a
+ * pointer to a struct from the address of a member.
+ */
+
+/* The page and iovec arrays are sized for the large buffer class; small
+ * buffers use only the first fmb_npages entries. */
+typedef struct /* Kernel portals Socket Forwarding message buffer */
+{ /* (socknal->router) */
+ struct list_head fmb_list; /* queue idle */
+ kpr_fwd_desc_t fmb_fwd; /* router's descriptor */
+ int fmb_npages; /* # pages allocated */
+ ksock_fmb_pool_t *fmb_pool; /* owning pool */
+ struct page *fmb_pages[SOCKNAL_LARGE_FWD_PAGES]; /* the buffer's pages */
+ struct iovec fmb_iov[SOCKNAL_LARGE_FWD_PAGES]; /* presumably frags describing the pages -- filled in elsewhere */
+} ksock_fmb_t;
+
+/* Receive states, stored in ksock_conn_t::ksnc_rx_state */
+#define SOCKNAL_RX_HEADER 1 /* reading header */
+#define SOCKNAL_RX_BODY 2 /* reading body (to deliver here) */
+#define SOCKNAL_RX_BODY_FWD 3 /* reading body (to forward) */
+#define SOCKNAL_RX_SLOP 4 /* skipping body */
+#define SOCKNAL_RX_GET_FMB 5 /* scheduled for forwarding */
+#define SOCKNAL_RX_FMB_SLEEP 6 /* blocked waiting for a fwd desc */
+
+/* Per-peer connection.  Lifetime is governed by ksnc_refcount: the global
+ * socket list holds one reference, and lookups via ktoenal_get_conn() and
+ * the scheduler take further ones. */
+typedef struct
+{
+ struct list_head ksnc_list; /* stash on global socket list */
+ struct file *ksnc_file; /* socket filp */
+ struct socket *ksnc_sock; /* socket */
+ ptl_nid_t ksnc_peernid; /* who's on the other end */
+ atomic_t ksnc_refcount; /* # users */
+
+ /* READER */
+ struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */
+ unsigned long ksnc_rx_ready; /* data ready to read */
+ int ksnc_rx_scheduled; /* being progressed */
+ int ksnc_rx_state; /* what is being read */
+ int ksnc_rx_nob_left; /* # bytes to next hdr/body */
+ int ksnc_rx_nob_wanted; /* bytes actually wanted */
+ int ksnc_rx_niov; /* # frags */
+ struct iovec ksnc_rx_iov[1 + PTL_MD_MAX_IOV]; /* the frags */
+
+ void *ksnc_cookie; /* rx lib_finalize passthru arg */
+ ptl_hdr_t ksnc_hdr; /* where I read headers into */
+
+ /* WRITER */
+ struct list_head ksnc_tx_list; /* where I enq waiting for output space */
+ struct list_head ksnc_tx_queue; /* packets waiting to be sent */
+ unsigned long ksnc_tx_ready; /* write space */
+ int ksnc_tx_scheduled; /* being progressed */
+
+} ksock_conn_t;
+
+/* Connection management */
+extern int ktoenal_add_sock (ptl_nid_t nid, int fd); /* register socket 'fd' as the conn to 'nid' */
+extern int ktoenal_close_sock(ptl_nid_t nid); /* close conn to 'nid'; 0 closes all */
+extern int ktoenal_set_mynid(ptl_nid_t nid); /* set this node's NID */
+extern int ktoenal_push_sock(ptl_nid_t nid); /* NOTE(review): no definition visible in this chunk */
+extern ksock_conn_t *ktoenal_get_conn (ptl_nid_t nid); /* look up conn and take a ref; NULL if none */
+extern void _ktoenal_put_conn (ksock_conn_t *conn); /* dispose of a conn whose refcount hit zero */
+extern void ktoenal_close_conn (ksock_conn_t *conn); /* release file, free conn */
+
+/*
+ * Drop one reference on 'conn'; whoever drops the last reference hands
+ * the connection to _ktoenal_put_conn() for disposal.
+ */
+static inline void
+ktoenal_put_conn (ksock_conn_t *conn)
+{
+        /* NB logs the refcount as it was BEFORE this put */
+        CDEBUG (D_OTHER, "putting conn[%p] -> "LPX64" (%d)\n",
+                conn, conn->ksnc_peernid, atomic_read (&conn->ksnc_refcount));
+
+        if (!atomic_dec_and_test (&conn->ksnc_refcount))
+                return;                 /* still referenced */
+
+        _ktoenal_put_conn (conn);
+}
+
+/* Thread bodies and packet handling */
+extern int ktoenal_thread_start (int (*fn)(void *arg), void *arg); /* spawn a kernel thread and count it */
+extern int ktoenal_new_packet (ksock_conn_t *conn, int skip);
+extern void ktoenal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd); /* queue a router-supplied packet for sending */
+extern int ktoenal_scheduler (void *arg);
+extern int ktoenal_reaper (void *arg);
+extern int ktoenal_pollthread (void *arg);
+extern void ktoenal_data_ready(ksock_conn_t *conn);
+extern void ktoenal_write_space(ksock_conn_t *conn);
+
+
+/* The lib-side NAL and the NAL's global state */
+extern nal_cb_t ktoenal_lib;
+extern ksock_nal_data_t ktoenal_data;
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Author: Zach Brown <zab@zabbo.net>
+ * Author: Peter J. Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ * Author: Kedar Sovani <kedar@calsoftinc.com>
+ * Author: Amey Inamdar <amey@calsoftinc.com>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <linux/poll.h>
+#include "toenal.h"
+
+/* Packet statistics.  The two 'long' counters are only incremented with
+ * ksnd_sched_lock held (see ktoenal_launch_packet() and
+ * ktoenal_process_transmit()). */
+atomic_t ktoenal_packets_received;
+long ktoenal_packets_launched;
+long ktoenal_packets_transmitted;
+
+/*
+ * LIB functions follow
+ *
+ */
+/*
+ * lib callback: copy 'len' bytes from 'src_addr' to 'dst_addr' with a
+ * plain memcpy().  'private' is unused.  Always returns 0.
+ */
+int
+ktoenal_read(nal_cb_t *nal, void *private, void *dst_addr,
+             user_ptr src_addr, size_t len)
+{
+        long nbytes = (long)len;        /* matches the %ld in the trace */
+
+        CDEBUG(D_NET, LPX64": reading %ld bytes from %p -> %p\n",
+               nal->ni.nid, nbytes, src_addr, dst_addr);
+
+        memcpy(dst_addr, src_addr, len);
+
+        return 0;
+}
+
+/*
+ * lib callback: copy 'len' bytes from 'src_addr' to 'dst_addr' with a
+ * plain memcpy().  'private' is unused.  Always returns 0.
+ */
+int
+ktoenal_write(nal_cb_t *nal, void *private, user_ptr dst_addr,
+              void *src_addr, size_t len)
+{
+        long nbytes = (long)len;        /* matches the %ld in the trace */
+
+        CDEBUG(D_NET, LPX64": writing %ld bytes from %p -> %p\n",
+               nal->ni.nid, nbytes, src_addr, dst_addr);
+
+        memcpy(dst_addr, src_addr, len);
+
+        return 0;
+}
+
+/*
+ * lib callback: deliver event 'ev' to the event queue's handler, if the
+ * queue has one.  'private' is unused.  Always returns 0.
+ */
+int
+ktoenal_callback (nal_cb_t * nal, void *private, lib_eq_t *eq,
+                  ptl_event_t *ev)
+{
+        CDEBUG(D_NET, LPX64": callback eq %p ev %p\n",
+               nal->ni.nid, eq, ev);
+
+        if (eq->event_callback == NULL)
+                return 0;               /* nobody to notify */
+
+        eq->event_callback(ev);
+        return 0;
+}
+
+/*
+ * lib callback: allocate 'len' bytes of zeroed memory.
+ * Returns NULL if the allocation fails.
+ */
+void *
+ktoenal_malloc(nal_cb_t *nal, size_t len)
+{
+        void *mem;
+
+        PORTAL_ALLOC(mem, len);
+        if (mem == NULL)
+                return (NULL);
+
+        memset(mem, 0, len);
+        return (mem);
+}
+
+/*
+ * lib callback: release a buffer obtained from ktoenal_malloc().
+ * 'len' must be the size that was originally requested.
+ */
+void
+ktoenal_free(nal_cb_t *nal, void *buf, size_t len)
+{
+        PORTAL_FREE(buf, len);
+}
+
+/*
+ * lib callback: printf-style message, formatted into a 256 byte stack
+ * buffer (longer output is truncated) and emitted via CDEBUG(D_NET).
+ */
+void
+ktoenal_printf(nal_cb_t *nal, const char *fmt, ...)
+{
+        char     msg[256];
+        va_list  args;
+
+        va_start (args, fmt);
+        vsnprintf (msg, sizeof (msg), fmt, args);       /* sprint safely */
+        va_end (args);
+
+        msg[sizeof (msg) - 1] = 0;                      /* ensure terminated */
+
+        CDEBUG (D_NET, "%s", msg);
+}
+
+/*
+ * lib callback: enter the lib critical section by taking
+ * ksnd_nal_cb_lock.  'flags' is not used: interrupt state is left alone.
+ */
+void
+ktoenal_cli(nal_cb_t *nal, unsigned long *flags)
+{
+        ksock_nal_data_t *data;
+
+        data = nal->nal_data;
+        spin_lock(&data->ksnd_nal_cb_lock);
+}
+
+/*
+ * lib callback: leave the lib critical section by releasing
+ * ksnd_nal_cb_lock.  'flags' is not used.
+ */
+void
+ktoenal_sti(nal_cb_t *nal, unsigned long *flags)
+{
+        ksock_nal_data_t *data = nal->nal_data;
+
+        spin_unlock(&data->ksnd_nal_cb_lock);
+}
+
+/*
+ * lib callback: report the "distance" to 'nid' in *dist -- 0 for this
+ * node itself, 1 for anything else.  Always returns 0.
+ */
+int
+ktoenal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
+{
+        /* I would guess that if ktoenal_get_conn(nid) == NULL,
+           and we're not routing, then 'nid' is very distant :) */
+        *dist = (nal->ni.nid == nid) ? 0 : 1;
+
+        return 0;
+}
+
+/*
+ * Get an idle local transmit descriptor.
+ *
+ * The normal idle list is tried first.  If it is empty and 'may_block' is
+ * zero, the list reserved for non-blocking callers is tried and NULL is
+ * returned if that is empty too.  If 'may_block' is non-zero the caller
+ * sleeps until a normal descriptor becomes idle, so this never returns
+ * NULL in that case.
+ */
+ksock_ltx_t *
+ktoenal_get_ltx (int may_block)
+{
+        unsigned long  flags;           /* irqsave state must be unsigned long */
+        ksock_ltx_t   *ltx = NULL;
+
+        for (;;) {
+                spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags);
+
+                if (!list_empty (&ktoenal_data.ksnd_idle_ltx_list)) {
+                        ltx = list_entry (ktoenal_data.ksnd_idle_ltx_list.next,
+                                          ksock_ltx_t, ltx_tx.tx_list);
+                        list_del (&ltx->ltx_tx.tx_list);
+                        break;
+                }
+
+                if (!may_block) {
+                        /* fall back on the reserved descriptors */
+                        if (!list_empty (&ktoenal_data.ksnd_idle_nblk_ltx_list)) {
+                                ltx = list_entry (ktoenal_data.ksnd_idle_nblk_ltx_list.next,
+                                                  ksock_ltx_t, ltx_tx.tx_list);
+                                list_del (&ltx->ltx_tx.tx_list);
+                        }
+                        break;
+                }
+
+                spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
+
+                /* sleep unlocked; the list is re-checked under the lock
+                 * on the next iteration */
+                wait_event (ktoenal_data.ksnd_idle_ltx_waitq,
+                            !list_empty (&ktoenal_data.ksnd_idle_ltx_list));
+        }
+
+        /* every 'break' above leaves the loop with the lock held */
+        spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
+
+        return (ltx);
+}
+
+/*
+ * Write the fragments in 'iov' to the socket file 'sock' through its
+ * f_op->writev method.
+ *
+ * Returns whatever writev returned: the number of bytes written or a
+ * negative errno.  On a positive return the iov array is advanced IN
+ * PLACE past the bytes that were sent, so the caller can simply retry
+ * with the same array.
+ *
+ * 'nob' is only checked against the iov total (under PORTAL_DEBUG) and
+ * 'flags' is not used at all in this function.
+ */
+int
+ktoenal_sendmsg (struct file *sock, struct iovec *iov, int niov, int nob, int flags)
+{
+ /* NB This procedure "consumes" iov (actually we do, tcp_sendmsg doesn't)
+ */
+ mm_segment_t oldmm;
+ int rc;
+
+ LASSERT (niov > 0);
+ LASSERT (nob > 0);
+
+ /* the iov points at kernel memory: lift the user-space address limit
+ * for the duration of the call */
+ oldmm = get_fs();
+ set_fs (KERNEL_DS);
+
+#ifdef PORTAL_DEBUG
+ {
+ int total_nob;
+ int i;
+
+ for (i = total_nob = 0; i < niov; i++)
+ total_nob += iov[i].iov_len;
+
+ LASSERT (nob == total_nob);
+ }
+#endif
+ LASSERT (!in_interrupt());
+
+ rc = sock->f_op->writev(sock, iov, niov, NULL);
+
+ set_fs (oldmm);
+
+ if (rc > 0) /* sent something? */
+ {
+ nob = rc; /* consume iov */
+ for (;;)
+ {
+ LASSERT (niov > 0);
+
+ /* the sent bytes end inside this frag: trim its front and stop */
+ if (iov->iov_len >= nob)
+ {
+ iov->iov_len -= nob;
+ iov->iov_base = (void *)(((unsigned long)iov->iov_base) + nob);
+ break;
+ }
+ /* this frag went completely: empty it and move to the next */
+ nob -= iov->iov_len;
+ iov->iov_len = 0;
+ iov++;
+ niov--;
+ }
+ }
+
+ return (rc);
+}
+
+/*
+ * Read at most 'toread' bytes from the socket file 'sock' into 'iov'
+ * through its f_op->readv method.
+ *
+ * The iov is temporarily clipped so that it describes no more than
+ * 'toread' bytes: the fragment in which the limit falls is shortened for
+ * the call and its original length is put back afterwards.
+ *
+ * Returns whatever readv returned.
+ *
+ * NOTE(review): the restore writes the ORIGINAL length back into the
+ * clipped fragment; if readv really does consume the iov (as the NB below
+ * says) confirm that this is what callers expect.
+ */
+int
+ktoenal_recvmsg(struct file *sock, struct iovec *iov, int niov, int toread)
+{
+ /* NB This procedure "consumes" iov (actually tcp_recvmsg does)
+ */
+ mm_segment_t oldmm;
+ int ret, i, len = 0, origlen = 0;
+
+ PROF_START(our_recvmsg);
+ /* find the fragment in which the 'toread' limit falls */
+ for(i = 0; i < niov; i++) {
+ len += iov[i].iov_len;
+ if(len >= toread)
+ break;
+ }
+
+ if(len >= toread) {
+ /* clip fragment i so the iov totals exactly 'toread' bytes */
+ origlen = iov[i].iov_len;
+ iov[i].iov_len -= (len - toread);
+ }
+ else { /* i == niov */
+ /* the whole iov is shorter than 'toread': use all of it */
+ i = niov - 1;
+ }
+
+ /* the iov points at kernel memory: lift the user-space address limit
+ * for the duration of the call */
+ oldmm = get_fs();
+ set_fs(KERNEL_DS);
+
+ ret = sock->f_op->readv(sock, iov, i + 1, NULL);
+
+ set_fs(oldmm);
+
+ /* undo the clipping (origlen stays 0 if nothing was clipped) */
+ if(origlen)
+ iov[i].iov_len = origlen;
+
+ PROF_FINISH(our_recvmsg);
+ return ret;
+}
+
+/*
+ * Try to send the packet at the head of conn's transmit queue.
+ *
+ * Called with ksnd_sched_lock held (irq state saved in *irq_flags).  The
+ * lock is dropped around the actual send and the completion callbacks,
+ * and is held again on return.
+ *
+ * A completely sent packet is finalised (router callback for forwarded
+ * packets, lib_finalize() plus descriptor recycling for local ones); a
+ * partially sent one goes back on the head of the queue.  Afterwards the
+ * conn is either requeued for the scheduler or descheduled, in which case
+ * the scheduler's reference on it is dropped.
+ */
+void
+ktoenal_process_transmit (ksock_conn_t *conn, long *irq_flags)
+{
+        ksock_tx_t *tx = list_entry (conn->ksnc_tx_queue.next, ksock_tx_t, tx_list);
+        int         rc;
+
+        LASSERT (conn->ksnc_tx_scheduled);
+        LASSERT (conn->ksnc_tx_ready);
+        LASSERT (!list_empty (&conn->ksnc_tx_queue));
+
+        /* assume transmit will complete now, so dequeue while I've got the lock */
+        list_del (&tx->tx_list);
+
+        spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, *irq_flags);
+
+        LASSERT (tx->tx_nob > 0);
+
+        conn->ksnc_tx_ready = 0;        /* write_space may race with me and set ready */
+        mb();                           /* => clear BEFORE trying to write */
+
+        rc = ktoenal_sendmsg (conn->ksnc_file,
+                              tx->tx_iov, tx->tx_niov, tx->tx_nob,
+                              list_empty (&conn->ksnc_tx_queue) ?
+                              MSG_DONTWAIT : (MSG_DONTWAIT | MSG_MORE));
+
+        CDEBUG (D_NET, "send(%d) %d\n", tx->tx_nob, rc);
+
+        if (rc < 0) {                           /* error */
+                if (rc == -EAGAIN) {            /* socket full => */
+                        rc = 0;                 /* nothing sent */
+                } else {
+#warning FIXME: handle socket errors properly
+                        CERROR ("Error socknal send(%d) %p: %d\n", tx->tx_nob, conn, rc);
+                        rc = tx->tx_nob;        /* kid on for now whole packet went */
+                }
+        }
+
+        if (rc == tx->tx_nob) {                 /* everything went */
+                conn->ksnc_tx_ready = 1;        /* assume more can go (ASAP) */
+                ktoenal_put_conn (conn);        /* release packet's ref */
+
+                if (tx->tx_isfwd) {             /* was a forwarded packet? */
+                        kpr_fwd_done (&ktoenal_data.ksnd_router,
+                                      KSOCK_TX_2_KPR_FWD_DESC (tx), 0);
+
+                        spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
+                } else {                        /* local send */
+                        ksock_ltx_t *ltx = KSOCK_TX_2_KSOCK_LTX (tx);
+
+                        lib_finalize (&ktoenal_lib, ltx->ltx_private, ltx->ltx_cookie);
+
+                        spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
+
+                        /* descriptor goes back on the idle list it came from */
+                        list_add (&ltx->ltx_tx.tx_list, ltx->ltx_idle);
+
+                        /* normal tx desc => wakeup anyone blocking for one */
+                        if (ltx->ltx_idle == &ktoenal_data.ksnd_idle_ltx_list &&
+                            waitqueue_active (&ktoenal_data.ksnd_idle_ltx_waitq))
+                                wake_up (&ktoenal_data.ksnd_idle_ltx_waitq);
+                }
+                ktoenal_packets_transmitted++;
+        } else {
+                /* partial send: ktoenal_sendmsg() already advanced the iov */
+                tx->tx_nob -= rc;
+
+                spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
+
+                /* back onto HEAD of tx_queue */
+                list_add (&tx->tx_list, &conn->ksnc_tx_queue);
+        }
+
+        if (!conn->ksnc_tx_ready ||                     /* no space to write now */
+            list_empty (&conn->ksnc_tx_queue)) {        /* nothing to write */
+                conn->ksnc_tx_scheduled = 0;            /* not being scheduled */
+                /* NOTE(review): if this is the last reference the conn is
+                 * closed with ksnd_sched_lock held -- confirm that cannot
+                 * happen or is safe */
+                ktoenal_put_conn (conn);                /* release scheduler's ref */
+        } else {                                        /* let scheduler call me again */
+                list_add_tail (&conn->ksnc_tx_list, &ktoenal_data.ksnd_tx_conns);
+        }
+}
+
+/*
+ * Queue packet 'tx' for transmission on 'conn'.
+ *
+ * The packet's iov is first trimmed so that it describes exactly
+ * tx->tx_nob bytes (tx_niov is updated to match).  The packet is then
+ * appended to the conn's transmit queue and, if the conn can send and is
+ * not already scheduled, the conn is handed to the scheduler, which gets
+ * its own reference on it.
+ */
+void
+ktoenal_launch_packet (ksock_conn_t *conn, ksock_tx_t *tx)
+{
+        unsigned long  flags;           /* irqsave state must be unsigned long */
+        int            nob = tx->tx_nob;
+        struct iovec  *iov = tx->tx_iov;
+        int            niov = 1;
+
+        LASSERT (nob >= sizeof (ptl_hdr_t));
+
+        /* Truncate iov to exactly match total packet length
+         * since socket sendmsg pays no attention to requested length.
+         */
+        for (;;) {
+                LASSERT (niov <= tx->tx_niov);
+                LASSERT (iov->iov_len >= 0);
+
+                if (iov->iov_len >= nob) {
+                        /* packet ends in this frag: clip it and stop */
+                        iov->iov_len = nob;
+                        break;
+                }
+                nob -= iov->iov_len;
+                iov++;
+                niov++;
+        }
+        tx->tx_niov = niov;
+
+        spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags);
+        list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue);
+
+        if (conn->ksnc_tx_ready &&              /* able to send */
+            !conn->ksnc_tx_scheduled) {         /* not scheduled to send */
+                list_add_tail (&conn->ksnc_tx_list, &ktoenal_data.ksnd_tx_conns);
+                conn->ksnc_tx_scheduled = 1;
+                atomic_inc (&conn->ksnc_refcount);      /* extra ref for scheduler */
+                if (waitqueue_active (&ktoenal_data.ksnd_sched_waitq))
+                        wake_up (&ktoenal_data.ksnd_sched_waitq);
+        }
+
+        ktoenal_packets_launched++;
+        spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
+}
+
+/*
+ * lib callback: send a message (header 'hdr' plus the payload described
+ * by 'payload_iov') to 'nid'.
+ *
+ * The message goes over the direct connection to 'nid' if there is one,
+ * otherwise over the connection to the gateway the router names for it.
+ * The header is copied into a transmit descriptor, so 'hdr' need not
+ * outlive the call; the payload fragments are referenced, not copied.
+ * 'private' and 'cookie' are handed back to lib_finalize() once the
+ * packet has been sent.
+ *
+ * Returns 0 once the packet is queued, -1 if no route/connection exists
+ * or no transmit descriptor is available.
+ */
+int
+ktoenal_send(nal_cb_t *nal, void *private, lib_msg_t *cookie,
+             ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+             unsigned int payload_niov, struct iovec *payload_iov, size_t payload_len)
+{
+        ptl_nid_t     gatewaynid;
+        ksock_conn_t *conn;
+        ksock_ltx_t  *ltx;
+        int           rc;
+        int           i;
+
+        /* By this point, as it happens, we have absolutely no idea what
+         * 'private' is.  It might be ksock_nal_data or it might be ksock_conn.
+         * Ha ha, isn't that a funny joke?
+         *
+         * FIXME: this is not the right way to fix this; the right way is to
+         * always pass in the same kind of structure.  This is hard right now.
+         * To revisit this issue, set a breakpoint in here and watch for when
+         * it's called from lib_finalize.  I think this occurs when we send a
+         * packet as a side-effect of another packet, such as when an ACK has
+         * been requested. -phil */
+
+        /* iov_len is a size_t: cast so that it matches the %d conversion */
+        CDEBUG(D_NET, "sending "LPSZ" bytes from [%d](%p,%d)... to nid: "LPX64" pid %d\n",
+               payload_len, payload_niov,
+               payload_niov > 0 ? payload_iov[0].iov_base : NULL,
+               (int)(payload_niov > 0 ? payload_iov[0].iov_len : 0),
+               nid, pid);
+
+        if ((conn = ktoenal_get_conn (nid)) == NULL) {
+                /* It's not a peer; try to find a gateway */
+                rc = kpr_lookup (&ktoenal_data.ksnd_router, nid, &gatewaynid);
+                if (rc != 0) {
+                        CERROR ("Can't route to "LPX64": router error %d\n", nid, rc);
+                        return (-1);
+                }
+
+                if ((conn = ktoenal_get_conn (gatewaynid)) == NULL) {
+                        CERROR ("Can't route to "LPX64": gateway "LPX64" is not a peer\n",
+                                nid, gatewaynid);
+                        return (-1);
+                }
+        }
+
+        /* This transmit has now got a ref on conn */
+
+        /* I may not block for a transmit descriptor if I might block the
+         * receiver, or an interrupt handler. */
+        ltx = ktoenal_get_ltx (!(type == PTL_MSG_ACK ||
+                                 type == PTL_MSG_REPLY ||
+                                 in_interrupt ()));
+        if (ltx == NULL) {
+                CERROR ("Can't allocate tx desc\n");
+                ktoenal_put_conn (conn);
+                return (-1);
+        }
+
+        /* Init common (to sends and forwards) packet part */
+        ltx->ltx_tx.tx_isfwd = 0;
+        ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len;
+        ltx->ltx_tx.tx_niov = 1 + payload_niov;
+        ltx->ltx_tx.tx_iov = ltx->ltx_iov;
+
+        /* Init local send packet (storage for hdr, finalize() args, iov) */
+        ltx->ltx_hdr = *hdr;
+        ltx->ltx_private = private;
+        ltx->ltx_cookie = cookie;
+
+        /* frag 0 is always our private copy of the header */
+        ltx->ltx_iov[0].iov_base = &ltx->ltx_hdr;
+        ltx->ltx_iov[0].iov_len = sizeof (ltx->ltx_hdr);
+
+        LASSERT (payload_niov <= PTL_MD_MAX_IOV);
+
+        for (i = 0; i < payload_niov; i++) {
+                ltx->ltx_iov[1 + i].iov_base = payload_iov[i].iov_base;
+                ltx->ltx_iov[1 + i].iov_len = payload_iov[i].iov_len;
+        }
+
+        ktoenal_launch_packet (conn, &ltx->ltx_tx);
+        return (0);
+}
+
+/* Transmit the forwarding descriptor 'fwd' towards its next hop.
+ *
+ * The next hop is fwd's gateway NID, unless that is this node's own NID, in
+ * which case the packet goes straight to fwd's target NID.  If there is no
+ * connection to the next hop the forward is completed with -EHOSTUNREACH.
+ * 'arg' is not used. */
+void
+ktoenal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
+{
+        ksock_tx_t   *fwd_tx = (ksock_tx_t *)&fwd->kprfd_scratch;
+        ptl_nid_t     next_hop = fwd->kprfd_gateway_nid;
+        ksock_conn_t *peer;
+
+        CDEBUG (D_NET, "Forwarding [%p] -> "LPX64" ("LPX64"))\n", fwd,
+                fwd->kprfd_gateway_nid, fwd->kprfd_target_nid);
+
+        /* When I am the gateway myself this must be the last hop */
+        if (next_hop == ktoenal_lib.ni.nid) {
+                next_hop = fwd->kprfd_target_nid;
+        }
+
+        peer = ktoenal_get_conn (next_hop);
+        if (peer == NULL) {
+                CERROR ("[%p] fwd to "LPX64" isn't a peer\n", fwd, next_hop);
+                kpr_fwd_done (&ktoenal_data.ksnd_router, fwd, -EHOSTUNREACH);
+                return;
+        }
+
+        /* From here on this forward owns a ref on 'peer' */
+
+        /* Describe the packet using the descriptor's own scratch space */
+        fwd_tx->tx_iov   = fwd->kprfd_iov;
+        fwd_tx->tx_niov  = fwd->kprfd_niov;
+        fwd_tx->tx_nob   = fwd->kprfd_nob;
+        fwd_tx->tx_isfwd = 1;                   /* mark as a forwarded packet */
+
+        ktoenal_launch_packet (peer, fwd_tx);
+}
+
+/* Spawn a kernel thread running fn(arg).
+ *
+ * Returns 0 on success, after counting the new thread in ksnd_nthreads, or
+ * the negative value returned by kernel_thread() on failure. */
+int
+ktoenal_thread_start (int (*fn)(void *arg), void *arg)
+{
+        long rc = kernel_thread (fn, arg, 0);
+
+        if (rc >= 0) {
+                atomic_inc (&ktoenal_data.ksnd_nthreads);
+                return (0);
+        }
+
+        return ((int)rc);
+}
+
+/* Called by a thread started with ktoenal_thread_start() as it exits:
+ * removes it from the ksnd_nthreads count. */
+void ktoenal_thread_fini (void)
+{
+        atomic_dec (&ktoenal_data.ksnd_nthreads);
+}
+
+/* Completion callback for a forwarding message buffer (fmb).
+ *
+ * 'arg' is the ksock_fmb_t that was registered with kpr_fwd_init(); 'error'
+ * is non-zero if the packet could not be routed.  The fmb is returned to its
+ * pool's idle list and, if any connection is blocked waiting for a buffer
+ * from that pool, the first one is moved to the scheduler's rx queue and the
+ * scheduler is woken. */
+void
+ktoenal_fmb_callback (void *arg, int error)
+{
+        ksock_fmb_t   *fmb = (ksock_fmb_t *)arg;
+        /* the packet header lives at the start of the fmb's first page */
+        ptl_hdr_t     *hdr = (ptl_hdr_t *) page_address(fmb->fmb_pages[0]);
+        ksock_conn_t  *conn;
+        unsigned long  flags;           /* type required by spin_lock_irqsave() */
+
+        CDEBUG (D_NET, "routed packet from "LPX64" to "LPX64": %d\n",
+                hdr->src_nid, hdr->dest_nid, error);
+
+        if (error != 0)
+                CERROR ("Failed to route packet from "LPX64" to "LPX64": %d\n",
+                        hdr->src_nid, hdr->dest_nid, error);
+
+        spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags);
+
+        /* buffer is free for re-use */
+        list_add (&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs);
+
+        if (!list_empty (&fmb->fmb_pool->fmp_blocked_conns))
+        {
+                /* hand the scheduler the longest-waiting blocked connection */
+                conn = list_entry (fmb->fmb_pool->fmp_blocked_conns.next, ksock_conn_t, ksnc_rx_list);
+                list_del (&conn->ksnc_rx_list);
+
+                CDEBUG (D_NET, "Scheduling conn %p\n", conn);
+                LASSERT (conn->ksnc_rx_scheduled);
+                LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_FMB_SLEEP);
+
+                /* it retries the fmb allocation when it next runs */
+                conn->ksnc_rx_state = SOCKNAL_RX_GET_FMB;
+                list_add_tail (&conn->ksnc_rx_list, &ktoenal_data.ksnd_rx_conns);
+
+                if (waitqueue_active (&ktoenal_data.ksnd_sched_waitq))
+                        wake_up (&ktoenal_data.ksnd_sched_waitq);
+        }
+
+        spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
+}
+
+/* Take an idle forwarding buffer big enough for the packet pending on 'conn'.
+ *
+ * Must be called with the sched lock held.  The small pool is used when the
+ * whole packet (header + remaining payload) fits in SOCKNAL_SMALL_FWD_PAGES
+ * pages, otherwise the large pool.  Returns the buffer, or NULL after parking
+ * 'conn' (state SOCKNAL_RX_FMB_SLEEP) on the pool's blocked list. */
+ksock_fmb_t *
+ktoenal_get_idle_fmb (ksock_conn_t *conn)
+{
+        int               body_nob = conn->ksnc_rx_nob_left;
+        int               total_nob = sizeof (ptl_hdr_t) + body_nob;
+        ksock_fmb_pool_t *fmp;
+        ksock_fmb_t      *idle;
+
+        LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
+
+        fmp = &ktoenal_data.ksnd_large_fmp;
+        if (total_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE) {
+                fmp = &ktoenal_data.ksnd_small_fmp;
+        }
+
+        if (list_empty (&fmp->fmp_idle_fmbs)) {
+                /* nothing free: deschedule conn until a buffer is returned */
+                conn->ksnc_rx_state = SOCKNAL_RX_FMB_SLEEP;
+                list_add_tail (&conn->ksnc_rx_list, &fmp->fmp_blocked_conns);
+                return (NULL);
+        }
+
+        idle = list_entry (fmp->fmp_idle_fmbs.next, ksock_fmb_t, fmb_list);
+        list_del (&idle->fmb_list);
+        return (idle);
+}
+
+
+/* Prepare forwarding buffer 'fmb' for the packet whose header has just been
+ * read on 'conn'.
+ *
+ * Returns 1 if the packet has no payload: it is handed to the router at once
+ * and conn is reset to read the next header.  Returns 0 if there is payload:
+ * conn is switched to SOCKNAL_RX_BODY_FWD with its rx iov aimed at the fmb
+ * pages (just past the header), and the router descriptor is stashed in
+ * conn->ksnc_cookie for the later kpr_fwd_start(). */
+int
+ktoenal_init_fmb (ksock_conn_t *conn, ksock_fmb_t *fmb)
+{
+ int payload_nob = conn->ksnc_rx_nob_left;
+ int packet_nob = sizeof (ptl_hdr_t) + payload_nob;
+ int niov; /* at least the header */
+ int nob;
+
+ LASSERT (conn->ksnc_rx_scheduled);
+ LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
+ LASSERT (conn->ksnc_rx_nob_wanted == conn->ksnc_rx_nob_left);
+ LASSERT (payload_nob >= 0);
+ LASSERT (packet_nob <= fmb->fmb_npages * PAGE_SIZE);
+ LASSERT (sizeof (ptl_hdr_t) < PAGE_SIZE);
+
+ /* Got a forwarding buffer; copy the header we just read into the
+ * forwarding buffer. If there's payload start reading it
+ * into the buffer, otherwise the forwarding buffer can be kicked
+ * off immediately.
+ *
+ * NB fmb->fmb_iov spans the WHOLE packet.
+ * conn->ksnc_rx_iov spans just the payload.
+ */
+
+ fmb->fmb_iov[0].iov_base = page_address (fmb->fmb_pages[0]);
+
+ memcpy (fmb->fmb_iov[0].iov_base, &conn->ksnc_hdr, sizeof (ptl_hdr_t)); /* copy header */
+
+ if (payload_nob == 0) /* got complete packet already */
+ {
+ atomic_inc (&ktoenal_packets_received);
+
+ CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (immediate)\n", conn,
+ conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, packet_nob);
+
+ fmb->fmb_iov[0].iov_len = sizeof (ptl_hdr_t);
+
+ kpr_fwd_init (&fmb->fmb_fwd, conn->ksnc_hdr.dest_nid,
+ packet_nob, 1, fmb->fmb_iov,
+ ktoenal_fmb_callback, fmb);
+
+ kpr_fwd_start (&ktoenal_data.ksnd_router, &fmb->fmb_fwd); /* forward it now */
+
+ ktoenal_new_packet (conn, 0); /* on to next packet */
+ return (1);
+ }
+
+ /* Build one iov entry per fmb page needed to hold the whole packet */
+ niov = 1;
+ if (packet_nob <= PAGE_SIZE) /* whole packet fits in first page */
+ fmb->fmb_iov[0].iov_len = packet_nob;
+ else
+ {
+ fmb->fmb_iov[0].iov_len = PAGE_SIZE;
+ nob = packet_nob - PAGE_SIZE;
+
+ do
+ {
+ LASSERT (niov < fmb->fmb_npages);
+ fmb->fmb_iov[niov].iov_base = page_address (fmb->fmb_pages[niov]);
+ fmb->fmb_iov[niov].iov_len = MIN (PAGE_SIZE, nob);
+ nob -= PAGE_SIZE;
+ niov++;
+ } while (nob > 0);
+ }
+
+ kpr_fwd_init (&fmb->fmb_fwd, conn->ksnc_hdr.dest_nid,
+ packet_nob, niov, fmb->fmb_iov,
+ ktoenal_fmb_callback, fmb);
+
+ /* stash router's descriptor ready for call to kpr_fwd_start */
+ conn->ksnc_cookie = &fmb->fmb_fwd;
+
+ conn->ksnc_rx_state = SOCKNAL_RX_BODY_FWD; /* read in the payload */
+
+ /* payload is desc's iov-ed buffer, but skipping the hdr */
+ LASSERT (niov <= sizeof (conn->ksnc_rx_iov) / sizeof (conn->ksnc_rx_iov[0]));
+
+ /* first rx fragment starts just after the header copied in above */
+ conn->ksnc_rx_iov[0].iov_base = (void *)(((unsigned long)fmb->fmb_iov[0].iov_base) + sizeof (ptl_hdr_t));
+ conn->ksnc_rx_iov[0].iov_len = fmb->fmb_iov[0].iov_len - sizeof (ptl_hdr_t);
+
+ /* remaining fragments are identical to the fmb's */
+ if (niov > 1)
+ memcpy (&conn->ksnc_rx_iov[1], &fmb->fmb_iov[1], (niov - 1) * sizeof (struct iovec));
+
+ conn->ksnc_rx_niov = niov;
+
+ CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d reading body\n", conn,
+ conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, payload_nob);
+ return (0);
+}
+
+/* Decide what to do with a just-read header that is to be forwarded.
+ *
+ * On return conn's rx state says what happens next:
+ *   SOCKNAL_RX_HEADER   packet dropped with nothing to skip,
+ *   SOCKNAL_RX_SLOP     packet dropped, its body must be read and discarded,
+ *   SOCKNAL_RX_GET_FMB  packet accepted; a forwarding buffer is needed and
+ *                       ksnc_rx_nob_left/wanted hold the payload size. */
+void
+ktoenal_fwd_parse (ksock_conn_t *conn)
+{
+        ksock_conn_t *peer;
+        int           payload;
+
+        CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d parsing header\n", conn,
+                conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, conn->ksnc_rx_nob_left);
+
+        LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER);
+        LASSERT (conn->ksnc_rx_scheduled);
+
+        /* Work out how much payload follows this header */
+        switch (conn->ksnc_hdr.type) {
+        case PTL_MSG_PUT:
+                payload = conn->ksnc_hdr.msg.put.length;
+                break;
+        case PTL_MSG_REPLY:
+                payload = conn->ksnc_hdr.msg.reply.length;
+                break;
+        case PTL_MSG_GET:
+        case PTL_MSG_ACK:
+                payload = 0;
+                break;
+        default:
+                /* Unknown type: forget this header, start on a new packet */
+                CERROR ("Unrecognised packet type %d from "LPX64" for "LPX64"\n",
+                        conn->ksnc_hdr.type, conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid);
+                ktoenal_new_packet (conn, 0);
+                return;
+        }
+
+        /* A negative length means the header is corrupt */
+        if (payload < 0) {
+                CERROR ("dropping packet from "LPX64" for "LPX64": packet size %d illegal\n",
+                        conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, payload);
+                ktoenal_new_packet (conn, 0);
+                return;
+        }
+
+        /* Too large to forward: drop it, skipping over its body */
+        if (payload > SOCKNAL_MAX_FWD_PAYLOAD) {
+                CERROR ("dropping packet from "LPX64" for "LPX64": packet size %d too big\n",
+                        conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, payload);
+                ktoenal_new_packet (conn, payload);
+                return;
+        }
+
+        /* The sender should have gone direct if the target is my peer */
+        peer = ktoenal_get_conn (conn->ksnc_hdr.dest_nid);
+        if (peer != NULL) {
+                CERROR ("dropping packet from "LPX64" for "LPX64": target is a peer\n",
+                        conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid);
+                ktoenal_put_conn (peer);                /* undo the get above */
+
+                ktoenal_new_packet (conn, payload);     /* skip this one's body */
+                return;
+        }
+
+        /* Accepted: remember the payload size and go find a buffer */
+        conn->ksnc_rx_nob_wanted = payload;             /* (no slop) */
+        conn->ksnc_rx_nob_left = payload;
+        conn->ksnc_rx_state = SOCKNAL_RX_GET_FMB;
+}
+
+/* Set up conn's receive state for whatever comes next on the stream.
+ *
+ * With nob_to_skip == 0 the connection is primed to read a fresh packet
+ * header into conn->ksnc_hdr and 1 is returned.  Otherwise the connection is
+ * put in SOCKNAL_RX_SLOP state with its rx iov entries all pointing at one
+ * static scratch buffer, so that as many of the nob_to_skip bytes as the iov
+ * can describe are read and thrown away; 0 is returned. */
+int
+ktoenal_new_packet (ksock_conn_t *conn, int nob_to_skip)
+{
+ /* scratch space that skipped bytes are read into; contents are never used */
+ static char ktoenal_slop_buffer[4096];
+
+ int nob;
+ int niov;
+ int skipped;
+
+ if (nob_to_skip == 0) /* right at next packet boundary now */
+ {
+ conn->ksnc_rx_state = SOCKNAL_RX_HEADER;
+ conn->ksnc_rx_nob_wanted = sizeof (ptl_hdr_t);
+ conn->ksnc_rx_nob_left = sizeof (ptl_hdr_t);
+
+ conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_hdr;
+ conn->ksnc_rx_iov[0].iov_len = sizeof (ptl_hdr_t);
+ conn->ksnc_rx_niov = 1;
+ return (1);
+ }
+
+ /* set up to skip as much as possible now */
+ /* if there's more left (ran out of iov entries) we'll get called again */
+
+ conn->ksnc_rx_state = SOCKNAL_RX_SLOP;
+ conn->ksnc_rx_nob_left = nob_to_skip;
+ skipped = 0;
+ niov = 0;
+
+ do
+ {
+ /* each iov entry covers at most one slop buffer's worth */
+ nob = MIN (nob_to_skip, sizeof (ktoenal_slop_buffer));
+
+ conn->ksnc_rx_iov[niov].iov_base = ktoenal_slop_buffer;
+ conn->ksnc_rx_iov[niov].iov_len = nob;
+ niov++;
+ skipped += nob;
+ nob_to_skip -=nob;
+
+ } while (nob_to_skip != 0 && /* mustn't overflow conn's rx iov */
+ niov < sizeof (conn->ksnc_rx_iov)/sizeof (conn->ksnc_rx_iov[0]));
+
+ conn->ksnc_rx_niov = niov;
+ conn->ksnc_rx_nob_wanted = skipped; /* may be less than ksnc_rx_nob_left */
+ return (0);
+}
+
+/* Progress receive processing on 'conn' as far as currently possible.
+ *
+ * Called with ksnd_sched_lock held, the saved IRQ state being in
+ * '*irq_flags'.  The lock is dropped while buffers are set up and the socket
+ * is read, and is held again on return.
+ *
+ * On return one of the following holds:
+ *  - no idle forwarding buffer was available and conn has been parked by
+ *    ktoenal_get_idle_fmb();
+ *  - conn has no more data ready: it is marked unscheduled and the
+ *    scheduler's ref is dropped;
+ *  - conn may have more data: it is queued at the tail of ksnd_rx_conns.
+ */
+void
+ktoenal_process_receive (ksock_conn_t *conn, long *irq_flags)
+{
+        ksock_fmb_t *fmb;
+        int len;
+        LASSERT (atomic_read (&conn->ksnc_refcount) > 0);
+        LASSERT (conn->ksnc_rx_scheduled);
+        LASSERT (conn->ksnc_rx_ready);
+
+        /* NB: sched lock held */
+        CDEBUG(D_NET, "conn %p\n", conn);
+
+        if (conn->ksnc_rx_state != SOCKNAL_RX_GET_FMB) /* doesn't need a forwarding buffer */
+        {
+                spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, *irq_flags);
+                goto try_read;
+        }
+
+ get_fmb:
+        /* NB: sched lock held */
+        fmb = ktoenal_get_idle_fmb (conn);
+        if (fmb == NULL) /* conn descheduled waiting for idle fmb */
+                return;
+
+        spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, *irq_flags);
+
+        if (ktoenal_init_fmb (conn, fmb)) /* packet forwarded ? */
+                goto out; /* come back later for next packet */
+
+ try_read:
+        /* NB: sched lock NOT held */
+        LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER ||
+                 conn->ksnc_rx_state == SOCKNAL_RX_BODY ||
+                 conn->ksnc_rx_state == SOCKNAL_RX_BODY_FWD ||
+                 conn->ksnc_rx_state == SOCKNAL_RX_SLOP);
+
+        LASSERT (conn->ksnc_rx_niov > 0);
+        LASSERT (conn->ksnc_rx_nob_wanted > 0);
+
+        conn->ksnc_rx_ready = 0; /* data ready may race with me and set ready */
+        mb(); /* => clear BEFORE trying to read */
+
+        /* NB ktoenal_recvmsg "consumes" the iov passed to it */
+        len = ktoenal_recvmsg(conn->ksnc_file,
+                              conn->ksnc_rx_iov, conn->ksnc_rx_niov,
+                              conn->ksnc_rx_nob_wanted);
+        CDEBUG (D_NET, "%p read(%d) %d\n", conn, conn->ksnc_rx_nob_wanted, len);
+
+        if (len <= 0) /* nothing ready (EAGAIN) or EOF or error */
+        {
+                if (len != -EAGAIN && /* ! nothing to read now */
+                    len != 0) /* ! nothing to read ever */
+                {
+#warning FIXME: handle socket errors properly
+                        CERROR ("Error socknal read(%d) %p: %d\n",
+                                conn->ksnc_rx_nob_wanted, conn, len);
+                }
+                goto out; /* come back when there's data ready */
+        }
+
+        LASSERT (len <= conn->ksnc_rx_nob_wanted);
+        conn->ksnc_rx_nob_wanted -= len;
+        conn->ksnc_rx_nob_left -= len;
+
+        if (conn->ksnc_rx_nob_wanted != 0) /* short read */
+                goto out; /* try again later */
+
+        conn->ksnc_rx_ready = 1; /* assume there's more to be had */
+
+        /* Everything wanted for the current state has arrived */
+        switch (conn->ksnc_rx_state)
+        {
+        case SOCKNAL_RX_HEADER:
+                if (conn->ksnc_hdr.dest_nid != ktoenal_lib.ni.nid) /* It's not for me */
+                {
+                        ktoenal_fwd_parse (conn);
+                        switch (conn->ksnc_rx_state)
+                        {
+                        case SOCKNAL_RX_HEADER: /* skipped this packet (zero payload) */
+                                goto out; /* => come back later */
+                        case SOCKNAL_RX_SLOP: /* skipping this packet's body */
+                                goto try_read; /* => go read it */
+                        case SOCKNAL_RX_GET_FMB: /* forwarding */
+                                spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
+                                goto get_fmb; /* => go get a fwd msg buffer */
+                        default:
+                                /* a label must be followed by a statement */
+                                break;
+                        }
+                        /* Not Reached */
+                        LBUG ();
+                }
+
+                PROF_START(lib_parse);
+                lib_parse(&ktoenal_lib, &conn->ksnc_hdr, conn); /* sets wanted_len, iovs etc */
+                PROF_FINISH(lib_parse);
+
+                if (conn->ksnc_rx_nob_wanted != 0) /* need to get some payload? */
+                {
+                        conn->ksnc_rx_state = SOCKNAL_RX_BODY;
+                        goto try_read; /* go read the payload */
+                }
+                /* Fall through (completed packet for me) */
+
+        case SOCKNAL_RX_BODY:
+                atomic_inc (&ktoenal_packets_received);
+                lib_finalize(&ktoenal_lib, NULL, conn->ksnc_cookie); /* packet is done now */
+                /* Fall through */
+
+        case SOCKNAL_RX_SLOP:
+                if (ktoenal_new_packet (conn, conn->ksnc_rx_nob_left)) /* starting new packet? */
+                        goto out; /* come back later */
+                goto try_read; /* try to finish reading slop now */
+
+        case SOCKNAL_RX_BODY_FWD:
+                CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (got body)\n", conn,
+                        conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, conn->ksnc_rx_nob_left);
+
+                atomic_inc (&ktoenal_packets_received);
+
+                /* ktoenal_init_fmb() stashed router descriptor in conn->ksnc_cookie */
+                kpr_fwd_start (&ktoenal_data.ksnd_router, (kpr_fwd_desc_t *)conn->ksnc_cookie);
+
+                LASSERT (conn->ksnc_rx_nob_left == 0); /* no slop in forwarded packets */
+
+                ktoenal_new_packet (conn, 0); /* on to next packet */
+                goto out; /* (later) */
+
+        default:
+                /* a label must be followed by a statement */
+                break;
+        }
+
+        /* Not Reached */
+        LBUG ();
+
+ out:
+        spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
+
+        if (!conn->ksnc_rx_ready) /* no data there to read? */
+        {
+                conn->ksnc_rx_scheduled = 0; /* let socket callback schedule again */
+                ktoenal_put_conn (conn); /* release scheduler's ref */
+        }
+        else /* let scheduler call me again */
+                list_add_tail (&conn->ksnc_rx_list, &ktoenal_data.ksnd_rx_conns);
+}
+
+/* NAL receive callback: record where the payload of the current packet on
+ * the connection 'private' is to be read.
+ *
+ * The caller's iov is copied into the connection's rx iov, 'mlen' becomes the
+ * number of bytes wanted and 'rlen' the number of bytes left in the packet;
+ * 'msg' is kept in ksnc_cookie.  Returns 'rlen'.  'nal' is not used. */
+int
+ktoenal_recv(nal_cb_t *nal, void *private, lib_msg_t *msg,
+             unsigned int niov, struct iovec *iov, size_t mlen, size_t rlen)
+{
+        ksock_conn_t *rx_conn = (ksock_conn_t *)private;
+        int           frag = 0;
+
+        rx_conn->ksnc_cookie = msg;
+
+        LASSERT (niov <= PTL_MD_MAX_IOV);
+        while (frag < niov) {
+                rx_conn->ksnc_rx_iov[frag].iov_len = iov[frag].iov_len;
+                rx_conn->ksnc_rx_iov[frag].iov_base = iov[frag].iov_base;
+                frag++;
+        }
+
+        rx_conn->ksnc_rx_niov = niov;
+        rx_conn->ksnc_rx_nob_wanted = mlen;
+        rx_conn->ksnc_rx_nob_left = rlen;
+
+        return (rlen);
+}
+
+/* Scheduler thread body.
+ *
+ * Until ksnd_shuttingdown is set, each pass services at most one connection
+ * from ksnd_rx_conns and at most one from ksnd_tx_conns.  When neither list
+ * had work the thread sleeps on ksnd_sched_waitq; after SOCKNAL_RESCHED
+ * consecutive busy passes it calls our_cond_resched().  ksnd_sched_lock is
+ * held except while sleeping/yielding and inside the process_* calls, which
+ * drop and regain it themselves.  'arg' is not used.  Returns 0. */
+int
+ktoenal_scheduler (void *arg)
+{
+ unsigned long flags;
+ ksock_conn_t *conn;
+ int rc;
+ int nloops = 0;
+
+ kportal_daemonize ("ktoenal_sched");
+ kportal_blockallsigs ();
+
+ spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags);
+
+ while (!ktoenal_data.ksnd_shuttingdown)
+ {
+ int did_something = 0;
+
+ /* Ensure I progress everything semi-fairly */
+
+ if (!list_empty (&ktoenal_data.ksnd_rx_conns))
+ {
+ did_something = 1;
+ conn = list_entry (ktoenal_data.ksnd_rx_conns.next,
+ ksock_conn_t, ksnc_rx_list);
+ list_del (&conn->ksnc_rx_list);
+
+ ktoenal_process_receive (conn, &flags); /* drops & regains ksnd_sched_lock */
+ }
+
+ if (!list_empty (&ktoenal_data.ksnd_tx_conns))
+ {
+ did_something = 1;
+ conn = list_entry (ktoenal_data.ksnd_tx_conns.next,
+ ksock_conn_t, ksnc_tx_list);
+
+ list_del (&conn->ksnc_tx_list);
+ ktoenal_process_transmit (conn, &flags); /* drops and regains ksnd_sched_lock */
+ }
+
+ /* nloops is only counted on passes that did some work */
+ if (!did_something || /* nothing to do */
+ ++nloops == SOCKNAL_RESCHED) /* hogging CPU? */
+ {
+ spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
+
+ nloops = 0;
+
+ if (!did_something) { /* wait for something to do */
+ rc = wait_event_interruptible (ktoenal_data.ksnd_sched_waitq,
+ ktoenal_data.ksnd_shuttingdown ||
+ !list_empty (&ktoenal_data.ksnd_rx_conns) ||
+ !list_empty (&ktoenal_data.ksnd_tx_conns));
+ /* signals were blocked above, so the wait is not expected to be interrupted */
+ LASSERT (rc == 0);
+ } else
+ our_cond_resched();
+
+ spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags);
+ }
+ }
+
+ spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
+ ktoenal_thread_fini ();
+ return (0);
+}
+
+
+/* Reaper thread body.
+ *
+ * Until ksnd_shuttingdown is set, connections are taken one at a time off
+ * ksnd_reaper_list (under ksnd_reaper_lock) and closed with the lock
+ * released; when the list is empty the thread sleeps on ksnd_reaper_waitq.
+ * 'arg' is not used.  Returns 0. */
+int
+ktoenal_reaper (void *arg)
+{
+        unsigned long  flags;
+        ksock_conn_t  *victim;
+        int            rc;
+
+        kportal_daemonize ("ktoenal_reaper");
+        kportal_blockallsigs ();
+
+        while (!ktoenal_data.ksnd_shuttingdown) {
+                victim = NULL;
+
+                spin_lock_irqsave (&ktoenal_data.ksnd_reaper_lock, flags);
+                if (!list_empty (&ktoenal_data.ksnd_reaper_list)) {
+                        victim = list_entry (ktoenal_data.ksnd_reaper_list.next,
+                                             ksock_conn_t, ksnc_list);
+                        list_del (&victim->ksnc_list);
+                }
+                spin_unlock_irqrestore (&ktoenal_data.ksnd_reaper_lock, flags);
+
+                if (victim == NULL) {
+                        /* nothing to reap: sleep until there is, or shutdown */
+                        rc = wait_event_interruptible (ktoenal_data.ksnd_reaper_waitq,
+                                                       ktoenal_data.ksnd_shuttingdown ||
+                                                       !list_empty(&ktoenal_data.ksnd_reaper_list));
+                        LASSERT (rc == 0);
+                        continue;
+                }
+
+                ktoenal_close_conn (victim);
+        }
+
+        ktoenal_thread_fini ();
+        return (0);
+}
+
+/* poll() event masks treated as "readable" and "writable" respectively */
+#define POLLREAD (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)
+#define POLLWRITE (POLLOUT | POLLWRNORM | POLLWRBAND)
+
+/* Poll thread body.
+ *
+ * Until ksnd_shuttingdown is set, every connection on ksnd_socklist has its
+ * file's poll method called; readable connections are passed to
+ * ktoenal_data_ready() and writable ones to ktoenal_write_space().  The
+ * thread then sleeps in schedule_timeout().  The poll wait table is only
+ * passed to poll (and rebuilt after the sleep) when ksnd_slistchange is set.
+ * 'arg' is not used.  Returns 0. */
+int
+ktoenal_pollthread(void *arg)
+{
+ unsigned int mask;
+ struct list_head *tmp;
+ ksock_conn_t *conn;
+
+ /* Save the task struct for waking it up */
+ ktoenal_data.ksnd_pollthread_tsk = current;
+
+ kportal_daemonize ("ktoenal_pollthread");
+ kportal_blockallsigs ();
+
+ poll_initwait(&ktoenal_data.ksnd_pwait);
+
+ while(!ktoenal_data.ksnd_shuttingdown) {
+
+ /* set before polling so a wakeup during the scan is not lost */
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ read_lock (&ktoenal_data.ksnd_socklist_lock);
+ list_for_each(tmp, &ktoenal_data.ksnd_socklist) {
+
+ conn = list_entry(tmp, ksock_conn_t, ksnc_list);
+ /* pin conn while the list lock is dropped for the poll call */
+ atomic_inc(&conn->ksnc_refcount);
+ read_unlock (&ktoenal_data.ksnd_socklist_lock);
+
+ /* NOTE(review): the list is walked again after the lock has been
+ * dropped; this presumably relies on the ref taken above keeping
+ * conn linked on ksnd_socklist -- confirm */
+ mask = conn->ksnc_file->f_op->poll(conn->ksnc_file,
+ ktoenal_data.ksnd_slistchange ?
+ &ktoenal_data.ksnd_pwait : NULL);
+
+ if(mask & POLLREAD) {
+ ktoenal_data_ready(conn);
+
+ }
+ if (mask & POLLWRITE) {
+ ktoenal_write_space(conn);
+
+ }
+ if (mask & (POLLERR | POLLHUP)) {
+ /* Do error processing */
+ /* NOTE(review): nothing is done here yet; errors and hangups are ignored */
+ }
+
+ read_lock (&ktoenal_data.ksnd_socklist_lock);
+ /* drop the ref taken above; finalise conn if it was the last one */
+ if(atomic_dec_and_test(&conn->ksnc_refcount))
+ _ktoenal_put_conn(conn);
+ }
+ ktoenal_data.ksnd_slistchange = 0;
+ read_unlock (&ktoenal_data.ksnd_socklist_lock);
+
+ schedule_timeout(MAX_SCHEDULE_TIMEOUT);
+ /* socket list changed while asleep: rebuild the poll wait table */
+ if(ktoenal_data.ksnd_slistchange) {
+ poll_freewait(&ktoenal_data.ksnd_pwait);
+ poll_initwait(&ktoenal_data.ksnd_pwait);
+ }
+ }
+ poll_freewait(&ktoenal_data.ksnd_pwait);
+ ktoenal_thread_fini();
+ return (0);
+}
+
+/* Note that 'conn' has data to read and make sure the scheduler will
+ * service it.
+ *
+ * Bit 0 of ksnc_rx_ready is set atomically; only the caller that changes it
+ * from clear to set goes on to (under ksnd_sched_lock) queue the connection
+ * on ksnd_rx_conns, take a ref for the scheduler and wake it -- and only if
+ * the connection is not already scheduled. */
+void
+ktoenal_data_ready (ksock_conn_t *conn)
+{
+ unsigned long flags;
+ ENTRY;
+
+ if (!test_and_set_bit (0, &conn->ksnc_rx_ready)) {
+ spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags);
+
+ if (!conn->ksnc_rx_scheduled) { /* not being progressed */
+ list_add_tail (&conn->ksnc_rx_list,
+ &ktoenal_data.ksnd_rx_conns);
+ conn->ksnc_rx_scheduled = 1;
+ /* extra ref for scheduler */
+ atomic_inc (&conn->ksnc_refcount);
+
+ /* This is done to avoid the effects of a sequence
+ * of events in which the rx_ready is lost
+ */
+ conn->ksnc_rx_ready=1;
+
+ if (waitqueue_active (&ktoenal_data.ksnd_sched_waitq))
+ wake_up (&ktoenal_data.ksnd_sched_waitq);
+ }
+
+ spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
+ }
+
+ EXIT;
+}
+
+/* Note that 'conn' can accept more data and, if it has packets queued and
+ * is not already being progressed, hand it to the scheduler.
+ *
+ * Bit 0 of ksnc_tx_ready is set atomically; only the caller that changes it
+ * from clear to set goes on to (under ksnd_sched_lock) queue the connection
+ * on ksnd_tx_conns, take a ref for the scheduler and wake it.  A NULL 'conn'
+ * is traced and otherwise ignored. */
+void
+ktoenal_write_space (ksock_conn_t *conn)
+{
+        unsigned long flags;
+
+        CDEBUG (D_NET, "conn %p%s%s%s\n",
+                conn,
+                (conn == NULL) ? "" : (test_bit (0, &conn->ksnc_tx_ready) ? " ready" : " blocked"),
+                (conn == NULL) ? "" : (conn->ksnc_tx_scheduled ? " scheduled" : " idle"),
+                (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ? " empty" : " queued"));
+
+        /* The trace above tolerates a NULL conn, so the code below must not
+         * dereference one either */
+        if (conn == NULL)
+                return;
+
+        if (!test_and_set_bit (0, &conn->ksnc_tx_ready)) {
+                spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags);
+
+                if (!list_empty (&conn->ksnc_tx_queue) && /* packets to send */
+                    !conn->ksnc_tx_scheduled) { /* not being progressed */
+
+                        list_add_tail (&conn->ksnc_tx_list,
+                                       &ktoenal_data.ksnd_tx_conns);
+                        conn->ksnc_tx_scheduled = 1;
+                        /* extra ref for scheduler */
+                        atomic_inc (&conn->ksnc_refcount);
+
+                        if (waitqueue_active (&ktoenal_data.ksnd_sched_waitq))
+                                wake_up (&ktoenal_data.ksnd_sched_waitq);
+                }
+                spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
+        }
+}
+
+/* Callback table through which the portals library drives this NAL */
+nal_cb_t ktoenal_lib = {
+        .nal_data    = &ktoenal_data,           /* NAL private data */
+        .cb_send     = ktoenal_send,
+        .cb_recv     = ktoenal_recv,
+        .cb_read     = ktoenal_read,
+        .cb_write    = ktoenal_write,
+        .cb_callback = ktoenal_callback,
+        .cb_malloc   = ktoenal_malloc,
+        .cb_free     = ktoenal_free,
+        .cb_printf   = ktoenal_printf,
+        .cb_cli      = ktoenal_cli,
+        .cb_sti      = ktoenal_sti,
+        .cb_dist     = ktoenal_dist
+};
--- /dev/null
+# Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+
+# Name of the module object built by the rules in ../Rules.linux
+MODULE = portals
+modulenet_DATA = portals.o
+EXTRA_PROGRAMS = portals
+
+# Sources that live in ../portals and are symlinked into this directory
+LIBLINKS := lib-dispatch.c lib-eq.c lib-init.c lib-md.c lib-me.c lib-move.c lib-msg.c lib-ni.c lib-not-impl.c lib-pid.c
+APILINKS := api-eq.c api-errno.c api-init.c api-md.c api-me.c api-ni.c api-wrap.c
+LINKS = $(APILINKS) $(LIBLINKS)
+DISTCLEANFILES = $(LINKS) link-stamp *.orig *.rej
+
+# All links are created in one go; link-stamp records that it has been done.
+# The leading '-' lets make carry on even if a link command fails.
+$(LINKS): link-stamp
+link-stamp:
+ -list='$(LIBLINKS)'; for f in $$list; do echo $$f ; ln -sf $(srcdir)/../portals/$$f .; done
+ -list='$(APILINKS)'; for f in $$list; do echo $$f ; ln -sf $(srcdir)/../portals/$$f .; done
+ echo timestamp > link-stamp
+
+DEFS =
+portals_SOURCES = $(LINKS) module.c proc.c debug.c
+
+# Don't distribute any patched files.
+# NOTE(review): EXT2C is not set anywhere in this file, so unless it is
+# defined elsewhere this hook removes nothing -- confirm it is still wanted.
+dist-hook:
+ list='$(EXT2C)'; for f in $$list; do rm -f $(distdir)/$$f; done
+
+include ../Rules.linux
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+include fs/lustre/portals/Kernelenv
+
+# libcfs.o is a composite object: kbuild links it from the objects listed in
+# the variable named after it, libcfs-objs.
+obj-y += libcfs.o
+libcfs-objs := module.o proc.o debug.o
\ No newline at end of file
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Phil Schwan <phil@clusterfs.com>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define EXPORT_SYMTAB
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/notifier.h>
+#include <linux/kernel.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/smp_lock.h>
+#include <linux/unistd.h>
+#include <linux/interrupt.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <linux/completion.h>
+
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <asm/uaccess.h>
+#include <asm/segment.h>
+#include <linux/miscdevice.h>
+
+# define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <linux/kp30.h>
+
+/* NOTE(review): DEBUG_OVERFLOW is not used in the visible part of this file;
+ * presumably slack kept at the end of the debug buffer -- confirm */
+#define DEBUG_OVERFLOW 1024
+/* In-memory debug log buffer and its size in bytes */
+static char *debug_buf = NULL;
+static unsigned long debug_size = 0;
+/* Current write offset into debug_buf */
+static atomic_t debug_off_a = ATOMIC_INIT(0);
+/* Non-zero once the buffer has wrapped (the dump then writes it in two parts) */
+static int debug_wrapped;
+/* Woken when a log dump has finished */
+wait_queue_head_t debug_ctlwq;
+/* Amount of unwritten data the daemon waits for before an unforced write */
+#define DAEMON_SND_SIZE (64 << 10)
+
+/*
+ * used by the daemon to keep track of the offset into debug_buffer for the
+ * next write to the file. Usually, the daemon writes out the buffer
+ * from debug_daemon_next_write up to debug_off
+ * variable usage
+ * Reader - portals_debug_msg()
+ * Writer - portals_debug_daemon()
+ * portals_debug_daemon_start() during daemon init time
+ * portals_debug_daemon_continue() to reset to debug_off
+ * portals_debug_clear_buffer() reset to debug_off for clear
+ * Note that *_start(), *_continue() & *clear_buffer() should serialized;
+ */
+static atomic_t debug_daemon_next_write;
+
+/*
+ * A debug_daemon can be in following states
+ * stopped - stopped state means there is no debug_daemon running.
+ * accordingly, it must be in paused state
+ * a daemon is in !stopped && !paused state after
+ * "lctl debug_daemon start" creates debug_daemon successfully
+ * Variable Usage
+ * Reader - portals_debug_daemon()
+ * portals_debug_set_daemon() routines
+ * Writer - portals_debug_set_daemon() routines
+ * portals_debug_daemon() on IO error
+ * paused - a debug_daemon state is changed from !paused into paused
+ * when "lctl debug_daemon paused" is issued
+ * "lctl debug_daemon continue" gets a daemon into !paused mode
+ * Reader - portals_debug_set_daemon() routines
+ * portals_debug_msg()
+ * Writer - portals_debug_set_daemon() on init
+ * portals_debug_daemon()
+ *
+ * Daemon state diagram.
+ * (stopped, paused)
+ * | <-- debug_daemon start
+ * V
+ * (!stopped, !paused)
+ * | <-- debug_daemon pause
+ * V
+ * (!stopped, paused)
+ * | <-- debug_daemon continue
+ * V
+ * (!stopped, !paused)
+ * | <-- debug_daemon stop
+ * V
+ * (stopped, paused)
+ * Overlapped - this is a state when CDEBUG is too fast for the daemon to
+ * write out the debug_buffer. That is, debug_off is to
+ * overlap debug_daemon_next_write;
+ * Reader - portals_debug_msg()
+ * Writer - portals_debug_msg()
+ */
+
+/*
+ * Description on Trace Daemon Synchronization
+ *
+ * Three categories of code are synchronizing between each other
+ * 1. lctl, portals_debug_set_daemon(), the user debug control code,
+ * as well as portals_debug_clear_buffer()
+ * 2. CDEBUG, portals_debug_msg(), the debug put messages routine
+ * 3. Daemon, portals_debug_daemon(), to write out debug log file
+ *
+ *
+ * Three different controls for synchronizations
+ *
+ * 1. debug_daemon_semaphore
+ * This semaphore is used to serialize multiple lctl controls
+ * manipulating the debug daemon state. The semaphore serves as the
+ * gatekeeper that allows only one user control thread, at any given time,
+ * to access the debug daemon state and keeps the other user control requests
+ * in a wait state until the current control request has been serviced.
+ *
+ * 2. wait_queue_head_t lctl (paired with lctl_event flag)
+ * Lctl event is the event between portals_debug_set_daemon() and
+ * portals_debug_daemon(). Lctl is an indicator for portals_debug_daemon()
+ * to flush data out to file. portals_debug_daemon() is to use lctl event
+ * as signal channel to wakeup portals_debug_set_daemon() upon flush
+ * operation is done.
+ *
+ * Producer :
+ * portals_debug_daemon() uses to wake up
+ * portals_debug_set_daemon(), pause and stop, routines
+ * Consumer :
+ * portals_debug_set_daemon(), stop and pause operations,
+ * wait and sleep on the event
+ *
+ * 3. wait_queue_head_t daemon (paired with daemon_event flag)
+ * This is an event channel to wakeup portals_debug_daemon. Daemon
+ * wakes up to run whenever there is an event posted. Daemon handles
+ * 2 types of operations . 1. Writes data out to debug file, 2. Flushes
+ * file and terminates base on lctl event.
+ * File operation -
+ * Daemon is normally in a sleep state.
+ * Daemon is woken up through daemon event whenever CDEBUG is
+ * putting data over any 64K boundary.
+ * File flush and termination -
+ * On portals_debug_daemon_stop/pause() operations, lctl control
+ * is to wake up daemon through daemon event.
+ *
+ * We can't use sleep_on() and wake_up() to replace the daemon event because
+ * portals_debug_daemon() must catch the wakeup operation posted by
+ * portals_debug_daemon_stop/pause(). Otherwise, stop and pause may get
+ * stuck in the lctl wait event.
+ *
+ * Producer :
+ * a. portals_debug_daemon_pause() and portals_debug_daemon_stop()
+ * uses the event to wake up portals_debug_daemon()
+ * b. portals_debug_msg() uses the event to wake up
+ * portals_debug_daemon() whenever the data output crosses
+ * a 64K byte boundary.
+ * Consumer :
+ * portals_debug_daemon() wakes up upon daemon event.
+ *
+ * Sequence for portals_debug_daemon_stop() operation
+ *
+ * _Portals_debug_daemon_stop()_ _Daemon_
+ * Wait_event(daemon) or running
+ * Paused = 1;
+ * Wakeup_event (daemon)
+ * Wait_event(lctl)
+ * Set force_flush flag if lctlevnt
+ * Flush data
+ * Wakeup_event (lctl)
+ * Wait_event(daemon)
+ * Stopped = 1;
+ * Wakeup_event (daemon)
+ * Wait_event(lctl)
+ * Exit daemon loop if (Stopped)
+ * Wakeup_event (lctl)
+ * Exit
+ * Return to user application
+ *
+ *
+ * _Portals_debug_msg()_ _Daemon_
+ * Wait_event(daemon) or running
+ * If (WriteStart<64K<WriteEnd)
+ * Wakeup_event(daemon)
+ * Do file IO
+ * Wait_event(daemon)
+ */
+/* State shared between the debug daemon thread, the lctl control paths and
+ * the message logging path (see the synchronization notes above). */
+struct debug_daemon_state {
+ unsigned long overlapped; /* cleared when the daemon starts */
+ unsigned long stopped; /* set when the daemon exits; tested after a forced flush */
+ atomic_t paused; /* cleared when the daemon starts, set when it exits */
+ unsigned long lctl_event; /* event for lctl */
+ wait_queue_head_t lctl; /* woken by the daemon after a forced flush and on exit */
+ unsigned long daemon_event; /* event for daemon */
+ wait_queue_head_t daemon; /* the daemon sleeps here until daemon_event is set */
+};
+/* The single instance of the daemon state */
+static struct debug_daemon_state debug_daemon_state;
+/* Serializes the lctl control operations on the daemon (see notes above) */
+static DECLARE_MUTEX(debug_daemon_semaphore);
+
+/* If non-zero, the daemon wraps back to the start of its log file once the
+ * file position reaches this limit */
+static loff_t daemon_file_size_limit;
+/* File the daemon writes the log to */
+char debug_daemon_file_path[1024] = "";
+
+spinlock_t portals_debug_lock = SPIN_LOCK_UNLOCKED;
+/* Prefix of the dump file name; ".<time>" is appended when dumping */
+char debug_file_path[1024] = "/tmp/lustre-log";
+/* Full name of the most recent dump file */
+char debug_file_name[1024];
+int handled_panic; /* to avoid recursive calls to notifiers */
+char portals_upcall[1024] = "/usr/lib/lustre/portals_upcall";
+
+
+/* Thread body: write the in-memory debug buffer out to a new file named
+ * "<debug_file_path>.<current time>".
+ *
+ * If the buffer has wrapped, the part after the current offset is written
+ * first, followed by the part before it, so the file is in time order.  The
+ * file is synced and closed, and debug_ctlwq is woken when done (also when
+ * the file could not be opened).  'arg' is not used.  Always returns 0. */
+int portals_do_debug_dumplog(void *arg)
+{
+        struct file *file;
+        void *journal_info;
+        int rc;
+        mm_segment_t oldfs;
+        unsigned long debug_off;
+
+        kportal_daemonize("");
+
+        reparent_to_init();
+        /* hide any journal handle for the duration of the file I/O */
+        journal_info = current->journal_info;
+        current->journal_info = NULL;
+        /* debug_file_path may be as long as debug_file_name, so the result
+         * must be bounded or the suffix could overrun the buffer */
+        snprintf(debug_file_name, sizeof(debug_file_name), "%s.%ld",
+                 debug_file_path, CURRENT_TIME);
+        file = filp_open(debug_file_name, O_CREAT|O_TRUNC|O_RDWR, 0644);
+
+        if (!file || IS_ERR(file)) {
+                CERROR("cannot open %s for dumping\n", debug_file_name);
+                GOTO(out, PTR_ERR(file));
+        } else {
+                printk(KERN_ALERT "dumping log to %s ... writing ...\n",
+                       debug_file_name);
+        }
+
+        debug_off = atomic_read(&debug_off_a);
+        /* the buffer is in kernel space: lift the user-address check */
+        oldfs = get_fs();
+        set_fs(get_ds());
+        if (debug_wrapped) {
+                /* oldest data first: from just past the offset to the end... */
+                rc = file->f_op->write(file, debug_buf + debug_off + 1,
+                                       debug_size-debug_off-1, &file->f_pos);
+                /* ...then from the start up to the offset */
+                rc += file->f_op->write(file, debug_buf, debug_off + 1,
+                                        &file->f_pos);
+        } else {
+                rc = file->f_op->write(file, debug_buf, debug_off,&file->f_pos);
+        }
+        printk("wrote %d bytes\n", rc);
+        set_fs(oldfs);
+
+        rc = file->f_op->fsync(file, file->f_dentry, 1);
+        if (rc)
+                CERROR("sync returns %d\n", rc);
+        filp_close(file, 0);
+out:
+        current->journal_info = journal_info;
+        wake_up(&debug_ctlwq);
+        return 0;
+}
+
+/* Debug daemon thread body: continuously write the in-memory debug buffer
+ * out to debug_daemon_file_path.
+ *
+ * Each pass writes the data between debug_daemon_next_write and the current
+ * debug offset, then sleeps until daemon_event is set.  Data is written when
+ * at least DAEMON_SND_SIZE bytes are pending, when the pending region wraps
+ * past the end of the buffer, or when a flush is being forced.  After a
+ * forced flush the file is synced and either the loop ends (if 'stopped' is
+ * set) or the lctl waiter is woken.  On exit the daemon marks itself paused
+ * and stopped and wakes the lctl waiter.  'arg' is not used.  Returns 0. */
+int portals_debug_daemon(void *arg)
+{
+ struct file *file;
+ void *journal_info;
+ mm_segment_t oldfs;
+ unsigned long force_flush = 0;
+ unsigned long size;
+ int rc;
+
+ kportal_daemonize("ldebug_daemon");
+ reparent_to_init();
+ /* hide any journal handle for the duration of the file I/O */
+ journal_info = current->journal_info;
+ current->journal_info = NULL;
+
+ file = filp_open(debug_daemon_file_path,
+ O_CREAT|O_TRUNC|O_RDWR|O_LARGEFILE, 0644);
+
+ if (!file || IS_ERR(file)) {
+ CERROR("cannot open %s for logging", debug_daemon_file_path);
+ GOTO(out1, PTR_ERR(file));
+ } else {
+ printk(KERN_ALERT "daemon dumping log to %s ... writing ...\n",
+ debug_daemon_file_path);
+ }
+
+ /* daemon is now running: (!stopped, !paused) */
+ debug_daemon_state.overlapped = 0;
+ debug_daemon_state.stopped = 0;
+ atomic_set(&debug_daemon_state.paused, 0);
+ /* the buffer is in kernel space: lift the user-address check */
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+ while (1) {
+ unsigned long ending;
+ unsigned long start, tail;
+ long delta;
+
+ debug_daemon_state.daemon_event = 0;
+
+ /* snapshot the region to write: [start, ending) in ring order */
+ ending = atomic_read(&debug_off_a);
+ start = atomic_read(&debug_daemon_next_write);
+
+ /* check if paused is imposed by lctl ? */
+ /* a clear lctl_event is treated as a request to flush everything */
+ force_flush = !debug_daemon_state.lctl_event;
+
+ /* delta < 0 means the region wraps: write up to the buffer end first */
+ delta = ending - start;
+ tail = debug_size - start;
+ size = (delta >= 0) ? delta : tail;
+ /* NOTE(review): a write that returns 0 makes no progress here, so this
+ * loop would not terminate in that case -- confirm it cannot happen */
+ while (size && (force_flush || (delta < 0) ||
+ (size >= DAEMON_SND_SIZE))) {
+ if (daemon_file_size_limit) {
+ /* don't write past the configured file size limit */
+ /* NOTE(review): the loff_t difference is narrowed to int here --
+ * confirm the limit can never exceed what an int can hold */
+ int ssize = daemon_file_size_limit - file->f_pos;
+ if (size > ssize)
+ size = ssize;
+ }
+
+ rc = file->f_op->write(file, debug_buf+start,
+ size, &file->f_pos);
+ if (rc < 0) {
+ printk(KERN_ALERT
+ "Debug_daemon write error %d\n", rc);
+ goto out;
+ }
+ /* advance past what was written and recompute what is left */
+ start += rc;
+ delta = ending - start;
+ tail = debug_size - start;
+ if (tail == 0)
+ start = 0; /* reached the end of the buffer: wrap */
+ if (delta >= 0)
+ size = delta;
+ else
+ size = (tail == 0) ? ending : tail;
+ if (daemon_file_size_limit == file->f_pos) {
+ // file wrapped around
+ file->f_pos = 0;
+ }
+ }
+ /* publish how far the file has caught up */
+ atomic_set(&debug_daemon_next_write, start);
+ if (force_flush) {
+ rc = file->f_op->fsync(file, file->f_dentry, 1);
+ if (rc < 0) {
+ printk(KERN_ALERT
+ "Debug_daemon sync error %d\n", rc);
+ goto out;
+ }
+ if (debug_daemon_state.stopped)
+ break;
+ /* tell the waiting lctl operation that the flush is done */
+ debug_daemon_state.lctl_event = 1;
+ wake_up(&debug_daemon_state.lctl);
+ }
+ wait_event(debug_daemon_state.daemon,
+ debug_daemon_state.daemon_event);
+ }
+out:
+ /* back to (stopped, paused) */
+ atomic_set(&debug_daemon_state.paused, 1);
+ debug_daemon_state.stopped = 1;
+ set_fs(oldfs);
+ filp_close(file, 0);
+ current->journal_info = journal_info;
+out1:
+ /* always release anyone waiting on the lctl event, even on open failure */
+ debug_daemon_state.lctl_event = 1;
+ wake_up(&debug_daemon_state.lctl);
+ return 0;
+}
+
+/*
+ * Print the 64KB of debug_buf that precede the current write offset
+ * (debug_off_a) through printk, in pieces of at most 1024 bytes.
+ *
+ * When fewer than 64KB lie before the offset, the window is split in two:
+ * the end of the buffer first, then the start of the buffer up to the
+ * offset.
+ */
+void portals_debug_print(void)
+{
+        unsigned long dumplen = 64 * 1024;
+        char *start1, *start2;
+        char *end1, *end2;
+        unsigned long debug_off = atomic_read(&debug_off_a);
+
+        start1 = debug_buf + debug_off - dumplen;
+        if (start1 < debug_buf) {
+                /* window reaches back past the start of the buffer */
+                start1 += debug_size;
+                end1 = debug_buf + debug_size - 1;
+                start2 = debug_buf;
+                end2 = debug_buf + debug_off;
+        } else {
+                end1 = debug_buf + debug_off;
+                /* second range is empty in this case */
+                start2 = debug_buf + debug_off;
+                end2 = debug_buf + debug_off;
+        }
+
+        while (start1 < end1) {
+                int count = MIN(1024, end1 - start1);
+                /* "%.*s" caps the output at 'count' bytes; the previous
+                 * "%*s" only set a minimum field width, so each call kept
+                 * printing up to the next NUL byte */
+                printk("%.*s", count, start1);
+                start1 += 1024;
+        }
+        while (start2 < end2) {
+                int count = MIN(1024, end2 - start2);
+                printk("%.*s", count, start2);
+                start2 += 1024;
+        }
+}
+
+void portals_debug_dumplog(void)
+{ /*
+ * Start a kernel thread running portals_do_debug_dumplog and sleep on
+ * debug_ctlwq until woken. If the thread cannot be started an error is
+ * printed and the function returns without sleeping.
+ * NOTE(review): sleep_on() is entered only after the thread has been
+ * created; if the thread reaches its wake_up(&debug_ctlwq) first, the
+ * wakeup looks like it could be missed -- confirm.
+ */
+ int rc;
+ ENTRY;
+
+ init_waitqueue_head(&debug_ctlwq);
+
+ rc = kernel_thread(portals_do_debug_dumplog,
+ NULL, CLONE_VM | CLONE_FS | CLONE_FILES);
+ if (rc < 0) {
+ printk(KERN_ERR "cannot start dump thread\n");
+ return;
+ }
+ sleep_on(&debug_ctlwq);
+}
+
+/*
+ * Start the debug daemon thread.
+ *
+ * file: path of the output file; when NULL the current contents of
+ *       debug_daemon_file_path are used.
+ * size: output file size limit in megabytes (stored shifted left by 20
+ *       in daemon_file_size_limit).
+ *
+ * Returns -EALREADY if the daemon is not stopped, the kernel_thread()
+ * error if the thread cannot be created, otherwise 0 once the daemon has
+ * signalled lctl_event.
+ */
+int portals_debug_daemon_start(char *file, unsigned int size)
+{
+        int rc;
+
+        if (!debug_daemon_state.stopped)
+                return -EALREADY;
+
+        if (file != NULL) {
+                /* strncpy() does not terminate the destination when the
+                 * source fills it, so copy one byte less than the 1024-byte
+                 * buffer and terminate explicitly */
+                strncpy(debug_daemon_file_path, file, 1024 - 1);
+                debug_daemon_file_path[1024 - 1] = '\0';
+        }
+
+        init_waitqueue_head(&debug_daemon_state.lctl);
+        init_waitqueue_head(&debug_daemon_state.daemon);
+
+        /* the daemon starts writing from the current end of the log */
+        atomic_set(&debug_daemon_next_write, atomic_read(&debug_off_a));
+
+        daemon_file_size_limit = size << 20;
+
+        debug_daemon_state.lctl_event = 0;
+        rc = kernel_thread(portals_debug_daemon, NULL, 0);
+        if (rc < 0) {
+                printk(KERN_ERR "cannot start debug daemon thread\n");
+                debug_daemon_file_path[0] = '\0';
+                return rc;
+        }
+        /* the daemon sets lctl_event after its first pass, or on failure
+         * to open the file */
+        wait_event(debug_daemon_state.lctl, debug_daemon_state.lctl_event);
+        return 0;
+}
+
+int portals_debug_daemon_pause(void)
+{ /*
+ * Mark the daemon paused, wake it through daemon_event and wait until it
+ * has set lctl_event again. Returns -EALREADY when the paused flag is
+ * already set, otherwise 0.
+ */
+ if (atomic_read(&debug_daemon_state.paused))
+ return -EALREADY;
+
+ atomic_set(&debug_daemon_state.paused, 1);
+ debug_daemon_state.lctl_event = 0; /* cleared so the daemon does a forced flush */
+ debug_daemon_state.daemon_event = 1;
+ wake_up(&debug_daemon_state.daemon);
+ wait_event(debug_daemon_state.lctl, debug_daemon_state.lctl_event);
+ return 0;
+}
+
+/*
+ * Resume a paused debug daemon: the daemon's next-write position is moved
+ * to the current log offset and the paused flag is cleared.
+ * Returns -EINVAL when the daemon is not paused or is stopped, else 0.
+ */
+int portals_debug_daemon_continue(void)
+{
+        if (!atomic_read(&debug_daemon_state.paused) ||
+            debug_daemon_state.stopped)
+                return -EINVAL;
+
+        debug_daemon_state.overlapped = 0;
+        atomic_set(&debug_daemon_next_write, atomic_read(&debug_off_a));
+        atomic_set(&debug_daemon_state.paused, 0);
+
+        return 0;
+}
+
+int portals_debug_daemon_stop(void)
+{ /*
+ * Stop the debug daemon: pause it first if it is running, then set the
+ * stopped flag, wake the daemon and wait for it to signal lctl_event.
+ * The daemon file path is cleared afterwards. Returns -EALREADY when the
+ * daemon is already stopped, otherwise 0.
+ */
+ if (debug_daemon_state.stopped)
+ return -EALREADY;
+
+ if (!atomic_read(&debug_daemon_state.paused))
+ portals_debug_daemon_pause();
+
+ debug_daemon_state.lctl_event = 0;
+ debug_daemon_state.stopped = 1;
+
+ debug_daemon_state.daemon_event = 1;
+ wake_up(&debug_daemon_state.daemon);
+ wait_event(debug_daemon_state.lctl, debug_daemon_state.lctl_event);
+
+ debug_daemon_file_path[0] = '\0';
+ return 0;
+}
+
+/*
+ * Dispatch a debug daemon control command while holding
+ * debug_daemon_semaphore.
+ *
+ * cmd:      one of the DEBUG_DAEMON_* commands.
+ * length:   length of 'filename' including its terminating NUL; only
+ *           looked at for DEBUG_DAEMON_START.
+ * filename: output file for DEBUG_DAEMON_START.
+ * size:     passed through to portals_debug_daemon_start().
+ *
+ * Returns the result of the selected operation, or -EINVAL for an unknown
+ * command or a filename that is not NUL-terminated.
+ */
+int portals_debug_set_daemon(unsigned int cmd, unsigned int length,
+                             char *filename, unsigned int size)
+{
+        int result = -EINVAL;
+
+        down(&debug_daemon_semaphore);
+
+        if (cmd == DEBUG_DAEMON_START) {
+                if (length && (filename[length -1] != '\0')) {
+                        CERROR("Invalid filename for debug_daemon\n");
+                        result = -EINVAL;
+                } else {
+                        result = portals_debug_daemon_start(filename, size);
+                }
+        } else if (cmd == DEBUG_DAEMON_STOP) {
+                result = portals_debug_daemon_stop();
+        } else if (cmd == DEBUG_DAEMON_PAUSE) {
+                result = portals_debug_daemon_pause();
+        } else if (cmd == DEBUG_DAEMON_CONTINUE) {
+                result = portals_debug_daemon_continue();
+        } else {
+                CERROR("unknown set_daemon cmd\n");
+        }
+
+        up(&debug_daemon_semaphore);
+        return result;
+}
+
+/*
+ * Panic notifier callback. Acts only on the first call (tracked by
+ * handled_panic). In interrupt context the log is printed with
+ * portals_debug_print(); otherwise the big kernel lock is released as many
+ * times as it is held and the log is dumped with portals_debug_dumplog().
+ * Always returns 0.
+ */
+static int panic_dumplog(struct notifier_block *self, unsigned long unused1,
+                         void *unused2)
+{
+        if (handled_panic)
+                return 0;
+        handled_panic = 1;
+
+        if (!in_interrupt()) {
+                while (current->lock_depth >= 0)
+                        unlock_kernel();
+                portals_debug_dumplog();
+        } else {
+                portals_debug_print();
+        }
+
+        return 0;
+}
+
+static struct notifier_block lustre_panic_notifier = { /* registered on panic_notifier_list by portals_debug_init() */
+ notifier_call : panic_dumplog,
+ next : NULL,
+ priority : 10000
+};
+
+/*
+ * Allocate and zero the debug buffer and register the panic notifier.
+ *
+ * bufsize: usable size of the buffer in bytes; DEBUG_OVERFLOW extra bytes
+ *          are allocated behind it.
+ *
+ * Returns -EALREADY if a buffer already exists, -ENOMEM if the allocation
+ * fails, otherwise 0. The debug daemon is left marked paused and stopped.
+ */
+int portals_debug_init(unsigned long bufsize)
+{
+        unsigned long debug_off = atomic_read(&debug_off_a);
+        if (debug_buf != NULL)
+                return -EALREADY;
+
+        atomic_set(&debug_daemon_state.paused, 1);
+        debug_daemon_state.stopped = 1;
+
+        debug_buf = vmalloc(bufsize + DEBUG_OVERFLOW);
+        if (debug_buf == NULL)
+                return -ENOMEM;
+        /* Clear what was just allocated. The length used to be debug_size,
+         * which is only assigned further down and so still held whatever
+         * value it had before this call. */
+        memset(debug_buf, 0, bufsize + DEBUG_OVERFLOW);
+        debug_size = bufsize;
+        debug_wrapped = 0;
+
+        printk(KERN_INFO "Portals: allocated %lu byte debug buffer at %p.\n",
+               bufsize, debug_buf);
+        atomic_set(&debug_off_a, debug_off);
+        notifier_chain_register(&panic_notifier_list, &lustre_panic_notifier);
+
+        return 0;
+}
+
+/*
+ * Unregister the panic notifier, stop the debug daemon and free the debug
+ * buffer. Returns -EINVAL if there is no buffer, otherwise 0.
+ */
+int portals_debug_cleanup(void)
+{
+        void *buf;
+
+        notifier_chain_unregister(&panic_notifier_list, &lustre_panic_notifier);
+        if (debug_buf == NULL)
+                return -EINVAL;
+
+        down(&debug_daemon_semaphore);
+        portals_debug_daemon_stop();
+
+        /* Clear the global before freeing: the other functions in this file
+         * test debug_buf against NULL, and portals_debug_init() refuses to
+         * run again while it is non-NULL. */
+        buf = debug_buf;
+        debug_buf = NULL;
+        vfree(buf);
+        atomic_set(&debug_off_a, 0);
+        up(&debug_daemon_semaphore);
+
+        return 0;
+}
+
+int portals_debug_clear_buffer(void)
+{ /*
+ * Reset the debug buffer to empty: the write offset, the wrapped flag
+ * and the daemon's next-write position are all zeroed under
+ * portals_debug_lock. A running daemon is paused for the duration and
+ * un-paused afterwards. Returns -EINVAL if there is no buffer, else 0.
+ */
+ unsigned long flags;
+ unsigned long state;
+
+ if (debug_buf == NULL)
+ return -EINVAL;
+
+ down(&debug_daemon_semaphore);
+ state = atomic_read(&debug_daemon_state.paused); /* remember whether the daemon was already paused */
+ if (!state)
+ portals_debug_daemon_pause();
+ spin_lock_irqsave(&portals_debug_lock, flags);
+ atomic_set(&debug_off_a, 0);
+ debug_wrapped = 0;
+ atomic_set(&debug_daemon_next_write, 0);
+ debug_daemon_state.overlapped = 0;
+ spin_unlock_irqrestore(&portals_debug_lock, flags);
+
+ if (!state)
+ atomic_set(&debug_daemon_state.paused, 0); /* only undo a pause taken above */
+ up(&debug_daemon_semaphore);
+
+ return 0;
+}
+
+/* Debug markers, although printed by S_PORTALS,
+ * should not be marked as such: DEBUG_SUBSYSTEM is switched to
+ * S_UNDEFINED around this function and restored to S_PORTALS after it.
+ *
+ * portals_debug_mark_buffer() logs 'text' between two rows of asterisks
+ * with CDEBUG. Returns -EINVAL if there is no debug buffer, otherwise 0.
+ */
+#undef DEBUG_SUBSYSTEM
+#define DEBUG_SUBSYSTEM S_UNDEFINED
+int portals_debug_mark_buffer(char *text)
+{
+ if (debug_buf == NULL)
+ return -EINVAL;
+
+ CDEBUG(0, "*******************************************************************************\n");
+ CDEBUG(0, "DEBUG MARKER: %s\n", text);
+ CDEBUG(0, "*******************************************************************************\n");
+
+ return 0;
+}
+#undef DEBUG_SUBSYSTEM
+#define DEBUG_SUBSYSTEM S_PORTALS
+
+/*
+ * Copy the debug log to a user-space buffer, oldest data first.
+ *
+ * buf: destination in user space.
+ * len: size of 'buf'; must be at least debug_size.
+ *
+ * Returns the number of bytes copied (debug_size when the log has
+ * wrapped, otherwise the current offset), -ENOSPC if 'buf' is too small,
+ * or -EFAULT if a copy to user space fails.
+ *
+ * NOTE(review): copy_to_user() is still called with portals_debug_lock
+ * held and interrupts disabled, as before; it may fault and sleep.
+ * Consider copying via a temporary kernel buffer outside the lock.
+ */
+__s32 portals_debug_copy_to_user(char *buf, unsigned long len)
+{
+        int rc;
+        unsigned long debug_off;
+        unsigned long flags;
+
+        if (len < debug_size)
+                return -ENOSPC;
+
+        spin_lock_irqsave(&portals_debug_lock, flags);
+        /* read the offset under the lock so that it is consistent with
+         * debug_wrapped */
+        debug_off = atomic_read(&debug_off_a);
+        if (debug_wrapped) {
+                /* All of this juggling with the 1s is to keep the trailing nul
+                 * (which falls at debug_buf + debug_off) at the end of what we
+                 * copy into user space */
+                if (copy_to_user(buf, debug_buf + debug_off + 1,
+                                 debug_size - debug_off - 1) ||
+                    copy_to_user(buf + debug_size - debug_off - 1,
+                                 debug_buf, debug_off + 1))
+                        rc = -EFAULT;
+                else
+                        rc = debug_size;
+        } else {
+                if (copy_to_user(buf, debug_buf, debug_off))
+                        rc = -EFAULT;
+                else
+                        rc = debug_off;
+        }
+        spin_unlock_irqrestore(&portals_debug_lock, flags);
+
+        return rc;
+}
+
+/* FIXME: I'm not very smart; someone smarter should make this better.
+ *
+ * Format one message into debug_buf at the current write offset, under
+ * portals_debug_lock. Each record is a prefix (subsystem, mask, CPU,
+ * timestamp), a location header (file:line:function, pid, stack) and the
+ * caller's printf-style text. Depending on 'mask' and portal_printk the
+ * header and text are also sent to the console with printk. When a record
+ * runs past debug_size the excess is copied to the front of the buffer
+ * and debug_wrapped is set. While the debug daemon is running, messages
+ * are dropped once less than DAEMON_SND_SIZE bytes remain unwritten-free
+ * (after a single "overlapped" marker), and the daemon is woken when
+ * enough data has accumulated.
+ */
+void
+portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
+ unsigned long stack, const char *format, ...)
+{
+ va_list ap;
+ unsigned long flags;
+ int max_nob;
+ int prefix_nob;
+ int msg_nob;
+ struct timeval tv;
+ unsigned long base_offset;
+ unsigned long debug_off;
+
+ if (debug_buf == NULL) {
+ printk("portals_debug_msg: debug_buf is NULL!\n");
+ return;
+ }
+
+ spin_lock_irqsave(&portals_debug_lock, flags);
+ debug_off = atomic_read(&debug_off_a);
+ if (!atomic_read(&debug_daemon_state.paused)) {
+ unsigned long available;
+ long delta;
+ long v = atomic_read(&debug_daemon_next_write);
+
+ delta = debug_off - v; /* bytes logged but not yet written by the daemon */
+ available = (delta>=0) ? debug_size-delta : -delta;
+ // Check if we still have enough debug buffer for CDEBUG
+ if (available < DAEMON_SND_SIZE) {
+ /* Drop CDEBUG packets until enough debug_buffer is
+ * available */
+ if (debug_daemon_state.overlapped)
+ goto out;
+ /* If this is the first time, leave a marker in the
+ * output */
+ debug_daemon_state.overlapped = 1;
+ ap = NULL; /* NOTE(review): only valid where va_list is a pointer type; it is re-initialised by va_start() below anyway */
+ format = "DEBUG MARKER: Debug buffer overlapped\n";
+ } else /* More space just became available */
+ debug_daemon_state.overlapped = 0;
+ }
+
+ max_nob = debug_size - debug_off + DEBUG_OVERFLOW; /* room up to the end of the overflow area */
+ if (max_nob <= 0) {
+ spin_unlock_irqrestore(&portals_debug_lock, flags);
+ printk("logic error in portals_debug_msg: <0 bytes to write\n");
+ return;
+ }
+
+ /* NB since we pass a non-zero sized buffer (at least) on the first
+ * print, we can be assured that by the end of all the snprinting,
+ * we _do_ have a terminated buffer, even if our message got truncated.
+ */
+
+ do_gettimeofday(&tv);
+
+ prefix_nob = snprintf(debug_buf + debug_off, max_nob,
+ "%02x:%06x:%d:%lu.%06lu ",
+ subsys >> 24, mask, smp_processor_id(),
+ tv.tv_sec, tv.tv_usec);
+ max_nob -= prefix_nob;
+
+#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
+ msg_nob = snprintf(debug_buf + debug_off + prefix_nob, max_nob,
+ "(%s:%d:%s() %d | %d+%lu): ",
+ file, line, fn, current->pid,
+ current->thread.extern_pid, stack);
+#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ msg_nob = snprintf(debug_buf + debug_off + prefix_nob, max_nob,
+ "(%s:%d:%s() %d | %d+%lu): ",
+ file, line, fn, current->pid,
+ current->thread.mode.tt.extern_pid, stack);
+#else
+ msg_nob = snprintf(debug_buf + debug_off + prefix_nob, max_nob,
+ "(%s:%d:%s() %d+%lu): ",
+ file, line, fn, current->pid, stack);
+#endif
+ max_nob -= msg_nob;
+
+ va_start(ap, format);
+ msg_nob += vsnprintf(debug_buf + debug_off + prefix_nob + msg_nob,
+ max_nob, format, ap);
+ max_nob -= msg_nob; /* NOTE(review): msg_nob now includes the header length already subtracted above; max_nob is not read again */
+ va_end(ap);
+
+ /* Print to console, while msg is contiguous in debug_buf */
+ /* NB safely terminated see above */
+ if ((mask & D_EMERG) != 0)
+ printk(KERN_EMERG "%s", debug_buf + debug_off + prefix_nob);
+ if ((mask & D_ERROR) != 0) /* NOTE(review): not chained to the D_EMERG test with 'else', so a mask with both bits prints twice */
+ printk(KERN_ERR "%s", debug_buf + debug_off + prefix_nob);
+ else if (portal_printk)
+ printk("<%d>%s", portal_printk, debug_buf+debug_off+prefix_nob);
+ base_offset = debug_off & 0xFFFF;
+
+ debug_off += prefix_nob + msg_nob;
+ if (debug_off > debug_size) {
+ memcpy(debug_buf, debug_buf + debug_size,
+ debug_off - debug_size + 1); /* move the spill-over (plus its NUL) to the front */
+ debug_off -= debug_size;
+ debug_wrapped = 1;
+ }
+
+ atomic_set(&debug_off_a, debug_off);
+ if (!atomic_read(&debug_daemon_state.paused) &&
+ ((base_offset+prefix_nob+msg_nob) >= DAEMON_SND_SIZE)) {
+ debug_daemon_state.daemon_event = 1;
+ wake_up(&debug_daemon_state.daemon);
+ }
+out:
+ spin_unlock_irqrestore(&portals_debug_lock, flags);
+}
+
+void portals_debug_set_level(unsigned int debug_level)
+{ /* Log the new debug mask with printk and store it in portal_debug. */
+ printk("Setting portals debug level to %08x\n", debug_level);
+ portal_debug = debug_level;
+}
+
+/*
+ * Run the user-mode helper named by portals_upcall with the arguments
+ * "LBUG", file, fn and the line number, and log whether the invocation
+ * succeeded.
+ */
+void portals_run_lbug_upcall(char * file, char *fn, int line)
+{
+        char  linebuf[32];
+        char *argv[6] = { portals_upcall, "LBUG", file, fn, linebuf, NULL };
+        char *envp[3] = { "HOME=/", "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
+                          NULL };
+        int   rc;
+
+        ENTRY;
+        snprintf (linebuf, sizeof linebuf, "%d", line);
+
+        rc = call_usermodehelper(argv[0], argv, envp);
+        if (rc >= 0) {
+                CERROR("Invoked upcall %s %s %s %s %s\n",
+                       argv[0], argv[1], argv[2], argv[3], argv[4]);
+        } else {
+                CERROR("Error invoking lbug upcall %s %s %s %s %s: %d; check "
+                       "/proc/sys/portals/upcall\n",
+                       argv[0], argv[1], argv[2], argv[3], argv[4], rc);
+        }
+}
+
+
+EXPORT_SYMBOL(portals_debug_dumplog); /* debug entry points exported from this file */
+EXPORT_SYMBOL(portals_debug_msg);
+EXPORT_SYMBOL(portals_debug_set_level);
+EXPORT_SYMBOL(portals_run_lbug_upcall);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define EXPORT_SYMTAB
+#define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/smp_lock.h>
+#include <linux/unistd.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <asm/uaccess.h>
+#include <asm/segment.h>
+#include <linux/miscdevice.h>
+
+#include <portals/lib-p30.h>
+#include <portals/p30.h>
+#include <linux/kp30.h>
+
+#define PORTAL_MINOR 240 /* minor number used for the "portals" misc device */
+
+extern void (kping_client)(struct portal_ioctl_data *); /* looked up with PORTAL_SYMBOL_GET for IOC_PORTAL_PING */
+
+struct nal_cmd_handler { /* one registered per-NAL command handler */
+ nal_cmd_handler_t nch_handler; /* callback; NULL while the slot is free */
+ void * nch_private; /* passed back to nch_handler as its second argument */
+};
+
+static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1]; /* indexed by NAL number; valid indices are 1..NAL_MAX_NR */
+struct semaphore nal_cmd_sem; /* held around every access to nal_cmd[] */
+
+#ifdef PORTAL_DEBUG
+void
+kportal_assertion_failed (char *expr, char *file, char *func, int line)
+{ /*
+ * Log "ASSERTION(expr) failed" at D_EMERG through portals_debug_msg(),
+ * attributed to the given file/function/line, then call LBUG().
+ * Only built when PORTAL_DEBUG is defined.
+ */
+ unsigned long stack = CDEBUG_STACK(stack); /* NOTE(review): 'stack' is handed to the macro inside its own initialiser -- presumably only its address is used; confirm */
+ portals_debug_msg(0, D_EMERG, file, func, line, stack,
+ "ASSERTION(%s) failed\n", expr);
+ LBUG();
+}
+#endif
+
+/*
+ * Detach the calling kernel thread with daemonize() and name it 'str'.
+ * From 2.5.63 on daemonize() takes the name itself; before that the name
+ * is written into current->comm by hand.
+ */
+void
+kportal_daemonize (char *str)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,63))
+        daemonize();
+        snprintf (current->comm, sizeof (current->comm), "%s", str);
+#else
+        daemonize(str);
+#endif
+}
+
+/*
+ * Block every signal for the calling task: the blocked set is filled
+ * (siginitsetinv with an empty mask) and the pending state recalculated,
+ * all under the task's sigmask_lock.
+ */
+void
+kportal_blockallsigs (void)
+{
+        unsigned long  flags;
+
+        /* "&current" had been corrupted into a stray currency-sign
+         * character in these four statements */
+        spin_lock_irqsave (&current->sigmask_lock, flags);
+        siginitsetinv (&current->blocked, 0);
+        recalc_sigpending (current);
+        spin_unlock_irqrestore (&current->sigmask_lock, flags);
+}
+
+/* open() handler for the portals device: takes a module reference.
+ * Returns -EINVAL when no inode is supplied, otherwise 0. */
+static int kportal_psdev_open(struct inode * inode, struct file * file)
+{
+        int rc = -EINVAL;
+
+        ENTRY;
+
+        if (inode != NULL) {
+                PORTAL_MODULE_USE;
+                rc = 0;
+        }
+        RETURN(rc);
+}
+
+/* release() handler for the portals device: drops the module reference
+ * taken at open. Returns -EINVAL when no inode is supplied, otherwise 0. */
+static int kportal_psdev_release(struct inode * inode, struct file * file)
+{
+        int rc = -EINVAL;
+
+        ENTRY;
+
+        if (inode != NULL) {
+                PORTAL_MODULE_UNUSE;
+                rc = 0;
+        }
+        RETURN(rc);
+}
+
+static inline void freedata(void *data, int len)
+{ /* Thin wrapper: release 'data' of size 'len' with PORTAL_FREE. */
+ PORTAL_FREE(data, len);
+}
+
+/*
+ * Add a route for the nid range [lo_nid, hi_nid] via the given gateway,
+ * using the router's control interface.
+ * Returns -ENODEV if the kpr_control_interface symbol is not available,
+ * otherwise the result of kprci_add_route().
+ */
+static int
+kportal_add_route(int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid,
+                  ptl_nid_t hi_nid)
+{
+        kpr_control_interface_t *ctl;
+        int                      rc = -ENODEV;
+
+        ctl = (kpr_control_interface_t *) PORTAL_SYMBOL_GET (kpr_control_interface);
+        if (ctl != NULL) {
+                rc = ctl->kprci_add_route (gateway_nalid, gateway_nid,
+                                           lo_nid, hi_nid);
+                PORTAL_SYMBOL_PUT(kpr_control_interface);
+        }
+
+        return (rc);
+}
+
+/*
+ * Remove the route to 'target' using the router's control interface.
+ * Returns -ENODEV if the kpr_control_interface symbol is not available,
+ * otherwise the result of kprci_del_route().
+ */
+static int
+kportal_del_route(ptl_nid_t target)
+{
+        kpr_control_interface_t *ctl;
+        int                      rc = -ENODEV;
+
+        ctl = (kpr_control_interface_t *)PORTAL_SYMBOL_GET(kpr_control_interface);
+        if (ctl != NULL) {
+                rc = ctl->kprci_del_route (target);
+                PORTAL_SYMBOL_PUT(kpr_control_interface);
+        }
+
+        return (rc);
+}
+
+/*
+ * Look up route number 'index' through the router's control interface.
+ *
+ * On success (return 0) the gateway NAL id, the gateway nid and the
+ * low/high nids of the routed range are stored through the four output
+ * pointers. The outputs are left untouched on failure.
+ *
+ * Returns -ENODEV if the kpr_control_interface symbol is not available,
+ * otherwise the result of kprci_get_route().
+ */
+static int
+kportal_get_route(int index, __u32 *gateway_nalidp, ptl_nid_t *gateway_nidp,
+                  ptl_nid_t *lo_nidp, ptl_nid_t *hi_nidp)
+{
+        int       gateway_nalid;
+        ptl_nid_t gateway_nid;
+        ptl_nid_t lo_nid;
+        ptl_nid_t hi_nid;
+        int       rc;
+        kpr_control_interface_t *ci;
+
+        ci = (kpr_control_interface_t *) PORTAL_SYMBOL_GET(kpr_control_interface);
+        if (ci == NULL)
+                return (-ENODEV);
+
+        rc = ci->kprci_get_route(index, &gateway_nalid, &gateway_nid, &lo_nid,
+                                 &hi_nid);
+
+        if (rc == 0) {
+                CDEBUG(D_IOCTL, "got route [%d] %d "LPX64":"LPX64" - "LPX64"\n",
+                       index, gateway_nalid, gateway_nid, lo_nid, hi_nid);
+
+                *gateway_nalidp = (__u32)gateway_nalid;
+                /* nids are ptl_nid_t on both sides (and are printed with
+                 * LPX64 above); the former (__u32) casts discarded their
+                 * upper bits */
+                *gateway_nidp   = gateway_nid;
+                *lo_nidp        = lo_nid;
+                *hi_nidp        = hi_nid;
+        }
+
+        PORTAL_SYMBOL_PUT (kpr_control_interface);
+        return (rc);
+}
+
+/*
+ * Pass an ioctl request to the command handler registered for 'nal'.
+ * Returns -EINVAL when 'nal' is out of range or has no handler,
+ * otherwise whatever the handler returns. nal_cmd_sem is held while the
+ * handler runs.
+ */
+static int
+kportal_nal_cmd(int nal, struct portal_ioctl_data *data)
+{
+        int result = -EINVAL;
+
+        ENTRY;
+
+        down(&nal_cmd_sem);
+        if (nal > 0 && nal <= NAL_MAX_NR) {
+                if (nal_cmd[nal].nch_handler) {
+                        CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal, data->ioc_nal_cmd);
+                        result = nal_cmd[nal].nch_handler(data,
+                                                          nal_cmd[nal].nch_private);
+                }
+        }
+        up(&nal_cmd_sem);
+
+        RETURN(result);
+}
+
+/*
+ * Obtain the network interface handle exported by the kernel NAL 'nal'
+ * via PORTAL_SYMBOL_GET. Returns NULL for TCPNAL (a userspace NAL) and,
+ * after logging an error, for unknown NAL numbers. A non-NULL result is
+ * to be released with kportal_put_ni().
+ */
+ptl_handle_ni_t *
+kportal_get_ni (int nal)
+{
+        ptl_handle_ni_t *ni = NULL;
+
+        switch (nal) {
+        case QSWNAL:
+                ni = PORTAL_SYMBOL_GET(kqswnal_ni);
+                break;
+        case SOCKNAL:
+                ni = PORTAL_SYMBOL_GET(ksocknal_ni);
+                break;
+        case TOENAL:
+                ni = PORTAL_SYMBOL_GET(ktoenal_ni);
+                break;
+        case GMNAL:
+                ni = PORTAL_SYMBOL_GET(kgmnal_ni);
+                break;
+        case SCIMACNAL:
+                ni = PORTAL_SYMBOL_GET(kscimacnal_ni);
+                break;
+        case TCPNAL:
+                /* userspace NAL */
+                break;
+        default:
+                /* A warning to a naive caller */
+                CERROR ("unknown nal: %d\n", nal);
+                break;
+        }
+
+        return (ni);
+}
+
+/*
+ * Release the reference taken by kportal_get_ni() for the kernel NAL
+ * 'nal'. TCPNAL never yields a handle from kportal_get_ni(), so releasing
+ * it is treated as a bug. Unknown NAL numbers are logged.
+ */
+void
+kportal_put_ni (int nal)
+{
+
+        switch (nal)
+        {
+        case QSWNAL:
+                PORTAL_SYMBOL_PUT(kqswnal_ni);
+                break;
+        case SOCKNAL:
+                PORTAL_SYMBOL_PUT(ksocknal_ni);
+                break;
+        case TOENAL:
+                PORTAL_SYMBOL_PUT(ktoenal_ni);
+                break;
+        case GMNAL:
+                PORTAL_SYMBOL_PUT(kgmnal_ni);
+                break;
+        case TCPNAL:
+                /* A lesson to a malicious caller */
+                LBUG ();
+                /* do not fall through and drop a SCIMACNAL reference that
+                 * was never taken, should LBUG() ever return */
+                break;
+        case SCIMACNAL:
+                PORTAL_SYMBOL_PUT(kscimacnal_ni);
+                break;
+        default:
+                CERROR ("unknown nal: %d\n", nal);
+                break;
+        }
+}
+
+/*
+ * Register 'handler' (with its 'private' cookie) as the command handler
+ * for NAL number 'nal'.
+ *
+ * Returns 0 on success, -EBUSY if a handler is already registered for
+ * that NAL, or -EINVAL if 'nal' is outside 1..NAL_MAX_NR (this case used
+ * to report success without registering anything).
+ */
+int
+kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private)
+{
+        int rc = 0;
+
+        CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler);
+
+        if (nal <= 0 || nal > NAL_MAX_NR)
+                return -EINVAL;
+
+        down(&nal_cmd_sem);
+        if (nal_cmd[nal].nch_handler != NULL)
+                rc = -EBUSY;
+        else {
+                nal_cmd[nal].nch_handler = handler;
+                nal_cmd[nal].nch_private = private;
+        }
+        up(&nal_cmd_sem);
+
+        return rc;
+}
+
+/*
+ * Clear the command handler slot of NAL number 'nal'. Out-of-range
+ * numbers are ignored. Always returns 0.
+ */
+int
+kportal_nal_unregister(int nal)
+{
+        CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal);
+
+        if (nal <= 0 || nal > NAL_MAX_NR)
+                return 0;
+
+        down(&nal_cmd_sem);
+        nal_cmd[nal].nch_handler = NULL;
+        nal_cmd[nal].nch_private = NULL;
+        up(&nal_cmd_sem);
+
+        return 0;
+}
+
+
+/*
+ * ioctl() handler of the portals device.
+ *
+ * The command is validated against IOC_PORTAL_TYPE and the
+ * IOC_PORTAL_MIN_NR..IOC_PORTAL_MAX_NR range, the request is unpacked
+ * from user space into a local buffer with portal_ioctl_getdata(), and
+ * the command is dispatched. Commands that produce results copy the
+ * updated portal_ioctl_data back to 'arg'.
+ *
+ * Returns 0 on success or a negative errno: -EINVAL for a bad command or
+ * request, -EPERM for IOC_PORTAL_PANIC without CAP_SYS_BOOT, -EFAULT when
+ * the reply cannot be copied to user space, or the error of the
+ * underlying operation.
+ */
+static int kportal_ioctl(struct inode *inode, struct file *file,
+                         unsigned int cmd, unsigned long arg)
+{
+        int err = 0;
+        char buf[1024];
+        struct portal_ioctl_data *data;
+
+        ENTRY;
+
+        if ( _IOC_TYPE(cmd) != IOC_PORTAL_TYPE ||
+             _IOC_NR(cmd) < IOC_PORTAL_MIN_NR ||
+             _IOC_NR(cmd) > IOC_PORTAL_MAX_NR ) {
+                CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n",
+                       _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
+                RETURN(-EINVAL);
+        }
+
+        if (portal_ioctl_getdata(buf, buf + 800, (void *)arg)) {
+                CERROR("PORTALS ioctl: data error\n");
+                RETURN(-EINVAL);
+        }
+
+        data = (struct portal_ioctl_data *)buf;
+
+        switch (cmd) {
+        case IOC_PORTAL_SET_DAEMON:
+                RETURN (portals_debug_set_daemon (
+                                (unsigned int) data->ioc_count,
+                                (unsigned int) data->ioc_inllen1,
+                                (char *) data->ioc_inlbuf1,
+                                (unsigned int) data->ioc_misc));
+        case IOC_PORTAL_GET_DEBUG: {
+                __s32 size = portals_debug_copy_to_user(data->ioc_pbuf1,
+                                                        data->ioc_plen1);
+
+                if (size < 0)
+                        RETURN(size);
+
+                data->ioc_size = size;
+                /* copy_to_user() returns the number of bytes it could not
+                 * copy; that count must not become the ioctl result */
+                if (copy_to_user((char *)arg, data, sizeof(*data)))
+                        RETURN(-EFAULT);
+                RETURN(0);
+        }
+        case IOC_PORTAL_CLEAR_DEBUG:
+                portals_debug_clear_buffer();
+                RETURN(0);
+        case IOC_PORTAL_PANIC:
+                if (!capable (CAP_SYS_BOOT))
+                        RETURN (-EPERM);
+                panic("debugctl-invoked panic");
+                RETURN(0);
+        case IOC_PORTAL_MARK_DEBUG:
+                /* a zero length would index the byte before the buffer */
+                if (data->ioc_inlbuf1 == NULL ||
+                    data->ioc_inllen1 == 0 ||
+                    data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0')
+                        RETURN(-EINVAL);
+                portals_debug_mark_buffer(data->ioc_inlbuf1);
+                RETURN(0);
+        case IOC_PORTAL_PING: {
+                void (*ping)(struct portal_ioctl_data *);
+
+                CDEBUG(D_IOCTL, "doing %d pings to nid "LPU64"\n",
+                       data->ioc_count, data->ioc_nid);
+                ping = PORTAL_SYMBOL_GET(kping_client);
+                if (!ping)
+                        CERROR("PORTAL_SYMBOL_GET failed\n");
+                else {
+                        ping(data);
+                        PORTAL_SYMBOL_PUT(kping_client);
+                }
+                RETURN(0);
+        }
+
+        case IOC_PORTAL_ADD_ROUTE:
+                CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n",
+                       data->ioc_nal, data->ioc_nid, data->ioc_nid2,
+                       data->ioc_nid3);
+                /* the two range ends may be given in either order */
+                err = kportal_add_route(data->ioc_nal, data->ioc_nid,
+                                        MIN (data->ioc_nid2, data->ioc_nid3),
+                                        MAX (data->ioc_nid2, data->ioc_nid3));
+                break;
+
+        case IOC_PORTAL_DEL_ROUTE:
+                CDEBUG (D_IOCTL, "Removing route to "LPU64"\n", data->ioc_nid);
+                err = kportal_del_route (data->ioc_nid);
+                break;
+
+        case IOC_PORTAL_GET_ROUTE:
+                CDEBUG (D_IOCTL, "Getting route [%d]\n", data->ioc_count);
+                err = kportal_get_route(data->ioc_count, &data->ioc_nal,
+                                        &data->ioc_nid, &data->ioc_nid2,
+                                        &data->ioc_nid3);
+                if (err == 0)
+                        if (copy_to_user((char *)arg, data, sizeof (*data)))
+                                err = -EFAULT;
+                break;
+
+        case IOC_PORTAL_GET_NID: {
+                const ptl_handle_ni_t *nip;
+                ptl_process_id_t       pid;
+
+                CDEBUG (D_IOCTL, "Getting nid [%d]\n", data->ioc_nal);
+
+                nip = kportal_get_ni (data->ioc_nal);
+                if (nip == NULL)
+                        RETURN (-EINVAL);
+
+                err = PtlGetId (*nip, &pid);
+                LASSERT (err == PTL_OK);
+                kportal_put_ni (data->ioc_nal);
+
+                data->ioc_nid = pid.nid;
+                if (copy_to_user ((char *)arg, data, sizeof (*data)))
+                        err = -EFAULT;
+                break;
+        }
+
+        case IOC_PORTAL_NAL_CMD:
+                CDEBUG (D_IOCTL, "nal command nal %d cmd %d\n", data->ioc_nal,
+                        data->ioc_nal_cmd);
+                err = kportal_nal_cmd(data->ioc_nal, data);
+                if (err == 0)
+                        if (copy_to_user((char *)arg, data, sizeof (*data)))
+                                err = -EFAULT;
+                break;
+
+        case IOC_PORTAL_FAIL_NID: {
+                const ptl_handle_ni_t *nip;
+
+                CDEBUG (D_IOCTL, "fail nid: [%d] "LPU64" count %d\n",
+                        data->ioc_nal, data->ioc_nid, data->ioc_count);
+
+                nip = kportal_get_ni (data->ioc_nal);
+                if (nip == NULL)
+                        RETURN (-EINVAL);
+
+                err = PtlFailNid (*nip, data->ioc_nid, data->ioc_count);
+                /* drop the reference taken by kportal_get_ni(), as the
+                 * IOC_PORTAL_GET_NID case does; it used to be leaked */
+                kportal_put_ni (data->ioc_nal);
+                break;
+        }
+
+        default:
+                err = -EINVAL;
+                break;
+        }
+
+        RETURN(err);
+}
+
+
+static struct file_operations portalsdev_fops = { /* the device supports only open, release and ioctl */
+ ioctl: kportal_ioctl,
+ open: kportal_psdev_open,
+ release: kportal_psdev_release
+};
+
+
+static struct miscdevice portal_dev = { /* registered with misc_register() at module init */
+ PORTAL_MINOR,
+ "portals",
+ &portalsdev_fops
+};
+
+extern int insert_proc(void);
+extern void remove_proc(void);
+MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
+MODULE_DESCRIPTION("Portals v3.1");
+MODULE_LICENSE("GPL");
+
+static int init_kportals_module(void)
+{ /*
+ * Module init: set up the 5MB debug buffer, the NAL command semaphore,
+ * the misc device, Portals itself (PtlInit) and the proc/sysctl entries,
+ * in that order. On failure the steps already completed are undone in
+ * reverse through the cleanup_* labels and the error is returned.
+ */
+ int rc;
+
+ rc = portals_debug_init(5 * 1024 * 1024);
+ if (rc < 0) {
+ printk(KERN_ERR "portals_debug_init: %d\n", rc);
+ return (rc);
+ }
+
+ sema_init(&nal_cmd_sem, 1);
+
+ rc = misc_register(&portal_dev);
+ if (rc) {
+ CERROR("misc_register: error %d\n", rc);
+ goto cleanup_debug;
+ }
+
+ rc = PtlInit();
+ if (rc) {
+ CERROR("PtlInit: error %d\n", rc);
+ goto cleanup_deregister;
+ }
+
+ rc = insert_proc();
+ if (rc) {
+ CERROR("insert_proc: error %d\n", rc);
+ goto cleanup_fini;
+ }
+
+ CDEBUG (D_OTHER, "portals setup OK\n");
+ return (0);
+
+ cleanup_fini:
+ PtlFini();
+ cleanup_deregister:
+ misc_deregister(&portal_dev);
+ cleanup_debug:
+ portals_debug_cleanup();
+ return rc;
+}
+
+static void exit_kportals_module(void)
+{ /*
+ * Module exit: remove the proc/sysctl entries, shut Portals down,
+ * deregister the misc device, report any memory still accounted in
+ * portal_kmemory, and finally release the debug buffer.
+ */
+ int rc;
+
+ remove_proc();
+ PtlFini();
+
+ CDEBUG(D_MALLOC, "before Portals cleanup: kmem %d\n",
+ atomic_read(&portal_kmemory));
+
+
+ rc = misc_deregister(&portal_dev);
+ if (rc)
+ CERROR("misc_deregister error %d\n", rc);
+
+ if (atomic_read(&portal_kmemory) != 0)
+ CERROR("Portals memory leaked: %d bytes\n",
+ atomic_read(&portal_kmemory));
+
+ rc = portals_debug_cleanup(); /* last, so the CERROR/CDEBUG calls above still have a buffer */
+ if (rc)
+ printk(KERN_ERR "portals_debug_cleanup: %d\n", rc);
+}
+
+EXPORT_SYMBOL(lib_dispatch); /* symbols this module makes available to other modules */
+EXPORT_SYMBOL(PtlMEAttach);
+EXPORT_SYMBOL(PtlMEInsert);
+EXPORT_SYMBOL(PtlMEUnlink);
+EXPORT_SYMBOL(PtlEQAlloc);
+EXPORT_SYMBOL(PtlMDAttach);
+EXPORT_SYMBOL(PtlMDUnlink);
+EXPORT_SYMBOL(PtlNIInit);
+EXPORT_SYMBOL(PtlNIFini);
+EXPORT_SYMBOL(PtlNIDebug);
+EXPORT_SYMBOL(PtlInit);
+EXPORT_SYMBOL(PtlFini);
+EXPORT_SYMBOL(PtlPut);
+EXPORT_SYMBOL(PtlGet);
+EXPORT_SYMBOL(ptl_err_str);
+EXPORT_SYMBOL(portal_subsystem_debug);
+EXPORT_SYMBOL(portal_debug);
+EXPORT_SYMBOL(portal_stack);
+EXPORT_SYMBOL(portal_printk);
+EXPORT_SYMBOL(PtlEQWait);
+EXPORT_SYMBOL(PtlEQFree);
+EXPORT_SYMBOL(PtlEQGet);
+EXPORT_SYMBOL(PtlGetId);
+EXPORT_SYMBOL(PtlMDBind);
+EXPORT_SYMBOL(lib_iov_nob);
+EXPORT_SYMBOL(lib_copy_iov2buf);
+EXPORT_SYMBOL(lib_copy_buf2iov);
+EXPORT_SYMBOL(lib_kiov_nob);
+EXPORT_SYMBOL(lib_copy_kiov2buf);
+EXPORT_SYMBOL(lib_copy_buf2kiov);
+EXPORT_SYMBOL(lib_finalize);
+EXPORT_SYMBOL(lib_parse);
+EXPORT_SYMBOL(lib_init);
+EXPORT_SYMBOL(lib_fini);
+EXPORT_SYMBOL(portal_kmemory);
+EXPORT_SYMBOL(kportal_daemonize);
+EXPORT_SYMBOL(kportal_blockallsigs);
+EXPORT_SYMBOL(kportal_nal_register);
+EXPORT_SYMBOL(kportal_nal_unregister);
+EXPORT_SYMBOL(kportal_assertion_failed); /* NOTE(review): exported unconditionally, but defined in this file only under #ifdef PORTAL_DEBUG */
+EXPORT_SYMBOL(dispatch_name);
+EXPORT_SYMBOL(kportal_get_ni);
+EXPORT_SYMBOL(kportal_put_ni);
+
+module_init(init_kportals_module);
+module_exit (exit_kportals_module);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Author: Zach Brown <zab@zabbo.net>
+ * Author: Peter J. Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define EXPORT_SYMTAB
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/smp_lock.h>
+#include <linux/unistd.h>
+#include <net/sock.h>
+#include <linux/uio.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/list.h>
+#include <asm/uaccess.h>
+#include <asm/segment.h>
+
+#include <linux/proc_fs.h>
+#include <linux/sysctl.h>
+
+# define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <linux/kp30.h>
+#include <asm/div64.h>
+
+static struct ctl_table_header *portals_table_header = NULL; /* set by insert_proc() when the table is registered */
+extern char debug_file_path[1024];
+extern char debug_daemon_file_path[1024];
+extern char portals_upcall[1024];
+
+#define PSDEV_PORTALS (0x100) /* id of the top-level "portals" sysctl directory */
+#define PSDEV_DEBUG 1 /* control debugging */
+#define PSDEV_SUBSYSTEM_DEBUG 2 /* control per-subsystem debugging */
+#define PSDEV_PRINTK 3 /* force all errors to console */
+#define PSDEV_DEBUG_PATH 4 /* crashdump log location */
+#define PSDEV_DEBUG_DUMP_PATH 5 /* debug daemon output file ("debug_daemon_path") */
+#define PSDEV_PORTALS_UPCALL 6 /* User mode upcall script */
+
+#define PORTALS_PRIMARY_CTLCNT 6
+static struct ctl_table portals_table[PORTALS_PRIMARY_CTLCNT + 1] = { /* one entry per PSDEV_* id above, plus the terminator */
+ {PSDEV_DEBUG, "debug", &portal_debug, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {PSDEV_SUBSYSTEM_DEBUG, "subsystem_debug", &portal_subsystem_debug,
+ sizeof(int), 0644, NULL, &proc_dointvec},
+ {PSDEV_PRINTK, "printk", &portal_printk, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {PSDEV_DEBUG_PATH, "debug_path", debug_file_path,
+ sizeof(debug_file_path), 0644, NULL, &proc_dostring, &sysctl_string},
+ {PSDEV_DEBUG_DUMP_PATH, "debug_daemon_path", debug_daemon_file_path,
+ sizeof(debug_daemon_file_path), 0644, NULL, &proc_dostring,
+ &sysctl_string},
+ {PSDEV_PORTALS_UPCALL, "upcall", portals_upcall,
+ sizeof(portals_upcall), 0644, NULL, &proc_dostring,
+ &sysctl_string},
+ {0}
+};
+
+static struct ctl_table top_table[2] = { /* "portals" directory holding portals_table */
+ {PSDEV_PORTALS, "portals", NULL, 0, 0555, portals_table},
+ {0}
+};
+
+
+#ifdef PORTALS_PROFILING
+/*
+ * profiling stuff. we do this statically for now 'cause it's simple,
+ * but we could do some tricks with elf sections to have this array
+ * automatically built.
+ *
+ * def_prof(FOO) fills the slot indexed by PROF__FOO with the name "FOO"
+ * and zeroed counters. insert_proc() checks that the array length
+ * matches MAX_PROFS.
+ */
+#define def_prof(FOO) [PROF__##FOO] = {#FOO, 0, }
+
+struct prof_ent prof_ents[] = {
+ def_prof(our_recvmsg),
+ def_prof(our_sendmsg),
+ def_prof(socknal_recv),
+ def_prof(lib_parse),
+ def_prof(conn_list_walk),
+ def_prof(memcpy),
+ def_prof(lib_finalize),
+ def_prof(pingcli_time),
+ def_prof(gmnal_send),
+ def_prof(gmnal_recv),
+};
+
+EXPORT_SYMBOL(prof_ents);
+
+/*
+ * this function is as crazy as the proc filling api
+ * requires.
+ *
+ * buffer: page allocated for us to scribble in. the
+ * data returned to the user will be taken from here.
+ * *start: address of the pointer that will tell the
+ * caller where in buffer the data the user wants is.
+ * ppos: offset in the entire /proc file that the user
+ * currently wants.
+ * wanted: the amount of data the user wants.
+ *
+ * while going, 'curpos' is the offset in the entire
+ * file where we currently are. We only actually
+ * start filling buffer when we get to a place in
+ * the file that the user cares about.
+ *
+ * we take care to only sprintf when the user cares because
+ * we're holding a lock while we do this.
+ *
+ * we're smart and know that we generate fixed size lines.
+ * we only start writing to the buffer when the user cares.
+ * This is unpredictable because we don't snapshot the
+ * list between calls that are filling in a file from
+ * the list. The list could change mid read and the
+ * output will look very weird indeed. oh well.
+ */
+
+/*
+ * read_proc callback for the profiling "cycles" file: emits a header line
+ * followed by one fixed-width line per prof_ents[] entry.
+ * Returns the number of bytes made available at *start (at most
+ * 'wanted') and sets *eof once every entry has been walked.
+ */
+static int prof_read_proc(char *buffer, char **start, off_t ppos, int wanted,
+                          int *eof, void *data)
+{
+        int len = 0, i;
+        int curpos;
+        char *header = "Interval Cycles_per (Starts Finishes Total)\n";
+        int header_len = strlen(header);
+        char *format = "%-15s %.12Ld (%.12d %.12d %.12Ld)";
+        int line_len = (15 + 1 + 12 + 2 + 12 + 1 + 12 + 1 + 12 + 1);
+
+        *start = buffer;
+
+        if (ppos < header_len) {
+                /* only the part of the header at and after ppos is left to
+                 * copy; using the full header_len here read past the end
+                 * of the string whenever ppos was non-zero */
+                int diff = MIN(header_len - (int)ppos, wanted);
+                memcpy(buffer, header + ppos, diff);
+                len += diff;
+                ppos += diff;
+        }
+
+        if (len >= wanted)
+                goto out;
+
+        curpos = header_len;
+
+        for ( i = 0; i < MAX_PROFS ; i++) {
+                int copied;
+                struct prof_ent *pe = &prof_ents[i];
+                long long cycles_per;
+                /*
+                 * find the part of the array that the buffer wants
+                 */
+                if (ppos >= (curpos + line_len)) {
+                        curpos += line_len;
+                        continue;
+                }
+                /* the clever caller split a line */
+                if (ppos > curpos) {
+                        *start = buffer + (ppos - curpos);
+                }
+
+                /* average cycles per finished interval; do_div() divides
+                 * its first argument in place */
+                if (pe->finishes == 0)
+                        cycles_per = 0;
+                else
+                {
+                        cycles_per = pe->total_cycles;
+                        do_div (cycles_per, pe->finishes);
+                }
+
+                copied = sprintf(buffer + len, format, pe->str, cycles_per,
+                                 pe->starts, pe->finishes, pe->total_cycles);
+
+                len += copied;
+
+                /* pad to line len, -1 for \n */
+                if ((copied < line_len-1)) {
+                        int diff = (line_len-1) - copied;
+                        memset(buffer + len, ' ', diff);
+                        len += diff;
+                        copied += diff;
+                }
+
+                buffer[len++]= '\n';
+
+                /* bail if we have enough */
+                if (((buffer + len) - *start) >= wanted)
+                        break;
+
+                curpos += line_len;
+        }
+
+        /* lameness */
+        if (i == MAX_PROFS)
+                *eof = 1;
+ out:
+
+        return MIN(((buffer + len) - *start), wanted);
+}
+
+/*
+ * all kids love /proc :/
+ *
+ * Path of the proc directory that insert_proc() creates and remove_proc()
+ * removes; the "cycles" entry lives directly below it.
+ */
+static unsigned char basedir[]="net/portals";
+#endif /* PORTALS_PROFILING */
+
+/*
+ * Register the proc and sysctl entries of this module.
+ *
+ * With PORTALS_PROFILING set, creates the basedir directory and a "cycles"
+ * entry below it that is served by prof_read_proc().  With CONFIG_SYSCTL
+ * set, registers top_table unless it has been registered already.
+ *
+ * Returns 0 on success, -1 if the profiling table is out of sync with
+ * MAX_PROFS or the "cycles" entry cannot be created.
+ */
+int insert_proc(void)
+{
+#if PORTALS_PROFILING
+        unsigned char dir[128];
+        struct proc_dir_entry *ent;
+
+        if (ARRAY_SIZE(prof_ents) != MAX_PROFS) {
+                CERROR("profiling enum and array are out of sync.\n");
+                return -1;
+        }
+
+        /*
+         * This is pretty lame. assuming that failure just
+         * means that they already existed.
+         */
+        /* dir[] is an automatic array: it must be terminated before the
+         * first strcat(), otherwise we append to stack garbage */
+        dir[0] = '\0';
+        strcat(dir, basedir);
+        create_proc_entry(dir, S_IFDIR, 0);
+
+        strcat(dir, "/cycles");
+        ent = create_proc_entry(dir, 0, 0);
+        if (!ent) {
+                CERROR("couldn't register %s?\n", dir);
+                return -1;
+        }
+
+        ent->data = NULL;
+        ent->read_proc = prof_read_proc;
+#endif /* PORTALS_PROFILING */
+
+#ifdef CONFIG_SYSCTL
+        if (!portals_table_header)
+                portals_table_header = register_sysctl_table(top_table, 0);
+#endif
+
+        return 0;
+}
+
+/*
+ * Undo insert_proc(): remove the "cycles" entry and its directory (when
+ * profiling is compiled in) and unregister the sysctl table (when sysctl
+ * support is compiled in).
+ */
+void remove_proc(void)
+{
+#if PORTALS_PROFILING
+        unsigned char path[128];
+        int dir_len;
+
+        /* build "<basedir>/cycles", remembering where the directory ends */
+        path[0] = '\0';
+        strcat(path, basedir);
+        dir_len = strlen(path);
+        strcat(path, "/cycles");
+
+        /* the file goes first... */
+        remove_proc_entry(path, 0);
+
+        /* ...then cut the name back to the directory and remove that */
+        path[dir_len] = '\0';
+        remove_proc_entry(path, 0);
+#endif /* PORTALS_PROFILING */
+
+#ifdef CONFIG_SYSCTL
+        if (portals_table_header != NULL)
+                unregister_sysctl_table(portals_table_header);
+        portals_table_header = NULL;
+#endif
+}
--- /dev/null
+# Copyright (C) 2002 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+
+CPPFLAGS=
+INCLUDES=-I$(top_srcdir)/portals/include -I$(top_srcdir)/include
+lib_LIBRARIES= libportals.a
+libportals_a_SOURCES= api-eq.c api-init.c api-me.c api-errno.c api-md.c api-ni.c api-wrap.c lib-dispatch.c lib-init.c lib-me.c lib-msg.c lib-not-impl.c lib-eq.c lib-md.c lib-move.c lib-ni.c lib-pid.c
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+include ../Kernelenv
+
+obj-y += portals.o
+portals-objs := lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o lib-move.o lib-msg.o lib-ni.o lib-not-impl.o lib-pid.o api-eq.o api-errno.o api-init.o api-md.o api-me.o api-ni.o api-wrap.o
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * api/api-eq.c
+ * User-level event queue management routines
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * PtlMDUpdate is here so that it can access the per-eventq
+ * structures.
+ */
+
+#include <portals/api-support.h>
+
+/* Library-wide event queue setup, called from PtlInit(); always PTL_OK. */
+int ptl_eq_init(void)
+{
+ /* Nothing to do anymore... */
+ return PTL_OK;
+}
+
+/* Library-wide event queue teardown, called from PtlFini(). */
+void ptl_eq_fini(void)
+{
+ /* Nothing to do anymore... */
+}
+
+/* Per-interface event queue setup, called from PtlNIInit(); always PTL_OK. */
+int ptl_eq_ni_init(nal_t * nal)
+{
+ /* Nothing to do anymore... */
+ return PTL_OK;
+}
+
+/* Per-interface event queue teardown, called from PtlNIFini(). */
+void ptl_eq_ni_fini(nal_t * nal)
+{
+ /* Nothing to do anymore... */
+}
+
+/*
+ * Non-blocking fetch of the next event from a user-level event queue.
+ *
+ * eventq: handle of the queue; its nal_idx selects the NAL and
+ *         ptl_handle2usereq() maps it to the ptl_eq_t that is read.
+ * ev:     receives a copy of the event slot for the current sequence number.
+ *
+ * Returns PTL_NOINIT if the library is not initialised, PTL_INV_EQ if the
+ * handle does not map to a NAL, PTL_EQ_EMPTY if PTL_SEQ_GT() says the slot
+ * has not caught up with the expected sequence number, PTL_EQ_DROPPED if
+ * the slot carries a different sequence number than expected, and PTL_OK
+ * otherwise.  The queue is inspected and advanced under nal->lock().
+ */
+int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t * ev)
+{
+ ptl_eq_t *eq;
+ int rc, new_index;
+ unsigned long flags;
+ ptl_event_t *new_event;
+ nal_t *nal;
+ ENTRY;
+
+ if (!ptl_init)
+ RETURN(PTL_NOINIT);
+
+ nal = ptl_hndl2nal(&eventq);
+ if (!nal)
+ RETURN(PTL_INV_EQ);
+
+ eq = ptl_handle2usereq(&eventq);
+ nal->lock(nal, &flags);
+
+ /* size must be a power of 2 to handle a wrapped sequence # */
+ LASSERT (eq->size != 0 &&
+ eq->size == LOWEST_BIT_SET (eq->size));
+
+ /* the slot index is the sequence number modulo the queue size */
+ new_index = eq->sequence & (eq->size - 1);
+ new_event = &eq->base[new_index];
+ CDEBUG(D_INFO, "new_event: %p, sequence: %lu, eq->size: %u\n",
+ new_event, eq->sequence, eq->size);
+ if (PTL_SEQ_GT (eq->sequence, new_event->sequence)) {
+ nal->unlock(nal, &flags);
+ RETURN(PTL_EQ_EMPTY);
+ }
+
+ /* copy the slot out before its sequence number is re-checked below */
+ *ev = *new_event;
+
+ /* Set the unlinked_me interface number if there is one to pass
+ * back, since the NAL hasn't a clue what it is and therefore can't
+ * set it. */
+ if (!PtlHandleEqual (ev->unlinked_me, PTL_HANDLE_NONE))
+ ev->unlinked_me.nal_idx = eventq.nal_idx;
+
+ /* ensure event is delivered correctly despite possible
+ races with lib_finalize */
+ if (eq->sequence != new_event->sequence) {
+ CERROR("DROPPING EVENT: eq seq %lu ev seq %lu\n",
+ eq->sequence, new_event->sequence);
+ rc = PTL_EQ_DROPPED;
+ } else {
+ rc = PTL_OK;
+ }
+
+ /* continue after whatever sequence number the slot actually held */
+ eq->sequence = new_event->sequence + 1;
+ nal->unlock(nal, &flags);
+ RETURN(rc);
+}
+
+
+/*
+ * Blocking variant of PtlEQGet(): keep polling the queue until it yields
+ * anything other than PTL_EQ_EMPTY, calling the NAL's yield hook (if it
+ * has one) between polls.  Returns the final PtlEQGet() result.
+ */
+int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out)
+{
+        for (;;) {
+                nal_t *owner;
+                /* PtlEQGet does the handle checking */
+                int status = PtlEQGet(eventq_in, event_out);
+
+                if (status != PTL_EQ_EMPTY)
+                        return status;
+
+                owner = ptl_hndl2nal(&eventq_in);
+                if (owner->yield != NULL)
+                        owner->yield(owner);
+        }
+}
+
+#ifndef __KERNEL__
+/* Jump target armed by PtlEQWait_timeout() via setjmp(). */
+static jmp_buf eq_jumpbuf;
+
+/*
+ * SIGALRM handler installed by PtlEQWait_timeout(): abandons the wait by
+ * jumping back to the setjmp() point with a non-zero value.
+ */
+static void eq_timeout(int signal)
+{
+ longjmp(eq_jumpbuf, -1);
+}
+
+/*
+ * Like PtlEQWait(), but give up after 'timeout' seconds.
+ *
+ * The wait is bounded with alarm()/SIGALRM: the handler longjmp()s back
+ * here and PTL_EQ_EMPTY is returned.  Any alarm and SIGALRM handler the
+ * caller already had are saved and put back before returning.
+ *
+ * Returns the PtlEQWait() result, or PTL_EQ_EMPTY if the alarm fired first.
+ */
+int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out,
+                      int timeout)
+{
+        /* static: must keep their values across the longjmp() */
+        static void (*prev) (int);
+        static int left_over;
+        int rc;
+
+        if (setjmp(eq_jumpbuf)) {
+                /* reached through longjmp() from eq_timeout() */
+                signal(SIGALRM, prev);
+                /* re-arm what is left of the caller's alarm, if any; never
+                 * hand alarm() a negative (i.e. huge unsigned) value */
+                alarm(left_over > timeout ? left_over - timeout : 0);
+                return PTL_EQ_EMPTY;
+        }
+
+        left_over = alarm(timeout);
+        prev = signal(SIGALRM, eq_timeout);
+        /* Honour an earlier pending alarm of the caller.  left_over == 0
+         * means there was none: alarm(0) here would cancel the timeout we
+         * just armed and the wait would never end. */
+        if (left_over != 0 && left_over < timeout)
+                alarm(left_over);
+
+        rc = PtlEQWait(eventq_in, event_out);
+
+        signal(SIGALRM, prev);
+        alarm(left_over);       /* Should compute how long we waited */
+
+        return rc;
+}
+
+#endif
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * api/api-errno.c
+ * Instantiate the string table of errors
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <portals/api-support.h>
+
+/*
+ * Names of the Portals return codes, one string per code.
+ * NOTE(review): presumably indexed by the numeric code value, hence the
+ * ordering constraint below -- confirm against portals/errno.h.
+ *
+ * If you change these, you must update the number table in portals/errno.h
+ */
+const char *ptl_err_str[] = {
+ "PTL_OK",
+ "PTL_SEGV",
+
+ "PTL_NOSPACE",
+ "PTL_INUSE",
+ "PTL_VAL_FAILED",
+
+ "PTL_NAL_FAILED",
+ "PTL_NOINIT",
+ "PTL_INIT_DUP",
+ "PTL_INIT_INV",
+ "PTL_AC_INV_INDEX",
+
+ "PTL_INV_ASIZE",
+ "PTL_INV_HANDLE",
+ "PTL_INV_MD",
+ "PTL_INV_ME",
+ "PTL_INV_NI",
+/* If you change these, you must update the number table in portals/errno.h */
+ "PTL_ILL_MD",
+ "PTL_INV_PROC",
+ "PTL_INV_PSIZE",
+ "PTL_INV_PTINDEX",
+ "PTL_INV_REG",
+
+ "PTL_INV_SR_INDX",
+ "PTL_ML_TOOLONG",
+ "PTL_ADDR_UNKNOWN",
+ "PTL_INV_EQ",
+ "PTL_EQ_DROPPED",
+
+ "PTL_EQ_EMPTY",
+ "PTL_NOUPDATE",
+ "PTL_FAIL",
+ "PTL_NOT_IMPLEMENTED",
+ "PTL_NO_ACK",
+
+ "PTL_IOV_TOO_MANY",
+ "PTL_IOV_TOO_SMALL",
+
+ "PTL_EQ_INUSE",
+ "PTL_MD_INUSE"
+};
+/* If you change these, you must update the number table in portals/errno.h */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * api/api-init.c
+ * Initialization and global data for the p30 user side library
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * All handles have their interface number stored in the second 16 bit word
+ */
+
+#include <portals/api-support.h>
+
+/* Non-zero between PtlInit() and PtlFini(); the API entry points test it. */
+int ptl_init;
+/* Debug controls.  NOTE(review): presumably bit masks consumed by the
+ * CDEBUG/CERROR machinery -- confirm where they are read. */
+unsigned int portal_subsystem_debug = 0xfff7e3ff;
+unsigned int portal_debug = ~0;
+unsigned int portal_printk;
+unsigned int portal_stack;
+
+#ifdef __KERNEL__
+/* NOTE(review): looks like a running count of kernel memory allocated
+ * through the PORTAL_ALLOC family -- confirm. */
+atomic_t portal_kmemory = ATOMIC_INIT(0);
+#endif
+
+/* Set to 1 by PtlInit(); nothing in this file clears it again. */
+int __p30_initialized;
+/* The remaining __p30_* / __*_ni_handle globals are only defined here;
+ * their users are outside this file. */
+int __p30_myr_initialized;
+int __p30_ip_initialized;
+ptl_handle_ni_t __myr_ni_handle;
+ptl_handle_ni_t __ip_ni_handle;
+
+int __p30_myr_timeout = 10;
+int __p30_ip_timeout;
+
+/*
+ * Bring up the API side of the library: interface table, match entries,
+ * event queues, in that order.  Calling it again while initialised is a
+ * no-op.  Always returns PTL_OK.
+ */
+int PtlInit(void)
+{
+        if (!ptl_init) {
+                ptl_ni_init();
+                ptl_me_init();
+                ptl_eq_init();
+
+                /* both "initialised" flags are raised together */
+                ptl_init = 1;
+                __p30_initialized = 1;
+        }
+
+        return PTL_OK;
+}
+
+
+/*
+ * Tear the API side of the library down again and clear ptl_init, so
+ * that subsequent API calls report PTL_NOINIT.
+ */
+void PtlFini(void)
+{
+
+ /* Reverse order of initialization */
+ ptl_eq_fini();
+ ptl_me_fini();
+ ptl_ni_fini();
+ ptl_init = 0;
+}
--- /dev/null
+/*
+ * api-p30/md.c
+ *
+ * Memory descriptor functions that need address validation
+ * There are a few standing issues...
+ * - Addresses are invalidated by the library without telling us.
+ */
+#include <portals/api-support.h>
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * api/api-me.c
+ * Match Entry local operations.
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <portals/api-support.h>
+
+/* Library-wide match entry setup, called from PtlInit(); always PTL_OK. */
+int ptl_me_init(void)
+{
+ return PTL_OK;
+}
+/* Library-wide match entry teardown, called from PtlFini(). */
+void ptl_me_fini(void)
+{ /* Nothing to do */
+}
+/* Per-interface match entry setup, called from PtlNIInit(); always PTL_OK. */
+int ptl_me_ni_init(nal_t * nal)
+{
+ return PTL_OK;
+}
+
+/* Per-interface match entry teardown, called from PtlNIFini(). */
+void ptl_me_ni_fini(nal_t * nal)
+{ /* Nothing to do... */
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * api/api-ni.c
+ * Network Interface code
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <portals/api-support.h>
+
+/* Capacity of the interface table below. */
+#define MAX_NIS 8
+/* Active NALs, indexed by the nal_idx stored in every handle; NULL = free. */
+static nal_t *ptl_interfaces[MAX_NIS];
+/* Number of interfaces currently registered in ptl_interfaces[]. */
+int ptl_num_interfaces = 0;
+
+/*
+ * Map a handle to the NAL it belongs to, using the handle's nal_idx.
+ * Returns NULL when the index is outside the interface table; the table
+ * slot itself may also be NULL.
+ */
+nal_t *ptl_hndl2nal(ptl_handle_any_t *handle)
+{
+        unsigned int slot = handle->nal_idx;
+
+        /* XXX we really rely on the caller NOT racing with interface
+         * setup/teardown.  That ensures her NI handle can't get
+         * invalidated out from under her (or worse, swapped for a
+         * completely different interface!) */
+
+        return (slot >= MAX_NIS) ? NULL : ptl_interfaces[slot];
+}
+
+/*
+ * Reset the interface table to "no interfaces".  Called from PtlInit().
+ * Always returns PTL_OK.
+ */
+int ptl_ni_init(void)
+{
+        int i;
+
+        for (i = 0; i < MAX_NIS; i++)
+                ptl_interfaces[i] = NULL;
+
+        /* ptl_ni_fini() shuts interfaces down without touching the count,
+         * so it has to be brought back in line with the emptied table
+         * here; otherwise slots are lost on every PtlFini()/PtlInit(). */
+        ptl_num_interfaces = 0;
+
+        return PTL_OK;
+}
+
+/*
+ * Walk the whole interface table and invoke the shutdown hook of every
+ * registered NAL that provides one, passing it its table index.
+ */
+void ptl_ni_fini(void)
+{
+        int idx;
+
+        for (idx = 0; idx < MAX_NIS; idx++) {
+                nal_t *iface = ptl_interfaces[idx];
+
+                if (iface != NULL && iface->shutdown)
+                        iface->shutdown(iface, idx);
+        }
+}
+
+/*
+ * Serialisation of PtlNIInit()/PtlNIFini(): a real mutex in the kernel
+ * build, empty stubs in the user-space build.
+ */
+#ifdef __KERNEL__
+DECLARE_MUTEX(ptl_ni_init_mutex);
+
+/* Take the interface setup/teardown mutex. */
+static void ptl_ni_init_mutex_enter (void)
+{
+ down (&ptl_ni_init_mutex);
+}
+
+/* Release the interface setup/teardown mutex. */
+static void ptl_ni_init_mutex_exit (void)
+{
+ up (&ptl_ni_init_mutex);
+}
+
+#else
+/* User-space build: no locking is done. */
+static void ptl_ni_init_mutex_enter (void)
+{
+}
+
+/* User-space build: no locking is done. */
+static void ptl_ni_init_mutex_exit (void)
+{
+}
+
+#endif
+
+/*
+ * Start up a network interface and hand back a handle for it.
+ *
+ * interface:     NAL startup function; it is given the table index the new
+ *                interface will occupy plus the three sizing arguments.
+ * ptl_size, acl_size, requested_pid: passed through to 'interface'.
+ * handle:        receives the index of the interface in its nal_idx field.
+ *
+ * If 'interface' returns a NAL that is already registered, its reference
+ * count is bumped and the existing index is returned.
+ *
+ * Returns PTL_NOINIT if the library is not initialised, PTL_NAL_FAILED if
+ * the NAL could not be started, PTL_NOSPACE if the interface table is
+ * full, PTL_OK otherwise.
+ */
+int PtlNIInit(ptl_interface_t interface, ptl_pt_index_t ptl_size,
+              ptl_ac_index_t acl_size, ptl_pid_t requested_pid,
+              ptl_handle_ni_t * handle)
+{
+        nal_t *nal;
+        int i;
+        int slot = MAX_NIS;     /* MAX_NIS: no free slot found */
+
+        if (!ptl_init)
+                return PTL_NOINIT;
+
+        ptl_ni_init_mutex_enter ();
+
+        /* PtlNIFini() clears the slot it frees, which can leave holes
+         * below the highest slot in use.  The interface count is therefore
+         * not a safe index: pick the lowest slot that is really free so a
+         * live interface is never overwritten. */
+        for (i = 0; i < MAX_NIS; i++) {
+                if (ptl_interfaces[i] == NULL) {
+                        slot = i;
+                        break;
+                }
+        }
+
+        nal = interface(slot, ptl_size, acl_size, requested_pid);
+
+        if (!nal) {
+                ptl_ni_init_mutex_exit ();
+                return PTL_NAL_FAILED;
+        }
+
+        /* look at every slot: a shared NAL may sit above a hole */
+        for (i = 0; i < MAX_NIS; i++) {
+                if (ptl_interfaces[i] == nal) {
+                        nal->refct++;
+                        handle->nal_idx = i;
+                        fprintf(stderr, "Returning existing NAL (%d)\n", i);
+                        ptl_ni_init_mutex_exit ();
+                        return PTL_OK;
+                }
+        }
+        nal->refct = 1;
+
+        handle->nal_idx = slot;
+        if (slot >= MAX_NIS) {
+                /* table full: undo the startup we just asked for */
+                if (nal->shutdown)
+                        nal->shutdown (nal, slot);
+                ptl_ni_init_mutex_exit ();
+                return PTL_NOSPACE;
+        }
+
+        ptl_interfaces[slot] = nal;
+        ptl_num_interfaces++;
+
+        ptl_eq_ni_init(nal);
+        ptl_me_ni_init(nal);
+
+        ptl_ni_init_mutex_exit ();
+        return PTL_OK;
+}
+
+
+/*
+ * Drop one reference on a network interface.  When the last reference
+ * goes, the per-interface ME and EQ state is finalised, the NAL's
+ * shutdown hook (if any) is run and the table slot is released.
+ *
+ * Returns PTL_NOINIT if the library is not initialised, PTL_INV_HANDLE if
+ * the handle maps to no NAL, the shutdown hook's result when it ran, and
+ * PTL_OK otherwise.
+ */
+int PtlNIFini(ptl_handle_ni_t ni)
+{
+        nal_t *iface;
+        int status = PTL_OK;
+
+        if (!ptl_init)
+                return PTL_NOINIT;
+
+        ptl_ni_init_mutex_enter ();
+
+        iface = ptl_hndl2nal (&ni);
+        if (iface == NULL) {
+                status = PTL_INV_HANDLE;
+                goto unlock;
+        }
+
+        /* other users remain: nothing more to do */
+        iface->refct--;
+        if (iface->refct > 0)
+                goto unlock;
+
+        ptl_me_ni_fini(iface);
+        ptl_eq_ni_fini(iface);
+
+        if (iface->shutdown)
+                status = iface->shutdown(iface, ni.nal_idx);
+
+        ptl_interfaces[ni.nal_idx] = NULL;
+        ptl_num_interfaces--;
+
+unlock:
+        ptl_ni_init_mutex_exit ();
+        return status;
+}
+
+/*
+ * Produce the NI handle that 'handle_in' belongs to.  The handle is
+ * copied to *ni_out unchanged and no validation is done; always PTL_OK.
+ */
+int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * ni_out)
+{
+ *ni_out = handle_in;
+
+ return PTL_OK;
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * api/api-wrap.c
+ * User-level wrappers that dispatch across the protection boundaries
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Assumes the handle encodes the network number in the second 16 bit word
+ */
+
+# define DEBUG_SUBSYSTEM S_PORTALS
+#include <portals/api-support.h>
+
+/*
+ * Common path of the API wrappers: find the NAL that owns 'any_h' and
+ * have it forward command 'cmd' with the given argument and return
+ * blocks.
+ *
+ * Returns PTL_NOINIT if the library is not initialised, PTL_INV_HANDLE if
+ * the handle maps to no NAL, PTL_OK once the call has been forwarded.
+ * The result of the forwarded operation itself is delivered in 'retbuf'.
+ */
+static int do_forward(ptl_handle_any_t any_h, int cmd, void *argbuf,
+                      int argsize, void *retbuf, int retsize)
+{
+        nal_t *nal;
+
+        if (!ptl_init) {
+                /* every wrapper comes through here, so do not blame one
+                 * particular API call in the diagnostic */
+                fprintf(stderr, "do_forward: Not initialized\n");
+                return PTL_NOINIT;
+        }
+
+        nal = ptl_hndl2nal(&any_h);
+        if (!nal)
+                return PTL_INV_HANDLE;
+
+        nal->forward(nal, cmd, argbuf, argsize, retbuf, retsize);
+
+        return PTL_OK;
+}
+
+/*
+ * Forward PTL_GETID for 'ni_handle'.  On a successful forward the id is
+ * stored in *id (when id is non-NULL) and the operation's own result code
+ * is returned; otherwise the forwarding error is returned.
+ */
+int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id)
+{
+        PtlGetId_in request;
+        PtlGetId_out reply;
+        int status;
+
+        request.handle_in = ni_handle;
+
+        status = do_forward(ni_handle, PTL_GETID, &request, sizeof(request),
+                            &reply, sizeof(reply));
+        if (status == PTL_OK) {
+                if (id)
+                        *id = reply.id_out;
+                status = reply.rc;
+        }
+
+        return status;
+}
+
+/*
+ * Forward PTL_FAILNID with the given nid and threshold.  Returns the
+ * forwarding error if there was one, else the operation's result code.
+ */
+int PtlFailNid (ptl_handle_ni_t interface, ptl_nid_t nid, unsigned int threshold)
+{
+        PtlFailNid_in request;
+        PtlFailNid_out reply;
+        int status;
+
+        request.interface = interface;
+        request.nid = nid;
+        request.threshold = threshold;
+
+        status = do_forward (interface, PTL_FAILNID, &request,
+                             sizeof(request), &reply, sizeof (reply));
+        if (status != PTL_OK)
+                return status;
+
+        return reply.rc;
+}
+
+/*
+ * Forward PTL_NISTATUS for one status register.  On a successful forward
+ * the value is stored in *status_out (when non-NULL) and the operation's
+ * result code is returned; otherwise the forwarding error is returned.
+ */
+int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in,
+                ptl_sr_value_t * status_out)
+{
+        PtlNIStatus_in request;
+        PtlNIStatus_out reply;
+        int status;
+
+        request.interface_in = interface_in;
+        request.register_in = register_in;
+
+        status = do_forward(interface_in, PTL_NISTATUS, &request,
+                            sizeof(request), &reply, sizeof(reply));
+        if (status == PTL_OK) {
+                if (status_out)
+                        *status_out = reply.status_out;
+                status = reply.rc;
+        }
+
+        return status;
+}
+
+/*
+ * Forward PTL_NIDIST for 'process_in'.  On a successful forward the
+ * distance is stored in *distance_out (when non-NULL) and the operation's
+ * result code is returned; otherwise the forwarding error is returned.
+ */
+int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in,
+              unsigned long *distance_out)
+{
+        PtlNIDist_in request;
+        PtlNIDist_out reply;
+        int status;
+
+        request.interface_in = interface_in;
+        request.process_in = process_in;
+
+        status = do_forward(interface_in, PTL_NIDIST, &request,
+                            sizeof(request), &reply, sizeof(reply));
+        if (status == PTL_OK) {
+                if (distance_out)
+                        *distance_out = reply.distance_out;
+                status = reply.rc;
+        }
+
+        return status;
+}
+
+
+
+/*
+ * Forward PTL_NIDEBUG with the mask 'mask_in'.  Returns the forwarding
+ * error if there was one, else the rc field of the reply.
+ */
+unsigned int PtlNIDebug(ptl_handle_ni_t ni, unsigned int mask_in)
+{
+        PtlNIDebug_in request;
+        PtlNIDebug_out reply;
+        int status;
+
+        request.mask_in = mask_in;
+
+        status = do_forward(ni, PTL_NIDEBUG, &request, sizeof(request),
+                            &reply, sizeof(reply));
+        if (status == PTL_OK)
+                return reply.rc;
+
+        return status;
+}
+
+/*
+ * Forward PTL_MEATTACH with the given match criteria.  On a successful
+ * forward *handle_out (when non-NULL) gets the interface's nal_idx and
+ * the cookie from the reply, and the operation's result code is
+ * returned; otherwise the forwarding error is returned.
+ */
+int PtlMEAttach(ptl_handle_ni_t interface_in, ptl_pt_index_t index_in,
+                ptl_process_id_t match_id_in, ptl_match_bits_t match_bits_in,
+                ptl_match_bits_t ignore_bits_in, ptl_unlink_t unlink_in,
+                ptl_ins_pos_t pos_in, ptl_handle_me_t * handle_out)
+{
+        PtlMEAttach_in request;
+        PtlMEAttach_out reply;
+        int status;
+
+        request.interface_in = interface_in;
+        request.index_in = index_in;
+        request.match_id_in = match_id_in;
+        request.match_bits_in = match_bits_in;
+        request.ignore_bits_in = ignore_bits_in;
+        request.unlink_in = unlink_in;
+        request.position_in = pos_in;
+
+        status = do_forward(interface_in, PTL_MEATTACH, &request,
+                            sizeof(request), &reply, sizeof(reply));
+        if (status != PTL_OK)
+                return status;
+
+        if (handle_out != NULL) {
+                handle_out->nal_idx = interface_in.nal_idx;
+                handle_out->cookie = reply.handle_out.cookie;
+        }
+
+        return reply.rc;
+}
+
+/*
+ * Forward PTL_MEINSERT relative to the match entry 'current_in'.  A
+ * PTL_INV_HANDLE from the forwarding step is reported as PTL_INV_ME.  On
+ * a successful forward *handle_out (when non-NULL) gets current_in's
+ * nal_idx and the cookie from the reply, and the operation's result code
+ * is returned.
+ */
+int PtlMEInsert(ptl_handle_me_t current_in, ptl_process_id_t match_id_in,
+                ptl_match_bits_t match_bits_in, ptl_match_bits_t ignore_bits_in,
+                ptl_unlink_t unlink_in, ptl_ins_pos_t position_in,
+                ptl_handle_me_t * handle_out)
+{
+        PtlMEInsert_in request;
+        PtlMEInsert_out reply;
+        int status;
+
+        request.current_in = current_in;
+        request.match_id_in = match_id_in;
+        request.match_bits_in = match_bits_in;
+        request.ignore_bits_in = ignore_bits_in;
+        request.unlink_in = unlink_in;
+        request.position_in = position_in;
+
+        status = do_forward(current_in, PTL_MEINSERT, &request,
+                            sizeof(request), &reply, sizeof(reply));
+        if (status == PTL_INV_HANDLE)
+                return PTL_INV_ME;
+        if (status != PTL_OK)
+                return status;
+
+        if (handle_out != NULL) {
+                handle_out->nal_idx = current_in.nal_idx;
+                handle_out->cookie = reply.handle_out.cookie;
+        }
+
+        return reply.rc;
+}
+
+/*
+ * Forward PTL_MEUNLINK for 'current_in' (always with PTL_RETAIN as the
+ * unlink argument).  A PTL_INV_HANDLE from the forwarding step is
+ * reported as PTL_INV_ME; otherwise the forwarding error or, on success,
+ * the operation's result code is returned.
+ */
+int PtlMEUnlink(ptl_handle_me_t current_in)
+{
+        PtlMEUnlink_in request;
+        PtlMEUnlink_out reply;
+        int status;
+
+        request.current_in = current_in;
+        request.unlink_in = PTL_RETAIN;
+
+        status = do_forward(current_in, PTL_MEUNLINK, &request,
+                            sizeof(request), &reply, sizeof(reply));
+        if (status == PTL_INV_HANDLE)
+                return PTL_INV_ME;
+        if (status != PTL_OK)
+                return status;
+
+        return reply.rc;
+}
+
+/*
+ * Forward PTL_TBLDUMP for table index 'index_in'.  Returns the
+ * forwarding error if there was one, else the operation's result code.
+ */
+int PtlTblDump(ptl_handle_ni_t ni, int index_in)
+{
+        PtlTblDump_in request;
+        PtlTblDump_out reply;
+        int status;
+
+        request.index_in = index_in;
+
+        status = do_forward(ni, PTL_TBLDUMP, &request, sizeof(request),
+                            &reply, sizeof(reply));
+        if (status == PTL_OK)
+                status = reply.rc;
+
+        return status;
+}
+
+/*
+ * Forward PTL_MEDUMP for 'current_in'.  A PTL_INV_HANDLE from the
+ * forwarding step is reported as PTL_INV_ME; otherwise the forwarding
+ * error or, on success, the operation's result code is returned.
+ */
+int PtlMEDump(ptl_handle_me_t current_in)
+{
+        PtlMEDump_in request;
+        PtlMEDump_out reply;
+        int status;
+
+        request.current_in = current_in;
+
+        status = do_forward(current_in, PTL_MEDUMP, &request,
+                            sizeof(request), &reply, sizeof(reply));
+        if (status == PTL_INV_HANDLE)
+                return PTL_INV_ME;
+        if (status != PTL_OK)
+                return status;
+
+        return reply.rc;
+}
+
+/*
+ * Ask the NAL owning 'current_in' to validate the memory described by
+ * 'md_in': the single start/length region, or every iovec entry when
+ * PTL_MD_IOV is set in the options.  NALs without a validate hook accept
+ * everything.
+ *
+ * Returns PTL_NOINIT if the library is not initialised, PTL_INV_HANDLE if
+ * the handle maps to no NAL, PTL_SEGV if the NAL rejects a region, and 0
+ * otherwise.
+ */
+static int validate_md(ptl_handle_any_t current_in, ptl_md_t md_in)
+{
+        nal_t *nal;
+        int rc;
+        int i;
+
+        if (!ptl_init) {
+                fprintf(stderr, "PtlMDAttach/Bind/Update: Not initialized\n");
+                return PTL_NOINIT;
+        }
+
+        /* the argument used to read "¤t_in": "&current_in" mangled by an
+         * HTML entity decoder */
+        nal = ptl_hndl2nal(&current_in);
+        if (!nal)
+                return PTL_INV_HANDLE;
+
+        if (nal->validate != NULL)                /* nal->validate not a NOOP */
+        {
+                if ((md_in.options & PTL_MD_IOV) == 0)        /* contiguous */
+                {
+                        rc = nal->validate (nal, md_in.start, md_in.length);
+                        if (rc)
+                                return (PTL_SEGV);
+                }
+                else
+                {
+                        /* in IOV mode 'start' points at an iovec array of
+                         * md_in.niov entries */
+                        struct iovec *iov = (struct iovec *)md_in.start;
+
+                        for (i = 0; i < md_in.niov; i++, iov++)
+                        {
+                                rc = nal->validate (nal, iov->iov_base, iov->iov_len);
+                                if (rc)
+                                        return (PTL_SEGV);
+                        }
+                }
+        }
+
+        return 0;
+}
+
+/*
+ * Translate the user-level EQ handle stored in an MD into the handle that
+ * is passed through do_forward(): PTL_EQ_NONE stays PTL_EQ_NONE, anything
+ * else is replaced by the cb_eq_handle of the user-level queue.
+ */
+static ptl_handle_eq_t md2eq (ptl_md_t *md)
+{
+        if (!PtlHandleEqual (md->eventq, PTL_EQ_NONE))
+                return (ptl_handle2usereq (&md->eventq)->cb_eq_handle);
+
+        return (PTL_EQ_NONE);
+}
+
+
+/*
+ * Validate 'md_in' and forward PTL_MDATTACH to attach it to 'me_in'.  A
+ * PTL_INV_HANDLE from validation or forwarding is reported as
+ * PTL_INV_ME.  On a successful forward *handle_out (when non-NULL) gets
+ * me_in's nal_idx and the cookie from the reply, and the operation's
+ * result code is returned.
+ */
+int PtlMDAttach(ptl_handle_me_t me_in, ptl_md_t md_in,
+                ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out)
+{
+        PtlMDAttach_in request;
+        PtlMDAttach_out reply;
+        int status;
+
+        status = validate_md(me_in, md_in);
+        if (status != PTL_OK)
+                return (status == PTL_INV_HANDLE) ? PTL_INV_ME : status;
+
+        request.eq_in = md2eq(&md_in);
+        request.me_in = me_in;
+        request.md_in = md_in;
+        request.unlink_in = unlink_in;
+
+        status = do_forward(me_in, PTL_MDATTACH, &request, sizeof(request),
+                            &reply, sizeof(reply));
+        if (status != PTL_OK)
+                return (status == PTL_INV_HANDLE) ? PTL_INV_ME : status;
+
+        if (handle_out != NULL) {
+                handle_out->nal_idx = me_in.nal_idx;
+                handle_out->cookie = reply.handle_out.cookie;
+        }
+
+        return reply.rc;
+}
+
+
+
+/*
+ * Validate 'md_in' and forward PTL_MDBIND for interface 'ni_in'.
+ * Validation and forwarding errors are returned as they are.  On a
+ * successful forward *handle_out (when non-NULL) gets ni_in's nal_idx and
+ * the cookie from the reply, and the operation's result code is returned.
+ */
+int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in,
+              ptl_handle_md_t * handle_out)
+{
+        PtlMDBind_in request;
+        PtlMDBind_out reply;
+        int status = validate_md(ni_in, md_in);
+
+        if (status != PTL_OK)
+                return status;
+
+        request.eq_in = md2eq(&md_in);
+        request.ni_in = ni_in;
+        request.md_in = md_in;
+
+        status = do_forward(ni_in, PTL_MDBIND, &request, sizeof(request),
+                            &reply, sizeof(reply));
+        if (status != PTL_OK)
+                return status;
+
+        if (handle_out != NULL) {
+                handle_out->nal_idx = ni_in.nal_idx;
+                handle_out->cookie = reply.handle_out.cookie;
+        }
+
+        return reply.rc;
+}
+
+/*
+ * Forward PTL_MDUPDATE for 'md_in'.
+ *
+ * old_inout: optional; sent along when non-NULL and overwritten with the
+ *            reply's old_inout after a successful forward.
+ * new_inout: optional; validated and sent along when non-NULL.
+ * testq_in:  PTL_EQ_NONE, or a user-level EQ whose forwarding handle and
+ *            current sequence number are sent along.
+ *
+ * A PTL_INV_HANDLE from validation or forwarding is reported as
+ * PTL_INV_MD; otherwise the forwarding error or, on success, the
+ * operation's result code is returned.
+ */
+int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t *old_inout,
+                ptl_md_t *new_inout, ptl_handle_eq_t testq_in)
+{
+        PtlMDUpdate_internal_in request;
+        PtlMDUpdate_internal_out reply;
+        int status;
+
+        request.md_in = md_in;
+
+        if (old_inout == NULL) {
+                request.old_inout_valid = 0;
+        } else {
+                request.old_inout = *old_inout;
+                request.old_inout_valid = 1;
+        }
+
+        if (new_inout == NULL) {
+                request.new_inout_valid = 0;
+        } else {
+                status = validate_md (md_in, *new_inout);
+                if (status != PTL_OK)
+                        return (status == PTL_INV_HANDLE) ? PTL_INV_MD : status;
+                request.new_inout = *new_inout;
+                request.new_inout_valid = 1;
+        }
+
+        if (!PtlHandleEqual (testq_in, PTL_EQ_NONE)) {
+                ptl_eq_t *user_eq = ptl_handle2usereq (&testq_in);
+
+                request.testq_in = user_eq->cb_eq_handle;
+                request.sequence_in = user_eq->sequence;
+        } else {
+                request.testq_in = PTL_EQ_NONE;
+                request.sequence_in = -1;
+        }
+
+        status = do_forward(md_in, PTL_MDUPDATE, &request, sizeof(request),
+                            &reply, sizeof(reply));
+        if (status != PTL_OK)
+                return (status == PTL_INV_HANDLE) ? PTL_INV_MD : status;
+
+        if (old_inout != NULL)
+                *old_inout = reply.old_inout;
+
+        return reply.rc;
+}
+
+/*
+ * Forward PTL_MDUNLINK for 'md_in'.  A PTL_INV_HANDLE from the forwarding
+ * step is reported as PTL_INV_MD; otherwise the forwarding error or, on
+ * success, the operation's result code is returned.
+ */
+int PtlMDUnlink(ptl_handle_md_t md_in)
+{
+        PtlMDUnlink_in request;
+        PtlMDUnlink_out reply;
+        int status;
+
+        request.md_in = md_in;
+
+        status = do_forward(md_in, PTL_MDUNLINK, &request, sizeof(request),
+                            &reply, sizeof(reply));
+        if (status == PTL_INV_HANDLE)
+                return PTL_INV_MD;
+        if (status != PTL_OK)
+                return status;
+
+        return reply.rc;
+}
+
+/*
+ * Create an event queue on 'interface'.
+ *
+ * count:      requested number of events; rounded up to a power of 2.
+ * callback:   passed through to the library with the PTL_EQALLOC request.
+ * handle_out: receives the user-level EQ handle (interface index plus a
+ *             cookie that is the address of the local ptl_eq_t).
+ *
+ * Returns PTL_NOINIT if the library is not initialised, PTL_INV_HANDLE if
+ * the interface handle maps to no NAL, PTL_VAL_FAILED if count is 0 or
+ * overflows when rounded up, PTL_NOSPACE if memory runs out, the NAL's
+ * validation result or the library's result code on failure, and PTL_OK
+ * on success.
+ */
+int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count,
+               int (*callback) (ptl_event_t * event),
+               ptl_handle_eq_t * handle_out)
+{
+        ptl_eq_t *eq = NULL;
+        ptl_event_t *ev = NULL;
+        PtlEQAlloc_in args;
+        PtlEQAlloc_out ret;
+        int rc, i;
+        nal_t *nal;
+
+        if (!ptl_init)
+                return PTL_NOINIT;
+
+        nal = ptl_hndl2nal (&interface);
+        if (nal == NULL)
+                return PTL_INV_HANDLE;
+
+        if (count != LOWEST_BIT_SET(count)) {   /* not a power of 2 already */
+                do {                    /* knock off all but the top bit... */
+                        count &= ~LOWEST_BIT_SET (count);
+                } while (count != LOWEST_BIT_SET(count));
+
+                count <<= 1;                             /* ...and round up */
+        }
+
+        if (count == 0)        /* catch bad parameter / overflow on roundup */
+                return (PTL_VAL_FAILED);
+
+        PORTAL_ALLOC(ev, count * sizeof(ptl_event_t));
+        if (!ev)
+                return PTL_NOSPACE;
+
+        /* Get the local descriptor before talking to the library: if this
+         * allocation failed only after PTL_EQALLOC had succeeded, the
+         * library-side queue would be left behind with nobody to free it. */
+        PORTAL_ALLOC(eq, sizeof(*eq));
+        if (!eq) {
+                rc = PTL_NOSPACE;
+                goto fail_ev;
+        }
+
+        for (i = 0; i < count; i++)
+                ev[i].sequence = 0;
+
+        if (nal->validate != NULL) {
+                rc = nal->validate(nal, ev, count * sizeof(ptl_event_t));
+                if (rc != PTL_OK)
+                        goto fail;
+        }
+
+        args.ni_in = interface;
+        args.count_in = count;
+        args.base_in = ev;
+        args.len_in = count * sizeof(*ev);
+        args.callback_in = callback;
+
+        rc = do_forward(interface, PTL_EQALLOC, &args, sizeof(args), &ret,
+                        sizeof(ret));
+        if (rc != PTL_OK)
+                goto fail;
+        if (ret.rc)
+                GOTO(fail, rc = ret.rc);
+
+        eq->sequence = 1;
+        eq->size = count;
+        eq->base = ev;
+
+        /* EQ handles are a little weird.  PtlEQGet() just looks at the
+         * queued events in shared memory.  It doesn't want to do_forward()
+         * at all, so the cookie in the EQ handle we pass out of here is
+         * simply a pointer to the event queue we just set up.  We stash
+         * the handle returned by do_forward(), so we can pass it back via
+         * do_forward() when we need to. */
+
+        eq->cb_eq_handle.nal_idx = interface.nal_idx;
+        eq->cb_eq_handle.cookie = ret.handle_out.cookie;
+
+        handle_out->nal_idx = interface.nal_idx;
+        handle_out->cookie = (__u64)((unsigned long)eq);
+        return PTL_OK;
+
+fail:
+        PORTAL_FREE(eq, sizeof(*eq));
+fail_ev:
+        PORTAL_FREE(ev, count * sizeof(ptl_event_t));
+        return rc;
+}
+
+/*
+ * Destroy an event queue created by PtlEQAlloc().
+ *
+ * Forwards PTL_EQFREE using the handle stashed in the user-level queue,
+ * then releases the event array and the queue descriptor regardless of
+ * the outcome.  Returns the do_forward() status; the rc field of the
+ * reply is not examined.
+ */
+int PtlEQFree(ptl_handle_eq_t eventq)
+{
+ PtlEQFree_in args;
+ PtlEQFree_out ret;
+ ptl_eq_t *eq;
+ int rc;
+
+ /* eq is dereferenced without a check: the handle must be valid */
+ eq = ptl_handle2usereq (&eventq);
+ args.eventq_in = eq->cb_eq_handle;
+
+ rc = do_forward(eq->cb_eq_handle, PTL_EQFREE, &args,
+ sizeof(args), &ret, sizeof(ret));
+
+ /* XXX we're betting rc == PTL_OK here */
+ PORTAL_FREE(eq->base, eq->size * sizeof(ptl_event_t));
+ PORTAL_FREE(eq, sizeof(*eq));
+
+ return rc;
+}
+
+/*
+ * Forward PTL_ACENTRY with the given access-control arguments.  Returns
+ * the forwarding error if there was one, else the operation's result
+ * code.
+ */
+int PtlACEntry(ptl_handle_ni_t ni_in, ptl_ac_index_t index_in,
+               ptl_process_id_t match_id_in, ptl_pt_index_t portal_in)
+{
+        PtlACEntry_in request;
+        PtlACEntry_out reply;
+        int status;
+
+        /* marshal the arguments for the forwarding object */
+        request.ni_in = ni_in;
+        request.index_in = index_in;
+        request.match_id_in = match_id_in;
+        request.portal_in = portal_in;
+
+        status = do_forward(ni_in, PTL_ACENTRY, &request, sizeof(request),
+                            &reply, sizeof(reply));
+        if (status != PTL_OK)
+                return status;
+
+        return reply.rc;
+}
+
+/*
+ * Forward PTL_PUT for the memory descriptor 'md_in' with the given
+ * target and matching arguments.  Returns the forwarding error if there
+ * was one, else the operation's result code.
+ */
+int PtlPut(ptl_handle_md_t md_in, ptl_ack_req_t ack_req_in,
+           ptl_process_id_t target_in, ptl_pt_index_t portal_in,
+           ptl_ac_index_t cookie_in, ptl_match_bits_t match_bits_in,
+           ptl_size_t offset_in, ptl_hdr_data_t hdr_data_in)
+{
+        PtlPut_in request;
+        PtlPut_out reply;
+        int status;
+
+        /* marshal the arguments for the forwarding object */
+        request.md_in = md_in;
+        request.ack_req_in = ack_req_in;
+        request.target_in = target_in;
+        request.portal_in = portal_in;
+        request.cookie_in = cookie_in;
+        request.match_bits_in = match_bits_in;
+        request.offset_in = offset_in;
+        request.hdr_data_in = hdr_data_in;
+
+        status = do_forward(md_in, PTL_PUT, &request, sizeof(request),
+                            &reply, sizeof(reply));
+        if (status != PTL_OK)
+                return status;
+
+        return reply.rc;
+}
+
+/*
+ * Forward PTL_GET for the memory descriptor 'md_in' with the given
+ * target and matching arguments.  Returns the forwarding error if there
+ * was one, else the operation's result code.
+ */
+int PtlGet(ptl_handle_md_t md_in, ptl_process_id_t target_in,
+           ptl_pt_index_t portal_in, ptl_ac_index_t cookie_in,
+           ptl_match_bits_t match_bits_in, ptl_size_t offset_in)
+{
+        PtlGet_in request;
+        PtlGet_out reply;
+        int status;
+
+        /* marshal the arguments for the forwarding object */
+        request.md_in = md_in;
+        request.target_in = target_in;
+        request.portal_in = portal_in;
+        request.cookie_in = cookie_in;
+        request.match_bits_in = match_bits_in;
+        request.offset_in = offset_in;
+
+        status = do_forward(md_in, PTL_GET, &request, sizeof(request),
+                            &reply, sizeof(reply));
+        if (status != PTL_OK)
+                return status;
+
+        return reply.rc;
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lib/lib-dispatch.c
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_PORTALS
+#include <portals/lib-p30.h>
+#include <portals/lib-dispatch.h>
+
+/* One dispatchable library call: its handler and a printable name. */
+typedef struct {
+ /* handler; gets the NAL, the NAL-private pointer and the in/out blocks */
+ int (*fun) (nal_cb_t * nal, void *private, void *in, void *out);
+ /* name used in debug output */
+ char *name;
+} dispatch_table_t;
+
+/*
+ * Handlers indexed by the PTL_* command number that lib_dispatch()
+ * receives.  Indices without an initialiser are zero-filled, so their
+ * 'fun' is NULL and lib_dispatch() rejects them.
+ */
+static dispatch_table_t dispatch_table[] = {
+ [PTL_GETID] {do_PtlGetId, "PtlGetId"},
+ [PTL_NISTATUS] {do_PtlNIStatus, "PtlNIStatus"},
+ [PTL_NIDIST] {do_PtlNIDist, "PtlNIDist"},
+ [PTL_NIDEBUG] {do_PtlNIDebug, "PtlNIDebug"},
+ [PTL_MEATTACH] {do_PtlMEAttach, "PtlMEAttach"},
+ [PTL_MEINSERT] {do_PtlMEInsert, "PtlMEInsert"},
+ [PTL_MEUNLINK] {do_PtlMEUnlink, "PtlMEUnlink"},
+ [PTL_TBLDUMP] {do_PtlTblDump, "PtlTblDump"},
+ [PTL_MEDUMP] {do_PtlMEDump, "PtlMEDump"},
+ [PTL_MDATTACH] {do_PtlMDAttach, "PtlMDAttach"},
+ [PTL_MDBIND] {do_PtlMDBind, "PtlMDBind"},
+ [PTL_MDUPDATE] {do_PtlMDUpdate_internal, "PtlMDUpdate_internal"},
+ [PTL_MDUNLINK] {do_PtlMDUnlink, "PtlMDUnlink"},
+ [PTL_EQALLOC] {do_PtlEQAlloc_internal, "PtlEQAlloc_internal"},
+ [PTL_EQFREE] {do_PtlEQFree_internal, "PtlEQFree_internal"},
+ [PTL_ACENTRY] {do_PtlACEntry, "PtlACEntry"},
+ [PTL_PUT] {do_PtlPut, "PtlPut"},
+ [PTL_GET] {do_PtlGet, "PtlGet"},
+ [PTL_FAILNID] {do_PtlFailNid, "PtlFailNid"},
+ /* terminating entry with no handler */
+ /* */ {0, ""}
+};
+
+/*
+ * This really should be elsewhere, but lib-p30/dispatch.c is
+ * an automatically generated file.
+ *
+ * lib_dispatch - route a forwarded API call to its library handler.
+ *
+ * nal        NAL the call arrived on; its NID is used in debug output
+ * private    opaque NAL data handed through to the handler
+ * index      API call number, used as the dispatch_table[] index
+ * arg_block  incoming argument block for the handler
+ * ret_block  outgoing result block for the handler
+ *
+ * An invalid index is logged and ignored; ret_block is left untouched in
+ * that case.
+ */
+void lib_dispatch(nal_cb_t * nal, void *private, int index, void *arg_block,
+                  void *ret_block)
+{
+        /* Bound the index by the real table size as well as by
+         * LIB_MAX_DISPATCH, so that a mismatch between that constant and
+         * the table can never turn into an out-of-bounds read. */
+        const int table_entries = sizeof (dispatch_table) /
+                                  sizeof (dispatch_table[0]);
+        lib_ni_t *ni = &nal->ni;
+
+        if (index < 0 || index > LIB_MAX_DISPATCH ||
+            index >= table_entries ||
+            !dispatch_table[index].fun) {
+                CDEBUG(D_NET, LPU64": Invalid API call %d\n", ni->nid, index);
+                return;
+        }
+
+        CDEBUG(D_NET, LPU64": API call %s (%d)\n", ni->nid,
+               dispatch_table[index].name, index);
+
+        /* the handler reports its status through ret_block */
+        dispatch_table[index].fun(nal, private, arg_block, ret_block);
+}
+
+/*
+ * dispatch_name - printable name of an API call number.
+ *
+ * Returns the name stored in dispatch_table[] for 'index' (which is NULL
+ * for an in-range call number that has no table entry), or the placeholder
+ * "<invalid>" when 'index' lies outside the table, instead of reading past
+ * the array.
+ */
+char *dispatch_name(int index)
+{
+        const int table_entries = sizeof (dispatch_table) /
+                                  sizeof (dispatch_table[0]);
+
+        if (index < 0 || index >= table_entries)
+                return "<invalid>";
+
+        return dispatch_table[index].name;
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lib/lib-eq.c
+ * Library level Event queue management routines
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_PORTALS
+#include <portals/lib-p30.h>
+#include <portals/arg-blocks.h>
+
+/*
+ * Library half of PtlEQAlloc: create an event queue over caller-supplied
+ * event storage.
+ *
+ * Incoming (PtlEQAlloc_in):  count_in, base_in, callback_in
+ * Outgoing (PtlEQAlloc_out): handle_out, rc
+ *
+ * Returns (and stores in ret->rc) PTL_VAL_FAILED if count_in is not equal
+ * to its lowest set bit, PTL_NOSPACE if no EQ descriptor is available, the
+ * NAL's error if mapping the event storage fails, else PTL_OK.
+ */
+int do_PtlEQAlloc_internal(nal_cb_t * nal, void *private, void *v_args,
+                           void *v_ret)
+{
+        PtlEQAlloc_in  *in = v_args;
+        PtlEQAlloc_out *out = v_ret;
+        unsigned long   irq_flags;
+        lib_eq_t       *new_eq;
+
+        /* api should have rounded up */
+        if (LOWEST_BIT_SET (in->count_in) != in->count_in) {
+                out->rc = PTL_VAL_FAILED;
+                return out->rc;
+        }
+
+        new_eq = lib_eq_alloc (nal);
+        if (new_eq == NULL) {
+                out->rc = PTL_NOSPACE;
+                return out->rc;
+        }
+
+        state_lock(nal, &irq_flags);
+
+        /* let the NAL map the event array if it provides a map callback */
+        if (nal->cb_map != NULL) {
+                struct iovec region;
+
+                region.iov_base = in->base_in;
+                region.iov_len  = in->count_in * sizeof (ptl_event_t);
+
+                out->rc = nal->cb_map (nal, 1, &region, &new_eq->eq_addrkey);
+                if (out->rc != PTL_OK) {
+                        lib_eq_free (nal, new_eq);
+
+                        state_unlock (nal, &irq_flags);
+                        return (out->rc);
+                }
+        }
+
+        new_eq->sequence       = 1;
+        new_eq->base           = in->base_in;
+        new_eq->size           = in->count_in;
+        new_eq->eq_refcount    = 0;
+        new_eq->event_callback = in->callback_in;
+
+        /* publish: make the handle resolvable and track the EQ as active */
+        lib_initialise_handle (nal, &new_eq->eq_lh);
+        list_add (&new_eq->eq_list, &nal->ni.ni_active_eqs);
+
+        state_unlock(nal, &irq_flags);
+
+        ptl_eq2handle(&out->handle_out, new_eq);
+        out->rc = PTL_OK;
+        return (out->rc);
+}
+
+/*
+ * Library half of PtlEQFree: destroy an event queue.
+ *
+ * Incoming (PtlEQFree_in):  eventq_in
+ * Outgoing (PtlEQFree_out): rc
+ *
+ * Returns (and stores in ret->rc) PTL_INV_EQ if the handle does not
+ * resolve, PTL_EQ_INUSE if the EQ's reference count is non-zero,
+ * else PTL_OK after unmapping and freeing it.
+ */
+int do_PtlEQFree_internal(nal_cb_t * nal, void *private, void *v_args,
+                          void *v_ret)
+{
+        PtlEQFree_in  *args = v_args;
+        PtlEQFree_out *ret = v_ret;
+        lib_eq_t      *eq;
+        /* unsigned long, matching every other state_lock() caller here */
+        unsigned long  flags;
+
+        state_lock (nal, &flags);
+
+        eq = ptl_handle2eq(&args->eventq_in, nal);
+        if (eq == NULL) {
+                ret->rc = PTL_INV_EQ;
+        } else if (eq->eq_refcount != 0) {
+                ret->rc = PTL_EQ_INUSE;
+        } else {
+                /* undo the mapping made when the EQ was allocated */
+                if (nal->cb_unmap != NULL) {
+                        struct iovec iov = {
+                                .iov_base = eq->base,
+                                .iov_len = eq->size * sizeof (ptl_event_t) };
+
+                        nal->cb_unmap(nal, 1, &iov, &eq->eq_addrkey);
+                }
+
+                lib_invalidate_handle (nal, &eq->eq_lh);
+                list_del (&eq->eq_list);
+                lib_eq_free (nal, eq);
+                ret->rc = PTL_OK;
+        }
+
+        state_unlock (nal, &flags);
+
+        return (ret->rc);
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lib/lib-init.c
+ * Start up the internal library and clear all structures
+ * Called by the NAL when it initializes. Safe to call multiple times.
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+# define DEBUG_SUBSYSTEM S_PORTALS
+#include <portals/lib-p30.h>
+
+#ifdef __KERNEL__
+# include <linux/string.h> /* for memset() */
+# include <linux/kp30.h>
+# ifdef KERNEL_ADDR_CACHE
+# include <compute/OS/addrCache/cache.h>
+# endif
+#else
+# include <string.h>
+# include <sys/time.h>
+#endif
+
+#ifdef PTL_USE_SLAB_CACHE
+/* Incremented by kportal_descriptor_setup() and decremented by
+ * kportal_descriptor_cleanup(); the slabs are created on the first setup
+ * and destroyed when the count returns to zero. */
+static int ptl_slab_users;
+
+/* Slab caches for the four descriptor types, shared by all NALs. */
+kmem_cache_t *ptl_md_slab;
+kmem_cache_t *ptl_msg_slab;
+kmem_cache_t *ptl_me_slab;
+kmem_cache_t *ptl_eq_slab;
+
+/* Per-type counters asserted to be zero before the slabs are destroyed.
+ * NOTE(review): presumably maintained by the descriptor alloc/free helpers
+ * defined elsewhere - confirm. */
+atomic_t md_in_use_count;
+atomic_t msg_in_use_count;
+atomic_t me_in_use_count;
+atomic_t eq_in_use_count;
+
+/* NB zeroing in ctor and on freeing ensures items that
+ * kmem_cache_validate() OK, but haven't been initialised
+ * as an MD/ME/EQ can't have valid handles
+ */
+/* Slab constructor for MDs: zero the whole lib_md_t. */
+static void
+ptl_md_slab_ctor (void *obj, kmem_cache_t *slab, unsigned long flags)
+{
+        lib_md_t *md = obj;
+
+        memset (md, 0, sizeof (*md));
+}
+
+/* Slab constructor for MEs: zero the whole lib_me_t. */
+static void
+ptl_me_slab_ctor (void *obj, kmem_cache_t *slab, unsigned long flags)
+{
+        lib_me_t *me = obj;
+
+        memset (me, 0, sizeof (*me));
+}
+
+/* Slab constructor for EQs: zero the whole lib_eq_t. */
+static void
+ptl_eq_slab_ctor (void *obj, kmem_cache_t *slab, unsigned long flags)
+{
+        lib_eq_t *eq = obj;
+
+        memset (eq, 0, sizeof (*eq));
+}
+
+/*
+ * kportal_descriptor_setup (slab variant) - create the descriptor slabs.
+ *
+ * The slabs are shared by all NALs: only the first caller creates them,
+ * later callers just take a reference and succeed.
+ *
+ * Returns PTL_OK, or PTL_NOSPACE if any cache cannot be created.
+ */
+int
+kportal_descriptor_setup (nal_cb_t *nal)
+{
+        /* NB on failure caller must still call kportal_descriptor_cleanup */
+        /* ****** */
+        ENTRY;
+
+        /* We'll have 1 set of slabs for ALL the nals :) */
+
+        if (ptl_slab_users++)
+                RETURN (PTL_OK);
+
+        ptl_md_slab = kmem_cache_create("portals_MD",
+                                        sizeof(lib_md_t), 0,
+                                        SLAB_HWCACHE_ALIGN,
+                                        ptl_md_slab_ctor, NULL);
+        if (!ptl_md_slab) {
+                CERROR("couldn't allocate ptl_md_t slab\n");
+                RETURN (PTL_NOSPACE);
+        }
+
+        /* NB no ctor for msgs; they don't need handle verification */
+        ptl_msg_slab = kmem_cache_create("portals_MSG",
+                                         sizeof(lib_msg_t), 0,
+                                         SLAB_HWCACHE_ALIGN,
+                                         NULL, NULL);
+        if (!ptl_msg_slab) {
+                CERROR("couldn't allocate ptl_msg_t slab\n");
+                RETURN (PTL_NOSPACE);
+        }
+
+        ptl_me_slab = kmem_cache_create("portals_ME",
+                                        sizeof(lib_me_t), 0,
+                                        SLAB_HWCACHE_ALIGN,
+                                        ptl_me_slab_ctor, NULL);
+        if (!ptl_me_slab) {
+                CERROR("couldn't allocate ptl_me_t slab\n");
+                RETURN (PTL_NOSPACE);
+        }
+
+        ptl_eq_slab = kmem_cache_create("portals_EQ",
+                                        sizeof(lib_eq_t), 0,
+                                        SLAB_HWCACHE_ALIGN,
+                                        ptl_eq_slab_ctor, NULL);
+        if (!ptl_eq_slab) {
+                CERROR("couldn't allocate ptl_eq_t slab\n");
+                RETURN (PTL_NOSPACE);
+        }
+
+        RETURN(PTL_OK);
+}
+
+/*
+ * kportal_descriptor_cleanup (slab variant) - drop one reference on the
+ * shared descriptor slabs and destroy them when the last user goes away.
+ *
+ * Each cache pointer is reset to NULL after it is destroyed, so that a
+ * later setup that fails part-way, followed by the mandatory cleanup,
+ * cannot destroy a cache left over from an earlier cycle a second time.
+ */
+void
+kportal_descriptor_cleanup (nal_cb_t *nal)
+{
+        if (--ptl_slab_users != 0)
+                return;
+
+        LASSERT (atomic_read (&md_in_use_count) == 0);
+        LASSERT (atomic_read (&me_in_use_count) == 0);
+        LASSERT (atomic_read (&eq_in_use_count) == 0);
+        LASSERT (atomic_read (&msg_in_use_count) == 0);
+
+        if (ptl_md_slab != NULL) {
+                kmem_cache_destroy(ptl_md_slab);
+                ptl_md_slab = NULL;
+        }
+        if (ptl_msg_slab != NULL) {
+                kmem_cache_destroy(ptl_msg_slab);
+                ptl_msg_slab = NULL;
+        }
+        if (ptl_me_slab != NULL) {
+                kmem_cache_destroy(ptl_me_slab);
+                ptl_me_slab = NULL;
+        }
+        if (ptl_eq_slab != NULL) {
+                kmem_cache_destroy(ptl_eq_slab);
+                ptl_eq_slab = NULL;
+        }
+}
+#else
+
+/*
+ * lib_freelist_init - carve one NAL allocation into 'n' free objects.
+ *
+ * 'size' is the payload size; the freeobj header (everything before
+ * fo_contents) is added to it.  Every object is zeroed and linked onto
+ * fl->fl_list.
+ *
+ * Returns PTL_OK, or PTL_NOSPACE if the NAL allocator fails.
+ */
+int
+lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int n, int size)
+{
+        char *base;
+        char *cursor;
+        int   remaining;
+
+        LASSERT (n > 0);
+
+        size += offsetof (lib_freeobj_t, fo_contents);
+
+        base = nal->cb_malloc (nal, n * size);
+        if (base == NULL)
+                return (PTL_NOSPACE);
+
+        INIT_LIST_HEAD (&fl->fl_list);
+        fl->fl_objs    = base;
+        fl->fl_nobjs   = n;
+        fl->fl_objsize = size;
+
+        /* walk the slab one object at a time, pushing each on the list */
+        cursor    = base;
+        remaining = n;
+        do {
+                memset (cursor, 0, size);
+                list_add ((struct list_head *)cursor, &fl->fl_list);
+                cursor += size;
+                remaining--;
+        } while (remaining != 0);
+
+        return (PTL_OK);
+}
+
+/*
+ * lib_freelist_fini - release the storage behind a freelist.
+ *
+ * Does nothing for a freelist with no objects (never initialised, or
+ * already finalised).  Otherwise asserts that every object is back on the
+ * list, returns the backing storage to the NAL and clears the whole
+ * descriptor so that a repeated call is a harmless no-op.
+ */
+void
+lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl)
+{
+        struct list_head *el;
+        int               count;
+
+        if (fl->fl_nobjs == 0)
+                return;
+
+        count = 0;
+        for (el = fl->fl_list.next; el != &fl->fl_list; el = el->next)
+                count++;
+
+        LASSERT (count == fl->fl_nobjs);
+
+        nal->cb_free (nal, fl->fl_objs, fl->fl_nobjs * fl->fl_objsize);
+        /* clear the structure itself: sizeof (fl) would only cover the
+         * size of a pointer */
+        memset (fl, 0, sizeof (*fl));
+}
+
+/*
+ * kportal_descriptor_setup (freelist variant) - preallocate the per-NI
+ * freelists of MEs, messages, MDs and EQs.
+ *
+ * All four freelist descriptors are zeroed first, then initialised in
+ * turn; the first failure stops the sequence and is returned.
+ */
+int
+kportal_descriptor_setup (nal_cb_t *nal)
+{
+        /* NB on failure caller must still call kportal_descriptor_cleanup */
+        /* ****** */
+        lib_ni_t *ni = &nal->ni;
+        int       rc;
+
+        memset (&ni->ni_free_mes,  0, sizeof (ni->ni_free_mes));
+        memset (&ni->ni_free_msgs, 0, sizeof (ni->ni_free_msgs));
+        memset (&ni->ni_free_mds,  0, sizeof (ni->ni_free_mds));
+        memset (&ni->ni_free_eqs,  0, sizeof (ni->ni_free_eqs));
+
+        rc = lib_freelist_init (nal, &ni->ni_free_mes,
+                                MAX_MES, sizeof (lib_me_t));
+
+        if (rc == PTL_OK)
+                rc = lib_freelist_init (nal, &ni->ni_free_msgs,
+                                        MAX_MSGS, sizeof (lib_msg_t));
+
+        if (rc == PTL_OK)
+                rc = lib_freelist_init (nal, &ni->ni_free_mds,
+                                        MAX_MDS, sizeof (lib_md_t));
+
+        if (rc == PTL_OK)
+                rc = lib_freelist_init (nal, &ni->ni_free_eqs,
+                                        MAX_EQS, sizeof (lib_eq_t));
+
+        return (rc);
+}
+
+/*
+ * kportal_descriptor_cleanup (freelist variant) - finalise the four
+ * per-NI descriptor freelists.
+ */
+void
+kportal_descriptor_cleanup (nal_cb_t *nal)
+{
+        lib_ni_t *ni = &nal->ni;
+
+        lib_freelist_fini (nal, &ni->ni_free_mes);
+        lib_freelist_fini (nal, &ni->ni_free_msgs);
+        lib_freelist_fini (nal, &ni->ni_free_mds);
+        lib_freelist_fini (nal, &ni->ni_free_eqs);
+}
+
+#endif
+
+/*
+ * lib_create_interface_cookie - derive the interface cookie from the
+ * current time of day, expressed as seconds * 1000000 + microseconds.
+ */
+__u64
+lib_create_interface_cookie (nal_cb_t *nal)
+{
+        /* NB the interface cookie in wire handles guards against delayed
+         * replies and ACKs appearing valid in a new instance of the same
+         * interface.  Initialisation time, even if it's only implemented
+         * to millisecond resolution is probably easily good enough. */
+        struct timeval now;
+        __u64          stamp;
+#ifdef __KERNEL__
+        do_gettimeofday(&now);
+#else
+        int rc = gettimeofday (&now, NULL);
+        LASSERT (rc == 0);
+#endif
+        stamp  = now.tv_sec;
+        stamp  = stamp * 1000000;
+        stamp  = stamp + now.tv_usec;
+        return (stamp);
+}
+
+/*
+ * lib_setup_handle_hash - allocate and initialise the per-NI hash table
+ * used to resolve handle cookies.
+ *
+ * Returns PTL_OK, or PTL_NOSPACE if the table cannot be allocated.
+ */
+int
+lib_setup_handle_hash (nal_cb_t *nal)
+{
+        lib_ni_t *ni = &nal->ni;
+        int       bucket;
+
+        /* Arbitrary choice of hash table size */
+#ifndef __KERNEL__
+        ni->ni_lh_hash_size = (MAX_MES + MAX_MDS + MAX_EQS)/4;
+#else
+        ni->ni_lh_hash_size = PAGE_SIZE / sizeof (struct list_head);
+#endif
+        ni->ni_lh_hash_table =
+                (struct list_head *)nal->cb_malloc (nal, ni->ni_lh_hash_size
+                                                    * sizeof (struct list_head));
+        if (ni->ni_lh_hash_table == NULL)
+                return (PTL_NOSPACE);
+
+        /* every bucket starts out as an empty chain */
+        bucket = 0;
+        while (bucket < ni->ni_lh_hash_size) {
+                INIT_LIST_HEAD (&ni->ni_lh_hash_table[bucket]);
+                bucket++;
+        }
+
+        ni->ni_next_object_cookie = 0;
+
+        return (PTL_OK);
+}
+
+/*
+ * lib_cleanup_handle_hash - free the per-NI handle hash table.
+ *
+ * Safe to call when no table is allocated.  The pointer is cleared after
+ * the free so that a second cleanup (for example from a later failed
+ * lib_init()) cannot free the same table twice.
+ */
+void
+lib_cleanup_handle_hash (nal_cb_t *nal)
+{
+        lib_ni_t *ni = &nal->ni;
+
+        if (ni->ni_lh_hash_table == NULL)
+                return;
+
+        nal->cb_free (nal, ni->ni_lh_hash_table,
+                      ni->ni_lh_hash_size * sizeof (struct list_head));
+        ni->ni_lh_hash_table = NULL;
+}
+
+/*
+ * lib_lookup_cookie - find the handle whose cookie equals 'cookie'.
+ *
+ * Hashes the low bits of the cookie to pick a chain and scans it.
+ * Returns the matching lib_handle_t, or NULL if there is none.
+ */
+lib_handle_t *
+lib_lookup_cookie (nal_cb_t *nal, __u64 cookie)
+{
+        /* ALWAYS called with statelock held */
+        lib_ni_t         *ni = &nal->ni;
+        unsigned int      slot = ((unsigned int)cookie) % ni->ni_lh_hash_size;
+        struct list_head *chain = &ni->ni_lh_hash_table[slot];
+        struct list_head *pos;
+
+        for (pos = chain->next; pos != chain; pos = pos->next) {
+                lib_handle_t *candidate =
+                        list_entry (pos, lib_handle_t, lh_hash_chain);
+
+                if (candidate->lh_cookie == cookie)
+                        return (candidate);
+        }
+
+        return (NULL);
+}
+
+/*
+ * lib_initialise_handle - give 'lh' the next object cookie and insert it
+ * into the NI's handle hash so the cookie can be looked up.
+ */
+void
+lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh)
+{
+        /* ALWAYS called with statelock held */
+        lib_ni_t     *ni = &nal->ni;
+        unsigned int  slot;
+
+        lh->lh_cookie = ni->ni_next_object_cookie;
+        ni->ni_next_object_cookie++;
+
+        slot = ((unsigned int)lh->lh_cookie) % ni->ni_lh_hash_size;
+        list_add (&lh->lh_hash_chain, &ni->ni_lh_hash_table[slot]);
+}
+
+/*
+ * lib_invalidate_handle - remove 'lh' from its hash chain.
+ */
+void
+lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh)
+{
+        struct list_head *link = &lh->lh_hash_chain;
+
+        list_del (link);
+}
+
+/*
+ * lib_init - bring up the library state of a network interface.
+ *
+ * nal       NAL whose embedded lib_ni_t is initialised
+ * nid, pid  identity recorded in the NI
+ * gsize     recorded as the number of nodes
+ * ptl_size  number of portal table entries to allocate
+ * acl_size  not used by this function
+ *
+ * If the NI is already initialised only its reference count is bumped.
+ * Returns PTL_OK, or the error from the failing setup step after undoing
+ * the handle hash and descriptor setup.
+ */
+int
+lib_init(nal_cb_t * nal, ptl_nid_t nid, ptl_pid_t pid, int gsize,
+         ptl_pt_index_t ptl_size, ptl_ac_index_t acl_size)
+{
+        int       rc = PTL_OK;
+        lib_ni_t *ni = &nal->ni;
+        int       i;
+        ENTRY;
+
+        /* NB serialised in PtlNIInit() */
+
+        if (ni->refcnt != 0) {                  /* already initialised */
+                ni->refcnt++;
+                goto out;
+        }
+
+        /* The failure path below calls lib_cleanup_handle_hash(), which
+         * inspects this pointer.  Clear it up front so a failure before
+         * the hash is set up never acts on a stale or uninitialised
+         * value. */
+        ni->ni_lh_hash_table = NULL;
+
+        /*
+         * Allocate the portal table for this interface
+         * and all per-interface objects.
+         */
+        memset(&ni->counters, 0, sizeof(lib_counters_t));
+
+        rc = kportal_descriptor_setup (nal);
+        if (rc != PTL_OK)
+                goto out;
+
+        INIT_LIST_HEAD (&ni->ni_active_msgs);
+        INIT_LIST_HEAD (&ni->ni_active_mds);
+        INIT_LIST_HEAD (&ni->ni_active_eqs);
+
+        INIT_LIST_HEAD (&ni->ni_test_peers);
+
+        ni->ni_interface_cookie = lib_create_interface_cookie (nal);
+        ni->ni_next_object_cookie = 0;
+        rc = lib_setup_handle_hash (nal);
+        if (rc != PTL_OK)
+                goto out;
+
+        ni->nid = nid;
+        ni->pid = pid;
+
+        ni->num_nodes = gsize;
+        ni->tbl.size = ptl_size;
+
+        ni->tbl.tbl = nal->cb_malloc(nal, sizeof(struct list_head) * ptl_size);
+        if (ni->tbl.tbl == NULL) {
+                rc = PTL_NOSPACE;
+                goto out;
+        }
+
+        for (i = 0; i < ptl_size; i++)
+                INIT_LIST_HEAD(&(ni->tbl.tbl[i]));
+
+        ni->debug = PTL_DEBUG_NONE;
+        ni->up = 1;
+        ni->refcnt++;
+
+ out:
+        if (rc != PTL_OK) {
+                lib_cleanup_handle_hash (nal);
+                kportal_descriptor_cleanup (nal);
+        }
+
+        RETURN (rc);
+}
+
+/*
+ * lib_fini - drop one reference on the NI's library state and tear it
+ * down when the last reference goes.
+ *
+ * Teardown frees every ME still on a portal table list and every MD, EQ
+ * and message still on the active lists (logging each one), then releases
+ * the portal table, the handle hash and the descriptor pools.
+ *
+ * Always returns PTL_OK.
+ */
+int
+lib_fini(nal_cb_t * nal)
+{
+        lib_ni_t         *ni = &nal->ni;
+        struct list_head *head;
+        int               idx;
+
+        ni->refcnt--;
+        if (ni->refcnt != 0)
+                return (PTL_OK);
+
+        /* NB no state_lock() since this is the last reference.  The NAL
+         * should have shut down already, so it should be safe to unlink
+         * and free all descriptors, even those that appear committed to a
+         * network op (eg MD with non-zero pending count)
+         */
+
+        for (idx = 0; idx < ni->tbl.size; idx++) {
+                head = &ni->tbl.tbl[idx];
+
+                while (!list_empty (head)) {
+                        lib_me_t *me = list_entry (head->next,
+                                                   lib_me_t, me_list);
+
+                        CERROR ("Active me %p on exit\n", me);
+                        list_del (&me->me_list);
+                        lib_me_free (nal, me);
+                }
+        }
+
+        head = &ni->ni_active_mds;
+        while (!list_empty (head)) {
+                lib_md_t *md = list_entry (head->next, lib_md_t, md_list);
+
+                CERROR ("Active md %p on exit\n", md);
+                list_del (&md->md_list);
+                lib_md_free (nal, md);
+        }
+
+        head = &ni->ni_active_eqs;
+        while (!list_empty (head)) {
+                lib_eq_t *eq = list_entry (head->next, lib_eq_t, eq_list);
+
+                CERROR ("Active eq %p on exit\n", eq);
+                list_del (&eq->eq_list);
+                lib_eq_free (nal, eq);
+        }
+
+        head = &ni->ni_active_msgs;
+        while (!list_empty (head)) {
+                lib_msg_t *msg = list_entry (head->next, lib_msg_t, msg_list);
+
+                CERROR ("Active msg %p on exit\n", msg);
+                list_del (&msg->msg_list);
+                lib_msg_free (nal, msg);
+        }
+
+        nal->cb_free(nal, ni->tbl.tbl, sizeof(struct list_head) * ni->tbl.size);
+        ni->up = 0;
+
+        lib_cleanup_handle_hash (nal);
+        kportal_descriptor_cleanup (nal);
+
+        return (PTL_OK);
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lib/lib-md.c
+ * Memory Descriptor management routines
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __KERNEL__
+# include <stdio.h>
+#else
+# define DEBUG_SUBSYSTEM S_PORTALS
+# include <linux/kp30.h>
+#endif
+
+#include <portals/lib-p30.h>
+#include <portals/arg-blocks.h>
+
+/*
+ * lib_md_unlink - detach and free a memory descriptor.
+ *
+ * If operations are still pending on the MD it is only flagged with
+ * PTL_MD_FLAG_UNLINK and left in place.  Otherwise the MD is unmapped,
+ * detached from its ME (unlinking the ME too when that ME asks for
+ * PTL_UNLINK), its EQ reference is dropped, and it is freed.
+ *
+ * must be called with state lock held
+ */
+void lib_md_unlink(nal_cb_t * nal, lib_md_t * md)
+{
+        lib_me_t *owner = md->me;
+
+        if (md->pending != 0) {
+                CDEBUG(D_NET, "Queueing unlink of md %p\n", md);
+                md->md_flags |= PTL_MD_FLAG_UNLINK;
+                return;
+        }
+
+        CDEBUG(D_NET, "Unlinking md %p\n", md);
+
+        /* undo whichever mapping flavour the MD uses */
+        if ((md->options & PTL_MD_KIOV) == 0) {
+                if (nal->cb_unmap != NULL)
+                        nal->cb_unmap (nal, md->md_niov, md->md_iov.iov,
+                                       &md->md_addrkey);
+        } else {
+                if (nal->cb_unmap_pages != NULL)
+                        nal->cb_unmap_pages (nal, md->md_niov,
+                                             md->md_iov.kiov,
+                                             &md->md_addrkey);
+        }
+
+        if (owner != NULL) {
+                /* break the back-link before unlinking the ME */
+                owner->md = NULL;
+                if (owner->unlink == PTL_UNLINK)
+                        lib_me_unlink(nal, owner);
+        }
+
+        if (md->eq != NULL) {
+                md->eq->eq_refcount--;
+                LASSERT (md->eq->eq_refcount >= 0);
+        }
+
+        lib_invalidate_handle (nal, &md->md_lh);
+        list_del (&md->md_list);
+        lib_md_free(nal, md);
+}
+
+/*
+ * lib_md_build - validate a user MD description and fill in a library MD.
+ *
+ * nal      NAL the MD belongs to
+ * new      allocated library MD to initialise
+ * private  opaque NAL data passed to cb_read
+ * md       user-supplied description
+ * eqh      handle of the event queue to attach, or PTL_EQ_NONE
+ * unlink   unlink policy stored in the MD
+ *
+ * On success the EQ (if any) gains a reference, the MD gets a handle and
+ * is added to the active MD list, and PTL_OK is returned.  On failure one
+ * of PTL_INV_EQ, PTL_IOV_TOO_MANY, PTL_INV_MD, PTL_SEGV, PTL_VAL_FAILED,
+ * PTL_IOV_TOO_SMALL or the NAL's mapping error is returned; 'new' may
+ * already have been partly written.
+ *
+ * must be called with state lock held
+ */
+static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private,
+                        ptl_md_t *md, ptl_handle_eq_t *eqh, int unlink)
+{
+        const int max_size_opts = PTL_MD_AUTO_UNLINK |
+                                  PTL_MD_MAX_SIZE;
+        lib_eq_t *eq = NULL;
+        int       rc;
+        int       i;
+
+        /* NB we are passed an allocated, but uninitialised/active md.
+         * if we return success, caller may lib_md_unlink() it.
+         * otherwise caller may only lib_md_free() it.
+         */
+
+        if (!PtlHandleEqual (*eqh, PTL_EQ_NONE)) {
+                eq = ptl_handle2eq(eqh, nal);
+                if (eq == NULL)
+                        return PTL_INV_EQ;
+        }
+
+        if ((md->options & PTL_MD_IOV) != 0 &&  /* discontiguous MD */
+            md->niov > PTL_MD_MAX_IOV)          /* too many fragments */
+                return PTL_IOV_TOO_MANY;
+
+        if ((md->options & max_size_opts) != 0 && /* max size used */
+            (md->max_size < 0 || md->max_size > md->length)) /* illegal max_size */
+                return PTL_INV_MD;
+
+        new->me = NULL;
+        new->start = md->start;
+        new->length = md->length;
+        new->offset = 0;
+        new->max_size = md->max_size;
+        new->unlink = unlink;
+        new->options = md->options;
+        new->user_ptr = md->user_ptr;
+        new->eq = eq;
+        new->threshold = md->threshold;
+        new->pending = 0;
+        new->md_flags = 0;
+
+        if ((md->options & PTL_MD_IOV) != 0) {
+                int total_length = 0;
+
+                if ((md->options & PTL_MD_KIOV) != 0) /* Can't specify both */
+                        return PTL_INV_MD;
+
+                new->md_niov = md->niov;
+
+                /* fetch the fragment descriptors from the caller */
+                if (nal->cb_read (nal, private, new->md_iov.iov, md->start,
+                                  md->niov * sizeof (new->md_iov.iov[0])))
+                        return PTL_SEGV;
+
+                for (i = 0; i < new->md_niov; i++) {
+                        /* We take the base address on trust */
+                        if (new->md_iov.iov[i].iov_len <= 0) /* invalid length */
+                                return PTL_VAL_FAILED;
+
+                        total_length += new->md_iov.iov[i].iov_len;
+                }
+
+                if (md->length > total_length)
+                        return PTL_IOV_TOO_SMALL;
+
+                if (nal->cb_map != NULL) {
+                        rc = nal->cb_map (nal, new->md_niov, new->md_iov.iov,
+                                          &new->md_addrkey);
+                        if (rc != PTL_OK)
+                                return (rc);
+                }
+        } else if ((md->options & PTL_MD_KIOV) != 0) {
+#ifndef __KERNEL__
+                return PTL_INV_MD;
+#else
+                int total_length = 0;
+
+                /* Trap attempt to use paged I/O if unsupported early. */
+                if (nal->cb_send_pages == NULL ||
+                    nal->cb_recv_pages == NULL)
+                        return PTL_INV_MD;
+
+                /* The copy below writes md->niov descriptors into
+                 * new->md_iov.kiov, so the fragment count needs the same
+                 * upper bound the PTL_MD_IOV path applies.
+                 * NOTE(review): assumes md_iov.kiov holds PTL_MD_MAX_IOV
+                 * entries like md_iov.iov - confirm against lib_md_t. */
+                if (md->niov > PTL_MD_MAX_IOV)
+                        return PTL_IOV_TOO_MANY;
+
+                new->md_niov = md->niov;
+
+                if (nal->cb_read (nal, private, new->md_iov.kiov, md->start,
+                                  md->niov * sizeof (new->md_iov.kiov[0])))
+                        return PTL_SEGV;
+
+                for (i = 0; i < new->md_niov; i++) {
+                        /* We take the page pointer on trust */
+                        if (new->md_iov.kiov[i].kiov_offset +
+                            new->md_iov.kiov[i].kiov_len > PAGE_SIZE )
+                                return PTL_VAL_FAILED; /* invalid length */
+
+                        total_length += new->md_iov.kiov[i].kiov_len;
+                }
+
+                if (md->length > total_length)
+                        return PTL_IOV_TOO_SMALL;
+
+                if (nal->cb_map_pages != NULL) {
+                        rc = nal->cb_map_pages (nal, new->md_niov, new->md_iov.kiov,
+                                                &new->md_addrkey);
+                        if (rc != PTL_OK)
+                                return (rc);
+                }
+#endif
+        } else {                                /* contiguous */
+                new->md_niov = 1;
+                new->md_iov.iov[0].iov_base = md->start;
+                new->md_iov.iov[0].iov_len = md->length;
+
+                if (nal->cb_map != NULL) {
+                        rc = nal->cb_map (nal, new->md_niov, new->md_iov.iov,
+                                          &new->md_addrkey);
+                        if (rc != PTL_OK)
+                                return (rc);
+                }
+        }
+
+        if (eq != NULL)
+                eq->eq_refcount++;
+
+        /* It's good; let handle2md succeed and add to active mds */
+        lib_initialise_handle (nal, &new->md_lh);
+        list_add (&new->md_list, &nal->ni.ni_active_mds);
+
+        return PTL_OK;
+}
+
+/*
+ * lib_md_deconstruct - copy a library MD back out into the user-visible
+ * ptl_md_t form.
+ *
+ * must be called with state lock held
+ */
+void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md, ptl_md_t * new)
+{
+        /* NB this doesn't copy out all the iov entries so when a
+         * discontiguous MD is copied out, the target gets to know the
+         * original iov pointer (in start) and the number of entries it had
+         * and that's all.
+         */
+        const int discontiguous =
+                (md->options & (PTL_MD_IOV | PTL_MD_KIOV)) != 0;
+
+        new->start     = md->start;
+        new->length    = md->length;
+        new->threshold = md->threshold;
+        new->max_size  = md->max_size;
+        new->options   = md->options;
+        new->user_ptr  = md->user_ptr;
+        ptl_eq2handle(&new->eventq, md->eq);
+
+        /* a contiguous MD reports no fragments */
+        if (discontiguous)
+                new->niov = md->md_niov;
+        else
+                new->niov = 0;
+}
+
+/*
+ * Library half of PtlMDAttach: build an MD and attach it to a match entry.
+ *
+ * Incoming (PtlMDAttach_in):  me_in, md_in, eq_in, unlink_in
+ * Outgoing (PtlMDAttach_out): handle_out, rc
+ *
+ * Returns (and stores in ret->rc) PTL_NOSPACE if no MD can be allocated,
+ * PTL_INV_ME for an unknown ME handle, PTL_INUSE if the ME already has an
+ * MD, any lib_md_build() error, else PTL_OK.
+ */
+int do_PtlMDAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        PtlMDAttach_in  *in = v_args;
+        PtlMDAttach_out *out = v_ret;
+        unsigned long    irq_flags;
+        lib_me_t        *target;
+        lib_md_t        *new_md;
+        int              attached = 0;
+
+        new_md = lib_md_alloc (nal);
+        if (new_md == NULL) {
+                out->rc = PTL_NOSPACE;
+                return (out->rc);
+        }
+
+        state_lock(nal, &irq_flags);
+
+        target = ptl_handle2me(&in->me_in, nal);
+        if (target == NULL) {
+                out->rc = PTL_INV_ME;
+        } else if (target->md != NULL) {
+                out->rc = PTL_INUSE;
+        } else {
+                out->rc = lib_md_build(nal, new_md, private, &in->md_in,
+                                       &in->eq_in, in->unlink_in);
+                attached = (out->rc == PTL_OK);
+        }
+
+        if (attached) {
+                /* cross-link the ME and its new MD, then hand out the handle */
+                target->md = new_md;
+                new_md->me = target;
+
+                ptl_md2handle(&out->handle_out, new_md);
+
+                state_unlock (nal, &irq_flags);
+                return (PTL_OK);
+        }
+
+        /* nothing was published: give the descriptor back */
+        lib_md_free (nal, new_md);
+
+        state_unlock (nal, &irq_flags);
+        return (out->rc);
+}
+
+/*
+ * Library half of PtlMDBind: build a free-floating MD (no match entry).
+ * The MD is always built with the PTL_UNLINK policy.
+ *
+ * Incoming (PtlMDBind_in):  md_in, eq_in
+ * Outgoing (PtlMDBind_out): handle_out, rc
+ *
+ * Returns (and stores in ret->rc) PTL_NOSPACE if no MD can be allocated,
+ * any lib_md_build() error, else PTL_OK.
+ */
+int do_PtlMDBind(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        PtlMDBind_in  *in = v_args;
+        PtlMDBind_out *out = v_ret;
+        unsigned long  irq_flags;
+        lib_md_t      *new_md = lib_md_alloc (nal);
+
+        if (new_md == NULL) {
+                out->rc = PTL_NOSPACE;
+                return (out->rc);
+        }
+
+        state_lock(nal, &irq_flags);
+
+        out->rc = lib_md_build(nal, new_md, private,
+                               &in->md_in, &in->eq_in, PTL_UNLINK);
+
+        if (out->rc != PTL_OK) {
+                /* build failed: the descriptor was never published */
+                lib_md_free (nal, new_md);
+
+                state_unlock(nal, &irq_flags);
+                return (out->rc);
+        }
+
+        ptl_md2handle(&out->handle_out, new_md);
+
+        state_unlock(nal, &irq_flags);
+        return (PTL_OK);
+}
+
+/*
+ * Library half of PtlMDUnlink: unlink an idle MD and report its final
+ * state.
+ *
+ * Incoming (PtlMDUnlink_in):  md_in
+ * Outgoing (PtlMDUnlink_out): status_out, rc
+ *
+ * Returns (and stores in ret->rc) PTL_INV_MD for an unknown handle,
+ * PTL_MD_INUSE while operations are pending on the MD, else PTL_OK.
+ */
+int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        PtlMDUnlink_in  *in = v_args;
+        PtlMDUnlink_out *out = v_ret;
+        unsigned long    irq_flags;
+        lib_md_t        *victim;
+
+        state_lock(nal, &irq_flags);
+
+        victim = ptl_handle2md(&in->md_in, nal);
+
+        if (victim != NULL && victim->pending == 0) {
+                /* Callers attempting to unlink a busy MD which will get
+                 * unlinked once the net op completes should see INUSE,
+                 * before completion and INV_MD thereafter.  LASSERT we've
+                 * got that right... */
+                LASSERT ((victim->md_flags & PTL_MD_FLAG_UNLINK) == 0);
+
+                lib_md_deconstruct(nal, victim, &out->status_out);
+                lib_md_unlink(nal, victim);
+                out->rc = PTL_OK;
+        } else if (victim == NULL) {
+                out->rc = PTL_INV_MD;
+        } else {                        /* being filled/spilled */
+                out->rc = PTL_MD_INUSE;
+        }
+
+        state_unlock(nal, &irq_flags);
+
+        return (out->rc);
+}
+
+/*
+ * Library half of PtlMDUpdate: optionally report the current MD and
+ * optionally replace its description, conditional on a test event queue.
+ *
+ * Returns (and stores in ret->rc) PTL_INV_MD for an unknown MD handle,
+ * PTL_INV_EQ for an unknown test EQ handle, PTL_NOUPDATE if the MD has
+ * pending operations or the test EQ's sequence differs from sequence_in,
+ * PTL_OK if no new description was supplied, else the result of
+ * lib_md_build().
+ *
+ * NOTE(review): lib_md_build() finishes by calling lib_initialise_handle()
+ * and list_add() on the MD's md_list, and here it is handed an MD that was
+ * found through its handle, i.e. one that is already set up.  That looks
+ * like it assigns a fresh cookie and re-links nodes that are still linked -
+ * confirm.  A failing build also leaves the MD partly overwritten.
+ */
+int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *v_args,
+ void *v_ret)
+{
+ /*
+ * Incoming:
+ * ptl_handle_md_t md_in
+ * ptl_md_t * old_inout
+ * ptl_md_t * new_inout
+ * ptl_handle_eq_t testq_in
+ * ptl_seq_t sequence_in
+ *
+ * Outgoing:
+ * ptl_md_t * old_inout
+ * ptl_md_t * new_inout
+ */
+ PtlMDUpdate_internal_in *args = v_args;
+ PtlMDUpdate_internal_out *ret = v_ret;
+ lib_md_t *md;
+ lib_eq_t *test_eq = NULL;
+ ptl_md_t *new = &args->new_inout;
+ unsigned long flags;
+
+ state_lock(nal, &flags);
+
+ md = ptl_handle2md(&args->md_in, nal);
+ if (md == NULL) {
+ ret->rc = PTL_INV_MD;
+ goto out;
+ }
+
+ /* report the MD as it is now, before any update */
+ if (args->old_inout_valid)
+ lib_md_deconstruct(nal, md, &ret->old_inout);
+
+ /* no new description supplied: this was a read-only query */
+ if (!args->new_inout_valid) {
+ ret->rc = PTL_OK;
+ goto out;
+ }
+
+ if (!PtlHandleEqual (args->testq_in, PTL_EQ_NONE)) {
+ test_eq = ptl_handle2eq(&args->testq_in, nal);
+ if (test_eq == NULL) {
+ ret->rc = PTL_INV_EQ;
+ goto out;
+ }
+ }
+
+ /* never rewrite an MD that has operations pending on it */
+ if (md->pending != 0) {
+ ret->rc = PTL_NOUPDATE;
+ goto out;
+ }
+
+ /* update only if there is no test EQ, or its sequence number still
+ * equals the one the caller passed in */
+ if (test_eq == NULL ||
+ test_eq->sequence == args->sequence_in) {
+ lib_me_t *me = md->me;
+
+#warning this does not track eq refcounts properly
+
+ ret->rc = lib_md_build(nal, md, private,
+ new, &new->eventq, md->unlink);
+
+ /* lib_md_build() clears the ME back-pointer; restore it */
+ md->me = me;
+ } else {
+ ret->rc = PTL_NOUPDATE;
+ }
+
+ out:
+ state_unlock(nal, &flags);
+ return (ret->rc);
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lib/lib-me.c
+ * Match Entry management routines
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __KERNEL__
+# include <stdio.h>
+#else
+# define DEBUG_SUBSYSTEM S_PORTALS
+# include <linux/kp30.h>
+#endif
+
+#include <portals/lib-p30.h>
+#include <portals/arg-blocks.h>
+
+static void lib_me_dump(nal_cb_t * nal, lib_me_t * me);
+
+/*
+ * Library half of PtlMEAttach: create a match entry on a portal table
+ * index.  PTL_INS_AFTER appends to that index's list; any other position
+ * puts the entry at its front.
+ *
+ * Returns (and stores in ret->rc) PTL_INV_PTINDEX for an out-of-range
+ * index, PTL_NOSPACE if no ME can be allocated, else PTL_OK with the new
+ * handle in ret->handle_out.
+ */
+int do_PtlMEAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        PtlMEAttach_in   *in = v_args;
+        PtlMEAttach_out  *out = v_ret;
+        lib_ptl_t        *ptable = &nal->ni.tbl;
+        struct list_head *head;
+        unsigned long     irq_flags;
+        lib_me_t         *new_me;
+
+        if (in->index_in < 0 || in->index_in >= ptable->size) {
+                out->rc = PTL_INV_PTINDEX;
+                return out->rc;
+        }
+
+        /* Should check for valid matchid, but not yet */
+
+        new_me = lib_me_alloc (nal);
+        if (new_me == NULL) {
+                out->rc = PTL_NOSPACE;
+                return (out->rc);
+        }
+
+        state_lock(nal, &irq_flags);
+
+        new_me->match_id    = in->match_id_in;
+        new_me->match_bits  = in->match_bits_in;
+        new_me->ignore_bits = in->ignore_bits_in;
+        new_me->unlink      = in->unlink_in;
+        new_me->md          = NULL;
+
+        lib_initialise_handle (nal, &new_me->me_lh);
+
+        head = &(ptable->tbl[in->index_in]);
+        if (in->position_in != PTL_INS_AFTER)
+                list_add(&new_me->me_list, head);
+        else
+                list_add_tail(&new_me->me_list, head);
+
+        ptl_me2handle(&out->handle_out, new_me);
+
+        state_unlock(nal, &irq_flags);
+
+        out->rc = PTL_OK;
+        return out->rc;
+}
+
+/*
+ * Library half of PtlMEInsert: create a match entry positioned relative
+ * to an existing one.
+ *
+ * Incoming (PtlMEInsert_in):  current_in, match_id_in, match_bits_in,
+ *                             ignore_bits_in, unlink_in, position_in
+ * Outgoing (PtlMEInsert_out): handle_out, rc
+ *
+ * Returns (and stores in ret->rc) PTL_NOSPACE if no ME can be allocated,
+ * PTL_INV_ME if current_in does not resolve, else PTL_OK.
+ */
+int do_PtlMEInsert(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        PtlMEInsert_in  *args = v_args;
+        PtlMEInsert_out *ret = v_ret;
+        unsigned long    flags;
+        lib_me_t        *me;
+        lib_me_t        *new;
+
+        new = lib_me_alloc (nal);
+        if (new == NULL)
+                return (ret->rc = PTL_NOSPACE);
+
+        /* Should check for valid matchid, but not yet */
+
+        state_lock(nal, &flags);
+
+        me = ptl_handle2me(&args->current_in, nal);
+        if (me == NULL) {
+                lib_me_free (nal, new);
+
+                state_unlock (nal, &flags);
+                return (ret->rc = PTL_INV_ME);
+        }
+
+        new->match_id = args->match_id_in;
+        new->match_bits = args->match_bits_in;
+        new->ignore_bits = args->ignore_bits_in;
+        new->unlink = args->unlink_in;
+        new->md = NULL;
+
+        lib_initialise_handle (nal, &new->me_lh);
+
+        /* The reference point here is an ME inside the list, not the list
+         * head: list_add() links the new node directly after it, while
+         * list_add_tail() links it directly before it. */
+        if (args->position_in == PTL_INS_AFTER)
+                list_add(&new->me_list, &me->me_list);
+        else
+                list_add_tail(&new->me_list, &me->me_list);
+
+        ptl_me2handle(&ret->handle_out, new);
+
+        state_unlock(nal, &flags);
+
+        return ret->rc = PTL_OK;
+}
+
+/*
+ * Library half of PtlMEUnlink: unlink the match entry named by
+ * current_in.
+ *
+ * Returns (and stores in ret->rc) PTL_INV_ME for an unknown handle,
+ * else PTL_OK.
+ */
+int do_PtlMEUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        PtlMEUnlink_in  *in = v_args;
+        PtlMEUnlink_out *out = v_ret;
+        unsigned long    irq_flags;
+        lib_me_t        *victim;
+
+        state_lock(nal, &irq_flags);
+
+        victim = ptl_handle2me(&in->current_in, nal);
+        if (victim != NULL) {
+                lib_me_unlink(nal, victim);
+                out->rc = PTL_OK;
+        } else {
+                out->rc = PTL_INV_ME;
+        }
+
+        state_unlock(nal, &irq_flags);
+
+        return (out->rc);
+}
+
+/*
+ * lib_me_unlink - remove a match entry from its list, unlink any MD
+ * attached to it, and free the entry.
+ *
+ * call with state_lock please
+ */
+void lib_me_unlink(nal_cb_t *nal, lib_me_t *me)
+{
+        lib_ni_t *ni = &nal->ni;
+        lib_md_t *attached;
+
+        if (ni->debug & PTL_DEBUG_UNLINK) {
+                ptl_handle_any_t handle;
+                ptl_me2handle(&handle, me);
+        }
+
+        list_del (&me->me_list);
+
+        attached = me->md;
+        if (attached) {
+                /* clear the MD's back-pointer before unlinking it */
+                attached->me = NULL;
+                lib_md_unlink(nal, me->md);
+        }
+
+        lib_invalidate_handle (nal, &me->me_lh);
+        lib_me_free(nal, me);
+}
+
+/*
+ * Library half of PtlTblDump: print every match entry on one portal
+ * table index through the NAL's printf callback.
+ *
+ * Returns (and stores in ret->rc) PTL_INV_PTINDEX for an out-of-range
+ * index, else PTL_OK.
+ */
+int do_PtlTblDump(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        PtlTblDump_in    *in = v_args;
+        PtlTblDump_out   *out = v_ret;
+        lib_ptl_t        *ptable = &nal->ni.tbl;
+        struct list_head *head;
+        struct list_head *pos;
+        ptl_handle_any_t  handle;
+        unsigned long     irq_flags;
+
+        if (in->index_in < 0 || in->index_in >= ptable->size) {
+                out->rc = PTL_INV_PTINDEX;
+                return out->rc;
+        }
+
+        nal->cb_printf(nal, "Portal table index %d\n", in->index_in);
+
+        state_lock(nal, &irq_flags);
+        head = &(ptable->tbl[in->index_in]);
+        for (pos = head->next; pos != head; pos = pos->next) {
+                lib_me_t *me = list_entry(pos, lib_me_t, me_list);
+
+                ptl_me2handle(&handle, me);
+                lib_me_dump(nal, me);
+        }
+        state_unlock(nal, &irq_flags);
+
+        out->rc = PTL_OK;
+        return out->rc;
+}
+
+/*
+ * Library half of PtlMEDump: print one match entry through the NAL's
+ * printf callback.
+ *
+ * Returns (and stores in ret->rc) PTL_INV_ME for an unknown handle,
+ * else PTL_OK.
+ */
+int do_PtlMEDump(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        PtlMEDump_in  *in = v_args;
+        PtlMEDump_out *out = v_ret;
+        unsigned long  irq_flags;
+        lib_me_t      *entry;
+
+        state_lock(nal, &irq_flags);
+
+        entry = ptl_handle2me(&in->current_in, nal);
+        if (entry != NULL) {
+                lib_me_dump(nal, entry);
+                out->rc = PTL_OK;
+        } else {
+                out->rc = PTL_INV_ME;
+        }
+
+        state_unlock(nal, &irq_flags);
+
+        return out->rc;
+}
+
+/*
+ * lib_me_dump - print one match entry through the NAL's printf callback:
+ * its address and handle cookie, match/ignore bits, attached MD, and the
+ * entries containing its list neighbours.
+ *
+ * The match and ignore bits are printed with LPX64, the format this file
+ * already uses for match bits and cookies, instead of "%016lx".
+ */
+static void lib_me_dump(nal_cb_t * nal, lib_me_t * me)
+{
+        nal->cb_printf(nal, "Match Entry %p ("LPX64")\n", me,
+                       me->me_lh.lh_cookie);
+
+        nal->cb_printf(nal, "\tMatch/Ignore\t= "LPX64" / "LPX64"\n",
+                       me->match_bits, me->ignore_bits);
+
+        nal->cb_printf(nal, "\tMD\t= %p\n", me->md);
+        /* NB a neighbour may be the list head rather than a real ME */
+        nal->cb_printf(nal, "\tprev\t= %p\n",
+                       list_entry(me->me_list.prev, lib_me_t, me_list));
+        nal->cb_printf(nal, "\tnext\t= %p\n",
+                       list_entry(me->me_list.next, lib_me_t, me_list));
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lib/lib-move.c
+ * Data movement routines
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __KERNEL__
+# include <stdio.h>
+#else
+# define DEBUG_SUBSYSTEM S_PORTALS
+# include <linux/kp30.h>
+#endif
+#include <portals/p30.h>
+#include <portals/lib-p30.h>
+#include <portals/arg-blocks.h>
+
+/*
+ * Right now it does not check access control lists.
+ *
+ * We only support one MD per ME, which is how the Portals 3.1 spec is written.
+ * All previous complication is removed.
+ */
+
+/*
+ * Find the first match entry on portal 'index' whose MD accepts a request
+ * of type 'op_mask' from src_nid/src_pid with the given match bits.
+ *
+ * On success the ME is returned, md->offset is advanced past the matched
+ * region and the following are filled in:
+ *   *mlength_out  number of bytes that will actually be moved
+ *   *offset_out   offset within the MD at which they start
+ *   *unlink_out   non-zero if the MD asked for auto-unlink and has no more
+ *                 than max_size bytes left
+ * NULL is returned (and the drop is logged) when the portal index is out
+ * of range, nothing matches, or the first match is too small and does not
+ * allow truncation.
+ *
+ * The callers in this file invoke it with the state lock held.
+ */
+static lib_me_t *
+lib_find_me(nal_cb_t *nal, int index, int op_mask, ptl_nid_t src_nid,
+            ptl_pid_t src_pid, ptl_size_t rlength, ptl_size_t roffset,
+            ptl_match_bits_t match_bits, ptl_size_t *mlength_out,
+            ptl_size_t *offset_out, int *unlink_out)
+{
+        lib_ni_t         *ni = &nal->ni;
+        struct list_head *match_list;
+        struct list_head *tmp;
+        lib_me_t         *me;
+        lib_md_t         *md;
+        ptl_size_t        mlength;
+        ptl_size_t        offset;
+
+        ENTRY;
+
+        CDEBUG (D_NET, "Request from "LPU64".%d of length %d into portal %d "
+                "MB="LPX64"\n", src_nid, src_pid, rlength, index, match_bits);
+
+        if (index < 0 || index >= ni->tbl.size) {
+                CERROR("Invalid portal %d not in [0-%d]\n",
+                       index, ni->tbl.size);
+                goto failed;
+        }
+
+        /* Only index the portal table once 'index' is known to be in
+         * range: it comes straight from the wire, and even forming an
+         * out-of-bounds element address is undefined. */
+        match_list = &ni->tbl.tbl[index];
+
+        list_for_each (tmp, match_list) {
+                me = list_entry(tmp, lib_me_t, me_list);
+                md = me->md;
+
+                /* ME attached but MD not attached yet */
+                if (md == NULL)
+                        continue;
+
+                LASSERT (me == md->me);
+
+                /* MD deactivated */
+                if (md->threshold == 0)
+                        continue;
+
+                /* mismatched MD op */
+                if ((md->options & op_mask) == 0)
+                        continue;
+
+                /* mismatched ME nid/pid? */
+                if (me->match_id.nid != PTL_NID_ANY &&
+                    me->match_id.nid != src_nid)
+                        continue;
+
+                if (me->match_id.pid != PTL_PID_ANY &&
+                    me->match_id.pid != src_pid)
+                        continue;
+
+                /* mismatched ME matchbits? */
+                if (((me->match_bits ^ match_bits) & ~me->ignore_bits) != 0)
+                        continue;
+
+                /* Hurrah!  This _is_ a match; check it out... */
+
+                /* locally managed MDs append at their own offset; remotely
+                 * managed ones use the offset supplied by the sender.
+                 * NOTE(review): 'roffset' is not checked against
+                 * md->length before the subtraction below - confirm that
+                 * an oversized remote offset cannot get past here. */
+                if ((md->options & PTL_MD_MANAGE_REMOTE) == 0)
+                        offset = md->offset;
+                else
+                        offset = roffset;
+
+                mlength = md->length - offset;
+                if ((md->options & PTL_MD_MAX_SIZE) != 0 &&
+                    mlength > md->max_size)
+                        mlength = md->max_size;
+
+                if (rlength <= mlength) {        /* fits in allowed space */
+                        mlength = rlength;
+                } else if ((md->options & PTL_MD_TRUNCATE) == 0) {
+                        /* this packet _really_ is too big */
+                        CERROR("Matching packet %d too big: %d left, "
+                               "%d allowed\n", rlength, md->length - offset,
+                               mlength);
+                        goto failed;
+                }
+
+                md->offset = offset + mlength;
+
+                *offset_out = offset;
+                *mlength_out = mlength;
+                *unlink_out = ((md->options & PTL_MD_AUTO_UNLINK) != 0 &&
+                               md->offset >= (md->length - md->max_size));
+                RETURN (me);
+        }
+
+ failed:
+        CERROR (LPU64": Dropping %s from "LPU64".%d portal %d match "LPX64
+                " offset %d length %d: no match\n",
+                ni->nid, (op_mask == PTL_MD_OP_GET) ? "GET" : "PUT",
+                src_nid, src_pid, index, match_bits, roffset, rlength);
+        RETURN(NULL);
+}
+
+/*
+ * Manage the list of simulated peer failures (nal->ni.ni_test_peers).
+ *
+ * A non-zero args->threshold adds a new entry for args->nid.  A zero
+ * threshold removes entries: every entry when args->nid is PTL_NID_ANY,
+ * otherwise those for args->nid, plus any entry whose threshold has
+ * already reached zero.
+ *
+ * ret->rc (also the return value) is PTL_FAIL if the new entry cannot be
+ * allocated and PTL_OK otherwise.
+ */
+int do_PtlFailNid (nal_cb_t *nal, void *private, void *v_args, void *v_ret)
+{
+        PtlFailNid_in    *args = v_args;
+        PtlFailNid_out   *ret = v_ret;
+        lib_test_peer_t  *peer;
+        unsigned long     flags;
+        struct list_head *pos;
+        struct list_head *nxt;
+        struct list_head  doomed;
+
+        if (args->threshold == 0) {
+                /* removing entries: unhook them under the lock... */
+                INIT_LIST_HEAD (&doomed);
+
+                state_lock (nal, &flags);
+
+                list_for_each_safe (pos, nxt, &nal->ni.ni_test_peers) {
+                        peer = list_entry (pos, lib_test_peer_t, tp_list);
+
+                        if (peer->tp_threshold != 0 &&   /* still live */
+                            args->nid != PTL_NID_ANY &&  /* not removing all */
+                            peer->tp_nid != args->nid)   /* not this one */
+                                continue;
+
+                        list_del (&peer->tp_list);
+                        list_add (&peer->tp_list, &doomed);
+                }
+
+                state_unlock (nal, &flags);
+
+                /* ...and free them once it has been dropped */
+                while (!list_empty (&doomed)) {
+                        peer = list_entry (doomed.next, lib_test_peer_t,
+                                           tp_list);
+
+                        list_del (&peer->tp_list);
+                        nal->cb_free (nal, peer, sizeof (*peer));
+                }
+                return (ret->rc = PTL_OK);
+        }
+
+        /* Adding a new entry */
+        peer = (lib_test_peer_t *)nal->cb_malloc (nal, sizeof (*peer));
+        if (peer == NULL)
+                return (ret->rc = PTL_FAIL);
+
+        peer->tp_nid = args->nid;
+        peer->tp_threshold = args->threshold;
+
+        state_lock (nal, &flags);
+        list_add (&peer->tp_list, &nal->ni.ni_test_peers);
+        state_unlock (nal, &flags);
+
+        return (ret->rc = PTL_OK);
+}
+
+/*
+ * Decide whether traffic to/from 'nid' should be dropped as a simulated
+ * failure.  Returns 1 if an active test-peer entry covers 'nid' (or is a
+ * PTL_NID_ANY entry), 0 otherwise.  A finite threshold on the matching
+ * entry is decremented.
+ *
+ * Entries whose threshold is zero are only unlinked and freed when
+ * 'outgoing' is set, since we may be at interrupt priority on incoming
+ * messages.
+ */
+static int
+fail_peer (nal_cb_t *nal, ptl_nid_t nid, int outgoing)
+{
+        struct list_head  zombies;
+        struct list_head *pos;
+        struct list_head *nxt;
+        lib_test_peer_t  *peer;
+        unsigned long     flags;
+        int               matched = 0;
+
+        INIT_LIST_HEAD (&zombies);
+
+        state_lock (nal, &flags);
+
+        list_for_each_safe (pos, nxt, &nal->ni.ni_test_peers) {
+                int expired;
+
+                peer = list_entry (pos, lib_test_peer_t, tp_list);
+
+                expired = (peer->tp_threshold == 0);
+                if (!expired) {
+                        /* live entry: does it cover this peer? */
+                        if (peer->tp_nid != PTL_NID_ANY &&
+                            peer->tp_nid != nid)
+                                continue;
+
+                        matched = 1;
+                        if (peer->tp_threshold != PTL_MD_THRESH_INF) {
+                                peer->tp_threshold--;
+                                expired = (peer->tp_threshold == 0);
+                        }
+                }
+
+                /* zombies are only culled on outgoing tests */
+                if (expired && outgoing) {
+                        list_del (&peer->tp_list);
+                        list_add (&peer->tp_list, &zombies);
+                }
+
+                if (matched)
+                        break;
+        }
+
+        state_unlock (nal, &flags);
+
+        /* free what was culled, outside the lock */
+        while (!list_empty (&zombies)) {
+                peer = list_entry (zombies.next, lib_test_peer_t, tp_list);
+                list_del (&peer->tp_list);
+
+                nal->cb_free (nal, peer, sizeof (*peer));
+        }
+
+        return (matched);
+}
+
+/* Return the total number of bytes described by the first 'niov'
+ * entries of 'iov' (0 when niov <= 0). */
+ptl_size_t
+lib_iov_nob (int niov, struct iovec *iov)
+{
+        ptl_size_t total = 0;
+        int        i;
+
+        for (i = 0; i < niov; i++)
+                total += iov[i].iov_len;
+
+        return (total);
+}
+
+/* Gather 'len' bytes from the fragments of 'iov' into the contiguous
+ * buffer 'dest'.  Asserts that the iov does not run out first. */
+void
+lib_copy_iov2buf (char *dest, int niov, struct iovec *iov, ptl_size_t len)
+{
+        ptl_size_t chunk;
+
+        for (; len > 0; niov--, iov++) {
+                LASSERT (niov > 0);
+
+                /* take the whole fragment, or just what is still wanted */
+                chunk = MIN (iov->iov_len, len);
+                memcpy (dest, iov->iov_base, chunk);
+
+                dest += chunk;
+                len -= chunk;
+        }
+}
+
+/* Scatter 'len' bytes from the contiguous buffer 'src' across the
+ * fragments of 'iov'.  Asserts that the iov does not run out first. */
+void
+lib_copy_buf2iov (int niov, struct iovec *iov, char *src, ptl_size_t len)
+{
+        ptl_size_t chunk;
+
+        for (; len > 0; niov--, iov++) {
+                LASSERT (niov > 0);
+
+                /* fill the whole fragment, or just what is still left */
+                chunk = MIN (iov->iov_len, len);
+                memcpy (iov->iov_base, src, chunk);
+
+                src += chunk;
+                len -= chunk;
+        }
+}
+
+/*
+ * Build in 'dst' an iovec describing bytes [offset, offset + len) of the
+ * memory described by md->md_iov.iov, and return the number of 'dst'
+ * entries filled in (0 when len == 0).  The MD's own iovec is only read.
+ */
+static int
+lib_extract_iov (struct iovec *dst, lib_md_t *md,
+ ptl_size_t offset, ptl_size_t len)
+{
+ /* Initialise 'dst' to the subset of 'src' starting at 'offset',
+ * for exactly 'len' bytes, and return the number of entries.
+ * NB not destructive to 'src' */
+ int src_niov = md->md_niov;
+ struct iovec *src = md->md_iov.iov;
+ ptl_size_t frag_len;
+ int dst_niov;
+
+ LASSERT (len >= 0);
+ LASSERT (offset >= 0);
+ LASSERT (offset + len <= md->length);
+
+ if (len == 0) /* no data => */
+ return (0); /* no frags */
+
+ LASSERT (src_niov > 0);
+ while (offset >= src->iov_len) { /* skip initial frags */
+ offset -= src->iov_len;
+ src_niov--;
+ src++;
+ LASSERT (src_niov > 0);
+ }
+
+ /* 'src' is now the fragment containing the first wanted byte and
+ * 'offset' is that byte's position within it */
+ dst_niov = 1;
+ for (;;) {
+ LASSERT (src_niov > 0);
+ LASSERT (dst_niov <= PTL_MD_MAX_IOV);
+
+ frag_len = src->iov_len - offset;
+ dst->iov_base = ((char *)src->iov_base) + offset;
+
+ /* last fragment: it may only be partially used */
+ if (len <= frag_len) {
+ dst->iov_len = len;
+ return (dst_niov);
+ }
+
+ dst->iov_len = frag_len;
+
+ len -= frag_len;
+ dst++;
+ src++;
+ dst_niov++;
+ src_niov--;
+ /* only the first fragment starts part-way in */
+ offset = 0;
+ }
+}
+
+#ifndef __KERNEL__
+/* Non-__KERNEL__ build of lib_kiov_nob(): trips LASSERT (0) if it is
+ * ever called; the return only gives the function a value. */
+ptl_size_t
+lib_kiov_nob (int niov, ptl_kiov_t *kiov)
+{
+ LASSERT (0);
+ return (0);
+}
+
+/* Non-__KERNEL__ build of lib_copy_kiov2buf(): trips LASSERT (0) if it
+ * is ever called and copies nothing. */
+void
+lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, ptl_size_t len)
+{
+ LASSERT (0);
+}
+
+/* Non-__KERNEL__ build of lib_copy_buf2kiov(): trips LASSERT (0) if it
+ * is ever called and copies nothing. */
+void
+lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, char *dest, ptl_size_t len)
+{
+ LASSERT (0);
+}
+
+/*
+ * Non-__KERNEL__ build of lib_extract_kiov(): trips LASSERT (0) if it is
+ * ever called.
+ *
+ * Returns 0 (no fragments) so that the function still yields a defined
+ * value if the assertion does not stop execution; lib_send() and
+ * lib_recv() pass the result on as a fragment count.
+ */
+static int
+lib_extract_kiov (ptl_kiov_t *dst, lib_md_t *md,
+                  ptl_size_t offset, ptl_size_t len)
+{
+        LASSERT (0);
+        return (0);
+}
+
+#else
+
+/* Return the total number of bytes described by the first 'niov'
+ * entries of 'kiov' (0 when niov <= 0). */
+ptl_size_t
+lib_kiov_nob (int niov, ptl_kiov_t *kiov)
+{
+        ptl_size_t total = 0;
+        int        i;
+
+        for (i = 0; i < niov; i++)
+                total += kiov[i].kiov_len;
+
+        return (total);
+}
+
+/* Gather 'len' bytes from the page fragments of 'kiov' into the
+ * contiguous buffer 'dest'.  Each page is mapped with kmap() for the
+ * duration of its copy; must not be called from interrupt context. */
+void
+lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, ptl_size_t len)
+{
+        ptl_size_t  chunk;
+        char       *mapped;
+
+        LASSERT (!in_interrupt ());
+        for (; len > 0; niov--, kiov++) {
+                LASSERT (niov > 0);
+                chunk = MIN (kiov->kiov_len, len);
+
+                mapped = (char *)kmap (kiov->kiov_page);
+                memcpy (dest, mapped + kiov->kiov_offset, chunk);
+                kunmap (kiov->kiov_page);
+
+                dest += chunk;
+                len -= chunk;
+        }
+}
+
+/* Scatter 'len' bytes from the contiguous buffer 'src' across the page
+ * fragments of 'kiov'.  Each page is mapped with kmap() for the duration
+ * of its copy; must not be called from interrupt context. */
+void
+lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, char *src, ptl_size_t len)
+{
+        ptl_size_t  chunk;
+        char       *mapped;
+
+        LASSERT (!in_interrupt ());
+        for (; len > 0; niov--, kiov++) {
+                LASSERT (niov > 0);
+                chunk = MIN (kiov->kiov_len, len);
+
+                mapped = (char *)kmap (kiov->kiov_page);
+                memcpy (mapped + kiov->kiov_offset, src, chunk);
+                kunmap (kiov->kiov_page);
+
+                src += chunk;
+                len -= chunk;
+        }
+}
+
+/*
+ * Build in 'dst' a page-fragment vector describing bytes
+ * [offset, offset + len) of the memory described by md->md_iov.kiov, and
+ * return the number of 'dst' entries filled in (0 when len == 0).  The
+ * MD's own vector is only read.
+ */
+static int
+lib_extract_kiov (ptl_kiov_t *dst, lib_md_t *md,
+ ptl_size_t offset, ptl_size_t len)
+{
+ /* Initialise 'dst' to the subset of 'src' starting at 'offset',
+ * for exactly 'len' bytes, and return the number of entries.
+ * NB not destructive to 'src' */
+ int src_niov = md->md_niov;
+ ptl_kiov_t *src = md->md_iov.kiov;
+ ptl_size_t frag_len;
+ int dst_niov;
+
+ LASSERT (len >= 0);
+ LASSERT (offset >= 0);
+ LASSERT (offset + len <= md->length);
+
+ if (len == 0) /* no data => */
+ return (0); /* no frags */
+
+ LASSERT (src_niov > 0);
+ while (offset >= src->kiov_len) { /* skip initial frags */
+ offset -= src->kiov_len;
+ src_niov--;
+ src++;
+ LASSERT (src_niov > 0);
+ }
+
+ /* 'src' is now the fragment containing the first wanted byte and
+ * 'offset' is that byte's position within it */
+ dst_niov = 1;
+ for (;;) {
+ LASSERT (src_niov > 0);
+ LASSERT (dst_niov <= PTL_MD_MAX_IOV);
+
+ frag_len = src->kiov_len - offset;
+ dst->kiov_page = src->kiov_page;
+ dst->kiov_offset = src->kiov_offset + offset;
+
+ /* last fragment: it may only be partially used */
+ if (len <= frag_len) {
+ dst->kiov_len = len;
+ LASSERT (dst->kiov_offset + dst->kiov_len <= PAGE_SIZE);
+ return (dst_niov);
+ }
+
+ dst->kiov_len = frag_len;
+ LASSERT (dst->kiov_offset + dst->kiov_len <= PAGE_SIZE);
+
+ len -= frag_len;
+ dst++;
+ src++;
+ dst_niov++;
+ src_niov--;
+ /* only the first fragment starts part-way in */
+ offset = 0;
+ }
+}
+#endif
+
+/*
+ * Hand an incoming message to the NAL's receive callback.
+ *
+ * 'mlen' bytes are to land at 'offset' within 'md'; 'rlen' is the length
+ * that was sent.  With mlen == 0 no fragments are passed (the callers in
+ * this file then pass md == NULL).  Otherwise the fragment list is built
+ * in msg->msg_iov and passed to cb_recv or, for PTL_MD_KIOV MDs, to
+ * cb_recv_pages.
+ */
+void
+lib_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
+          ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen)
+{
+        int nfrags;
+
+        if (mlen == 0) {
+                nal->cb_recv (nal, private, msg, 0, NULL, 0, rlen);
+                return;
+        }
+
+        if ((md->options & PTL_MD_KIOV) != 0) {
+                /* page-based MD */
+                nfrags = lib_extract_kiov (msg->msg_iov.kiov, md,
+                                           offset, mlen);
+                nal->cb_recv_pages (nal, private, msg,
+                                    nfrags, msg->msg_iov.kiov, mlen, rlen);
+                return;
+        }
+
+        /* virtual-address MD */
+        nfrags = lib_extract_iov (msg->msg_iov.iov, md, offset, mlen);
+        nal->cb_recv (nal, private, msg,
+                      nfrags, msg->msg_iov.iov, mlen, rlen);
+}
+
+/*
+ * Hand an outgoing message to the NAL's send callback and return the
+ * callback's result.
+ *
+ * 'len' bytes starting at 'offset' within 'md' form the payload.  With
+ * len == 0 no fragments are passed and 'md' is not touched.  Otherwise
+ * the fragment list is built in msg->msg_iov and passed to cb_send or,
+ * for PTL_MD_KIOV MDs, to cb_send_pages.
+ */
+int
+lib_send (nal_cb_t *nal, void *private, lib_msg_t *msg,
+          ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+          lib_md_t *md, ptl_size_t offset, ptl_size_t len)
+{
+        int nfrags;
+        int rc;
+
+        if (len == 0) {
+                /* header only */
+                rc = nal->cb_send (nal, private, msg,
+                                   hdr, type, nid, pid,
+                                   0, NULL, 0);
+        } else if ((md->options & PTL_MD_KIOV) == 0) {
+                /* virtual-address MD */
+                nfrags = lib_extract_iov (msg->msg_iov.iov, md, offset, len);
+                rc = nal->cb_send (nal, private, msg,
+                                   hdr, type, nid, pid,
+                                   nfrags, msg->msg_iov.iov, len);
+        } else {
+                /* page-based MD */
+                nfrags = lib_extract_kiov (msg->msg_iov.kiov, md, offset, len);
+                rc = nal->cb_send_pages (nal, private, msg,
+                                         hdr, type, nid, pid,
+                                         nfrags, msg->msg_iov.kiov, len);
+        }
+
+        return (rc);
+}
+
+/*
+ * Allocate and initialise a message descriptor bound to 'md'.
+ *
+ * On success the MD's pending count is bumped, a finite threshold is
+ * decremented, the allocation counters are updated and the message is put
+ * on the active list.  Returns NULL, with nothing changed, if the
+ * allocation fails.
+ */
+static lib_msg_t *
+get_new_msg (nal_cb_t *nal, lib_md_t *md)
+{
+        /* ALWAYS called holding the state_lock */
+        lib_counters_t *stats;
+        lib_msg_t      *new_msg;
+
+        new_msg = lib_msg_alloc (nal);
+        if (new_msg == NULL)
+                return (NULL);
+
+        memset (new_msg, 0, sizeof (*new_msg));
+
+        new_msg->send_ack = 0;
+        new_msg->md = md;
+        new_msg->ev.arrival_time = get_cycles();
+
+        /* the MD is busy until this message completes */
+        md->pending++;
+        if (md->threshold != PTL_MD_THRESH_INF) {
+                LASSERT (md->threshold > 0);
+                md->threshold--;
+        }
+
+        /* track current and high-water allocation counts */
+        stats = &nal->ni.counters;
+        stats->msgs_alloc++;
+        if (stats->msgs_max < stats->msgs_alloc)
+                stats->msgs_max = stats->msgs_alloc;
+
+        list_add (&new_msg->msg_list, &nal->ni.ni_active_msgs);
+
+        return (new_msg);
+}
+
+
+/*
+ * Incoming messages have a ptl_msg_t object associated with them
+ * by the library. This object encapsulates the state of the
+ * message and allows the NAL to do non-blocking receives or sends
+ * of long messages.
+ *
+ */
+/*
+ * Handle an incoming PUT header.
+ *
+ * The put-specific header fields are converted to host byte order, then
+ * (under the state lock) a matching ME/MD is looked up, a message is
+ * allocated against the MD, the ACK and event details are recorded and
+ * the receive counters are updated.  The payload is then handed to
+ * lib_recv() with the lock dropped.
+ *
+ * Returns 0 when the PUT was accepted.  If no ME matches or no message
+ * can be allocated, the drop counters are bumped, lib_recv() is called
+ * with no message and no MD, and -1 is returned.
+ */
+static int parse_put(nal_cb_t * nal, ptl_hdr_t * hdr, void *private)
+{
+ lib_ni_t *ni = &nal->ni;
+ ptl_size_t mlength = 0;
+ ptl_size_t offset = 0;
+ int unlink = 0;
+ lib_me_t *me;
+ lib_md_t *md;
+ lib_msg_t *msg;
+ unsigned long flags;
+
+ /* Convert put fields to host byte order */
+ hdr->msg.put.match_bits = NTOH__u64 (hdr->msg.put.match_bits);
+ hdr->msg.put.ptl_index = NTOH__u32 (hdr->msg.put.ptl_index);
+ hdr->msg.put.offset = NTOH__u32 (hdr->msg.put.offset);
+
+ state_lock(nal, &flags);
+
+ me = lib_find_me(nal, hdr->msg.put.ptl_index, PTL_MD_OP_PUT,
+ hdr->src_nid, hdr->src_pid,
+ PTL_HDR_LENGTH (hdr), hdr->msg.put.offset,
+ hdr->msg.put.match_bits,
+ &mlength, &offset, &unlink);
+ if (me == NULL)
+ goto drop;
+
+ md = me->md;
+ CDEBUG(D_NET, "Incoming put index %x from "LPU64"/%u of length %d/%d "
+ "into md "LPX64" [%d] + %d\n", hdr->msg.put.ptl_index,
+ hdr->src_nid, hdr->src_pid, mlength, PTL_HDR_LENGTH(hdr),
+ md->md_lh.lh_cookie, md->md_niov, offset);
+
+ msg = get_new_msg (nal, md);
+ if (msg == NULL) {
+ CERROR(LPU64": Dropping PUT from "LPU64": can't allocate msg\n",
+ ni->nid, hdr->src_nid);
+ goto drop;
+ }
+
+ /* remember where to send the ACK, if the sender asked for one and
+ * the MD has not disabled ACKs */
+ if (!ptl_is_wire_handle_none(&hdr->msg.put.ack_wmd) &&
+ !(md->options & PTL_MD_ACK_DISABLE)) {
+ msg->send_ack = 1;
+ msg->ack_wmd = hdr->msg.put.ack_wmd;
+ msg->nid = hdr->src_nid;
+ msg->pid = hdr->src_pid;
+ msg->ev.match_bits = hdr->msg.put.match_bits;
+ }
+
+ /* the event is only filled in when the MD has an event queue */
+ if (md->eq) {
+ msg->ev.type = PTL_EVENT_PUT;
+ msg->ev.initiator.nid = hdr->src_nid;
+ msg->ev.initiator.pid = hdr->src_pid;
+ msg->ev.portal = hdr->msg.put.ptl_index;
+ msg->ev.match_bits = hdr->msg.put.match_bits;
+ msg->ev.rlength = PTL_HDR_LENGTH(hdr);
+ msg->ev.mlength = mlength;
+ msg->ev.offset = offset;
+ msg->ev.hdr_data = hdr->msg.put.hdr_data;
+
+ /* NB if this match has exhausted the MD, we can't be sure
+ * that this event will be the last one associated with
+ * this MD in the event queue (another message already
+ * matching this ME/MD could end up being last). So we
+ * remember the ME handle anyway and check again when we're
+ * allocating our slot in the event queue.
+ */
+ ptl_me2handle (&msg->ev.unlinked_me, me);
+
+ lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
+ }
+
+ ni->counters.recv_count++;
+ ni->counters.recv_length += mlength;
+
+ /* only unlink after MD's pending count has been bumped
+ * in get_new_msg() otherwise lib_me_unlink() will nuke it */
+ if (unlink) {
+ md->md_flags |= PTL_MD_FLAG_AUTO_UNLINKED;
+ lib_me_unlink (nal, me);
+ }
+
+ state_unlock(nal, &flags);
+
+ lib_recv (nal, private, msg, md, offset, mlength, PTL_HDR_LENGTH (hdr));
+ return 0;
+
+ drop:
+ /* reached with the state lock still held */
+ nal->ni.counters.drop_count++;
+ nal->ni.counters.drop_length += PTL_HDR_LENGTH(hdr);
+ state_unlock (nal, &flags);
+ lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr));
+ return -1;
+}
+
+/*
+ * Handle an incoming GET header.
+ *
+ * The get-specific header fields are converted to host byte order, then
+ * (under the state lock) a matching ME/MD is looked up, a message is
+ * allocated against the MD, the event details are recorded and the send
+ * counters are updated.  With the lock dropped, a REPLY header is built
+ * and sent back with 'mlength' bytes of the MD starting at 'offset', and
+ * the incoming message is completed through lib_recv().
+ *
+ * Returns the lib_send() result (0) on success.  If no ME matches, no
+ * message can be allocated or the REPLY cannot be sent, the drop counters
+ * are bumped, lib_recv() is called with no message and no MD, and -1 is
+ * returned.
+ */
+static int parse_get(nal_cb_t * nal, ptl_hdr_t * hdr, void *private)
+{
+ lib_ni_t *ni = &nal->ni;
+ ptl_size_t mlength = 0;
+ ptl_size_t offset = 0;
+ int unlink = 0;
+ lib_me_t *me;
+ lib_md_t *md;
+ lib_msg_t *msg;
+ ptl_hdr_t reply;
+ unsigned long flags;
+ int rc;
+
+ /* Convert get fields to host byte order */
+ hdr->msg.get.match_bits = NTOH__u64 (hdr->msg.get.match_bits);
+ hdr->msg.get.ptl_index = NTOH__u32 (hdr->msg.get.ptl_index);
+ hdr->msg.get.sink_length = NTOH__u32 (hdr->msg.get.sink_length);
+ hdr->msg.get.src_offset = NTOH__u32 (hdr->msg.get.src_offset);
+
+ /* compatibility check until field is deleted */
+ if (hdr->msg.get.return_offset != 0)
+ CERROR("Unexpected non-zero get.return_offset %x from "
+ LPU64"\n", hdr->msg.get.return_offset, hdr->src_nid);
+
+ state_lock(nal, &flags);
+
+ /* the requested length is the sink length, not the header length */
+ me = lib_find_me(nal, hdr->msg.get.ptl_index, PTL_MD_OP_GET,
+ hdr->src_nid, hdr->src_pid,
+ hdr->msg.get.sink_length, hdr->msg.get.src_offset,
+ hdr->msg.get.match_bits,
+ &mlength, &offset, &unlink);
+ if (me == NULL)
+ goto drop;
+
+ md = me->md;
+ CDEBUG(D_NET, "Incoming get index %d from "LPU64".%u of length %d/%d "
+ "from md "LPX64" [%d] + %d\n", hdr->msg.get.ptl_index,
+ hdr->src_nid, hdr->src_pid, mlength, PTL_HDR_LENGTH(hdr),
+ md->md_lh.lh_cookie, md->md_niov, offset);
+
+ msg = get_new_msg (nal, md);
+ if (msg == NULL) {
+ CERROR(LPU64": Dropping GET from "LPU64": can't allocate msg\n",
+ ni->nid, hdr->src_nid);
+ goto drop;
+ }
+
+ /* the event is only filled in when the MD has an event queue */
+ if (md->eq) {
+ msg->ev.type = PTL_EVENT_GET;
+ msg->ev.initiator.nid = hdr->src_nid;
+ msg->ev.initiator.pid = hdr->src_pid;
+ msg->ev.portal = hdr->msg.get.ptl_index;
+ msg->ev.match_bits = hdr->msg.get.match_bits;
+ /* NOTE(review): rlength is taken from the header length while
+ * the match above used get.sink_length - confirm which one the
+ * event is meant to report */
+ msg->ev.rlength = PTL_HDR_LENGTH(hdr);
+ msg->ev.mlength = mlength;
+ msg->ev.offset = offset;
+ msg->ev.hdr_data = 0;
+
+ /* NB if this match has exhausted the MD, we can't be sure
+ * that this event will be the last one associated with
+ * this MD in the event queue (another message already
+ * matching this ME/MD could end up being last). So we
+ * remember the ME handle anyway and check again when we're
+ * allocating our slot in the event queue.
+ */
+ ptl_me2handle (&msg->ev.unlinked_me, me);
+
+ lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
+ }
+
+ ni->counters.send_count++;
+ ni->counters.send_length += mlength;
+
+ /* only unlink after MD's refcount has been bumped
+ * in get_new_msg() otherwise lib_me_unlink() will nuke it */
+ if (unlink) {
+ md->md_flags |= PTL_MD_FLAG_AUTO_UNLINKED;
+ lib_me_unlink (nal, me);
+ }
+
+ state_unlock(nal, &flags);
+
+ /* build the REPLY header in network byte order */
+ memset (&reply, 0, sizeof (reply));
+ reply.type = HTON__u32 (PTL_MSG_REPLY);
+ reply.dest_nid = HTON__u64 (hdr->src_nid);
+ reply.src_nid = HTON__u64 (ni->nid);
+ reply.dest_pid = HTON__u32 (hdr->src_pid);
+ reply.src_pid = HTON__u32 (ni->pid);
+ PTL_HDR_LENGTH(&reply) = HTON__u32 (mlength);
+
+ /* the wire handle is passed back exactly as it was received */
+ reply.msg.reply.dst_wmd = hdr->msg.get.return_wmd;
+
+ rc = lib_send (nal, private, msg, &reply, PTL_MSG_REPLY,
+ hdr->src_nid, hdr->src_pid, md, offset, mlength);
+ if (rc != 0) {
+ CERROR(LPU64": Dropping GET from "LPU64": send REPLY failed\n",
+ ni->nid, hdr->src_nid);
+ /* NOTE(review): 'msg' and the MD pending count bumped by
+ * get_new_msg() are not released on this path - confirm the
+ * NAL or a later finalize is expected to do it */
+ /* the drop path expects the state lock to be held */
+ state_lock (nal, &flags);
+ goto drop;
+ }
+
+ /* Complete the incoming message */
+ lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr));
+ return (rc);
+ drop:
+ ni->counters.drop_count++;
+ ni->counters.drop_length += hdr->msg.get.sink_length;
+ state_unlock(nal, &flags);
+ lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr));
+ return -1;
+}
+
+/*
+ * Handle an incoming REPLY header (the answer to a GET we issued).
+ *
+ * Under the state lock the destination MD is looked up from the wire
+ * handle, the reply length is checked against the MD (and clipped to
+ * md->length when PTL_MD_TRUNCATE is set), a message is allocated, the
+ * event details are recorded and the receive counters are updated.  The
+ * payload is then received at offset 0 of the MD with the lock dropped.
+ *
+ * Returns 0 when the REPLY was accepted.  If the MD is invalid or
+ * inactive, the reply would overflow it, or no message can be allocated,
+ * the drop counters are bumped, lib_recv() is called with no message and
+ * no MD, and -1 is returned.
+ */
+static int parse_reply(nal_cb_t * nal, ptl_hdr_t * hdr, void *private)
+{
+ lib_ni_t *ni = &nal->ni;
+ lib_md_t *md;
+ int rlength;
+ int length;
+ lib_msg_t *msg;
+ unsigned long flags;
+
+ /* compatibility check until field is deleted */
+ if (hdr->msg.reply.dst_offset != 0)
+ CERROR("Unexpected non-zero reply.dst_offset %x from "LPU64"\n",
+ hdr->msg.reply.dst_offset, hdr->src_nid);
+
+ state_lock(nal, &flags);
+
+ /* NB handles only looked up by creator (no flips) */
+ md = ptl_wire_handle2md(&hdr->msg.reply.dst_wmd, nal);
+ if (md == NULL || md->threshold == 0) {
+ CERROR (LPU64": Dropping REPLY from "LPU64" for %s MD "LPX64"."LPX64"\n",
+ ni->nid, hdr->src_nid,
+ md == NULL ? "invalid" : "inactive",
+ hdr->msg.reply.dst_wmd.wh_interface_cookie,
+ hdr->msg.reply.dst_wmd.wh_object_cookie);
+ goto drop;
+ }
+
+ LASSERT (md->offset == 0);
+
+ /* rlength: what was sent; length: what we will actually receive */
+ length = rlength = PTL_HDR_LENGTH(hdr);
+
+ if (length > md->length) {
+ if ((md->options & PTL_MD_TRUNCATE) == 0) {
+ CERROR (LPU64": Dropping REPLY from "LPU64
+ " length %d for MD "LPX64" would overflow (%d)\n",
+ ni->nid, hdr->src_nid, length,
+ hdr->msg.reply.dst_wmd.wh_object_cookie,
+ md->length);
+ goto drop;
+ }
+ length = md->length;
+ }
+
+ CDEBUG(D_NET, "Reply from "LPU64" of length %d/%d into md "LPX64"\n",
+ hdr->src_nid, length, rlength,
+ hdr->msg.reply.dst_wmd.wh_object_cookie);
+
+ msg = get_new_msg (nal, md);
+ if (msg == NULL) {
+ CERROR(LPU64": Dropping REPLY from "LPU64": can't "
+ "allocate msg\n", ni->nid, hdr->src_nid);
+ goto drop;
+ }
+
+ /* the event is only filled in when the MD has an event queue */
+ if (md->eq) {
+ msg->ev.type = PTL_EVENT_REPLY;
+ msg->ev.initiator.nid = hdr->src_nid;
+ msg->ev.initiator.pid = hdr->src_pid;
+ msg->ev.rlength = rlength;
+ msg->ev.mlength = length;
+ msg->ev.offset = 0;
+
+ lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
+ }
+
+ ni->counters.recv_count++;
+ ni->counters.recv_length += length;
+
+ state_unlock(nal, &flags);
+
+ lib_recv (nal, private, msg, md, 0, length, rlength);
+ return 0;
+
+ drop:
+ /* reached with the state lock still held */
+ nal->ni.counters.drop_count++;
+ nal->ni.counters.drop_length += PTL_HDR_LENGTH(hdr);
+ state_unlock (nal, &flags);
+ lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr));
+ return -1;
+}
+
+/*
+ * Handle an incoming ACK header (the acknowledgement of a PUT we issued).
+ *
+ * The ack-specific header fields are converted to host byte order, then
+ * (under the state lock) the destination MD is looked up from the wire
+ * handle, a message is allocated, the event details are recorded and the
+ * receive count is bumped.  The message is then completed through
+ * lib_recv() with no MD and zero length.
+ *
+ * Returns 0 when the ACK was accepted.  If the MD is invalid or inactive
+ * or no message can be allocated, the drop count is bumped, lib_recv() is
+ * called with no message, and -1 is returned.
+ */
+static int parse_ack(nal_cb_t * nal, ptl_hdr_t * hdr, void *private)
+{
+ lib_ni_t *ni = &nal->ni;
+ lib_md_t *md;
+ lib_msg_t *msg = NULL;
+ unsigned long flags;
+
+ /* Convert ack fields to host byte order */
+ hdr->msg.ack.match_bits = NTOH__u64 (hdr->msg.ack.match_bits);
+ hdr->msg.ack.mlength = NTOH__u32 (hdr->msg.ack.mlength);
+
+ state_lock(nal, &flags);
+
+ /* NB handles only looked up by creator (no flips) */
+ md = ptl_wire_handle2md(&hdr->msg.ack.dst_wmd, nal);
+ if (md == NULL || md->threshold == 0) {
+ CERROR(LPU64": Dropping ACK from "LPU64" to %s MD "
+ LPX64"."LPX64"\n", ni->nid, hdr->src_nid,
+ (md == NULL) ? "invalid" : "inactive",
+ hdr->msg.ack.dst_wmd.wh_interface_cookie,
+ hdr->msg.ack.dst_wmd.wh_object_cookie);
+ goto drop;
+ }
+
+ CDEBUG(D_NET, LPU64": ACK from "LPU64" into md "LPX64"\n",
+ ni->nid, hdr->src_nid,
+ hdr->msg.ack.dst_wmd.wh_object_cookie);
+
+ msg = get_new_msg (nal, md);
+ if (msg == NULL) {
+ CERROR(LPU64": Dropping ACK from "LPU64": can't allocate msg\n",
+ ni->nid, hdr->src_nid);
+ goto drop;
+ }
+
+ /* the event is only filled in when the MD has an event queue */
+ if (md->eq) {
+ msg->ev.type = PTL_EVENT_ACK;
+ msg->ev.initiator.nid = hdr->src_nid;
+ msg->ev.initiator.pid = hdr->src_pid;
+ msg->ev.mlength = hdr->msg.ack.mlength;
+ msg->ev.match_bits = hdr->msg.ack.match_bits;
+
+ lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
+ }
+
+ ni->counters.recv_count++;
+ state_unlock(nal, &flags);
+ lib_recv (nal, private, msg, NULL, 0, 0, PTL_HDR_LENGTH (hdr));
+ return 0;
+
+ drop:
+ /* reached with the state lock still held; only the drop count is
+ * updated here, not the drop length */
+ nal->ni.counters.drop_count++;
+ state_unlock (nal, &flags);
+ lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr));
+ return -1;
+}
+
+/* Map hdr->type (already in host byte order) to a printable name;
+ * unrecognised types yield "<UNKNOWN>". */
+static char *
+hdr_type_string (ptl_hdr_t *hdr)
+{
+        char *name;
+
+        switch (hdr->type) {
+        case PTL_MSG_ACK:
+                name = "ACK";
+                break;
+        case PTL_MSG_PUT:
+                name = "PUT";
+                break;
+        case PTL_MSG_GET:
+                name = "GET";
+                break;
+        case PTL_MSG_REPLY:
+                name = "REPLY";
+                break;
+        case PTL_MSG_HELLO:
+                name = "HELLO";
+                break;
+        default:
+                name = "<UNKNOWN>";
+                break;
+        }
+
+        return (name);
+}
+
+/*
+ * Print a human-readable description of 'hdr' through the NAL's printf
+ * callback: the common fields first, then the fields specific to the
+ * message type.  Field values are printed as stored, so the header is
+ * expected to be in host byte order already.
+ */
+void print_hdr(nal_cb_t * nal, ptl_hdr_t * hdr)
+{
+        char *type_str = hdr_type_string (hdr);
+
+        nal->cb_printf(nal, "P3 Header at %p of type %s\n", hdr, type_str);
+
+        /* nids are 64-bit but pids are 32-bit: give each its own
+         * conversion so the argument list is consumed correctly */
+        nal->cb_printf(nal, "    From nid/pid "LPU64"/%u", hdr->src_nid,
+                       hdr->src_pid);
+        nal->cb_printf(nal, "    To nid/pid "LPU64"/%u\n", hdr->dest_nid,
+                       hdr->dest_pid);
+
+        switch (hdr->type) {
+        default:
+                break;
+
+        case PTL_MSG_PUT:
+                nal->cb_printf(nal,
+                               "    Ptl index %d, ack md "LPX64"."LPX64", "
+                               "match bits "LPX64"\n",
+                               hdr->msg.put.ptl_index,
+                               hdr->msg.put.ack_wmd.wh_interface_cookie,
+                               hdr->msg.put.ack_wmd.wh_object_cookie,
+                               hdr->msg.put.match_bits);
+                nal->cb_printf(nal,
+                               "    Length %d, offset %d, hdr data "LPX64"\n",
+                               PTL_HDR_LENGTH(hdr), hdr->msg.put.offset,
+                               hdr->msg.put.hdr_data);
+                break;
+
+        case PTL_MSG_GET:
+                nal->cb_printf(nal,
+                               "    Ptl index %d, return md "LPX64"."LPX64", "
+                               "match bits "LPX64"\n", hdr->msg.get.ptl_index,
+                               hdr->msg.get.return_wmd.wh_interface_cookie,
+                               hdr->msg.get.return_wmd.wh_object_cookie,
+                               hdr->msg.get.match_bits);
+                nal->cb_printf(nal,
+                               "    Length %d, src offset %d\n",
+                               hdr->msg.get.sink_length,
+                               hdr->msg.get.src_offset);
+                break;
+
+        case PTL_MSG_ACK:
+                nal->cb_printf(nal, "    dst md "LPX64"."LPX64", "
+                               "manipulated length %d\n",
+                               hdr->msg.ack.dst_wmd.wh_interface_cookie,
+                               hdr->msg.ack.dst_wmd.wh_object_cookie,
+                               hdr->msg.ack.mlength);
+                break;
+
+        case PTL_MSG_REPLY:
+                nal->cb_printf(nal, "    dst md "LPX64"."LPX64", "
+                               "length %d\n",
+                               hdr->msg.reply.dst_wmd.wh_interface_cookie,
+                               hdr->msg.reply.dst_wmd.wh_object_cookie,
+                               PTL_HDR_LENGTH(hdr));
+                break;
+        }
+
+}                               /* end of print_hdr() */
+
+
+/*
+ * Entry point for a newly arrived message header.
+ *
+ * Converts the common header fields to host byte order in place, then
+ * drops the message if it is a HELLO, is addressed to another nid,
+ * matches a simulated peer failure, or has an unknown type.  Otherwise
+ * it is dispatched to the handler for its type.
+ *
+ * Every drop path completes the incoming message by calling lib_recv()
+ * with no message and no MD, and returns -1.  For a dispatched message
+ * the handler's return value is passed back.
+ */
+int lib_parse(nal_cb_t * nal, ptl_hdr_t * hdr, void *private)
+{
+        unsigned long  flags;
+
+        /* NB static check; optimizer will elide this if it's right */
+        LASSERT (offsetof (ptl_hdr_t, msg.ack.length) ==
+                 offsetof (ptl_hdr_t, msg.put.length));
+        LASSERT (offsetof (ptl_hdr_t, msg.ack.length) ==
+                 offsetof (ptl_hdr_t, msg.get.length));
+        LASSERT (offsetof (ptl_hdr_t, msg.ack.length) ==
+                 offsetof (ptl_hdr_t, msg.reply.length));
+
+        /* convert common fields to host byte order */
+        hdr->dest_nid = NTOH__u64 (hdr->dest_nid);
+        hdr->src_nid = NTOH__u64 (hdr->src_nid);
+        hdr->dest_pid = NTOH__u32 (hdr->dest_pid);
+        hdr->src_pid = NTOH__u32 (hdr->src_pid);
+        hdr->type = NTOH__u32 (hdr->type);
+        PTL_HDR_LENGTH(hdr) = NTOH__u32 (PTL_HDR_LENGTH(hdr));
+#if 0
+        nal->cb_printf(nal, "%d: lib_parse: nal=%p hdr=%p type=%d\n",
+                       nal->ni.nid, nal, hdr, hdr->type);
+        print_hdr(nal, hdr);
+#endif
+        if (hdr->type == PTL_MSG_HELLO) {
+                /* dest_nid is really ptl_magicversion_t */
+                ptl_magicversion_t *mv = (ptl_magicversion_t *)&hdr->dest_nid;
+
+                CERROR (LPU64": Dropping unexpected HELLO message: "
+                        "magic %d, version %d.%d from "LPD64"\n",
+                        nal->ni.nid, mv->magic,
+                        mv->version_major, mv->version_minor,
+                        hdr->src_nid);
+                lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr));
+                return (-1);
+        }
+
+        if (hdr->dest_nid != nal->ni.nid) {
+                CERROR(LPU64": Dropping %s message from "LPU64" to "LPU64
+                       " (not me)\n", nal->ni.nid, hdr_type_string (hdr),
+                       hdr->src_nid, hdr->dest_nid);
+
+                state_lock (nal, &flags);
+                nal->ni.counters.drop_count++;
+                nal->ni.counters.drop_length += PTL_HDR_LENGTH(hdr);
+                state_unlock (nal, &flags);
+
+                lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr));
+                return (-1);
+        }
+
+        if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */
+            fail_peer (nal, hdr->src_nid, 0))      /* shall we now? */
+        {
+                CERROR(LPU64": Dropping incoming %s from "LPU64
+                       ": simulated failure\n",
+                       nal->ni.nid, hdr_type_string (hdr),
+                       hdr->src_nid);
+
+                /* A simulated drop must still complete the incoming
+                 * message, exactly as every other drop path does, so the
+                 * NAL gets to discard the payload. */
+                lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr));
+                return (-1);
+        }
+
+        switch (hdr->type) {
+        case PTL_MSG_ACK:
+                return (parse_ack(nal, hdr, private));
+        case PTL_MSG_PUT:
+                return (parse_put(nal, hdr, private));
+        case PTL_MSG_GET:
+                return (parse_get(nal, hdr, private));
+        case PTL_MSG_REPLY:
+                return (parse_reply(nal, hdr, private));
+        default:
+                CERROR(LPU64": Dropping <unknown> message from "LPU64
+                       ": Bad type=0x%x\n", nal->ni.nid, hdr->src_nid,
+                       hdr->type);
+
+                lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr));
+                return (-1);
+        }
+}
+
+
+/*
+ * Initiate a PUT of the whole of MD args->md_in to args->target_in.
+ *
+ * Incoming (PtlPut_in):
+ *      ptl_handle_md_t md_in
+ *      ptl_ack_req_t ack_req_in
+ *      ptl_process_id_t target_in
+ *      ptl_pt_index_t portal_in
+ *      ptl_ac_index_t cookie_in
+ *      ptl_match_bits_t match_bits_in
+ *      ptl_size_t offset_in
+ *
+ * Outgoing (PtlPut_out): rc only, which is also the return value:
+ *      PTL_INV_PROC  the target matched a simulated peer failure
+ *      PTL_INV_MD    the MD handle is invalid or the MD is inactive
+ *      PTL_NOSPACE   no message descriptor could be allocated
+ *      PTL_OK        the PUT was handed to the NAL
+ */
+int do_PtlPut(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        PtlPut_in        *args = v_args;
+        PtlPut_out       *ret = v_ret;
+        ptl_hdr_t         hdr;
+
+        lib_ni_t         *ni = &nal->ni;
+        lib_md_t         *md;
+        lib_msg_t        *msg = NULL;
+        ptl_process_id_t *id = &args->target_in;
+        unsigned long     flags;
+
+        if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */
+            fail_peer (nal, id->nid, 1))           /* shall we now? */
+        {
+                CERROR(LPU64": Dropping PUT to "LPU64": simulated failure\n",
+                       nal->ni.nid, id->nid);
+                return (ret->rc = PTL_INV_PROC);
+        }
+
+        state_lock(nal, &flags);
+        md = ptl_handle2md(&args->md_in, nal);
+        if (md == NULL || !md->threshold) {
+                state_unlock(nal, &flags);
+                return ret->rc = PTL_INV_MD;
+        }
+
+        CDEBUG(D_NET, "PtlPut -> %Lu: %lu\n", (unsigned long long)id->nid,
+               (unsigned long)id->pid);
+
+        /* build the PUT header in network byte order */
+        memset (&hdr, 0, sizeof (hdr));
+        hdr.type = HTON__u32 (PTL_MSG_PUT);
+        hdr.dest_nid = HTON__u64 (id->nid);
+        hdr.src_nid = HTON__u64 (ni->nid);
+        hdr.dest_pid = HTON__u32 (id->pid);
+        hdr.src_pid = HTON__u32 (ni->pid);
+        PTL_HDR_LENGTH(&hdr) = HTON__u32 (md->length);
+
+        /* NB handles only looked up by creator (no flips) */
+        if (args->ack_req_in == PTL_ACK_REQ) {
+                hdr.msg.put.ack_wmd.wh_interface_cookie = ni->ni_interface_cookie;
+                hdr.msg.put.ack_wmd.wh_object_cookie = md->md_lh.lh_cookie;
+        } else {
+                hdr.msg.put.ack_wmd = PTL_WIRE_HANDLE_NONE;
+        }
+
+        hdr.msg.put.match_bits = HTON__u64 (args->match_bits_in);
+        hdr.msg.put.ptl_index = HTON__u32 (args->portal_in);
+        hdr.msg.put.offset = HTON__u32 (args->offset_in);
+        hdr.msg.put.hdr_data = args->hdr_data_in;
+
+        msg = get_new_msg (nal, md);
+        if (msg == NULL) {
+                CERROR("BAD: could not allocate msg!\n");
+                state_unlock(nal, &flags);
+                return ret->rc = PTL_NOSPACE;
+        }
+
+        /* Count the send only once it can no longer fail locally, as the
+         * incoming-message handlers do for their counters. */
+        ni->counters.send_count++;
+        ni->counters.send_length += md->length;
+
+        /*
+         * If this memory descriptor has an event queue associated with
+         * it, record the information about this operation that will be
+         * put into the event queue once the message has been completed.
+         *
+         * NB. We're now committed to the PUT, since we just marked the MD
+         * busy.  Callers who observe this (by getting PTL_MD_INUSE from
+         * PtlMDUnlink()) expect a completion event to tell them when the
+         * MD becomes idle.
+         */
+        if (md->eq) {
+                msg->ev.type = PTL_EVENT_SENT;
+                msg->ev.initiator.nid = ni->nid;
+                msg->ev.initiator.pid = ni->pid;
+                msg->ev.portal = args->portal_in;
+                msg->ev.match_bits = args->match_bits_in;
+                msg->ev.rlength = md->length;
+                msg->ev.mlength = md->length;
+                msg->ev.offset = args->offset_in;
+                msg->ev.hdr_data = args->hdr_data_in;
+
+                lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
+        }
+
+        state_unlock(nal, &flags);
+
+        /* NOTE(review): the lib_send() result is not checked, so a NAL
+         * send failure is still reported as PTL_OK - confirm that the
+         * NAL completes the message itself in that case. */
+        lib_send (nal, private, msg, &hdr, PTL_MSG_PUT,
+                  id->nid, id->pid, md, 0, md->length);
+
+        return ret->rc = PTL_OK;
+}
+
+
+/*
+ * Initiate a GET from args->target_in into MD args->md_in.
+ *
+ * Incoming (PtlGet_in):
+ *      ptl_handle_md_t md_in
+ *      ptl_process_id_t target_in
+ *      ptl_pt_index_t portal_in
+ *      ptl_ac_index_t cookie_in
+ *      ptl_match_bits_t match_bits_in
+ *      ptl_size_t offset_in
+ *
+ * Outgoing (PtlGet_out): rc only, which is also the return value:
+ *      PTL_INV_PROC  the target matched a simulated peer failure
+ *      PTL_INV_MD    the MD handle is invalid or the MD is inactive
+ *      PTL_NOSPACE   no message descriptor could be allocated
+ *      PTL_OK        the GET was handed to the NAL
+ *
+ * The request carries no payload: the MD's length goes out as
+ * sink_length and its handle as the wire handle for the REPLY.
+ */
+int do_PtlGet(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        PtlGet_in        *args = v_args;
+        PtlGet_out       *ret = v_ret;
+        ptl_hdr_t         hdr;
+        lib_msg_t        *msg = NULL;
+        lib_ni_t         *ni = &nal->ni;
+        ptl_process_id_t *id = &args->target_in;
+        lib_md_t         *md;
+        unsigned long     flags;
+
+        if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */
+            fail_peer (nal, id->nid, 1))           /* shall we now? */
+        {
+                /* this is the GET path: name the right operation */
+                CERROR(LPU64": Dropping GET to "LPU64": simulated failure\n",
+                       nal->ni.nid, id->nid);
+                return (ret->rc = PTL_INV_PROC);
+        }
+
+        state_lock(nal, &flags);
+        md = ptl_handle2md(&args->md_in, nal);
+        if (md == NULL || !md->threshold) {
+                state_unlock(nal, &flags);
+                return ret->rc = PTL_INV_MD;
+        }
+
+        LASSERT (md->offset == 0);
+
+        CDEBUG(D_NET, "PtlGet -> %Lu: %lu\n", (unsigned long long)id->nid,
+               (unsigned long)id->pid);
+
+        /* build the GET header in network byte order */
+        memset (&hdr, 0, sizeof (hdr));
+        hdr.type = HTON__u32 (PTL_MSG_GET);
+        hdr.dest_nid = HTON__u64 (id->nid);
+        hdr.src_nid = HTON__u64 (ni->nid);
+        hdr.dest_pid = HTON__u32 (id->pid);
+        hdr.src_pid = HTON__u32 (ni->pid);
+        PTL_HDR_LENGTH(&hdr) = 0;
+
+        /* NB handles only looked up by creator (no flips) */
+        hdr.msg.get.return_wmd.wh_interface_cookie = ni->ni_interface_cookie;
+        hdr.msg.get.return_wmd.wh_object_cookie = md->md_lh.lh_cookie;
+
+        hdr.msg.get.match_bits = HTON__u64 (args->match_bits_in);
+        hdr.msg.get.ptl_index = HTON__u32 (args->portal_in);
+        hdr.msg.get.src_offset = HTON__u32 (args->offset_in);
+        hdr.msg.get.sink_length = HTON__u32 (md->length);
+
+        msg = get_new_msg (nal, md);
+        if (msg == NULL) {
+                CERROR("do_PtlGet: BAD - could not allocate cookie!\n");
+                state_unlock(nal, &flags);
+                return ret->rc = PTL_NOSPACE;
+        }
+
+        /* Count the send only once it can no longer fail locally, as the
+         * incoming-message handlers do for their counters. */
+        ni->counters.send_count++;
+
+        /*
+         * If this memory descriptor has an event queue associated with
+         * it, record the information to be filled in once the message
+         * has been completed.
+         *
+         * NB. We're now committed to the GET, since we just marked the MD
+         * busy.  Callers who observe this (by getting PTL_MD_INUSE from
+         * PtlMDUnlink()) expect a completion event to tell them when the
+         * MD becomes idle.
+         */
+        if (md->eq) {
+                msg->ev.type = PTL_EVENT_SENT;
+                msg->ev.initiator.nid = ni->nid;
+                msg->ev.initiator.pid = ni->pid;
+                msg->ev.portal = args->portal_in;
+                msg->ev.match_bits = args->match_bits_in;
+                msg->ev.rlength = md->length;
+                msg->ev.mlength = md->length;
+                msg->ev.offset = args->offset_in;
+                msg->ev.hdr_data = 0;
+
+                lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
+        }
+
+        state_unlock(nal, &flags);
+
+        /* NOTE(review): the lib_send() result is not checked, so a NAL
+         * send failure is still reported as PTL_OK - confirm that the
+         * NAL completes the message itself in that case. */
+        lib_send (nal, private, msg, &hdr, PTL_MSG_GET,
+                  id->nid, id->pid, NULL, 0, 0);
+
+        return ret->rc = PTL_OK;
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lib/lib-msg.c
+ * Message decoding, parsing and finalizing routines
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __KERNEL__
+# include <stdio.h>
+#else
+# define DEBUG_SUBSYSTEM S_PORTALS
+# include <linux/kp30.h>
+#endif
+
+#include <portals/lib-p30.h>
+
+/*
+ * Finish processing of 'msg': optionally send the ACK it requested, deliver
+ * its event to the MD's event queue (if the MD has one), drop the message's
+ * reference on the MD and free the message.
+ *
+ * nal      NAL callback block; nal->ni holds the interface state.
+ * private  opaque value passed through to lib_send() and the NAL callbacks.
+ * msg      message to finalize; NULL is accepted and is a no-op.
+ *
+ * Returns -1 if the interface is down (in that case 'msg' is not released
+ * here), 0 if 'msg' is NULL, otherwise the value left in 'rc' (see the
+ * review notes below on which call that value comes from).
+ */
+int lib_finalize(nal_cb_t * nal, void *private, lib_msg_t *msg)
+{
+ lib_md_t *md;
+ lib_eq_t *eq;
+ int rc;
+ unsigned long flags;
+
+ /* ni went down while processing this message */
+ if (nal->ni.up == 0) {
+ return -1;
+ }
+
+ if (msg == NULL)
+ return 0;
+
+ rc = 0;
+ if (msg->send_ack) {
+ ptl_hdr_t ack;
+
+ LASSERT (!ptl_is_wire_handle_none (&msg->ack_wmd));
+
+ /* Build the ACK header; multi-byte fields go through HTON__*() */
+ memset (&ack, 0, sizeof (ack));
+ ack.type = HTON__u32 (PTL_MSG_ACK);
+ ack.dest_nid = HTON__u64 (msg->nid);
+ ack.src_nid = HTON__u64 (nal->ni.nid);
+ ack.dest_pid = HTON__u32 (msg->pid);
+ ack.src_pid = HTON__u32 (nal->ni.pid);
+ PTL_HDR_LENGTH(&ack) = 0;
+
+ ack.msg.ack.dst_wmd = msg->ack_wmd;
+ ack.msg.ack.match_bits = msg->ev.match_bits;
+ ack.msg.ack.mlength = HTON__u32 (msg->ev.mlength);
+
+ /* NOTE(review): this rc is overwritten by the cb_write() results
+ * below whenever the MD has an event queue, so an ACK send failure
+ * is only reported to the caller when eq == NULL -- confirm this
+ * is intended. */
+ rc = lib_send (nal, private, NULL, &ack, PTL_MSG_ACK,
+ msg->nid, msg->pid, NULL, 0, 0);
+ }
+
+ /* NOTE(review): md and md->eq are read before state_lock() is taken;
+ * presumably safe because this message still holds a 'pending'
+ * reference on the MD -- confirm. */
+ md = msg->md;
+ LASSERT (md->pending > 0); /* I've not dropped my ref yet */
+ eq = md->eq;
+
+ state_lock(nal, &flags);
+
+ if (eq != NULL) {
+ ptl_event_t *ev = &msg->ev;
+ ptl_event_t *eq_slot;
+
+ /* I have to hold the lock while I bump the sequence number
+ * and copy the event into the queue. If not, and I was
+ * interrupted after bumping the sequence number, other
+ * events could fill the queue, including the slot I just
+ * allocated to this event. On resuming, I would overwrite
+ * a more 'recent' event with old event state, and
+ * processes taking events off the queue would not detect
+ * overflow correctly.
+ */
+
+ ev->sequence = eq->sequence++;/* Allocate the next queue slot */
+
+ /* size must be a power of 2 to handle a wrapped sequence # */
+ LASSERT (eq->size != 0 &&
+ eq->size == LOWEST_BIT_SET (eq->size));
+ eq_slot = eq->base + (ev->sequence & (eq->size - 1));
+
+ /* Invalidate unlinked_me unless this is the last
+ * event for an auto-unlinked MD. Note that if md was
+ * auto-unlinked, md->pending can only decrease
+ */
+ if ((md->md_flags & PTL_MD_FLAG_AUTO_UNLINKED) == 0 || /* not auto-unlinked */
+ md->pending != 1) /* not last ref */
+ ev->unlinked_me = PTL_HANDLE_NONE;
+
+ /* Copy the event into the allocated slot, ensuring all the
+ * rest of the event's contents have been copied _before_
+ * the sequence number gets updated. A process 'getting'
+ * an event waits on the next queue slot's sequence to be
+ * 'new'. When it is, _all_ other event fields had better
+ * be consistent. I assert 'sequence' is the last member,
+ * so I only need a 2 stage copy.
+ */
+ LASSERT(sizeof (ptl_event_t) ==
+ offsetof(ptl_event_t, sequence) + sizeof(ev->sequence));
+
+ /* Stage 1: everything up to (but excluding) 'sequence' */
+ rc = nal->cb_write (nal, private, (user_ptr)eq_slot, ev,
+ offsetof (ptl_event_t, sequence));
+ LASSERT (rc == 0);
+
+#ifdef __KERNEL__
+ barrier();
+#endif
+ /* Updating the sequence number is what makes the event 'new' */
+
+ /* cb_write is not necessarily atomic, so this could
+ cause a race with PtlEQGet */
+ /* Stage 2: publish the sequence number */
+ rc = nal->cb_write(nal, private, (user_ptr)&eq_slot->sequence,
+ (void *)&ev->sequence,sizeof (ev->sequence));
+ LASSERT (rc == 0);
+
+#ifdef __KERNEL__
+ barrier();
+#endif
+
+ /* I must also ensure that (a) callbacks are made in the
+ * same order as the events land in the queue, and (b) the
+ * callback occurs before the event can be removed from the
+ * queue, so I can't drop the lock during the callback. */
+ /* The NAL-level callback, when present, takes precedence over the
+ * event queue's own callback; only one of them is invoked. */
+ if (nal->cb_callback != NULL)
+ nal->cb_callback(nal, private, eq, ev);
+ else if (eq->event_callback != NULL)
+ (void)((eq->event_callback) (ev));
+ }
+
+ /* An auto-unlinked MD must also carry the UNLINK flag */
+ LASSERT ((md->md_flags & PTL_MD_FLAG_AUTO_UNLINKED) == 0 ||
+ (md->md_flags & PTL_MD_FLAG_UNLINK) != 0);
+
+ /* Drop this message's reference on the MD */
+ md->pending--;
+ if (md->pending == 0 && /* no more outstanding operations on this md */
+ (md->threshold == 0 || /* done its business */
+ (md->md_flags & PTL_MD_FLAG_UNLINK) != 0)) /* marked for death */
+ lib_md_unlink(nal, md);
+
+ /* Unchain and release the message itself, still under the state lock */
+ list_del (&msg->msg_list);
+ nal->ni.counters.msgs_alloc--;
+ lib_msg_free(nal, msg);
+
+ state_unlock(nal, &flags);
+
+ return rc;
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lib/lib-ni.c
+ * Network status registers and distance functions.
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_PORTALS
+#include <portals/lib-p30.h>
+#include <portals/arg-blocks.h>
+
+#define MAX_DIST 18446744073709551615UL
+
+int do_PtlNIDebug(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        /*
+         * Install a new debug mask on the interface.
+         *
+         * Incoming:  mask_in  - mask to store in nal->ni.debug
+         * Outgoing:  rc       - the mask that was in force before the call
+         *
+         * The function itself always returns 0; the previous mask travels
+         * back only through the reply block.
+         */
+        PtlNIDebug_in  *request = v_args;
+        PtlNIDebug_out *reply = v_ret;
+
+        reply->rc = nal->ni.debug;
+        nal->ni.debug = request->mask_in;
+
+        return 0;
+}
+
+int do_PtlNIStatus(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        /*
+         * Read one of the interface's status registers (message counters).
+         *
+         * Incoming:
+         *      ptl_handle_ni_t interface_in
+         *      ptl_sr_index_t  register_in   - which counter to report
+         *
+         * Outgoing:
+         *      ptl_sr_value_t  status_out    - counter value, 0 on error
+         *      rc                            - PTL_OK, PTL_SEGV when no
+         *                                      argument block was supplied,
+         *                                      or PTL_INV_SR_INDX for an
+         *                                      unknown register index
+         *
+         * The function's return value mirrors the rc stored in the reply.
+         */
+        PtlNIStatus_in  *request = v_args;
+        PtlNIStatus_out *reply = v_ret;
+        lib_counters_t  *counters = &nal->ni.counters;
+
+        if (request == NULL) {
+                reply->rc = PTL_SEGV;
+                return reply->rc;
+        }
+
+        /* Assume success; the default case below overrides rc */
+        reply->rc = PTL_OK;
+        reply->status_out = 0;
+
+        switch (request->register_in) {
+        case PTL_SR_DROP_COUNT:
+                reply->status_out = counters->drop_count;
+                break;
+        case PTL_SR_DROP_LENGTH:
+                reply->status_out = counters->drop_length;
+                break;
+        case PTL_SR_RECV_COUNT:
+                reply->status_out = counters->recv_count;
+                break;
+        case PTL_SR_RECV_LENGTH:
+                reply->status_out = counters->recv_length;
+                break;
+        case PTL_SR_SEND_COUNT:
+                reply->status_out = counters->send_count;
+                break;
+        case PTL_SR_SEND_LENGTH:
+                reply->status_out = counters->send_length;
+                break;
+        case PTL_SR_MSGS_MAX:
+                reply->status_out = counters->msgs_max;
+                break;
+        default:
+                reply->rc = PTL_INV_SR_INDX;
+                break;
+        }
+
+        return reply->rc;
+}
+
+
+int do_PtlNIDist(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        /*
+         * Ask the NAL for the distance to a peer.
+         *
+         * Incoming:
+         *      ptl_handle_ni_t  interface_in
+         *      ptl_process_id_t process_in    - only the nid is consulted
+         *
+         * Outgoing:
+         *      unsigned long    distance_out  - distance reported by the
+         *                                       NAL's cb_dist(), or MAX_DIST
+         *                                       when cb_dist() fails
+         *      rc                             - PTL_OK or PTL_INV_PROC
+         *
+         * The return value always equals the rc stored in the reply block.
+         */
+        PtlNIDist_in  *args = v_args;
+        PtlNIDist_out *ret = v_ret;
+        unsigned long  dist;
+
+        if (nal->cb_dist(nal, args->process_in.nid, &dist) != 0) {
+                ret->distance_out = (unsigned long) MAX_DIST;
+                /* Record the failure in the reply as well: previously rc
+                 * was left unset on this path, unlike every other handler */
+                return ret->rc = PTL_INV_PROC;
+        }
+
+        ret->distance_out = dist;
+
+        return ret->rc = PTL_OK;
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lib/lib-not-impl.c
+ *
+ * boiler plate functions that can be used to write the
+ * library side routines
+ */
+
+# define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <portals/lib-p30.h>
+#include <portals/arg-blocks.h>
+
+
+int do_PtlACEntry(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        /*
+         * Access-control entry setup: a stub in this library.
+         *
+         * Incoming:
+         *      ptl_handle_ni_t  ni_in
+         *      ptl_ac_index_t   index_in
+         *      ptl_process_id_t match_id_in
+         *      ptl_pt_index_t   portal_in
+         *
+         * Outgoing:
+         *      rc - PTL_SEGV when no argument block was supplied,
+         *           PTL_NOT_IMPLEMENTED otherwise
+         */
+        PtlACEntry_in  *request = v_args;
+        PtlACEntry_out *reply = v_ret;
+
+        if (request == NULL)
+                reply->rc = PTL_SEGV;
+        else
+                reply->rc = PTL_NOT_IMPLEMENTED;
+
+        return reply->rc;
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lib/lib-pid.c
+ * Process identification routines
+ */
+
+/* This should be removed. The NAL should have the PID information */
+#define DEBUG_SUBSYSTEM S_PORTALS
+
+#if defined (__KERNEL__)
+# include <linux/kernel.h>
+extern int getpid(void);
+#else
+# include <stdio.h>
+# include <unistd.h>
+#endif
+#include <portals/lib-p30.h>
+#include <portals/arg-blocks.h>
+
+int do_PtlGetId(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        /*
+         * Report this interface's own process id.
+         *
+         * Incoming:
+         *      ptl_handle_ni_t    handle_in   (not examined here)
+         *
+         * Outgoing:
+         *      ptl_process_id_t   id_out      - nid/pid copied from nal->ni
+         *      rc                             - always PTL_OK
+         *
+         * Nothing other than id_out and rc is written to the reply block.
+         */
+        PtlGetId_out *reply = v_ret;
+
+        reply->id_out.pid = nal->ni.pid;
+        reply->id_out.nid = nal->ni.nid;
+        reply->rc = PTL_OK;
+
+        return reply->rc;
+}
--- /dev/null
+Makefile
+Makefile.in
+aclocal.m4
+config.log
+config.status
+config.cache
+configure
+portals.spec
--- /dev/null
+# Copyright (C) 2002 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+EXTRA_DIST = portals.spec
\ No newline at end of file
--- /dev/null
+%define kversion @RELEASE@
+%define linuxdir @LINUX@
+%define version HEAD
+
+Summary: Sandia Portals Message Passing - utilities
+Name: portals
+Version: %{version}
+Release: 0210101748uml
+Copyright: LGPL
+Group: Utilities/System
+BuildRoot: /var/tmp/portals-%{version}-root
+Source: http://sandiaportals.org/portals-%{version}.tar.gz
+
+%description
+Sandia Portals message passing package. Contains kernel modules, libraries and utilities.
+
+%package -n portals-modules
+Summary: Kernel modules and NAL's for portals
+Group: Development/Kernel
+
+%description -n portals-modules
+Portals kernel modules and NALs for Linux %{kversion}.
+
+%package -n portals-source
+Summary: Portals kernel source for rebuilding with other kernels
+Group: Development/Kernel
+
+%description -n portals-source
+Portals kernel source for rebuilding with other kernels
+
+%prep
+%setup -n portals-%{version}
+
+%build
+rm -rf $RPM_BUILD_ROOT
+
+# Create the pristine source directory.
+srcdir=$RPM_BUILD_ROOT/usr/src/portals-%{version}
+mkdir -p $srcdir
+find . -name CVS -prune -o -print | cpio -ap $srcdir
+
+# Set an explicit path to our Linux tree, if we can.
+conf_flag=
+linuxdir=%{linuxdir}
+test -d $linuxdir && conf_flag=--with-linux=$linuxdir
+./configure $conf_flag
+make
+
+%install
+make install prefix=$RPM_BUILD_ROOT
+
+%ifarch alpha
+# this hurts me
+ conf_flag=
+ linuxdir=%{linuxdir}
+ test -d $linuxdir && conf_flag=--with-linux=$linuxdir
+ make clean
+ ./configure --enable-rtscts-myrinet $conf_flag
+ make
+ cp linux/rtscts/rtscts.o $RPM_BUILD_ROOT/lib/modules/%{kversion}/kernel/net/portals/rtscts_myrinet.o
+ cp user/myrinet_utils/mcpload $RPM_BUILD_ROOT/usr/sbin/mcpload
+%endif
+
+
+%files
+%attr(-, root, root) %doc COPYING
+%attr(-, root, root) /usr/sbin/acceptor
+%attr(-, root, root) /usr/sbin/ptlctl
+%attr(-, root, root) /usr/sbin/debugctl
+%ifarch alpha
+%attr(-, root, root) /usr/sbin/mcpload
+%endif
+%attr(-, root, root) /lib/libmyrnal.a
+%attr(-, root, root) /lib/libptlapi.a
+%attr(-, root, root) /lib/libptlctl.a
+%attr(-, root, root) /lib/libprocbridge.a
+%attr(-, root, root) /lib/libptllib.a
+%attr(-, root, root) /lib/libtcpnal.a
+%attr(-, root, root) /lib/libtcpnalutil.a
+%attr(-, root, root) /usr/include/portals/*.h
+%attr(-, root, root) /usr/include/portals/base/*.h
+%attr(-, root, root) /usr/include/linux/*.h
+
+%files -n portals-modules
+%attr(-, root, root) %doc COPYING
+%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/portals.o
+%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/kptlrouter.o
+%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/kptrxtx.o
+%ifarch alpha
+%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/p3mod.o
+%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/rtscts.o
+%endif
+%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/*nal.o
+
+%files -n portals-source
+%attr(-, root, root) /usr/src/portals-%{version}
+
+%post
+if [ ! -e /dev/portals ]; then
+ mknod /dev/portals c 10 240
+fi
+depmod -ae || exit 0
+
+grep -q portals /etc/modules.conf || \
+ echo 'alias char-major-10-240 portals' >> /etc/modules.conf
+
+grep -q '/dev/portals' /etc/modules.conf || \
+ echo 'alias /dev/portals portals' >> /etc/modules.conf
+
+%postun
+depmod -ae || exit 0
+
+%clean
+#rm -rf $RPM_BUILD_ROOT
+
+# end of file
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+include ../Rules.linux
+
+MODULE = kptlrouter
+modulenet_DATA = kptlrouter.o
+EXTRA_PROGRAMS = kptlrouter
+
+
+#CFLAGS:= @KCFLAGS@
+#CPPFLAGS:=@KCPPFLAGS@
+DEFS =
+kptlrouter_SOURCES = router.c proc.c router.h
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+include ../Kernelenv
+
+obj-y += kptlrouter.o
+kptlrouter-objs := router.o proc.o
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Portals
+ * http://sourceforge.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include "router.h"
+
+#define KPR_PROC_ROUTER "sys/portals/router"
+
+/*
+ * read_proc handler for the router's proc entry.
+ *
+ * Emits one line, "<bytes> <packets> <errors> <queue depth>\n", from
+ * snapshots of the forwarding counters.  The whole report is produced on
+ * the first read (off == 0); any later offset yields 0 bytes.  *eof is
+ * always set.  Returns the number of bytes placed in 'page'.
+ */
+int
+kpr_proc_read (char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+        unsigned long long bytes = kpr_fwd_bytes;
+        unsigned long      packets = kpr_fwd_packets;
+        unsigned long      errors = kpr_fwd_errors;
+        unsigned int       qdepth = atomic_read (&kpr_queue_depth);
+        int                len;
+
+        *eof = 1;
+        if (off != 0)
+                return (0);
+
+        /* All four values are unsigned: use unsigned conversions so large
+         * counters are not printed as negative numbers */
+        len = sprintf (page, "%Lu %lu %lu %u\n", bytes, packets, errors, qdepth);
+
+        *start = page;
+        return (len);
+}
+
+/*
+ * write_proc handler for the router's proc entry.
+ *
+ * The written data is never looked at: any write simply resets the three
+ * forwarding statistics.  Reports the full 'count' as consumed.
+ */
+int
+kpr_proc_write (struct file *file, const char *ubuffer, unsigned long count, void *data)
+{
+        kpr_fwd_errors = 0;
+        kpr_fwd_packets = 0;
+        kpr_fwd_bytes = 0;
+
+        return (count);
+}
+
+/*
+ * Create the router's proc entry (world-readable, owner-writable regular
+ * file) and hook up its read/write handlers.  Failure to create the entry
+ * is logged and otherwise ignored.
+ */
+void
+kpr_proc_init(void)
+{
+        struct proc_dir_entry *pde;
+
+        pde = create_proc_entry (KPR_PROC_ROUTER,
+                                 S_IFREG | S_IRUGO | S_IWUSR, NULL);
+        if (pde != NULL) {
+                pde->data = NULL;
+                pde->read_proc = kpr_proc_read;
+                pde->write_proc = kpr_proc_write;
+                return;
+        }
+
+        CERROR("couldn't create proc entry %s\n", KPR_PROC_ROUTER);
+}
+
+/* Remove the proc entry created by kpr_proc_init() (no parent directory). */
+void
+kpr_proc_fini(void)
+{
+        remove_proc_entry(KPR_PROC_ROUTER, NULL);
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Portals
+ * http://sourceforge.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include "router.h"
+
+/* Routing table: list of kpr_route_entry_t, chained through kpre_list */
+struct list_head kpr_routes;
+/* Registered NALs: list of kpr_nal_entry_t, chained through kpne_list */
+struct list_head kpr_nals;
+
+/* Forwarding statistics; reported and reset through the proc entry.
+ * The three plain counters are updated without any locking. */
+unsigned long long kpr_fwd_bytes;
+unsigned long kpr_fwd_packets;
+unsigned long kpr_fwd_errors;
+/* Number of forwards started but not yet completed or failed */
+atomic_t kpr_queue_depth;
+
+/* Mostly the tables are read-only (thread and interrupt context)
+ *
+ * Once in a blue moon we register/deregister NALs and add/remove routing
+ * entries (thread context only)... */
+rwlock_t kpr_rwlock;
+
+/* Entry points handed to NALs (registered as a symbol at module init) */
+kpr_router_interface_t kpr_router_interface = {
+ kprri_register: kpr_register_nal,
+ kprri_lookup: kpr_lookup_target,
+ kprri_fwd_start: kpr_forward_packet,
+ kprri_fwd_done: kpr_complete_packet,
+ kprri_shutdown: kpr_shutdown_nal,
+ kprri_deregister: kpr_deregister_nal,
+};
+
+/* Entry points for route table administration (add/delete/enumerate) */
+kpr_control_interface_t kpr_control_interface = {
+ kprci_add_route: kpr_add_route,
+ kprci_del_route: kpr_del_route,
+ kprci_get_route: kpr_get_route,
+};
+
+/*
+ * Register a NAL with the router.
+ *
+ * nalif  the NAL's interface block; it is copied, so the caller's copy
+ *        need not outlive the call.
+ * argp   receives the router's private handle for this NAL, which the NAL
+ *        passes back as 'arg' to the other router entry points.
+ *
+ * Returns 0 on success, -ENOMEM if the entry cannot be allocated, or
+ * -EEXIST if a NAL with the same kprni_nalid is already registered.
+ * Must not be called from interrupt context.  On success a module use
+ * count is taken (PORTAL_MODULE_USE).
+ */
+int
+kpr_register_nal (kpr_nal_interface_t *nalif, void **argp)
+{
+        unsigned long     flags;        /* irqsave state is an unsigned long */
+        struct list_head *e;
+        kpr_nal_entry_t  *ne;
+
+        CDEBUG (D_OTHER, "Registering NAL %d\n", nalif->kprni_nalid);
+
+        PORTAL_ALLOC (ne, sizeof (*ne));
+        if (ne == NULL)
+                return (-ENOMEM);
+
+        /* Zeroing also clears kpne_refcount and kpne_shutdown */
+        memset (ne, 0, sizeof (*ne));
+        memcpy ((void *)&ne->kpne_interface, (void *)nalif, sizeof (*nalif));
+
+        LASSERT (!in_interrupt());
+        write_lock_irqsave (&kpr_rwlock, flags);
+
+        /* Refuse a second registration of the same NAL id */
+        for (e = kpr_nals.next; e != &kpr_nals; e = e->next)
+        {
+                kpr_nal_entry_t *ne2 = list_entry (e, kpr_nal_entry_t, kpne_list);
+
+                if (ne2->kpne_interface.kprni_nalid == ne->kpne_interface.kprni_nalid)
+                {
+                        write_unlock_irqrestore (&kpr_rwlock, flags);
+
+                        CERROR ("Attempt to register same NAL %d twice\n", ne->kpne_interface.kprni_nalid);
+
+                        PORTAL_FREE (ne, sizeof (*ne));
+                        return (-EEXIST);
+                }
+        }
+
+        list_add (&ne->kpne_list, &kpr_nals);
+
+        write_unlock_irqrestore (&kpr_rwlock, flags);
+
+        *argp = ne;
+        PORTAL_MODULE_USE;
+        return (0);
+}
+
+/*
+ * Begin shutting down a registered NAL.
+ *
+ * Marks the NAL's entry as shutting down, so lookups and forwards involving
+ * it are refused, then sleeps in one-second steps until every forward that
+ * references the NAL has completed (kpne_refcount reaches zero).
+ * 'arg' is the handle returned by kpr_register_nal().  Must be called at
+ * most once per registration and never from interrupt context.
+ */
+void
+kpr_shutdown_nal (void *arg)
+{
+        unsigned long    flags;         /* irqsave state is an unsigned long */
+        kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg;
+
+        CDEBUG (D_OTHER, "Shutting down NAL %d\n", ne->kpne_interface.kprni_nalid);
+
+        LASSERT (!ne->kpne_shutdown);
+        LASSERT (!in_interrupt());
+
+        write_lock_irqsave (&kpr_rwlock, flags); /* locking a bit spurious... */
+        ne->kpne_shutdown = 1;
+        write_unlock_irqrestore (&kpr_rwlock, flags); /* except it's a memory barrier */
+
+        /* Poll until all in-flight forwards have dropped their references */
+        while (atomic_read (&ne->kpne_refcount) != 0)
+        {
+                CDEBUG (D_NET, "Waiting for refcount on NAL %d to reach zero (%d)\n",
+                        ne->kpne_interface.kprni_nalid, atomic_read (&ne->kpne_refcount));
+
+                set_current_state (TASK_UNINTERRUPTIBLE);
+                schedule_timeout (HZ);
+        }
+}
+
+/*
+ * Remove a NAL from the router and free its entry.
+ *
+ * 'arg' is the handle returned by kpr_register_nal(); it is invalid after
+ * this call.  kpr_shutdown_nal() must already have been called and all
+ * references dropped (both are asserted).  Releases the module use count
+ * taken at registration.  Must not be called from interrupt context.
+ */
+void
+kpr_deregister_nal (void *arg)
+{
+        unsigned long    flags;         /* irqsave state is an unsigned long */
+        kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg;
+
+        CDEBUG (D_OTHER, "Deregister NAL %d\n", ne->kpne_interface.kprni_nalid);
+
+        LASSERT (ne->kpne_shutdown);            /* caller must have issued shutdown already */
+        LASSERT (atomic_read (&ne->kpne_refcount) == 0); /* can't be busy */
+        LASSERT (!in_interrupt());
+
+        write_lock_irqsave (&kpr_rwlock, flags);
+
+        list_del (&ne->kpne_list);
+
+        write_unlock_irqrestore (&kpr_rwlock, flags);
+
+        PORTAL_FREE (ne, sizeof (*ne));
+        PORTAL_MODULE_UNUSE;
+}
+
+
+/*
+ * Find the gateway a NAL should use to reach 'target_nid'.
+ *
+ * arg           handle of the calling NAL (from kpr_register_nal()).
+ * target_nid    destination being looked up.
+ * gateway_nidp  set to the gateway's nid on success only.
+ *
+ * Only the first route whose [lo, hi] range contains target_nid is
+ * considered.  Returns 0 if that route's gateway is on the caller's NAL,
+ * -EHOSTUNREACH if it is on a different NAL, and -ENOENT if no route
+ * covers the target or the caller is shutting down.
+ */
+int
+kpr_lookup_target (void *arg, ptl_nid_t target_nid, ptl_nid_t *gateway_nidp)
+{
+        kpr_nal_entry_t   *caller = (kpr_nal_entry_t *)arg;
+        struct list_head  *pos;
+        kpr_route_entry_t *route;
+        int                result = -ENOENT;
+
+        CDEBUG (D_OTHER, "lookup "LPX64" from NAL %d\n", target_nid, caller->kpne_interface.kprni_nalid);
+
+        /* A NAL that is shutting down gets no routes */
+        if (caller->kpne_shutdown)
+                return (-ENOENT);
+
+        read_lock (&kpr_rwlock);
+
+        pos = kpr_routes.next;
+        while (pos != &kpr_routes) {
+                route = list_entry (pos, kpr_route_entry_t, kpre_list);
+
+                if (route->kpre_lo_nid <= target_nid &&
+                    target_nid <= route->kpre_hi_nid) {
+                        /* First covering route decides the outcome */
+                        if (route->kpre_gateway_nalid ==
+                            caller->kpne_interface.kprni_nalid) {
+                                *gateway_nidp = route->kpre_gateway_nid;
+                                result = 0;
+                        } else {
+                                /* gateway lives on some other NAL */
+                                result = -EHOSTUNREACH;
+                        }
+                        break;
+                }
+
+                pos = pos->next;
+        }
+
+        read_unlock (&kpr_rwlock);
+
+        CDEBUG (D_OTHER, "lookup "LPX64" from NAL %d: %d ("LPX64")\n",
+                target_nid, caller->kpne_interface.kprni_nalid, result,
+                (result == 0) ? *gateway_nidp : (ptl_nid_t)0);
+        return (result);
+}
+
+/*
+ * Start forwarding a packet received by one NAL out through another.
+ *
+ * arg  handle of the source NAL (from kpr_register_nal()).
+ * fwd  forwarding descriptor: target nid, payload iov and byte count, and
+ *      the completion callback.
+ *
+ * The first route covering the target is used.  If its gateway is on a
+ * different, registered, non-shutting-down NAL, the descriptor is handed to
+ * that NAL's kprni_fwd() and both NALs stay referenced until
+ * kpr_complete_packet() is called.  In every other case the descriptor's
+ * callback is invoked immediately with -EHOSTUNREACH and the error counter
+ * is bumped.
+ */
+void
+kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd)
+{
+        kpr_nal_entry_t  *src_ne = (kpr_nal_entry_t *)arg;
+        ptl_nid_t         target_nid = fwd->kprfd_target_nid;
+        int               nob = fwd->kprfd_nob;
+        struct list_head *re_pos;       /* cursor over kpr_routes */
+        struct list_head *ne_pos;       /* cursor over kpr_nals */
+
+        CDEBUG (D_OTHER, "forward [%p] "LPX64" from NAL %d\n", fwd,
+                target_nid, src_ne->kpne_interface.kprni_nalid);
+
+        LASSERT (nob >= sizeof (ptl_hdr_t)); /* at least got a packet header */
+        LASSERT (nob == lib_iov_nob (fwd->kprfd_niov, fwd->kprfd_iov));
+
+        atomic_inc (&kpr_queue_depth);
+        /* Take the source reference before any path can reach 'out', which
+         * drops it unconditionally.  Taking it only after the shutdown
+         * check (as before) made the refcount underflow whenever a NAL
+         * that was shutting down tried to forward. */
+        atomic_inc (&src_ne->kpne_refcount); /* source nal is busy until fwd completes */
+
+        kpr_fwd_packets++;                   /* (loose) stats accounting */
+        kpr_fwd_bytes += nob;
+
+        if (src_ne->kpne_shutdown)           /* caller is shutting down */
+                goto out;
+
+        fwd->kprfd_router_arg = src_ne;      /* stash caller's nal entry */
+
+        read_lock (&kpr_rwlock);
+
+        /* Search routes for one that has a gateway to target_nid NOT on the caller's network */
+
+        for (re_pos = kpr_routes.next; re_pos != &kpr_routes; re_pos = re_pos->next)
+        {
+                kpr_route_entry_t *re = list_entry (re_pos, kpr_route_entry_t, kpre_list);
+
+                if (re->kpre_lo_nid > target_nid || /* no match */
+                    re->kpre_hi_nid < target_nid)
+                        continue;
+
+                CDEBUG (D_OTHER, "forward [%p] "LPX64" from NAL %d: match "LPX64" on NAL %d\n", fwd,
+                        target_nid, src_ne->kpne_interface.kprni_nalid,
+                        re->kpre_gateway_nid, re->kpre_gateway_nalid);
+
+                if (re->kpre_gateway_nalid == src_ne->kpne_interface.kprni_nalid)
+                        break;                  /* don't route to same NAL */
+
+                /* Search for gateway's NAL's entry */
+
+                for (ne_pos = kpr_nals.next; ne_pos != &kpr_nals; ne_pos = ne_pos->next)
+                {
+                        kpr_nal_entry_t *dst_ne = list_entry (ne_pos, kpr_nal_entry_t, kpne_list);
+
+                        if (re->kpre_gateway_nalid != dst_ne->kpne_interface.kprni_nalid) /* no match */
+                                continue;
+
+                        if (dst_ne->kpne_shutdown) /* don't route if NAL is shutting down */
+                                break;
+
+                        fwd->kprfd_gateway_nid = re->kpre_gateway_nid;
+                        atomic_inc (&dst_ne->kpne_refcount); /* dest nal is busy until fwd completes */
+
+                        read_unlock (&kpr_rwlock);
+
+                        CDEBUG (D_OTHER, "forward [%p] "LPX64" from NAL %d: "LPX64" on NAL %d\n", fwd,
+                                target_nid, src_ne->kpne_interface.kprni_nalid,
+                                fwd->kprfd_gateway_nid, dst_ne->kpne_interface.kprni_nalid);
+
+                        /* Hand off; completion arrives via kpr_complete_packet() */
+                        dst_ne->kpne_interface.kprni_fwd (dst_ne->kpne_interface.kprni_arg, fwd);
+                        return;
+                }
+                /* Only the first covering route is ever tried */
+                break;
+        }
+
+        read_unlock (&kpr_rwlock);
+ out:
+        kpr_fwd_errors++;
+
+        CDEBUG (D_OTHER, "Failed to forward [%p] "LPX64" from NAL %d\n", fwd,
+                target_nid, src_ne->kpne_interface.kprni_nalid);
+
+        /* Can't find anywhere to forward to */
+        (fwd->kprfd_callback)(fwd->kprfd_callback_arg, -EHOSTUNREACH);
+
+        atomic_dec (&kpr_queue_depth);
+        atomic_dec (&src_ne->kpne_refcount);
+}
+
+/*
+ * Called by the destination NAL when it has finished with a forwarded
+ * packet.
+ *
+ * arg    handle of the destination NAL.
+ * fwd    the descriptor previously passed to that NAL's kprni_fwd(); its
+ *        kprfd_router_arg holds the source NAL entry stashed by
+ *        kpr_forward_packet().
+ * error  completion status, passed unchanged to the descriptor's callback.
+ *
+ * Drops the references kpr_forward_packet() took on both NALs and the
+ * queue-depth count.  The order matters: each NAL entry must not be
+ * touched after its reference has been dropped.
+ */
+void
+kpr_complete_packet (void *arg, kpr_fwd_desc_t *fwd, int error)
+{
+ kpr_nal_entry_t *dst_ne = (kpr_nal_entry_t *)arg;
+ kpr_nal_entry_t *src_ne = (kpr_nal_entry_t *)fwd->kprfd_router_arg;
+
+ CDEBUG (D_OTHER, "complete(1) [%p] from NAL %d to NAL %d: %d\n", fwd,
+ src_ne->kpne_interface.kprni_nalid, dst_ne->kpne_interface.kprni_nalid, error);
+
+ atomic_dec (&dst_ne->kpne_refcount); /* CAVEAT EMPTOR dst_ne can disappear now!!! */
+
+ /* Notify the source while its NAL entry is still referenced */
+ (fwd->kprfd_callback)(fwd->kprfd_callback_arg, error);
+
+ CDEBUG (D_OTHER, "complete(2) [%p] from NAL %d: %d\n", fwd,
+ src_ne->kpne_interface.kprni_nalid, error);
+
+ atomic_dec (&kpr_queue_depth);
+ atomic_dec (&src_ne->kpne_refcount); /* CAVEAT EMPTOR src_ne can disappear now!!! */
+}
+
+/*
+ * Add a route: targets in the inclusive range [lo_nid, hi_nid] are reached
+ * through gateway 'gateway_nid' on NAL 'gateway_nalid'.
+ *
+ * Returns 0 on success, -ENOMEM if the entry cannot be allocated, or
+ * -EINVAL if the range overlaps any existing route.  lo_nid must not exceed
+ * hi_nid (asserted).  Must not be called from interrupt context.
+ */
+int
+kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid,
+               ptl_nid_t hi_nid)
+{
+        unsigned long      flags;       /* irqsave state is an unsigned long */
+        struct list_head  *e;
+        kpr_route_entry_t *re;
+
+        CDEBUG(D_OTHER, "Add route: %d "LPX64" : "LPX64" - "LPX64"\n",
+               gateway_nalid, gateway_nid, lo_nid, hi_nid);
+
+        LASSERT(lo_nid <= hi_nid);
+
+        PORTAL_ALLOC (re, sizeof (*re));
+        if (re == NULL)
+                return (-ENOMEM);
+
+        re->kpre_gateway_nalid = gateway_nalid;
+        re->kpre_gateway_nid = gateway_nid;
+        re->kpre_lo_nid = lo_nid;
+        re->kpre_hi_nid = hi_nid;
+
+        LASSERT(!in_interrupt());
+        write_lock_irqsave (&kpr_rwlock, flags);
+
+        /* Reject the new range if it intersects any existing one */
+        for (e = kpr_routes.next; e != &kpr_routes; e = e->next) {
+                kpr_route_entry_t *re2 = list_entry(e, kpr_route_entry_t,
+                                                    kpre_list);
+
+                if (re->kpre_lo_nid > re2->kpre_hi_nid ||
+                    re->kpre_hi_nid < re2->kpre_lo_nid)
+                        continue;
+
+                /* The two literals are concatenated: keep the space between
+                 * "]" and "to" */
+                CERROR ("Attempt to add duplicate routes ["LPX64" - "LPX64"] "
+                        "to ["LPX64" - "LPX64"]\n",
+                        re->kpre_lo_nid, re->kpre_hi_nid,
+                        re2->kpre_lo_nid, re2->kpre_hi_nid);
+
+                write_unlock_irqrestore (&kpr_rwlock, flags);
+
+                PORTAL_FREE (re, sizeof (*re));
+                return (-EINVAL);
+        }
+
+        list_add (&re->kpre_list, &kpr_routes);
+
+        write_unlock_irqrestore (&kpr_rwlock, flags);
+        return (0);
+}
+
+/*
+ * Delete the first route whose inclusive [lo, hi] range contains 'nid'.
+ *
+ * Returns 0 if a route was removed and freed, -ENOENT if no route covers
+ * 'nid'.  Must not be called from interrupt context.
+ */
+int
+kpr_del_route (ptl_nid_t nid)
+{
+        unsigned long     flags;        /* irqsave state is an unsigned long */
+        struct list_head *e;
+
+        CDEBUG(D_OTHER, "Del route "LPX64"\n", nid);
+
+        LASSERT(!in_interrupt());
+        write_lock_irqsave(&kpr_rwlock, flags);
+
+        for (e = kpr_routes.next; e != &kpr_routes; e = e->next) {
+                kpr_route_entry_t *re = list_entry(e, kpr_route_entry_t,
+                                                   kpre_list);
+
+                if (re->kpre_lo_nid > nid || re->kpre_hi_nid < nid)
+                        continue;
+
+                /* Unlink under the lock, free after dropping it */
+                list_del (&re->kpre_list);
+                write_unlock_irqrestore(&kpr_rwlock, flags);
+
+                PORTAL_FREE(re, sizeof (*re));
+                return (0);
+        }
+
+        write_unlock_irqrestore(&kpr_rwlock, flags);
+        return (-ENOENT);
+}
+
+/*
+ * Fetch the idx'th route (0-based, in list order) for enumeration.
+ *
+ * On success the four output parameters receive the route's gateway NAL
+ * id, gateway nid and inclusive nid range, and 0 is returned.  Returns
+ * -ENOENT, leaving the outputs untouched, when there is no such entry.
+ */
+int
+kpr_get_route(int idx, int *gateway_nalid, ptl_nid_t *gateway_nid,
+              ptl_nid_t *lo_nid, ptl_nid_t *hi_nid)
+{
+        struct list_head  *pos;
+        kpr_route_entry_t *route;
+        int                result = -ENOENT;
+
+        read_lock(&kpr_rwlock);
+
+        /* Walk the list, counting idx down to zero as we go */
+        for (pos = kpr_routes.next; pos != &kpr_routes; pos = pos->next, idx--) {
+                if (idx != 0)
+                        continue;
+
+                route = list_entry(pos, kpr_route_entry_t, kpre_list);
+
+                *gateway_nalid = route->kpre_gateway_nalid;
+                *gateway_nid = route->kpre_gateway_nid;
+                *lo_nid = route->kpre_lo_nid;
+                *hi_nid = route->kpre_hi_nid;
+
+                result = 0;
+                break;
+        }
+
+        read_unlock(&kpr_rwlock);
+        return (result);
+}
+
+/*
+ * Module exit: free every remaining route, remove the proc entry and
+ * unregister the two interface symbols.  All NALs must already have
+ * deregistered (asserted).
+ */
+static void __exit
+kpr_finalise (void)
+{
+ LASSERT (list_empty (&kpr_nals));
+
+ /* Drain the route table; done without taking kpr_rwlock */
+ while (!list_empty (&kpr_routes)) {
+ kpr_route_entry_t *re = list_entry(kpr_routes.next,
+ kpr_route_entry_t,
+ kpre_list);
+
+ list_del(&re->kpre_list);
+ PORTAL_FREE(re, sizeof (*re));
+ }
+
+ kpr_proc_fini();
+
+ PORTAL_SYMBOL_UNREGISTER(kpr_router_interface);
+ PORTAL_SYMBOL_UNREGISTER(kpr_control_interface);
+
+ /* Log the allocation counter on the way out; presumably paired with the
+ * message in kpr_initialise() for leak checking -- confirm */
+ CDEBUG(D_MALLOC, "kpr_finalise: kmem back to %d\n",
+ atomic_read(&portal_kmemory));
+}
+
+/*
+ * Module init: set up the lock and the (empty) route and NAL lists, create
+ * the proc entry, then publish the router and control interfaces.
+ * Always returns 0.
+ */
+static int __init
+kpr_initialise (void)
+{
+ CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n",
+ atomic_read(&portal_kmemory));
+
+ /* State must be ready before the interfaces are registered below */
+ rwlock_init(&kpr_rwlock);
+ INIT_LIST_HEAD(&kpr_routes);
+ INIT_LIST_HEAD(&kpr_nals);
+
+ /* kpr_proc_init() returns nothing, so a missing proc entry does not
+ * fail module load */
+ kpr_proc_init();
+
+ PORTAL_SYMBOL_REGISTER(kpr_router_interface);
+ PORTAL_SYMBOL_REGISTER(kpr_control_interface);
+ return (0);
+}
+
+MODULE_AUTHOR("Eric Barton");
+MODULE_DESCRIPTION("Kernel Portals Router v0.01");
+MODULE_LICENSE("GPL");
+
+module_init (kpr_initialise);
+module_exit (kpr_finalise);
+
+EXPORT_SYMBOL (kpr_control_interface);
+EXPORT_SYMBOL (kpr_router_interface);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Portals
+ * http://sourceforge.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef _KPTLROUTER_H
+#define _KPTLROUTER_H
+#define EXPORT_SYMTAB
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+
+#define DEBUG_SUBSYSTEM S_PTLROUTER
+
+#include <linux/kp30.h>
+#include <portals/p30.h>
+#include <portals/lib-p30.h>
+
+/* Router-side record of one registered NAL; the pointer to it is the
+ * opaque 'arg' handle returned by kpr_register_nal(). */
+typedef struct
+{
+ struct list_head kpne_list; /* chain on kpr_nals */
+ kpr_nal_interface_t kpne_interface; /* copy of the NAL's interface block */
+ atomic_t kpne_refcount; /* forwards in flight that use this NAL (as source or destination) */
+ int kpne_shutdown; /* set once kpr_shutdown_nal() has been called */
+} kpr_nal_entry_t;
+
+/* One routing table entry: targets with lo <= nid <= hi are reached via
+ * the given gateway on the given NAL. */
+typedef struct
+{
+ struct list_head kpre_list; /* chain on kpr_routes */
+ int kpre_gateway_nalid; /* id of the NAL the gateway is attached to */
+ ptl_nid_t kpre_gateway_nid; /* nid of the gateway itself */
+ ptl_nid_t kpre_lo_nid; /* lowest target nid covered (inclusive) */
+ ptl_nid_t kpre_hi_nid; /* highest target nid covered (inclusive) */
+} kpr_route_entry_t;
+
+extern int kpr_register_nal (kpr_nal_interface_t *nalif, void **argp);
+extern int kpr_lookup_target (void *arg, ptl_nid_t target_nid, ptl_nid_t *gateway_nidp);
+extern void kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd);
+extern void kpr_complete_packet (void *arg, kpr_fwd_desc_t *fwd, int error);
+extern void kpr_shutdown_nal (void *arg);
+extern void kpr_deregister_nal (void *arg);
+
+extern void kpr_proc_init (void);
+extern void kpr_proc_fini (void);
+
+extern int kpr_add_route (int gateway_nal, ptl_nid_t gateway_nid,
+ ptl_nid_t lo_nid, ptl_nid_t hi_nid);
+extern int kpr_del_route (ptl_nid_t nid);
+extern int kpr_get_route (int idx, int *gateway_nal, ptl_nid_t *gateway_nid,
+ ptl_nid_t *lo_nid, ptl_nid_t *hi_nid);
+
+extern unsigned long long kpr_fwd_bytes;
+extern unsigned long kpr_fwd_packets;
+extern unsigned long kpr_fwd_errors;
+extern atomic_t kpr_queue_depth;
+
+#endif /* _KPTLROUTER_H */
--- /dev/null
+Makefile
+Makefile.in
+.deps
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+include ../Rules.linux
+
+# Link with "ld -m <emulation> -r": the emulation name is the fourth
+# whitespace-separated field of the "supported emulations" line printed by
+# "$(LD) --help"; -r asks for a relocatable object.
+LDFLAGS = -m "`$(LD) --help | awk '/supported emulations/ {print $$4}'`" -r
+LINK = $(LD) $(LDFLAGS) -o $@
+DEFS =
+LIBS =
+# $(basename) is defined in Rules.linux (inside its LINUX25 section).
+MODULE = $(basename)
+EXTRA_DIST = startserver.sh startclient.sh stopserver.sh stopclient.sh
+
+# The "programs" are the module objects themselves, hence the .o names.
+noinst_PROGRAMS = pingsrv.o pingcli.o spingsrv.o spingcli.o
+
+pingsrv_o_SOURCES = ping_srv.c ping.h
+
+pingcli_o_SOURCES = ping_cli.c ping.h
+
+# "s" variants: the single request/response ("simple") ping modules.
+spingsrv_o_SOURCES = sping_srv.c ping.h
+
+spingcli_o_SOURCES = sping_cli.c ping.h
--- /dev/null
+#ifndef _KPING_INCLUDED
+#define _KPING_INCLUDED
+
+#include <portals/p30.h>
+
+
+/* Receive-buffer sizing constants. */
+#define PTL_PING_IN_SIZE 256 // n packets per buffer
+#define PTL_PING_IN_BUFFERS 2 // n fallback buffers
+
+/* Portal indices: the client attaches its match entry on PTL_PING_CLIENT
+ * and puts to PTL_PING_SERVER; the server does the reverse. */
+#define PTL_PING_CLIENT 4
+#define PTL_PING_SERVER 5
+
+/* First word of a packet: HEADER magic on requests, BULK magic on replies. */
+#define PING_HEADER_MAGIC 0xDEADBEEF
+#define PING_BULK_MAGIC 0xCAFEBABE
+
+/* NOTE(review): presumably match/ignore bit masks; no user is visible in
+ * the ping sources next to this header -- confirm before relying on them. */
+#define PING_HEAD_BITS 0x00000001
+#define PING_BULK_BITS 0x00000002
+#define PING_IGNORE_BITS 0xFFFFFFFC
+
+/* Option flag bits (one bit each). */
+#define PTL_PING_ACK 0x01
+#define PTL_PING_VERBOSE 0x02
+#define PTL_PING_VERIFY 0x04
+#define PTL_PING_PREALLOC 0x08
+
+
+/*
+ * Index of the buffer that follows `index`, wrapping back to 0 after the
+ * last of the PTL_PING_IN_BUFFERS buffers.
+ *
+ * The argument is parenthesized so that expressions such as `i << 1` or
+ * `a ? b : c` bind as the caller intended.  `index` is still expanded more
+ * than once: pass a side-effect-free expression.
+ */
+#define NEXT_PRIMARY_BUFFER(index) \
+        ((((index) + 1) >= PTL_PING_IN_BUFFERS) ? 0 : ((index) + 1))
+
+/*
+ * Log a failed Portals call: `str` names the call, `err` is its return code
+ * (used both as an index into ptl_err_str[] and printed numerically, so it
+ * is expanded twice).
+ */
+#define PDEBUG(str, err) \
+        CERROR ("%s: error=%s (%d)\n", (str), ptl_err_str[(err)], (err))
+
+
+/* Ping data to be passed via the ioctl to kernel space */
+
+#if __KERNEL__
+
+
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+#include <linux/workqueue.h>
+#else
+#include <linux/tqueue.h>
+#endif
+/* State of the kernel ping server (one global instance per server module). */
+struct pingsrv_data {
+
+ ptl_handle_ni_t ni; /* network interface handle copied from kportal_get_ni() */
+ ptl_handle_me_t me; /* match entry attached on PTL_PING_SERVER */
+ ptl_handle_eq_t eq; /* event queue whose callback receives incoming pings */
+ void *in_buf; /* receive buffer; also reused as the reply payload */
+ ptl_process_id_t my_id; /* our own id as returned by PtlGetId() */
+ ptl_process_id_t id_local; /* match id, set to PTL_NID_ANY/PTL_PID_ANY */
+ ptl_md_t mdin; /* descriptor for the incoming buffer */
+ ptl_md_t mdout; /* descriptor for the outgoing reply */
+ ptl_handle_md_t mdin_h; /* handle of the attached incoming MD */
+ ptl_handle_md_t mdout_h; /* handle of the bound outgoing MD */
+ ptl_event_t evnt; /* copy of the most recent event, saved by the EQ callback */
+ struct task_struct *tsk; /* service thread, woken by the EQ callback */
+}; /* struct pingsrv_data */
+
+/* State of one kernel ping client run (one global instance per client module). */
+struct pingcli_data {
+
+ struct portal_ioctl_data *args; /* ioctl arguments describing this run */
+ ptl_handle_me_t me; /* match entry attached on PTL_PING_CLIENT */
+ ptl_handle_eq_t eq; /* event queue whose callback receives replies */
+ char *inbuf; /* buffer replies are received into */
+ char *outbuf; /* buffer the outgoing ping is built in */
+ ptl_process_id_t myid; /* our own id as returned by PtlGetId() */
+ ptl_process_id_t id_local; /* match id, set to PTL_NID_ANY/PTL_PID_ANY */
+ ptl_process_id_t id_remote; /* target: nid from the ioctl, pid 0 */
+ ptl_md_t md_in_head; /* descriptor for inbuf */
+ ptl_md_t md_out_head; /* descriptor for outbuf */
+ ptl_handle_md_t md_in_head_h; /* handle of the attached incoming MD */
+ ptl_handle_md_t md_out_head_h; /* handle of the bound outgoing MD */
+ ptl_event_t ev; /* NOTE(review): not referenced by the client sources next to this header */
+ struct task_struct *tsk; /* task sleeping for the reply, woken by the EQ callback */
+}; /* struct pingcli_data */
+
+
+#endif /* __KERNEL__ */
+
+#endif /* _KPING_INCLUDED */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
+ * Author: Brian Behlendorf <behlendorf1@llnl.gov>
+ * Kedar Sovani (kedar@calsoftinc.com)
+ * Amey Inamdar (amey@calsoftinc.com)
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#define DEBUG_SUBSYSTEM S_PINGER
+
+#include <linux/kp30.h>
+#include <portals/p30.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include "ping.h"
+/* int portal_debug = D_PING_CLI; */
+
+
+/* Fixed part of a ping packet: magic word, sequence number, send timestamp. */
+#define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval))
+
+/* NOTE(review): not referenced anywhere in this file. */
+#define MAX_TIME 100000
+
+/* This should be enclosed in a structure */
+
+/* The single in-flight client run; allocated by kping_client(). */
+static struct pingcli_data *client = NULL;
+
+/* Sequence number of the ping currently outstanding; read by the EQ callback. */
+static int count = 0;
+
+/*
+ * Release the client's resources, starting at stage `err` and falling
+ * through every later stage:
+ *   1 - unlink the outgoing MD
+ *   2 - unlink the incoming MD, free the event queue, unlink the match entry
+ *   3 - drop the NAL reference taken with kportal_get_ni()
+ *   4 - free the client structure itself
+ * Any other value only emits the debug message.
+ *
+ * The in/out data buffers are NOT freed here; the caller owns them.
+ * After stage 4 the global `client` is reset to NULL so no dangling pointer
+ * to the freed structure is left behind.
+ */
+static void
+pingcli_shutdown(int err)
+{
+        int rc;
+
+        switch (err) {
+        case 1:
+                /* Unlink any memory descriptors we may have used */
+                if ((rc = PtlMDUnlink (client->md_out_head_h)))
+                        PDEBUG ("PtlMDUnlink", rc);
+                /* fallthrough */
+        case 2:
+                if ((rc = PtlMDUnlink (client->md_in_head_h)))
+                        PDEBUG ("PtlMDUnlink", rc);
+
+                /* Free the event queue */
+                if ((rc = PtlEQFree (client->eq)))
+                        PDEBUG ("PtlEQFree", rc);
+
+                if ((rc = PtlMEUnlink (client->me)))
+                        PDEBUG ("PtlMEUnlink", rc);
+                /* fallthrough */
+        case 3:
+                kportal_put_ni (client->args->ioc_nal);
+                /* fallthrough */
+        case 4:
+                if (client != NULL) {
+                        PORTAL_FREE (client, sizeof(struct pingcli_data));
+                        client = NULL;
+                }
+                break;
+        default:
+                break;
+        }
+
+        CDEBUG (D_OTHER, "ping client released resources\n");
+} /* pingcli_shutdown() */
+
+/*
+ * Event-queue callback for the client.  Looks at the first two words of the
+ * data the event refers to (magic, then sequence number) and wakes the
+ * sleeping client task when the reply belongs to the outstanding ping
+ * (or when the outstanding sequence number is 0).  Always returns 1.
+ */
+static int pingcli_callback(ptl_event_t *ev)
+{
+        int *payload = (int *)(ev->mem_desc.start + ev->offset);
+        int  reply_seq = payload[1];
+        int  reply_magic = payload[0];
+
+        if (reply_magic != 0xcafebabe) {
+                printk ("Unexpected response \n");
+                return 1;
+        }
+
+        if (count != 0 && reply_seq != count) {
+                /* a reply for a ping we already gave up on */
+                printk ("Received response after timeout for %d\n", reply_seq);
+                return 1;
+        }
+
+        wake_up_process (client->tsk);
+        return 1;
+}
+
+
+/*
+ * Run one ping session described by `args` (target nid, NAL, payload size,
+ * packet count, timeout).  Allocates the data buffers, sets up the match
+ * entry, event queue and memory descriptors, then sends `ioc_count` pings
+ * one at a time, sleeping up to 20 * ioc_timeout ticks for each reply.
+ *
+ * Expects the global `client` to be allocated and zeroed by the caller.
+ * All resources, including `client`, are released before returning, so the
+ * function returns NULL on success as well as on failure.
+ *
+ * Cleanup goes through a single exit: `stage` is the pingcli_shutdown()
+ * code matching what has been set up so far.  The data buffers are freed
+ * only after pingcli_shutdown() has unlinked the MDs that point at them.
+ *
+ * NOTE(review): the task state is set to TASK_INTERRUPTIBLE only after
+ * PtlPut() returns; a reply that arrives before that is a lost wake-up and
+ * the ping is then reported as a timeout.
+ */
+static struct pingcli_data *
+pingcli_start(struct portal_ioctl_data *args)
+{
+        ptl_handle_ni_t *nip;
+        unsigned         ping_head_magic = PING_HEADER_MAGIC;
+        size_t           out_size = STDSIZE + args->ioc_size;
+        size_t           in_size = (args->ioc_size + STDSIZE) * args->ioc_count;
+        char            *outbuf;
+        char            *inbuf;
+        int              stage = 4;
+        int              rc;
+        struct timeval   tv1, tv2;
+
+        client->tsk = current;
+        client->args = args;
+        CDEBUG (D_OTHER, "pingcli_setup args: nid "LPX64", "
+                "nal %d, size %u, count: %u, timeout: %u\n",
+                args->ioc_nid, args->ioc_nal, args->ioc_size,
+                args->ioc_count, args->ioc_timeout);
+
+        PORTAL_ALLOC (client->outbuf, out_size);
+        if (client->outbuf == NULL) {
+                CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n",
+                        out_size);
+                goto out;
+        }
+
+        PORTAL_ALLOC (client->inbuf, in_size);
+        if (client->inbuf == NULL) {
+                CERROR ("Unable to allocate in_buf ("LPSZ" bytes)\n",
+                        in_size);
+                goto out;
+        }
+
+        /* Aquire and initialize the proper nal for portals. */
+        if ((nip = kportal_get_ni (args->ioc_nal)) == NULL) {
+                CERROR ("NAL %d not loaded\n", args->ioc_nal);
+                goto out;
+        }
+        stage = 3;              /* NAL reference held */
+
+        /* Based on the initialization aquire our unique portal ID. */
+        if ((rc = PtlGetId (*nip, &client->myid))) {
+                CERROR ("PtlGetId error %d\n", rc);
+                goto out;
+        }
+
+        /* Setup the local match entries */
+        client->id_local.nid = PTL_NID_ANY;
+        client->id_local.pid = PTL_PID_ANY;
+
+        /* Setup the remote match entries */
+        client->id_remote.nid = args->ioc_nid;
+        client->id_remote.pid = 0;
+
+        if ((rc = PtlMEAttach (*nip, PTL_PING_CLIENT,
+                               client->id_local, 0, ~0, PTL_RETAIN,
+                               PTL_INS_AFTER, &client->me))) {
+                CERROR ("PtlMEAttach error %d\n", rc);
+                goto out;
+        }
+        /* From here on stage 2 is the closest match: it may also try to
+         * release an EQ/MD that does not exist yet, which is only logged. */
+        stage = 2;
+
+        /* Allocate the event queue for this network interface */
+        if ((rc = PtlEQAlloc (*nip, 64, pingcli_callback, &client->eq))) {
+                CERROR ("PtlEQAlloc error %d\n", rc);
+                goto out;
+        }
+
+        /* no ping is outstanding yet */
+        count = 0;
+
+        client->md_in_head.start     = client->inbuf;
+        client->md_in_head.length    = in_size;
+        client->md_in_head.threshold = PTL_MD_THRESH_INF;
+        client->md_in_head.options   = PTL_MD_OP_PUT;
+        client->md_in_head.user_ptr  = NULL;
+        client->md_in_head.eventq    = client->eq;
+        memset (client->inbuf, 0, in_size);
+
+        /* Attach the incoming buffer */
+        if ((rc = PtlMDAttach (client->me, client->md_in_head,
+                               PTL_UNLINK, &client->md_in_head_h))) {
+                CERROR ("PtlMDAttach error %d\n", rc);
+                goto out;
+        }
+
+        /* Setup the outgoing ping header */
+        client->md_out_head.start     = client->outbuf;
+        client->md_out_head.length    = out_size;
+        client->md_out_head.threshold = args->ioc_count;
+        client->md_out_head.options   = PTL_MD_OP_PUT;
+        client->md_out_head.user_ptr  = NULL;
+        client->md_out_head.eventq    = PTL_EQ_NONE;
+
+        memcpy (client->outbuf, &ping_head_magic, sizeof(ping_head_magic));
+
+        /* Bind the outgoing ping header */
+        if ((rc = PtlMDBind (*nip, client->md_out_head,
+                             &client->md_out_head_h))) {
+                CERROR ("PtlMDBind error %d\n", rc);
+                goto out;
+        }
+        stage = 1;              /* outgoing MD is bound */
+
+        while (count != args->ioc_count) {
+                /* packet layout: magic | sequence number | send time */
+                memcpy (client->outbuf + sizeof(unsigned),
+                        &count, sizeof(unsigned));
+
+                do_gettimeofday (&tv1);
+                memcpy (client->outbuf + 2 * sizeof(unsigned), &tv1,
+                        sizeof(struct timeval));
+
+                /* Put the ping packet */
+                if ((rc = PtlPut (client->md_out_head_h, PTL_NOACK_REQ,
+                                  client->id_remote, PTL_PING_SERVER,
+                                  0, 0, 0, 0))) {
+                        PDEBUG ("PtlPut (header)", rc);
+                        goto out;
+                }
+                printk ("sent msg no %d", count);
+
+                set_current_state (TASK_INTERRUPTIBLE);
+                rc = schedule_timeout (20 * args->ioc_timeout);
+                if (rc == 0) {
+                        printk ("   ::  timeout .....\n");
+                } else {
+                        do_gettimeofday (&tv2);
+                        printk("   ::  Reply in %u usec\n",
+                               (unsigned)((tv2.tv_sec - tv1.tv_sec)
+                                * 1000000 + (tv2.tv_usec - tv1.tv_usec)));
+                }
+                count++;
+        }
+
+        /* Success!  The outgoing MD was set up with threshold ioc_count and
+         * every put has been issued, so tear down from stage 2 as before. */
+        stage = 2;
+
+out:
+        /* pingcli_shutdown() frees `client`: fetch the buffer pointers
+         * first, and free the buffers only once the MDs are gone. */
+        outbuf = client->outbuf;
+        inbuf  = client->inbuf;
+
+        pingcli_shutdown (stage);
+
+        if (outbuf != NULL)
+                PORTAL_FREE (outbuf, out_size);
+        if (inbuf != NULL)
+                PORTAL_FREE (inbuf, in_size);
+
+        return NULL;
+} /* pingcli_setup() */
+
+
+
+/* called by the portals_ioctl for ping requests */
+/*
+ * Entry point registered for ping ioctls: allocate and zero the global
+ * client structure, then run the ping session described by `args`.
+ * Returns 0 in every case, including allocation failure.
+ */
+static int kping_client(struct portal_ioctl_data *args)
+{
+        PORTAL_ALLOC (client, sizeof(struct pingcli_data));
+
+        if (client != NULL) {
+                memset (client, 0, sizeof(struct pingcli_data));
+                pingcli_start (args);
+        } else {
+                CERROR ("Unable to allocate client structure\n");
+        }
+
+        return 0;
+} /* kping_client() */
+
+
+/*
+ * Module init: publish kping_client through PORTAL_SYMBOL_REGISTER.
+ * NOTE(review): presumably this is how the portals ioctl path finds the
+ * ping client -- confirm against the macro's definition.  Always returns 0.
+ */
+static int __init pingcli_init(void)
+{
+ PORTAL_SYMBOL_REGISTER(kping_client);
+ return 0;
+} /* pingcli_init() */
+
+
+/* Module exit: withdraw the kping_client registration made at init. */
+static void __exit pingcli_cleanup(void)
+{
+ PORTAL_SYMBOL_UNREGISTER (kping_client);
+} /* pingcli_cleanup() */
+
+
+MODULE_AUTHOR("Brian Behlendorf (LLNL)");
+MODULE_DESCRIPTION("A simple kernel space ping client for portals testing");
+MODULE_LICENSE("GPL");
+
+module_init(pingcli_init);
+module_exit(pingcli_cleanup);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+EXPORT_SYMBOL (kping_client);
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
+ * Author: Brian Behlendorf <behlendorf1@llnl.gov>
+ * Amey Inamdar <amey@calsoftinc.com>
+ * Kedar Sovani <kedar@calsoftinc.com>
+ *
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_PINGER
+
+#include <linux/kp30.h>
+#include <portals/p30.h>
+#include "ping.h"
+
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#include <linux/workqueue.h>
+#else
+#include <linux/tqueue.h>
+#endif
+#include <linux/wait.h>
+#include <linux/smp_lock.h>
+
+#include <asm/unistd.h>
+#include <asm/semaphore.h>
+
+/* Fixed part of a ping packet: magic word, sequence number, send timestamp. */
+#define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval))
+/* Size of the server's receive buffer (16 MiB). */
+#define MAXSIZE (16*1024*1024)
+
+/* Set from PING_HEADER_MAGIC / PING_BULK_MAGIC in pingsrv_init(). */
+static unsigned ping_head_magic;
+static unsigned ping_bulk_magic;
+static int nal = 0; // Your NAL,
+static unsigned long packets_valid = 0; // Valid packets
+/* Thread run flag: cleared by module exit, set back to 1 by the thread
+ * once it has shut down. */
+static int running = 1;
+/* Number of received pings not yet answered by the service thread. */
+atomic_t pkt;
+
+static struct pingsrv_data *server=NULL; // Our ping server
+
+/*
+ * Release the server's resources, starting at stage `err` and falling
+ * through every later stage:
+ *   1 - unlink the incoming MD
+ *   2 - free the event queue and unlink the match entry
+ *   3 - drop the NAL reference taken with kportal_get_ni()
+ *   4, 5 - free the receive buffer and the server structure
+ * Any other value only emits the debug message.
+ *
+ * `server` is tested before its in_buf member is read, and the global is
+ * reset to NULL once freed so no dangling pointer is left behind.
+ * Always returns NULL so callers can write `return pingsrv_shutdown(n);`.
+ */
+static void *pingsrv_shutdown(int err)
+{
+        int rc;
+
+        switch (err) {
+        case 1:
+                /* Unlink any memory descriptors we may have used */
+                if ((rc = PtlMDUnlink (server->mdin_h)))
+                        PDEBUG ("PtlMDUnlink (incoming buffer)", rc);
+                /* fallthrough */
+        case 2:
+                /* Free the event queue */
+                if ((rc = PtlEQFree (server->eq)))
+                        PDEBUG ("PtlEQFree", rc);
+
+                /* Unlink the client portal from the ME list */
+                if ((rc = PtlMEUnlink (server->me)))
+                        PDEBUG ("PtlMEUnlink", rc);
+                /* fallthrough */
+        case 3:
+                kportal_put_ni (nal);
+                /* fallthrough */
+        case 4:
+        case 5:
+                if (server != NULL) {
+                        if (server->in_buf != NULL)
+                                PORTAL_FREE (server->in_buf, MAXSIZE);
+
+                        PORTAL_FREE (server, sizeof (struct pingsrv_data));
+                        server = NULL;
+                }
+                break;
+        default:
+                break;
+        }
+
+        CDEBUG (D_OTHER, "ping sever resources released\n");
+        return NULL;
+} /* pingsrv_shutdown() */
+
+
+/*
+ * Service thread: sleeps until the EQ callback reports a ping (`pkt` > 0),
+ * then answers it by writing the reply magic over the first word of the
+ * receive buffer, re-arming the receive MD and putting the buffer back to
+ * the initiator.  Runs until `running` is cleared, then releases all server
+ * resources and sets `running` back to 1 to tell module exit it is done.
+ *
+ * The magic words are handled as 32-bit `unsigned`: reading the header
+ * through `int` into an `unsigned long` sign-extends on LP64 and never
+ * equals 0xdeadbeef, and copying an `unsigned long` reply magic would
+ * overwrite the sequence number that follows it.
+ *
+ * NOTE(review): if PtlMDBind() fails the thread frees everything and exits
+ * without setting `running` to 1; module exit then touches the released
+ * server and waits forever.  Needs a coordinated fix with
+ * pingsrv_cleanup().
+ */
+int pingsrv_thread(void *arg)
+{
+        int      rc;
+        unsigned magic;
+        unsigned bulk_magic = PING_BULK_MAGIC;
+
+        kportal_daemonize ("pingsrv");
+        server->tsk = current;
+
+        while (running) {
+                set_current_state (TASK_INTERRUPTIBLE);
+                if (atomic_read (&pkt) == 0) {
+                        schedule_timeout (MAX_SCHEDULE_TIMEOUT);
+                        continue;
+                }
+                /* there is work: do not carry a sleeping state into calls
+                 * that may reschedule */
+                set_current_state (TASK_RUNNING);
+
+                magic = *((unsigned *)(server->evnt.mem_desc.start
+                                       + server->evnt.offset));
+
+                /* an unexpected packet is reported but still answered */
+                if (magic != PING_HEADER_MAGIC)
+                        printk("Unexpected Packet to the server\n");
+
+                memcpy (server->in_buf, &bulk_magic, sizeof(bulk_magic));
+
+                server->mdout.length    = server->evnt.rlength;
+                server->mdout.start     = server->in_buf;
+                server->mdout.threshold = 1;
+                server->mdout.options   = PTL_MD_OP_PUT;
+                server->mdout.user_ptr  = NULL;
+                server->mdout.eventq    = PTL_EQ_NONE;
+
+                /* Bind the outgoing buffer */
+                if ((rc = PtlMDBind (server->ni, server->mdout,
+                                     &server->mdout_h))) {
+                        PDEBUG ("PtlMDBind", rc);
+                        pingsrv_shutdown (1);
+                        return 1;
+                }
+
+                /* re-arm the receive descriptor for the next ping */
+                server->mdin.start     = server->in_buf;
+                server->mdin.length    = MAXSIZE;
+                server->mdin.threshold = 1;
+                server->mdin.options   = PTL_MD_OP_PUT;
+                server->mdin.user_ptr  = NULL;
+                server->mdin.eventq    = server->eq;
+
+                if ((rc = PtlMDAttach (server->me, server->mdin,
+                                       PTL_UNLINK, &server->mdin_h)))
+                        PDEBUG ("PtlMDAttach (bulk)", rc);
+
+                if ((rc = PtlPut (server->mdout_h, PTL_NOACK_REQ,
+                                  server->evnt.initiator, PTL_PING_CLIENT,
+                                  0, 0, 0, 0)))
+                        PDEBUG ("PtlPut", rc);
+
+                atomic_dec (&pkt);
+        }
+
+        pingsrv_shutdown (1);
+        running = 1;
+        return 0;
+}
+
+/*
+ * Account for one received ping and wake the service thread.
+ * `ev` is not used.  Always returns 1.
+ * NOTE(review): server->tsk is only assigned once pingsrv_thread() runs;
+ * an event delivered before that would pass an unset task pointer here.
+ */
+static int pingsrv_packet(ptl_event_t *ev)
+{
+ atomic_inc (&pkt);
+ wake_up_process (server->tsk);
+ return 1;
+} /* pingsrv_packet() */
+
+/*
+ * Event-queue callback for the server: saves a copy of the event for the
+ * service thread, logs the sender and the first three words of the packet,
+ * counts it, and hands over to pingsrv_packet().  Returns 0 for a NULL
+ * event, otherwise whatever pingsrv_packet() returns.
+ */
+static int pingsrv_callback(ptl_event_t *ev)
+{
+        int *words;
+
+        if (ev == NULL) {
+                CERROR ("null in callback, ev=%p\n", ev);
+                return 0;
+        }
+
+        /* the service thread works from this copy */
+        server->evnt = *ev;
+
+        /* packet layout: magic | sequence number | third word */
+        words = (int *)(ev->mem_desc.start + ev->offset);
+
+        printk ("received ping from nid "LPX64" "
+                "(off=%u rlen=%u mlen=%u head=%x seq=%d size=%d)\n",
+                ev->initiator.nid, ev->offset, ev->rlength, ev->mlength,
+                words[0], words[1], words[2]);
+
+        packets_valid++;
+
+        return pingsrv_packet(ev);
+} /* pingsrv_callback() */
+
+
+/*
+ * Bring the server up on NAL `nal`: take the NAL reference, attach a match
+ * entry on PTL_PING_SERVER, allocate the event queue and the receive
+ * buffer, and attach the receive MD.
+ *
+ * Expects the global `server` to be allocated and zero-filled.
+ * Returns `server` on success.  On any failure everything acquired so far
+ * (including `server`) is released through pingsrv_shutdown() and NULL is
+ * returned.  A PtlMDAttach() failure is treated as a failure too: without
+ * the receive MD the server could never see a ping.
+ */
+static struct pingsrv_data *pingsrv_setup(void)
+{
+        ptl_handle_ni_t *nip;
+        int rc;
+
+        /* Aquire and initialize the proper nal for portals. */
+        if ((nip = kportal_get_ni (nal)) == NULL) {
+                CDEBUG (D_OTHER, "NAL %d not loaded\n", nal);
+                return pingsrv_shutdown (4);
+        }
+
+        server->ni = *nip;
+
+        /* Based on the initialization aquire our unique portal ID. */
+        if ((rc = PtlGetId (server->ni, &server->my_id))) {
+                PDEBUG ("PtlGetId", rc);
+                /* only the NAL reference is held at this point */
+                return pingsrv_shutdown (3);
+        }
+
+        server->id_local.nid = PTL_NID_ANY;
+        server->id_local.pid = PTL_PID_ANY;
+
+        /* Attach a match entries for header packets */
+        if ((rc = PtlMEAttach (server->ni, PTL_PING_SERVER,
+                               server->id_local, 0, ~0,
+                               PTL_RETAIN, PTL_INS_AFTER, &server->me))) {
+                PDEBUG ("PtlMEAttach", rc);
+                return pingsrv_shutdown (3);
+        }
+
+        if ((rc = PtlEQAlloc (server->ni, 1024, pingsrv_callback,
+                              &server->eq))) {
+                PDEBUG ("PtlEQAlloc (callback)", rc);
+                /* closest stage: also tries to free the missing EQ,
+                 * which is only logged */
+                return pingsrv_shutdown (2);
+        }
+
+        PORTAL_ALLOC (server->in_buf, MAXSIZE);
+        if (!server->in_buf) {
+                CDEBUG (D_OTHER, "Allocation error\n");
+                return pingsrv_shutdown (2);
+        }
+
+        /* Setup the incoming buffer */
+        server->mdin.start     = server->in_buf;
+        server->mdin.length    = MAXSIZE;
+        server->mdin.threshold = 1;
+        server->mdin.options   = PTL_MD_OP_PUT;
+        server->mdin.user_ptr  = NULL;
+        server->mdin.eventq    = server->eq;
+        memset (server->in_buf, 0, STDSIZE);
+
+        if ((rc = PtlMDAttach (server->me, server->mdin,
+                               PTL_UNLINK, &server->mdin_h))) {
+                PDEBUG ("PtlMDAttach (bulk)", rc);
+                return pingsrv_shutdown (2);
+        }
+
+        /* Success! */
+        CDEBUG (D_OTHER, "ping server resources allocated\n");
+        return server;
+} /* pingsrv_setup() */
+
+/*
+ * Set the server up and start its service thread.
+ * Returns 0 on success, -ENOMEM when pingsrv_setup() fails, or the negative
+ * value returned by kernel_thread() when the thread cannot be created (in
+ * which case the resources acquired by setup are released again).
+ */
+static int pingsrv_start(void)
+{
+        int rc;
+
+        /* Setup our server */
+        if (!pingsrv_setup()) {
+                CDEBUG (D_OTHER, "pingsrv_setup() failed, server stopped\n");
+                return -ENOMEM;
+        }
+
+        rc = kernel_thread (pingsrv_thread, NULL, 0);
+        if (rc < 0) {
+                CERROR ("cannot start pingsrv thread: %d\n", rc);
+                /* nobody else will ever release what setup acquired */
+                pingsrv_shutdown (1);
+                return rc;
+        }
+
+        return 0;
+} /* pingsrv_start() */
+
+
+
+/*
+ * Module init: allocate the server structure and start the server.
+ * Returns 0 on success or a negative errno.
+ *
+ * The structure is checked for allocation failure and zero-filled before
+ * use: pingsrv_setup() dereferences it immediately and the cleanup path
+ * tests server->in_buf against NULL.
+ */
+static int __init pingsrv_init(void)
+{
+        ping_head_magic = PING_HEADER_MAGIC;
+        ping_bulk_magic = PING_BULK_MAGIC;
+
+        PORTAL_ALLOC (server, sizeof(struct pingsrv_data));
+        if (server == NULL) {
+                CERROR ("Unable to allocate server structure\n");
+                return -ENOMEM;
+        }
+        memset (server, 0, sizeof(struct pingsrv_data));
+
+        return pingsrv_start ();
+} /* pingsrv_init() */
+
+
+/*
+ * Module exit: ask the service thread to stop and wait until it has.
+ * The thread releases all server resources itself and then sets `running`
+ * back to 1, which is what the loop below polls for once per second.
+ * NOTE(review): this hangs if the thread has already exited through its
+ * PtlMDBind() error path, which never sets `running` back to 1.
+ */
+static void __exit pingsrv_cleanup(void)
+{
+ /* NOTE(review): nothing in this file creates "net/pingsrv". */
+ remove_proc_entry ("net/pingsrv", NULL);
+
+ /* order matters: clear the flag before waking the thread */
+ running = 0;
+ wake_up_process (server->tsk);
+ while (running != 1) {
+ set_current_state (TASK_UNINTERRUPTIBLE);
+ schedule_timeout (HZ);
+ }
+
+} /* pingsrv_cleanup() */
+
+
+MODULE_PARM(nal, "i");
+MODULE_PARM_DESC(nal, "Use the specified NAL "
+ "(6-kscimacnal, 4-toenal, 2-ksocknal, 1-kqswnal)");
+
+MODULE_AUTHOR("Brian Behlendorf (LLNL)");
+MODULE_DESCRIPTION("A kernel space ping server for portals testing");
+MODULE_LICENSE("GPL");
+
+module_init(pingsrv_init);
+module_exit(pingsrv_cleanup);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
+ * Author: Brian Behlendorf <behlendorf1@llnl.gov>
+ * Kedar Sovani (kedar@calsoftinc.com)
+ * Amey Inamdar (amey@calsoftinc.com)
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+/* This is a striped down version of pinger. It follows a single
+ * request-response protocol. Doesn't do Bulk data pinging. Also doesn't
+ * send multiple packets in a single ioctl.
+ */
+
+
+#define DEBUG_SUBSYSTEM S_PINGER
+
+#include <linux/kp30.h>
+#include <portals/p30.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include "ping.h"
+/* int portal_debug = D_PING_CLI; */
+
+
+/* Size of a simple ping packet: two ints plus 4 bytes of data. */
+#define STDSIZE (sizeof(int) + sizeof(int) + 4) /* The data is 4 bytes
+ assumed */
+
+/* This should be enclosed in a structure */
+
+/* The single in-flight client run; allocated by kping_client(). */
+static struct pingcli_data *client = NULL;
+
+/* Only ever reset to 0 in this file. */
+static int count = 0;
+
+/*
+ * Release the client's resources, starting at stage `err` and falling
+ * through every later stage:
+ *   1 - unlink the outgoing MD
+ *   2 - free the event queue and unlink the match entry
+ *   3 - drop the NAL reference taken with kportal_get_ni()
+ *   4 - free the in/out buffers and the client structure
+ * Any other value only emits the debug message.
+ *
+ * `client` is tested before its buffer members are read, and the global is
+ * reset to NULL once freed so no dangling pointer is left behind.
+ */
+static void
+pingcli_shutdown(int err)
+{
+        int rc;
+
+        switch (err) {
+        case 1:
+                /* Unlink any memory descriptors we may have used */
+                if ((rc = PtlMDUnlink (client->md_out_head_h)))
+                        PDEBUG ("PtlMDUnlink", rc);
+                /* fallthrough */
+        case 2:
+                /* Free the event queue */
+                if ((rc = PtlEQFree (client->eq)))
+                        PDEBUG ("PtlEQFree", rc);
+
+                if ((rc = PtlMEUnlink (client->me)))
+                        PDEBUG ("PtlMEUnlink", rc);
+                /* fallthrough */
+        case 3:
+                kportal_put_ni (client->args->ioc_nal);
+                /* fallthrough */
+        case 4:
+                if (client != NULL) {
+                        /* Free our buffers */
+                        if (client->outbuf != NULL)
+                                PORTAL_FREE (client->outbuf, STDSIZE);
+
+                        if (client->inbuf != NULL)
+                                PORTAL_FREE (client->inbuf, STDSIZE);
+
+                        PORTAL_FREE (client, sizeof(struct pingcli_data));
+                        client = NULL;
+                }
+                break;
+        default:
+                break;
+        }
+
+        CDEBUG (D_OTHER, "ping client released resources\n");
+} /* pingcli_shutdown() */
+
+/*
+ * Event-queue callback for the simple client: any event wakes the task
+ * recorded in client->tsk.  The event contents are not inspected.
+ * Always returns 1.
+ */
+static int pingcli_callback(ptl_event_t *ev)
+{
+ wake_up_process (client->tsk);
+ return 1;
+}
+
+
+/*
+ * Send a single ping to the nid given in `args` and wait up to
+ * 20 * ioc_timeout ticks for the reply.
+ *
+ * Expects the global `client` to be allocated and zeroed by the caller.
+ * All resources, including `client` and its buffers, are released through
+ * pingcli_shutdown() before returning, so the function returns NULL on
+ * success as well as on failure.
+ *
+ * Cleanup goes through a single exit: `stage` is the pingcli_shutdown()
+ * code matching what has been set up so far.
+ *
+ * NOTE(review): the task state is set to TASK_INTERRUPTIBLE only after
+ * PtlPut() returns; a reply that arrives before that is a lost wake-up and
+ * is then reported as a timeout.
+ */
+static struct pingcli_data *
+pingcli_start(struct portal_ioctl_data *args)
+{
+        const ptl_handle_ni_t *nip;
+        unsigned               ping_head_magic = PING_HEADER_MAGIC;
+        int                    stage = 4;
+        int                    rc;
+
+        client->tsk = current;
+        client->args = args;
+
+        CDEBUG (D_OTHER, "pingcli_setup args: nid "LPX64", "
+                "nal %d, size %u, count: %u, timeout: %u\n",
+                args->ioc_nid, args->ioc_nal, args->ioc_size,
+                args->ioc_count, args->ioc_timeout);
+
+        PORTAL_ALLOC (client->outbuf, STDSIZE);
+        if (client->outbuf == NULL) {
+                CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE);
+                goto out;
+        }
+
+        PORTAL_ALLOC (client->inbuf, STDSIZE);
+        if (client->inbuf == NULL) {
+                CERROR ("Unable to allocate in_buf ("LPSZ" bytes)\n", STDSIZE);
+                goto out;
+        }
+
+        /* Aquire and initialize the proper nal for portals. */
+        if ((nip = kportal_get_ni (args->ioc_nal)) == NULL) {
+                CERROR ("NAL %d not loaded.\n", args->ioc_nal);
+                goto out;
+        }
+        stage = 3;              /* NAL reference held */
+
+        /* Based on the initialization aquire our unique portal ID. */
+        if ((rc = PtlGetId (*nip, &client->myid))) {
+                CERROR ("PtlGetId error %d\n", rc);
+                goto out;
+        }
+
+        /* Setup the local match entries */
+        client->id_local.nid = PTL_NID_ANY;
+        client->id_local.pid = PTL_PID_ANY;
+
+        /* Setup the remote match entries */
+        client->id_remote.nid = args->ioc_nid;
+        client->id_remote.pid = 0;
+
+        if ((rc = PtlMEAttach (*nip, PTL_PING_CLIENT,
+                               client->id_local, 0, ~0, PTL_RETAIN,
+                               PTL_INS_AFTER, &client->me))) {
+                CERROR ("PtlMEAttach error %d\n", rc);
+                goto out;
+        }
+        /* From here on stage 2 is the closest match: it may also try to
+         * free an EQ that does not exist yet, which is only logged. */
+        stage = 2;
+
+        /* Allocate the event queue for this network interface */
+        if ((rc = PtlEQAlloc (*nip, 64, pingcli_callback, &client->eq))) {
+                CERROR ("PtlEQAlloc error %d\n", rc);
+                goto out;
+        }
+
+        client->md_in_head.start     = client->inbuf;
+        client->md_in_head.length    = STDSIZE;
+        client->md_in_head.threshold = 1;
+        client->md_in_head.options   = PTL_MD_OP_PUT;
+        client->md_in_head.user_ptr  = NULL;
+        client->md_in_head.eventq    = client->eq;
+        memset (client->inbuf, 0, STDSIZE);
+
+        /* Attach the incoming buffer */
+        if ((rc = PtlMDAttach (client->me, client->md_in_head,
+                               PTL_UNLINK, &client->md_in_head_h))) {
+                CERROR ("PtlMDAttach error %d\n", rc);
+                goto out;
+        }
+
+        /* Setup the outgoing ping header */
+        client->md_out_head.start     = client->outbuf;
+        client->md_out_head.length    = STDSIZE;
+        client->md_out_head.threshold = 1;
+        client->md_out_head.options   = PTL_MD_OP_PUT;
+        client->md_out_head.user_ptr  = NULL;
+        client->md_out_head.eventq    = PTL_EQ_NONE;
+
+        memcpy (client->outbuf, &ping_head_magic, sizeof(ping_head_magic));
+
+        /* Bind the outgoing ping header */
+        if ((rc = PtlMDBind (*nip, client->md_out_head,
+                             &client->md_out_head_h))) {
+                CERROR ("PtlMDBind error %d\n", rc);
+                goto out;
+        }
+        stage = 1;              /* outgoing MD is bound */
+
+        /* Put the ping packet */
+        if ((rc = PtlPut (client->md_out_head_h, PTL_NOACK_REQ,
+                          client->id_remote, PTL_PING_SERVER, 0, 0, 0, 0))) {
+                PDEBUG ("PtlPut (header)", rc);
+                goto out;
+        }
+
+        /* The outgoing MD had threshold 1 and the put has been issued, so
+         * both the timeout and the success case tear down from stage 2. */
+        stage = 2;
+
+        count = 0;
+        set_current_state (TASK_INTERRUPTIBLE);
+        rc = schedule_timeout (20 * args->ioc_timeout);
+        if (rc == 0)
+                printk (" Time out on the server\n");
+        else
+                printk("Received respose from the server \n");
+
+out:
+        pingcli_shutdown (stage);
+        return NULL;
+} /* pingcli_setup() */
+
+
+
+/* called by the portals_ioctl for ping requests */
+/*
+ * Entry point registered for ping ioctls: allocate and zero the global
+ * client structure, then run the single-ping session described by `args`.
+ * Returns 0 in every case, including allocation failure.
+ *
+ * The allocation is checked BEFORE the structure is cleared; clearing first
+ * would write through a NULL pointer when the allocation fails.
+ */
+static int kping_client(struct portal_ioctl_data *args)
+{
+        PORTAL_ALLOC (client, sizeof(struct pingcli_data));
+        if (client == NULL) {
+                CERROR ("Unable to allocate client structure\n");
+                return (0);
+        }
+        memset (client, 0, sizeof(struct pingcli_data));
+
+        pingcli_start (args);
+
+        return 0;
+} /* kping_client() */
+
+
+/*
+ * Module init: publish kping_client through PORTAL_SYMBOL_REGISTER.
+ * NOTE(review): presumably this is how the portals ioctl path finds the
+ * ping client -- confirm against the macro's definition.  Always returns 0.
+ */
+static int __init pingcli_init(void)
+{
+ PORTAL_SYMBOL_REGISTER(kping_client);
+ return 0;
+} /* pingcli_init() */
+
+
+/* Module exit: withdraw the kping_client registration made at init. */
+static void __exit pingcli_cleanup(void)
+{
+ PORTAL_SYMBOL_UNREGISTER (kping_client);
+} /* pingcli_cleanup() */
+
+
+MODULE_AUTHOR("Brian Behlendorf (LLNL)");
+MODULE_DESCRIPTION("A simple kernel space ping client for portals testing");
+MODULE_LICENSE("GPL");
+
+module_init(pingcli_init);
+module_exit(pingcli_cleanup);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+EXPORT_SYMBOL (kping_client);
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
+ * Author: Brian Behlendorf <behlendorf1@llnl.gov>
+ * Amey Inamdar <amey@calsoftinc.com>
+ * Kedar Sovani <kedar@calsoftinc.com>
+ *
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* This is a striped down version of pinger. It follows a single
+ * request-response protocol. Doesn't do Bulk data pinging. Also doesn't
+ * send multiple packets in a single ioctl.
+ */
+
+#define DEBUG_SUBSYSTEM S_PINGER
+
+#include <linux/kp30.h>
+#include <portals/p30.h>
+#include "ping.h"
+
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#include <linux/workqueue.h>
+#else
+#include <linux/tqueue.h>
+#endif
+#include <linux/wait.h>
+#include <linux/smp_lock.h>
+
+#include <asm/unistd.h>
+#include <asm/semaphore.h>
+
+/* Size of a simple ping packet: two ints plus 4 bytes of data. */
+#define STDSIZE (sizeof(int) + sizeof(int) + 4)
+
+static int nal = 0; // Your NAL,
+static unsigned long packets_valid = 0; // Valid packets
+/* Thread run flag: cleared by module exit, set back to 1 by the thread
+ * once it has shut down. */
+static int running = 1;
+/* Number of received pings not yet answered by the service thread. */
+atomic_t pkt;
+
+static struct pingsrv_data *server=NULL; // Our ping server
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#endif
+
+/*
+ * Release the server's resources, starting at stage `err` and falling
+ * through every later stage:
+ *   1 - unlink the incoming MD
+ *   2 - free the event queue and unlink the match entry
+ *   3 - drop the NAL reference taken with kportal_get_ni()
+ *   4 - free the receive buffer and the server structure
+ * Any other value only emits the debug message.
+ *
+ * `server` is tested before its in_buf member is read, and the global is
+ * reset to NULL once freed so no dangling pointer is left behind.
+ * Always returns NULL so callers can write `return pingsrv_shutdown(n);`.
+ */
+static void *pingsrv_shutdown(int err)
+{
+        int rc;
+
+        switch (err) {
+        case 1:
+                /* Unlink any memory descriptors we may have used */
+                if ((rc = PtlMDUnlink (server->mdin_h)))
+                        PDEBUG ("PtlMDUnlink (incoming buffer)", rc);
+                /* fallthrough */
+        case 2:
+                /* Free the event queue */
+                if ((rc = PtlEQFree (server->eq)))
+                        PDEBUG ("PtlEQFree", rc);
+
+                /* Unlink the client portal from the ME list */
+                if ((rc = PtlMEUnlink (server->me)))
+                        PDEBUG ("PtlMEUnlink", rc);
+                /* fallthrough */
+        case 3:
+                kportal_put_ni (nal);
+                /* fallthrough */
+        case 4:
+                if (server != NULL) {
+                        if (server->in_buf != NULL)
+                                PORTAL_FREE (server->in_buf, STDSIZE);
+
+                        PORTAL_FREE (server, sizeof (struct pingsrv_data));
+                        server = NULL;
+                }
+                break;
+        default:
+                break;
+        }
+
+        CDEBUG (D_OTHER, "ping sever resources released\n");
+        return NULL;
+} /* pingsrv_shutdown() */
+
+
+/*
+ * Service thread: sleeps until the EQ callback reports a ping (`pkt` > 0),
+ * then answers it by re-arming the receive MD and putting the receive
+ * buffer back to the initiator.  Runs until `running` is cleared, then
+ * releases all server resources and sets `running` back to 1 to tell module
+ * exit it is done.
+ *
+ * NOTE(review): if PtlMDBind() fails the thread frees everything and exits
+ * without setting `running` to 1; module exit then touches the released
+ * server and waits forever.  Needs a coordinated fix with
+ * pingsrv_cleanup().
+ */
+int pingsrv_thread(void *arg)
+{
+        int rc;
+
+        kportal_daemonize ("pingsrv");
+        server->tsk = current;
+
+        while (running) {
+                set_current_state (TASK_INTERRUPTIBLE);
+                if (atomic_read (&pkt) == 0) {
+                        schedule_timeout (MAX_SCHEDULE_TIMEOUT);
+                        continue;
+                }
+                /* there is work: do not carry a sleeping state into calls
+                 * that may reschedule */
+                set_current_state (TASK_RUNNING);
+
+                server->mdout.start     = server->in_buf;
+                server->mdout.length    = STDSIZE;
+                server->mdout.threshold = 1;
+                server->mdout.options   = PTL_MD_OP_PUT;
+                server->mdout.user_ptr  = NULL;
+                server->mdout.eventq    = PTL_EQ_NONE;
+
+                /* Bind the outgoing buffer */
+                if ((rc = PtlMDBind (server->ni, server->mdout,
+                                     &server->mdout_h))) {
+                        PDEBUG ("PtlMDBind", rc);
+                        pingsrv_shutdown (1);
+                        return 1;
+                }
+
+                /* re-arm the receive descriptor for the next ping */
+                server->mdin.start     = server->in_buf;
+                server->mdin.length    = STDSIZE;
+                server->mdin.threshold = 1;
+                server->mdin.options   = PTL_MD_OP_PUT;
+                server->mdin.user_ptr  = NULL;
+                server->mdin.eventq    = server->eq;
+
+                if ((rc = PtlMDAttach (server->me, server->mdin,
+                                       PTL_UNLINK, &server->mdin_h)))
+                        PDEBUG ("PtlMDAttach (bulk)", rc);
+
+                if ((rc = PtlPut (server->mdout_h, PTL_NOACK_REQ,
+                                  server->evnt.initiator, PTL_PING_CLIENT,
+                                  0, 0, 0, 0)))
+                        PDEBUG ("PtlPut", rc);
+
+                atomic_dec (&pkt);
+        }
+
+        pingsrv_shutdown (1);
+        running = 1;
+        return 0;
+}
+
+/*
+ * Account for one received ping and wake the service thread.
+ * `ev` is not used.  Always returns 1.
+ * NOTE(review): server->tsk is only assigned once pingsrv_thread() runs;
+ * an event delivered before that would pass an unset task pointer here.
+ */
+static int pingsrv_packet(ptl_event_t *ev)
+{
+ atomic_inc (&pkt);
+ wake_up_process (server->tsk);
+ return 1;
+} /* pingsrv_packet() */
+
+/*
+ * Event-queue callback for the simple server: saves a copy of the event for
+ * the service thread, logs the sender and the first word of the packet,
+ * counts it, and hands over to pingsrv_packet().  Returns 0 for a NULL
+ * event, otherwise whatever pingsrv_packet() returns.
+ */
+static int pingsrv_callback(ptl_event_t *ev)
+{
+        int head;
+
+        if (ev == NULL) {
+                CERROR ("null in callback, ev=%p\n", ev);
+                return 0;
+        }
+
+        /* the service thread works from this copy */
+        server->evnt = *ev;
+
+        head = *((int *)(ev->mem_desc.start + ev->offset));
+
+        printk ("received ping from nid "LPX64" "
+                "(off=%u rlen=%u mlen=%u head=%x)\n",
+                ev->initiator.nid, ev->offset, ev->rlength, ev->mlength,
+                head);
+
+        packets_valid++;
+
+        return pingsrv_packet(ev);
+} /* pingsrv_callback() */
+
+
+/*
+ * Bring the simple server up on NAL `nal`: take the NAL reference, attach a
+ * match entry on PTL_PING_SERVER, allocate the event queue and the receive
+ * buffer, and attach the receive MD.
+ *
+ * Expects the global `server` to be allocated and zero-filled.
+ * Returns `server` on success.  On any failure everything acquired so far
+ * (including `server`) is released through pingsrv_shutdown() and NULL is
+ * returned.  A PtlMDAttach() failure is treated as a failure too: without
+ * the receive MD the server could never see a ping.
+ */
+static struct pingsrv_data *pingsrv_setup(void)
+{
+        ptl_handle_ni_t *nip;
+        int rc;
+
+        /* Aquire and initialize the proper nal for portals. */
+        if ((nip = kportal_get_ni (nal)) == NULL) {
+                CDEBUG (D_OTHER, "Nal %d not loaded.\n", nal);
+                return pingsrv_shutdown (4);
+        }
+
+        server->ni = *nip;
+
+        /* Based on the initialization aquire our unique portal ID. */
+        if ((rc = PtlGetId (server->ni, &server->my_id))) {
+                PDEBUG ("PtlGetId", rc);
+                /* only the NAL reference is held at this point */
+                return pingsrv_shutdown (3);
+        }
+
+        server->id_local.nid = PTL_NID_ANY;
+        server->id_local.pid = PTL_PID_ANY;
+
+        /* Attach a match entries for header packets */
+        if ((rc = PtlMEAttach (server->ni, PTL_PING_SERVER,
+                               server->id_local, 0, ~0,
+                               PTL_RETAIN, PTL_INS_AFTER, &server->me))) {
+                PDEBUG ("PtlMEAttach", rc);
+                return pingsrv_shutdown (3);
+        }
+
+        if ((rc = PtlEQAlloc (server->ni, 64, pingsrv_callback,
+                              &server->eq))) {
+                PDEBUG ("PtlEQAlloc (callback)", rc);
+                /* closest stage: also tries to free the missing EQ,
+                 * which is only logged */
+                return pingsrv_shutdown (2);
+        }
+
+        PORTAL_ALLOC (server->in_buf, STDSIZE);
+        if (!server->in_buf) {
+                CDEBUG (D_OTHER, "Allocation error\n");
+                return pingsrv_shutdown (2);
+        }
+
+        /* Setup the incoming buffer */
+        server->mdin.start     = server->in_buf;
+        server->mdin.length    = STDSIZE;
+        server->mdin.threshold = 1;
+        server->mdin.options   = PTL_MD_OP_PUT;
+        server->mdin.user_ptr  = NULL;
+        server->mdin.eventq    = server->eq;
+        memset (server->in_buf, 0, STDSIZE);
+
+        if ((rc = PtlMDAttach (server->me, server->mdin,
+                               PTL_UNLINK, &server->mdin_h))) {
+                PDEBUG ("PtlMDAttach (bulk)", rc);
+                return pingsrv_shutdown (2);
+        }
+
+        /* Success! */
+        CDEBUG (D_OTHER, "ping server resources allocated\n");
+        return server;
+} /* pingsrv_setup() */
+
+/*
+ * Set the server up and start its service thread.
+ * Returns 0 on success, -ENOMEM when pingsrv_setup() fails, or the negative
+ * value returned by kernel_thread() when the thread cannot be created (in
+ * which case the resources acquired by setup are released again).
+ */
+static int pingsrv_start(void)
+{
+        int rc;
+
+        /* Setup our server */
+        if (!pingsrv_setup()) {
+                CDEBUG (D_OTHER, "pingsrv_setup() failed, server stopped\n");
+                return -ENOMEM;
+        }
+
+        rc = kernel_thread (pingsrv_thread, NULL, 0);
+        if (rc < 0) {
+                CERROR ("cannot start pingsrv thread: %d\n", rc);
+                /* nobody else will ever release what setup acquired */
+                pingsrv_shutdown (1);
+                return rc;
+        }
+
+        return 0;
+} /* pingsrv_start() */
+
+
+
+/*
+ * pingsrv_init - module entry point.
+ *
+ * Allocates the global server structure and starts the server.  Returns
+ * -ENOMEM if the structure cannot be allocated, otherwise the result of
+ * pingsrv_start().
+ */
+static int __init pingsrv_init(void)
+{
+        PORTAL_ALLOC (server, sizeof(struct pingsrv_data));
+        /* pingsrv_setup() dereferences server, so stop here on failure. */
+        if (server == NULL) {
+                CDEBUG (D_OTHER, "Allocation error\n");
+                return -ENOMEM;
+        }
+        return pingsrv_start ();
+} /* pingsrv_init() */
+
+
+/*
+ * pingsrv_cleanup - module exit point.
+ *
+ * Removes the proc entry, clears the global "running" flag, wakes the
+ * server task and then sleeps in one second steps until "running" reads 1.
+ */
+static void __exit pingsrv_cleanup(void)
+{
+        remove_proc_entry ("net/pingsrv", NULL);
+
+        running = 0;
+        wake_up_process (server->tsk);
+
+        for (;;) {
+                if (running == 1)
+                        break;
+                set_current_state (TASK_UNINTERRUPTIBLE);
+                schedule_timeout (HZ);
+        }
+} /* pingsrv_cleanup() */
+
+
+/* Module parameter "nal": integer id of the NAL the ping server uses
+ * (passed to kportal_get_ni() during setup). */
+MODULE_PARM(nal, "i");
+MODULE_PARM_DESC(nal, "Use the specified NAL "
+ "(6-kscimacnal, 4-toenal, 2-ksocknal, 1-kqswnal)");
+
+/* Module identification and licence. */
+MODULE_AUTHOR("Brian Behlendorf (LLNL)");
+MODULE_DESCRIPTION("A kernel space ping server for portals testing");
+MODULE_LICENSE("GPL");
+
+/* Register the module load and unload entry points. */
+module_init(pingsrv_init);
+module_exit(pingsrv_cleanup);
--- /dev/null
+#!/bin/sh
+
+SIMPLE=${SIMPLE:-0}
+
+if [ $SIMPLE -eq 0 ]; then
+ PING=pingcli.o
+else
+ PING=spingcli.o
+fi
+
+case "$1" in
+ toe)
+ /sbin/insmod ../oslib/portals.o
+ /sbin/insmod ../toenal/ktoenal.o
+ /sbin/insmod ./$PING
+ echo ktoenal > /tmp/nal
+ ;;
+
+ tcp)
+ /sbin/insmod ../oslib/portals.o
+ /sbin/insmod ../socknal/ksocknal.o
+ /sbin/insmod ./$PING
+ echo ksocknal > /tmp/nal
+ ;;
+
+ elan)
+ /sbin/insmod ../oslib/portals.o
+ /sbin/insmod ../qswnal/kqswnal.o
+ /sbin/insmod ./$PING
+ echo kqswnal > /tmp/nal
+ ;;
+
+ *)
+ echo "Usage : ${0} < tcp | toe | elan >"
+ exit 1;
+esac
+exit 0;
--- /dev/null
+#!/bin/sh
+
+SIMPLE=${SIMPLE:-0}
+
+if [ $SIMPLE -eq 0 ]; then
+ PING=pingsrv.o
+else
+ PING=spingsrv.o
+fi
+
+case "$1" in
+ toe)
+ /sbin/insmod ../oslib/portals.o
+ /sbin/insmod ../toenal/ktoenal.o
+ /sbin/insmod ./$PING nal=4
+ echo ktoenal > /tmp/nal
+ ;;
+
+ tcp)
+ /sbin/insmod ../oslib/portals.o
+ /sbin/insmod ../socknal/ksocknal.o
+ /sbin/insmod ./$PING nal=2
+ echo ksocknal > /tmp/nal
+ ;;
+
+ elan)
+ /sbin/insmod ../oslib/portals.o
+ /sbin/insmod ../qswnal/kqswnal.o
+ /sbin/insmod ./$PING nal=4
+ echo kqswnal > /tmp/nal
+ ;;
+
+ *)
+ echo "Usage : ${0} < tcp | toe | elan >"
+ exit 1;
+esac
+../utils/acceptor 9999&
+exit 0;
--- /dev/null
+#!/bin/sh
+
+SIMPLE=${SIMPLE:-1}
+
+if [ $SIMPLE -eq 0 ]; then
+ PING=spingcli
+else
+ PING=pingcli
+fi
+
+rmmod $PING
+NAL=`cat /tmp/nal`;
+rmmod $NAL
+rmmod portals
--- /dev/null
+#!/bin/sh
+
+SIMPLE=${SIMPLE:-1}
+
+if [ $SIMPLE -eq 0 ]; then
+ PING=spingsrv
+else
+ PING=pingsrv
+fi
+
+rmmod $PING
+NAL=`cat /tmp/nal`;
+rmmod $NAL
+killall -9 acceptor
+rm -f /var/run/acceptor-9999.pid
+rmmod portals
--- /dev/null
+CPPFLAGS=
+INCLUDES=-I$(top_srcdir)/portals/include -I$(top_srcdir)/include -I$(srcdir)
+lib_LIBRARIES = libtcpnal.a
+pkginclude_HEADERS = pqtimer.h dispatch.h table.h timer.h connection.h
+libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h dispatch.h table.h timer.h address.c procapi.c proclib.c connection.c tcpnal.c connection.h
--- /dev/null
+This library implements two NAL interfaces, both running over IP.
+The first, tcpnal, creates TCP connections between participating
+processes in order to transport the portals requests. The second,
+ernal, provides a simple transport protocol which runs over
+UDP datagrams.
+
+The interface functions return both of these values in host order for
+convenience and readability. However this means that addresses
+exchanged in messages between hosts of different orderings will not
+function properly.
+
+Both NALs use the same support functions in order to schedule events
+and communicate with the generic portals implementation.
+
+ -------------------------
+ | api |
+ |_______________________|
+ | lib |
+ |_______________________|
+ | ernal | |tcpnal |
+ |--------| |----------|
+ | udpsock| |connection|
+ |-----------------------|
+ | timer/select |
+ -------------------------
+
+
+ These NALs use the framework from fdnal of a pipe between the api
+and library sides. This is wrapped up in the select on the library
+side, and blocks on the api side. Performance could be greatly
+improved by collapsing this artificial barrier, by using shared
+memory queues, or by wiring the api layer directly to the library.
+
+
+nid is defined as the low order 24-bits of the IP address of the
+physical node left shifted by 8 plus a virtual node number of 0
+through 255 (really only 239). The virtual node number of a tcpnal
+application should be specified using the environment variable
+PTL_VIRTNODE. pid is now a completely arbitrary number in the
+range of 0 to 255. The IP interface used can be overridden by
+specifying the appropriate hostid by setting the PTL_HOSTID
+environment variable. The value can be either dotted decimal
+(n.n.n.n) or hex starting with "0x".
+TCPNAL:
+ As the NAL needs to try to send to a particular nid/pid pair, it
+ will open up connections on demand. Because the port associated with
+ the connecting socket is different from the bound port, two
+ connections will normally be established between a pair of peers, with
+ data flowing from the anonymous connect (active) port to the advertised
+ or well-known bound (passive) port of each peer.
+
+ Should the connection fail to open, an error is reported to the
+ library component, which causes the api request to fail.
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* address.c:
+ * this file provides functions to aquire the IP address of the node
+ * and translate them into a NID/PID pair which supports a static
+ * mapping of virtual nodes into the port range of an IP socket.
+*/
+
+#include <stdlib.h>
+#include <netdb.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <portals/p30.h>
+#include <bridge.h>
+#include <ipmap.h>
+
+
+/* Function: get_node_id
+ * Returns: a 32 bit id for this node: its IPv4 address as a host byte
+ *          order integer (first octet in the top 8 bits), or 0 when the
+ *          address cannot be determined or parsed
+ *
+ * If the PTL_HOSTID environment variable is set it is used directly:
+ * a value whose second character is 'x' is parsed as a number by
+ * strtoll(), anything else is parsed as dotted decimal (n.n.n.n).
+ * Otherwise get_node_id() determines the host name and uses the
+ * resolver to find its ip address.  This is fairly fragile and
+ * inflexible, but explicitly asking about interfaces and their
+ * addresses is very complicated and nonportable.
+ */
+static unsigned int get_node_id(void)
+{
+        char *host_envp = getenv("PTL_HOSTID");
+
+        if (host_envp == NULL) {
+                char buffer[255];
+                struct hostent *he;
+                const unsigned char *octet;
+
+                if (gethostname(buffer, sizeof(buffer)) != 0)
+                        return 0;
+                /* gethostname() may not terminate a truncated name */
+                buffer[sizeof(buffer) - 1] = '\0';
+
+                he = gethostbyname(buffer);
+                if (he == NULL || he->h_length < 4 ||
+                    he->h_addr_list[0] == NULL)
+                        return 0;
+
+                /* The resolver stores the address most significant octet
+                 * first; assembling it bytewise gives the host order
+                 * value without an unaligned or aliasing word access. */
+                octet = (const unsigned char *)he->h_addr_list[0];
+                return ((unsigned int)octet[0] << 24) |
+                       ((unsigned int)octet[1] << 16) |
+                       ((unsigned int)octet[2] << 8) |
+                        (unsigned int)octet[3];
+        }
+
+        /* Test the first character before the second so that an empty
+         * value is never read past its terminator. */
+        if (host_envp[0] == '\0' || host_envp[1] != 'x') {
+                unsigned int a, b, c, d;
+
+                if (sscanf(host_envp, "%u.%u.%u.%u", &a, &b, &c, &d) != 4)
+                        return 0;
+                if (a > 255 || b > 255 || c > 255 || d > 255)
+                        return 0;
+                return (a << 24) | (b << 16) | (c << 8) | d;
+        }
+
+        return (unsigned int)strtoll(host_envp, 0, 0);
+}
+
+
+/* Function: set_address
+ * Arugments: t: a procnal structure to populate with the request
+ *
+ * set_address performs the bit manipulations to set the nid, pid, and
+ * iptop8 fields of the procnal structures.
+ *
+ * TODO: fix pidrequest to try to do dynamic binding if PTL_ID_ANY
+ */
+
+#ifdef DIRECT_IP_MODE
+/* Direct IP mode: the nid is the node id from get_node_id() and the pid
+ * is the requested pid, with PTL_PID_ANY mapped to 0. */
+void set_address(bridge t,ptl_pid_t pidrequest)
+{
+        int port = (pidrequest == (unsigned short)PTL_PID_ANY)
+                        ? 0 : pidrequest;
+
+        t->nal_cb->ni.nid = get_node_id();
+        t->nal_cb->ni.pid = port;
+}
+#else
+
+/* Virtual node mode: the nid combines the low bits of the node's IP
+ * address with the virtual node number from PTL_VIRTNODE, and the top
+ * bits of the address are remembered in t->iptop8 for making new
+ * connections.  On any invalid request a message is printed and the
+ * function returns early; nid and iptop8 may already have been set when
+ * the pid is rejected, as before.
+ */
+void set_address(bridge t,ptl_pid_t pidrequest)
+{
+        const char *vnode_env = getenv("PTL_VIRTNODE");
+        unsigned int in_addr;
+        int virtnode = 0;
+
+        /* get and remember my node id */
+        if (vnode_env != NULL) {
+                int maxvnode = PNAL_VNODE_MASK - (PNAL_BASE_PORT
+                                                  >> PNAL_VNODE_SHIFT);
+
+                virtnode = atoi(vnode_env);
+                if (virtnode < 0) {
+                        /* a negative value would corrupt the nid below */
+                        fprintf(stderr, "PTL_VIRTNODE of %d is negative\n",
+                                virtnode);
+                        return;
+                }
+                if (virtnode > maxvnode) {
+                        fprintf(stderr, "PTL_VIRTNODE of %d is too large - max %d\n",
+                                virtnode, maxvnode);
+                        return;
+                }
+        }
+
+        /* unsigned, so the shift below is well defined for any address */
+        in_addr = get_node_id();
+
+        t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */
+        t->nal_cb->ni.nid = ((in_addr & PNAL_HOSTID_MASK)
+                             << PNAL_VNODE_SHIFT)
+                            + virtnode;
+
+        /* TODO: Support of pid PTL_ID_ANY with virtual nodes needs more work. */
+        if (pidrequest == (unsigned short)PTL_PID_ANY) {
+                fprintf(stderr, "portal pid PTL_ID_ANY is not currently supported\n");
+                return;
+        }
+        if (pidrequest > PNAL_PID_MASK) {
+                fprintf(stderr, "portal pid of %d is too large - max %d\n",
+                        (int)pidrequest, PNAL_PID_MASK);
+                return;
+        }
+
+        t->nal_cb->ni.pid = pidrequest;
+}
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+#include <portals/lib-p30.h>
+
+/* State shared between the api side and the library side of a NAL.
+ * Note that "bridge" is a pointer typedef. */
+typedef struct bridge {
+ int alive;
+ nal_cb_t *nal_cb; /* library-side callbacks; its ni holds this node's nid/pid */
+ void *lower; /* opaque; presumably transport-specific state - TODO confirm */
+ void *local; /* opaque; the process bridge code casts this to procbridge */
+ void (*shutdown)(struct bridge *);
+ /* this doesn't really belong here */
+ unsigned char iptop8; /* top bits of the node's IP address, set by set_address() */
+} *bridge;
+
+
+nal_t *bridge_init(ptl_interface_t nal,
+ ptl_pid_t pid_request,
+ ptl_ni_limits_t *desired,
+ ptl_ni_limits_t *actual,
+ int *rc);
+
+typedef int (*nal_initialize)(bridge);
+extern nal_initialize nal_table[PTL_IFACE_MAX];
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* connection.c:
+ This file provides a simple stateful connection manager which
+ builds tcp connections on demand and leaves them open for
+ future use. It also provides the machinery to allow peers
+ to connect to it
+*/
+
+#include <stdlib.h>
+#include <pqtimer.h>
+#include <dispatch.h>
+#include <table.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+#include <syscall.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <connection.h>
+#include <errno.h>
+
+
+/* global variable: acceptor port */
+unsigned short tcpnal_acceptor_port = 988;
+
+
+/* Function: compare_connection
+ * Arguments: arg1: a connection stored in the hash table
+ *            arg2: the key to verify against, an array of two
+ *                  unsigned ints: { ip, port }
+ * Returns: 1 if the connection is the one requested, 0 otherwise
+ *
+ * compare_connection() resolves collisions in the hash table
+ */
+static int compare_connection(void *arg1, void *arg2)
+{
+        connection conn = arg1;
+        unsigned int *key = arg2;
+
+        if (conn->ip != key[0])
+                return 0;
+        return conn->port == key[1];
+}
+
+
+/* Function: connection_key
+ * Arguments: id: the key to hash, an array of two unsigned ints
+ *                { ip, port }
+ * Returns: a not-particularly-well-distributed hash of the id:
+ *          the exclusive-or of its two words
+ */
+static unsigned int connection_key(unsigned int *id)
+{
+        unsigned int ip = id[0];
+        unsigned int port = id[1];
+
+        return ip ^ port;
+}
+
+
+/* Function: remove_connection
+ * Arguments: arg: the connection to remove
+ *
+ * Takes the connection out of its manager's hash table, closes its
+ * socket and frees the structure.
+ */
+void remove_connection(void *arg)
+{
+        connection conn = arg;
+        unsigned int key[2];
+
+        key[0] = conn->ip;
+        key[1] = conn->port;
+
+        hash_table_remove(conn->m->connections, key);
+        close(conn->fd);
+        free(conn);
+}
+
+
+/* Function: read_connection:
+ * Arguments: c: the connection to read from
+ *            dest: the buffer to read into
+ *            len: the number of bytes to read
+ * Returns: success as 1, or failure as 0
+ *
+ * read_connection() reads data from the connection, continuing
+ * to read partial results until the request is satisfied or
+ * it errors.  A read interrupted by a signal (EINTR) is retried.
+ * End of file (a read of 0 bytes) and any other error remove the
+ * connection, so c must not be used after a 0 return.
+ * TODO: this read should be covered by signal protection.
+ */
+int read_connection(connection c,
+                    unsigned char *dest,
+                    int len)
+{
+        int offset = 0;
+
+        while (offset < len) {
+                int rc = syscall(SYS_read, c->fd, dest + offset, len - offset);
+
+                /* errno is only meaningful when the call failed; a return
+                 * of 0 is end of file and must not consult a stale errno */
+                if (rc < 0 && errno == EINTR)
+                        continue;
+                if (rc <= 0) {
+                        remove_connection(c);
+                        return(0);
+                }
+                offset += rc;
+        }
+        return(1);
+}
+
+/* Read handler installed for every connection: hands the connection to
+ * the input callback registered with its manager and returns that
+ * callback's result. */
+static int connection_input(connection c)
+{
+        manager owner = c->m;
+
+        return owner->handler(owner->handler_arg, c);
+}
+
+
+/* Function: allocate_connection
+ * Arguments: m: the manager the allocation is occurring in the context of
+ *            ip, port: the peer address, used as the hash table key
+ *            fd: open file descriptor for the socket
+ * Returns: an allocated connection structure, or 0 if memory could
+ *          not be allocated
+ *
+ * just encompasses the action common to active and passive
+ * connections of allocation and placement in the global table.
+ * The connection owns fd from here on (remove_connection() closes it),
+ * so fd is closed when the allocation fails.
+ */
+static connection allocate_connection(manager m,
+                                      unsigned int ip,
+                                      unsigned short port,
+                                      int fd)
+{
+        connection c = malloc(sizeof(struct connection));
+        unsigned int id[2];
+
+        if (c == NULL) {
+                close(fd);
+                return(0);
+        }
+
+        c->m = m;
+        c->fd = fd;
+        c->ip = ip;
+        c->port = port;
+        id[0] = ip;
+        id[1] = port;
+        register_io_handler(fd, READ_HANDLER, connection_input, c);
+        hash_table_insert(m->connections, c, id);
+        return(c);
+}
+
+
+/* Function: new_connection
+ * Arguments: z: opaque argument holding the manager
+ * Returns: 1 in order to reregister for new connection requests
+ *
+ * called when the bound service socket receives
+ * a new connection request.  A successfully accepted socket is
+ * installed as a new connection keyed by the peer's address and
+ * port 0; a failed accept() is reported and otherwise ignored.
+ */
+static int new_connection(void *z)
+{
+        manager m = z;
+        struct sockaddr_in s;
+        socklen_t len = sizeof(struct sockaddr_in);
+        int fd = accept(m->bound, (struct sockaddr *)&s, &len);
+
+        if (fd < 0) {
+                /* do not register an invalid descriptor */
+                perror("tcpnal accept");
+                return(1);
+        }
+
+        /* cfs specific hack: the peer's source port is ignored and
+         * every incoming connection is stored with port 0 */
+        allocate_connection(m, ntohl(s.sin_addr.s_addr), 0/*pid*/, fd);
+        return(1);
+}
+
+
+/* Function: force_tcp_connection
+ * Arguments: m: the connection manager
+ *            ip: address of the peer, in host byte order
+ *            port: ignored; the global tcpnal_acceptor_port is
+ *                  always used instead
+ * Returns: an allocated connection structure, either
+ *          a pre-existing one, or a new connection; 0 if the
+ *          connection could not be established
+ *
+ * Exits the process if no socket can be created.
+ */
+connection force_tcp_connection(manager m,
+                                unsigned int ip,
+                                unsigned short port)
+{
+        connection c;
+        struct sockaddr_in addr;
+        unsigned int id[2];
+        int fd;
+
+        port = tcpnal_acceptor_port;
+
+        id[0] = ip;
+        id[1] = port;
+
+        /* reuse an existing connection to this peer if there is one */
+        c = hash_table_find(m->connections, id);
+        if (c)
+                return(c);
+
+        bzero((char *) &addr, sizeof(addr));
+        addr.sin_family      = AF_INET;
+        addr.sin_addr.s_addr = htonl(ip);
+        addr.sin_port        = htons(port);
+
+        if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
+                perror("tcpnal socket failed");
+                exit(-1);
+        }
+        if (connect(fd,
+                    (struct sockaddr *)&addr,
+                    sizeof(struct sockaddr_in)))
+        {
+                perror("tcpnal connect");
+                /* the socket is not handed to anyone else; release it */
+                close(fd);
+                return(0);
+        }
+        return(allocate_connection(m, ip, port, fd));
+}
+
+
+/* Function: bind_socket
+ * Arguments: m: the connection manager for this interface
+ *            port: the port to attempt to bind to
+ * Returns: 1 on success, or 0 on error
+ *
+ * bind_socket() attempts to allocate and bind a socket to the requested
+ * port, or dynamically assign one from the kernel should the port be
+ * zero.  Sets the bound, bound_handler and port elements of m.  On
+ * failure the socket is closed again and no handler is registered.
+ *
+ * The port is stored into sin_port as given and m->port is copied back
+ * from sin_port unchanged.
+ * NOTE(review): this looks like callers are expected to pass the port
+ * in network byte order - confirm.
+ *
+ * TODO: The port should be an explicitly sized type.
+ */
+static int bind_socket(manager m,unsigned short port)
+{
+        struct sockaddr_in addr;
+        socklen_t alen = sizeof(struct sockaddr_in);
+
+        if ((m->bound = socket(AF_INET, SOCK_STREAM, 0)) < 0)
+                return(0);
+
+        bzero((char *) &addr, sizeof(addr));
+        addr.sin_family      = AF_INET;
+        addr.sin_addr.s_addr = 0;
+        addr.sin_port        = port;
+
+        if (bind(m->bound, (struct sockaddr *)&addr, alen) < 0) {
+                perror ("tcpnal bind");
+                goto fail;
+        }
+
+        /* find out which port the kernel picked when port was zero */
+        if (getsockname(m->bound, (struct sockaddr *)&addr, &alen) < 0) {
+                perror ("tcpnal getsockname");
+                goto fail;
+        }
+
+        if (listen(m->bound, 5) < 0) {
+                perror ("tcpnal listen");
+                goto fail;
+        }
+
+        m->bound_handler = register_io_handler(m->bound, READ_HANDLER,
+                                               new_connection, m);
+        m->port = addr.sin_port;
+        return(1);
+
+fail:
+        close(m->bound);
+        return(0);
+}
+
+
+/* Function: shutdown_connections
+ * Arguments: m: the manager structure
+ *
+ * close all connections and reclaim resources: the listening socket is
+ * closed, its io handler removed, every connection in the table is
+ * passed to remove_connection(), and the manager itself is freed.
+ * m must not be used afterwards.
+ */
+void shutdown_connections(manager m)
+{
+ close(m->bound);
+ remove_io_handler(m->bound_handler);
+ /* remove_connection() closes and frees each remaining connection */
+ hash_destroy_table(m->connections,remove_connection);
+ free(m);
+}
+
+
+/* Function: init_connections
+ * Arguments: pid: the port to attempt to bind to
+ *            input: handler called with (a, connection) when a
+ *                   connection has data to read
+ *            a: opaque first argument for the input handler
+ * Returns: a newly allocated manager structure, or
+ *          zero if memory could not be allocated or the port
+ *          could not be bound
+ */
+manager init_connections(unsigned short pid,
+                         int (*input)(),
+                         void *a)
+{
+        manager m = malloc(sizeof(struct manager));
+
+        if (m == NULL)
+                return(0);
+
+        m->connections = hash_create_table(compare_connection, connection_key);
+        m->handler = input;
+        m->handler_arg = a;
+        if (bind_socket(m, pid))
+                return(m);
+
+        /* NOTE(review): the hash table created above is not released
+         * on this path - confirm how an empty table should be freed. */
+        free(m);
+        return(0);
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+#include <table.h>
+
+/* Connection manager: the listening socket plus the table of open
+ * connections.  Note that "manager" is a pointer typedef. */
+typedef struct manager {
+ table connections; /* hash table of connections keyed by { ip, port } */
+ int bound; /* listening socket descriptor */
+ io_handler bound_handler; /* read handler registered for the listening socket */
+ int (*handler)(void *, void *); /* input callback, called as handler(handler_arg, connection) */
+ void *handler_arg;
+ unsigned short port; /* port of the listening socket as stored in sin_port */
+} *manager;
+
+
+/* One open TCP connection to a peer.  Note that "connection" is a
+ * pointer typedef. */
+typedef struct connection {
+ unsigned int ip; /* peer address; first half of the hash table key */
+ unsigned short port; /* peer port; second half of the hash table key */
+ int fd; /* socket descriptor, closed by remove_connection() */
+ manager m; /* the manager whose table holds this connection */
+} *connection;
+
+connection force_tcp_connection(manager m,
+ unsigned int ip,
+ unsigned int short);
+manager init_connections(unsigned short,
+ int (*f)(void *,connection),
+ void *);
+void remove_connection(void *arg);
+void shutdown_connections(manager m);
+int read_connection(connection c,
+ unsigned char *dest,
+ int len);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Phil Schwan <phil@clusterfs.com>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <sys/time.h>
+
+int smp_processor_id = 1;
+char debug_file_path[1024] = "/tmp/lustre-log";
+char debug_file_name[1024];
+FILE *debug_file_fd;
+
+/* Userspace stand-in: only prints the debug file name; arg is unused. */
+int portals_do_debug_dumplog(void *arg)
+{
+ printf("Look in %s\n", debug_file_name);
+ return 0;
+}
+
+
+/* Userspace stand-in: does nothing. */
+void portals_debug_print(void)
+{
+ return;
+}
+
+
+/* Userspace stand-in: only prints the debug file name. */
+void portals_debug_dumplog(void)
+{
+ printf("Look in %s\n", debug_file_name);
+ return;
+}
+
+
+/* Directs debug output to stdout; bufsize is unused.  Always returns 0. */
+int portals_debug_init(unsigned long bufsize)
+{
+ debug_file_fd = stdout;
+ return 0;
+}
+
+/* Nothing to release; always returns 0. */
+int portals_debug_cleanup(void)
+{
+ return 0; //close(portals_debug_fd);
+}
+
+/* There is no buffer to clear; always returns 0. */
+int portals_debug_clear_buffer(void)
+{
+ return 0;
+}
+
+/* Writes a marker banner containing text to the debug stream.
+ * debug_file_fd is used as is, so portals_debug_init() must have run. */
+int portals_debug_mark_buffer(char *text)
+{
+
+ fprintf(debug_file_fd, "*******************************************************************************\n");
+ fprintf(debug_file_fd, "DEBUG MARKER: %s\n", text);
+ fprintf(debug_file_fd, "*******************************************************************************\n");
+
+ return 0;
+}
+
+/* Userspace stand-in: copies nothing and returns 0. */
+int portals_debug_copy_to_user(char *buf, unsigned long len)
+{
+ return 0;
+}
+
+/* portals_debug_msg: write one formatted debug message to the debug stream.
+ *
+ * Arguments: subsys, mask: debug subsystem and mask, printed in the prefix
+ *            file, fn, line: source location of the caller
+ *            format, ...: printf-style message
+ *
+ * The message is prefixed with the subsystem, mask, processor id, a
+ * timestamp and the source location.  Output goes to debug_file_fd, or
+ * to stdout if portals_debug_init() has not been called yet.
+ */
+void
+portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
+                   const char *format, ...)
+{
+        va_list ap;
+        unsigned long flags;    /* only its address is used, below */
+        struct timeval tv;
+        FILE *out = debug_file_fd ? debug_file_fd : stdout;
+
+        gettimeofday(&tv, NULL);
+
+        /* the casts make the arguments match the %lu conversions */
+        fprintf(out,
+                "%02x:%06x:%d:%lu.%06lu ",
+                subsys >> 24, mask, smp_processor_id,
+                (unsigned long)tv.tv_sec, (unsigned long)tv.tv_usec);
+
+        /* the last field is derived from the address of a local variable */
+        fprintf(out,
+                "(%s:%d:%s() %d+%ld): ",
+                file, line, fn, 0,
+                (long)(8192 - ((unsigned long)&flags & 8191UL)));
+
+        /* a va_list must be formatted with vfprintf, not fprintf */
+        va_start (ap, format);
+        vfprintf(out, format, ap);
+        va_end (ap);
+}
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ * Copyright (c) 2002 Eric Hoffman
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+/* this file is only called dispatch.h to prevent it
+ from colliding with /usr/include/sys/select.h */
+
+/* Note that "io_handler" is a pointer typedef. */
+typedef struct io_handler *io_handler;
+
+/* One registered file descriptor callback, kept on a linked list. */
+struct io_handler{
+ io_handler *last; /* presumably the address of the pointer that points at this entry - TODO confirm */
+ io_handler next; /* next entry in the list */
+ int fd; /* descriptor to watch */
+ int type; /* mask of READ_HANDLER / WRITE_HANDLER / EXCEPTION_HANDLER */
+ int (*function)(void *); /* callback, invoked with argument */
+ void *argument;
+ int disabled;
+};
+
+
+#define READ_HANDLER 1
+#define WRITE_HANDLER 2
+#define EXCEPTION_HANDLER 4
+#define ALL_HANDLER (READ_HANDLER | WRITE_HANDLER | EXCEPTION_HANDLER)
+
+io_handler register_io_handler(int fd,
+ int type,
+ int (*function)(void *),
+ void *arg);
+
+void remove_io_handler (io_handler i);
+void init_unix_timer(void);
+void select_timer_block(when until);
+when now(void);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+/* DIRECT_IP_MODE is defined unconditionally here, so the identity
+ * mappings below are the ones in effect and the #else branch is
+ * compiled out. */
+#define DIRECT_IP_MODE
+#ifdef DIRECT_IP_MODE
+/* Direct mode: the nid is the IP address and the pid is the port. */
+#define PNAL_NID(in_addr, port) (in_addr)
+#define PNAL_PID(pid) (pid)
+#define PNAL_IP(in_addr, port) (in_addr)
+#define PNAL_PORT(nid, pid) (pid)
+#else
+
+/* Virtual node mode: the nid is the low 24 bits of the IP address
+ * shifted left by 8 plus a virtual node number, and the port encodes
+ * the virtual node and pid on top of PNAL_BASE_PORT. */
+#define PNAL_BASE_PORT 4096
+#define PNAL_HOSTID_SHIFT 24
+#define PNAL_HOSTID_MASK ((1 << PNAL_HOSTID_SHIFT) - 1)
+#define PNAL_VNODE_SHIFT 8
+#define PNAL_VNODE_MASK ((1 << PNAL_VNODE_SHIFT) - 1)
+#define PNAL_PID_SHIFT 8
+#define PNAL_PID_MASK ((1 << PNAL_PID_SHIFT) - 1)
+
+/* in_addr and port are taken in network byte order */
+#define PNAL_NID(in_addr, port) (((ntohl(in_addr) & PNAL_HOSTID_MASK) \
+ << PNAL_VNODE_SHIFT) \
+ | (((ntohs(port)-PNAL_BASE_PORT) >>\
+ PNAL_PID_SHIFT)))
+#define PNAL_PID(port) ((ntohs(port) - PNAL_BASE_PORT) & PNAL_PID_MASK)
+
+/* t must be a plain identifier: it is expanded as t->iptop8 without
+ * parentheses */
+#define PNAL_IP(nid,t) (htonl((((unsigned)(nid))\
+ >> PNAL_VNODE_SHIFT)\
+ | (t->iptop8 << PNAL_HOSTID_SHIFT)))
+#define PNAL_PORT(nid, pid) (htons(((((nid) & PNAL_VNODE_MASK) \
+ << PNAL_VNODE_SHIFT) \
+ | ((pid) & PNAL_PID_MASK)) \
+ + PNAL_BASE_PORT))
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ * Copyright (c) 2002 Eric Hoffman
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* timer.c:
+ * this file implements a simple priority-queue based timer system. when
+ * combined with a file which implements now() and block(), it can
+ * be used to provide course-grained time-based callbacks.
+ */
+
+#include <pqtimer.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* One pending timer; entries live in the priority queue ordered by w. */
+struct timer {
+ void (*function)(void *); /* callback run when the timer fires */
+ void *arg; /* argument passed to function */
+ when w; /* absolute expiry time: now() + interval at registration */
+ int interval; /* requested interval; narrower than "when", so large values are truncated */
+ int disable; /* set by remove_timer(); a disabled timer is freed without being called */
+};
+
+/* A background callback run on every pass of timer_loop(); singly
+ * linked through next. */
+typedef struct thunk *thunk;
+struct thunk {
+ void (*f)(void *);
+ void *a;
+ thunk next;
+};
+
+extern when now(void);
+
+static thunk thunks;
+static int internal;
+static void (*block_function)(when);
+static int number_of_timers;
+static int size_of_pqueue;
+static timer *timers;
+
+
+/* Restore the heap order below position "where" by repeatedly swapping
+ * the entry with its earliest-expiring child.  The queue is 1-based:
+ * the children of slot i are slots 2i and 2i+1. */
+static void heal(int where)
+{
+        for (;;) {
+                int left = where << 1;
+                int right = left + 1;
+                int min = where;
+                timer swap;
+
+                if (left <= number_of_timers &&
+                    timers[left]->w < timers[min]->w)
+                        min = left;
+                if (right <= number_of_timers &&
+                    timers[right]->w < timers[min]->w)
+                        min = right;
+
+                if (min == where)
+                        return;
+
+                swap = timers[where];
+                timers[where] = timers[min];
+                timers[min] = swap;
+                where = min;
+        }
+}
+
+/* Move the entry at slot i up towards the root (slot 1) while it
+ * expires earlier than its parent. */
+static void add_pqueue(int i)
+{
+        while (i > 1) {
+                int parent = i >> 1;
+                timer swap;
+
+                if (!(timers[i]->w < timers[parent]->w))
+                        break;
+
+                swap = timers[i];
+                timers[i] = timers[parent];
+                timers[parent] = swap;
+                i = parent;
+        }
+}
+
+/* Append t to the priority queue, growing the array by ten slots when
+ * it is full, and move it to its place in the heap.  The queue is
+ * 1-based, so room is kept for slot 0 as well.  Aborts the process if
+ * the queue cannot be grown, since there is no way to report failure.
+ */
+static void add_timer(timer t)
+{
+        if (size_of_pqueue < (number_of_timers + 2)) {
+                int newsize = size_of_pqueue + 10;
+                /* the array holds pointers (timer), not struct timer;
+                 * realloc keeps the contents and releases the old array */
+                timer *grown = realloc(timers, sizeof(timer) * newsize);
+
+                if (grown == NULL)
+                        abort();
+                timers = grown;
+                size_of_pqueue = newsize;
+        }
+        timers[++number_of_timers] = t;
+        add_pqueue(number_of_timers);
+}
+
+/* Function: register_timer
+ * Arguments: interval: the time interval from the current time when
+ *                      the timer function should be called
+ *            function: the function to call when the time has expired
+ *            argument: the argument to call it with.
+ * Returns: a pointer to a timer structure, or 0 if it could not
+ *          be allocated
+ *
+ * The timer is freed by timer_fire() once it has expired.
+ */
+timer register_timer(when interval,
+                     void (*function)(void *),
+                     void *argument)
+{
+        timer t = malloc(sizeof(struct timer));
+
+        if (t == NULL)
+                return(0);
+
+        t->arg = argument;
+        t->function = function;
+        t->interval = interval;
+        t->disable = 0;
+        t->w = now() + interval;
+        add_timer(t);
+        /* first timer registered from outside the timer code: call the
+         * block function with its expiry time */
+        if (!internal && (number_of_timers == 1))
+                block_function(t->w);
+        return(t);
+}
+
+/* Function: remove_timer
+ * Arguments: t: the timer to cancel, as returned by register_timer
+ * Returns: nothing
+ *
+ * remove_timer marks a timer as disabled, ensuring that its
+ * function will never be called. It does not actually
+ * free the timer due to reentrancy issues; the timer stays in
+ * the queue and is freed by timer_fire() when it expires.
+ */
+
+void remove_timer(timer t)
+{
+ t->disable=1;
+}
+
+
+
+/* Pop the earliest timer off the queue, run its function unless it has
+ * been disabled, and free it.  The queue must not be empty. */
+void timer_fire()
+{
+        timer expired = timers[1];
+
+        /* move the last entry to the root and restore the heap order */
+        timers[1] = timers[number_of_timers];
+        number_of_timers--;
+        heal(1);
+
+        if (!expired->disable)
+                expired->function(expired->arg);
+        free(expired);
+}
+
+/* Fire every timer that has expired as of one reading of now(), then
+ * return the expiry time of the earliest remaining timer, or 0 when no
+ * timers are left. */
+when next_timer(void)
+{
+        when here = now();
+
+        for (;;) {
+                if (!number_of_timers)
+                        return(0);
+                if (timers[1]->w > here)
+                        return(timers[1]->w);
+                timer_fire();
+        }
+}
+
+/* Function: timer_loop
+ * Arguments: none
+ * Returns: never
+ *
+ * timer_loop() is the blocking dispatch function for the timer.
+ * Each pass runs all registered thunks, then either fires the
+ * earliest timer if it has expired or calls the block function
+ * registered with initialize_timer until that timer is due.  With
+ * no timers pending the thunks are run a second time and the block
+ * function is called with 0.
+ */
+void timer_loop()
+{
+        for (;;) {
+                thunk z;
+                when here = now();
+
+                for (z = thunks; z; z = z->next)
+                        z->f(z->a);
+
+                if (!number_of_timers) {
+                        for (z = thunks; z; z = z->next)
+                                z->f(z->a);
+                        block_function(0);
+                        continue;
+                }
+
+                if (timers[1]->w > here)
+                        block_function(timers[1]->w);
+                else
+                        timer_fire();
+        }
+}
+
+
+/* Function: register_thunk
+ * Arguments: f: the function to call
+ *            a: the single argument to call it with
+ *
+ * Thunk functions get called at irregular intervals; they
+ * should not assume when, or take a particularly long
+ * amount of time. Thunks are for background cleanup tasks.
+ * The new thunk is placed at the head of the list.
+ */
+void register_thunk(void (*f)(void *),void *a)
+{
+        thunk entry = (void *)malloc(sizeof(struct thunk));
+
+        entry->next = thunks;
+        entry->a = a;
+        entry->f = f;
+        thunks = entry;
+}
+
+/* Function: initialize_timer
+ * Arguments: block: the function to call to block until the given time
+ *
+ * initialize_timer() must be called before any other timer function,
+ * including timer_loop.  It starts with an empty queue of ten slots
+ * and no thunks.
+ */
+void initialize_timer(void (*block)(when))
+{
+        thunks = 0;
+        number_of_timers = 0;
+        size_of_pqueue = 10;
+        timers = (timer *)malloc(sizeof(timer) * size_of_pqueue);
+        block_function = block;
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ * Copyright (c) 2002 Eric Hoffman
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+/* Time value used throughout the timer code. */
+typedef unsigned long long when;
+/* Current time; implemented outside the timer code. */
+when now(void);
+/* Note that "timer" is a pointer typedef. */
+typedef struct timer *timer;
+/* Schedule function(argument) to run interval after now(). */
+timer register_timer(when interval,
+ void (*function)(void *),
+ void *argument);
+timer register_timer_wait(void);
+/* Disable a timer so that its function is never called. */
+void remove_timer(timer);
+/* Blocking dispatch loop; never returns. */
+void timer_loop(void);
+/* Must be called before any other timer function. */
+void initialize_timer(void (*block)(when));
+/* Pop and run the earliest timer. */
+void timer_fire(void);
+
+
+/* NOTE(review): 2^32 suggests "when" is a 32.32 fixed point count of
+ * seconds, making HZ one second - confirm against now(). */
+#define HZ 0x100000000ull
+
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* api.c:
+ * This file provides the 'api' side for the process-based nals.
+ * it is responsible for creating the 'library' side thread,
+ * and passing wrapped portals transactions to it.
+ *
+ * Along with initialization, shutdown, and transport to the library
+ * side, this file contains some stubs to satisfy the nal definition.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <syscall.h>
+#include <procbridge.h>
+#include <pqtimer.h>
+#include <dispatch.h>
+#include <errno.h>
+
+
+/* Function: forward
+ * Arguments: nal_t *nal: pointer to my top-side nal structure
+ * id: the command to pass to the lower layer
+ * args, args_len:pointer to and length of the request
+ * ret, ret_len: pointer to and size of the result
+ * Returns: a portals status code
+ *
+ * forwards a packaged api call from the 'api' side to the 'library'
+ * side, and collects the result
+ */
+#define forward_failure(operand,fd,buffer,length)\
+ if(syscall(SYS_##operand,fd,buffer,length)!=length){\
+ lib_fini(b->nal_cb);\
+ return(PTL_SEGV);\
+ }
+static int procbridge_forward(nal_t *n, int id, void *args, ptl_size_t args_len,
+ void *ret, ptl_size_t ret_len)
+{
+ bridge b=(bridge)n->nal_data;
+ procbridge p=(procbridge)b->local;
+ int lib=p->to_lib[1];
+ int k;
+
+ forward_failure(write,lib, &id, sizeof(id));
+ forward_failure(write,lib,&args_len, sizeof(args_len));
+ forward_failure(write,lib,&ret_len, sizeof(ret_len));
+ forward_failure(write,lib,args, args_len);
+
+ do {
+ k=syscall(SYS_read, p->from_lib[0], ret, ret_len);
+ } while ((k!=ret_len) && (errno += EINTR));
+
+ if(k!=ret_len){
+ perror("nal: read return block");
+ return PTL_SEGV;
+ }
+ return (PTL_OK);
+}
+#undef forward_failure
+
+
+/* Function: shutdown
+ * Arguments: nal: a pointer to my top side nal structure
+ *            ni: my network interface index
+ * Returns: 0
+ *
+ * cleanup nal state: sends the PTL_FINI codepoint to the lower
+ * side, waits for its reply, closes both pipes and frees the
+ * process bridge state
+ */
+static int procbridge_shutdown(nal_t *n, int ni)
+{
+        bridge b = (bridge)n->nal_data;
+        procbridge p = (procbridge)b->local;
+        int code = PTL_FINI;
+        int end;
+
+        syscall(SYS_write, p->to_lib[1], &code, sizeof(code));
+        syscall(SYS_read, p->from_lib[0], &code, sizeof(code));
+
+        /* close both ends of each pipe, to_lib first */
+        for (end = 0; end < 2; end++)
+                syscall(SYS_close, p->to_lib[end]);
+        for (end = 0; end < 2; end++)
+                syscall(SYS_close, p->from_lib[end]);
+
+        free(p);
+        return(0);
+}
+
+
+/* Function: procbridge_validate
+ * Returns: always 0
+ *
+ * stub required by the nal definition; none of the arguments are
+ * examined.
+ */
+static int procbridge_validate(nal_t *nal, void *base, ptl_size_t extent)
+{
+        (void)nal;
+        (void)base;
+        (void)extent;
+        return 0;
+}
+
+
+/* Function: procbridge_yield
+ * Arguments: n: a pointer to my top side nal structure
+ *
+ * originally meant to give the lower half thread a chance to run;
+ * here it blocks the caller on the procbridge condition variable
+ * until the lower half signals it.
+ */
+static void procbridge_yield(nal_t *n)
+{
+        procbridge pb=(procbridge)((bridge)n->nal_data)->local;
+        pthread_mutex_t *lock=&pb->mutex;
+
+        pthread_mutex_lock(lock);
+        pthread_cond_wait(&pb->cond, lock);
+        pthread_mutex_unlock(lock);
+}
+
+
+/* lock entry of the nal vector: intentionally empty in this nal */
+static void procbridge_lock(nal_t * nal, unsigned long *flags)
+{
+        (void)nal;
+        (void)flags;
+}
+/* unlock entry of the nal vector: intentionally empty in this nal */
+static void procbridge_unlock(nal_t * nal, unsigned long *flags)
+{
+        (void)nal;
+        (void)flags;
+}
+/* api_nal
+ * the interface vector through which the generic code reaches this
+ * nal. it is separate from the library side nal_cb.
+ * TODO: should be dynamically allocated
+ */
+static nal_t api_nal = {
+        .ni       = {0},
+        .nal_data = NULL,
+        .forward  = procbridge_forward,
+        .shutdown = procbridge_shutdown,
+        .validate = procbridge_validate,
+        .yield    = procbridge_yield,
+        .lock     = procbridge_lock,
+        .unlock   = procbridge_unlock
+};
+
+/* Function: bridge_init
+ *
+ * Arguments: pid: requested process id (port offset)
+ * PTL_ID_ANY not supported.
+ * desired: limits passed from the application
+ * and effectively ignored
+ * actual: limits actually allocated and returned
+ *
+ * Returns: a pointer to my statically allocated top side NAL
+ * structure
+ *
+ * initializes the tcp nal. we define unix_failure as an
+ * error wrapper to cut down clutter.
+ */
+#define unix_failure(operand,fd,buffer,length,text)\
+ if(syscall(SYS_##operand,fd,buffer,length)!=length){\
+ perror(text);\
+ return(NULL);\
+ }
+#if 0
+/* NOTE: everything down to the matching #endif is compiled out.
+ * bridge_init() is an earlier form of the initialization entry point:
+ * it creates the two pipes, writes the requested pid, limits and
+ * interface type to the library side, starts nal_thread and reads the
+ * actual limits and a status code back.
+ */
+static nal_t *bridge_init(ptl_interface_t nal,
+ ptl_pid_t pid_request,
+ ptl_ni_limits_t *desired,
+ ptl_ni_limits_t *actual,
+ int *rc)
+{
+ procbridge p;
+ bridge b;
+ static int initialized=0;
+ ptl_ni_limits_t limits = {-1,-1,-1,-1,-1};
+
+ /* a second call simply hands back the already initialized nal */
+ if(initialized) return (&api_nal);
+
+ init_unix_timer();
+
+ /* NOTE(review): neither allocation is checked for failure */
+ b=(bridge)malloc(sizeof(struct bridge));
+ p=(procbridge)malloc(sizeof(struct procbridge));
+ api_nal.nal_data=b;
+ b->local=p;
+
+ if(pipe(p->to_lib) || pipe(p->from_lib)) {
+ perror("nal_init: pipe");
+ return(NULL);
+ }
+
+ /* the three start-up parameters are queued in the pipe before the
+ library thread exists */
+ if (desired) limits = *desired;
+ unix_failure(write,p->to_lib[1], &pid_request, sizeof(pid_request),
+ "nal_init: write");
+ unix_failure(write,p->to_lib[1], &limits, sizeof(ptl_ni_limits_t),
+ "nal_init: write");
+ unix_failure(write,p->to_lib[1], &nal, sizeof(ptl_interface_t),
+ "nal_init: write");
+
+ if(pthread_create(&p->t, NULL, nal_thread, b)) {
+ perror("nal_init: pthread_create");
+ return(NULL);
+ }
+
+ unix_failure(read,p->from_lib[0], actual, sizeof(ptl_ni_limits_t),
+ "tcp_init: read");
+ /* NOTE(review): rc is an 'int *', so sizeof(rc) is the size of the
+ pointer rather than of the int being read -- confirm before
+ re-enabling this code */
+ unix_failure(read,p->from_lib[0], rc, sizeof(rc),
+ "nal_init: read");
+
+ if(*rc) return(NULL);
+
+ initialized = 1;
+ pthread_mutex_init(&p->mutex,0);
+ pthread_cond_init(&p->cond, 0);
+
+ return (&api_nal);
+}
+#endif
+
+/* node id copied into the library side ni once initialization has
+   succeeded; this file only reads it, so it is presumably assigned by
+   the application before procbridge_interface() runs -- TODO confirm */
+ptl_nid_t tcpnal_mynid;
+
+/* Function: procbridge_interface
+ * Arguments: num_interface: not used
+ *            ptl_size: requested portal table size, 0 keeps the default
+ *            acl_size: requested access table size, 0 keeps the default
+ *            requested_pid: process id handed to the library side
+ * Returns: a pointer to the statically allocated top side nal
+ *          structure, or NULL if initialization failed
+ *
+ * creates the two pipes that join the api and library sides, queues
+ * the start-up parameters (pid, limits, nal type), starts the library
+ * thread and waits for its status code. calls made after a successful
+ * initialization return the same nal without doing any work.
+ */
+nal_t *procbridge_interface(int num_interface,
+                            ptl_pt_index_t ptl_size,
+                            ptl_ac_index_t acl_size,
+                            ptl_pid_t requested_pid)
+{
+        procbridge p;
+        bridge b;
+        static int initialized=0;
+        ptl_ni_limits_t limits = {-1,-1,-1,-1,-1};
+        int rc, nal_type = PTL_IFACE_TCP;/* PTL_IFACE_DEFAULT FIXME hack */
+
+        if(initialized) return (&api_nal);
+
+        init_unix_timer();
+
+        b=(bridge)malloc(sizeof(struct bridge));
+        p=(procbridge)malloc(sizeof(struct procbridge));
+        if(!b || !p) {
+                perror("nal_init: malloc");
+                free(p);
+                free(b);
+                return(NULL);
+        }
+        api_nal.nal_data=b;
+        b->local=p;
+
+        if(pipe(p->to_lib) || pipe(p->from_lib)) {
+                perror("nal_init: pipe");
+                return(NULL);
+        }
+
+        /* the library thread takes p->mutex from wakeup_topside() as soon
+           as its timer loop runs, so both objects must exist before the
+           thread is created */
+        pthread_mutex_init(&p->mutex,0);
+        pthread_cond_init(&p->cond, 0);
+
+        if (ptl_size)
+                limits.max_ptable_index = ptl_size;
+        if (acl_size)
+                limits.max_atable_index = acl_size;
+
+        /* these three values are read back, in this order, by nal_thread */
+        unix_failure(write,p->to_lib[1], &requested_pid, sizeof(requested_pid),
+                     "nal_init: write");
+        unix_failure(write,p->to_lib[1], &limits, sizeof(ptl_ni_limits_t),
+                     "nal_init: write");
+        unix_failure(write,p->to_lib[1], &nal_type, sizeof(nal_type),
+                     "nal_init: write");
+
+        if(pthread_create(&p->t, NULL, nal_thread, b)) {
+                perror("nal_init: pthread_create");
+                return(NULL);
+        }
+
+        /* the library side answers with the result of its initialization */
+        unix_failure(read,p->from_lib[0], &rc, sizeof(rc),
+                     "nal_init: read");
+
+        if(rc) return(NULL);
+
+        b->nal_cb->ni.nid = tcpnal_mynid;
+        initialized = 1;
+
+        return (&api_nal);
+}
+#undef unix_failure
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+#ifndef _PROCBRIDGE_H_
+#define _PROCBRIDGE_H_
+
+#include <pthread.h>
+#include <bridge.h>
+#include <ipmap.h>
+
+
+/* state shared between the 'api' side and the 'library' side thread of
+ * a process-based nal; stored in the 'local' field of the bridge.
+ */
+typedef struct procbridge {
+ /* the library side thread, created running nal_thread() */
+ pthread_t t;
+ /* procbridge_yield() waits on cond under mutex; the library side
+ broadcasts on it from its wakeup thunk */
+ pthread_cond_t cond;
+ pthread_mutex_t mutex;
+ /* request pipe: the api side writes to_lib[1], the library side
+ reads to_lib[0] */
+ int to_lib[2];
+ /* reply pipe: the library side writes from_lib[1], the api side
+ reads from_lib[0] */
+ int from_lib[2];
+} *procbridge;
+
+/* entry point of the library side thread; takes the bridge as argument */
+extern void *nal_thread(void *);
+
+
+/* command codes placed after the generic dispatch range; PTL_FINI is
+ the shutdown request sent by the api side */
+#define PTL_INIT (LIB_MAX_DISPATCH+1)
+#define PTL_FINI (LIB_MAX_DISPATCH+2)
+
+/* ceilings applied by nal_thread() to the requested table sizes */
+#define MAX_ACLS 1
+#define MAX_PTLS 128
+
+/* fills in the nid/pid of the bridge's nal_cb from the host address */
+extern void set_address(bridge t,ptl_pid_t pidrequest);
+/* initializes the nal and returns its api side vector, or NULL */
+extern nal_t *procbridge_interface(int num_interface,
+ ptl_pt_index_t ptl_size,
+ ptl_ac_index_t acl_size,
+ ptl_pid_t requested_pid);
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* lib.c:
+ * This file provides the 'library' side for the process-based nals.
+ * it is responsible for communication with the 'api' side and
+ * providing service to the generic portals 'library'
+ * implementation. 'library' might be better termed 'communication'
+ * or 'kernel'.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <syscall.h>
+#include <procbridge.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netdb.h>
+#include <errno.h>
+#include <timer.h>
+//#include <util/pqtimer.h>
+#include <dispatch.h>
+
+/* the following functions are stubs to satisfy the nal definition
+ without doing anything particularily useful*/
+
+/* cb_write callback: api and library share one address space, so the
+   "write to user memory" is a plain copy. always returns 0. */
+static int nal_write(nal_cb_t *nal, void *private,
+                     user_ptr dst_addr, void *src_addr, ptl_size_t len)
+{
+        (void)nal;
+        (void)private;
+        memcpy(dst_addr, src_addr, len);
+        return(0);
+}
+
+/* cb_read callback: copies len bytes from the user buffer into
+   dst_addr with a plain memcpy. always returns 0. */
+static int nal_read(nal_cb_t * nal, void *private,
+                    void *dst_addr, user_ptr src_addr, size_t len)
+{
+        (void)nal;
+        (void)private;
+        memcpy(dst_addr, src_addr, len);
+        return(0);
+}
+
+/* cb_malloc callback: thin wrapper around malloc(); returns whatever
+   malloc() returns, including NULL. */
+static void *nal_malloc(nal_cb_t *nal, ptl_size_t len)
+{
+        (void)nal;
+        return(malloc(len));
+}
+
+/* cb_free callback: releases a buffer obtained from nal_malloc(); the
+   length argument is not needed by free(). */
+static void nal_free(nal_cb_t *nal, void *buf, ptl_size_t len)
+{
+        (void)nal;
+        (void)len;
+        free(buf);
+}
+
+/* cb_printf callback: formats the message onto standard output */
+static void nal_printf(nal_cb_t *nal, const char *fmt, ...)
+{
+        va_list args;
+
+        (void)nal;
+        va_start(args, fmt);
+        vprintf(fmt, args);
+        va_end(args);
+}
+
+
+/* cb_cli callback: empty stub */
+static void nal_cli(nal_cb_t *nal, unsigned long *flags)
+{
+        (void)nal;
+        (void)flags;
+}
+
+
+/* cb_sti callback: empty stub */
+static void nal_sti(nal_cb_t *nal, unsigned long *flags)
+{
+        (void)nal;
+        (void)flags;
+}
+
+
+/* cb_dist callback: stub that reports success without writing
+   anything through 'dist'. */
+static int nal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
+{
+        (void)nal;
+        (void)nid;
+        (void)dist;
+        return(0);
+}
+
+
+
+/* Function: data_from_api
+ * Arguments: t: the nal state for this interface
+ * Returns: whether to continue reading from the pipe
+ *
+ * data_from_api() reads data from the api side in response
+ * to a select.
+ *
+ * We define data_failure() for syntactic convenience
+ * of unix error reporting.
+ */
+
+#define data_failure(operand,fd,buffer,length)\
+ if(syscall(SYS_##operand,fd,buffer,length)!=length){\
+ lib_fini(b->nal_cb);\
+ return(0);\
+ }
+static int data_from_api(void *arg)
+{
+ bridge b = arg;
+ procbridge p=(procbridge)b->local;
+ /* where are these two sizes derived from ??*/
+ char arg_block[ 256 ];
+ char ret_block[ 128 ];
+ ptl_size_t arg_len,ret_len;
+ int fd=p->to_lib[0];
+ int index;
+
+ data_failure(read,fd, &index, sizeof(index));
+
+ if (index==PTL_FINI) {
+ lib_fini(b->nal_cb);
+ if (b->shutdown) (*b->shutdown)(b);
+ syscall(SYS_write, p->from_lib[1],&b->alive,sizeof(b->alive));
+
+ /* a heavy-handed but convenient way of shutting down
+ the lower side thread */
+ pthread_exit(0);
+ }
+
+ data_failure(read,fd, &arg_len, sizeof(arg_len));
+ data_failure(read,fd, &ret_len, sizeof(ret_len));
+ data_failure(read,fd, arg_block, arg_len);
+
+ lib_dispatch(b->nal_cb, NULL, index, arg_block, ret_block);
+
+ data_failure(write,p->from_lib[1],ret_block, ret_len);
+ return(1);
+}
+#undef data_failure
+
+
+
+/* thunk run by the timer loop: wakes every thread blocked on the
+   procbridge condition variable. z is the bridge. */
+static void wakeup_topside(void *z)
+{
+        procbridge pb=((bridge)z)->local;
+        pthread_mutex_t *lock=&pb->mutex;
+
+        pthread_mutex_lock(lock);
+        pthread_cond_broadcast(&pb->cond);
+        pthread_mutex_unlock(lock);
+}
+
+
+/* Function: nal_thread
+ * Arguments: z: an opaque reference to a nal control structure
+ * allocated and partially populated by the api level code
+ * Returns: nothing, and only on error or explicit shutdown
+ *
+ * This function is the entry point of the pthread initiated on
+ * the api side of the interface. This thread is used to handle
+ * asynchronous delivery to the application.
+ *
+ * We define a limit macro to place a ceiling on limits
+ * for syntactic convenience
+ */
+#define LIMIT(x,y,max)\
+ if ((unsigned int)x > max) y = max;
+
+extern int tcpnal_init(bridge);
+
+nal_initialize nal_table[PTL_IFACE_MAX]={0,tcpnal_init,0};
+
+void *nal_thread(void *z)
+{
+ bridge b=z;
+ procbridge p=b->local;
+ int rc;
+ ptl_pid_t pid_request;
+ int nal_type;
+ ptl_ni_limits_t desired;
+ ptl_ni_limits_t actual;
+
+ b->nal_cb=(nal_cb_t *)malloc(sizeof(nal_cb_t));
+ b->nal_cb->nal_data=b;
+ b->nal_cb->cb_read=nal_read;
+ b->nal_cb->cb_write=nal_write;
+ b->nal_cb->cb_malloc=nal_malloc;
+ b->nal_cb->cb_free=nal_free;
+ b->nal_cb->cb_map=NULL;
+ b->nal_cb->cb_unmap=NULL;
+ b->nal_cb->cb_printf=nal_printf;
+ b->nal_cb->cb_cli=nal_cli;
+ b->nal_cb->cb_sti=nal_sti;
+ b->nal_cb->cb_dist=nal_dist;
+
+
+ register_io_handler(p->to_lib[0],READ_HANDLER,data_from_api,(void *)b);
+
+ if(!(rc = syscall(SYS_read, p->to_lib[0], &pid_request, sizeof(pid_request))))
+ perror("procbridge read from api");
+ if(!(rc = syscall(SYS_read, p->to_lib[0], &desired, sizeof(ptl_ni_limits_t))))
+ perror("procbridge read from api");
+ if(!(rc = syscall(SYS_read, p->to_lib[0], &nal_type, sizeof(nal_type))))
+ perror("procbridge read from api");
+
+ actual = desired;
+ LIMIT(desired.max_match_entries,actual.max_match_entries,MAX_MES);
+ LIMIT(desired.max_mem_descriptors,actual.max_mem_descriptors,MAX_MDS);
+ LIMIT(desired.max_event_queues,actual.max_event_queues,MAX_EQS);
+ LIMIT(desired.max_atable_index,actual.max_atable_index,MAX_ACLS);
+ LIMIT(desired.max_ptable_index,actual.max_ptable_index,MAX_PTLS);
+
+ set_address(b,pid_request);
+
+ if (nal_table[nal_type]) rc=(*nal_table[nal_type])(b);
+ /* initialize the generic 'library' level code */
+
+ rc = lib_init(b->nal_cb,
+ b->nal_cb->ni.nid,
+ b->nal_cb->ni.pid,
+ 10,
+ actual.max_ptable_index,
+ actual.max_atable_index);
+
+ /*
+ * Whatever the initialization returned is passed back to the
+ * user level code for further interpretation. We just exit if
+ * it is non-zero since something went wrong.
+ */
+ /* this should perform error checking */
+#if 0
+ write(p->from_lib[1], &actual, sizeof(ptl_ni_limits_t));
+#endif
+ syscall(SYS_write, p->from_lib[1], &rc, sizeof(rc));
+
+ if(!rc) {
+ /* the thunk function is called each time the timer loop
+ performs an operation and returns to blocking mode. we
+ overload this function to inform the api side that
+ it may be interested in looking at the event queue */
+ register_thunk(wakeup_topside,b);
+ timer_loop();
+ }
+ return(0);
+}
+#undef LIMIT
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ * Copyright (c) 2002 Eric Hoffman
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* select.c:
+ * Provides a general mechanism for registering and dispatching
+ * io events through the select system call.
+ */
+
+#ifdef sun
+#include <sys/filio.h>
+#else
+#include <sys/ioctl.h>
+#endif
+
+#include <sys/time.h>
+#include <sys/types.h>
+#include <stdlib.h>
+#include <pqtimer.h>
+#include <dispatch.h>
+
+
+static struct timeval beginning_of_epoch;
+static io_handler io_handlers;
+
+/* Function: now
+ *
+ * Return: the current time in canonical units: a 64 bit number
+ *         whose most significant 32 bits hold the number of seconds
+ *         and whose least significant 32 bits hold a count of
+ *         (1/(2^32))ths of a second.
+ */
+when now()
+{
+        struct timeval tv;
+        unsigned long long seconds;
+        unsigned long long fraction;
+
+        gettimeofday(&tv,0);
+        seconds=((unsigned long long)tv.tv_sec)<<32;
+        /* microseconds scaled to 2^32 parts per second */
+        fraction=(((unsigned long long)tv.tv_usec)<<32)/1000000;
+        return(seconds|fraction);
+}
+
+
+/* Function: register_io_handler
+ * Arguments: fd: the file descriptor of interest
+ *            type: a mask of READ_HANDLER, WRITE_HANDLER, EXCEPTION_HANDLER
+ *            function: a function to call when io is available on fd
+ *            arg: an opaque correlator to return to the handler
+ * Returns: a pointer to the io_handler structure, or 0 when it could
+ *          not be allocated
+ *
+ * the handler is pushed on the front of the global handler list. a
+ * negative fd yields an initialized handler that is not placed on the
+ * list.
+ */
+io_handler register_io_handler(int fd,
+                               int type,
+                               int (*function)(void *),
+                               void *arg)
+{
+        io_handler i=(io_handler)malloc(sizeof(struct io_handler));
+
+        if (!i) return(0);
+
+        /* fill in every field so that an unlinked handler is still safe
+           to hand to remove_io_handler() */
+        i->fd=fd;
+        i->type=type;
+        i->function=function;
+        i->argument=arg;
+        i->disabled=0;
+        i->next=0;
+        i->last=0;
+
+        if (fd>=0){
+                i->last=&io_handlers;
+                if ((i->next=io_handlers)) i->next->last=&i->next;
+                io_handlers=i;
+        }
+        return(i);
+}
+
+/* Function: remove_io_handler
+ * Arguments: i: a pointer to the handler to stop servicing
+ *
+ * the handler is not freed here because of reentrancy problems; it is
+ * only flagged, and the blocking function reclaims it later.
+ */
+void remove_io_handler (io_handler i)
+{
+        i->disabled = 1;
+}
+
+/* adds the handler's descriptor to the read, write and exception sets
+   (fds[0], fds[1], fds[2]) selected by its type mask */
+static void set_flag(io_handler n,fd_set *fds)
+{
+        int mask=n->type;
+        int fd=n->fd;
+
+        if (mask & READ_HANDLER)
+                FD_SET(fd,&fds[0]);
+        if (mask & WRITE_HANDLER)
+                FD_SET(fd,&fds[1]);
+        if (mask & EXCEPTION_HANDLER)
+                FD_SET(fd,&fds[2]);
+}
+
+
+/* Function: select_timer_block
+ * Arguments: until: an absolute time when the select should return,
+ *            or 0 to block without a timeout
+ *
+ * This function reclaims handlers that have been marked disabled,
+ * waits in select() on the remaining descriptors and dispatches the
+ * handler function of every descriptor the kernel reports as ready.
+ * A handler that returns 0 is marked disabled.
+ */
+void select_timer_block(when until)
+{
+        fd_set fds[3];
+        struct timeval timeout;
+        struct timeval *timeout_pointer;
+        int result;
+        io_handler j;
+        io_handler *k;
+
+        /* TODO: loop until the entire interval is expired*/
+        if (until){
+                when current=now();
+                /* a deadline that has already passed means "poll" */
+                when interval=(until>current)?(until-current):0;
+
+                timeout.tv_sec=(interval>>32);
+                /* the low 32 bits count 1/2^32ths of a second; this is
+                   the inverse of the conversion done in now() */
+                timeout.tv_usec=((interval&0xffffffffULL)*1000000)>>32;
+                timeout_pointer=&timeout;
+        } else timeout_pointer=0;
+
+        FD_ZERO(fds);
+        FD_ZERO(fds+1);
+        FD_ZERO(fds+2);
+        for (k=&io_handlers;*k;){
+                if ((*k)->disabled){
+                        /* unlink and free, then look at the successor
+                           again: it may be disabled as well */
+                        j=*k;
+                        *k=j->next;
+                        if (*k) (*k)->last=k;
+                        free(j);
+                        continue;
+                }
+                set_flag(*k,fds);
+                k=&(*k)->next;
+        }
+        result=select(FD_SETSIZE,fds,fds+1,fds+2,timeout_pointer);
+
+        if (result > 0)
+                for (j=io_handlers;j;j=j->next){
+                        if (!(j->disabled) &&
+                            ((FD_ISSET(j->fd,fds) && (j->type & READ_HANDLER)) ||
+                             (FD_ISSET(j->fd,fds+1) && (j->type & WRITE_HANDLER)) ||
+                             (FD_ISSET(j->fd,fds+2) && (j->type & EXCEPTION_HANDLER)))){
+                                if (!(*j->function)(j->argument))
+                                        j->disabled=1;
+                        }
+                }
+}
+
+/* Function: init_unix_timer()
+ *
+ * initializes the library: empties the handler list, records the
+ * start time and installs select_timer_block() as the timer's
+ * blocking function.
+ */
+void init_unix_timer()
+{
+        io_handlers = (io_handler)0;
+        gettimeofday(&beginning_of_epoch, 0);
+        initialize_timer(select_timer_block);
+}
--- /dev/null
+CPPFLAGS=
+INCLUDES=-I$(top_srcdir)/portals/include -I$(top_srcdir)/include -I$(srcdir)
+lib_LIBRARIES = libtcpnal.a
+pkginclude_HEADERS = pqtimer.h dispatch.h table.h timer.h connection.h
+libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h dispatch.h table.h timer.h address.c procapi.c proclib.c connection.c tcpnal.c connection.h
--- /dev/null
+This library implements two NAL interfaces, both running over IP.
+The first, tcpnal, creates TCP connections between participating
+processes in order to transport the portals requests. The second,
+ernal, provides a simple transport protocol which runs over
+UDP datagrams.
+
+The interface functions return both of these values in host order for
+convenience and readability. However this means that addresses
+exchanged in messages between hosts of different orderings will not
+function properly.
+
+Both NALs use the same support functions in order to schedule events
+and communicate with the generic portals implementation.
+
+ -------------------------
+ | api |
+ |_______________________|
+ | lib |
+ |_______________________|
+ | ernal | |tcpnal |
+ |--------| |----------|
+ | udpsock| |connection|
+ |-----------------------|
+ | timer/select |
+ -------------------------
+
+
+ These NALs use the framework from fdnal of a pipe between the api
+and library sides. This is wrapped up in the select on the library
+side, and blocks on the api side. Performance could be greatly
+improved by collapsing this artificial barrier, by using shared
+memory queues, or by wiring the api layer directly to the library.
+
+
+nid is defined as the low order 24-bits of the IP address of the
+physical node left shifted by 8 plus a virtual node number of 0
+through 255 (really only 239). The virtual node number of a tcpnal
+application should be specified using the environment variable
+PTL_VIRTNODE. pid is now a completely arbitrary number in the
+range of 0 to 255. The IP interface used can be overridden by
+specifying the appropriate hostid by setting the PTL_HOSTID
+environment variable. The value can be either dotted decimal
+(n.n.n.n) or hex starting with "0x".
+TCPNAL:
+ As the NAL needs to try to send to a particular nid/pid pair, it
+ will open up connections on demand. Because the port associated with
+ the connecting socket is different from the bound port, two
+ connections will normally be established between a pair of peers, with
+ data flowing from the anonymous connect (active) port to the advertised
+ or well-known bound (passive) port of each peer.
+
+ Should the connection fail to open, an error is reported to the
+ library component, which causes the api request to fail.
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* address.c:
+ * this file provides functions to aquire the IP address of the node
+ * and translate them into a NID/PID pair which supports a static
+ * mapping of virtual nodes into the port range of an IP socket.
+*/
+
+#include <stdlib.h>
+#include <netdb.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <portals/p30.h>
+#include <bridge.h>
+#include <ipmap.h>
+
+
+/* Function: get_node_id
+ * Returns: a 32 bit id for this node: its IP address converted to
+ *          host order, or 0 when no address could be determined
+ *
+ * When PTL_HOSTID is set it is used directly: a value whose second
+ * character is 'x' is parsed as a number (so "0x..." is hex), anything
+ * else is parsed as a dotted quad. Otherwise get_node_id() determines
+ * the host name and uses the resolver to find out its ip address.
+ * This is fairly fragile and inflexible, but explicitly asking about
+ * interfaces and their addresses is very complicated and nonportable.
+ */
+static unsigned int get_node_id(void)
+{
+        char buffer[255];
+        unsigned int x;
+        struct hostent *he;
+        char * host_envp;
+
+        if (!(host_envp = getenv("PTL_HOSTID")))
+        {
+                /* without a host name there is nothing to resolve */
+                if (gethostname(buffer,sizeof(buffer)))
+                        return(0);
+                /* a truncated name is not guaranteed to be terminated */
+                buffer[sizeof(buffer)-1]=0;
+                he=gethostbyname(buffer);
+                if (he && he->h_addr_list[0])
+                        x=*(unsigned int *)he->h_addr_list[0];
+                else
+                        x = 0;
+                return(ntohl(x));
+        }
+        else
+        {
+                /* check the first character before looking at the second
+                   so an empty value is never read past its terminator */
+                if (!host_envp[0] || host_envp[1] != 'x')
+                {
+                        int a=0, b=0, c=0, d=0;
+
+                        /* all four octets must be present */
+                        if (sscanf(host_envp, "%d.%d.%d.%d", &a, &b, &c, &d) != 4)
+                                return(0);
+                        return (((unsigned int)a<<24) | ((unsigned int)b<<16) |
+                                ((unsigned int)c<<8) | (unsigned int)d);
+                }
+                else
+                {
+                        long long hostid = strtoll(host_envp, 0, 0);
+                        return((unsigned int) hostid);
+                }
+        }
+}
+
+
+/* Function: set_address
+ * Arguments: t: a procnal structure to populate with the request
+ *            pidrequest: the portals pid asked for by the application
+ *
+ * set_address performs the bit manipulations to set the nid, pid, and
+ * iptop8 fields of the procnal structures. On a rejected request an
+ * error is printed and the function returns early.
+ *
+ * TODO: fix pidrequest to try to do dynamic binding if PTL_ID_ANY
+ */
+
+#ifdef DIRECT_IP_MODE
+void set_address(bridge t,ptl_pid_t pidrequest)
+{
+        /* PTL_PID_ANY maps to port 0 */
+        int port;
+
+        if (pidrequest!=(unsigned short)PTL_PID_ANY)
+                port=pidrequest;
+        else
+                port = 0;
+
+        t->nal_cb->ni.nid=get_node_id();
+        t->nal_cb->ni.pid=port;
+}
+#else
+
+void set_address(bridge t,ptl_pid_t pidrequest)
+{
+        const char *vnode_env = getenv("PTL_VIRTNODE");
+        int virtnode = 0;
+        int in_addr, port;
+        ptl_pid_t pid;
+
+        /* get and remember my node id*/
+        if (vnode_env)
+        {
+                int maxvnode = PNAL_VNODE_MASK - (PNAL_BASE_PORT
+                                                  >> PNAL_VNODE_SHIFT);
+
+                virtnode = atoi(vnode_env);
+                if (virtnode > maxvnode)
+                {
+                        fprintf(stderr, "PTL_VIRTNODE of %d is too large - max %d\n",
+                                virtnode, maxvnode);
+                        return;
+                }
+        }
+
+        in_addr = get_node_id();
+
+        /* for making new connections */
+        t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;
+        t->nal_cb->ni.nid = ((in_addr & PNAL_HOSTID_MASK)
+                             << PNAL_VNODE_SHIFT)
+                + virtnode;
+
+        pid=pidrequest;
+        /* TODO: Support of pid PTL_ID_ANY with virtual nodes needs more work. */
+#ifdef notyet
+        if (pid==(unsigned short)PTL_PID_ANY) port = 0;
+#endif
+        if (pid==(unsigned short)PTL_PID_ANY)
+        {
+                fprintf(stderr, "portal pid PTL_ID_ANY is not currently supported\n");
+                return;
+        }
+        if (pid > PNAL_PID_MASK)
+        {
+                fprintf(stderr, "portal pid of %d is too large - max %d\n",
+                        pid, PNAL_PID_MASK);
+                return;
+        }
+
+        /* computed as in the original code; the value is not used
+           further in this function */
+        port = ((virtnode << PNAL_VNODE_SHIFT) + pid) + PNAL_BASE_PORT;
+        t->nal_cb->ni.pid=pid;
+}
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+#ifndef _BRIDGE_H_
+#define _BRIDGE_H_
+
+#include <portals/lib-p30.h>
+
+/* state that joins the api side of a nal to its library side */
+typedef struct bridge {
+        /* written back to the api side as the shutdown acknowledgement */
+        int alive;
+        /* library side callback vector, allocated by the library thread */
+        nal_cb_t *nal_cb;
+        void *lower;
+        /* nal private state; the process-based nals keep their
+           procbridge here */
+        void *local;
+        /* optional hook run when the library side shuts down */
+        void (*shutdown)(struct bridge *);
+        /* this doesn't really belong here */
+        unsigned char iptop8;
+} *bridge;
+
+
+nal_t *bridge_init(ptl_interface_t nal,
+                   ptl_pid_t pid_request,
+                   ptl_ni_limits_t *desired,
+                   ptl_ni_limits_t *actual,
+                   int *rc);
+
+/* a nal specific initializer, looked up in nal_table by interface type */
+typedef int (*nal_initialize)(bridge);
+extern nal_initialize nal_table[PTL_IFACE_MAX];
+
+#endif /* _BRIDGE_H_ */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* connection.c:
+ This file provides a simple stateful connection manager which
+ builds tcp connections on demand and leaves them open for
+ future use. It also provides the machinery to allow peers
+ to connect to it
+*/
+
+#include <stdlib.h>
+#include <pqtimer.h>
+#include <dispatch.h>
+#include <table.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+#include <syscall.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <connection.h>
+#include <errno.h>
+
+
+/* global variable: acceptor port */
+unsigned short tcpnal_acceptor_port = 988;
+
+
+/* Function: compare_connection
+ * Arguments: arg1: a connection in the hash table
+ *            arg2: the id to verify against, as { ip, port }
+ * Returns: 1 if the connection is the one requested, 0 otherwise
+ *
+ * compare_connection() tests for collisions in the hash table
+ */
+static int compare_connection(void *arg1, void *arg2)
+{
+        connection conn = arg1;
+        unsigned int *key = arg2;
+
+        if (conn->ip!=key[0])
+                return(0);
+        return(conn->port==key[1]);
+}
+
+
+/* Function: connection_key
+ * Arguments: id: the id to hash, as { ip, port }
+ * Returns: a not-particularly-well-distributed hash
+ *          of the id
+ */
+static unsigned int connection_key(unsigned int *id)
+{
+        unsigned int ip=id[0];
+        unsigned int port=id[1];
+
+        return(ip^port);
+}
+
+
+/* Function: remove_connection
+ * Arguments: arg: the connection to remove
+ *
+ * takes the connection out of its manager's hash table, closes its
+ * socket and frees the structure.
+ */
+void remove_connection(void *arg)
+{
+        connection conn = arg;
+        unsigned int key[2] = { conn->ip, conn->port };
+
+        hash_table_remove(conn->m->connections,key);
+        close(conn->fd);
+        free(conn);
+}
+
+
+/* Function: read_connection:
+ * Arguments: c: the connection to read from
+ *            dest: the buffer to read into
+ *            len: the number of bytes to read
+ * Returns: success as 1, or failure as 0
+ *
+ * read_connection() reads data from the connection, continuing
+ * to read partial results until the request is satisfied or
+ * it errors. On end-of-file or a hard error the connection is
+ * removed (and freed) before returning 0.
+ * TODO: this read should be covered by signal protection.
+ */
+int read_connection(connection c,
+                    unsigned char *dest,
+                    int len)
+{
+        int offset=0,rc;
+
+        while (offset<len){
+                rc=syscall(SYS_read, c->fd, dest+offset, len-offset);
+                if (rc>0){
+                        offset+=rc;
+                        continue;
+                }
+                /* errno is only valid when the read failed; a return of 0
+                   is end-of-file and must not be retried */
+                if (rc<0 && errno==EINTR)
+                        continue;
+                remove_connection(c);
+                return(0);
+        }
+        return(1);
+}
+
+/* io handler for a connection's socket: passes the connection to the
+   input handler registered with its manager and returns that result */
+static int connection_input(connection c)
+{
+        manager owner=c->m;
+
+        return((*owner->handler)(owner->handler_arg,c));
+}
+
+
+/* Function: allocate_connection
+ * Arguments: m: the manager the allocation is occurring in the context of
+ *            ip, port: portal endpoint address for this connection
+ *            fd: open file descriptor for the socket
+ * Returns: an allocated connection structure, or 0 when it could not
+ *          be allocated (fd is closed in that case)
+ *
+ * just encompasses the action common to active and passive
+ * connections of allocation and placement in the global table
+ */
+static connection allocate_connection(manager m,
+                                      unsigned int ip,
+                                      unsigned short port,
+                                      int fd)
+{
+        connection c=malloc(sizeof(struct connection));
+        unsigned int id[2];
+
+        if (!c){
+                /* nothing will ever own the socket, so do not leak it */
+                perror("tcpnal connection allocation");
+                close(fd);
+                return(0);
+        }
+        c->m=m;
+        c->fd=fd;
+        c->ip=ip;
+        c->port=port;
+        id[0]=ip;
+        id[1]=port;
+        register_io_handler(fd,READ_HANDLER,connection_input,c);
+        hash_table_insert(m->connections,c,id);
+        return(c);
+}
+
+
+/* Function: new_connection
+ * Arguments: z: opaque argument holding the manager
+ * Returns: 1 in order to reregister for new connection requests
+ *
+ * called when the bound service socket receives
+ * a new connection request; it accepts the request and
+ * installs a new connection. A failed accept is reported and
+ * otherwise ignored.
+ */
+static int new_connection(void *z)
+{
+        manager m=z;
+        struct sockaddr_in s;
+        socklen_t len=sizeof(struct sockaddr_in);
+        int fd=accept(m->bound,(struct sockaddr *)&s,&len);
+        unsigned int nid;
+
+        if (fd<0){
+                /* keep listening; there is no connection to install */
+                perror("tcpnal accept");
+                return(1);
+        }
+
+        nid=s.sin_addr.s_addr;
+        /* cfs specific hack */
+        //unsigned short pid=s.sin_port;
+        allocate_connection(m,htonl(nid),0/*pid*/,fd);
+        return(1);
+}
+
+
+/* Function: force_tcp_connection
+ * Arguments: m: the connection manager
+ *            ip: host order address of the peer
+ *            port: ignored; the global acceptor port is always used
+ * Returns: an allocated connection structure, either
+ *          a pre-existing one, or a new connection; 0 when the
+ *          peer could not be reached
+ */
+connection force_tcp_connection(manager m,
+                                unsigned int ip,
+                                unsigned short port)
+{
+        connection c;
+        struct sockaddr_in addr;
+        unsigned int id[2];
+
+        port = tcpnal_acceptor_port;
+
+        id[0]=ip;
+        id[1]=port;
+
+        if (!(c=hash_table_find(m->connections,id))){
+                int fd;
+
+                bzero((char *) &addr, sizeof(addr));
+                addr.sin_family = AF_INET;
+                addr.sin_addr.s_addr = htonl(ip);
+                addr.sin_port = htons(port);
+
+                if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
+                        perror("tcpnal socket failed");
+                        exit(-1);
+                }
+                if (connect(fd,
+                            (struct sockaddr *)&addr,
+                            sizeof(struct sockaddr_in)))
+                {
+                        perror("tcpnal connect");
+                        /* the socket is of no further use */
+                        close(fd);
+                        return(0);
+                }
+                return(allocate_connection(m,ip,port,fd));
+        }
+        return(c);
+}
+
+
+/* Function: bind_socket
+ * Arguments: m: the connection manager
+ *            port: the port to attempt to bind to
+ * Returns: 1 on success, or 0 on error
+ *
+ * bind_socket() attempts to allocate and bind a socket to the requested
+ * port, or dynamically assign one from the kernel should the port be
+ * zero. Sets the bound, bound_handler and port elements of m.
+ *
+ * NOTE(review): port is stored into sin_port without htons(), whereas
+ * force_tcp_connection() converts; presumably callers pass the port
+ * already in network order -- confirm.
+ *
+ * TODO: The port should be an explicitly sized type.
+ */
+static int bind_socket(manager m,unsigned short port)
+{
+        struct sockaddr_in addr;
+        socklen_t alen=sizeof(struct sockaddr_in);
+
+        if ((m->bound = socket(AF_INET, SOCK_STREAM, 0)) < 0)
+                return(0);
+
+        bzero((char *) &addr, sizeof(addr));
+        addr.sin_family = AF_INET;
+        addr.sin_addr.s_addr = 0;
+        addr.sin_port = port;
+
+        if (bind(m->bound,(struct sockaddr *)&addr,alen)<0){
+                perror ("tcpnal bind");
+                close(m->bound);
+                return(0);
+        }
+
+        /* learn the port the kernel picked when port was zero */
+        getsockname(m->bound,(struct sockaddr *)&addr, &alen);
+
+        /* start listening before the handler can be dispatched */
+        if (listen(m->bound,5)<0){
+                perror ("tcpnal listen");
+                close(m->bound);
+                return(0);
+        }
+        m->bound_handler=register_io_handler(m->bound,READ_HANDLER,
+                                             new_connection,m);
+        m->port=addr.sin_port;
+        return(1);
+}
+
+
+/* Function: shutdown_connections
+ * Arguments: m: the manager structure
+ *
+ * closes the listening socket, retires its io handler, removes every
+ * connection held in the table and frees the manager itself.
+ */
+void shutdown_connections(manager m)
+{
+        int listener=m->bound;
+
+        close(listener);
+        remove_io_handler(m->bound_handler);
+        hash_destroy_table(m->connections,remove_connection);
+        free(m);
+}
+
+
+/* Function: init_connections
+ * Arguments: pid: the port to attempt to bind to
+ *            input: handler called when a connection has input
+ *            a: opaque argument passed to the input handler
+ * Returns: a newly allocated manager structure, or
+ *          zero if it could not be allocated or the fixed port
+ *          could not be bound
+ */
+manager init_connections(unsigned short pid,
+                         int (*input)(),
+                         void *a)
+{
+        manager m=(manager)malloc(sizeof(struct manager));
+
+        if (!m) return(0);
+
+        m->connections=hash_create_table(compare_connection,connection_key);
+        m->handler=input;
+        m->handler_arg=a;
+        if (bind_socket(m,pid)) return(m);
+        /* NOTE(review): the connection table created above is not
+           released on this path -- confirm whether it should be */
+        free(m);
+        return(0);
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+#include <table.h>
+
+/* State of one connection manager, allocated by init_connections(). */
+typedef struct manager {
+ /* table created with hash_create_table() in init_connections() */
+ table connections;
+ /* listening socket descriptor opened by bind_socket() */
+ int bound;
+ /* io handler registered on the listening socket */
+ io_handler bound_handler;
+ /* callback and its opaque argument, as passed to init_connections() */
+ int (*handler)(void *, void *);
+ void *handler_arg;
+ /* sin_port value read back with getsockname() in bind_socket() */
+ unsigned short port;
+} *manager;
+
+
+/* One connection owned by a manager.
+ * NOTE(review): field meanings below are inferred from the
+ * allocate_connection(m,ip,port,fd) call site - confirm. */
+typedef struct connection {
+ /* presumably the peer's IPv4 address */
+ unsigned int ip;
+ /* presumably the peer's port */
+ unsigned short port;
+ /* socket descriptor for this connection */
+ int fd;
+ /* the manager this connection belongs to */
+ manager m;
+} *connection;
+
+/* Returns a connection for (ip, port), or 0 when the connect fails.
+ * NOTE(review): presumably reuses an existing connection - confirm. */
+connection force_tcp_connection(manager m,
+                                unsigned int ip,
+                                unsigned short port);
+
+/* Allocates a manager bound to the given port; returns 0 on failure. */
+manager init_connections(unsigned short pid,
+                         int (*f)(void *,connection),
+                         void *arg);
+
+/* Per-entry callback handed to hash_destroy_table() on shutdown. */
+void remove_connection(void *arg);
+
+/* Closes the listening socket, destroys all connections and frees m. */
+void shutdown_connections(manager m);
+
+/* Reads into dest from connection c.
+ * NOTE(review): return convention is not visible here - confirm. */
+int read_connection(connection c,
+                    unsigned char *dest,
+                    int len);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Phil Schwan <phil@clusterfs.com>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <sys/time.h>
+
+/* value printed in the CPU column of every debug line */
+int smp_processor_id = 1;
+/* NOTE(review): not referenced by the functions in this file */
+char debug_file_path[1024] = "/tmp/lustre-log";
+/* name reported by the dumplog functions; never filled in by this file */
+char debug_file_name[1024];
+/* stream all debug output goes to; set to stdout by portals_debug_init() */
+FILE *debug_file_fd;
+
+/* Prints where the debug log can be found.  arg is ignored and the
+ * function always returns 0. */
+int portals_do_debug_dumplog(void *arg)
+{
+        (void)arg;
+
+        fprintf(stdout, "Look in %s\n", debug_file_name);
+        return 0;
+}
+
+
+/* No-op in this user-space implementation. */
+void portals_debug_print(void)
+{
+}
+
+
+/* Prints where the debug log can be found; nothing is actually dumped. */
+void portals_debug_dumplog(void)
+{
+        fprintf(stdout, "Look in %s\n", debug_file_name);
+}
+
+
+/* Routes all debug output to stdout.  bufsize is ignored; always
+ * returns 0. */
+int portals_debug_init(unsigned long bufsize)
+{
+        (void)bufsize;
+
+        debug_file_fd = stdout;
+        return 0;
+}
+
+/* Nothing to release: the debug stream is not closed here.  Always
+ * returns 0. */
+int portals_debug_cleanup(void)
+{
+        return 0;
+}
+
+/* Stub: there is no buffer to clear in this implementation.  Always
+ * returns 0. */
+int portals_debug_clear_buffer(void)
+{
+        return 0;
+}
+
+/* Writes a marker, framed by two rows of asterisks, to the debug
+ * stream.  text is the marker label.  Always returns 0. */
+int portals_debug_mark_buffer(char *text)
+{
+        FILE *out = debug_file_fd;
+
+        fputs("*******************************************************************************\n", out);
+        fprintf(out, "DEBUG MARKER: %s\n", text);
+        fputs("*******************************************************************************\n", out);
+
+        return 0;
+}
+
+/* Stub: nothing is copied.  Both arguments are ignored and the
+ * function always returns 0. */
+int portals_debug_copy_to_user(char *buf, unsigned long len)
+{
+        (void)buf;
+        (void)len;
+
+        return 0;
+}
+
+/* Function: portals_debug_msg
+ * Arguments: subsys, mask: debug subsystem and mask, printed in hex
+ *            file, fn, line: source location of the caller
+ *            format, ...: printf-style message
+ *
+ * Writes one debug line to debug_file_fd (stdout if the stream was
+ * never set up): a timestamped prefix, the source location, then the
+ * formatted message.  The message is not terminated with a newline
+ * here; the format string is expected to carry one.
+ */
+void
+portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
+                   const char *format, ...)
+{
+        va_list ap;
+        unsigned long flags;
+        struct timeval tv;
+        /* portals_debug_init() may not have run yet */
+        FILE *out = debug_file_fd ? debug_file_fd : stdout;
+
+        gettimeofday(&tv, NULL);
+
+        /* timeval field types vary by platform, so cast to match %lu */
+        fprintf(out,
+                "%02x:%06x:%d:%lu.%06lu ",
+                subsys >> 24, mask, smp_processor_id,
+                (unsigned long)tv.tv_sec, (unsigned long)tv.tv_usec);
+
+        /* flags is never written; only the low 13 bits of its address
+           are used, to print a value in the range 1..8192 */
+        fprintf(out,
+                "(%s:%d:%s() %d+%ld): ",
+                file, line, fn, 0,
+                (long)(8192 - ((unsigned long)&flags & 8191UL)));
+
+        /* a va_list must go through vfprintf, never plain fprintf */
+        va_start (ap, format);
+        vfprintf(out, format, ap);
+        va_end (ap);
+}
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ * Copyright (c) 2002 Eric Hoffman
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+/* this file is only called dispatch.h to prevent it
+ from colliding with /usr/include/sys/select.h */
+
+/* handle for one registered file descriptor callback */
+typedef struct io_handler *io_handler;
+
+/* Node of the singly linked list of handlers kept by the dispatcher. */
+struct io_handler{
+ /* address of the pointer that links to this node, set at registration */
+ io_handler *last;
+ /* next handler in the list, or 0 */
+ io_handler next;
+ /* descriptor being watched */
+ int fd;
+ /* mask of READ_HANDLER, WRITE_HANDLER, EXCEPTION_HANDLER */
+ int type;
+ /* callback; returning 0 disables the handler */
+ int (*function)(void *);
+ /* opaque value passed to function */
+ void *argument;
+ /* non-zero once the handler is marked for removal */
+ int disabled;
+};
+
+
+/* bit flags for the type argument of register_io_handler() */
+#define READ_HANDLER 1
+#define WRITE_HANDLER 2
+#define EXCEPTION_HANDLER 4
+/* all three conditions at once */
+#define ALL_HANDLER (READ_HANDLER | WRITE_HANDLER | EXCEPTION_HANDLER)
+
+/* Registers function(arg) to be called when fd shows one of the
+ * conditions in type; returns the new handler. */
+io_handler register_io_handler(int fd,
+ int type,
+ int (*function)(void *),
+ void *arg);
+
+/* Marks a handler as disabled; it is freed later by the dispatcher. */
+void remove_io_handler (io_handler i);
+/* Resets the handler list and installs select_timer_block() as the
+ * timer's blocking function. */
+void init_unix_timer(void);
+/* Waits in select() until the absolute time `until` (0: no timeout)
+ * and dispatches ready handlers. */
+void select_timer_block(when until);
+/* NOTE(review): `when` is not declared in this header; the includer
+ * must have pulled in its definition first. */
+when now(void);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+/* Address mapping between (ip, port) pairs and portals (nid, pid).
+ *
+ * With DIRECT_IP_MODE defined (the default below) every macro is an
+ * identity on one of its arguments: the nid is the ip address and the
+ * pid is the port.  The #else branch packs host and virtual-node bits
+ * into the nid and derives the port from PNAL_BASE_PORT.
+ *
+ * NOTE(review): the two branches do not take the same arguments
+ * (PNAL_PID(pid) vs PNAL_PID(port), PNAL_IP(in_addr, port) vs
+ * PNAL_IP(nid,t)), so callers are not interchangeable between modes.
+ */
+#define DIRECT_IP_MODE
+#ifdef DIRECT_IP_MODE
+#define PNAL_NID(in_addr, port) (in_addr)
+#define PNAL_PID(pid) (pid)
+#define PNAL_IP(in_addr, port) (in_addr)
+#define PNAL_PORT(nid, pid) (pid)
+#else
+
+#define PNAL_BASE_PORT 4096
+#define PNAL_HOSTID_SHIFT 24
+#define PNAL_HOSTID_MASK ((1 << PNAL_HOSTID_SHIFT) - 1)
+#define PNAL_VNODE_SHIFT 8
+#define PNAL_VNODE_MASK ((1 << PNAL_VNODE_SHIFT) - 1)
+#define PNAL_PID_SHIFT 8
+#define PNAL_PID_MASK ((1 << PNAL_PID_SHIFT) - 1)
+
+#define PNAL_NID(in_addr, port) (((ntohl(in_addr) & PNAL_HOSTID_MASK) \
+                                  << PNAL_VNODE_SHIFT) \
+                                 | (((ntohs(port)-PNAL_BASE_PORT) >>\
+                                     PNAL_PID_SHIFT)))
+#define PNAL_PID(port) ((ntohs(port) - PNAL_BASE_PORT) & PNAL_PID_MASK)
+
+/* t is parenthesized so that any pointer expression may be passed */
+#define PNAL_IP(nid,t) (htonl((((unsigned)(nid))\
+                               >> PNAL_VNODE_SHIFT)\
+                              | ((t)->iptop8 << PNAL_HOSTID_SHIFT)))
+#define PNAL_PORT(nid, pid) (htons(((((nid) & PNAL_VNODE_MASK) \
+                                     << PNAL_VNODE_SHIFT) \
+                                    | ((pid) & PNAL_PID_MASK)) \
+                                   + PNAL_BASE_PORT))
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ * Copyright (c) 2002 Eric Hoffman
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* timer.c:
+ * this file implements a simple priority-queue based timer system. when
+ * combined with a file which implements now() and block(), it can
+ * be used to provide coarse-grained time-based callbacks.
+ */
+
+#include <pqtimer.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* One pending timer; entries of the timers[] heap point at these. */
+struct timer {
+ /* callback and its argument, invoked by timer_fire() */
+ void (*function)(void *);
+ void *arg;
+ /* absolute expiry time: now() + interval at registration */
+ when w;
+ /* NOTE(review): assigned from a `when` in register_timer(), so large
+    intervals are truncated to int here */
+ int interval;
+ /* set by remove_timer(); a disabled timer is freed without firing */
+ int disable;
+};
+
+/* Node of the list of background callbacks run by timer_loop(). */
+typedef struct thunk *thunk;
+struct thunk {
+ /* function to call and the argument to call it with */
+ void (*f)(void *);
+ void *a;
+ /* next thunk in the list, or 0 */
+ thunk next;
+};
+
+extern when now(void);
+
+/* head of the thunk list built by register_thunk() */
+static thunk thunks;
+/* NOTE(review): never assigned in this file, so it stays 0 */
+static int internal;
+/* blocking function installed by initialize_timer() */
+static void (*block_function)(when);
+/* number of timers currently in the heap */
+static int number_of_timers;
+/* number of slots allocated in timers[] */
+static int size_of_pqueue;
+/* binary min-heap ordered by expiry time; slot 0 is unused and the
+   earliest timer is timers[1] */
+static timer *timers;
+
+
+/* Restores the heap property below index `where` by repeatedly
+ * swapping the entry with its earliest child until neither child
+ * expires before it. */
+static void heal(int where)
+{
+        for (;;) {
+                int lchild = where << 1;
+                int rchild = lchild + 1;
+                int earliest = where;
+                timer swap;
+
+                if ((lchild <= number_of_timers) &&
+                    (timers[lchild]->w < timers[earliest]->w))
+                        earliest = lchild;
+                if ((rchild <= number_of_timers) &&
+                    (timers[rchild]->w < timers[earliest]->w))
+                        earliest = rchild;
+
+                /* already in place: nothing below needs fixing */
+                if (earliest == where)
+                        return;
+
+                swap = timers[where];
+                timers[where] = timers[earliest];
+                timers[earliest] = swap;
+                where = earliest;
+        }
+}
+
+/* Moves the entry at index i towards the root while it expires
+ * strictly before its parent. */
+static void add_pqueue(int i)
+{
+        while (i > 1) {
+                int up = i >> 1;
+                timer swap;
+
+                if (!(timers[i]->w < timers[up]->w))
+                        break;
+
+                swap = timers[i];
+                timers[i] = timers[up];
+                timers[up] = swap;
+                i = up;
+        }
+}
+
+/* Appends t to the heap, growing the array by ten slots when the next
+ * index would not fit.  The array holds timer pointers, so it is sized
+ * with sizeof(timer), and it is grown with realloc() so the previous
+ * array is not leaked.  Running out of memory aborts the process:
+ * there is no way to report failure to the caller and continuing
+ * would write through a null pointer. */
+static void add_timer(timer t)
+{
+        if (size_of_pqueue<(number_of_timers+2)){
+                int newsize=size_of_pqueue+10;
+                timer *grown=(timer *)realloc(timers,sizeof(timer)*newsize);
+
+                if (!grown) abort();
+                timers=grown;
+                size_of_pqueue=newsize;
+        }
+        /* slot 0 is unused, so pre-increment gives the new last index */
+        timers[++number_of_timers]=t;
+        add_pqueue(number_of_timers);
+}
+
+/* Function: register_timer
+ * Arguments: interval: the time interval from the current time when
+ *                      the timer function should be called
+ *            function: the function to call when the time has expired
+ *            argument: the argument to call it with.
+ * Returns: a pointer to a timer structure, or zero if no memory
+ *          could be allocated for it
+ */
+timer register_timer(when interval,
+                     void (*function)(void *),
+                     void *argument)
+{
+        timer t=(timer)malloc(sizeof(struct timer));
+
+        if (!t) return(0);
+
+        t->arg=argument;
+        t->function=function;
+        /* the field is an int, so large intervals are truncated here;
+           the expiry time below uses the full value */
+        t->interval=(int)interval;
+        t->disable=0;
+        t->w=now()+interval;
+        add_timer(t);
+        /* when this is the only timer, call the blocking function
+           with its expiry time */
+        if (!internal && (number_of_timers==1))
+                block_function(t->w);
+        return(t);
+}
+
+/* Function: remove_timer
+ * Arguments: t: the timer to cancel
+ * Returns: nothing
+ *
+ * remove_timer cancels a timer, ensuring that its function
+ * will never be called.  The timer is only flagged here; it
+ * stays in the queue and is freed by timer_fire() when its
+ * turn comes, which avoids reentrancy issues.
+ */
+
+void remove_timer(timer t)
+{
+        t->disable = 1;
+}
+
+
+
+/* Function: timer_fire
+ *
+ * Removes the earliest timer from the queue, calls its function
+ * unless the timer was disabled with remove_timer(), and frees it.
+ * Does nothing when the queue is empty.
+ */
+void timer_fire()
+{
+        timer current;
+
+        /* an empty queue has no valid timers[1] to take */
+        if (number_of_timers < 1) return;
+
+        current=timers[1];
+        timers[1]=timers[number_of_timers--];
+        /* the heap is repaired before the callback runs, so the
+           callback may register new timers */
+        heal(1);
+        if (!current->disable) {
+                (*current->function)(current->arg);
+        }
+        free(current);
+}
+
+/* Fires every timer that has expired as of the time of the call and
+ * returns the expiry time of the earliest remaining timer, or 0 when
+ * none are left. */
+when next_timer(void)
+{
+        const when current = now();
+
+        while ((number_of_timers != 0) && !(timers[1]->w > current))
+                timer_fire();
+
+        if (number_of_timers == 0)
+                return(0);
+        return(timers[1]->w);
+}
+
+/* Function: timer_loop
+ * Arguments: none
+ * Returns: never
+ *
+ * timer_loop() is the blocking dispatch function for the timer.
+ * On every pass it runs all registered thunks, then either fires
+ * the earliest timer if it has expired, or calls the block()
+ * function registered with initialize_timer until that timer is
+ * due.  With no timers pending the thunks are run a second time
+ * and block() is called with 0.
+ */
+void timer_loop()
+{
+        for (;;) {
+                when started = now();
+                thunk cur;
+
+                for (cur = thunks; cur != 0; cur = cur->next)
+                        (*cur->f)(cur->a);
+
+                if (number_of_timers == 0) {
+                        for (cur = thunks; cur != 0; cur = cur->next)
+                                (*cur->f)(cur->a);
+                        (*block_function)(0);
+                        continue;
+                }
+
+                if (timers[1]->w > started)
+                        (*block_function)(timers[1]->w);
+                else
+                        timer_fire();
+        }
+}
+
+
+/* Function: register_thunk
+ * Arguments: f: the function to call
+ *            a: the single argument to call it with
+ *
+ * Thunk functions get called at irregular intervals (on every
+ * pass of timer_loop); they should not assume when, or take a
+ * particularly long amount of time.  Thunks are for background
+ * cleanup tasks.  New thunks go to the front of the list.
+ */
+void register_thunk(void (*f)(void *),void *a)
+{
+        thunk fresh = (void *)malloc(sizeof(struct thunk));
+
+        fresh->next = thunks;
+        fresh->a = a;
+        fresh->f = f;
+        thunks = fresh;
+}
+
+/* Function: initialize_timer
+ * Arguments: block: the function to call to block until the given time
+ *
+ * initialize_timer() must be called before any other timer function,
+ * including timer_loop.  It installs the blocking function, empties
+ * the thunk list and allocates an empty queue with ten slots.
+ */
+void initialize_timer(void (*block)(when))
+{
+        thunks = 0;
+        block_function = block;
+
+        size_of_pqueue = 10;
+        number_of_timers = 0;
+        timers = (timer *)malloc(sizeof(timer) * size_of_pqueue);
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ * Copyright (c) 2002 Eric Hoffman
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+#ifndef _PQTIMER_H_
+#define _PQTIMER_H_
+
+/* a point in time or an interval: seconds in the upper 32 bits,
+   fractions of a second (1/2^32) in the lower 32 bits */
+typedef unsigned long long when;
+when now(void);
+
+/* opaque handle for a pending timer */
+typedef struct timer *timer;
+
+/* schedule function(argument) to run `interval` from now */
+timer register_timer(when interval,
+                     void (*function)(void *),
+                     void *argument);
+timer register_timer_wait(void);
+/* cancel a timer; its function will not be called */
+void remove_timer(timer);
+/* dispatch loop; never returns */
+void timer_loop(void);
+/* must be called before any other timer function */
+void initialize_timer(void (*block)(when));
+/* fire the earliest pending timer */
+void timer_fire(void);
+/* fire all expired timers; returns the next expiry time or 0 */
+when next_timer(void);
+/* register a background callback run on every pass of timer_loop */
+void register_thunk(void (*f)(void *),void *a);
+
+
+/* one second expressed in `when` units */
+#define HZ 0x100000000ull
+
+#endif /* _PQTIMER_H_ */
+
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* api.c:
+ * This file provides the 'api' side for the process-based nals.
+ * it is responsible for creating the 'library' side thread,
+ * and passing wrapped portals transactions to it.
+ *
+ * Along with initialization, shutdown, and transport to the library
+ * side, this file contains some stubs to satisfy the nal definition.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <syscall.h>
+#include <procbridge.h>
+#include <pqtimer.h>
+#include <dispatch.h>
+#include <errno.h>
+
+
+/* Function: forward
+ * Arguments: nal_t *nal: pointer to my top-side nal structure
+ *            id: the command to pass to the lower layer
+ *            args, args_len: pointer to and length of the request
+ *            ret, ret_len: pointer to and size of the result
+ * Returns: a portals status code: PTL_OK on success, PTL_SEGV when
+ *          the request could not be written or the reply not read
+ *
+ * forwards a packaged api call from the 'api' side to the 'library'
+ * side, and collects the result.  The request is written to the
+ * to_lib pipe as id, args_len, ret_len and then the argument block;
+ * the reply of ret_len bytes is read from the from_lib pipe.
+ */
+#define forward_failure(operand,fd,buffer,length)\
+        if(syscall(SYS_##operand,fd,buffer,length)!=length){\
+                lib_fini(b->nal_cb);\
+                return(PTL_SEGV);\
+        }
+static int procbridge_forward(nal_t *n, int id, void *args, ptl_size_t args_len,
+                              void *ret, ptl_size_t ret_len)
+{
+        bridge b=(bridge)n->nal_data;
+        procbridge p=(procbridge)b->local;
+        int lib=p->to_lib[1];
+        int k;
+
+        forward_failure(write,lib, &id, sizeof(id));
+        forward_failure(write,lib,&args_len, sizeof(args_len));
+        forward_failure(write,lib,&ret_len, sizeof(ret_len));
+        forward_failure(write,lib,args, args_len);
+
+        /* retry only when the read itself was interrupted by a signal;
+           errno is compared, not modified, and a short read is not
+           retried because that would overwrite the start of ret */
+        do {
+                k=syscall(SYS_read, p->from_lib[0], ret, ret_len);
+        } while ((k<0) && (errno==EINTR));
+
+        if(k!=ret_len){
+                perror("nal: read return block");
+                return PTL_SEGV;
+        }
+        return (PTL_OK);
+}
+#undef forward_failure
+
+
+/* Function: shutdown
+ * Arguments: nal: a pointer to my top side nal structure
+ *            ni: my network interface index (unused)
+ * Returns: always 0
+ *
+ * sends the PTL_FINI codepoint to the lower side, waits for its
+ * answer, then closes both pipes and frees the procbridge state
+ */
+static int procbridge_shutdown(nal_t *n, int ni)
+{
+        bridge b=(bridge)n->nal_data;
+        procbridge p=(procbridge)b->local;
+        int code=PTL_FINI;
+        int ends[4];
+        int i;
+
+        /* ask the library side to stop and wait for its reply */
+        syscall(SYS_write, p->to_lib[1],&code,sizeof(code));
+        syscall(SYS_read, p->from_lib[0],&code,sizeof(code));
+
+        ends[0]=p->to_lib[0];
+        ends[1]=p->to_lib[1];
+        ends[2]=p->from_lib[0];
+        ends[3]=p->from_lib[1];
+        for (i=0;i<4;i++)
+                syscall(SYS_close, ends[i]);
+
+        free(p);
+        return(0);
+}
+
+
+/* Function: validate
+ * Arguments: all ignored
+ * Returns: always 0
+ *
+ * stub that exists only to fill the validate slot of the nal
+ */
+static int procbridge_validate(nal_t *nal, void *base, ptl_size_t extent)
+{
+        (void)nal;
+        (void)base;
+        (void)extent;
+
+        return(0);
+}
+
+
+/* Function: yield
+ * Arguments: n: pointer to my top side nal structure
+ *
+ * this function was originally intended to allow the
+ * lower half thread to be scheduled to allow progress. we
+ * overload it to explicitly block until signalled by the
+ * lower half.
+ *
+ * NOTE(review): the wait is not guarded by a predicate, so a
+ * broadcast sent before this thread waits is not seen here.
+ */
+static void procbridge_yield(nal_t *n)
+{
+ bridge b=(bridge)n->nal_data;
+ procbridge p=(procbridge)b->local;
+
+ /* the mutex must be held around the condition wait */
+ pthread_mutex_lock(&p->mutex);
+ pthread_cond_wait(&p->cond,&p->mutex);
+ pthread_mutex_unlock(&p->mutex);
+}
+
+
+/* lock slot of the nal: intentionally does nothing */
+static void procbridge_lock(nal_t * nal, unsigned long *flags)
+{
+}
+/* unlock slot of the nal: intentionally does nothing */
+static void procbridge_unlock(nal_t * nal, unsigned long *flags)
+{
+}
+/* api_nal
+ * the interface vector that allows the generic code to access
+ * this nal. this is separate from the library side nal_cb.
+ * nal_data starts out NULL and is filled in when the interface
+ * is first set up.
+ * TODO: should be dynamically allocated
+ */
+static nal_t api_nal = {
+        .ni       = {0},
+        .nal_data = NULL,
+        .forward  = procbridge_forward,
+        .shutdown = procbridge_shutdown,
+        .validate = procbridge_validate,
+        .yield    = procbridge_yield,
+        .lock     = procbridge_lock,
+        .unlock   = procbridge_unlock
+};
+
+/* Function: bridge_init
+ *
+ * Arguments: pid: requested process id (port offset)
+ * PTL_ID_ANY not supported.
+ * desired: limits passed from the application
+ * and effectively ignored
+ * actual: limits actually allocated and returned
+ *
+ * Returns: a pointer to my statically allocated top side NAL
+ * structure
+ *
+ * initializes the tcp nal. we define unix_failure as an
+ * error wrapper to cut down clutter.
+ *
+ * unix_failure(operand,fd,buffer,length,text) issues the raw
+ * read/write system call named by operand and, unless exactly
+ * length bytes were transferred, reports text with perror() and
+ * makes the enclosing function return NULL.  It expands to a bare
+ * if statement, so it must only be used as a full statement.
+ */
+#define unix_failure(operand,fd,buffer,length,text)\
+ if(syscall(SYS_##operand,fd,buffer,length)!=length){\
+ perror(text);\
+ return(NULL);\
+ }
+/* bridge_init is compiled out; procbridge_interface() below performs
+ * the same setup sequence with a different argument list.
+ * NOTE(review): if this is ever revived, sizeof(rc) in the final read
+ * is the size of the int pointer, not of the int it points to. */
+#if 0
+static nal_t *bridge_init(ptl_interface_t nal,
+ ptl_pid_t pid_request,
+ ptl_ni_limits_t *desired,
+ ptl_ni_limits_t *actual,
+ int *rc)
+{
+ procbridge p;
+ bridge b;
+ static int initialized=0;
+ ptl_ni_limits_t limits = {-1,-1,-1,-1,-1};
+
+ if(initialized) return (&api_nal);
+
+ init_unix_timer();
+
+ b=(bridge)malloc(sizeof(struct bridge));
+ p=(procbridge)malloc(sizeof(struct procbridge));
+ api_nal.nal_data=b;
+ b->local=p;
+
+ if(pipe(p->to_lib) || pipe(p->from_lib)) {
+ perror("nal_init: pipe");
+ return(NULL);
+ }
+
+ if (desired) limits = *desired;
+ unix_failure(write,p->to_lib[1], &pid_request, sizeof(pid_request),
+ "nal_init: write");
+ unix_failure(write,p->to_lib[1], &limits, sizeof(ptl_ni_limits_t),
+ "nal_init: write");
+ unix_failure(write,p->to_lib[1], &nal, sizeof(ptl_interface_t),
+ "nal_init: write");
+
+ if(pthread_create(&p->t, NULL, nal_thread, b)) {
+ perror("nal_init: pthread_create");
+ return(NULL);
+ }
+
+ unix_failure(read,p->from_lib[0], actual, sizeof(ptl_ni_limits_t),
+ "tcp_init: read");
+ unix_failure(read,p->from_lib[0], rc, sizeof(rc),
+ "nal_init: read");
+
+ if(*rc) return(NULL);
+
+ initialized = 1;
+ pthread_mutex_init(&p->mutex,0);
+ pthread_cond_init(&p->cond, 0);
+
+ return (&api_nal);
+}
+#endif
+
+/* node id copied into the library side's ni.nid once it has
+   initialized successfully */
+ptl_nid_t tcpnal_mynid;
+
+/* Function: procbridge_interface
+ * Arguments: num_interface: not used by this implementation
+ *            ptl_size: requested portal table size, 0 for no request
+ *            acl_size: requested access control table size, 0 for
+ *                      no request
+ *            requested_pid: process id handed to the library side
+ * Returns: a pointer to the statically allocated top side NAL
+ *          structure, or NULL on failure
+ *
+ * On the first call this creates the two pipes, writes the
+ * requested pid, the limits and the nal type to the library side,
+ * starts the library side thread and waits for its status code.
+ * Later calls return the same structure without doing anything.
+ *
+ * The mutex and condition variable are initialized before the
+ * thread is started, because the thread locks the mutex as soon
+ * as its own initialization has succeeded.
+ */
+nal_t *procbridge_interface(int num_interface,
+                            ptl_pt_index_t ptl_size,
+                            ptl_ac_index_t acl_size,
+                            ptl_pid_t requested_pid)
+{
+        procbridge p;
+        bridge b;
+        static int initialized=0;
+        ptl_ni_limits_t limits = {-1,-1,-1,-1,-1};
+        int rc, nal_type = PTL_IFACE_TCP;/* PTL_IFACE_DEFAULT FIXME hack */
+
+        if(initialized) return (&api_nal);
+
+        init_unix_timer();
+
+        b=(bridge)malloc(sizeof(struct bridge));
+        p=(procbridge)malloc(sizeof(struct procbridge));
+        if(!b || !p) {
+                perror("nal_init: malloc");
+                free(b);
+                free(p);
+                return(NULL);
+        }
+        api_nal.nal_data=b;
+        b->local=p;
+
+        if(pipe(p->to_lib) || pipe(p->from_lib)) {
+                perror("nal_init: pipe");
+                return(NULL);
+        }
+
+        if (ptl_size)
+                limits.max_ptable_index = ptl_size;
+        if (acl_size)
+                limits.max_atable_index = acl_size;
+
+        unix_failure(write,p->to_lib[1], &requested_pid, sizeof(requested_pid),
+                     "nal_init: write");
+        unix_failure(write,p->to_lib[1], &limits, sizeof(ptl_ni_limits_t),
+                     "nal_init: write");
+        unix_failure(write,p->to_lib[1], &nal_type, sizeof(nal_type),
+                     "nal_init: write");
+
+        /* must exist before the library thread can touch them */
+        pthread_mutex_init(&p->mutex,0);
+        pthread_cond_init(&p->cond, 0);
+
+        if(pthread_create(&p->t, NULL, nal_thread, b)) {
+                perror("nal_init: pthread_create");
+                return(NULL);
+        }
+
+        unix_failure(read,p->from_lib[0], &rc, sizeof(rc),
+                     "nal_init: read");
+
+        if(rc) return(NULL);
+
+        b->nal_cb->ni.nid = tcpnal_mynid;
+        initialized = 1;
+
+        return (&api_nal);
+}
+#undef unix_failure
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+#ifndef _PROCBRIDGE_H_
+#define _PROCBRIDGE_H_
+
+#include <pthread.h>
+#include <bridge.h>
+#include <ipmap.h>
+
+
+/* State shared between the api side and the library side thread. */
+typedef struct procbridge {
+ /* the library side thread */
+ pthread_t t;
+ /* condition and mutex used by the library side to wake the api side */
+ pthread_cond_t cond;
+ pthread_mutex_t mutex;
+ /* pipe carrying requests from the api side to the library side */
+ int to_lib[2];
+ /* pipe carrying replies from the library side to the api side */
+ int from_lib[2];
+} *procbridge;
+
+/* entry point of the library side thread; takes the bridge */
+extern void *nal_thread(void *);
+
+
+/* command codes sent over the to_lib pipe in addition to the
+   dispatch indices; PTL_FINI asks the library side to shut down */
+#define PTL_INIT (LIB_MAX_DISPATCH+1)
+#define PTL_FINI (LIB_MAX_DISPATCH+2)
+
+/* ceilings applied to the requested access control and portal
+   table sizes */
+#define MAX_ACLS 1
+#define MAX_PTLS 128
+
+extern void set_address(bridge t,ptl_pid_t pidrequest);
+/* sets up the api side on first use; returns NULL on failure */
+extern nal_t *procbridge_interface(int num_interface,
+ ptl_pt_index_t ptl_size,
+ ptl_ac_index_t acl_size,
+ ptl_pid_t requested_pid);
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* lib.c:
+ * This file provides the 'library' side for the process-based nals.
+ * it is responsible for communication with the 'api' side and
+ * providing service to the generic portals 'library'
+ * implementation. 'library' might be better termed 'communication'
+ * or 'kernel'.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <syscall.h>
+#include <procbridge.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netdb.h>
+#include <errno.h>
+#include <timer.h>
+//#include <util/pqtimer.h>
+#include <dispatch.h>
+
+/* the following functions are stubs to satisfy the nal definition
+ without doing anything particularly useful*/
+
+/* cb_write slot: copies len bytes from src_addr to dst_addr.
+ * nal and private are ignored; always returns 0. */
+static int nal_write(nal_cb_t *nal,
+                     void *private,
+                     user_ptr dst_addr,
+                     void *src_addr,
+                     ptl_size_t len)
+{
+        (void)nal;
+        (void)private;
+
+        memcpy(dst_addr, src_addr, len);
+        return 0;
+}
+
+/* cb_read slot: copies len bytes from src_addr to dst_addr.
+ * nal and private are ignored; always returns 0. */
+static int nal_read(nal_cb_t * nal,
+                    void *private,
+                    void *dst_addr,
+                    user_ptr src_addr,
+                    size_t len)
+{
+        (void)nal;
+        (void)private;
+
+        memcpy(dst_addr, src_addr, len);
+        return 0;
+}
+
+/* cb_malloc slot: plain malloc(len); nal is ignored. */
+static void *nal_malloc(nal_cb_t *nal,
+                        ptl_size_t len)
+{
+        (void)nal;
+
+        return malloc(len);
+}
+
+/* cb_free slot: plain free(buf); nal and len are ignored. */
+static void nal_free(nal_cb_t *nal,
+                     void *buf,
+                     ptl_size_t len)
+{
+        (void)nal;
+        (void)len;
+
+        free(buf);
+}
+
+/* cb_printf slot: formats the message to standard output; nal is
+ * ignored. */
+static void nal_printf(nal_cb_t *nal,
+                       const char *fmt,
+                       ...)
+{
+        va_list args;
+
+        (void)nal;
+
+        va_start(args, fmt);
+        vfprintf(stdout, fmt, args);
+        va_end(args);
+}
+
+
+/* cb_cli slot: intentionally does nothing. */
+static void nal_cli(nal_cb_t *nal,
+                    unsigned long *flags)
+{
+        (void)nal;
+        (void)flags;
+}
+
+
+/* cb_sti slot: intentionally does nothing. */
+static void nal_sti(nal_cb_t *nal,
+                    unsigned long *flags)
+{
+        (void)nal;
+        (void)flags;
+}
+
+
+/* cb_dist slot: returns 0 for every nid and leaves *dist untouched. */
+static int nal_dist(nal_cb_t *nal,
+                    ptl_nid_t nid,
+                    unsigned long *dist)
+{
+        (void)nal;
+        (void)nid;
+        (void)dist;
+
+        return 0;
+}
+
+
+
+/* Function: data_from_api
+ * Arguments: arg: the bridge for this interface
+ * Returns: whether to continue reading from the pipe
+ *          (1 to keep the handler, 0 to have it disabled)
+ *
+ * data_from_api() reads one request from the api side in response
+ * to a select: the dispatch index, the argument and result
+ * lengths and the argument block.  The request is dispatched and
+ * ret_len bytes of result are written back.  PTL_FINI shuts the
+ * library side down and ends the calling thread.
+ *
+ * The lengths come from the pipe and are checked against the local
+ * buffers before use; a request that does not fit is treated like
+ * any other pipe failure.
+ *
+ * We define data_failure() for syntactic convenience
+ * of unix error reporting.
+ */
+
+#define data_failure(operand,fd,buffer,length)\
+        if(syscall(SYS_##operand,fd,buffer,length)!=length){\
+                lib_fini(b->nal_cb);\
+                return(0);\
+        }
+static int data_from_api(void *arg)
+{
+        bridge b = arg;
+        procbridge p=(procbridge)b->local;
+        /* where are these two sizes derived from ??*/
+        char arg_block[ 256 ];
+        char ret_block[ 128 ];
+        ptl_size_t arg_len,ret_len;
+        int fd=p->to_lib[0];
+        int index;
+
+        data_failure(read,fd, &index, sizeof(index));
+
+        if (index==PTL_FINI) {
+                lib_fini(b->nal_cb);
+                if (b->shutdown) (*b->shutdown)(b);
+                syscall(SYS_write, p->from_lib[1],&b->alive,sizeof(b->alive));
+
+                /* a heavy-handed but convenient way of shutting down
+                   the lower side thread */
+                pthread_exit(0);
+        }
+
+        data_failure(read,fd, &arg_len, sizeof(arg_len));
+        data_failure(read,fd, &ret_len, sizeof(ret_len));
+
+        /* never read or write past the fixed-size stack buffers; the
+           cast also rejects negative lengths should the type be signed */
+        if (((unsigned long long)arg_len > sizeof(arg_block)) ||
+            ((unsigned long long)ret_len > sizeof(ret_block))) {
+                lib_fini(b->nal_cb);
+                return(0);
+        }
+
+        data_failure(read,fd, arg_block, arg_len);
+
+        lib_dispatch(b->nal_cb, NULL, index, arg_block, ret_block);
+
+        data_failure(write,p->from_lib[1],ret_block, ret_len);
+        return(1);
+}
+#undef data_failure
+
+
+
+/* Thunk registered by nal_thread(): wakes every thread waiting on
+ * the procbridge condition variable.  z is the bridge. */
+static void wakeup_topside(void *z)
+{
+ bridge b=z;
+ procbridge p=b->local;
+
+ /* broadcast under the mutex the waiters hold while waiting */
+ pthread_mutex_lock(&p->mutex);
+ pthread_cond_broadcast(&p->cond);
+ pthread_mutex_unlock(&p->mutex);
+}
+
+
+/* Function: nal_thread
+ * Arguments: z: an opaque reference to a nal control structure
+ * allocated and partially populated by the api level code
+ * Returns: nothing, and only on error or explicit shutdown
+ *
+ * This function is the entry point of the pthread initiated on
+ * the api side of the interface. This thread is used to handle
+ * asynchronous delivery to the application.
+ *
+ * We define a limit macro to place a ceiling on limits
+ * for syntactic convenience
+ */
+#define LIMIT(x,y,max)\
+ if ((unsigned int)x > max) y = max;
+
+extern int tcpnal_init(bridge);
+
+nal_initialize nal_table[PTL_IFACE_MAX]={0,tcpnal_init,0};
+
+void *nal_thread(void *z)
+{
+ bridge b=z;
+ procbridge p=b->local;
+ int rc;
+ ptl_pid_t pid_request;
+ int nal_type;
+ ptl_ni_limits_t desired;
+ ptl_ni_limits_t actual;
+
+ b->nal_cb=(nal_cb_t *)malloc(sizeof(nal_cb_t));
+ b->nal_cb->nal_data=b;
+ b->nal_cb->cb_read=nal_read;
+ b->nal_cb->cb_write=nal_write;
+ b->nal_cb->cb_malloc=nal_malloc;
+ b->nal_cb->cb_free=nal_free;
+ b->nal_cb->cb_map=NULL;
+ b->nal_cb->cb_unmap=NULL;
+ b->nal_cb->cb_printf=nal_printf;
+ b->nal_cb->cb_cli=nal_cli;
+ b->nal_cb->cb_sti=nal_sti;
+ b->nal_cb->cb_dist=nal_dist;
+
+
+ register_io_handler(p->to_lib[0],READ_HANDLER,data_from_api,(void *)b);
+
+ if(!(rc = syscall(SYS_read, p->to_lib[0], &pid_request, sizeof(pid_request))))
+ perror("procbridge read from api");
+ if(!(rc = syscall(SYS_read, p->to_lib[0], &desired, sizeof(ptl_ni_limits_t))))
+ perror("procbridge read from api");
+ if(!(rc = syscall(SYS_read, p->to_lib[0], &nal_type, sizeof(nal_type))))
+ perror("procbridge read from api");
+
+ actual = desired;
+ LIMIT(desired.max_match_entries,actual.max_match_entries,MAX_MES);
+ LIMIT(desired.max_mem_descriptors,actual.max_mem_descriptors,MAX_MDS);
+ LIMIT(desired.max_event_queues,actual.max_event_queues,MAX_EQS);
+ LIMIT(desired.max_atable_index,actual.max_atable_index,MAX_ACLS);
+ LIMIT(desired.max_ptable_index,actual.max_ptable_index,MAX_PTLS);
+
+ set_address(b,pid_request);
+
+ if (nal_table[nal_type]) rc=(*nal_table[nal_type])(b);
+ /* initialize the generic 'library' level code */
+
+ rc = lib_init(b->nal_cb,
+ b->nal_cb->ni.nid,
+ b->nal_cb->ni.pid,
+ 10,
+ actual.max_ptable_index,
+ actual.max_atable_index);
+
+ /*
+ * Whatever the initialization returned is passed back to the
+ * user level code for further interpretation. We just exit if
+ * it is non-zero since something went wrong.
+ */
+ /* this should perform error checking */
+#if 0
+ write(p->from_lib[1], &actual, sizeof(ptl_ni_limits_t));
+#endif
+ syscall(SYS_write, p->from_lib[1], &rc, sizeof(rc));
+
+ if(!rc) {
+ /* the thunk function is called each time the timer loop
+ performs an operation and returns to blocking mode. we
+ overload this function to inform the api side that
+ it may be interested in looking at the event queue */
+ register_thunk(wakeup_topside,b);
+ timer_loop();
+ }
+ return(0);
+}
+#undef LIMIT
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ * Copyright (c) 2002 Eric Hoffman
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* select.c:
+ * Provides a general mechanism for registering and dispatching
+ * io events through the select system call.
+ */
+
+#ifdef sun
+#include <sys/filio.h>
+#else
+#include <sys/ioctl.h>
+#endif
+
+#include <sys/time.h>
+#include <sys/types.h>
+#include <stdlib.h>
+#include <pqtimer.h>
+#include <dispatch.h>
+
+
+/* time at which init_unix_timer() ran; not read anywhere in this file */
+static struct timeval beginning_of_epoch;
+/* head of the list of registered io handlers */
+static io_handler io_handlers;
+
+/* Function: now
+ *
+ * Return: the current time in canonical units: a 64 bit number
+ *         where the most significant 32 bits contain the number
+ *         of seconds, and the least significant a count of
+ *         (1/(2^32))ths of a second.
+ */
+when now()
+{
+        struct timeval tv;
+        unsigned long long seconds;
+        unsigned long long fraction;
+
+        gettimeofday(&tv,0);
+
+        seconds = ((unsigned long long)tv.tv_sec) << 32;
+        /* scale microseconds to 1/2^32 second units */
+        fraction = (((unsigned long long)tv.tv_usec) << 32) / 1000000;
+        return(seconds | fraction);
+}
+
+
+/* Function: register_io_handler
+ * Arguments: fd: the file descriptor of interest
+ *            type: a mask of READ_HANDLER, WRITE_HANDLER, EXCEPTION_HANDLER
+ *            function: a function to call when io is available on fd
+ *            arg: an opaque correlator to return to the handler
+ * Returns: a pointer to the io_handler structure, or zero if it
+ *          could not be allocated
+ *
+ * Every field of the handler is initialized, but the handler is
+ * only linked into the dispatch list when fd is not negative.
+ */
+io_handler register_io_handler(int fd,
+                               int type,
+                               int (*function)(void *),
+                               void *arg)
+{
+        io_handler i=(io_handler)malloc(sizeof(struct io_handler));
+
+        if (!i) return(0);
+
+        i->fd=fd;
+        i->type=type;
+        i->function=function;
+        i->argument=arg;
+        i->disabled=0;
+        i->last=0;
+        i->next=0;
+
+        if (fd>=0){
+                /* push onto the front of the list */
+                i->last=&io_handlers;
+                if ((i->next=io_handlers)) i->next->last=&i->next;
+                io_handlers=i;
+        }
+        return(i);
+}
+
+/* Function: remove_io_handler
+ * Arguments: i: a pointer to the handler to stop servicing
+ *
+ * remove_io_handler() only flags the handler as disabled.  The
+ * handler is not freed here, due to reentrancy problems; the
+ * blocking function unlinks and frees flagged handlers later.
+ */
+void remove_io_handler (io_handler i)
+{
+        i->disabled = 1;
+}
+
+/* Adds the handler's descriptor to each of the three sets its type
+ * mask asks for: fds[0] for read, fds[1] for write and fds[2] for
+ * exceptions. */
+static void set_flag(io_handler n,fd_set *fds)
+{
+        const int wanted = n->type;
+
+        if (wanted & READ_HANDLER)
+                FD_SET(n->fd, &fds[0]);
+        if (wanted & WRITE_HANDLER)
+                FD_SET(n->fd, &fds[1]);
+        if (wanted & EXCEPTION_HANDLER)
+                FD_SET(n->fd, &fds[2]);
+}
+
+
+/* Function: select_timer_block
+ * Arguments: until: an absolute time when the select should return,
+ *                   or 0 to wait without a timeout
+ *
+ * This function dispatches the various file descriptors' handler
+ * functions, if the kernel indicates there is io available.
+ * Handlers that were disabled are unlinked and freed first, so
+ * their descriptors are never handed to select.  A handler whose
+ * function returns 0 is disabled.
+ */
+void select_timer_block(when until)
+{
+        fd_set fds[3];
+        struct timeval timeout;
+        struct timeval *timeout_pointer;
+        int result;
+        io_handler j;
+        io_handler *k;
+
+        /* TODO: loop until the entire interval is expired*/
+        if (until){
+                when present=now();
+                /* a deadline already in the past means "do not wait";
+                   the unsigned subtraction would wrap to a huge value */
+                when interval=(until>present)?(until-present):0;
+
+                timeout.tv_sec=(interval>>32);
+                /* the low 32 bits count 1/2^32 seconds; scale them
+                   to microseconds */
+                timeout.tv_usec=((interval&0xffffffffull)*1000000)>>32;
+                timeout_pointer=&timeout;
+        } else timeout_pointer=0;
+
+        FD_ZERO(fds);
+        FD_ZERO(fds+1);
+        FD_ZERO(fds+2);
+        for (k=&io_handlers;*k;){
+                if ((*k)->disabled){
+                        j=*k;
+                        *k=j->next;
+                        if (*k) (*k)->last=k;
+                        free(j);
+                        /* look at the successor again: it may be
+                           disabled as well */
+                        continue;
+                }
+                set_flag(*k,fds);
+                k=&(*k)->next;
+        }
+        result=select(FD_SETSIZE,fds,fds+1,fds+2,timeout_pointer);
+
+        if (result > 0)
+                for (j=io_handlers;j;j=j->next){
+                        if (!(j->disabled) &&
+                            ((FD_ISSET(j->fd,fds) && (j->type & READ_HANDLER)) ||
+                             (FD_ISSET(j->fd,fds+1) && (j->type & WRITE_HANDLER)) ||
+                             (FD_ISSET(j->fd,fds+2) && (j->type & EXCEPTION_HANDLER)))){
+                                if (!(*j->function)(j->argument))
+                                        j->disabled=1;
+                        }
+                }
+}
+
+/* Function: init_unix_timer()
+ * is called to initialize the library: it empties the handler
+ * list, records the current time and installs
+ * select_timer_block() as the timer's blocking function.
+ */
+void init_unix_timer()
+{
+        io_handlers = 0;
+        gettimeofday(&beginning_of_epoch, 0);
+
+        initialize_timer(select_timer_block);
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ * Copyright (c) 2002 Eric Hoffman
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <table.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+/* table.c:
+ * a very simple hash table implementation with parameterizable
+ * comparison and key generation functions. it does resize
+ * in order to accommodate more entries, but never collapses
+ * the table
+ */
+
+/* table_lookup: locate the slot that holds the entry matching comparator.
+ * Arguments: t: the table to search
+ *            comparator: value handed to compare_function
+ *            k: full (unreduced) hash key of comparator
+ *            compare_function: called as (entry value, comparator) for
+ *                     entries whose stored key equals k; non-zero means
+ *                     match. When null nothing can match.
+ *            success: set to 1 when a match was found, else 0
+ * Returns: on a match, a pointer to the link that points at the
+ *          matching entry; otherwise a pointer to the head of the
+ *          bucket k falls in (the place to push a new entry)
+ */
+static table_entry *table_lookup (table t,void *comparator,
+                                  unsigned int k,
+                                  int (*compare_function)(void *, void *),
+                                  int *success)
+{
+        unsigned int bucket=k%t->size;
+        table_entry *slot;
+
+        *success=0;
+        if (compare_function){
+                for (slot=&(t->entries[bucket]);*slot;slot=&((*slot)->next)){
+                        /* call the function that was checked for null
+                           above, not t->compare_function */
+                        if (((*slot)->key==k) &&
+                            (*compare_function)((*slot)->value,comparator)){
+                                *success=1;
+                                return(slot);
+                        }
+                }
+        }
+        return(&(t->entries[bucket]));
+}
+
+
+/* resize_table: rehash every entry of t into a bucket array of `size`
+ * slots. Entries are relinked, not copied. If size is not positive or
+ * the new array cannot be allocated the table is left exactly as it
+ * was, so it keeps working with its current bucket count.
+ */
+static void resize_table(table t, int size)
+{
+        int old_size=t->size;
+        table_entry *old_entries=t->entries;
+        table_entry *new_entries;
+        table_entry j,n;
+        table_entry *position;
+        int i;
+
+        /* a zero-sized table would make every later key%size divide
+           by zero */
+        if (size<=0) return;
+
+        /* allocate before touching t so a failure cannot leave t->size
+           and t->entries out of step */
+        new_entries=(table_entry *)calloc(size,sizeof(table_entry));
+        if (!new_entries) return;
+
+        t->size=size;
+        t->entries=new_entries;
+
+        for (i=0;i<old_size;i++)
+                for (j=old_entries[i];j;j=n){
+                        n=j->next;
+                        /* push onto the head of the entry's new bucket */
+                        position=&(new_entries[j->key%t->size]);
+                        j->next= *position;
+                        *position=j;
+                }
+        free(old_entries);
+}
+
+
+/* Function: key_from_int
+ * Arguments: int i: value to compute the key of
+ * Returns: i itself, converted to unsigned
+ */
+unsigned int key_from_int(int i)
+{
+        unsigned int key = (unsigned int)i;
+
+        return key;
+}
+
+
+/* Function: key_from_string
+ * Arguments: char *s: the null terminated string
+ *                     to compute the key of
+ * Returns: the key; 1 for a null pointer, 0 for an empty string
+ *
+ * Each byte (taken as unsigned) contributes (byte*57) ^ (byte*index),
+ * and the contributions are xor-ed together.
+ */
+unsigned int key_from_string(char *s)
+{
+        unsigned int hash = 0;
+        unsigned char *p;
+        int pos = 0;
+
+        if (s == 0)
+                return 1;
+
+        p = (unsigned char *)s;
+        while (*p) {
+                hash ^= (*p * 57) ^ (*p * pos);
+                p++;
+                pos++;
+        }
+        return hash;
+}
+
+
+/* Function: hash_create_table
+ * Arguments: compare_function: a function to compare
+ *                    a table instance with a correlator
+ *            key_function: a function to generate a 32 bit
+ *                    hash key from a correlator
+ * Returns: a pointer to the new table (4 empty buckets), or null
+ *          if memory could not be allocated
+ */
+table hash_create_table (int (*compare_function)(void *, void *),
+                         unsigned int (*key_function)(unsigned int *))
+{
+        table new=(table)calloc(1,sizeof(struct table));
+
+        if (!new) return(0);
+
+        new->compare_function=compare_function;
+        new->key_function=key_function;
+        new->number_of_entries=0;
+        new->size=4;
+        new->entries=(table_entry *)calloc(new->size,sizeof(table_entry));
+        if (!new->entries){
+                /* do not hand back a table without a bucket array */
+                free(new);
+                return(0);
+        }
+        return(new);
+}
+
+
+/* Function: hash_table_find
+ * Arguments: t: a table to look in
+ *            comparator: a value to access the table entry
+ * Returns: the value stored for comparator, or null when the
+ *          table holds no matching entry
+ */
+void *hash_table_find (table t, void *comparator)
+{
+        int found;
+        unsigned int k=(*t->key_function)(comparator);
+        table_entry *slot=table_lookup(t,comparator,k,
+                                       t->compare_function,&found);
+
+        if (!found)
+                return(0);
+        return((*slot)->value);
+}
+
+
+/* Function: hash_table_insert
+ * Arguments: t: a table to insert the object
+ *            value: the object to put in the table
+ *            comparator: the value by which the object
+ *                    will be addressed
+ * Returns: nothing
+ *
+ * An entry that already matches comparator has its value replaced;
+ * otherwise a new entry is pushed onto the head of its bucket. The
+ * bucket array is doubled once there are more entries than buckets.
+ * Note that the result of malloc() is not checked.
+ */
+void hash_table_insert (table t, void *value, void *comparator)
+{
+        int found;
+        unsigned int k=(*t->key_function)(comparator);
+        table_entry *slot=table_lookup(t,comparator,k,
+                                       t->compare_function,&found);
+        table_entry e;
+
+        if (!found) {
+                e = (table_entry)malloc(sizeof(struct table_entry));
+                memset(e, 0, sizeof(struct table_entry));
+                e->next= *slot;
+                *slot=e;
+                t->number_of_entries++;
+        } else {
+                e = *slot;
+        }
+
+        e->value=value;
+        e->key=k;
+
+        if (t->number_of_entries > t->size)
+                resize_table(t,t->size*2);
+}
+
+/* Function: hash_table_remove
+ * Arguments: t: the table to remove the object from
+ *            comparator: the index value of the object to remove
+ * Returns: nothing
+ *
+ * Unlinks and frees the matching entry, if there is one. The stored
+ * value itself is not freed.
+ */
+void hash_table_remove (table t, void *comparator)
+{
+        int found;
+        unsigned int k=(*t->key_function)(comparator);
+        table_entry *slot=table_lookup(t,comparator,k,
+                                       t->compare_function,&found);
+        table_entry victim;
+
+        if (!found)
+                return;
+
+        victim= *slot;
+        *slot=victim->next;
+        free(victim); /* the value? */
+        t->number_of_entries--;
+}
+
+/* Function: hash_iterate_table_entries
+ * Arguments: t: the table to iterate over
+ *            handler: a function to call with each element
+ *                     of the table, along with arg
+ *            arg: the opaque object to pass to handler
+ * Returns: nothing
+ *
+ * handler is called as handler(arg, value), bucket by bucket. The
+ * link to the following entry is taken before handler runs.
+ */
+void hash_iterate_table_entries(table t,
+                                void (*handler)(void *,void *),
+                                void *arg)
+{
+        int bucket;
+        table_entry *cursor;
+
+        for (bucket=0;bucket<t->size;bucket++){
+                cursor=t->entries+bucket;
+                while (*cursor){
+                        table_entry *following=&((*cursor)->next);
+
+                        (*handler)(arg,(*cursor)->value);
+                        cursor=following;
+                }
+        }
+}
+
+/* Function: hash_filter_table_entries
+ * Arguments: t: the table to iterate over
+ *            handler: a function to call with each element
+ *                     of the table, along with arg
+ *            arg: the opaque object to pass to handler
+ * Returns: nothing
+ * Notes: operations on the table inside handler are not safe
+ *
+ * Calls handler(arg, value) for each item in the table. A non-zero
+ * return keeps the item; a zero return unlinks and frees its entry
+ * (the value itself is not freed) and decrements the entry count.
+ */
+void hash_filter_table_entries(table t, int (*handler)(void *, void *), void *arg)
+{
+        int bucket;
+        table_entry *slot;
+        table_entry doomed;
+
+        for (bucket=0;bucket<t->size;bucket++){
+                slot=t->entries+bucket;
+                while (*slot){
+                        table_entry *following=&((*slot)->next);
+
+                        if ((*handler)(arg,(*slot)->value)){
+                                slot=following;
+                        } else {
+                                /* stay on this slot: it now refers to
+                                   the entry after the removed one */
+                                doomed= *slot;
+                                *slot=doomed->next;
+                                free(doomed);
+                                t->number_of_entries--;
+                        }
+                }
+        }
+}
+
+/* Function: hash_destroy_table
+ * Arguments: t: the table to free
+ *            thunk: a function to call with each element,
+ *                   most likely free(); may be null to leave the
+ *                   elements alone
+ * Returns: nothing
+ *
+ * Frees every entry, then the bucket array, then the table itself.
+ */
+void hash_destroy_table(table t,void (*thunk)(void *))
+{
+        table_entry j,next;
+        int i;
+
+        for (i=0;i<t->size;i++){
+                j=t->entries[i];
+                while (j){
+                        next=j->next;
+                        if (thunk)
+                                (*thunk)(j->value);
+                        free(j);
+                        j=next;
+                }
+        }
+        free(t->entries);
+        free(t);
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ * Copyright (c) 2002 Eric Hoffman
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+#ifndef E_TABLE
+#define E_TABLE
+
+/* One chained entry of a hash bucket. Note that table_entry names a
+ * pointer to the structure, not the structure itself. */
+typedef struct table_entry {
+ /* full hash key as produced by the table's key_function */
+ unsigned int key;
+ /* the stored object; the table does not free it when an entry is removed */
+ void *value;
+ /* next entry in the same bucket, or null */
+ struct table_entry *next;
+} *table_entry;
+
+
+/* The hash table. Note that table names a pointer to the structure. */
+typedef struct table {
+ /* number of buckets in entries */
+ unsigned int size;
+ /* number of entries currently linked into the table */
+ int number_of_entries;
+ /* array of size bucket heads */
+ table_entry *entries;
+ /* called as (stored value, comparator); non-zero means they match */
+ int (*compare_function)(void *, void *);
+ /* computes the hash key of a comparator */
+ unsigned int (*key_function)(unsigned int *);
+} *table;
+
+/* table.c */
+unsigned int key_from_int(int i);
+unsigned int key_from_string(char *s);
+table hash_create_table(int (*compare_function)(void *, void *), unsigned int (*key_function)(unsigned int *));
+void *hash_table_find(table t, void *comparator);
+void hash_table_insert(table t, void *value, void *comparator);
+void hash_table_remove(table t, void *comparator);
+void hash_iterate_table_entries(table t, void (*handler)(void *, void *), void *arg);
+void hash_filter_table_entries(table t, int (*handler)(void *, void *), void *arg);
+void hash_destroy_table(table t, void (*thunk)(void *));
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* tcpnal.c:
+ This file implements the TCP-based nal by providing glue
+ between the connection service and the generic NAL implementation */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <syscall.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <pqtimer.h>
+#include <dispatch.h>
+#include <bridge.h>
+#include <ipmap.h>
+#include <connection.h>
+
+/* Function: tcpnal_send
+ * Arguments: n:       pointer to my nal control block
+ *            private: handed on to lib_finalize()
+ *            cookie:  passed back to the portals library
+ *            hdr:     pointer to the portals header
+ *            type:    not used yet (see the FIXME below)
+ *            nid:     destination node
+ *            pid:     destination process
+ *            niov:    number of entries in iov; at most one is accepted
+ *            iov:     body of the message
+ *            len:     length of the body
+ * Returns: zero on success, 1 when no connection to the peer
+ *          could be obtained
+ *
+ * sends a packet to the peer, after ensuring that a connection exists.
+ * The header and, when len is non-zero, the body go out in a single
+ * writev call.
+ */
+#warning FIXME: "param 'type' is newly added, make use of it!!"
+int tcpnal_send(nal_cb_t *n,
+                void *private,
+                lib_msg_t *cookie,
+                ptl_hdr_t *hdr,
+                int type,
+                ptl_nid_t nid,
+                ptl_pid_t pid,
+                unsigned int niov,
+                struct iovec *iov,
+                size_t len)
+{
+        bridge b=(bridge)n->nal_data;
+        connection c;
+        struct iovec tiov[2];
+        int count;
+
+        c=force_tcp_connection((manager)b->lower,
+                               PNAL_IP(nid,b),
+                               PNAL_PORT(nid,pid));
+        if (!c)
+                return(1);
+
+        /* TODO: the result of the write should be checked. furthermore,
+           provision must be made for the SIGPIPE which is delivered when
+           writing on a tcp socket which has closed underneath
+           the application. there is a linux flag in the sendmsg
+           call which turns off the signalling behaviour, but it is
+           nonstandard */
+        LASSERT (niov <= 1);
+
+        tiov[0].iov_base = hdr;
+        tiov[0].iov_len = sizeof(ptl_hdr_t);
+        count = 1;
+
+        if (len) {
+                tiov[1].iov_base = iov[0].iov_base;
+                tiov[1].iov_len = len;
+                count = 2;
+        }
+
+        syscall(SYS_writev, c->fd, tiov, count);
+        lib_finalize(n, private, cookie);
+
+        return(0);
+}
+
+
+/* Function: tcpnal_recv
+ * Arguments: n:       pointer to my nal control block
+ *            private: connection pointer passed through
+ *                     lib_parse()
+ *            cookie:  passed back to portals library
+ *            niov:    number of entries in iov; at most one is accepted
+ *            iov:     describes the destination buffer
+ *            mlen:    number of bytes to deliver into the buffer
+ *            rlen:    length of data in the network
+ * Returns: rlen
+ *
+ * blocking read of the requested data. must drain out the
+ * difference of manipulated and requested lengths from the network.
+ * The surplus is drained through a fixed-size scratch buffer, so no
+ * allocation sized by the peer's message is needed.
+ *
+ * NOTE(review): lib_finalize() is only reached when mlen is non-zero;
+ * confirm that zero-length receives are finalized elsewhere.
+ */
+int tcpnal_recv(nal_cb_t *n,
+                void *private,
+                lib_msg_t *cookie,
+                unsigned int niov,
+                struct iovec *iov,
+                ptl_size_t mlen,
+                ptl_size_t rlen)
+
+{
+        if (mlen) {
+                LASSERT (niov <= 1);
+                read_connection(private,iov[0].iov_base,mlen);
+                lib_finalize(n, private, cookie);
+        }
+
+        if (mlen!=rlen){
+                char trash[4096];
+                ptl_size_t remaining=rlen-mlen;
+
+                while (remaining){
+                        size_t chunk=sizeof(trash);
+
+                        if (remaining < chunk)
+                                chunk=remaining;
+
+                        /* read_connection() yields zero on failure (see
+                           from_connection); stop draining in that case */
+                        /*TODO: report the error status*/
+                        if (!read_connection(private,trash,chunk))
+                                break;
+                        remaining-=chunk;
+                }
+        }
+
+        return(rlen);
+}
+
+
+/* Function: from_connection:
+ * Arguments: a: the bridge this connection belongs to
+ *            c: the connection to read from
+ * Returns: whether or not to continue reading from this connection,
+ *          expressed as a 1 to continue, and a 0 to not
+ *
+ * from_connection() is called from the select loop when i/o is
+ * available. It attempts to read the portals header and, if that
+ * works, passes it to the generic library for processing.
+ */
+static int from_connection(void *a,connection c)
+{
+        bridge b=a;
+        ptl_hdr_t hdr;
+
+        if (!read_connection(c, (unsigned char *)&hdr, sizeof(hdr)))
+                return(0);
+
+        lib_parse(b->nal_cb, &hdr, c);
+        return(1);
+}
+
+
+/* Shutdown hook installed by tcpnal_init(): passes the bridge's
+ * lower layer (the value stored by tcpnal_init) to
+ * shutdown_connections(). */
+static void tcpnal_shutdown(bridge b)
+{
+        shutdown_connections(b->lower);
+}
+
+/* Function: tcpnal_init
+ * Arguments: b: the bridge to attach the TCP nal to
+ * Returns: PTL_OK on success, PTL_NAL_FAILED when the connection
+ *          service could not be started
+ *
+ * Installs the send, receive and shutdown callbacks, then starts the
+ * connection service on the port derived from the bridge's nid and
+ * pid, with from_connection() as its input handler.
+ */
+int tcpnal_init(bridge b)
+{
+        manager m;
+
+        b->nal_cb->cb_send=tcpnal_send;
+        b->nal_cb->cb_recv=tcpnal_recv;
+        b->shutdown=tcpnal_shutdown;
+
+        m=init_connections(PNAL_PORT(b->nal_cb->ni.nid,
+                                     b->nal_cb->ni.pid),
+                           from_connection,b);
+        if (m){
+                /* XXX cfs hack */
+                b->nal_cb->ni.pid=0;
+                b->lower=m;
+                return(PTL_OK);
+        }
+
+        /* TODO: this needs to shut down the
+           newly created junk */
+        return(PTL_NAL_FAILED);
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ * Copyright (c) 2002 Eric Hoffman
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+/* TODO: make this an explicit type when they become available */
+/* 64-bit unsigned time value used throughout the timer interface */
+typedef unsigned long long when;
+
+/* A registered timer. Note that timer names a pointer to the structure.
+ * NOTE(review): field meanings below are inferred from their names and
+ * from the register_timer() prototype - confirm against the
+ * implementation. */
+typedef struct timer {
+ /* presumably the callback to run when the timer fires */
+ void (*function)(void *);
+ /* presumably the argument handed to function */
+ void *arg;
+ /* presumably the time at which the timer is due */
+ when w;
+ int interval;
+ int disable;
+} *timer;
+
+timer register_timer(when, void (*f)(void *), void *a);
+void remove_timer(timer t);
+void timer_loop(void);
+void initialize_timer(void);
+void register_thunk(void (*f)(void *),void *a);
+
+
+/* 2^32; presumably the number of `when` units in one second - TODO confirm */
+#define HZ 0x100000000ull
+
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+/* Fixed-width integer aliases, named for the width they are meant to have.
+ * NOTE(review): uint32 is unsigned long, which is 64 bits wide on LP64
+ * platforms; confirm no user depends on it being exactly 32 bits. */
+typedef unsigned short uint16;
+typedef unsigned long uint32;
+typedef unsigned long long uint64;
+typedef unsigned char uint8;
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ * Copyright (c) 2002 Eric Hoffman
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <table.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+/* table.c:
+ * a very simple hash table implementation with parameterizable
+ * comparison and key generation functions. it does resize
+ * in order to accommodate more entries, but never collapses
+ * the table
+ */
+
+/* table_lookup: locate the slot that holds the entry matching comparator.
+ * Arguments: t: the table to search
+ *            comparator: value handed to compare_function
+ *            k: full (unreduced) hash key of comparator
+ *            compare_function: called as (entry value, comparator) for
+ *                     entries whose stored key equals k; non-zero means
+ *                     match. When null nothing can match.
+ *            success: set to 1 when a match was found, else 0
+ * Returns: on a match, a pointer to the link that points at the
+ *          matching entry; otherwise a pointer to the head of the
+ *          bucket k falls in (the place to push a new entry)
+ */
+static table_entry *table_lookup (table t,void *comparator,
+                                  unsigned int k,
+                                  int (*compare_function)(void *, void *),
+                                  int *success)
+{
+        unsigned int bucket=k%t->size;
+        table_entry *slot;
+
+        *success=0;
+        if (compare_function){
+                for (slot=&(t->entries[bucket]);*slot;slot=&((*slot)->next)){
+                        /* call the function that was checked for null
+                           above, not t->compare_function */
+                        if (((*slot)->key==k) &&
+                            (*compare_function)((*slot)->value,comparator)){
+                                *success=1;
+                                return(slot);
+                        }
+                }
+        }
+        return(&(t->entries[bucket]));
+}
+
+
+/* resize_table: rehash every entry of t into a bucket array of `size`
+ * slots. Entries are relinked, not copied. If size is not positive or
+ * the new array cannot be allocated the table is left exactly as it
+ * was, so it keeps working with its current bucket count.
+ */
+static void resize_table(table t, int size)
+{
+        int old_size=t->size;
+        table_entry *old_entries=t->entries;
+        table_entry *new_entries;
+        table_entry j,n;
+        table_entry *position;
+        int i;
+
+        /* a zero-sized table would make every later key%size divide
+           by zero */
+        if (size<=0) return;
+
+        /* allocate before touching t so a failure cannot leave t->size
+           and t->entries out of step */
+        new_entries=(table_entry *)calloc(size,sizeof(table_entry));
+        if (!new_entries) return;
+
+        t->size=size;
+        t->entries=new_entries;
+
+        for (i=0;i<old_size;i++)
+                for (j=old_entries[i];j;j=n){
+                        n=j->next;
+                        /* push onto the head of the entry's new bucket */
+                        position=&(new_entries[j->key%t->size]);
+                        j->next= *position;
+                        *position=j;
+                }
+        free(old_entries);
+}
+
+
+/* Function: key_from_int
+ * Arguments: int i: value to compute the key of
+ * Returns: i itself, converted to unsigned
+ */
+unsigned int key_from_int(int i)
+{
+        unsigned int key = (unsigned int)i;
+
+        return key;
+}
+
+
+/* Function: key_from_string
+ * Arguments: char *s: the null terminated string
+ *                     to compute the key of
+ * Returns: the key; 1 for a null pointer, 0 for an empty string
+ *
+ * Each byte (taken as unsigned) contributes (byte*57) ^ (byte*index),
+ * and the contributions are xor-ed together.
+ */
+unsigned int key_from_string(char *s)
+{
+        unsigned int hash = 0;
+        unsigned char *p;
+        int pos = 0;
+
+        if (s == 0)
+                return 1;
+
+        p = (unsigned char *)s;
+        while (*p) {
+                hash ^= (*p * 57) ^ (*p * pos);
+                p++;
+                pos++;
+        }
+        return hash;
+}
+
+
+/* Function: hash_create_table
+ * Arguments: compare_function: a function to compare
+ *                    a table instance with a correlator
+ *            key_function: a function to generate a 32 bit
+ *                    hash key from a correlator
+ * Returns: a pointer to the new table (4 empty buckets), or null
+ *          if memory could not be allocated
+ */
+table hash_create_table (int (*compare_function)(void *, void *),
+                         unsigned int (*key_function)(unsigned int *))
+{
+        table new=(table)calloc(1,sizeof(struct table));
+
+        if (!new) return(0);
+
+        new->compare_function=compare_function;
+        new->key_function=key_function;
+        new->number_of_entries=0;
+        new->size=4;
+        new->entries=(table_entry *)calloc(new->size,sizeof(table_entry));
+        if (!new->entries){
+                /* do not hand back a table without a bucket array */
+                free(new);
+                return(0);
+        }
+        return(new);
+}
+
+
+/* Function: hash_table_find
+ * Arguments: t: a table to look in
+ *            comparator: a value to access the table entry
+ * Returns: the value stored for comparator, or null when the
+ *          table holds no matching entry
+ */
+void *hash_table_find (table t, void *comparator)
+{
+        int found;
+        unsigned int k=(*t->key_function)(comparator);
+        table_entry *slot=table_lookup(t,comparator,k,
+                                       t->compare_function,&found);
+
+        if (!found)
+                return(0);
+        return((*slot)->value);
+}
+
+
+/* Function: hash_table_insert
+ * Arguments: t: a table to insert the object
+ *            value: the object to put in the table
+ *            comparator: the value by which the object
+ *                    will be addressed
+ * Returns: nothing
+ *
+ * An entry that already matches comparator has its value replaced;
+ * otherwise a new entry is pushed onto the head of its bucket. The
+ * bucket array is doubled once there are more entries than buckets.
+ * Note that the result of malloc() is not checked.
+ */
+void hash_table_insert (table t, void *value, void *comparator)
+{
+        int found;
+        unsigned int k=(*t->key_function)(comparator);
+        table_entry *slot=table_lookup(t,comparator,k,
+                                       t->compare_function,&found);
+        table_entry e;
+
+        if (!found) {
+                e = (table_entry)malloc(sizeof(struct table_entry));
+                memset(e, 0, sizeof(struct table_entry));
+                e->next= *slot;
+                *slot=e;
+                t->number_of_entries++;
+        } else {
+                e = *slot;
+        }
+
+        e->value=value;
+        e->key=k;
+
+        if (t->number_of_entries > t->size)
+                resize_table(t,t->size*2);
+}
+
+/* Function: hash_table_remove
+ * Arguments: t: the table to remove the object from
+ *            comparator: the index value of the object to remove
+ * Returns: nothing
+ *
+ * Unlinks and frees the matching entry, if there is one. The stored
+ * value itself is not freed.
+ */
+void hash_table_remove (table t, void *comparator)
+{
+        int found;
+        unsigned int k=(*t->key_function)(comparator);
+        table_entry *slot=table_lookup(t,comparator,k,
+                                       t->compare_function,&found);
+        table_entry victim;
+
+        if (!found)
+                return;
+
+        victim= *slot;
+        *slot=victim->next;
+        free(victim); /* the value? */
+        t->number_of_entries--;
+}
+
+/* Function: hash_iterate_table_entries
+ * Arguments: t: the table to iterate over
+ *            handler: a function to call with each element
+ *                     of the table, along with arg
+ *            arg: the opaque object to pass to handler
+ * Returns: nothing
+ *
+ * handler is called as handler(arg, value), bucket by bucket. The
+ * link to the following entry is taken before handler runs.
+ */
+void hash_iterate_table_entries(table t,
+                                void (*handler)(void *,void *),
+                                void *arg)
+{
+        int bucket;
+        table_entry *cursor;
+
+        for (bucket=0;bucket<t->size;bucket++){
+                cursor=t->entries+bucket;
+                while (*cursor){
+                        table_entry *following=&((*cursor)->next);
+
+                        (*handler)(arg,(*cursor)->value);
+                        cursor=following;
+                }
+        }
+}
+
+/* Function: hash_filter_table_entries
+ * Arguments: t: the table to iterate over
+ *            handler: a function to call with each element
+ *                     of the table, along with arg
+ *            arg: the opaque object to pass to handler
+ * Returns: nothing
+ * Notes: operations on the table inside handler are not safe
+ *
+ * Calls handler(arg, value) for each item in the table. A non-zero
+ * return keeps the item; a zero return unlinks and frees its entry
+ * (the value itself is not freed) and decrements the entry count.
+ */
+void hash_filter_table_entries(table t, int (*handler)(void *, void *), void *arg)
+{
+        int bucket;
+        table_entry *slot;
+        table_entry doomed;
+
+        for (bucket=0;bucket<t->size;bucket++){
+                slot=t->entries+bucket;
+                while (*slot){
+                        table_entry *following=&((*slot)->next);
+
+                        if ((*handler)(arg,(*slot)->value)){
+                                slot=following;
+                        } else {
+                                /* stay on this slot: it now refers to
+                                   the entry after the removed one */
+                                doomed= *slot;
+                                *slot=doomed->next;
+                                free(doomed);
+                                t->number_of_entries--;
+                        }
+                }
+        }
+}
+
+/* Function: hash_destroy_table
+ * Arguments: t: the table to free
+ *            thunk: a function to call with each element,
+ *                   most likely free(); may be null to leave the
+ *                   elements alone
+ * Returns: nothing
+ *
+ * Frees every entry, then the bucket array, then the table itself.
+ */
+void hash_destroy_table(table t,void (*thunk)(void *))
+{
+        table_entry j,next;
+        int i;
+
+        for (i=0;i<t->size;i++){
+                j=t->entries[i];
+                while (j){
+                        next=j->next;
+                        if (thunk)
+                                (*thunk)(j->value);
+                        free(j);
+                        j=next;
+                }
+        }
+        free(t->entries);
+        free(t);
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ * Copyright (c) 2002 Eric Hoffman
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+#ifndef E_TABLE
+#define E_TABLE
+
+/* One chained entry of a hash bucket. Note that table_entry names a
+ * pointer to the structure, not the structure itself. */
+typedef struct table_entry {
+ /* full hash key as produced by the table's key_function */
+ unsigned int key;
+ /* the stored object; the table does not free it when an entry is removed */
+ void *value;
+ /* next entry in the same bucket, or null */
+ struct table_entry *next;
+} *table_entry;
+
+
+/* The hash table. Note that table names a pointer to the structure. */
+typedef struct table {
+ /* number of buckets in entries */
+ unsigned int size;
+ /* number of entries currently linked into the table */
+ int number_of_entries;
+ /* array of size bucket heads */
+ table_entry *entries;
+ /* called as (stored value, comparator); non-zero means they match */
+ int (*compare_function)(void *, void *);
+ /* computes the hash key of a comparator */
+ unsigned int (*key_function)(unsigned int *);
+} *table;
+
+/* table.c */
+unsigned int key_from_int(int i);
+unsigned int key_from_string(char *s);
+table hash_create_table(int (*compare_function)(void *, void *), unsigned int (*key_function)(unsigned int *));
+void *hash_table_find(table t, void *comparator);
+void hash_table_insert(table t, void *value, void *comparator);
+void hash_table_remove(table t, void *comparator);
+void hash_iterate_table_entries(table t, void (*handler)(void *, void *), void *arg);
+void hash_filter_table_entries(table t, int (*handler)(void *, void *), void *arg);
+void hash_destroy_table(table t, void (*thunk)(void *));
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* tcpnal.c:
+ This file implements the TCP-based nal by providing glue
+ between the connection service and the generic NAL implementation */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <syscall.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <pqtimer.h>
+#include <dispatch.h>
+#include <bridge.h>
+#include <ipmap.h>
+#include <connection.h>
+
+/* Function: tcpnal_send
+ * Arguments: n:       pointer to my nal control block
+ *            private: handed on to lib_finalize()
+ *            cookie:  passed back to the portals library
+ *            hdr:     pointer to the portals header
+ *            type:    not used yet (see the FIXME below)
+ *            nid:     destination node
+ *            pid:     destination process
+ *            niov:    number of entries in iov; at most one is accepted
+ *            iov:     body of the message
+ *            len:     length of the body
+ * Returns: zero on success, 1 when no connection to the peer
+ *          could be obtained
+ *
+ * sends a packet to the peer, after ensuring that a connection exists.
+ * The header and, when len is non-zero, the body go out in a single
+ * writev call.
+ */
+#warning FIXME: "param 'type' is newly added, make use of it!!"
+int tcpnal_send(nal_cb_t *n,
+                void *private,
+                lib_msg_t *cookie,
+                ptl_hdr_t *hdr,
+                int type,
+                ptl_nid_t nid,
+                ptl_pid_t pid,
+                unsigned int niov,
+                struct iovec *iov,
+                size_t len)
+{
+        bridge b=(bridge)n->nal_data;
+        connection c;
+        struct iovec tiov[2];
+        int count;
+
+        c=force_tcp_connection((manager)b->lower,
+                               PNAL_IP(nid,b),
+                               PNAL_PORT(nid,pid));
+        if (!c)
+                return(1);
+
+        /* TODO: the result of the write should be checked. furthermore,
+           provision must be made for the SIGPIPE which is delivered when
+           writing on a tcp socket which has closed underneath
+           the application. there is a linux flag in the sendmsg
+           call which turns off the signalling behaviour, but it is
+           nonstandard */
+        LASSERT (niov <= 1);
+
+        tiov[0].iov_base = hdr;
+        tiov[0].iov_len = sizeof(ptl_hdr_t);
+        count = 1;
+
+        if (len) {
+                tiov[1].iov_base = iov[0].iov_base;
+                tiov[1].iov_len = len;
+                count = 2;
+        }
+
+        syscall(SYS_writev, c->fd, tiov, count);
+        lib_finalize(n, private, cookie);
+
+        return(0);
+}
+
+
+/* Function: tcpnal_recv
+ * Arguments: n:       pointer to my nal control block
+ *            private: connection pointer passed through
+ *                     lib_parse()
+ *            cookie:  passed back to portals library
+ *            niov:    number of entries in iov; at most one is accepted
+ *            iov:     describes the destination buffer
+ *            mlen:    number of bytes to deliver into the buffer
+ *            rlen:    length of data in the network
+ * Returns: rlen
+ *
+ * blocking read of the requested data. must drain out the
+ * difference of manipulated and requested lengths from the network.
+ * The surplus is drained through a fixed-size scratch buffer, so no
+ * allocation sized by the peer's message is needed.
+ *
+ * NOTE(review): lib_finalize() is only reached when mlen is non-zero;
+ * confirm that zero-length receives are finalized elsewhere.
+ */
+int tcpnal_recv(nal_cb_t *n,
+                void *private,
+                lib_msg_t *cookie,
+                unsigned int niov,
+                struct iovec *iov,
+                ptl_size_t mlen,
+                ptl_size_t rlen)
+
+{
+        if (mlen) {
+                LASSERT (niov <= 1);
+                read_connection(private,iov[0].iov_base,mlen);
+                lib_finalize(n, private, cookie);
+        }
+
+        if (mlen!=rlen){
+                char trash[4096];
+                ptl_size_t remaining=rlen-mlen;
+
+                while (remaining){
+                        size_t chunk=sizeof(trash);
+
+                        if (remaining < chunk)
+                                chunk=remaining;
+
+                        /* read_connection() yields zero on failure (see
+                           from_connection); stop draining in that case */
+                        /*TODO: report the error status*/
+                        if (!read_connection(private,trash,chunk))
+                                break;
+                        remaining-=chunk;
+                }
+        }
+
+        return(rlen);
+}
+
+
+/* Function: from_connection:
+ * Arguments: a: the bridge this connection belongs to
+ *            c: the connection to read from
+ * Returns: whether or not to continue reading from this connection,
+ *          expressed as a 1 to continue, and a 0 to not
+ *
+ * from_connection() is called from the select loop when i/o is
+ * available. It attempts to read the portals header and, if that
+ * works, passes it to the generic library for processing.
+ */
+static int from_connection(void *a,connection c)
+{
+        bridge b=a;
+        ptl_hdr_t hdr;
+
+        if (!read_connection(c, (unsigned char *)&hdr, sizeof(hdr)))
+                return(0);
+
+        lib_parse(b->nal_cb, &hdr, c);
+        return(1);
+}
+
+
+/* Shutdown hook installed by tcpnal_init(): passes the bridge's
+ * lower layer (the value stored by tcpnal_init) to
+ * shutdown_connections(). */
+static void tcpnal_shutdown(bridge b)
+{
+        shutdown_connections(b->lower);
+}
+
+/* Function: tcpnal_init
+ * Arguments: b: the bridge to attach the TCP nal to
+ * Returns: PTL_OK on success, PTL_NAL_FAILED when the connection
+ *          service could not be started
+ *
+ * Installs the send, receive and shutdown callbacks, then starts the
+ * connection service on the port derived from the bridge's nid and
+ * pid, with from_connection() as its input handler.
+ */
+int tcpnal_init(bridge b)
+{
+        manager m;
+
+        b->nal_cb->cb_send=tcpnal_send;
+        b->nal_cb->cb_recv=tcpnal_recv;
+        b->shutdown=tcpnal_shutdown;
+
+        m=init_connections(PNAL_PORT(b->nal_cb->ni.nid,
+                                     b->nal_cb->ni.pid),
+                           from_connection,b);
+        if (m){
+                /* XXX cfs hack */
+                b->nal_cb->ni.pid=0;
+                b->lower=m;
+                return(PTL_OK);
+        }
+
+        /* TODO: this needs to shut down the
+           newly created junk */
+        return(PTL_NAL_FAILED);
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ * Copyright (c) 2002 Eric Hoffman
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+/* TODO: make this an explicit type when they become available */
+/* 64-bit unsigned time value used throughout the timer interface */
+typedef unsigned long long when;
+
+/* A registered timer. Note that timer names a pointer to the structure.
+ * NOTE(review): field meanings below are inferred from their names and
+ * from the register_timer() prototype - confirm against the
+ * implementation. */
+typedef struct timer {
+ /* presumably the callback to run when the timer fires */
+ void (*function)(void *);
+ /* presumably the argument handed to function */
+ void *arg;
+ /* presumably the time at which the timer is due */
+ when w;
+ int interval;
+ int disable;
+} *timer;
+
+timer register_timer(when, void (*f)(void *), void *a);
+void remove_timer(timer t);
+void timer_loop(void);
+void initialize_timer(void);
+void register_thunk(void (*f)(void *),void *a);
+
+
+/* 2^32; presumably the number of `when` units in one second - TODO confirm */
+#define HZ 0x100000000ull
+
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+/* Fixed-width integer aliases, named for the width they are meant to have.
+ * NOTE(review): uint32 is unsigned long, which is 64 bits wide on LP64
+ * platforms; confirm no user depends on it being exactly 32 bits. */
+typedef unsigned short uint16;
+typedef unsigned long uint32;
+typedef unsigned long long uint64;
+typedef unsigned char uint8;
--- /dev/null
+Makefile
+Makefile.in
+acceptor
+debugctl
+ptlctl
+.deps
+routerstat
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+
+# Compile and link commands used for everything in this directory;
+# headers are taken from ../include relative to the source directory.
+COMPILE = gcc -Wall -g -I$(srcdir)/../include
+LINK = gcc -o $@
+
+# Four programs installed under sbin, plus one static library.
+sbin_PROGRAMS = acceptor ptlctl debugctl routerstat
+lib_LIBRARIES = libptlctl.a
+
+acceptor_SOURCES = acceptor.c # -lefence
+
+libptlctl_a_SOURCES = portals.c debug.c l_ioctl.c parser.c parser.h
+
+# ptlctl and debugctl both link against the libptlctl.a built in this
+# directory (hence the explicit dependency) and against ncurses.
+ptlctl_SOURCES = ptlctl.c
+ptlctl_LDADD = -L. -lptlctl -lncurses # -lefence
+ptlctl_DEPENDENCIES = libptlctl.a
+
+debugctl_SOURCES = debugctl.c
+debugctl_LDADD = -L. -lptlctl -lncurses # -lefence
+debugctl_DEPENDENCIES = libptlctl.a
+
+routerstat_SOURCES = routerstat.c
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/tcp.h>
+#include <netdb.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <asm/byteorder.h>
+#include <syslog.h>
+
+#include <errno.h>
+
+#include <portals/api-support.h>
+#include <portals/list.h>
+#include <portals/lib-types.h>
+
+/* should get this from autoconf somehow */
+#ifndef PIDFILE_DIR
+#define PIDFILE_DIR "/var/run"
+#endif
+
+#define PROGNAME "acceptor"
+
+/* Writes the current process id, followed by a newline, to
+ * PIDFILE_DIR/<name>-<port>.pid. If the file cannot be opened the
+ * reason is sent to syslog and nothing is written. */
+void create_pidfile(char *name, int port)
+{
+        char pidfile[1024];
+        FILE *fp;
+
+        snprintf(pidfile, sizeof(pidfile), "%s/%s-%d.pid",
+                 PIDFILE_DIR, name, port);
+
+        fp = fopen(pidfile, "w");
+        if (fp == NULL) {
+                syslog(LOG_ERR, "%s: %s\n", pidfile,
+                       strerror(errno));
+                return;
+        }
+
+        fprintf(fp, "%d\n", getpid());
+        fclose(fp);
+}
+
+/* Returns 1, after complaining on stderr, when
+ * PIDFILE_DIR/<name>-<port>.pid already exists; returns 0 otherwise. */
+int pidfile_exists(char *name, int port)
+{
+        char pidfile[1024];
+
+        snprintf(pidfile, sizeof(pidfile), "%s/%s-%d.pid",
+                 PIDFILE_DIR, name, port);
+
+        if (access(pidfile, F_OK) != 0)
+                return (0);
+
+        fprintf(stderr, "%s: exists, acceptor already running.\n",
+                pidfile);
+        return (1);
+}
+
+/* Parses a decimal integer with an optional one-letter multiplier:
+ * k/K shifts the number left by 10 bits, m/M by 20, g/G by 30.
+ * Stores the result in *sizep and returns 0; returns -1, leaving
+ * *sizep untouched, when str does not start with an integer. */
+int
+parse_size (int *sizep, char *str)
+{
+        int value;
+        char suffix[32];
+        int shift = 0;
+        int fields = sscanf (str, "%d%1[gGmMkK]", &value, suffix);
+
+        if (fields != 1 && fields != 2)
+                return (-1);
+
+        if (fields == 2) {
+                switch (suffix[0]) {
+                case 'g':
+                case 'G':
+                        shift = 30;
+                        break;
+                case 'm':
+                case 'M':
+                        shift = 20;
+                        break;
+                case 'k':
+                case 'K':
+                        shift = 10;
+                        break;
+                default:
+                        break;
+                }
+        }
+
+        /* a bare number is stored as is, without going through a shift */
+        *sizep = shift ? value << shift : value;
+        return (0);
+}
+
+/* Logs a newly accepted connection to syslog: the peer's host name
+ * (or its dotted-quad address when the reverse lookup fails), its
+ * NID, the socket's send and receive buffer sizes and whether Nagle
+ * is enabled. net_ip is the peer address in network byte order.
+ * A failing getsockopt() is reported with perror() and the matching
+ * value is logged as zero. */
+void
+show_connection (int fd, __u32 net_ip, ptl_nid_t nid)
+{
+        struct hostent *h = gethostbyaddr ((char *)&net_ip, sizeof net_ip, AF_INET);
+        __u32 host_ip = ntohl (net_ip);
+        int rxmem = 0;
+        int txmem = 0;
+        int nonagle = 0;
+        /* getsockopt() takes a socklen_t *, not an int * */
+        socklen_t len;
+        char host[1024];
+
+        len = sizeof (txmem);
+        if (getsockopt (fd, SOL_SOCKET, SO_SNDBUF, &txmem, &len) != 0)
+                perror ("Cannot get write buffer size");
+
+        len = sizeof (rxmem);
+        if (getsockopt (fd, SOL_SOCKET, SO_RCVBUF, &rxmem, &len) != 0)
+                perror ("Cannot get read buffer size");
+
+        len = sizeof (nonagle);
+        if (getsockopt (fd, IPPROTO_TCP, TCP_NODELAY, &nonagle, &len) != 0)
+                perror ("Cannot get nagle");
+
+        if (h == NULL)
+                snprintf (host, sizeof(host), "%d.%d.%d.%d", (host_ip >> 24) & 0xff,
+                          (host_ip >> 16) & 0xff, (host_ip >> 8) & 0xff, host_ip & 0xff);
+        else
+                snprintf (host, sizeof(host), "%s", h->h_name);
+
+        syslog (LOG_INFO, "Accepted host: %s NID: "LPX64" snd: %d rcv %d nagle: %s\n",
+                host, nid, txmem, rxmem, nonagle ? "disabled" : "enabled");
+}
+
+/*
+ * Write exactly 'nob' bytes from 'buffer' to 'cfd', retrying after EINTR
+ * and continuing after short writes.
+ *
+ * Returns 0 once everything has been written, or the negative write()
+ * result (errno set) on any other error.  A zero-byte write is treated
+ * as fatal and aborts the process.
+ */
+int
+sock_write (int cfd, void *buffer, int nob)
+{
+        char *ptr = buffer;
+
+        while (nob > 0)
+        {
+                int rc = write (cfd, ptr, nob);
+
+                if (rc < 0)
+                {
+                        if (errno == EINTR)
+                                continue;
+
+                        return (rc);
+                }
+
+                if (rc == 0)
+                {
+                        fprintf (stderr, "Unexpected zero sock_write\n");
+                        abort();
+                }
+
+                /* advance by what was actually written, not by what
+                 * is still outstanding */
+                ptr += rc;
+                nob -= rc;
+        }
+
+        return (0);
+}
+
+/*
+ * Read exactly 'nob' bytes from 'cfd' into 'buffer', retrying after EINTR
+ * and continuing after short reads.
+ *
+ * Returns 0 once 'nob' bytes have arrived.  Returns the negative read()
+ * result (errno set) on error, or -1 with errno = ECONNABORTED if the
+ * peer closes the connection first.
+ */
+int
+sock_read (int cfd, void *buffer, int nob)
+{
+        char *ptr = buffer;
+
+        while (nob > 0)
+        {
+                int rc = read (cfd, ptr, nob);
+
+                if (rc < 0)
+                {
+                        if (errno == EINTR)
+                                continue;
+
+                        return (rc);
+                }
+
+                if (rc == 0)                    /* EOF */
+                {
+                        errno = ECONNABORTED;
+                        return (-1);
+                }
+
+                /* advance by what was actually read, not by what is
+                 * still outstanding */
+                ptr += rc;
+                nob -= rc;
+        }
+
+        return (0);
+}
+
+/*
+ * Swap portals HELLO headers with the peer connected on 'cfd'.
+ *
+ * Sends a zeroed header carrying the protocol magic/version (overlaid on
+ * dest_nid), 'my_nid' as src_nid and type PTL_MSG_HELLO, all little-endian.
+ * Then reads the peer's magic/version, checks them, reads the rest of the
+ * peer's header and checks that it is a HELLO with no payload.
+ *
+ * Returns 0 and stores the peer's NID (host byte order) in '*peer_nid' on
+ * success; returns -1 after printing a diagnostic on any I/O error, bad
+ * magic, protocol version mismatch or unexpected header.
+ */
+int
+exchange_nids (int cfd, ptl_nid_t my_nid, ptl_nid_t *peer_nid)
+{
+        int                 rc;
+        ptl_hdr_t           hdr;
+        ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid;
+
+        LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid));
+
+        memset (&hdr, 0, sizeof (hdr));
+
+        hmv->magic         = __cpu_to_le32 (PORTALS_PROTO_MAGIC);
+        hmv->version_major = __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR);
+        hmv->version_minor = __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR);
+
+        hdr.src_nid = __cpu_to_le64 (my_nid);
+        hdr.type = __cpu_to_le32 (PTL_MSG_HELLO);
+
+        /* Assume there's sufficient socket buffering for a portals HELLO header */
+        rc = sock_write (cfd, &hdr, sizeof (hdr));
+        if (rc != 0) {
+                perror ("Can't send initial HELLO");
+                return (-1);
+        }
+
+        /* First few bytes down the wire are the portals protocol magic and
+         * version, no matter what protocol version we're running. */
+
+        rc = sock_read (cfd, hmv, sizeof (*hmv));
+        if (rc != 0) {
+                perror ("Can't read from peer");
+                return (-1);
+        }
+
+        /* wire values are little-endian: decode with le*_to_cpu */
+        if (__le32_to_cpu (hmv->magic) != PORTALS_PROTO_MAGIC) {
+                fprintf (stderr, "Bad magic %#08x (%#08x expected)\n",
+                         __le32_to_cpu (hmv->magic), PORTALS_PROTO_MAGIC);
+                return (-1);
+        }
+
+        if (__le16_to_cpu (hmv->version_major) != PORTALS_PROTO_VERSION_MAJOR ||
+            __le16_to_cpu (hmv->version_minor) != PORTALS_PROTO_VERSION_MINOR) {
+                fprintf (stderr, "Incompatible protocol version %d.%d (%d.%d expected)\n",
+                         __le16_to_cpu (hmv->version_major),
+                         __le16_to_cpu (hmv->version_minor),
+                         PORTALS_PROTO_VERSION_MAJOR,
+                         PORTALS_PROTO_VERSION_MINOR);
+                /* the rest of this function assumes our own header layout,
+                 * so do not carry on talking to an incompatible peer */
+                return (-1);
+        }
+
+        /* version 0 sends magic/version as the dest_nid of a 'hello' header,
+         * so read the rest of it in now... */
+        LASSERT (PORTALS_PROTO_VERSION_MAJOR == 0);
+        rc = sock_read (cfd, hmv + 1, sizeof (hdr) - sizeof (*hmv));
+        if (rc != 0) {
+                perror ("Can't read rest of HELLO hdr");
+                return (-1);
+        }
+
+        /* ...and check we got what we expected */
+        if (__le32_to_cpu (hdr.type) != PTL_MSG_HELLO ||
+            __le32_to_cpu (PTL_HDR_LENGTH (&hdr)) != 0) {
+                fprintf (stderr, "Expecting a HELLO hdr with 0 payload,"
+                         " but got type %d with %d payload\n",
+                         __le32_to_cpu (hdr.type),
+                         __le32_to_cpu (PTL_HDR_LENGTH (&hdr)));
+                return (-1);
+        }
+
+        *peer_nid = __le64_to_cpu (hdr.src_nid);
+        return (0);
+}
+
+/*
+ * Print the command-line synopsis to stderr and exit with status 1.
+ * The synopsis lists every option letter main() hands to getopt().
+ */
+void
+usage (char *myname)
+{
+        fprintf (stderr, "Usage: %s [-r recv_mem] [-s send_mem] [-n] [-l] [-x] [-i] [-N nal_id] port\n", myname);
+        exit (1);
+}
+
+/*
+ * acceptor entry point.
+ *
+ * Parses the options, refuses to start if a pidfile for this port exists,
+ * creates a listening TCP socket on the given port, opens /dev/portals,
+ * daemonises, then loops forever accepting connections and handing each
+ * accepted fd to the NAL with NAL_CMD_REGISTER_PEER_FD.
+ *
+ * Options (see the getopt string below):
+ *   -r size   SO_RCVBUF for the listening socket (parse_size syntax)
+ *   -s size   SO_SNDBUF for the listening socket (parse_size syntax)
+ *   -n        do not set TCP_NODELAY (nagle stays enabled)
+ *   -l        passed to daemon() as its 'noclose' argument
+ *   -x        exchange NIDs with the peer instead of using its IP address
+ *   -i        stored in ioc_flags of the register request
+ *   -N nal    NAL number (defaults to SOCKNAL)
+ */
+int main(int argc, char **argv)
+{
+ int o, fd, rc, port, pfd;
+ struct sockaddr_in srvaddr;
+ int c;
+ int rxmem = 0;
+ int txmem = 0;
+ int noclose = 0;
+ int nonagle = 1;
+ int nal = SOCKNAL;
+ int xchg_nids = 0;
+ int bind_irq = 0;
+
+ while ((c = getopt (argc, argv, "N:r:s:nlxi")) != -1)
+ switch (c)
+ {
+ case 'r':
+ if (parse_size (&rxmem, optarg) != 0 || rxmem < 0)
+ usage (argv[0]);
+ break;
+
+ case 's':
+ if (parse_size (&txmem, optarg) != 0 || txmem < 0)
+ usage (argv[0]);
+ break;
+
+ case 'n':
+ nonagle = 0;
+ break;
+
+ case 'l':
+ noclose = 1;
+ break;
+
+ case 'x':
+ xchg_nids = 1;
+ break;
+
+ case 'i':
+ bind_irq = 1;
+ break;
+
+ case 'N':
+ /* NOTE(review): nal == NAL_MAX_NR is accepted here; confirm
+ * whether that is a valid NAL number. */
+ if (parse_size(&nal, optarg) != 0 ||
+ nal < 0 || nal > NAL_MAX_NR)
+ usage(argv[0]);
+ break;
+
+ default:
+ usage (argv[0]);
+ break;
+ }
+
+ /* the port number is the one mandatory positional argument */
+ if (optind >= argc)
+ usage (argv[0]);
+
+ /* NOTE(review): atol() does no validation; a non-numeric argument
+ * yields port 0. */
+ port = atol(argv[optind++]);
+
+ if (pidfile_exists(PROGNAME, port))
+ exit(1);
+
+ memset(&srvaddr, 0, sizeof(srvaddr));
+ srvaddr.sin_family = AF_INET;
+ srvaddr.sin_port = htons(port);
+ srvaddr.sin_addr.s_addr = INADDR_ANY;
+
+ fd = socket(PF_INET, SOCK_STREAM, 0);
+ if (fd < 0) {
+ perror("opening socket");
+ exit(1);
+ }
+
+ o = 1;
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &o, sizeof(o))) {
+ perror("Cannot set REUSEADDR socket opt");
+ exit(1);
+ }
+
+ /* socket options are set on the listening socket, before bind() */
+ if (nonagle)
+ {
+ o = 1;
+ rc = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &o, sizeof (o));
+ if (rc != 0)
+ {
+ perror ("Cannot disable nagle");
+ exit (1);
+ }
+ }
+
+ if (txmem != 0)
+ {
+ rc = setsockopt (fd, SOL_SOCKET, SO_SNDBUF, &txmem, sizeof (txmem));
+ if (rc != 0)
+ {
+ perror ("Cannot set write buffer size");
+ exit (1);
+ }
+ }
+
+ if (rxmem != 0)
+ {
+ rc = setsockopt (fd, SOL_SOCKET, SO_RCVBUF, &rxmem, sizeof (rxmem));
+ if (rc != 0)
+ {
+ perror ("Cannot set read buffer size");
+ exit (1);
+ }
+ }
+
+ rc = bind(fd, (struct sockaddr *)&srvaddr, sizeof(srvaddr));
+ if ( rc == -1 ) {
+ perror("bind: ");
+ exit(1);
+ }
+
+ if (listen(fd, 127)) {
+ perror("listen: ");
+ exit(1);
+ }
+ fprintf(stderr, "listening on port %d\n", port);
+
+ pfd = open("/dev/portals", O_RDWR);
+ if ( pfd < 0 ) {
+ perror("opening portals device");
+ exit(1);
+ }
+
+ /* NOTE(review): with noclose == 0 daemon() presumably redirects the
+ * standard streams to /dev/null, in which case the perror()/printf()
+ * calls in the loop below produce no visible output -- confirm. */
+ rc = daemon(1, noclose);
+ if (rc < 0) {
+ perror("daemon(): ");
+ exit(1);
+ }
+
+ openlog(PROGNAME, LOG_PID, LOG_DAEMON);
+ syslog(LOG_INFO, "started, listening on port %d\n", port);
+ create_pidfile(PROGNAME, port);
+
+ while (1) {
+ struct sockaddr_in clntaddr;
+ int len = sizeof(clntaddr);
+ int cfd;
+ struct portal_ioctl_data data;
+ ptl_nid_t peer_nid;
+
+ cfd = accept(fd, (struct sockaddr *)&clntaddr, &len);
+ if ( cfd < 0 ) {
+ perror("accept");
+ /* NOTE(review): a failed accept() terminates the daemon with
+ * status 0; the 'continue' below is unreachable. */
+ exit(0);
+ continue;
+ }
+
+ /* without -x the peer's NID is simply its IPv4 address */
+ if (!xchg_nids)
+ peer_nid = ntohl (clntaddr.sin_addr.s_addr); /* HOST byte order */
+ else
+ {
+ /* ask the portals device for our own NID, then swap HELLOs */
+ PORTAL_IOC_INIT (data);
+ data.ioc_nal = nal;
+ rc = ioctl (pfd, IOC_PORTAL_GET_NID, &data);
+ if (rc < 0)
+ {
+ perror ("Can't get my NID");
+ close (cfd);
+ continue;
+ }
+
+ rc = exchange_nids (cfd, data.ioc_nid, &peer_nid);
+ if (rc != 0)
+ {
+ close (cfd);
+ continue;
+ }
+ }
+
+ show_connection (cfd, clntaddr.sin_addr.s_addr, peer_nid);
+
+ /* hand the connected socket to the NAL */
+ PORTAL_IOC_INIT(data);
+ data.ioc_fd = cfd;
+ data.ioc_nal = nal;
+ data.ioc_nal_cmd = NAL_CMD_REGISTER_PEER_FD;
+ data.ioc_nid = peer_nid;
+ data.ioc_flags = bind_irq;
+
+ if (ioctl(pfd, IOC_PORTAL_NAL_CMD, &data) < 0) {
+ perror("ioctl failed");
+
+ } else {
+ printf("client registered\n");
+ }
+ /* our copy of the fd is closed whether or not registration worked */
+ rc = close(cfd);
+ if (rc)
+ perror ("close failed");
+ }
+
+ /* not reached: the loop above never terminates normally */
+ closelog();
+ exit(0);
+
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Some day I'll split all of this functionality into a cfs_debug module
+ * of its own. That day is not today.
+ *
+ */
+
+#include <stdio.h>
+#include <netdb.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <time.h>
+#include <syscall.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#define BUG() /* workaround for module.h includes */
+#include <linux/version.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+#include <linux/module.h>
+#endif
+
+#include <portals/api-support.h>
+#include <portals/ptlctl.h>
+#include "parser.h"
+
+/* Scratch buffer handed to portal_ioctl_pack() (via 'buf'/'max') by the
+ * jt_dbg_* commands below. */
+static char rawbuf[8192];
+static char *buf = rawbuf;
+static int max = 8192;
+//static int g_pfd = -1;
+/* Per-subsystem enable flags; entry i corresponds to
+ * portal_debug_subsystems[i] (see do_debug_mask). */
+static int subsystem_array[1 << 8];
+/* Enabled message types; bit i corresponds to portal_debug_masks[i]. */
+static int debug_mask = ~0;
+
+/* Subsystem names accepted by the filter/show/list commands;
+ * NULL-terminated. */
+static const char *portal_debug_subsystems[] =
+ {"undefined", "mdc", "mds", "osc", "ost", "class", "obdfs", "llite",
+ "rpc", "ext2obd", "portals", "socknal", "qswnal", "pinger", "filter",
+ "obdtrace", "echo", "ldlm", "lov", "gmnal", "router", "ptldb", NULL};
+/* Message type names accepted by the filter/show/list commands;
+ * NULL-terminated. */
+static const char *portal_debug_masks[] =
+ {"trace", "inode", "super", "ext2", "malloc", "cache", "info", "ioctl",
+ "blocks", "net", "warning", "buffs", "other", "dentry", "portals",
+ "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace", NULL};
+
+/* Maps a debug_daemon sub-command name to its DEBUG_DAEMON_* code. */
+struct debug_daemon_cmd {
+ char *cmd;
+ unsigned int cmdv;
+};
+
+/* Sub-commands understood by jt_dbg_debug_daemon; the list ends with an
+ * entry whose cmd is NULL. */
+static const struct debug_daemon_cmd portal_debug_daemon_cmd[] = {
+ {"start", DEBUG_DAEMON_START},
+ {"stop", DEBUG_DAEMON_STOP},
+ {"pause", DEBUG_DAEMON_PAUSE},
+ {"continue", DEBUG_DAEMON_CONTINUE},
+ {0, 0}
+};
+
+/*
+ * Enable (enable != 0) or disable output for whatever 'name' selects:
+ * a subsystem name, a message type name, or one of the wildcards
+ * "all_subs" / "all_types".  Matching is case-insensitive and a name may
+ * match in both tables.
+ *
+ * Returns 1 if 'name' matched anything, 0 otherwise.
+ */
+static int do_debug_mask(char *name, int enable)
+{
+        const char *verb = enable ? "Enabling" : "Disabling";
+        int every_sub = (strcasecmp(name, "all_subs") == 0);
+        int every_type = (strcasecmp(name, "all_types") == 0);
+        int matched = 0;
+        int idx;
+
+        for (idx = 0; portal_debug_subsystems[idx] != NULL; idx++) {
+                if (!every_sub &&
+                    strcasecmp(name, portal_debug_subsystems[idx]) != 0)
+                        continue;
+
+                printf("%s output from subsystem \"%s\"\n",
+                       verb, portal_debug_subsystems[idx]);
+                subsystem_array[idx] = enable;
+                matched = 1;
+        }
+
+        for (idx = 0; portal_debug_masks[idx] != NULL; idx++) {
+                if (!every_type &&
+                    strcasecmp(name, portal_debug_masks[idx]) != 0)
+                        continue;
+
+                printf("%s output of type \"%s\"\n",
+                       verb, portal_debug_masks[idx]);
+                if (enable)
+                        debug_mask |= (1 << idx);
+                else
+                        debug_mask &= ~(1 << idx);
+                matched = 1;
+        }
+
+        return matched;
+}
+
+/*
+ * Reset the subsystem filter so that every subsystem is enabled.
+ * 'argc' and 'argv' are unused.  Always returns 0.
+ */
+int dbg_initialize(int argc, char **argv)
+{
+        size_t i;
+
+        /* Assign element by element: memset() fills bytes, so
+         * memset(.., 1, ..) would store 0x01010101 in each int rather
+         * than the 0/1 flag values the rest of the file uses. */
+        for (i = 0; i < sizeof(subsystem_array) / sizeof(subsystem_array[0]); i++)
+                subsystem_array[i] = 1;
+
+        return 0;
+}
+
+/*
+ * "filter" command: disable output for every subsystem or message type
+ * named in argv[1..].  Unknown names are reported on stderr.
+ * Always returns 0.
+ */
+int jt_dbg_filter(int argc, char **argv)
+{
+        int arg;
+
+        if (argc < 2) {
+                fprintf(stderr, "usage: %s <subsystem ID or debug mask>\n",
+                        argv[0]);
+                return 0;
+        }
+
+        for (arg = 1; arg < argc; arg++) {
+                if (do_debug_mask(argv[arg], 0))
+                        continue;
+                fprintf(stderr, "Unknown subsystem or debug type: %s\n",
+                        argv[arg]);
+        }
+
+        return 0;
+}
+
+/*
+ * "show" command: enable output for every subsystem or message type
+ * named in argv[1..].  Unknown names are reported on stderr.
+ * Always returns 0.
+ */
+int jt_dbg_show(int argc, char **argv)
+{
+        int arg;
+
+        if (argc < 2) {
+                fprintf(stderr, "usage: %s <subsystem ID or debug mask>\n",
+                        argv[0]);
+                return 0;
+        }
+
+        for (arg = 1; arg < argc; arg++) {
+                if (do_debug_mask(argv[arg], 1))
+                        continue;
+                fprintf(stderr, "Unknown subsystem or debug type: %s\n",
+                        argv[arg]);
+        }
+
+        return 0;
+}
+
+/*
+ * Write 'value', formatted as a decimal string, to the file 'procpath'.
+ *
+ * Returns 0 on success, or the negative result of the failing open() or
+ * write() after printing a diagnostic.  The descriptor is closed on every
+ * path once it has been opened.
+ */
+static int applymask(char* procpath, int value)
+{
+        char text[64];
+        int  len = snprintf(text, sizeof(text), "%d", value);
+        int  fd;
+        int  rc;
+
+        fd = open(procpath, O_WRONLY);
+        if (fd == -1) {
+                fprintf(stderr, "Unable to open %s: %s\n",
+                        procpath, strerror(errno));
+                return fd;
+        }
+
+        /* len + 1: the terminating NUL is written too, as before */
+        rc = write(fd, text, len + 1);
+        if (rc < 0)
+                fprintf(stderr, "Write to %s failed: %s\n",
+                        procpath, strerror(errno));
+
+        close(fd);
+        return (rc < 0) ? rc : 0;
+}
+
+extern char *dump_filename;
+extern int dump(int dev_id, int opc, void *buf);
+
+/*
+ * Push both masks out.  With no dump file configured they are written to
+ * /proc/sys/portals/subsystem_debug and /proc/sys/portals/debug; otherwise
+ * they are packed into a portals_debug_ioctl_data record and handed to
+ * dump().  The confirmation line is printed in both cases.
+ */
+static void applymask_all(unsigned int subs_mask, unsigned int debug_mask)
+{
+        if (dump_filename) {
+                struct portals_debug_ioctl_data ioc;
+
+                ioc.hdr.ioc_len = sizeof(ioc);
+                ioc.hdr.ioc_version = 0;
+                ioc.subs = subs_mask;
+                ioc.debug = debug_mask;
+
+                dump(OBD_DEV_ID, PTL_IOC_DEBUG_MASK, &ioc);
+        } else {
+                applymask("/proc/sys/portals/subsystem_debug", subs_mask);
+                applymask("/proc/sys/portals/debug", debug_mask);
+        }
+
+        printf("Applied subsystem_debug=%d, debug=%d to /proc/sys/portals\n",
+               subs_mask, debug_mask);
+}
+
+/*
+ * "list" command.  argv[1] selects the action (case-insensitive):
+ *   subs        print the known subsystem names
+ *   types       print the known message type names
+ *   applymasks  build the subsystem bitmask from the enable flags and
+ *               apply it together with the current debug mask
+ * Any other argument is silently ignored.  Always returns 0.
+ */
+int jt_dbg_list(int argc, char **argv)
+{
+        const char *what;
+        int idx;
+
+        if (argc != 2) {
+                fprintf(stderr, "usage: %s <subs || types>\n", argv[0]);
+                return 0;
+        }
+        what = argv[1];
+
+        if (strcasecmp(what, "subs") == 0) {
+                printf("Subsystems: all_subs");
+                for (idx = 0; portal_debug_subsystems[idx] != NULL; idx++)
+                        printf(", %s", portal_debug_subsystems[idx]);
+                printf("\n");
+                return 0;
+        }
+
+        if (strcasecmp(what, "types") == 0) {
+                printf("Types: all_types");
+                for (idx = 0; portal_debug_masks[idx] != NULL; idx++)
+                        printf(", %s", portal_debug_masks[idx]);
+                printf("\n");
+                return 0;
+        }
+
+        if (strcasecmp(what, "applymasks") == 0) {
+                unsigned int subsystem_mask = 0;
+
+                for (idx = 0; portal_debug_subsystems[idx] != NULL; idx++)
+                        if (subsystem_array[idx])
+                                subsystem_mask |= (1 << idx);
+
+                applymask_all(subsystem_mask, debug_mask);
+        }
+
+        return 0;
+}
+
+/*
+ * Print the newline-terminated lines found in buf[0..size) to 'fd',
+ * keeping only those that pass the current subsystem/type filters, then
+ * print a kept/dropped summary on stdout.
+ *
+ * Each line is expected to begin with two hexadecimal numbers (subsystem,
+ * then debug type) each followed by one separator character.
+ * if 'raw' is true, don't strip the debug information from the front of the
+ * lines
+ *
+ * The buffer is modified temporarily (each newline is replaced by NUL while
+ * its line is printed, then restored), so it must be writable.
+ */
+static void dump_buffer(FILE *fd, char *buf, int size, int raw)
+{
+ char *p, *z;
+ unsigned long subsystem, debug, dropped = 0, kept = 0;
+ int max_sub, max_type;
+
+ /* count the entries of the two name tables */
+ for (max_sub = 0; portal_debug_subsystems[max_sub] != NULL; max_sub++)
+ ;
+ /* NOTE(review): max_type is computed but not used below */
+ for (max_type = 0; portal_debug_masks[max_type] != NULL; max_type++)
+ ;
+
+ while (size) {
+ /* a trailing fragment without a newline is ignored */
+ p = memchr(buf, '\n', size);
+ if (!p)
+ break;
+ /* NOTE(review): strtoul() is not bounded by 'size' or by the end
+ * of the line; this relies on the line having the expected
+ * format. */
+ subsystem = strtoul(buf, &z, 16);
+ debug = strtoul(z + 1, &z, 16);
+
+ /* step over the separator after the second number */
+ z++;
+ /* for some reason %*s isn't working. */
+ *p = '\0';
+ /* keep the line when its subsystem is a known, enabled index and
+ * its type is either zero or shares a bit with debug_mask */
+ if (subsystem < max_sub &&
+ subsystem_array[subsystem] &&
+ (!debug || (debug_mask & debug))) {
+ if (raw)
+ fprintf(fd, "%s\n", buf);
+ else
+ fprintf(fd, "%s\n", z);
+ //printf("%s\n", buf);
+ kept++;
+ } else {
+ //fprintf(stderr, "dropping line (%lx:%lx): %s\n", subsystem, debug, buf);
+ dropped++;
+ }
+ /* restore the newline and advance past this line */
+ *p = '\n';
+ p++;
+ size -= (p - buf);
+ buf = p;
+ }
+
+ printf("Debug log: %lu lines, %lu kept, %lu dropped.\n",
+ dropped + kept, kept, dropped);
+}
+
+/*
+ * "debug_kernel [file] [raw]" command: fetch the kernel debug buffer with
+ * IOC_PORTAL_GET_DEBUG and print it through dump_buffer(), to 'file' if
+ * given, otherwise to stdout.  'raw' (default 1) is passed on to
+ * dump_buffer().
+ *
+ * Returns 0 on success and on a usage error (file convention), -1 if the
+ * output file cannot be opened, memory cannot be allocated, or the ioctl
+ * cannot be packed or fails.
+ */
+int jt_dbg_debug_kernel(int argc, char **argv)
+{
+        int rc = -1, raw = 1;
+        FILE *fd = stdout;
+        const int databuf_size = (6 << 20);
+        struct portal_ioctl_data data, *newdata;
+        char *databuf = NULL;
+
+        if (argc > 3) {
+                fprintf(stderr, "usage: %s [file] [raw]\n", argv[0]);
+                return 0;
+        }
+
+        if (argc > 1) {
+                fd = fopen(argv[1], "w");
+                if (fd == NULL) {
+                        fprintf(stderr, "fopen(%s) failed: %s\n", argv[1],
+                                strerror(errno));
+                        return -1;
+                }
+        }
+        if (argc > 2)
+                raw = atoi(argv[2]);
+
+        databuf = malloc(databuf_size);
+        if (!databuf) {
+                fprintf(stderr, "No memory for buffer.\n");
+                goto out;
+        }
+
+        memset(&data, 0, sizeof(data));
+        data.ioc_plen1 = databuf_size;
+        data.ioc_pbuf1 = databuf;
+
+        if (portal_ioctl_pack(&data, &buf, max) != 0) {
+                fprintf(stderr, "portal_ioctl_pack failed.\n");
+                goto out;
+        }
+
+        if (l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_DEBUG, buf) != 0) {
+                fprintf(stderr, "IOC_PORTAL_GET_DEBUG failed: %s\n",
+                        strerror(errno));
+                goto out;
+        }
+
+        newdata = (struct portal_ioctl_data *)buf;
+        if (newdata->ioc_size > 0)
+                dump_buffer(fd, databuf, newdata->ioc_size, raw);
+        else
+                fprintf(stderr, "No data in the debug buffer.\n");
+
+        /* only reached when nothing above failed */
+        rc = 0;
+
+ out:
+        free(databuf);
+        if (fd != stdout)
+                fclose(fd);
+        return rc;
+}
+
+/*
+ * "debug_daemon [start file <#MB>|stop|pause|continue]" command: look up
+ * argv[1] in portal_debug_daemon_cmd and send the matching code to the
+ * kernel with IOC_PORTAL_SET_DAEMON.  For "start", argv[2] names the output
+ * file (sent as inline buffer 1) and the optional argv[3] is a size, sent
+ * in ioc_misc (0 when omitted).
+ *
+ * Returns 0 on success or usage error, -1 on local failures, or the
+ * negative l_ioctl() result.
+ */
+int jt_dbg_debug_daemon(int argc, char **argv)
+{
+ int i, rc;
+ unsigned int cmd = 0;
+ FILE *fd = stdout;
+ struct portal_ioctl_data data;
+
+ if (argc <= 1) {
+ fprintf(stderr, "usage: %s [start file <#MB>|stop|pause|"
+ "continue]\n", argv[0]);
+ return 0;
+ }
+ /* case-insensitive lookup of the sub-command */
+ for (i = 0; portal_debug_daemon_cmd[i].cmd != NULL; i++) {
+ if (strcasecmp(argv[1], portal_debug_daemon_cmd[i].cmd) == 0) {
+ cmd = portal_debug_daemon_cmd[i].cmdv;
+ break;
+ }
+ }
+ /* the loop ran off the end of the table: unknown sub-command */
+ if (portal_debug_daemon_cmd[i].cmd == NULL) {
+ fprintf(stderr, "usage: %s [start file <#MB>|stop|pause|"
+ "continue]\n", argv[0]);
+ return 0;
+ }
+ memset(&data, 0, sizeof(data));
+ if (cmd == DEBUG_DAEMON_START) {
+ if (argc < 3) {
+ fprintf(stderr, "usage: %s [start file <#MB>|stop|"
+ "pause|continue]\n", argv[0]);
+ return 0;
+ }
+ /* If the file does not exist yet, check that it can be created,
+ * then remove the probe file again. */
+ if (access(argv[2], F_OK) != 0) {
+ fd = fopen(argv[2], "w");
+ if (fd != NULL) {
+ fclose(fd);
+ remove(argv[2]);
+ goto ok;
+ }
+ }
+ /* an existing file must be writable */
+ if (access(argv[2], W_OK) == 0)
+ goto ok;
+ /* errno here comes from the last failing fopen()/access() */
+ fprintf(stderr, "fopen(%s) failed: %s\n", argv[2],
+ strerror(errno));
+ return -1;
+ok:
+ data.ioc_inllen1 = strlen(argv[2]) + 1;
+ data.ioc_inlbuf1 = argv[2];
+ data.ioc_misc = 0;
+ /* NOTE(review): the size is only parsed when argc is exactly 4;
+ * extra arguments silently disable it. */
+ if (argc == 4) {
+ unsigned long size;
+ errno = 0;
+ size = strtoul(argv[3], NULL, 0);
+ if (errno) {
+ fprintf(stderr, "file size(%s): error %s\n",
+ argv[3], strerror(errno));
+ return -1;
+ }
+ data.ioc_misc = size;
+ }
+ }
+ data.ioc_count = cmd;
+ if (portal_ioctl_pack(&data, &buf, max) != 0) {
+ fprintf(stderr, "portal_ioctl_pack failed.\n");
+ return -1;
+ }
+ rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_SET_DAEMON, buf);
+ if (rc < 0) {
+ /* NOTE(review): "DEMON" in the message below looks like a typo
+ * for "DAEMON". */
+ fprintf(stderr, "IOC_PORTAL_SET_DEMON failed: %s\n",
+ strerror(errno));
+ return rc;
+ }
+ return 0;
+}
+
+/*
+ * "debug_file <input> [output] [raw]" command: map the debug log file
+ * 'input' and print it through dump_buffer(), to 'output' if given,
+ * otherwise to stdout.  'raw' (default 1) is passed on to dump_buffer().
+ *
+ * The file is mapped MAP_PRIVATE and writable because dump_buffer()
+ * patches the buffer in place while printing.
+ *
+ * Returns 0 on success and on a usage error (file convention), -1 if the
+ * input cannot be opened, stat'ed or mapped, or the output cannot be
+ * opened.
+ */
+int jt_dbg_debug_file(int argc, char **argv)
+{
+        int rc = -1, fd = -1, raw = 1;
+        FILE *output = stdout;
+        void *map = MAP_FAILED;
+        struct stat statbuf;
+
+        if (argc > 4 || argc < 2) {
+                fprintf(stderr, "usage: %s <input> [output] [raw]\n", argv[0]);
+                return 0;
+        }
+
+        fd = open(argv[1], O_RDONLY);
+        if (fd < 0) {
+                fprintf(stderr, "fopen(%s) failed: %s\n", argv[1],
+                        strerror(errno));
+                return -1;
+        }
+#warning FIXME: cleanup fstat issue here
+#ifndef SYS_fstat64
+#define __SYS_fstat__ SYS_fstat
+#else
+#define __SYS_fstat__ SYS_fstat64
+#endif
+        if (syscall(__SYS_fstat__, fd, &statbuf) < 0) {
+                fprintf(stderr, "fstat failed: %s\n", strerror(errno));
+                goto out;
+        }
+
+        if (argc >= 3) {
+                output = fopen(argv[2], "w");
+                if (output == NULL) {
+                        fprintf(stderr, "fopen(%s) failed: %s\n", argv[2],
+                                strerror(errno));
+                        /* nothing to fclose() at 'out' */
+                        output = stdout;
+                        goto out;
+                }
+        }
+
+        if (argc == 4)
+                raw = atoi(argv[3]);
+
+        /* mmap() reports failure with MAP_FAILED, not NULL */
+        map = mmap(NULL, statbuf.st_size, PROT_READ | PROT_WRITE,
+                   MAP_PRIVATE, fd, 0);
+        if (map == MAP_FAILED) {
+                fprintf(stderr, "mmap failed: %s\n", strerror(errno));
+                goto out;
+        }
+
+        dump_buffer(output, map, statbuf.st_size, raw);
+        rc = 0;
+
+ out:
+        if (map != MAP_FAILED)
+                munmap(map, statbuf.st_size);
+        if (output != stdout)
+                fclose(output);
+        /* fd 0 is a valid descriptor too */
+        if (fd >= 0)
+                close(fd);
+        return rc;
+}
+
+/*
+ * "clear" command: send IOC_PORTAL_CLEAR_DEBUG with an empty ioctl record.
+ * Takes no arguments.  Returns 0 on success or usage error, -1 if packing
+ * or the ioctl fails.
+ */
+int jt_dbg_clear_debug_buf(int argc, char **argv)
+{
+        struct portal_ioctl_data req;
+
+        if (argc != 1) {
+                fprintf(stderr, "usage: %s\n", argv[0]);
+                return 0;
+        }
+
+        memset(&req, 0, sizeof(req));
+        if (portal_ioctl_pack(&req, &buf, max) != 0) {
+                fprintf(stderr, "portal_ioctl_pack failed.\n");
+                return -1;
+        }
+
+        if (l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_CLEAR_DEBUG, buf) != 0) {
+                fprintf(stderr, "IOC_PORTAL_CLEAR_DEBUG failed: %s\n",
+                        strerror(errno));
+                return -1;
+        }
+
+        return 0;
+}
+
+/*
+ * "mark [text]" command: send IOC_PORTAL_MARK_DEBUG with a marker string
+ * as inline buffer 1.  The marker is argv[1] if given, otherwise the
+ * current time as formatted by ctime() with its trailing newline removed.
+ * Returns 0 on success or usage error, -1 if packing or the ioctl fails.
+ */
+int jt_dbg_mark_debug_buf(int argc, char **argv)
+{
+        struct portal_ioctl_data req;
+        time_t now = time(NULL);
+        char *marker;
+
+        if (argc > 2) {
+                fprintf(stderr, "usage: %s [marker text]\n", argv[0]);
+                return 0;
+        }
+
+        marker = (argc == 2) ? argv[1] : ctime(&now);
+        if (argc != 2)
+                marker[strlen(marker) - 1] = '\0'; /* stupid \n */
+
+        memset(&req, 0, sizeof(req));
+        req.ioc_inllen1 = strlen(marker) + 1;
+        req.ioc_inlbuf1 = marker;
+        if (portal_ioctl_pack(&req, &buf, max) != 0) {
+                fprintf(stderr, "portal_ioctl_pack failed.\n");
+                return -1;
+        }
+
+        if (l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_MARK_DEBUG, buf) != 0) {
+                fprintf(stderr, "IOC_PORTAL_MARK_DEBUG failed: %s\n",
+                        strerror(errno));
+                return -1;
+        }
+
+        return 0;
+}
+
+
+/*
+ * "modules [path] [kernel]" command.  On kernels older than 2.5, query each
+ * module in the table below with query_module(QM_INFO) and print one
+ * "add-symbol-file <path>/<subdir>/<name>.o <address>" line per loaded
+ * module.  Modules that are not loaded (ENOENT) are skipped silently.
+ * 'path' defaults to "..".  On 2.5 and later only a "not yet implemented"
+ * message is printed.  Always returns 0.
+ */
+int jt_dbg_modules(int argc, char **argv)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ /* module name -> directory of its object file, relative to 'path' */
+ struct mod_paths {
+ char *name, *path;
+ } *mp, mod_paths[] = {
+ {"portals", "portals/linux/oslib"},
+ {"ksocknal", "portals/linux/socknal"},
+ {"obdclass", "lustre/obdclass"},
+ {"ptlrpc", "lustre/ptlrpc"},
+ {"obdext2", "lustre/obdext2"},
+ {"ost", "lustre/ost"},
+ {"osc", "lustre/osc"},
+ {"mds", "lustre/mds"},
+ {"mdc", "lustre/mdc"},
+ {"llite", "lustre/llite"},
+ {"obdecho", "lustre/obdecho"},
+ {"ldlm", "lustre/ldlm"},
+ {"obdfilter", "lustre/obdfilter"},
+ {"extN", "lustre/extN"},
+ {"lov", "lustre/lov"},
+ {"fsfilt_ext3", "lustre/obdclass"},
+ {"fsfilt_extN", "lustre/obdclass"},
+ {"mds_ext2", "lustre/mds"},
+ {"mds_ext3", "lustre/mds"},
+ {"mds_extN", "lustre/mds"},
+ {"ptlbd", "lustre/ptlbd"},
+ {NULL, NULL}
+ };
+ char *path = "..";
+ /* NOTE(review): 'kernel' is assigned from argv[2] but not used below */
+ char *kernel = "linux";
+
+ if (argc >= 2)
+ path = argv[1];
+ if (argc == 3)
+ kernel = argv[2];
+ if (argc > 3) {
+ printf("%s [path] [kernel]\n", argv[0]);
+ return 0;
+ }
+
+ for (mp = mod_paths; mp->name != NULL; mp++) {
+ struct module_info info;
+ int rc;
+ size_t crap;
+ /* declared locally; no header prototype is used for it here */
+ int query_module(const char *name, int which, void *buf,
+ size_t bufsize, size_t *ret);
+
+ rc = query_module(mp->name, QM_INFO, &info, sizeof(info),
+ &crap);
+ if (rc < 0) {
+ if (errno != ENOENT)
+ printf("query_module(%s) failed: %s\n",
+ mp->name, strerror(errno));
+ } else {
+ /* the printed address is the module's address plus
+ * sizeof(struct module) */
+ printf("add-symbol-file %s/%s/%s.o 0x%0lx\n", path,
+ mp->path, mp->name,
+ info.addr + sizeof(struct module));
+ }
+ }
+
+ return 0;
+#else
+ printf("jt_dbg_module is not yet implemented for Linux 2.5\n");
+ return 0;
+#endif /* linux 2.5 */
+}
+
+/*
+ * "panic" command: send IOC_PORTAL_PANIC with an empty ioctl record.
+ * Takes no arguments.  Returns 0 on success or usage error, -1 if packing
+ * or the ioctl fails.
+ */
+int jt_dbg_panic(int argc, char **argv)
+{
+        struct portal_ioctl_data req;
+
+        if (argc != 1) {
+                fprintf(stderr, "usage: %s\n", argv[0]);
+                return 0;
+        }
+
+        memset(&req, 0, sizeof(req));
+        if (portal_ioctl_pack(&req, &buf, max) != 0) {
+                fprintf(stderr, "portal_ioctl_pack failed.\n");
+                return -1;
+        }
+
+        if (l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_PANIC, buf) != 0) {
+                fprintf(stderr, "IOC_PORTAL_PANIC failed: %s\n",
+                        strerror(errno));
+                return -1;
+        }
+
+        return 0;
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Some day I'll split all of this functionality into a cfs_debug module
+ * of its own. That day is not today.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <portals/api-support.h>
+#include <portals/ptlctl.h>
+#include "parser.h"
+
+
+/*
+ * Command table for the debugctl shell.  Each entry is
+ * { name, handler, <third field, 0 throughout>, help text }; the table ends
+ * with an all-zero entry.  It is passed to Parser_init() and
+ * Parser_execarg() in main().
+ */
+command_t list[] = {
+ {"debug_kernel", jt_dbg_debug_kernel, 0, "usage: debug_kernel [file] [raw], get debug buffer and print it [to a file]"},
+ {"debug_daemon", jt_dbg_debug_daemon, 0, "usage: debug_daemon [start file [#MB]|stop|pause|continue], control debug daemon to dump debug buffer to a file"},
+ {"debug_file", jt_dbg_debug_file, 0, "usage: debug_file <input> [output] [raw], read debug buffer from input and print it [to output]"},
+ {"clear", jt_dbg_clear_debug_buf, 0, "clear kernel debug buffer"},
+ {"mark", jt_dbg_mark_debug_buf, 0, "insert a marker into the kernel debug buffer (args: [marker text])"},
+ {"filter", jt_dbg_filter, 0, "filter certain messages (args: subsystem/debug ID)\n"},
+ {"show", jt_dbg_show, 0, "enable certain messages (args: subsystem/debug ID)\n"},
+ {"list", jt_dbg_list, 0, "list subsystem and debug types (args: subs or types)\n"},
+ {"modules", jt_dbg_modules, 0, "provide gdb-friendly module info (arg: <path>)"},
+ {"panic", jt_dbg_panic, 0, "cause the kernel to panic"},
+ {"dump", jt_ioc_dump, 0, "usage: dump file, save ioctl buffer to file"},
+ {"help", Parser_help, 0, "help"},
+ {"exit", Parser_quit, 0, "quit"},
+ {"quit", Parser_quit, 0, "quit"},
+ { 0, 0, 0, NULL }
+};
+
+/*
+ * debugctl entry point.  With arguments, runs the single command given on
+ * the command line and returns its result; without, runs the interactive
+ * parser loop and returns 0.  The portals ioctl device is registered
+ * beforehand and unregistered again on both paths.
+ * Exits with status 2 if dbg_initialize() fails.
+ */
+int main(int argc, char **argv)
+{
+        int rc = 0;
+
+        if (dbg_initialize(argc, argv) < 0)
+                exit(2);
+
+        register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH);
+
+        Parser_init("debugctl > ", list);
+        if (argc > 1)
+                rc = Parser_execarg(argc - 1, &argv[1], list);
+        else
+                Parser_commands();
+
+        /* release the device on the one-shot path too */
+        unregister_ioc_dev(PORTALS_DEV_ID);
+        return rc;
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include <portals/api-support.h>
+#include <portals/ptlctl.h>
+
+/* One registered ioctl target: its device path and its open descriptor
+ * (-1 while not open, see register_ioc_dev/open_ioc_dev). */
+struct ioc_dev {
+ const char * dev_name;
+ int dev_fd;
+};
+
+/* Registered devices, indexed by device id. */
+static struct ioc_dev ioc_dev_list[10];
+
+/* Header written in front of every ioctl buffer in a dump file (see dump()
+ * and parse_dump()). */
+struct dump_hdr {
+ int magic;
+ int dev_id;
+ int opc;
+};
+
+/* When non-NULL, l_ioctl() appends ioctl buffers to this file instead of
+ * issuing them; set by set_ioctl_dump(). */
+char * dump_filename;
+
+/*
+ * Return an open descriptor for registered device 'dev_id', opening the
+ * device O_RDWR on first use and caching the descriptor afterwards.
+ *
+ * Returns -EINVAL if 'dev_id' is out of range or not registered, or the
+ * negative open() result (after printing a hint) if the device cannot be
+ * opened.
+ */
+static int
+open_ioc_dev(int dev_id)
+{
+        /* number of slots, not sizeof() in bytes */
+        const int   ndevs = sizeof(ioc_dev_list) / sizeof(ioc_dev_list[0]);
+        const char *dev_name;
+
+        if (dev_id < 0 || dev_id >= ndevs)
+                return -EINVAL;
+
+        dev_name = ioc_dev_list[dev_id].dev_name;
+        if (dev_name == NULL) {
+                fprintf(stderr, "unknown device id: %d\n", dev_id);
+                return -EINVAL;
+        }
+
+        if (ioc_dev_list[dev_id].dev_fd < 0) {
+                int fd = open(dev_name, O_RDWR);
+
+                if (fd < 0) {
+                        fprintf(stderr, "opening %s failed: %s\n"
+                                "hint: the kernel modules may not be loaded\n",
+                                dev_name, strerror(errno));
+                        return fd;
+                }
+                ioc_dev_list[dev_id].dev_fd = fd;
+        }
+
+        return ioc_dev_list[dev_id].dev_fd;
+}
+
+
+/*
+ * Issue ioctl 'opc' with argument 'buf' on registered device 'dev_id'.
+ * Returns the ioctl() result, or the negative value from open_ioc_dev()
+ * if the device cannot be opened.
+ */
+static int
+do_ioctl(int dev_id, int opc, void *buf)
+{
+        int fd = open_ioc_dev(dev_id);
+
+        if (fd < 0)
+                return fd;
+
+        return ioctl(fd, opc, buf);
+}
+
+/*
+ * Open the configured dump file for appending.
+ * Returns the stream, or NULL if no dump file is configured (a message is
+ * printed) or fopen() fails.
+ */
+static FILE *
+get_dump_file()
+{
+        if (dump_filename)
+                return fopen(dump_filename, "a");
+
+        fprintf(stderr, "no dump filename\n");
+        return NULL;
+}
+
+/*
+ * Append one ioctl record to the dump file: a struct dump_hdr (magic,
+ * 'dev_id', 'opc') followed by ioc_len bytes of 'buf', where ioc_len is
+ * taken from the portal_ioctl_hdr at the start of 'buf'.
+ *
+ * The dump file should start with a description of which devices are
+ * used, but for now it is assumed that whatever app reads the file will
+ * know what to do.
+ *
+ * Returns 0 on success, -EINVAL if the file cannot be opened or written.
+ */
+int
+dump(int dev_id, int opc, void *buf)
+{
+        struct portal_ioctl_hdr *ioc_hdr = (struct portal_ioctl_hdr *) buf;
+        struct dump_hdr dump_hdr;
+        FILE *fp;
+        int written;
+
+        printf("dumping opc %x to %s\n", opc, dump_filename);
+
+        dump_hdr.magic = 0xdeadbeef;
+        dump_hdr.dev_id = dev_id;
+        dump_hdr.opc = opc;
+
+        fp = get_dump_file();
+        if (fp == NULL) {
+                fprintf(stderr, "%s: %s\n", dump_filename,
+                        strerror(errno));
+                return -EINVAL;
+        }
+
+        /* the payload is only attempted once the header went out */
+        written = (fwrite(&dump_hdr, sizeof(dump_hdr), 1, fp) == 1) &&
+                  (fwrite(buf, ioc_hdr->ioc_len, 1, fp) == 1);
+        fclose(fp);
+
+        if (!written) {
+                fprintf(stderr, "%s: %s\n", dump_filename,
+                        strerror(errno));
+                return -EINVAL;
+        }
+
+        return 0;
+}
+
+/*
+ * Register 'dev_name' as the device that ioctls for 'dev_id' are sent to,
+ * replacing (and closing) any previous registration for that id.  The
+ * device is not opened here.  'dev_name' is stored by pointer, not copied.
+ *
+ * Returns 'dev_id' on success, -EINVAL if 'dev_id' is out of range.
+ */
+int
+register_ioc_dev(int dev_id, const char * dev_name)
+{
+        /* number of slots, not sizeof() in bytes */
+        const int ndevs = sizeof(ioc_dev_list) / sizeof(ioc_dev_list[0]);
+
+        if (dev_id < 0 || dev_id >= ndevs)
+                return -EINVAL;
+
+        unregister_ioc_dev(dev_id);
+
+        ioc_dev_list[dev_id].dev_name = dev_name;
+        ioc_dev_list[dev_id].dev_fd = -1;
+
+        return dev_id;
+}
+
+/*
+ * Forget the registration for 'dev_id', closing its descriptor if one is
+ * open.  Out-of-range ids are ignored.
+ */
+void
+unregister_ioc_dev(int dev_id)
+{
+        /* number of slots, not sizeof() in bytes */
+        const int ndevs = sizeof(ioc_dev_list) / sizeof(ioc_dev_list[0]);
+
+        if (dev_id < 0 || dev_id >= ndevs)
+                return;
+
+        /* a never-registered slot is zero-filled, so dev_fd == 0 there;
+         * only trust dev_fd when a name is present */
+        if (ioc_dev_list[dev_id].dev_name != NULL &&
+            ioc_dev_list[dev_id].dev_fd >= 0)
+                close(ioc_dev_list[dev_id].dev_fd);
+
+        ioc_dev_list[dev_id].dev_name = NULL;
+        ioc_dev_list[dev_id].dev_fd = -1;
+}
+
+/*
+ * Set the ioctl dump file.  Once set, all ioctl buffers passed to
+ * l_ioctl() are appended to that file instead of being issued.
+ * The name is copied.
+ *
+ * Returns 0 on success, -EINVAL if 'file' is NULL, -ENOMEM if the name
+ * cannot be copied; on failure the previous setting is left unchanged.
+ */
+int
+set_ioctl_dump(char * file)
+{
+        char *copy;
+
+        if (file == NULL)
+                return -EINVAL;
+
+        /* copy first so a failed strdup() does not lose the old name */
+        copy = strdup(file);
+        if (copy == NULL)
+                return -ENOMEM;
+
+        free(dump_filename);
+        dump_filename = copy;
+        return 0;
+}
+
+/*
+ * Front end for all ioctls: record the request in the dump file when one
+ * is configured, otherwise issue it on the registered device.
+ * Returns whatever dump() or do_ioctl() returns.
+ */
+int
+l_ioctl(int dev_id, int opc, void *buf)
+{
+        return dump_filename ? dump(dev_id, opc, buf)
+                             : do_ioctl(dev_id, opc, buf);
+}
+
+/* Read an ioctl dump file, and call the ioc_func for each ioctl buffer
+ * in the file.  For example:
+ *
+ * parse_dump("lctl.dump", l_ioctl);
+ *
+ * Note: if using l_ioctl, then you also need to register_ioc_dev() for
+ * each device used in the dump.
+ *
+ * Each record is a struct dump_hdr followed by ioc_len bytes of ioctl
+ * buffer; the buffer is copied to a local scratch area before ioc_func
+ * is called with the record's dev_id and opc.
+ *
+ * Returns 0 when every record was replayed, -1 if a record is truncated,
+ * runs past the end of the file or is larger than the scratch buffer.
+ * Exits the process with status 1 if the file cannot be opened, stat'ed
+ * or mapped, is empty, or if ioc_func returns non-zero.
+ */
+int
+parse_dump(char * dump_file, int (*ioc_func)(int dev_id, int opc, void *))
+{
+        int fd, line = 0;
+        struct stat st;
+        char *start, *buf, *end;
+
+        fd = syscall(SYS_open, dump_file, O_RDONLY);
+        if (fd < 0) {
+                fprintf(stderr, "%s: %s\n", dump_file, strerror(errno));
+                exit(1);
+        }
+
+#warning FIXME: cleanup fstat issue here
+#ifndef SYS_fstat64
+#define __SYS_fstat__ SYS_fstat
+#else
+#define __SYS_fstat__ SYS_fstat64
+#endif
+        if (syscall(__SYS_fstat__, fd, &st)) {
+                perror("stat fails");
+                exit(1);
+        }
+
+        if (st.st_size < 1) {
+                fprintf(stderr, "KML is empty\n");
+                exit(1);
+        }
+
+        /* mmap() reports failure with MAP_FAILED, not NULL */
+        start = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE , fd, 0);
+        if (start == MAP_FAILED) {
+                perror("mmap fails");
+                exit(1);
+        }
+        close(fd);
+
+        buf = start;
+        end = start + st.st_size;
+        while (buf < end) {
+                struct dump_hdr *dump_hdr = (struct dump_hdr *) buf;
+                struct portal_ioctl_hdr * data;
+                char tmp[8096];
+                size_t left = end - buf;
+                size_t len;
+                int rc;
+
+                line++;
+
+                /* both headers must fit before ioc_len may be read */
+                if (left < sizeof(*dump_hdr) + sizeof(*data)) {
+                        fprintf(stderr, "dump file truncated in record %d\n",
+                                line);
+                        munmap(start, st.st_size);
+                        return -1;
+                }
+
+                data = (struct portal_ioctl_hdr *) (buf + sizeof(*dump_hdr));
+                len = data->ioc_len;
+
+                /* the payload starts after the dump_hdr */
+                if (len > left - sizeof(*dump_hdr)) {
+                        fprintf(stderr, "dump file overflow, %p + %d > %p\n", buf,
+                                data->ioc_len, end);
+                        munmap(start, st.st_size);
+                        return -1;
+                }
+
+                /* never copy more than the scratch buffer holds */
+                if (len > sizeof(tmp)) {
+                        fprintf(stderr, "record %d too large for replay buffer\n",
+                                line);
+                        munmap(start, st.st_size);
+                        return -1;
+                }
+
+                memcpy(tmp, data, len);
+
+                rc = ioc_func(dump_hdr->dev_id, dump_hdr->opc, tmp);
+                if (rc) {
+                        printf("failed: %d\n", rc);
+                        exit(1);
+                }
+
+                buf += len + sizeof(*dump_hdr);
+        }
+
+        munmap(start, st.st_size);
+        return 0;
+}
+
+/*
+ * "dump <file>" command: make 'file' the ioctl dump file (see
+ * set_ioctl_dump()).  Exactly one argument is required; otherwise a usage
+ * message is printed.  Always returns 0.
+ */
+int
+jt_ioc_dump(int argc, char **argv)
+{
+        /* argc == 1 used to slip through, handing a NULL argv[1] to
+         * printf() and strdup() */
+        if (argc != 2) {
+                fprintf(stderr, "usage: %s <file>\n", argv[0]);
+                return 0;
+        }
+        printf("setting dumpfile to: %s\n", argv[1]);
+
+        set_ioctl_dump(argv[1]);
+        return 0;
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.sf.net/projects/lustre/
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <sys/param.h>
+#include <assert.h>
+
+#include <config.h>
+#ifdef HAVE_LIBREADLINE
+#define READLINE_LIBRARY
+#include <readline/readline.h>
+#endif
+//extern char **completion_matches __P((char *, rl_compentry_func_t *));
+extern void using_history(void);
+extern void stifle_history(int);
+extern void add_history(char *);
+
+#include "parser.h"
+
+static command_t * top_level; /* Top level of commands, initialized by
+ * the parser's init routine
+ * (presumably Parser_init) */
+static char * parser_prompt = NULL;/* Parser prompt, set by the parser's
+ * init routine */
+static int done; /* Set to 1 if user types exit or quit */
+
+
+/* static functions */
+static char *skipwhitespace(char *s);
+static char *skiptowhitespace(char *s);
+static command_t *find_cmd(char *name, command_t cmds[], char **next);
+static int process(char *s, char **next, command_t *lookup, command_t **result,
+ char **prev);
+static void print_commands(char *str, command_t *table);
+
+/*
+ * Return a pointer to the first character of s that is not whitespace
+ * (this may be the terminating NUL).  The string is not modified.
+ */
+static char * skipwhitespace(char * s)
+{
+        /* The <ctype.h> classifiers need a value representable as
+         * unsigned char; passing a negative plain char is undefined.
+         * isspace('\0') is false, so the scan cannot run past the end. */
+        while (isspace((unsigned char)*s))
+                s++;
+        return s;
+}
+
+
+/*
+ * Return a pointer to the first whitespace character in s, or to the
+ * terminating NUL when s contains none.  The string is not modified.
+ */
+static char * skiptowhitespace(char * s)
+{
+        /* cast: <ctype.h> classifiers are undefined for negative char */
+        while (*s != '\0' && !isspace((unsigned char)*s))
+                s++;
+        return s;
+}
+
+/*
+ * Split line in place into tokens separated by spaces or tabs.
+ *
+ * Pointers to at most maxargs tokens are stored in argv[0..maxargs-1];
+ * further tokens are ignored.  line is modified by strtok(), which
+ * also means this function is not reentrant.
+ * Returns the number of tokens stored.
+ */
+static int line2args(char *line, char **argv, int maxargs)
+{
+        char *arg;
+        int i = 0;
+
+        /* Test the bound before storing so that argv[maxargs], one past
+         * the end of the caller's array, is never written. */
+        for (arg = strtok(line, " \t");
+             arg != NULL && i < maxargs;
+             arg = strtok(NULL, " \t"))
+                argv[i++] = arg;
+
+        return i;
+}
+
+/* Look up a command by exact, case-sensitive name in cmds, a table
+ * terminated by an entry whose pc_name is NULL.  Returns the matching
+ * entry, or NULL when no entry has that name. */
+static command_t *Parser_findargcmd(char *name, command_t cmds[])
+{
+        command_t *entry = cmds;
+
+        while (entry->pc_name != NULL) {
+                if (!strcmp(name, entry->pc_name))
+                        return entry;
+                entry++;
+        }
+        return NULL;
+}
+
+/* Run the command whose name is exactly argv[0], passing it argc and
+ * argv unchanged, and return the handler's result.  If no such command
+ * exists, list every name in cmds on stdout and return -1. */
+int Parser_execarg(int argc, char **argv, command_t cmds[])
+{
+        command_t *entry = Parser_findargcmd(argv[0], cmds);
+
+        if (entry != NULL)
+                return (entry->pc_func)(argc, argv);
+
+        printf("Try interactive use without arguments or use one of:\n");
+        for (entry = cmds; entry->pc_name != NULL; entry++)
+                printf("\"%s\" ", entry->pc_name);
+        printf("\nas argument.\n");
+
+        return -1;
+}
+
+/* Return the first entry of cmds whose name starts, ignoring case,
+   with the first whitespace-delimited token of name; NULL if there is
+   no such entry, the token is empty, or either argument is NULL.
+   On a match *next points at the token that follows; on a miss it
+   points just past the first token.  *next is left untouched when
+   cmds or name is NULL.  Does not modify *name. */
+static command_t * find_cmd(char * name, command_t cmds[], char ** next)
+{
+        command_t *entry;
+        char *token;
+        int toklen;
+
+        if (cmds == NULL || name == NULL)
+                return NULL;
+
+        /* isolate the first token: [token, *next) */
+        token = skipwhitespace(name);
+        *next = skiptowhitespace(token);
+        toklen = *next - token;
+        if (toklen == 0)
+                return NULL;
+
+        for (entry = cmds; entry->pc_name != NULL; entry++) {
+                if (strncasecmp(token, entry->pc_name, toklen) != 0)
+                        continue;
+
+                *next = skipwhitespace(*next);
+                return entry;
+        }
+        return NULL;
+}
+
+/* Recursively resolve the command line string s against the table
+   lookup.
+
+   On return *result is the entry found (NULL for CMD_NONE), *prev is
+   the start of the text that was matched at the deepest level, and
+   *next points past the matched token.  Returns:
+     CMD_NONE        no entry matches the next token
+     CMD_AMBIG       the token is a proper prefix of more than one
+                     entry; *result is the first of them
+     CMD_COMPLETE    *result has a handler (pc_func)
+     CMD_INCOMPLETE  *result only has sub-commands and the line ends
+                     before naming one */
+static int process(char *s, char ** next, command_t *lookup,
+                   command_t **result, char **prev)
+{
+        char *probe;
+
+        *result = find_cmd(s, lookup, next);
+        *prev = s;
+
+        /* non existent */
+        if (*result == NULL)
+                return CMD_NONE;
+
+        /* Ambiguous when s is not the exact command name and a later
+           table entry matches too.  The probe uses its own cursor so
+           that *next keeps pointing at the token after the match. */
+        if (strncasecmp(s, (*result)->pc_name, strlen((*result)->pc_name)) &&
+            find_cmd(s, (*result) + 1, &probe))
+                return CMD_AMBIG;
+
+        /* found a unique command: component or full? */
+        if ((*result)->pc_func)
+                return CMD_COMPLETE;
+
+        /* Test the character, not the pointer: the line is incomplete
+           when nothing follows a command that only has sub-commands. */
+        if (**next == '\0' && (*result)->pc_sub_cmd != NULL)
+                return CMD_INCOMPLETE;
+
+        return process(*next, next, (*result)->pc_sub_cmd, result, prev);
+}
+
+#ifdef HAVE_LIBREADLINE
+static command_t * match_tbl; /* Command completion against this table */
+
+/* Completion generator: called repeatedly for one word, with state 0
+ * on the first call.  Each call returns a strdup()ed copy of the next
+ * name in match_tbl that starts (ignoring case) with text, or NULL
+ * when the table is exhausted or no table is set. */
+static char * command_generator(const char * text, int state)
+{
+        static int cursor;      /* next table slot to look at */
+        static int prefix_len;  /* strlen(text) of the current word */
+        char *candidate;
+
+        if (match_tbl == NULL)
+                return NULL;
+
+        /* first call for this word: restart the scan */
+        if (state == 0) {
+                cursor = 0;
+                prefix_len = (int)strlen(text);
+        }
+
+        /* hand back the next name that partially matches text */
+        for (;;) {
+                candidate = match_tbl[cursor].pc_name;
+                if (candidate == NULL)
+                        break;
+                cursor++;
+
+                if (!strncasecmp(candidate, text, prefix_len))
+                        return strdup(candidate);
+        }
+
+        /* no more matches */
+        return NULL;
+}
+
+/* Completion hook (installed in init_input()).  Walks the tokens
+ * already in rl_line_buffer to decide which command table the word
+ * being completed belongs to, then lets command_generator() produce
+ * the candidates.  start and end are unused. */
+static char **command_completion(char * text, int start, int end)
+{
+        command_t *hit;
+        char *cursor;
+
+        match_tbl = top_level;
+        hit = find_cmd(rl_line_buffer, match_tbl, &cursor);
+        while (hit != NULL) {
+                /* descend only once the token has been finished with a space */
+                if (cursor[-1] == ' ')
+                        match_tbl = hit->pc_sub_cmd;
+                hit = find_cmd(cursor, match_tbl, &cursor);
+        }
+
+        return completion_matches(text, command_generator);
+}
+#endif
+
+/* Resolve line against the top-level command table and act on it:
+ * run the handler of a complete command (printing its help text when
+ * the handler returns CMD_HELP), otherwise report on stderr why the
+ * line could not be run.  line is split in place when a handler is
+ * called.  Returns the handler's result, or 0 if none was run. */
+int execute_line(char * line)
+{
+        command_t *cmd, *ambig;
+        char *prev;
+        char *next, *tmp;
+        char *argv[MAXARGS];
+        int idx;
+        int nargs;
+        int rc = 0;
+        int status;
+
+        status = process(line, &next, top_level, &cmd, &prev);
+
+        if (status == CMD_AMBIG) {
+                fprintf(stderr, "Ambiguous command \'%s\'\nOptions: ", line);
+                /* list every remaining table entry the token matches */
+                for (ambig = find_cmd(prev, cmd, &tmp);
+                     ambig != NULL;
+                     ambig = find_cmd(prev, cmd, &tmp)) {
+                        fprintf(stderr, "%s ", ambig->pc_name);
+                        cmd = ambig + 1;
+                }
+                fprintf(stderr, "\n");
+        } else if (status == CMD_NONE) {
+                fprintf(stderr, "No such command, type help\n");
+        } else if (status == CMD_INCOMPLETE) {
+                fprintf(stderr,
+                        "'%s' incomplete command.  Use '%s x' where x is one of:\n",
+                        line, line);
+                fprintf(stderr, "\t");
+                idx = 0;
+                while (cmd->pc_sub_cmd[idx].pc_name) {
+                        fprintf(stderr, "%s ", cmd->pc_sub_cmd[idx].pc_name);
+                        idx++;
+                }
+                fprintf(stderr, "\n");
+        } else if (status == CMD_COMPLETE) {
+                nargs = line2args(line, argv, MAXARGS);
+                rc = (cmd->pc_func)(nargs, argv);
+
+                if (rc == CMD_HELP)
+                        fprintf(stderr, "%s\n", cmd->pc_help);
+        }
+
+        return rc;
+}
+
+/* Do-nothing stub that always returns 0; init_input() installs it in
+ * place of the terminal prep/deprep hooks when stdin is not a tty. */
+int
+noop_fn ()
+{
+        return 0;
+}
+
+/* Prepare line input and report whether stdin is a terminal (the
+   isatty() result).  With libreadline this also sets up history and
+   the completion hooks, and disables terminal setup for non-tty input.
+   Without it nothing else happens -- just in case you're ever in an
+   airplane and discover you forgot to install readline-dev. :) */
+int init_input()
+{
+        int is_tty;
+
+        is_tty = isatty(fileno(stdin));
+
+#ifdef HAVE_LIBREADLINE
+        using_history();
+        stifle_history(HISTORY);
+
+        if (is_tty == 0) {
+                rl_prep_term_function = (rl_vintfunc_t *)noop_fn;
+                rl_deprep_term_function = (rl_voidfunc_t *)noop_fn;
+        }
+
+        rl_attempted_completion_function = (CPPFunction *)command_completion;
+        rl_completion_entry_function = (void *)command_generator;
+#endif
+        return is_tty;
+}
+
+#ifndef HAVE_LIBREADLINE
+#define add_history(s)
+/*
+ * Stand-in for readline() when libreadline is not available.
+ *
+ * Prints prompt (if not NULL) on stdout, reads one line of at most
+ * 2047 characters from stdin and strips a trailing newline.
+ * Returns a strdup()ed copy that the caller must free(), or NULL on
+ * end of input or read error.
+ */
+char * readline(char * prompt)
+{
+        char line[2048];
+        size_t n;
+
+        if (prompt) {
+                printf ("%s", prompt);
+                /* the prompt has no newline: push it out before blocking */
+                fflush (stdout);
+        }
+        if (fgets(line, sizeof(line), stdin) == NULL)
+                return (NULL);
+        n = strlen(line);
+        if (n && line[n-1] == '\n')
+                line[n-1] = '\0';
+        return strdup(line);
+}
+#endif
+
+/* The interactive command loop: read lines until input ends or a
+ * command sets `done`, executing every non-blank line.  The prompt is
+ * shown only when stdin is a terminal.  Returns the result of the
+ * last line executed (0 if none was). */
+int Parser_commands(void)
+{
+        int last_rc = 0;
+        int is_tty = init_input();
+
+        while (!done) {
+                char *input = readline(is_tty ? parser_prompt : NULL);
+                char *start;
+
+                if (input == NULL)
+                        break;
+
+                start = skipwhitespace(input);
+                if (*start != '\0') {
+                        add_history(start);
+                        last_rc = execute_line(start);
+                }
+
+                free(input);
+        }
+        return last_rc;
+}
+
+
+/* (Re)initialise the parser: clear the quit flag, install cmds as the
+ * top-level command table and keep a private copy of prompt, freeing
+ * any copy held from an earlier call. */
+void Parser_init(char * prompt, command_t * cmds)
+{
+        char *old_prompt = parser_prompt;
+
+        done = 0;
+        top_level = cmds;
+        if (old_prompt != NULL)
+                free(old_prompt);
+        parser_prompt = strdup(prompt);
+}
+
+/* Ask the command loop to stop and release the prompt copy.  Both
+ * arguments are ignored. */
+void Parser_exit(int argc, char *argv[])
+{
+        char *old_prompt = parser_prompt;
+
+        parser_prompt = NULL;
+        done = 1;
+        free(old_prompt);
+}
+
+/*
+ * Convert the start of s to an integer, choosing the base from its
+ * prefix: "0x"/"0X" is hexadecimal, any other leading '0' is octal,
+ * everything else is decimal.
+ *
+ * Returns the sscanf() result: 1 when a number was stored in *val,
+ * otherwise 0 or EOF with *val left untouched.  Trailing characters
+ * after the number are not rejected.
+ */
+int Parser_int(char *s, int *val)
+{
+        unsigned int uval;
+        int ret;
+
+        if (s[0] != '0')
+                return sscanf(s, "%d", val);
+
+        /* %o and %x store through unsigned int *, so convert via a
+         * temporary instead of aliasing the caller's int */
+        if (s[1] == 'x' || s[1] == 'X')
+                ret = sscanf(s + 2, "%x", &uval);
+        else
+                ret = sscanf(s, "%o", &uval);
+
+        if (ret == 1)
+                *val = (int)uval;
+
+        return(ret);
+}
+
+
+/* Quick help: print the name of every runnable command in the
+ * top-level table (and its sub-tables) on stdout.  Both arguments are
+ * ignored. */
+void Parser_qhelp(int argc, char *argv[])
+{
+        fputs("Available commands are:\n", stdout);
+        print_commands(NULL, top_level);
+        fputs("For more help type: help command-name\n", stdout);
+}
+
+/*
+ * "help" command handler.
+ *
+ * With no arguments prints the quick help.  Otherwise argv[1..] are
+ * joined with single spaces into a command line, which is resolved
+ * against the top-level table; the help text of the command, or the
+ * reason it could not be resolved, is written to stderr.
+ * Always returns 0.
+ */
+int Parser_help(int argc, char **argv)
+{
+        char line[1024];
+        char *next, *prev, *tmp;
+        command_t *result, *ambig;
+        size_t used = 0;
+        int i;
+
+        if ( argc == 1 ) {
+                Parser_qhelp(argc, argv);
+                return 0;
+        }
+
+        /* Rebuild the command line.  The words must stay separated so
+         * that sub-commands can be looked up, and the copy must not
+         * run past the end of line[]. */
+        line[0]='\0';
+        for ( i = 1 ; i < argc ; i++ ) {
+                int n = snprintf(line + used, sizeof(line) - used, "%s%s",
+                                 (i > 1) ? " " : "", argv[i]);
+
+                if (n < 0 || (size_t)n >= sizeof(line) - used) {
+                        fprintf(stderr, "help: command name too long\n");
+                        return 0;
+                }
+                used += (size_t)n;
+        }
+
+        switch ( process(line, &next, top_level, &result, &prev) ) {
+        case CMD_COMPLETE:
+                fprintf(stderr, "%s: %s\n",line, result->pc_help);
+                break;
+        case CMD_NONE:
+                fprintf(stderr, "%s: Unknown command.\n", line);
+                break;
+        case CMD_INCOMPLETE:
+                fprintf(stderr,
+                        "'%s' incomplete command.  Use '%s x' where x is one of:\n",
+                        line, line);
+                fprintf(stderr, "\t");
+                for (i = 0; result->pc_sub_cmd[i].pc_name; i++) {
+                        fprintf(stderr, "%s ", result->pc_sub_cmd[i].pc_name);
+                }
+                fprintf(stderr, "\n");
+                break;
+        case CMD_AMBIG:
+                fprintf(stderr, "Ambiguous command \'%s\'\nOptions: ", line);
+                /* list every remaining table entry the token matches */
+                while( (ambig = find_cmd(prev, result, &tmp)) ) {
+                        fprintf(stderr, "%s ", ambig->pc_name);
+                        result = ambig + 1;
+                }
+                fprintf(stderr, "\n");
+                break;
+        }
+        return 0;
+}
+
+
+/* Print the detailed help for cmd, exactly as if the user had typed
+ * "help <cmd>". */
+void Parser_printhelp(char *cmd)
+{
+        char *help_argv[2];
+
+        help_argv[0] = "help";
+        help_argv[1] = cmd;
+        Parser_help(2, help_argv);
+}
+
+/*************************************************************************
+ * COMMANDS *
+ *************************************************************************/
+
+
+/* Print, one per line on stdout, every entry of table that has a
+ * handler, prefixed by str (the path of parent command names) when
+ * str is not NULL; then recurse into each entry's sub-command table.
+ * A parent path longer than the 80-byte scratch buffer is truncated
+ * rather than overflowing it. */
+static void print_commands(char * str, command_t * table) {
+        command_t * cmds;
+        char buf[80];
+
+        for (cmds = table; cmds->pc_name; cmds++) {
+                if (cmds->pc_func) {
+                        if (str) printf("\t%s %s\n", str, cmds->pc_name);
+                        else printf("\t%s\n", cmds->pc_name);
+                }
+                if (cmds->pc_sub_cmd) {
+                        if (str) {
+                                /* bounded: command names come from caller tables */
+                                snprintf(buf, sizeof(buf), "%s %s", str,
+                                         cmds->pc_name);
+                                print_commands(buf, cmds->pc_sub_cmd);
+                        } else {
+                                print_commands(cmds->pc_name, cmds->pc_sub_cmd);
+                        }
+                }
+        }
+}
+
+/*
+ * Prompt with "<prompt> [<deft>]: " and read one line.
+ *
+ * The reply, or deft when the reply is empty or input has ended, is
+ * copied into res, truncated to len - 1 characters and always
+ * NUL-terminated (nothing is written when len is 0).
+ * Returns res, or NULL when input has ended (res then holds deft).
+ */
+char *Parser_getstr(const char *prompt, const char *deft, char *res,
+                    size_t len)
+{
+        char *line = NULL;
+        int size = strlen(prompt) + strlen(deft) + 8;
+        char *theprompt;
+        theprompt = malloc(size);
+        assert(theprompt);
+
+        snprintf(theprompt, size, "%s [%s]: ", prompt, deft);
+
+        line  = readline(theprompt);
+        free(theprompt);
+
+        if (len > 0) {
+                const char *src;
+
+                src = (line == NULL || *line == '\0') ? deft : line;
+                /* strncpy() does not terminate a truncated copy */
+                strncpy(res, src, len - 1);
+                res[len - 1] = '\0';
+        }
+
+        if ( line ) {
+                free(line);
+                return res;
+        } else {
+                return NULL;
+        }
+}
+
+/*
+ * Prompt for an integer until a valid one is entered.
+ *
+ * The reply is parsed with Parser_arg2int() in the given base and must
+ * lie in [min, max]; otherwise the user is asked again.  An empty
+ * reply, or the end of input, selects deft.
+ * Returns the chosen value (narrowed to int).
+ */
+int Parser_getint(const char *prompt, long min, long max, long deft, int base)
+{
+        int rc;
+        long result;
+        char *line;
+        /* room for two full-width 64-bit numbers plus the punctuation */
+        int size = strlen(prompt) + 64;
+        char *theprompt = malloc(size);
+        assert(theprompt);
+        snprintf(theprompt, size, "%s [%ld, (0x%lx)]: ", prompt, deft,
+                 (unsigned long)deft);
+
+        fflush(stdout);
+
+        for (;;) {
+                line = readline(theprompt);
+                if ( !line ) {
+                        /* no more input will arrive: asking again would
+                         * spin forever, so take the default */
+                        result = deft;
+                        break;
+                }
+                if ( *line == '\0' ) {
+                        free(line);
+                        result = deft;
+                        break;
+                }
+                rc = Parser_arg2int(line, &result, base);
+                free(line);
+                if ( rc != 0 ) {
+                        fprintf(stdout, "Invalid string.\n");
+                        fflush(stdout);
+                } else if ( result > max || result < min ) {
+                        fprintf(stdout, "Error: response must lie between %ld and %ld.\n",
+                                min, max);
+                        fflush(stdout);
+                } else {
+                        break;
+                }
+        }
+
+        free(theprompt);
+        return result;
+
+}
+
+/* Prompt with "<prompt> [Y|N]: " until the reply starts with one of
+ * yYnN.  An empty reply or the end of input selects deft, which must
+ * be 0 or 1 (asserted).  Returns 1 for yes, 0 for no. */
+int Parser_getbool(const char *prompt, int deft)
+{
+        int answer = 0;
+        int decided = 0;
+        char *reply;
+        int psize = strlen(prompt) + 8;
+        char *full_prompt = malloc(psize);
+        assert(full_prompt);
+
+        fflush(stdout);
+
+        if ( deft != 0 && deft != 1 ) {
+                fprintf(stderr, "Error: Parser_getbool given bad default (%d).\n",
+                        deft);
+                assert ( 0 );
+        }
+        sprintf(full_prompt, "%s [%s]: ", prompt, deft ? "Y" : "N");
+
+        while (!decided) {
+                reply = readline(full_prompt);
+                if (reply == NULL || *reply == '\0') {
+                        answer = deft;
+                        decided = 1;
+                        continue;
+                }
+
+                switch (*reply) {
+                case 'y':
+                case 'Y':
+                        answer = 1;
+                        decided = 1;
+                        break;
+                case 'n':
+                case 'N':
+                        answer = 0;
+                        decided = 1;
+                        break;
+                default:
+                        free(reply);
+                        fprintf(stdout, "Invalid string. Must start with yY or nN\n");
+                        fflush(stdout);
+                        break;
+                }
+        }
+
+        /* reply is NULL here only when input ended */
+        free(reply);
+        free(full_prompt);
+        return answer;
+}
+
+/* Parse an integer out of inp in the given base; when inp is NULL or
+ * is not a valid number, prompt for one instead with the given
+ * default and bounds (see Parser_getint()). */
+long Parser_intarg(const char *inp, const char *prompt, int deft,
+                   int min, int max, int base)
+{
+        long result;
+
+        if ( inp != NULL && Parser_arg2int(inp, &result, base) == 0 )
+                return result;
+
+        /* Parser_getint() takes (prompt, min, max, deft, base) */
+        return Parser_getint(prompt, min, max, deft, base);
+}
+
+/* Return inp itself when it is a non-empty string; otherwise prompt
+ * for a string with Parser_getstr(), storing the reply (or deft) in
+ * answer, which holds len bytes, and return that call's result. */
+char *Parser_strarg(char *inp, const char *prompt, const char *deft,
+                    char *answer, int len)
+{
+        if ( inp != NULL && *inp != '\0' )
+                return inp;
+
+        return Parser_getstr(prompt, deft, answer, len);
+}
+
+/* Convert the whole of inp to a long using strtol(3) rules for base
+   (0, or 2..36).  Returns 0 on success with the value in *result;
+   returns 1 when base is out of range, inp is empty, or inp has
+   characters that are not part of the number. */
+int Parser_arg2int(const char *inp, long *result, int base)
+{
+        char *stop;
+        int base_ok = (base == 0) || (base >= 2 && base <= 36);
+
+        if ( !base_ok )
+                return 1;
+
+        *result = strtol(inp, &stop, base);
+
+        /* valid only if something was given and all of it was consumed */
+        return ( *inp == '\0' || *stop != '\0' ) ? 1 : 0;
+}
+
+/* Convert a human readable size string to an int.  The string is a
+ * decimal number optionally followed by one of k/K, m/M or g/G, which
+ * scale it by 2^10, 2^20 or 2^30: "1k" -> 1024.  Anything after that
+ * is ignored.  Returns 0 with the value in *sizep, or -1 when str
+ * does not start with a number. */
+int Parser_size (int *sizep, char *str) {
+        int value;
+        int shift = 0;
+        int nfields;
+        char suffix[32];
+
+        nfields = sscanf (str, "%d%1[gGmMkK]", &value, suffix);
+        if (nfields != 1 && nfields != 2)
+                return (-1);
+
+        if (nfields == 2) {
+                switch (suffix[0]) {
+                case 'g':
+                case 'G':
+                        shift = 30;
+                        break;
+                case 'm':
+                case 'M':
+                        shift = 20;
+                        break;
+                case 'k':
+                case 'K':
+                        shift = 10;
+                        break;
+                default:
+                        break;
+                }
+        }
+
+        *sizep = shift ? (value << shift) : value;
+        return (0);
+}
+
+/* Convert a string boolean to an int, ignoring case:
+ * "no", "n", "off", "disable" -> 0; "yes", "y", "on", "enable" -> 1.
+ * Returns 0 with the value in *b, or -1 (leaving *b alone) for any
+ * other string. */
+int Parser_bool (int *b, char *str) {
+        static const char *const false_words[] = { "no", "n", "off", "disable" };
+        static const char *const true_words[] = { "yes", "y", "on", "enable" };
+        size_t i;
+
+        for (i = 0; i < sizeof (false_words) / sizeof (false_words[0]); i++) {
+                if (strcasecmp (str, false_words[i]) == 0) {
+                        *b = 0;
+                        return (0);
+                }
+        }
+
+        for (i = 0; i < sizeof (true_words) / sizeof (true_words[0]); i++) {
+                if (strcasecmp (str, true_words[i]) == 0) {
+                        *b = 1;
+                        return (0);
+                }
+        }
+
+        return (-1);
+}
+
+/* "quit" command handler: set the flag that ends the
+ * Parser_commands() loop.  The arguments are ignored; returns 0. */
+int Parser_quit(int argc, char **argv)
+{
+        (void)argc;
+        (void)argv;
+
+        done = 1;
+        return 0;
+}
--- /dev/null
+#ifndef _PARSER_H_
+#define _PARSER_H_
+
+#define HISTORY 100 /* Don't let history grow unbounded */
+#define MAXARGS 100 /* Size of the argv[] array execute_line() builds */
+
+/* Outcome of resolving a command line.  CMD_HELP is different: a
+ * command handler returns it to make execute_line() print the
+ * command's help text. */
+#define CMD_COMPLETE 0 /* resolved to an entry that has a handler */
+#define CMD_INCOMPLETE 1 /* entry has only sub-commands and none was named */
+#define CMD_NONE 2 /* no entry matches */
+#define CMD_AMBIG 3 /* the token is a prefix of more than one entry */
+#define CMD_HELP 4
+
+/* One entry of a command table.  A table is an array of these that
+ * ends with an entry whose pc_name is NULL. */
+typedef struct parser_cmd {
+ char *pc_name; /* command name, matched ignoring case */
+ int (* pc_func)(int, char **); /* handler called with (argc, argv), or NULL */
+ struct parser_cmd * pc_sub_cmd; /* table of sub-commands, or NULL */
+ char *pc_help; /* help text printed for this command */
+} command_t;
+
+/* Name / handler / help triple.  NOTE(review): no use of this type is
+ * visible in the parser sources shown here -- confirm it is still
+ * needed. */
+typedef struct argcmd {
+ char *ac_name;
+ int (*ac_func)(int, char **);
+ char *ac_help;
+} argcmd_t;
+
+/* Network endpoint description.  NOTE(review): no use of this type is
+ * visible in the parser sources shown here; field meanings are
+ * presumed from their names -- confirm against callers. */
+typedef struct network {
+ char *type;
+ char *server;
+ int port;
+} network_t;
+
+/* Command handler that ends the command loop; always returns 0 */
+int Parser_quit(int argc, char **argv);
+void Parser_init(char *, command_t *); /* Set prompt and load command list */
+int Parser_commands(void); /* Start the command parser */
+void Parser_qhelp(int, char **); /* Quick help routine */
+int Parser_help(int, char **); /* Detailed help routine */
+void Parser_printhelp(char *); /* Detailed help for one named command */
+void Parser_exit(int, char **); /* Shuts down command parser */
+/* Runs the command named exactly argv[0] from cmds; -1 if there is none */
+int Parser_execarg(int argc, char **argv, command_t cmds[]);
+/* Resolves and runs one command line; the line is split in place */
+int execute_line(char * line);
+
+/* Converts a string to an integer (0x prefix: hex, leading 0: octal);
+ * returns the sscanf() result */
+int Parser_int(char *, int *);
+
+/* Prompts for a string, with default values and a maximum length */
+char *Parser_getstr(const char *prompt, const char *deft, char *res,
+ size_t len);
+
+/* Prompts for an integer, with minimum, maximum and default values and base */
+int Parser_getint(const char *prompt, long min, long max, long deft,
+ int base);
+
+/* Prompts for a yes/no, with default */
+int Parser_getbool(const char *prompt, int deft);
+
+/* Extracts an integer from a string, or prompts if it cannot get one */
+long Parser_intarg(const char *inp, const char *prompt, int deft,
+ int min, int max, int base);
+
+/* Extracts a word from the input, or prompts if it cannot get one */
+char *Parser_strarg(char *inp, const char *prompt, const char *deft,
+ char *answer, int len);
+
+/* Extracts an integer from a string with a base; returns 0 on success */
+int Parser_arg2int(const char *inp, long *result, int base);
+
+/* Convert human readable size string to an int; "1k" -> 1024 */
+int Parser_size(int *sizep, char *str);
+
+/* Convert a string boolean to an int; "enable" -> 1 */
+int Parser_bool(int *b, char *str);
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/tcp.h>
+#include <netdb.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <time.h>
+#include <asm/byteorder.h>
+
+#include <portals/api-support.h>
+#include <portals/ptlctl.h>
+#include <portals/list.h>
+#include <portals/lib-types.h>
+#include "parser.h"
+
+/* NOTE(review): these three are defined here but not used in the part
+ * of the file visible to this review -- presumably referenced by other
+ * objects; confirm. */
+unsigned int portal_debug;
+unsigned int portal_printk;
+unsigned int portal_stack;
+
+
+/* State shared by the jt_ptl_* command handlers */
+static ptl_nid_t g_nid = 0; /* peer nid, set by jt_ptl_connect() */
+static unsigned int g_nal = 0; /* selected NAL; 0 until jt_ptl_network() succeeds */
+static unsigned short g_port = 0; /* port given to jt_ptl_connect() */
+
+/* Socket options applied by jt_ptl_connect(); a buffer size of 0
+ * leaves that buffer size unchanged */
+static int g_socket_txmem = 0;
+static int g_socket_rxmem = 0;
+static int g_socket_nonagle = 1;
+
+/* One row of a name <-> number table; a table ends with a row whose
+ * name is NULL */
+typedef struct
+{
+ char *name;
+ int num;
+} name2num_t;
+
+/* NAL names accepted by the 'network' command and their NAL numbers */
+static name2num_t nalnames[] = {
+ {"tcp", SOCKNAL},
+ {"toe", TOENAL},
+ {"elan", QSWNAL},
+ {"gm", GMNAL},
+ {"scimac", SCIMACNAL},
+ {NULL, -1}
+};
+
+/* Return the row of table whose name equals str (case-sensitive), or
+ * NULL if the NULL-name terminator is reached first. */
+static name2num_t *
+name2num_lookup_name (name2num_t *table, char *str)
+{
+        name2num_t *row;
+
+        for (row = table; row->name != NULL; row++) {
+                if (strcmp (str, row->name) == 0)
+                        return (row);
+        }
+        return (NULL);
+}
+
+/* Return the row of table whose num equals num, or NULL if the
+ * NULL-name terminator is reached first. */
+static name2num_t *
+name2num_lookup_num (name2num_t *table, int num)
+{
+        name2num_t *row;
+
+        for (row = table; row->name != NULL; row++) {
+                if (row->num == num)
+                        return (row);
+        }
+        return (NULL);
+}
+
+/* Map a NAL name from nalnames[] to its NAL number; 0 if the name is
+ * not known. */
+int
+ptl_name2nal (char *str)
+{
+        name2num_t *match = name2num_lookup_name (nalnames, str);
+
+        if (match == NULL)
+                return (0);
+
+        return (match->num);
+}
+
+/* Map a NAL number to its name from nalnames[]; "???" if the number
+ * is not in the table. */
+static char *
+nal2name (int nal)
+{
+        name2num_t *match = name2num_lookup_num (nalnames, nal);
+
+        if (match == NULL)
+                return ("???");
+
+        return (match->name);
+}
+
+/* Guess the NAL a nid belongs to: SOCKNAL when any of bits 16..31 of
+ * the nid are set, QSWNAL otherwise. */
+static int
+nid2nal (ptl_nid_t nid)
+{
+        __u32 low32 = (__u32)nid;
+
+        /* BIG pragmatic assumption */
+        if ((low32 & 0xffff0000) != 0)
+                return (SOCKNAL);
+
+        return (QSWNAL);
+}
+
+/*
+ * Parse str into a nid.  Tried in order:
+ *   1. dotted quad "a.b.c.d" with every part in 0..255
+ *   2. a host name (only when str starts with a letter), resolved
+ *      with gethostbyname(); the address is stored in HOST byte order
+ *   3. a number in "%i" syntax (decimal, 0x hex or 0 octal)
+ *   4. a bare hexadecimal number
+ * Returns 0 with the nid in *nidp, or -1 if nothing matched.
+ */
+int
+ptl_parse_nid (ptl_nid_t *nidp, char *str)
+{
+        struct hostent *he;
+        int             a;
+        int             b;
+        int             c;
+        int             d;
+
+        if (sscanf (str, "%d.%d.%d.%d", &a, &b, &c, &d) == 4 &&
+            (a & ~0xff) == 0 && (b & ~0xff) == 0 &&
+            (c & ~0xff) == 0 && (d & ~0xff) == 0)
+        {
+                /* shift as unsigned: a << 24 overflows int once a >= 128 */
+                __u32 addr = ((__u32)a << 24) | ((__u32)b << 16) |
+                             ((__u32)c << 8) | (__u32)d;
+
+                *nidp = (ptl_nid_t)addr;
+                return (0);
+        }
+
+        if ((('a' <= str[0] && str[0] <= 'z') ||
+             ('A' <= str[0] && str[0] <= 'Z')) &&
+            (he = gethostbyname (str)) != NULL)
+        {
+                __u32 addr;
+
+                /* h_addr is a char buffer: copy rather than cast */
+                memcpy (&addr, he->h_addr, sizeof (addr));
+
+                *nidp = (ptl_nid_t)ntohl(addr);  /* HOST byte order */
+                return (0);
+        }
+
+        if (sscanf (str, "%i", &a) == 1)
+        {
+                *nidp = (ptl_nid_t)a;
+                return (0);
+        }
+
+        if (sscanf (str, "%x", &a) == 1)
+        {
+                *nidp = (ptl_nid_t) a;
+                return (0);
+        }
+
+        return (-1);
+}
+
+/* Format nid into buffer, in a form chosen by nid2nal(): decimal for
+ * QSWNAL, hex for SCIMACNAL, and for SOCKNAL the host name found by
+ * reverse lookup or, failing that, a dotted quad.  buffer must be
+ * large enough for the result (no size is checked).  Returns buffer. */
+char *
+ptl_nid2str (char *buffer, ptl_nid_t nid)
+{
+        int nal = nid2nal(nid);
+
+        if (nal == QSWNAL) {
+                sprintf (buffer, LPD64, nid);
+        } else if (nal == SCIMACNAL) {
+                sprintf (buffer, LPX64, nid);
+        } else if (nal == SOCKNAL) {
+                __u32 net_addr = htonl((__u32)nid);  /* back to NETWORK byte order */
+                struct hostent *host = gethostbyaddr ((const char *)&net_addr,
+                                                      sizeof (net_addr), AF_INET);
+
+                if (host == NULL) {
+                        __u32 host_addr = (__u32)nid;
+
+                        sprintf (buffer, "%d.%d.%d.%d",
+                                 (host_addr>>24)&0xff, (host_addr>>16)&0xff,
+                                 (host_addr>>8)&0xff, host_addr&0xff);
+                } else {
+                        strcpy (buffer, host->h_name);
+                }
+        } else {
+                sprintf (buffer, "nid2nal broken");
+        }
+
+        return (buffer);
+}
+
+/*
+ * Write exactly nob bytes from buffer to cfd, retrying after EINTR
+ * and continuing after short writes.
+ * Returns 0 on success, or the negative write() result on error
+ * (errno is set by write()).  A write() that returns 0 aborts the
+ * program.
+ */
+int
+sock_write (int cfd, void *buffer, int nob)
+{
+        while (nob > 0)
+        {
+                int rc = write (cfd, buffer, nob);
+
+                if (rc < 0)
+                {
+                        if (errno == EINTR)
+                                continue;
+
+                        return (rc);
+                }
+
+                if (rc == 0)
+                {
+                        fprintf (stderr, "Unexpected zero sock_write\n");
+                        abort();
+                }
+
+                /* step past the rc bytes just written, not past the
+                 * number of bytes still outstanding */
+                buffer = (char *)buffer + rc;
+                nob -= rc;
+        }
+
+        return (0);
+}
+
+/*
+ * Read exactly nob bytes from cfd into buffer, retrying after EINTR
+ * and continuing after short reads.
+ * Returns 0 on success.  On error returns the negative read() result
+ * (errno set by read()); if the peer closes before nob bytes have
+ * arrived, returns -1 with errno set to ECONNABORTED.
+ */
+int
+sock_read (int cfd, void *buffer, int nob)
+{
+        while (nob > 0)
+        {
+                int rc = read (cfd, buffer, nob);
+
+                if (rc < 0)
+                {
+                        if (errno == EINTR)
+                                continue;
+
+                        return (rc);
+                }
+
+                if (rc == 0)                    /* EOF */
+                {
+                        errno = ECONNABORTED;
+                        return (-1);
+                }
+
+                /* step past the rc bytes just read, not past the number
+                 * of bytes still outstanding */
+                buffer = (char *)buffer + rc;
+                nob -= rc;
+        }
+
+        return (0);
+}
+
+/* Set up the portals control utilities by calling register_ioc_dev()
+ * with PORTALS_DEV_ID and PORTALS_DEV_PATH.  The arguments are
+ * ignored; always returns 0. */
+int ptl_initialize(int argc, char **argv)
+{
+        (void)argc;
+        (void)argv;
+
+        register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH);
+
+        return 0;
+}
+
+
+/* 'network' command: select the NAL named by argv[1] for the commands
+ * that follow (stored in g_nal).  With a wrong argument count or an
+ * unknown name, print the usage and the known NAL names to stderr and
+ * leave g_nal unchanged.  Always returns 0. */
+int jt_ptl_network(int argc, char **argv)
+{
+        name2num_t *entry;
+        int         nal = 0;
+
+        if (argc == 2)
+                nal = ptl_name2nal (argv[1]);
+
+        if (nal != 0) {
+                g_nal = nal;
+                return (0);
+        }
+
+        fprintf(stderr, "usage: %s \n", argv[0]);
+        for (entry = nalnames; entry->name != NULL; entry++)
+                fprintf (stderr, "%s%s", entry == nalnames ? "<" : "|", entry->name);
+        fprintf(stderr, ">\n");
+
+        return (0);
+}
+
+/*
+ * Exchange HELLO headers with the peer on the connected socket cfd.
+ *
+ * Sends a header carrying the protocol magic/version (in the dest_nid
+ * field) and my_nid, then reads the peer's header back and checks its
+ * magic, version, type and (zero) payload length.
+ * Returns 0 with the peer's nid in *peer_nid, or -1 on any I/O error
+ * or mismatch (a message is printed to stderr).
+ */
+int
+exchange_nids (int cfd, ptl_nid_t my_nid, ptl_nid_t *peer_nid)
+{
+        int                      rc;
+        ptl_hdr_t                hdr;
+        ptl_magicversion_t      *hmv = (ptl_magicversion_t *)&hdr.dest_nid;
+
+        LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid));
+
+        memset (&hdr, 0, sizeof (hdr));
+
+        hmv->magic          = __cpu_to_le32 (PORTALS_PROTO_MAGIC);
+        hmv->version_major  = __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR);
+        hmv->version_minor  = __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR);
+
+        hdr.src_nid = __cpu_to_le64 (my_nid);
+        hdr.type = __cpu_to_le32 (PTL_MSG_HELLO);
+
+        /* Assume there's sufficient socket buffering for a portals HELLO header */
+        rc = sock_write (cfd, &hdr, sizeof (hdr));
+        if (rc != 0) {
+                perror ("Can't send initial HELLO");
+                return (-1);
+        }
+
+        /* First few bytes down the wire are the portals protocol magic and
+         * version, no matter what protocol version we're running. */
+
+        rc = sock_read (cfd, hmv, sizeof (*hmv));
+        if (rc != 0) {
+                perror ("Can't read from peer");
+                return (-1);
+        }
+
+        /* everything read from the wire is little-endian */
+        if (__le32_to_cpu (hmv->magic) != PORTALS_PROTO_MAGIC) {
+                fprintf (stderr, "Bad magic %#08x (%#08x expected)\n",
+                         __le32_to_cpu (hmv->magic), PORTALS_PROTO_MAGIC);
+                return (-1);
+        }
+
+        if (__le16_to_cpu (hmv->version_major) != PORTALS_PROTO_VERSION_MAJOR ||
+            __le16_to_cpu (hmv->version_minor) != PORTALS_PROTO_VERSION_MINOR) {
+                fprintf (stderr, "Incompatible protocol version %d.%d (%d.%d expected)\n",
+                         __le16_to_cpu (hmv->version_major),
+                         __le16_to_cpu (hmv->version_minor),
+                         PORTALS_PROTO_VERSION_MAJOR,
+                         PORTALS_PROTO_VERSION_MINOR);
+                /* the rest of the header cannot be trusted either */
+                return (-1);
+        }
+
+        /* version 0 sends magic/version as the dest_nid of a 'hello' header,
+         * so read the rest of it in now... */
+        LASSERT (PORTALS_PROTO_VERSION_MAJOR == 0);
+        rc = sock_read (cfd, hmv + 1, sizeof (hdr) - sizeof (*hmv));
+        if (rc != 0) {
+                perror ("Can't read rest of HELLO hdr");
+                return (-1);
+        }
+
+        /* ...and check we got what we expected */
+        if (__le32_to_cpu (hdr.type) != PTL_MSG_HELLO ||
+            __le32_to_cpu (PTL_HDR_LENGTH (&hdr)) != 0) {
+                fprintf (stderr, "Expecting a HELLO hdr with 0 payload,"
+                         " but got type %d with %d payload\n",
+                         __le32_to_cpu (hdr.type),
+                         __le32_to_cpu (PTL_HDR_LENGTH (&hdr)));
+                return (-1);
+        }
+
+        *peer_nid = __le64_to_cpu (hdr.src_nid);
+        return (0);
+}
+
+/*
+ * 'connect' command.
+ *
+ * For the socket NALs (tcp/toe): connect to host argv[1], port
+ * argv[2], optionally exchange nids with the peer (flag 'x' in
+ * argv[3]) and hand the connected socket to the NAL with
+ * NAL_CMD_REGISTER_PEER_FD (flag 'i' is passed on in ioc_flags).  The
+ * local descriptor is closed again on every path, success or failure.
+ * For the other NALs the peer nid is simply taken from argv[1].
+ *
+ * Returns 0 on success and after printing usage, -1 on error.
+ */
+int jt_ptl_connect(int argc, char **argv)
+{
+        if (argc < 2) {
+        usage:
+                fprintf(stderr, "usage: %s <hostname port [xi]> or <elan ID>\n",
+                        argv[0]);
+                return 0;
+        }
+        if (g_nal == 0) {
+                fprintf(stderr, "Error: you must run the 'network' command "
+                        "first.\n");
+                return -1;
+        }
+        if (g_nal == SOCKNAL || g_nal == TOENAL) {
+                ptl_nid_t peer_nid;
+                struct hostent *he;
+                struct portal_ioctl_data data;
+                struct sockaddr_in srvaddr;
+                char *flag;
+                int fd, rc;
+                int nonagle = 0;
+                int rxmem = 0;
+                int txmem = 0;
+                int bind_irq = 0;
+                int xchange_nids = 0;
+                int o;
+                socklen_t olen;
+
+                if (argc < 3) {
+                        goto usage;
+                }
+
+                he = gethostbyname(argv[1]);
+                if (!he) {
+                        fprintf(stderr, "gethostbyname error: %s\n",
+                                strerror(errno));
+                        return -1;
+                }
+
+                g_port = atol(argv[2]);
+
+                if (argc > 3)
+                        for (flag = argv[3]; *flag != 0; flag++)
+                                switch (*flag)
+                                {
+                                case 'i':
+                                        bind_irq = 1;
+                                        break;
+
+                                case 'x':
+                                        xchange_nids = 1;
+                                        break;
+
+                                default:
+                                        fprintf (stderr, "unrecognised flag '%c'\n",
+                                                 *flag);
+                                        return (-1);
+                                }
+
+                memset(&srvaddr, 0, sizeof(srvaddr));
+                srvaddr.sin_family = AF_INET;
+                srvaddr.sin_port = htons(g_port);
+                srvaddr.sin_addr.s_addr = *(__u32 *)he->h_addr;
+
+                fd = socket(PF_INET, SOCK_STREAM, 0);
+                if ( fd < 0 ) {
+                        fprintf(stderr, "socket() failed: %s\n",
+                                strerror(errno));
+                        return -1;
+                }
+
+                /* From here on every early return must close fd */
+
+                if (g_socket_nonagle)
+                {
+                        o = 1;
+                        if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &o, sizeof (o)) != 0)
+                        {
+                                fprintf(stderr, "cannot disable nagle: %s\n", strerror(errno));
+                                close (fd);
+                                return (-1);
+                        }
+                }
+
+                if (g_socket_rxmem != 0)
+                {
+                        o = g_socket_rxmem;
+                        if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &o, sizeof (o)) != 0)
+                        {
+                                fprintf(stderr, "cannot set receive buffer size: %s\n", strerror(errno));
+                                close (fd);
+                                return (-1);
+                        }
+                }
+
+                if (g_socket_txmem != 0)
+                {
+                        o = g_socket_txmem;
+                        if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &o, sizeof (o)) != 0)
+                        {
+                                fprintf(stderr, "cannot set send buffer size: %s\n", strerror(errno));
+                                close (fd);
+                                return (-1);
+                        }
+                }
+
+                rc = connect(fd, (struct sockaddr *)&srvaddr, sizeof(srvaddr));
+                if ( rc == -1 ) {
+                        fprintf(stderr, "connect() failed: %s\n",
+                                strerror(errno));
+                        close (fd);
+                        return -1;
+                }
+
+                /* Read back what the kernel actually granted, for the
+                 * report below; failures here are not fatal */
+                olen = sizeof (txmem);
+                if (getsockopt (fd, SOL_SOCKET, SO_SNDBUF, &txmem, &olen) != 0)
+                        fprintf (stderr, "Can't get send buffer size: %s\n", strerror (errno));
+                olen = sizeof (rxmem);
+                if (getsockopt (fd, SOL_SOCKET, SO_RCVBUF, &rxmem, &olen) != 0)
+                        fprintf (stderr, "Can't get receive buffer size: %s\n", strerror (errno));
+                olen = sizeof (nonagle);
+                if (getsockopt (fd, IPPROTO_TCP, TCP_NODELAY, &nonagle, &olen) != 0)
+                        fprintf (stderr, "Can't get nagle: %s\n", strerror (errno));
+
+                if (xchange_nids) {
+
+                        PORTAL_IOC_INIT (data);
+                        data.ioc_nal = g_nal;
+                        rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_NID, &data);
+                        if (rc != 0)
+                        {
+                                fprintf (stderr, "failed to get my nid: %s\n",
+                                         strerror (errno));
+                                close (fd);
+                                return (-1);
+                        }
+
+                        rc = exchange_nids (fd, data.ioc_nid, &peer_nid);
+                        if (rc != 0)
+                        {
+                                close (fd);
+                                return (-1);
+                        }
+                }
+                else
+                        peer_nid = ntohl (srvaddr.sin_addr.s_addr); /* HOST byte order */
+
+                printf("Connected host: %s NID "LPX64" snd: %d rcv: %d nagle: %s\n", argv[1],
+                       peer_nid, txmem, rxmem, nonagle ? "Disabled" : "Enabled");
+
+                PORTAL_IOC_INIT(data);
+                data.ioc_fd = fd;
+                data.ioc_nal = g_nal;
+                data.ioc_nal_cmd = NAL_CMD_REGISTER_PEER_FD;
+                data.ioc_nid = peer_nid;
+                data.ioc_flags = bind_irq;
+
+                rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data);
+                if (rc) {
+                        fprintf(stderr, "failed to register fd with portals: "
+                                "%s\n", strerror(errno));
+                        close (fd);
+                        return -1;
+                }
+
+                g_nid = peer_nid;
+                printf("Connection to "LPX64" registered with socknal\n", g_nid);
+
+                rc = close(fd);
+                if (rc) {
+                        fprintf(stderr, "close failed: %d\n", rc);
+                }
+        } else if (g_nal == QSWNAL) {
+                g_nid = atoi(argv[1]);
+        } else if (g_nal == GMNAL) {
+                g_nid = atoi(argv[1]);
+        } else if (g_nal == SCIMACNAL) {
+                unsigned int    tmpnid;
+                if(sscanf(argv[1], "%x", &tmpnid) == 1) {
+                        g_nid=tmpnid;
+                }
+                else {
+                        fprintf(stderr, "nid %s invalid for SCI nal\n", argv[1]);
+                }
+
+
+        } else {
+                fprintf(stderr, "This should never happen.  Also it is very "
+                        "bad.\n");
+        }
+
+        return 0;
+}
+
+/* 'disconnect' command.  For the socket NALs, ask the NAL to close
+ * the connection to host argv[1], or every connection when no host is
+ * given (ioc_nid left zero).  For the other NALs only a message is
+ * printed.  Returns 0 on success and after printing usage, -1 on
+ * error. */
+int jt_ptl_disconnect(int argc, char **argv)
+{
+        struct portal_ioctl_data data;
+        struct hostent          *host;
+
+        if (argc > 2) {
+                fprintf(stderr, "usage: %s [hostname]\n", argv[0]);
+                return 0;
+        }
+        if (g_nal == 0) {
+                fprintf(stderr, "Error: you must run the 'network' command "
+                        "first.\n");
+                return -1;
+        }
+
+        if (g_nal != SOCKNAL && g_nal != TOENAL) {
+                if (g_nal == QSWNAL) {
+                        printf("'disconnect' doesn't make any sense for "
+                               "elan.\n");
+                } else if (g_nal == GMNAL) {
+                        printf("'disconnect' doesn't make any sense for "
+                               "GM.\n");
+                } else if (g_nal == SCIMACNAL) {
+                        printf("'disconnect' doesn't make any sense for "
+                               "SCI.\n");
+                } else {
+                        fprintf(stderr, "This should never happen.  Also it is very "
+                                "bad.\n");
+                        return -1;
+                }
+                return 0;
+        }
+
+        PORTAL_IOC_INIT(data);
+        if (argc != 2) {
+                printf("Disconnecting ALL connections.\n");
+                /* leave ioc_nid zeroed == disconnect all */
+        } else {
+                host = gethostbyname(argv[1]);
+                if (host == NULL) {
+                        fprintf(stderr, "gethostbyname error: %s\n",
+                                strerror(errno));
+                        return -1;
+                }
+
+                data.ioc_nid = ntohl (*(__u32 *)host->h_addr); /* HOST byte order */
+        }
+        data.ioc_nal = g_nal;
+        data.ioc_nal_cmd = NAL_CMD_CLOSE_CONNECTION;
+
+        if (l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data)) {
+                fprintf(stderr, "failed to remove connection: %s\n",
+                        strerror(errno));
+                return -1;
+        }
+
+        return 0;
+}
+
+/* 'push' command.  For the socket NALs, issue NAL_CMD_PUSH_CONNECTION
+ * for the connection to host argv[1], or for every connection when no
+ * host is given (ioc_nid left zero).  For the other NALs only a
+ * message is printed.  Returns 0 on success and after printing usage,
+ * -1 on error. */
+int jt_ptl_push_connection (int argc, char **argv)
+{
+        struct portal_ioctl_data data;
+        struct hostent          *host;
+
+        if (argc > 2) {
+                fprintf(stderr, "usage: %s [hostname]\n", argv[0]);
+                return 0;
+        }
+        if (g_nal == 0) {
+                fprintf(stderr, "Error: you must run the 'network' command "
+                        "first.\n");
+                return -1;
+        }
+
+        if (g_nal != SOCKNAL && g_nal != TOENAL) {
+                if (g_nal == QSWNAL) {
+                        printf("'push' doesn't make any sense for elan.\n");
+                } else if (g_nal == GMNAL) {
+                        printf("'push' doesn't make any sense for GM.\n");
+                } else if (g_nal == SCIMACNAL) {
+                        printf("'push' doesn't make any sense for SCI.\n");
+                } else {
+                        fprintf(stderr, "This should never happen.  Also it is very "
+                                "bad.\n");
+                        return -1;
+                }
+                return 0;
+        }
+
+        PORTAL_IOC_INIT(data);
+        if (argc != 2) {
+                printf("Pushing ALL connections.\n");
+                /* leave ioc_nid zeroed == push all */
+        } else {
+                host = gethostbyname(argv[1]);
+                if (host == NULL) {
+                        fprintf(stderr, "gethostbyname error: %s\n",
+                                strerror(errno));
+                        return -1;
+                }
+
+                data.ioc_nid = ntohl (*(__u32 *)host->h_addr); /* HOST byte order */
+        }
+        data.ioc_nal = g_nal;
+        data.ioc_nal_cmd = NAL_CMD_PUSH_CONNECTION;
+
+        if (l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data)) {
+                fprintf(stderr, "failed to push connection: %s\n",
+                        strerror(errno));
+                return -1;
+        }
+
+        return 0;
+}
+
+/* 'ping' command: start the pinger against nid argv[1] through
+ * IOC_PORTAL_PING.  Optional arguments are count (default 1, must be
+ * 0..20000), size (default 4) and timeout in seconds (default 1).
+ * Returns 0 on success and after printing usage, -1 on error. */
+int jt_ptl_ping(int argc, char **argv)
+{
+        struct portal_ioctl_data data;
+        ptl_nid_t                nid;
+        long                     count = 1;
+        long                     size = 4;
+        long                     timeout = 1;
+
+        if (argc < 2) {
+                fprintf(stderr, "usage: %s nid [count] [size] [timeout (secs)]\n", argv[0]);
+                return 0;
+        }
+
+        if (g_nal == 0) {
+                fprintf(stderr, "Error: you must run the 'network' command "
+                        "first.\n");
+                return -1;
+        }
+
+        if (ptl_parse_nid (&nid, argv[1]) != 0) {
+                fprintf (stderr, "Can't parse nid \"%s\"\n", argv[1]);
+                return (-1);
+        }
+
+        if (argc > 2) {
+                count = atol(argv[2]);
+                if (count < 0 || count > 20000) {
+                        fprintf(stderr, "are you insane?  %ld is a crazy count.\n", count);
+                        return -1;
+                }
+        }
+        if (argc > 3)
+                size = atol(argv[3]);
+        if (argc > 4)
+                timeout = atol (argv[4]);
+
+        PORTAL_IOC_INIT (data);
+        data.ioc_count   = count;
+        data.ioc_size    = size;
+        data.ioc_nid     = nid;
+        data.ioc_nal     = g_nal;
+        data.ioc_timeout = timeout;
+
+        if (l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_PING, &data)) {
+                fprintf(stderr, "failed to start pinger: %s\n",
+                        strerror(errno));
+                return -1;
+        }
+        return 0;
+}
+
+/* 'mynid' command (socket NALs only): register this node's nid with
+ * the NAL.  The nid is the IPv4 address, in host byte order, of
+ * argv[1] or, when no argument is given, of the local host name.
+ * Returns -1 when the NAL is unset or unsuitable or the name cannot
+ * be resolved; otherwise 0, even if the registration ioctl itself
+ * fails (the failure is only reported). */
+int jt_ptl_mynid(int argc, char **argv)
+{
+        struct portal_ioctl_data data;
+        struct hostent          *host;
+        char                     localname[1024];
+        char                    *hostname;
+        ptl_nid_t                mynid;
+        int                      rc;
+
+        if (argc > 2) {
+                fprintf(stderr, "usage: %s [hostname]\n", argv[0]);
+                fprintf(stderr, "hostname defaults to the hostname of the "
+                        "machine.\n");
+                return 0;
+        }
+
+        if (g_nal == 0) {
+                fprintf(stderr, "Error: you must run the 'network' command "
+                        "first.\n");
+                return -1;
+        }
+
+        if (g_nal == QSWNAL) {
+                fprintf(stderr, "'mynid' doesn't make any sense for elan.\n");
+                return -1;
+        }
+        if (g_nal == GMNAL) {
+                fprintf(stderr, "'mynid' doesn't make any sense for GM.\n");
+                return -1;
+        }
+        if (g_nal == SCIMACNAL) {
+                fprintf(stderr, "'mynid' doesn't make any sense for SCI.\n");
+                return -1;
+        }
+
+        if (g_nal != SOCKNAL && g_nal != TOENAL) {
+                fprintf(stderr, "This should never happen.  Also it is very "
+                        "bad.\n");
+                return -1;
+        }
+
+        if (argc != 1) {
+                hostname = argv[1];
+        } else {
+                if (gethostname(localname, sizeof(localname)) != 0) {
+                        fprintf(stderr, "gethostname failed: %s\n",
+                                strerror(errno));
+                        return -1;
+                }
+                hostname = localname;
+        }
+
+        host = gethostbyname(hostname);
+        if (host == NULL) {
+                fprintf(stderr, "cannot get address for host '%s': %d\n",
+                        hostname, h_errno);
+                return -1;
+        }
+        mynid = (ptl_nid_t)ntohl (*(__u32 *)host->h_addr); /* HOST byte order */
+
+        PORTAL_IOC_INIT(data);
+        data.ioc_nid = mynid;
+        data.ioc_nal = g_nal;
+        data.ioc_nal_cmd = NAL_CMD_REGISTER_MYNID;
+
+        rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data);
+        if (rc < 0)
+                fprintf(stderr, "IOC_PORTAL_REGISTER_MYNID failed: %s\n",
+                        strerror(errno));
+        else
+                printf("registered my nid "LPX64" (%s)\n", mynid, hostname);
+        return 0;
+}
+
+/*
+ * "fail" command: tell the current NAL to start (or stop) failing a NID.
+ *
+ * argv[1] is a NID, or the literal "_all_" which maps to PTL_NID_ANY.
+ * argv[2] is an optional count; it defaults to PTL_MD_THRESH_INF, and a
+ * count of 0 is reported as "Unfailing".
+ *
+ * Returns -1 when no NAL is selected or an argument cannot be parsed,
+ * 0 otherwise (including when the ioctl itself fails).
+ */
+int
+jt_ptl_fail_nid (int argc, char **argv)
+{
+        struct portal_ioctl_data data;
+        ptl_nid_t                target;
+        unsigned int             threshold = PTL_MD_THRESH_INF;
+        const char              *verb;
+
+        if (argc < 2 || argc > 3) {
+                fprintf (stderr, "usage: %s nid|\"_all_\" [count (0 == mend)]\n", argv[0]);
+                return (0);
+        }
+
+        if (g_nal == 0) {
+                fprintf (stderr,
+                         "Error: you must run the 'network' command first.\n");
+                return (-1);
+        }
+
+        if (strcmp (argv[1], "_all_") == 0) {
+                target = PTL_NID_ANY;
+        } else if (ptl_parse_nid (&target, argv[1]) != 0) {
+                fprintf (stderr, "Can't parse nid \"%s\"\n", argv[1]);
+                return (-1);
+        }
+
+        /* Without a count the default (infinite) threshold stays in place. */
+        if (argc >= 3 && sscanf (argv[2], "%i", &threshold) != 1) {
+                fprintf (stderr, "Can't parse count \"%s\"\n", argv[2]);
+                return (-1);
+        }
+
+        PORTAL_IOC_INIT (data);
+        data.ioc_nal   = g_nal;
+        data.ioc_nid   = target;
+        data.ioc_count = threshold;
+
+        if (l_ioctl (PORTALS_DEV_ID, IOC_PORTAL_FAIL_NID, &data) < 0) {
+                fprintf (stderr, "IOC_PORTAL_FAIL_NID failed: %s\n",
+                         strerror (errno));
+        } else {
+                if (threshold == 0)
+                        verb = "Unfailing";
+                else
+                        verb = "Failing";
+                printf ("%s %s\n", verb, argv[1]);
+        }
+
+        return (0);
+}
+
+/*
+ * "recv_mem" command: optionally set, then print, g_socket_rxmem.
+ *
+ * argv[1], when present, is parsed with Parser_size(); a parse failure or a
+ * negative size is reported and leaves the setting untouched.
+ * Always returns 0.
+ */
+int
+jt_ptl_rxmem (int argc, char **argv)
+{
+        if (argc > 1) {
+                int bytes;
+
+                if (Parser_size (&bytes, argv[1]) != 0 || bytes < 0) {
+                        fprintf (stderr, "Can't parse size %s\n", argv[1]);
+                        return (0);
+                }
+
+                g_socket_rxmem = bytes;
+        }
+
+        printf ("Socket rmem = %d\n", g_socket_rxmem);
+        return (0);
+}
+
+/*
+ * "send_mem" command: optionally set, then print, g_socket_txmem.
+ *
+ * argv[1], when present, is parsed with Parser_size(); a parse failure or a
+ * negative size is reported and leaves the setting untouched.
+ * Always returns 0.
+ */
+int
+jt_ptl_txmem (int argc, char **argv)
+{
+        if (argc > 1) {
+                int bytes;
+
+                if (Parser_size (&bytes, argv[1]) != 0 || bytes < 0) {
+                        fprintf (stderr, "Can't parse size %s\n", argv[1]);
+                        return (0);
+                }
+
+                g_socket_txmem = bytes;
+        }
+
+        printf ("Socket txmem = %d\n", g_socket_txmem);
+        return (0);
+}
+
+/*
+ * "nagle" command: optionally switch Nagle on or off, then print the state.
+ *
+ * The setting is stored inverted in g_socket_nonagle, so a true argument
+ * clears the flag.  Always returns 0.
+ */
+int
+jt_ptl_nagle (int argc, char **argv)
+{
+        const char *state;
+
+        if (argc > 1) {
+                int want_nagle;
+
+                if (Parser_bool (&want_nagle, argv[1]) != 0) {
+                        fprintf (stderr, "Can't parse boolean %s\n", argv[1]);
+                        return (0);
+                }
+
+                g_socket_nonagle = !want_nagle;
+        }
+
+        if (g_socket_nonagle)
+                state = "disabled";
+        else
+                state = "enabled";
+
+        printf ("Nagle %s\n", state);
+        return (0);
+}
+
+/*
+ * "add_route" command: add a routing table entry that reaches a range of
+ * target NIDs through a gateway.
+ *
+ * argv[1] is the gateway NID; its NAL is derived with nid2nal().
+ * argv[2] is the first target NID and argv[3] an optional second one, which
+ * defaults to the first.  The two targets are passed down as an ordered
+ * (MIN, MAX) pair, so they may be given in either order.
+ *
+ * Returns 0 after printing the usage text or on success, -1 on a parse or
+ * ioctl failure.
+ */
+int
+jt_ptl_add_route (int argc, char **argv)
+{
+        struct portal_ioctl_data data;
+        ptl_nid_t                nid1;
+        ptl_nid_t                nid2;
+        ptl_nid_t                gateway_nid;
+        int                      gateway_nal;
+        int                      rc;
+
+        if (argc < 3)
+        {
+                fprintf (stderr, "usage: %s gateway target [target]\n", argv[0]);
+                return (0);
+        }
+
+        if (ptl_parse_nid (&gateway_nid, argv[1]) != 0)
+        {
+                fprintf (stderr, "Can't parse gateway NID \"%s\"\n", argv[1]);
+                return (-1);
+        }
+
+        gateway_nal = nid2nal (gateway_nid);
+
+        if (ptl_parse_nid (&nid1, argv[2]) != 0)
+        {
+                fprintf (stderr, "Can't parse first target NID \"%s\"\n", argv[2]);
+                return (-1);
+        }
+
+        if (argc < 4)
+                nid2 = nid1;
+        else if (ptl_parse_nid (&nid2, argv[3]) != 0)
+        {
+                /* Report the argument that was actually parsed: argv[3].
+                 * argv[4] is the NULL terminator when argc == 4. */
+                fprintf (stderr, "Can't parse second target NID \"%s\"\n", argv[3]);
+                return (-1);
+        }
+
+        PORTAL_IOC_INIT(data);
+        data.ioc_nid = gateway_nid;
+        data.ioc_nal = gateway_nal;
+        data.ioc_nid2 = MIN (nid1, nid2);
+        data.ioc_nid3 = MAX (nid1, nid2);
+
+        rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_ADD_ROUTE, &data);
+        if (rc != 0)
+        {
+                fprintf (stderr, "IOC_PORTAL_ADD_ROUTE failed: %s\n", strerror (errno));
+                return (-1);
+        }
+
+        return (0);
+}
+
+/*
+ * "del_route" command: remove the routing table entry for a target NID.
+ *
+ * argv[1] is the target NID.  Returns 0 after printing the usage text or on
+ * success, -1 when the NID cannot be parsed or the ioctl fails.
+ */
+int
+jt_ptl_del_route (int argc, char **argv)
+{
+        struct portal_ioctl_data data;
+        ptl_nid_t                target;
+
+        if (argc < 2) {
+                fprintf (stderr, "usage: %s targetNID\n", argv[0]);
+                return (0);
+        }
+
+        if (ptl_parse_nid (&target, argv[1]) != 0) {
+                fprintf (stderr, "Can't parse target NID \"%s\"\n", argv[1]);
+                return (-1);
+        }
+
+        PORTAL_IOC_INIT(data);
+        data.ioc_nid = target;
+
+        if (l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_DEL_ROUTE, &data) == 0)
+                return (0);
+
+        fprintf (stderr, "IOC_PORTAL_DEL_ROUTE ("LPX64") failed: %s\n", target, strerror (errno));
+        return (-1);
+}
+
+/*
+ * "print_routes" command: dump the routing table, one entry per line.
+ *
+ * Entries are fetched by index with IOC_PORTAL_GET_ROUTE until the ioctl
+ * fails, which is treated as the end of the table.  Each line shows the
+ * gateway NAL name, the gateway NID and the two target NIDs of the entry.
+ * Always returns 0.
+ */
+int
+jt_ptl_print_routes (int argc, char **argv)
+{
+        struct portal_ioctl_data data;
+        char                     text[3][128];  /* one buffer per printed NID */
+        ptl_nid_t                gw_nid;
+        ptl_nid_t                first;
+        ptl_nid_t                last;
+        int                      gw_nal;
+        int                      slot = 0;
+
+        for (;;) {
+                PORTAL_IOC_INIT(data);
+                data.ioc_count = slot;
+
+                if (l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_ROUTE, &data) != 0)
+                        break;
+
+                gw_nal = data.ioc_nal;
+                gw_nid = data.ioc_nid;
+                first  = data.ioc_nid2;
+                last   = data.ioc_nid3;
+
+                printf ("%8s %18s : %s - %s\n",
+                        nal2name (gw_nal),
+                        ptl_nid2str (text[0], gw_nid),
+                        ptl_nid2str (text[1], first),
+                        ptl_nid2str (text[2], last));
+
+                slot++;
+        }
+
+        return (0);
+}
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <portals/api-support.h>
+#include <portals/ptlctl.h>
+
+#include "parser.h"
+
+
+/*
+ * Command table handed to the parser: command name, handler, a third field
+ * that is 0 for every entry, and the help text.  The table ends with an
+ * all-zero entry.
+ */
+command_t list[] = {
+        {"network", jt_ptl_network, 0,"setup the NAL (args: nal name)"},
+        {"connect", jt_ptl_connect, 0, "connect to a remote nid (args: <hostname port> | <id> for tcp/elan respectively)"},
+        {"disconnect", jt_ptl_disconnect, 0, "disconnect from a remote nid (args: [hostname])"},
+        {"push", jt_ptl_push_connection, 0, "flush connection to a remote nid (args: [hostname])"},
+        {"ping", jt_ptl_ping, 0, "do a ping test (args: nid [count] [size] [timeout])"},
+        {"mynid", jt_ptl_mynid, 0, "inform the socknal of the local NID (args: [hostname])"},
+        {"add_route", jt_ptl_add_route, 0, "add an entry to the routing table (args: gatewayNID targetNID [targetNID])"},
+        {"del_route", jt_ptl_del_route, 0, "delete an entry from the routing table (args: targetNID)"},
+        {"print_routes", jt_ptl_print_routes, 0, "print the routing table (args: none)"},
+        {"recv_mem", jt_ptl_rxmem, 0, "Set socket receive buffer size (args: [size])"},
+        {"send_mem", jt_ptl_txmem, 0, "Set socket send buffer size (args: [size])"},
+        {"nagle", jt_ptl_nagle, 0, "Enable/Disable Nagle (args: [on/off])"},
+        {"dump", jt_ioc_dump, 0, "usage: dump file, save ioctl buffer to file"},
+        {"fail", jt_ptl_fail_nid, 0, "usage: fail nid|_all_ [count]"},
+        {"help", Parser_help, 0, "help"},
+        {"exit", Parser_quit, 0, "quit"},
+        {"quit", Parser_quit, 0, "quit"},
+        { 0, 0, 0, NULL }
+};
+
+/*
+ * ptlctl entry point.  With arguments, run them as a single command and
+ * return its result; without arguments, enter the interactive command loop.
+ */
+int main(int argc, char **argv)
+{
+        if (ptl_initialize(argc, argv) < 0)
+                exit(1);
+
+        Parser_init("ptlctl > ", list);
+
+        if (argc <= 1) {
+                Parser_commands();
+                return 0;
+        }
+
+        /* Skip argv[0]: the command starts at the first real argument. */
+        return Parser_execarg(argc - 1, argv + 1, list);
+}
--- /dev/null
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/time.h>
+
+/* Current time of day from gettimeofday(), as seconds with a fractional part. */
+double
+timenow ()
+{
+        struct timeval stamp;
+        double         fraction;
+
+        gettimeofday (&stamp, NULL);
+        fraction = stamp.tv_usec / 1000000.0;
+
+        return (stamp.tv_sec + fraction);
+}
+
+/*
+ * Read one sample from the stat file open on fd and print it.
+ *
+ * The file is expected to hold "bytes packets errors [depth]".  The first
+ * call prints raw totals; later calls also print rates computed against the
+ * time of the previous call.  After printing, a newline is written back to
+ * the file (presumably to reset the counters -- confirm against the kernel
+ * side).  Exits the process when the file cannot be read or parsed.
+ */
+void
+do_stat (int fd)
+{
+        static char      buffer[1024];
+        static double    last = 0.0;    /* time of the previous sample, 0.0 before the first */
+        double           now;
+        double           t;
+        long long        bytes;
+        long long        avg;
+        long             packets;
+        long             errors;
+        long             depth = 0;
+        int              n;
+
+        lseek (fd, 0, SEEK_SET);
+        now = timenow();
+
+        /* Leave room for the terminating NUL: a full-size read would make
+         * the store below land one byte past the end of the buffer. */
+        n = read (fd, buffer, sizeof (buffer) - 1);
+        if (n < 0)
+        {
+                fprintf (stderr, "Can't read statfile\n");
+                exit (1);
+        }
+        buffer[n] = 0;
+
+        /* "ll" is the standard length modifier for long long. */
+        n = sscanf (buffer, "%lld %ld %ld %ld", &bytes, &packets, &errors, &depth);
+
+        /* depth is optional; the first three fields are not. */
+        if (n < 3)
+        {
+                fprintf (stderr, "Can't parse statfile\n");
+                exit (1);
+        }
+
+        avg = (packets == 0) ? 0LL : bytes / packets;
+
+        if (last == 0.0)
+                printf ("%lld bytes, %ld packets (sz %lld) %ld errors",
+                        bytes, packets, avg, errors);
+        else
+        {
+                t = now - last;
+
+                printf ("%9lld (%7.2fMb/s), %7ld packets (sz %5lld, %5ld/s) %ld errors (%ld/s)",
+                        bytes, ((double)bytes)/((1<<20) * t),
+                        packets, avg, (long)(packets/t),
+                        errors, (long)(errors/t));
+        }
+
+        if (n == 4)
+                printf (" (%ld)\n", depth);
+        else
+                printf ("\n");
+
+        fflush (stdout);
+
+        lseek (fd, 0, SEEK_SET);
+        write (fd, "\n", 1);
+        last = timenow();
+}
+
+/*
+ * Print the router statistics once, or repeatedly every argv[1] seconds
+ * when a non-zero interval is given.  Returns 1 if the stat file cannot be
+ * opened; with an interval the loop never ends.
+ */
+int main (int argc, char **argv)
+{
+        int period = (argc > 1) ? atoi (argv[1]) : 0;
+        int statfd = open ("/proc/sys/portals/router", O_RDWR);
+
+        if (statfd < 0) {
+                fprintf (stderr, "Can't open stat: %s\n", strerror (errno));
+                return (1);
+        }
+
+        do_stat (statfd);
+
+        /* period never changes: either skip the loop or run it forever. */
+        while (period != 0) {
+                sleep (period);
+                do_stat (statfd);
+        }
+
+        return (0);
+}
--- /dev/null
+# Shared build settings for the in-kernel build.
+include fs/lustre/portals/Kernelenv
+
+# Subdirectories to descend into.
+obj-y += portals/
+obj-y += mds/
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+# Shared build settings for the in-kernel build.
+include fs/lustre/portals/Kernelenv
+
+obj-y += mds.o
+
+# Objects that make up mds.o.  Only object files belong in this list;
+# the header mds_internal.h is not a link input and has been dropped.
+mds-objs := mds_lov.o handler.o mds_reint.o mds_fs.o lproc_mds.o mds_updates.o mds_open.o simple.o target.o
--- /dev/null
+# Add the Lustre and Portals header directories to the compiler search path.
+EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include
--- /dev/null
+# Add the Lustre and Portals header directories to the compiler search path.
+EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+# Extra files shipped in the distribution tarball.
+EXTRA_DIST = Rules.linux archdep.m4 MCP
+# The same subdirectories are both built and distributed.
+DIST_SUBDIRS = libcfs portals knals unals utils tests doc router
+SUBDIRS = libcfs portals knals unals utils tests doc router
--- /dev/null
+# Shared build settings for the in-kernel build.
+include fs/lustre/portals/Kernelenv
+
+# Subdirectories to descend into.
+obj-y += portals/
+obj-y += libcfs/
+obj-y += knals/
+obj-y += router/
--- /dev/null
+# included in Linux kernel directories
+# Rules for module building
+
+# NOTE(review): the link rules below use $(MOD_LINK), not MODLINK; confirm
+# that this variable is still needed.
+MODLINK=@MOD_LINK@
+if LINUX25
+
+
+# Source file name of the current target with ".c", ".o" and "-" removed;
+# passed to the compiler as KBUILD_BASENAME.
+basename=$(shell echo $< | sed -e 's/\.c//g' | sed -e 's/-//g' | sed -e 's/\.o//g')
+AM_CPPFLAGS= -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -mpreferred-stack-boundary=2 -DKBUILD_MODNAME=$(MODULE) -DKBUILD_BASENAME=$(basename)
+
+# Partially link all objects of the module into a single relocatable object.
+$(MODULE).o: $($(MODULE)_OBJECTS)
+ $(LD) -m $(MOD_LINK) -r -o $(MODULE).o $($(MODULE)_OBJECTS)
+
+
+
+else
+
+
+# Same partial link rule as in the LINUX25 branch, without the extra flags.
+$(MODULE).o: $($(MODULE)_OBJECTS)
+ $(LD) -m $(MOD_LINK) -r -o $(MODULE).o $($(MODULE)_OBJECTS)
+
+
+
+endif
+
+
+# Rebuild the etags (TAGS) and ctags (tags) indexes from every .c and .h
+# file in the portals tree and in this tree, skipping ".orig" files here.
+tags:
+ rm -f $(top_srcdir)/TAGS
+ rm -f $(top_srcdir)/tags
+ find $(top_srcdir)/../portals/ -name '*.[hc]' | xargs etags -a
+ find $(top_srcdir) -name '*.[hc]' | grep -v ".orig" | xargs etags -a
+ find $(top_srcdir)/../portals/ -name '*.[hc]' | xargs ctags -a
+ find $(top_srcdir) -name '*.[hc]' | grep -v ".orig" | xargs ctags -a
+
+
+
+
--- /dev/null
+
+# -------- in kernel compilation? (2.5 only) -------------
+AC_ARG_ENABLE(inkernel, [ --enable-inkernel set up 2.5 kernel makefiles])
+AM_CONDITIONAL(INKERNEL, test x$enable_inkernel = xyes)
+# The conditional above does not define a shell variable called INKERNEL,
+# so report the value of the option itself ("no" when it was not given).
+echo "Makefile for in kernel build: ${enable_inkernel:-no}"
+
+# -------- liblustre compilation --------------
+# Giving the option replaces host_cpu with the pseudo architecture "lib",
+# which the checks further down test for.
+AC_ARG_WITH(lib, [ --with-lib compile lustre library], host_cpu="lib")
+
+# -------- set linuxdir ------------
+
+# LINUX is the kernel source tree used by the checks below; it is also
+# substituted into the generated files.
+AC_ARG_WITH(linux, [ --with-linux=[path] set path to Linux source (default=/usr/src/linux)],LINUX=$with_linux,LINUX=/usr/src/linux)
+AC_SUBST(LINUX)
+
+# --------- UML? --------------------
+# The kernel tree counts as user mode linux when include/asm-um exists and
+# include/asm is the same directory (same inode number); host_cpu is then
+# replaced with "um".  The library build skips the check.
+AC_MSG_CHECKING(if you are running user mode linux for $host_cpu ...)
+if test $host_cpu = "lib" ; then
+ host_cpu="lib"
+ AC_MSG_RESULT(no building Lustre library)
+else
+ if test -e $LINUX/include/asm-um ; then
+ if test X`ls -id $LINUX/include/asm/ | awk '{print $1}'` = X`ls -id $LINUX/include/asm-um | awk '{print $1}'` ; then
+ host_cpu="um";
+ AC_MSG_RESULT(yes)
+ else
+ AC_MSG_RESULT(no (asm doesn't point at asm-um))
+ fi
+
+ else
+ AC_MSG_RESULT(no (asm-um missing))
+ fi
+fi
+
+# --------- Linux 25 ------------------
+
+# The kernel tree is treated as 2.5 when include/linux/namei.h exists.
+AC_MSG_CHECKING(if you are running linux 2.5)
+if test -e $LINUX/include/linux/namei.h ; then
+ linux25="yes"
+ AC_MSG_RESULT(yes)
+else
+ linux25="no"
+ AC_MSG_RESULT(no)
+fi
+AM_CONDITIONAL(LINUX25, test x$linux25 = xyes)
+# The result lives in the lower case variable set above; nothing in this
+# file assigns an upper case LINUX25 shell variable.
+echo "Makefiles for in linux 2.5 build: $linux25"
+
+# ------- Makeflags ------------------
+
+# Per architecture settings: compiler flags (KCFLAGS), preprocessor flags
+# (KCPPFLAGS) and the linker emulation name (MOD_LINK).  An architecture
+# without an entry aborts configure.
+AC_MSG_CHECKING(setting make flags system architecture: )
+case ${host_cpu} in
+ # user space library build
+ lib )
+ AC_MSG_RESULT($host_cpu)
+ KCFLAGS='-g -Wall '
+ KCPPFLAGS='-D__arch_lib__ '
+ MOD_LINK=elf_i386
+;;
+ # user mode linux; the include paths differ between 2.5 and older trees
+ um )
+ AC_MSG_RESULT($host_cpu)
+ KCFLAGS='-g -Wall -pipe -Wno-trigraphs -Wstrict-prototypes -fno-strict-aliasing -fno-common '
+ case ${linux25} in
+ yes )
+ KCPPFLAGS='-D__KERNEL__ -U__i386__ -Ui386 -DUM_FASTCALL -D__arch_um__ -DSUBARCH="i386" -DNESTING=0 -D_LARGEFILE64_SOURCE -Derrno=kernel_errno -DPATCHLEVEL=4 -DMODULE -I$(LINUX)/arch/um/include -I$(LINUX)/arch/um/kernel/tt/include -I$(LINUX)/arch/um/kernel/skas/include -O2 -nostdinc -iwithprefix include -DKBUILD_BASENAME=$(MODULE) -DKBUILD_MODNAME=$(MODULE) '
+ ;;
+ * )
+ KCPPFLAGS='-D__KERNEL__ -U__i386__ -Ui386 -DUM_FASTCALL -D__arch_um__ -DSUBARCH="i386" -DNESTING=0 -D_LARGEFILE64_SOURCE -Derrno=kernel_errno -DPATCHLEVEL=4 -DMODULE -I$(LINUX)/arch/um/kernel/tt/include -I$(LINUX)/arch/um/include '
+ ;;
+ esac
+
+ MOD_LINK=elf_i386
+;;
+ i*86 )
+ AC_MSG_RESULT($host_cpu)
+ KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -pipe'
+ case ${linux25} in
+ yes )
+ KCPPFLAGS='-D__KERNEL__ -DMODULE -march=i686 -I$(LINUX)/include/asm-i386/mach-default -nostdinc -iwithprefix include '
+ ;;
+ * )
+ KCPPFLAGS='-D__KERNEL__ -DMODULE '
+ ;;
+ esac
+ MOD_LINK=elf_i386
+;;
+
+ # alphaev6 and alphaev67 currently use identical settings
+ alphaev6 )
+ AC_MSG_RESULT($host_cpu)
+ KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -pipe -mno-fp-regs -ffixed-8 -mcpu=ev5 -Wa,-mev6'
+ KCPPFLAGS='-D__KERNEL__ -DMODULE '
+ MOD_LINK=elf64alpha
+;;
+
+ alphaev67 )
+ AC_MSG_RESULT($host_cpu)
+ KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -pipe -mno-fp-regs -ffixed-8 -mcpu=ev5 -Wa,-mev6'
+ KCPPFLAGS='-D__KERNEL__ -DMODULE '
+ MOD_LINK=elf64alpha
+;;
+
+ # every other alpha variant
+ alpha* )
+ AC_MSG_RESULT($host_cpu)
+ KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -pipe -mno-fp-regs -ffixed-8 -mcpu=ev5 -Wa,-mev5'
+ KCPPFLAGS='-D__KERNEL__ -DMODULE '
+ MOD_LINK=elf64alpha
+;;
+
+ ia64 )
+ AC_MSG_RESULT($host_cpu)
+ KCFLAGS='-gstabs -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -ffixed-r13 -mfixed-range=f10-f15,f32-f127 -falign-functions=32 -mb-step'
+ KCPPFLAGS='-D__KERNEL__ -DMODULE'
+ MOD_LINK=elf64_ia64
+;;
+
+ sparc64 )
+ AC_MSG_RESULT($host_cpu)
+ KCFLAGS='-O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -Wno-unused -m64 -pipe -mno-fpu -mcpu=ultrasparc -mcmodel=medlow -ffixed-g4 -fcall-used-g5 -fcall-used-g7 -Wno-sign-compare -Wa,--undeclared-regs'
+ KCPPFLAGS='-D__KERNEL__'
+ MOD_LINK=elf64_sparc
+
+;;
+
+ powerpc )
+ AC_MSG_RESULT($host_cpu)
+ KCFLAGS='-O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -D__powerpc__ -fsigned-char -msoft-float -pipe -ffixed-r2 -Wno-uninitialized -mmultiple -mstring'
+ KCPPFLAGS='-D__KERNEL__'
+ MOD_LINK=elf32ppclinux
+;;
+
+ *)
+ AC_ERROR("Unknown Linux Platform: $host_cpu")
+;;
+esac
+
+# ----------- make dep run? ------------------
+
+# Kernel builds only: include/linux/config.h must exist in the kernel tree,
+# otherwise configure stops.
+if test $host_cpu != "lib" ; then
+ AC_MSG_CHECKING(if make dep has been run in kernel source (host $host_cpu) )
+ if test -f $LINUX/include/linux/config.h ; then
+ AC_MSG_RESULT(yes)
+ else
+ AC_MSG_ERROR(** cannot find $LINUX/include/linux/config.h. Run make dep in $LINUX.)
+ fi
+fi
+
+# ------------ include paths ------------------
+
+# The library build leaves out the kernel include directory.
+if test $host_cpu != "lib" ; then
+ KINCFLAGS='-I$(top_srcdir)/include -I$(top_srcdir)/portals/include -I$(LINUX)/include'
+else
+ KINCFLAGS='-I$(top_srcdir)/include -I$(top_srcdir)/portals/include'
+fi
+# NOTE(review): ARCHCPPFLAGS is not assigned anywhere in the visible part of
+# this file, and CPPFLAGS is assigned again at the end of it.
+CPPFLAGS="$KINCFLAGS $ARCHCPPFLAGS"
+
+# Kernel builds only: check for autoconf.h, read the kernel release from
+# version.h to derive the module install directories, and pick the module
+# flags depending on whether autoconf.h enables MODVERSIONS.
+if test $host_cpu != "lib" ; then
+# ------------ autoconf.h ------------------
+ AC_MSG_CHECKING(if autoconf.h is in kernel source)
+ if test -f $LINUX/include/linux/autoconf.h ; then
+ AC_MSG_RESULT(yes)
+ else
+ AC_MSG_ERROR(** cannot find $LINUX/include/linux/autoconf.h. Run make config in $LINUX.)
+ fi
+
+# ------------ RELEASE and moduledir ------------------
+ AC_MSG_CHECKING(for Linux release)
+
+ dnl We need to rid ourselves of the nasty [ ] quotes.
+ changequote(, )
+ dnl Get release from version.h
+ RELEASE="`sed -ne 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_-]*\).*/\1/p' $LINUX/include/linux/version.h`"
+ changequote([, ])
+
+ moduledir='$(libdir)/modules/'$RELEASE/kernel
+ AC_SUBST(moduledir)
+
+ modulefsdir='$(moduledir)/fs/$(PACKAGE)'
+ AC_SUBST(modulefsdir)
+
+ AC_MSG_RESULT($RELEASE)
+ AC_SUBST(RELEASE)
+
+# ---------- modversions? --------------------
+ AC_MSG_CHECKING(for MODVERSIONS)
+ if egrep -e 'MODVERSIONS.*1' $LINUX/include/linux/autoconf.h >/dev/null 2>&1;
+ then
+ MFLAGS="-DMODULE -DMODVERSIONS -include $LINUX/include/linux/modversions.h -DEXPORT_SYMTAB"
+ AC_MSG_RESULT(yes)
+ else
+ MFLAGS=
+ AC_MSG_RESULT(no)
+ fi
+fi
+
+# ---------- SMP -------------------
+#AC_MSG_CHECKING(for SMP)
+#if egrep -e SMP=y $LINUX/.config >/dev/null 2>&1; then
+# SMPFLAG=
+# AC_MSG_RESULT(yes)
+#else
+# SMPFLAG=
+# AC_MSG_RESULT(no)
+#fi
+
+# Final compiler and preprocessor flags, assembled from the pieces above.
+CFLAGS="$KCFLAGS"
+CPPFLAGS="$KINCFLAGS $KCPPFLAGS $MFLAGS "
+
+AC_SUBST(MOD_LINK)
+# NOTE(review): no shell variable named LINUX25 is assigned in the visible
+# part of this file; confirm what this substitution is expected to carry.
+AC_SUBST(LINUX25)
\ No newline at end of file
--- /dev/null
+#!/bin/sh
+
+aclocal &&
+automake --add-missing &&
+${AUTOCONF:-autoconf}
--- /dev/null
+
+# ---------- directories ---------
+
+
+# --------- unsigned long long sane? -------
+
+# Configure stops unless unsigned long long is 8 bytes wide.
+AC_CHECK_SIZEOF(unsigned long long, 0)
+# NOTE(review): the two echo lines look like debugging output; nothing
+# visible here sets SIZEOF_unsigned_long_long.
+echo "---> size SIZEOF $SIZEOF_unsigned_long_long"
+echo "---> size SIZEOF $ac_cv_sizeof_unsigned_long_long"
+if test $ac_cv_sizeof_unsigned_long_long != 8 ; then
+ AC_MSG_ERROR([** we assume that sizeof(long long) == 8. Tell phil@clusterfs.com])
+fi
+
+# directories for binaries
+# The default prefix is empty, so the paths below are rooted at /usr
+# unless a prefix is given.
+ac_default_prefix=
+bindir='${exec_prefix}/usr/bin'
+sbindir='${exec_prefix}/usr/sbin'
+includedir='${prefix}/usr/include'
+
+# Directories for documentation and demos.
+docdir='${prefix}/usr/share/doc/$(PACKAGE)'
+AC_SUBST(docdir)
+demodir='$(docdir)/demo'
+AC_SUBST(demodir)
+pkgexampledir='${prefix}/usr/lib/$(PACKAGE)/examples'
+AC_SUBST(pkgexampledir)
+pymoddir='${prefix}/usr/lib/${PACKAGE}/python/Lustre'
+AC_SUBST(pymoddir)
+# Install directory for network modules, below the kernel module directory.
+modulenetdir='$(moduledir)/net/$(PACKAGE)'
+AC_SUBST(modulenetdir)
+
+
+# ---------- BAD gcc? ------------
+# Refuse to continue with compilers on the list below: any gcc 2.95 and two
+# specific vendor builds of gcc 2.96.
+AC_PROG_RANLIB
+AC_PROG_CC
+AC_MSG_CHECKING(for buggy compiler)
+CC_VERSION=`$CC -v 2>&1 | grep "^gcc version"`
+# Print the offending version string and abort configure.
+bad_cc() {
+ echo
+ echo " '$CC_VERSION'"
+ echo " has been known to generate bad code, "
+ echo " please get an updated compiler."
+ AC_MSG_ERROR(sorry)
+}
+# The first 16 characters are enough to recognise "gcc version 2.95".
+TMP_VERSION=`echo $CC_VERSION | cut -c 1-16`
+if test "$TMP_VERSION" = "gcc version 2.95"; then
+ bad_cc
+fi
+case "$CC_VERSION" in
+ # ost_pack_niobuf putting 64bit NTOH temporaries on the stack
+ # without "sub $0xc,%esp" to protect the stack from being
+ # stomped on by interrupts (bug 606)
+ "gcc version 2.96 20000731 (Red Hat Linux 7.1 2.96-98)")
+ bad_cc
+ ;;
+ # mandrake's similar sub 0xc compiler bug
+ # http://marc.theaimsgroup.com/?l=linux-kernel&m=104748366226348&w=2
+ "gcc version 2.96 20000731 (Mandrake Linux 8.1 2.96-0.62mdk)")
+ bad_cc
+ ;;
+ *)
+ AC_MSG_RESULT(no known problems)
+ ;;
+esac
+# end ------ BAD gcc? ------------
+
+# -------- Check for required packages --------------
+
+# this doesn't seem to work on older autoconf
+# AC_CHECK_LIB(readline, readline,,)
+# readline is on by default; the option only selects the link flags and the
+# define, no check is made that the library is installed.
+AC_ARG_ENABLE(readline, [ --enable-readline use readline library],,
+ enable_readline="yes")
+
+if test "$enable_readline" = "yes" ; then
+ LIBREADLINE="-lreadline -lncurses"
+ HAVE_LIBREADLINE="-DHAVE_LIBREADLINE=1"
+else
+ LIBREADLINE=""
+ HAVE_LIBREADLINE=""
+fi
+AC_SUBST(LIBREADLINE)
+AC_SUBST(HAVE_LIBREADLINE)
+
+# efence is off by default and handled the same way as readline above.
+AC_ARG_ENABLE(efence, [ --enable-efence use efence library],,
+ enable_efence="no")
+
+if test "$enable_efence" = "yes" ; then
+ LIBEFENCE="-lefence"
+ HAVE_LIBEFENCE="-DHAVE_LIBEFENCE=1"
+else
+ LIBEFENCE=""
+ HAVE_LIBEFENCE=""
+fi
+AC_SUBST(LIBEFENCE)
+AC_SUBST(HAVE_LIBEFENCE)
+
+# The library build (host_cpu set to "lib") installs into its own libdir.
+AM_CONDITIONAL(LIBLUSTRE, test x$host_cpu = xlib)
+AC_MSG_CHECKING(if you are building lib lustre)
+if test "$host_cpu" = "lib"; then
+ AC_MSG_RESULT(yes)
+ libdir='${exec_prefix}/lib/lustre'
+else
+ AC_MSG_RESULT(no)
+fi
+
+# end -------- Kernel build environment. -----------------
+
+
--- /dev/null
+# This version is here to make autoconf happy; the name is a file which is
+# "unique" to this directory so that configure knows where it should run.
+AC_INIT(knals/Makefile.am, 3.0)
+AC_CANONICAL_SYSTEM
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+# Automake variables. The package version is taken from the IVERSION
+# define in libcfs/module.c.
+AM_INIT_AUTOMAKE(portals, builtin([esyscmd], [sed -ne '/.*define IVERSION /{ s/.*IVERSION //; p; }' libcfs/module.c]))
+# AM_MAINTAINER_MODE
+
+# Pull in the shared configure fragments.
+sinclude(archdep.m4)
+sinclude(build.m4)
+sinclude(portalsconf.m4)
+
+# For an in-kernel build, use the hand written kernel makefiles as the
+# templates and generate Kernelenv as well.
+if test x$enable_inkernel = xyes ; then
+cp Kernelenv.mk Kernelenv.in
+cp Makefile.mk Makefile.in
+cp libcfs/Makefile.mk libcfs/Makefile.in
+cp portals/Makefile.mk portals/Makefile.in
+cp knals/Makefile.mk knals/Makefile.in
+cp knals/socknal/Makefile.mk knals/socknal/Makefile.in
+cp router/Makefile.mk router/Makefile.in
+AC_OUTPUT(Kernelenv)
+fi
+
+
+AM_CONFIG_HEADER(include/config.h)
+
+AC_OUTPUT([Rules.linux Makefile libcfs/Makefile portals/Makefile \
+ unals/Makefile knals/Makefile router/Makefile \
+ knals/socknal/Makefile knals/gmnal/Makefile knals/qswnal/Makefile \
+ knals/scimacnal/Makefile knals/toenal/Makefile \
+ utils/Makefile tests/Makefile doc/Makefile \
+ packaging/Makefile packaging/portals.spec ])
+
--- /dev/null
+Makefile
+Makefile.in
+*.eps
+*.pdf
--- /dev/null
+In this document I will try to draw the data structures and how they
+interrelate in the Portals 3 reference implementation. It is probably
+best shown with a drawing, so there may be an additional xfig or
+Postscript figure.
+
+
+MEMORY POOLS:
+------------
+
+First, a digression on memory allocation in the library. As mentioned
+in the NAL Writer's Guide, the library does not link against any
+standard C libraries and as such is unable to dynamically allocate
+memory on its own. It requires that the NAL implement a method
+for allocation that is appropriate for the protection domain in
+which the library lives. This is only called when a network
+interface is initialized to allocate the Portals object pools.
+
+These pools are preallocated blocks of objects that the library
+can rapidly make active and manage with a minimum of overhead.
+It also cuts down on overhead for setting up structures
+since the NAL->malloc() callback does not need to be called
+for each object.
+
+The objects are maintained on a per-object type singly linked free
+list and contain a pointer to the next free object. This pointer
+is NULL if the object is not on the free list and is non-zero
+if it is on the list. The special sentinel value of 0xDEADBEEF
+is used to mark the end of the free list since NULL could
+indicate that the last object in the list is not free.
+
+When one of the lib_*_alloc() functions is called, the library
+returns the head of the free list and advances the head pointer
+to the next item on the list. The special case of 0xDEADBEEF is
+checked and a NULL pointer is returned if there are no more
+objects of this type available. The lib_*_free() functions
+are even simpler -- check to ensure that the object is not already
+free, set its next pointer to the current head and then set
+the head to be this newly freed object.
+
+Since C does not have templates, I did the next best thing and wrote
+the memory pool allocation code as a macro that expands based on the
+type of the argument. The mk_alloc(T) macro expands to
+write the _lib_T_alloc() and lib_T_free() functions.
+It requires that the object have a pointer of the type T named
+"next_free". There are also functions that map _lib_T_alloc()
+to lib_T_alloc() so that the library can add some extra
+functionality to the T constructor.
+
+
+
+LINKED LISTS:
+------------
+
+Many of the active Portals objects are stored in doubly linked lists
+when they are active. These are always implemented with the pointer
+to the next object and a pointer to the next pointer of the
+previous object. This avoids the "dummy head" object or
+special cases for inserting at the beginning or end of the list.
+The pointer manipulations are a little hairy at times, but
+I hope that they are understandable.
+
+The actual linked list code is implemented as macros in <lib-p30.h>,
+although the object has to know about
+
+
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+# Commands used to export the LyX sources to each output format.
+LYX2PDF = lyx --export pdf
+LYX2TXT = lyx --export text
+LYX2HTML = lyx --export html
+SUFFIXES = .lin .lyx .pdf .sgml .html .txt .fig .eps
+
+DOCS = portals3.pdf
+IMAGES = file.eps flow_new.eps get.eps mpi.eps portals.eps put.eps
+LYXFILES= portals3.lyx
+
+MAINTAINERCLEANFILES = $(IMAGES) $(DOCS) $(GENERATED)
+# No files are generated from .lin templates at the moment.
+GENERATED =
+EXTRA_DIST = $(DOCS) $(IMAGES) $(LYXFILES)
+
+all: $(DOCS)
+
+# update date and version in document
+# "tag" is the CVS tag taken from the Name keyword, or HEAD when the
+# keyword is unexpanded; "addversion" replaces the TAG, VERSION and DATE
+# placeholders of a template.
+date := $(shell date +%x)
+tag := $(shell echo '$$Name: $$' | sed -e 's/^\$$Na''me: *\$$$$/HEAD/; s/^\$$Na''me: \(.*\) \$$$$/\1/')
+addversion = sed -e 's|@T''AG@|$(tag)|g; s|@VER''SION@|$(VERSION)|g; s|@DA''TE@|$(date)|g'
+
+# Regenerate when the $(VERSION) or $Name: $ changes.
+.INTERMEDIATE: $(GENERATED)
+$(GENERATED) : %.lyx: %.lin Makefile
+ $(addversion) $< > $@
+
+# Export rules.  A failing lyx run only prints a warning, so a missing lyx
+# does not break the build.
+.lyx.pdf:
+ @$(LYX2PDF) $< || printf "\n*** Warning: not creating PDF docs; install lyx to rectify this\n"
+
+.lyx.txt:
+ @$(LYX2TXT) $< || printf "\n*** Warning: not creating text docs; install lyx to rectify this\n"
+.lyx.html:
+ @$(LYX2HTML) $< || printf "\n*** Warning: not creating HTML docs; install lyx to rectify this\n"
+# Convert xfig drawings to EPS; errors from fig2dev are ignored.
+.fig.eps:
+ -fig2dev -L eps $< > $@
+
+portals3.pdf portals3.txt portals3.html: $(IMAGES) portals3.lyx
+
+# The publishing commands are commented out, so this target only builds
+# the PDF.
+syncweb: portals3.pdf
+# cp lustre.pdf /usr/src/www/content/lustre/docs/lustre.pdf
+# ( cd /usr/src/www ; make lustre ; make synclustre )
+
--- /dev/null
+This documents the life cycle of a message as it arrives and is handled by
+a basic async, packetized NAL. There are four types of messages that have
+slightly different life cycles, so they are addressed independently.
+
+
+Put request
+-----------
+
+1. The NAL notices that there is an incoming message header on the network
+and reads a ptl_hdr_t in from the wire.
+
+2. It may store additional NAL specific data that provides context
+for this event in a void* that it will interpret in some fashion
+later.
+
+3. The NAL calls lib_parse() with a pointer to the header and its
+private data structure.
+
+4. The library decodes the header and may build a message state
+object that describes the event to be written and the ACK to be
+sent, if any. It then calls nal->recv() with the private data
+that the NAL passed in, a pointer to the message state object
+and a translated user address.
+
+ The NAL will have been given a chance to pretranslate
+ all user addresses when the buffers are created. This
+ process is described in the NAL-HOWTO.
+
+5. The NAL should restore whatever context it required from the
+private data pointer, begin receiving the bytes and possibly store
+some extra state of its own. It should return at this point.
+
+
+
+Get request
+-----------
+
+1. As with a Put, the NAL notices the incoming message header and
+passes it to lib_parse().
+
+2. The library decodes the header and calls nal->recv() with a
+zero byte length, offset and destination to instruct it to clean
+up the wire after reading the header. The private data will
+be passed in as well, allowing the NAL to retrieve any state
+or context that it requires.
+
+3. The library may build a message state object to possibly
+write an event log or invalidate a memory region.
+
+4. The library will build a ptl_msg_t header that specifies the
+Portals protocol information for delivery at the remote end.
+
+5. The library calls nal->send() with the pre-built header,
+the optional message state object, the four part address
+component, a translated user pointer + offset, and some
+other things.
+
+6. The NAL is to put the header on the wire or copy it at
+this point (since it lives on the stack). It should store some
+amount of state about its current position in the message and
+the destination address.
+
+7. And then return to the library.
+
+
+Reply request
+-------------
+
+1. Starting at "The library decodes the header..."
+
+2. The library decodes the header and calls nal->recv()
+to bring in the rest of the message. Flow continues in
+exactly the same fashion as with all other receives.
+
+
+Ack request
+-----------
+
+1. The library decodes the header, builds the appropriate data
+structures for the event in a message state object and calls nal->recv()
+with a zero byte length, etc.
+
+
+Packet arrival
+--------------
+
+1. The NAL should notice the arrival of a packet, retrieve whatever
+state it needs from the message ID or other NAL specific header data
+and place the data bytes directly into the user address that was
+given to nal->recv().
+
+ How this happens is outside the scope of the Portals library
+ and solely determined by the NAL...
+
+2. If this is the last packet in a message, the NAL should retrieve
+the lib_msg_t *cookie that it was given in the call to nal->recv()
+and pass it to lib_finalize(). lib_finalize() may call nal->send()
+to send an ACK, nal->write() to record an entry in the event log,
+nal->invalidate() to unregister a region of memory or do nothing at all.
+
+3. It should then clean up any remaining NAL specific state about
+the message and go back into the main loop.
+
+
+Outgoing packets
+----------------
+
+1. When the NAL has pending output, it should put the packets on
+the wire wrapped with whatever implementation specified wrappers.
+
+2. Once it has output all the packets of a message it should
+call lib_finalize() with the message state object that was
+handed to nal->send(). This will allow the library to clean
+up its state regarding the message and write any pending event
+entries.
+
+
+
--- /dev/null
+This document is a first attempt at describing how to write a NAL
+for the Portals 3 library. It also defines the library architecture
+and the abstraction of protection domains.
+
+
+First, an overview of the architecture:
+
+ Application
+
+----|----+--------
+ |
+ API === NAL (User space)
+ |
+---------+---|-----
+ |
+ LIB === NAL (Library space)
+ |
+---------+---|-----
+
+ Physical wire (NIC space)
+
+
+Application
+ API
+API-side NAL
+------------
+LIB-side NAL
+ LIB
+LIB-side NAL
+ wire
+
+Communication is through the indicated paths via well defined
+interfaces. The API and LIB portions are written to be portable
+across platforms and do not depend on the network interface.
+
+Communication between the application and the API code is
+defined in the Portals 3 API specification. This is the
+user-visible portion of the interface and should be the most
+stable.
+
+
+
+API-side NAL:
+------------
+
+The user space NAL needs to implement only a few functions
+that are stored in a nal_t data structure and called by the
+API-side library:
+
+ int forward( nal_t *nal,
+ int index,
+ void *args,
+ size_t arg_len,
+ void *ret,
+ size_t ret_len
+ );
+
+Most of the data structures in the portals library are held in
+the LIB section of the code, so it is necessary to forward API
+calls across the protection domain to the library. This is
+handled by the NAL's forward method. Once the argument and return
+blocks are on the remote side the NAL should call lib_dispatch()
+to invoke the appropriate API function.
+
+ int validate( nal_t *nal,
+ void *base,
+ size_t extent,
+ void **trans_base,
+ void **trans_data
+ );
+
+The validate method provides a means for the NAL to prevalidate
+and possibly pretranslate user addresses into a form suitable
+for fast use by the network card or kernel module. The trans_base
+pointer will be used by the library every time it needs to
+refer to the block of memory. The trans_data result is a
+cookie that will be handed to the NAL along with the trans_base.
+
+The library never performs calculations on the trans_base value;
+it only computes offsets that are then handed to the NAL.
+
+
+ int shutdown( nal_t *nal, int interface );
+
+Brings down the network interface. The remote NAL side should
+call lib_fini() to bring down the library side of the network.
+
+ void yield( nal_t *nal );
+
+This allows the user application to gracefully give up the processor
+while busy waiting. Performance critical applications may not
+want to take the time to call this function, so it should be an
+option to the PtlEQWait call. Right now it is not implemented as such.
+
+Lastly, the NAL must implement a function named PTL_IFACE_*, where
+* is the name of the NAL such as PTL_IFACE_IP or PTL_IFACE_MYR.
+This initialization function is to set up communication with the
+library-side NAL, which should call lib_init() to bring up the
+network interface.
+
+
+
+LIB-side NAL:
+------------
+
+On the library-side, the NAL has much more responsibility. It
+is responsible for calling lib_dispatch() on behalf of the user,
+it is also responsible for bringing packets off the wire and
+pushing bits out. As on the user side, the methods are stored
+in a nal_cb_t structure that is defined on a per network
+interface basis.
+
+The calls to lib_dispatch() need to be examined. The prototype:
+
+ void lib_dispatch(
+ nal_cb_t *nal,
+ void *private,
+ int index,
+ void *arg_block,
+ void *ret_block
+ );
+
+has two complications. The private field is a NAL-specific
+value that will be passed to any callbacks produced as a result
+of this API call. Kernel module implementations may use this
+for task structures, or perhaps network card data. It is ignored
+by the library.
+
+Secondly, the arg_block and ret_block must be in the same protection
+domain as the library. The NAL's two halves must communicate the
+sizes and perform the copies. After the call, the buffer pointed
+to by ret_block will be filled in and should be copied back to
+the user space. How this is to be done is NAL specific.
+
+ int lib_parse(
+ nal_cb_t *nal,
+ ptl_hdr_t *hdr,
+ void *private
+ );
+
+This is the only other entry point into the library from the NAL.
+When the NAL detects an incoming message on the wire it should read
+sizeof(ptl_hdr_t) bytes and pass a pointer to the header to
+lib_parse(). It may set private to be anything that it needs to
+tie the incoming message to callbacks that are made as a result
+of this event.
+
+The method calls are:
+
+ int (*send)(
+ nal_cb_t *nal,
+ void *private,
+ lib_msg_t *cookie,
+ ptl_hdr_t *hdr,
+ int nid,
+ int pid,
+ int gid,
+ int rid,
+ user_ptr trans_base,
+ user_ptr trans_data,
+ size_t offset,
+ size_t len
+ );
+
+This is a tricky function -- it must support async output
+of messages as well as properly synchronized event log writing.
+The private field is the same that was passed into lib_dispatch()
+or lib_parse() and may be used to tie this call to the event
+that initiated the entry to the library.
+
+The cookie is a pointer to a library private value that must
+be passed to lib_finalize() once the message has been completely
+sent. It should not be examined by the NAL for any meaning.
+
+The four ID fields are passed in, although some implementations
+may not use all of them.
+
+The single base pointer has been replaced with the translated
+address that the API NAL generated in the api_nal->validate()
+call. The trans_data is unchanged and the offset is in bytes.
+
+
+ int (*recv)(
+ nal_cb_t *nal,
+ void *private,
+ lib_msg_t *cookie,
+ user_ptr trans_base,
+ user_ptr trans_data,
+ size_t offset,
+ size_t mlen,
+ size_t rlen
+ );
+
+This callback will only be called in response to lib_parse().
+The cookie, trans_base and trans_data are as discussed in send().
+The NAL should read mlen bytes from the wire, deposit them into
+trans_base + offset and then discard (rlen - mlen) bytes.
+Once the entire message has been received the NAL should call
+lib_finalize() with the lib_msg_t *cookie.
+
+The special arguments of base=NULL, data=NULL, offset=0, mlen=0, rlen=0
+are used to indicate that the NAL should clean up the wire. This could
+be implemented as a blocking call, although having it return as quickly
+as possible is desirable.
+
+ int (*write)(
+ nal_cb_t *nal,
+ void *private,
+ user_ptr trans_addr,
+ user_ptr trans_data,
+ size_t offset,
+
+ void *src_addr,
+ size_t len
+ );
+
+This is essentially a cross-protection domain memcpy(). The user address
+has been pretranslated by the api_nal->validate() call.
+
+ void *(*malloc)(
+ nal_cb_t *nal,
+ size_t len
+ );
+
+ void (*free)(
+ nal_cb_t *nal,
+ void *buf
+ );
+
+Since the NAL may be in a non-standard hosted environment it can
+not call malloc(). This allows the library side NAL to implement
+the system specific malloc(). In the current reference implementation
+the library only calls nal->malloc() when the network interface is
+initialized and then calls free when it is brought down. The library
+maintains its own pool of objects for allocation so only one call to
+malloc is made per object type.
+
+ void (*invalidate)(
+ nal_cb_t *nal,
+ user_ptr trans_base,
+ user_ptr trans_data,
+ size_t extent
+ );
+
+User addresses are validated/translated at the user-level API NAL
+method, which is likely to push them to this level. Meanwhile,
+the library NAL will be notified when the library no longer
+needs the buffer. Overlapped buffers are not detected by the
+library, so the NAL should ref count each page involved.
+
+Unfortunately we have a few bugs when the invalidate method is
+called. It is still in progress...
+
+ void (*printf)(
+ nal_cb_t *nal,
+ const char *fmt,
+ ...
+ );
+
+As with malloc(), the library does not have any way to do printf
+or printk. It is not necessary for the NAL to implement this
+call, although it will make debugging difficult.
+
+ void (*cli)(
+ nal_cb_t *nal,
+ unsigned long *flags
+ );
+
+ void (*sti)(
+ nal_cb_t *nal,
+ unsigned long *flags
+ );
+
+These are used by the library to mark critical sections.
+
+ int (*gidrid2nidpid)(
+ nal_cb_t *nal,
+ ptl_id_t gid,
+ ptl_id_t rid,
+ ptl_id_t *nid,
+ ptl_id_t *pid
+ );
+
+
+ int (*nidpid2gidrid)(
+ nal_cb_t *nal,
+ ptl_id_t nid,
+ ptl_id_t pid,
+ ptl_id_t *gid,
+ ptl_id_t *rid
+ );
+
+Rolf added these. I haven't looked at how they have to work yet.
--- /dev/null
+#FIG 3.2
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+6 1200 750 1650 1050
+2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5
+ 1650 1050 1650 750 1200 750 1200 1050 1650 1050
+4 1 0 100 0 0 10 0.0000 0 105 240 1425 952 FS0\001
+-6
+6 1200 2325 1650 2625
+2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5
+ 1650 2625 1650 2325 1200 2325 1200 2625 1650 2625
+4 1 0 100 0 0 10 0.0000 0 105 240 1425 2527 FS3\001
+-6
+6 1200 1800 1650 2100
+2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5
+ 1650 2100 1650 1800 1200 1800 1200 2100 1650 2100
+4 1 0 100 0 0 10 0.0000 0 105 240 1425 2002 FS2\001
+-6
+6 1200 1275 1650 1575
+2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5
+ 1650 1575 1650 1275 1200 1275 1200 1575 1650 1575
+4 1 0 100 0 0 10 0.0000 0 105 240 1425 1477 FS1\001
+-6
+6 450 750 900 1200
+5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 750.000 450 1050 675 1125 900 1050
+1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 825 225 75 450 900 900 750
+2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
+ 450 825 450 1050
+2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
+ 900 1050 900 825
+-6
+6 450 2325 900 2775
+5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 2325.000 450 2625 675 2700 900 2625
+1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 2400 225 75 450 2475 900 2325
+2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
+ 450 2400 450 2625
+2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
+ 900 2625 900 2400
+-6
+6 450 1800 900 2250
+5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 1800.000 450 2100 675 2175 900 2100
+1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 1875 225 75 450 1950 900 1800
+2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
+ 450 1875 450 2100
+2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
+ 900 2100 900 1875
+-6
+6 450 1275 900 1725
+5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 1275.000 450 1575 675 1650 900 1575
+1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 1350 225 75 450 1425 900 1275
+2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
+ 450 1350 450 1575
+2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
+ 900 1575 900 1350
+-6
+6 2250 750 3450 2625
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+ 2550 1200 3150 1200
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+ 2550 1500 3150 1500
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+ 2550 1800 3150 1800
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+ 2550 2100 3150 2100
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 2550 975 3150 975 3150 2625 2550 2625 2550 975
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+ 2550 2400 3150 2400
+4 1 0 100 0 0 10 0.0000 0 135 1185 2850 900 Application Buffer\001
+-6
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
+ 0 0 1.00 60.00 120.00
+ 0 0 1.00 60.00 120.00
+ 1650 2400 2550 1350
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
+ 0 0 1.00 60.00 120.00
+ 0 0 1.00 60.00 120.00
+ 1650 1875 2550 1050
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
+ 0 0 1.00 60.00 120.00
+ 0 0 1.00 60.00 120.00
+ 1650 1425 2550 1950
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
+ 0 0 1.00 60.00 120.00
+ 0 0 1.00 60.00 120.00
+ 1650 900 2550 1650
+2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
+ 900 900 1200 900
+2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
+ 900 1425 1200 1425
+2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
+ 900 1950 1200 1950
+2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
+ 900 2475 1200 2475
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
+ 0 0 1.00 60.00 120.00
+ 0 0 1.00 60.00 120.00
+ 1650 2025 2550 2250
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
+ 0 0 1.00 60.00 120.00
+ 0 0 1.00 60.00 120.00
+ 1650 2550 2550 2475
+2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5
+ 1875 2850 1875 600 225 600 225 2850 1875 2850
+4 1 0 100 0 0 10 0.0000 0 105 1215 1050 525 Parallel File Server\001
--- /dev/null
+#FIG 3.2
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+6 525 2175 1575 2925
+6 675 2287 1425 2812
+4 1 0 50 0 0 10 0.0000 4 105 255 1050 2437 MD\001
+4 1 0 50 0 0 10 0.0000 4 105 645 1050 2587 Exists and\001
+4 1 0 50 0 0 10 0.0000 4 135 555 1050 2737 Accepts?\001
+-6
+2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5
+ 1575 2550 1050 2175 525 2550 1050 2925 1575 2550
+-6
+6 3450 1275 4350 1725
+6 3600 1312 4200 1687
+4 1 0 100 0 0 10 0.0000 0 135 525 3900 1612 Message\001
+4 1 0 100 0 0 10 0.0000 0 105 465 3900 1462 Discard\001
+-6
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 3450 1275 4350 1275 4350 1725 3450 1725 3450 1275
+-6
+6 4650 1275 5550 1725
+6 4725 1312 5475 1687
+4 1 0 100 0 0 10 0.0000 0 135 735 5100 1612 Drop Count\001
+4 1 0 100 0 0 10 0.0000 0 105 630 5100 1462 Increment\001
+-6
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 4650 1275 5550 1275 5550 1725 4650 1725 4650 1275
+-6
+6 1350 525 2250 975
+6 1350 562 2250 937
+4 1 0 100 0 0 10 0.0000 0 135 795 1800 862 Match Entry\001
+4 1 0 100 0 0 10 0.0000 0 105 585 1800 712 Get Next\001
+-6
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 1350 525 2250 525 2250 975 1350 975 1350 525
+-6
+6 525 1125 1575 1875
+2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5
+ 1575 1500 1050 1125 525 1500 1050 1875 1575 1500
+4 1 0 100 0 0 10 0.0000 0 105 465 1049 1552 Match?\001
+-6
+6 2340 1237 2940 1687
+6 2340 1237 2940 1687
+4 1 0 100 0 0 10 0.0000 0 105 345 2640 1387 More\001
+4 1 0 100 0 0 10 0.0000 0 105 405 2640 1537 Match\001
+4 1 0 100 0 0 10 0.0000 0 105 510 2640 1687 Entries?\001
+-6
+-6
+6 525 3225 1575 3975
+6 675 3375 1425 3750
+4 1 0 50 0 0 10 0.0000 4 105 255 1050 3525 MD\001
+4 1 0 50 0 0 10 0.0000 4 105 615 1050 3720 has room?\001
+-6
+2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5
+ 525 3600 1050 3225 1575 3600 1050 3975 525 3600
+-6
+6 3300 3375 4350 3825
+6 3300 3412 4350 3787
+4 1 0 50 0 0 10 0.0000 4 105 735 3825 3562 Unlink MD\001
+4 1 0 50 0 0 10 0.0000 4 135 945 3825 3712 & Match Entry\001
+-6
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 3300 3375 4350 3375 4350 3825 3300 3825 3300 3375
+-6
+6 1950 3225 3000 3975
+6 2250 3450 2700 3750
+4 1 0 50 0 0 10 0.0000 4 105 450 2475 3600 Unlink\001
+4 1 0 50 0 0 10 0.0000 4 105 315 2475 3750 full?\001
+-6
+2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5
+ 3000 3600 2475 3225 1950 3600 2475 3975 3000 3600
+-6
+6 3150 4500 4200 4950
+6 3150 4537 4200 4912
+4 1 0 50 0 0 10 0.0000 4 105 735 3675 4687 Unlink MD\001
+4 1 0 50 0 0 10 0.0000 4 135 945 3675 4837 & Match Entry\001
+-6
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 3150 4500 4200 4500 4200 4950 3150 4950 3150 4500
+-6
+6 600 4500 1500 4950
+6 675 4537 1425 4912
+4 1 0 50 0 0 10 0.0000 4 135 615 1050 4837 Operation\001
+4 1 0 50 0 0 10 0.0000 4 105 525 1050 4687 Perform\001
+-6
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 600 4500 1500 4500 1500 4950 600 4950 600 4500
+-6
+6 4650 4350 5700 5100
+6 4950 4537 5400 4912
+6 4950 4537 5400 4912
+4 1 0 50 0 0 10 0.0000 4 135 435 5175 4837 Queue?\001
+4 1 0 50 0 0 10 0.0000 4 105 360 5175 4687 Event\001
+-6
+-6
+2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5
+ 5700 4725 5175 4350 4650 4725 5175 5100 5700 4725
+-6
+6 6000 4500 6900 4950
+6 6225 4575 6675 4875
+4 1 0 50 0 0 10 0.0000 4 105 360 6450 4875 Event\001
+4 1 0 50 0 0 10 0.0000 4 105 435 6450 4725 Record\001
+-6
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 6000 4500 6900 4500 6900 4950 6000 4950 6000 4500
+-6
+6 1800 4350 2850 5100
+6 2100 4575 2550 4875
+4 1 0 50 0 0 10 0.0000 4 105 450 2325 4725 Unlink\001
+4 1 0 50 0 0 10 0.0000 4 105 450 2325 4875 thresh?\001
+-6
+2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5
+ 2850 4725 2325 4350 1800 4725 2325 5100 2850 4725
+-6
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1050 1875 1050 2175
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1575 1500 2100 1500
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1050 450 1050 1125
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1350 750 1050 750
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1050 2925 1050 3225
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 3150 1500 3450 1500
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 4350 1500 4650 1500
+2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5
+ 2100 1500 2625 1125 3150 1500 2625 1875 2100 1500
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1575 3600 1950 3600
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1050 3975 1050 4500
+2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 3000 3600 3300 3600
+2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1500 4725 1800 4725
+2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 5700 4725 6000 4725
+2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 2850 4725 3150 4725
+2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 4200 4725 4650 4725
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 6900 4725 7950 4725
+3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5
+ 0 0 1.00 60.00 120.00
+ 1575 2550 1650 2550 1800 2550 1800 2400 1800 1500
+ 0.000 1.000 1.000 1.000 0.000
+3 0 0 1 0 7 100 0 -1 0.000 0 0 1 5
+ 0 0 1.00 60.00 120.00
+ 2250 750 2475 750 2625 750 2625 900 2625 1125
+ 0.000 1.000 1.000 1.000 0.000
+3 0 0 1 0 7 100 0 -1 0.000 0 0 1 5
+ 0 0 1.00 60.00 120.00
+ 7500 4725 7500 1650 7500 1500 7350 1500 5550 1500
+ 0.000 1.000 1.000 1.000 0.000
+3 0 0 1 0 7 50 0 -1 0.000 0 1 0 5
+ 0 0 1.00 60.00 120.00
+ 2475 3225 2475 2400 2475 2250 2325 2250 1800 2250
+ 0.000 1.000 1.000 1.000 0.000
+3 0 0 1 0 7 50 0 -1 0.000 0 1 0 5
+ 0 0 1.00 60.00 120.00
+ 3825 3375 3825 2175 3825 2025 3675 2025 1800 2025
+ 0.000 1.000 1.000 1.000 0.000
+3 0 0 1 0 7 50 0 -1 0.000 0 1 0 8
+ 0 0 1.00 60.00 120.00
+ 2325 4350 2325 4275 2325 4125 2475 4125 4275 4125 4425 4125
+ 4425 4275 4425 4725
+ 0.000 1.000 1.000 1.000 1.000 1.000 1.000 0.000
+3 0 0 1 0 7 50 0 -1 0.000 0 1 0 8
+ 0 0 1.00 60.00 120.00
+ 5175 4350 5175 4275 5175 4125 5325 4125 7125 4125 7275 4125
+ 7275 4275 7275 4725
+ 0.000 1.000 1.000 1.000 1.000 1.000 1.000 0.000
+4 1 0 100 0 0 10 0.0000 0 75 150 1575 1425 no\001
+4 1 0 100 0 0 10 0.0000 0 135 360 825 525 Entry\001
+4 1 0 100 0 0 10 0.0000 0 75 150 1575 2475 no\001
+4 1 0 100 0 0 10 0.0000 0 105 195 1200 1950 yes\001
+4 1 0 100 0 0 10 0.0000 0 105 195 1200 3000 yes\001
+4 1 0 100 0 0 10 0.0000 0 105 195 2775 1050 yes\001
+4 1 0 100 0 0 10 0.0000 0 75 150 3225 1425 no\001
+4 1 0 100 0 0 10 0.0000 0 75 150 1650 3525 no\001
+4 1 0 100 0 0 10 0.0000 0 105 195 1200 4050 yes\001
+4 1 0 100 0 0 10 0.0000 0 105 195 3150 3525 yes\001
+4 1 0 100 0 0 10 0.0000 0 75 150 2625 3150 no\001
+4 1 0 100 0 0 10 0.0000 0 105 195 3000 4650 yes\001
+4 1 0 100 0 0 10 0.0000 0 105 195 5850 4650 yes\001
+4 1 0 100 0 0 10 0.0000 0 75 150 2475 4275 no\001
+4 1 0 100 0 0 10 0.0000 0 75 150 5325 4275 no\001
+4 1 0 50 0 0 10 0.0000 4 105 285 7800 4650 Exit\001
--- /dev/null
+#FIG 3.2
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+6 2775 900 3525 1200
+4 0 0 100 0 0 10 0.0000 0 105 720 2775 1200 Translation\001
+4 0 0 100 0 0 10 0.0000 0 105 405 2850 1050 Portal\001
+-6
+6 1350 1725 2175 2025
+4 0 0 100 0 0 10 0.0000 0 105 825 1350 2025 Transmission\001
+4 0 0 100 0 0 10 0.0000 0 105 285 1620 1875 Data\001
+-6
+2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 900 525 2700 750
+2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 2700 825 2700 1275
+2 1 0 1 0 7 100 0 -1 3.000 0 0 7 1 0 2
+ 0 0 1.00 60.00 120.00
+ 2700 1350 900 1950
+2 2 0 1 0 7 100 0 -1 4.000 0 0 7 0 0 5
+ 2400 300 3600 300 3600 2250 2400 2250 2400 300
+2 2 0 1 0 7 100 0 -1 4.000 0 0 7 0 0 5
+ 0 300 1200 300 1200 2250 0 2250 0 300
+4 1 0 100 0 0 10 0.0000 4 135 495 1800 825 Request\001
+4 1 0 100 0 0 10 0.0000 0 105 540 600 525 Initiator\001
+4 1 0 100 0 0 10 0.0000 0 135 405 3000 525 Target\001
--- /dev/null
+% ---------------------------------------------------------------
+%
+% $Id: ieee.bst,v 1.1.2.1 2003/05/19 04:25:30 braam Exp $
+%
+% by Paolo.Ienne@di.epfl.ch
+%
+% ---------------------------------------------------------------
+%
+% no guarantee is given that the format corresponds perfectly to
+% IEEE 8.5" x 11" Proceedings, but most features should be ok.
+%
+% ---------------------------------------------------------------
+%
+% `ieee' from BibTeX standard bibliography style `abbrv'
+% version 0.99a for BibTeX versions 0.99a or later, LaTeX version 2.09.
+% Copyright (C) 1985, all rights reserved.
+% Copying of this file is authorized only if either
+% (1) you make absolutely no changes to your copy, including name, or
+% (2) if you do make changes, you name it something other than
+% btxbst.doc, plain.bst, unsrt.bst, alpha.bst, and abbrv.bst.
+% This restriction helps ensure that all standard styles are identical.
+% The file btxbst.doc has the documentation for this style.
+
+ENTRY
+ { address
+ author
+ booktitle
+ chapter
+ edition
+ editor
+ howpublished
+ institution
+ journal
+ key
+ month
+ note
+ number
+ organization
+ pages
+ publisher
+ school
+ series
+ title
+ type
+ volume
+ year
+ }
+ {}
+ { label }
+
+INTEGERS { output.state before.all mid.sentence after.sentence after.block }
+
+% Give each of the four output-state names a distinct integer code
+% (0..3).  output.state is compared against these names by the
+% output/new.block/new.sentence helpers.
+FUNCTION {init.state.consts}
+{ #0 'before.all :=
+ #1 'mid.sentence :=
+ #2 'after.sentence :=
+ #3 'after.block :=
+}
+
+STRINGS { s t }
+
+% Stack: <pending> <new>  ->  <new>
+% Saves <new> in s, writes the pending string with a separator chosen
+% by output.state, then leaves s on the stack as the next pending
+% string:
+%   mid.sentence   -> append ", "
+%   after.block    -> add a period, newline, then "\newblock "
+%   before.all     -> write as is
+%   otherwise      -> add a period and a space
+% In every case except mid.sentence the state is reset to mid.sentence.
+FUNCTION {output.nonnull}
+{ 's :=
+ output.state mid.sentence =
+ { ", " * write$ }
+ { output.state after.block =
+ { add.period$ write$
+ newline$
+ "\newblock " write$
+ }
+ { output.state before.all =
+ 'write$
+ { add.period$ " " * write$ }
+ if$
+ }
+ if$
+ mid.sentence 'output.state :=
+ }
+ if$
+ s
+}
+
+% Output the string on top of the stack via output.nonnull, or just
+% discard it when empty$ reports it as empty.
+FUNCTION {output}
+{ duplicate$ empty$
+ 'pop$
+ 'output.nonnull
+ if$
+}
+
+% Stack: <string> <field-name>
+% Like output, but an empty <string> is dropped with the warning
+% "empty <field-name> in <cite key>" instead of silently.
+% The field name is kept in t for the warning text.
+FUNCTION {output.check}
+{ 't :=
+ duplicate$ empty$
+ { pop$ "empty " t * " in " * cite$ * warning$ }
+ 'output.nonnull
+ if$
+}
+
+% Begin a new entry: write "\bibitem{<cite key>}" on its own line,
+% push an empty pending string and reset output.state to before.all.
+FUNCTION {output.bibitem}
+{ newline$
+ "\bibitem{" write$
+ cite$ write$
+ "}" write$
+ newline$
+ ""
+ before.all 'output.state :=
+}
+
+% Finish an entry: add a period to the pending string, write it, and
+% end the line.
+FUNCTION {fin.entry}
+{ add.period$
+ write$
+ newline$
+}
+
+% Mark that the next output starts a new block (sets output.state to
+% after.block), unless nothing has been output yet (before.all).
+FUNCTION {new.block}
+{ output.state before.all =
+ 'skip$
+ { after.block 'output.state := }
+ if$
+}
+
+% Mark that the next output starts a new sentence (sets output.state
+% to after.sentence).  Does nothing when the state is already
+% after.block or still before.all.
+FUNCTION {new.sentence}
+{ output.state after.block =
+ 'skip$
+ { output.state before.all =
+ 'skip$
+ { after.sentence 'output.state := }
+ if$
+ }
+ if$
+}
+
+% Logical negation of the flag on top of the stack: a true flag
+% becomes #0, a false flag becomes #1.
+FUNCTION {not}
+{ { #0 }
+ { #1 }
+ if$
+}
+
+% Logical AND of the two flags on top of the stack.  If the top flag
+% is true the lower flag is the result; otherwise the lower flag is
+% popped and #0 is pushed.
+FUNCTION {and}
+{ 'skip$
+ { pop$ #0 }
+ if$
+}
+
+% Logical OR of the two flags on top of the stack.  If the top flag
+% is true the lower flag is popped and #1 is pushed; otherwise the
+% lower flag is the result.
+FUNCTION {or}
+{ { pop$ #1 }
+ 'skip$
+ if$
+}
+
+% Consume one string; call new.block unless that string is empty.
+FUNCTION {new.block.checka}
+{ empty$
+ 'skip$
+ 'new.block
+ if$
+}
+
+% Consume two strings; call new.block unless both of them are empty.
+FUNCTION {new.block.checkb}
+{ empty$
+ swap$ empty$
+ and
+ 'skip$
+ 'new.block
+ if$
+}
+
+% Consume one string; call new.sentence unless that string is empty.
+FUNCTION {new.sentence.checka}
+{ empty$
+ 'skip$
+ 'new.sentence
+ if$
+}
+
+% Consume two strings; call new.sentence unless both of them are empty.
+FUNCTION {new.sentence.checkb}
+{ empty$
+ swap$ empty$
+ and
+ 'skip$
+ 'new.sentence
+ if$
+}
+
+% Replace the value on top of the stack with "" when empty$ reports it
+% as empty; otherwise leave it unchanged.
+FUNCTION {field.or.null}
+{ duplicate$ empty$
+ { pop$ "" }
+ 'skip$
+ if$
+}
+
+% Wrap the string on top of the stack as "{\em <string>}".  An empty
+% string yields "" rather than an empty emphasis group.
+FUNCTION {emphasize}
+{ duplicate$ empty$
+ { pop$ "" }
+ { "{\em " swap$ * "}" * }
+ if$
+}
+
+INTEGERS { nameptr namesleft numnames }
+
+% Stack: <name list>  ->  <formatted string>
+% Formats every name with the pattern "{f.~}{vv~}{ll}{, jj}" and joins
+% them:
+%   - names before the last are separated by ", ";
+%   - the last name is joined with " and ", preceded by "," when the
+%     list has more than two names;
+%   - a final name that formats to "others" becomes " et~al." instead.
+% Uses s (the list), t (current name) and the nameptr/namesleft/numnames
+% counters as scratch variables.
+FUNCTION {format.names}
+{ 's :=
+ #1 'nameptr :=
+ s num.names$ 'numnames :=
+ numnames 'namesleft :=
+ { namesleft #0 > }
+ { s nameptr "{f.~}{vv~}{ll}{, jj}" format.name$ 't :=
+ nameptr #1 >
+ { namesleft #1 >
+ { ", " * t * }
+ { numnames #2 >
+ { "," * }
+ 'skip$
+ if$
+ t "others" =
+ { " et~al." * }
+ { " and " * t * }
+ if$
+ }
+ if$
+ }
+ 't
+ if$
+ nameptr #1 + 'nameptr :=
+ namesleft #1 - 'namesleft :=
+ }
+ while$
+}
+
+% Push the author field formatted by format.names, or "" when the
+% field is empty.
+FUNCTION {format.authors}
+{ author empty$
+ { "" }
+ { author format.names }
+ if$
+}
+
+% Push the editor field formatted by format.names followed by
+% ", editors" (more than one name) or ", editor" (one name); push ""
+% when the field is empty.
+FUNCTION {format.editors}
+{ editor empty$
+ { "" }
+ { editor format.names
+ editor num.names$ #1 >
+ { ", editors" * }
+ { ", editor" * }
+ if$
+ }
+ if$
+}
+
+% Push the title converted with change.case$ mode "t", or "" when the
+% title field is empty.
+FUNCTION {format.title}
+{ title empty$
+ { "" }
+ { title "t" change.case$ }
+ if$
+}
+
+% Stack: <string>  ->  <string with en-dashes>
+% Scans the string one character at a time (remaining text is kept in
+% t, the result is built on the stack):
+%   - a lone "-" is replaced by "--";
+%   - a run that already starts with "--" is copied hyphen by hyphen,
+%     unchanged;
+%   - any other character is copied as is.
+FUNCTION {n.dashify}
+{ 't :=
+ ""
+ { t empty$ not }
+ { t #1 #1 substring$ "-" =
+ { t #1 #2 substring$ "--" = not
+ { "--" *
+ t #2 global.max$ substring$ 't :=
+ }
+ { { t #1 #1 substring$ "-" = }
+ { "-" *
+ t #2 global.max$ substring$ 't :=
+ }
+ while$
+ }
+ if$
+ }
+ { t #1 #1 substring$ *
+ t #2 global.max$ substring$ 't :=
+ }
+ if$
+ }
+ while$
+}
+
+% Push the date string built from the month and year fields:
+%   year and month  -> "<month> <year>"
+%   year only       -> year
+%   month only      -> month, after warning that the year is missing
+%   neither         -> ""
+FUNCTION {format.date}
+{ year empty$
+ { month empty$
+ { "" }
+ { "there's a month but no year in " cite$ * warning$
+ month
+ }
+ if$
+ }
+ { month empty$
+ 'year
+ { month " " * year * }
+ if$
+ }
+ if$
+}
+
+% Push the title field wrapped by emphasize (used for book-like titles).
+FUNCTION {format.btitle}
+{ title emphasize
+}
+
+% Stack: <a> <b>  ->  <a><sep><b>
+% Joins the two strings with a tie "~" when <b> is shorter than three
+% text characters, otherwise with an ordinary space.
+FUNCTION {tie.or.space.connect}
+{ duplicate$ text.length$ #3 <
+ { "~" }
+ { " " }
+ if$
+ swap$ * *
+}
+
+% Stack: <description> <field>
+% If <field> is non-empty, warn "can't use both <description> fields in
+% <cite key>"; otherwise just discard <description>.
+FUNCTION {either.or.check}
+{ empty$
+ 'pop$
+ { "can't use both " swap$ * " fields in " * cite$ * warning$ }
+ if$
+}
+
+% Push "volume <volume>" for a book, followed by " of <emphasized
+% series>" when a series is given; push "" when volume is empty.
+% Warns through either.or.check if number is set together with volume.
+FUNCTION {format.bvolume}
+{ volume empty$
+ { "" }
+ { "volume" volume tie.or.space.connect
+ series empty$
+ 'skip$
+ { " of " * series emphasize * }
+ if$
+ "volume and number" number either.or.check
+ }
+ if$
+}
+
+% Push the number/series text, used only when volume is empty (with a
+% volume present this yields ""):
+%   no number  -> the series field, or "" if that is empty too
+%   number     -> "number <n>" mid-sentence, "Number <n>" otherwise,
+%                 followed by " in <series>"; a missing series only
+%                 produces a warning.
+FUNCTION {format.number.series}
+{ volume empty$
+ { number empty$
+ { series field.or.null }
+ { output.state mid.sentence =
+ { "number" }
+ { "Number" }
+ if$
+ number tie.or.space.connect
+ series empty$
+ { "there's a number but no series in " cite$ * warning$ }
+ { " in " * series * }
+ if$
+ }
+ if$
+ }
+ { "" }
+ if$
+}
+
+% Push "<edition> edition", or "" when the edition field is empty.
+% The edition text is case-converted with mode "l" when output is
+% mid-sentence and with mode "t" otherwise.
+FUNCTION {format.edition}
+{ edition empty$
+ { "" }
+ { output.state mid.sentence =
+ { edition "l" change.case$ " edition" * }
+ { edition "t" change.case$ " edition" * }
+ if$
+ }
+ if$
+}
+
+INTEGERS { multiresult }
+
+% Stack: <pages string>  ->  <flag>
+% Returns #1 if the string contains a "-", "," or "+" (taken to mean
+% more than one page), otherwise #0.  Scans character by character,
+% stopping at the first hit; t holds the unscanned remainder and
+% multiresult the answer.
+FUNCTION {multi.page.check}
+{ 't :=
+ #0 'multiresult :=
+ { multiresult not
+ t empty$ not
+ and
+ }
+ { t #1 #1 substring$
+ duplicate$ "-" =
+ swap$ duplicate$ "," =
+ swap$ "+" =
+ or or
+ { #1 'multiresult := }
+ { t #2 global.max$ substring$ 't := }
+ if$
+ }
+ while$
+ multiresult
+}
+
+% Push "pages <range>" (range passed through n.dashify) when
+% multi.page.check says there are several pages, "page <n>" otherwise,
+% or "" when the pages field is empty.
+FUNCTION {format.pages}
+{ pages empty$
+ { "" }
+ { pages multi.page.check
+ { "pages" pages n.dashify tie.or.space.connect }
+ { "page" pages tie.or.space.connect }
+ if$
+ }
+ if$
+}
+
+% Push the journal locator "<volume>(<number>):<pages>".
+%   - "(<number>)" is added only when number is set; a number without
+%     a volume also triggers a warning.
+%   - When pages is set but volume and number are both empty, the
+%     result falls back to format.pages ("page(s) ...").
+FUNCTION {format.vol.num.pages}
+{ volume field.or.null
+ number empty$
+ 'skip$
+ { "(" number * ")" * *
+ volume empty$
+ { "there's a number but no volume in " cite$ * warning$ }
+ 'skip$
+ if$
+ }
+ if$
+ pages empty$
+ 'skip$
+ { duplicate$ empty$
+ { pop$ format.pages }
+ { ":" * pages n.dashify * }
+ if$
+ }
+ if$
+}
+
+% Push the chapter/pages locator.  Without a chapter this is just
+% format.pages.  With a chapter it is "<type> <chapter>", where <type>
+% is the lower-cased type field or "chapter" by default, followed by
+% ", <format.pages>" when pages is set.
+FUNCTION {format.chapter.pages}
+{ chapter empty$
+ 'format.pages
+ { type empty$
+ { "chapter" }
+ { type "l" change.case$ }
+ if$
+ chapter tie.or.space.connect
+ pages empty$
+ 'skip$
+ { ", " * format.pages * }
+ if$
+ }
+ if$
+}
+
+% Push "In <emphasized booktitle>", with "<editors>, " inserted after
+% "In " when the editor field is set; push "" when booktitle is empty.
+FUNCTION {format.in.ed.booktitle}
+{ booktitle empty$
+ { "" }
+ { editor empty$
+ { "In " booktitle emphasize * }
+ { "In " format.editors * ", " * booktitle emphasize * }
+ if$
+ }
+ if$
+}
+
+% Warn "all relevant fields are empty" when author, title,
+% howpublished, month, year and note are all empty while the key
+% field is not empty.
+FUNCTION {empty.misc.check}
+{ author empty$ title empty$ howpublished empty$
+ month empty$ year empty$ note empty$
+ and and and and and
+ key empty$ not and
+ { "all relevant fields are empty in " cite$ * warning$ }
+ 'skip$
+ if$
+}
+
+% Stack: <default thesis label>  ->  <label>
+% If the type field is set, replace the default label with the type
+% field converted by change.case$ mode "t"; otherwise keep the default.
+FUNCTION {format.thesis.type}
+{ type empty$
+ 'skip$
+ { pop$
+ type "t" change.case$
+ }
+ if$
+}
+
+% Push the technical-report label: the type field, or "Technical
+% Report" when type is empty.  With a number it becomes
+% "<label> <number>"; without one the label alone is case-converted
+% with mode "t".
+FUNCTION {format.tr.number}
+{ type empty$
+ { "Technical Report" }
+ 'type
+ if$
+ number empty$
+ { "t" change.case$ }
+ { number tie.or.space.connect }
+ if$
+}
+
+% Push the cross-reference text for an article, ending in
+% " \cite{<crossref>}".  The prefix is chosen in this order:
+%   key set      -> "In <key>"
+%   journal set  -> "In {\em <journal>\/}"
+%   neither      -> "" after a warning
+FUNCTION {format.article.crossref}
+{ key empty$
+ { journal empty$
+ { "need key or journal for " cite$ * " to crossref " * crossref *
+ warning$
+ ""
+ }
+ { "In {\em " journal * "\/}" * }
+ if$
+ }
+ { "In " key * }
+ if$
+ " \cite{" * crossref * "}" *
+}
+
+% Push a short editor label for cross-references, starting with the
+% first editor formatted as "{vv~}{ll}":
+%   more than two editors -> append " et~al."
+%   exactly two           -> append " and <second editor>", or
+%                            " et~al." if the second name is "others"
+%   one                   -> nothing appended
+FUNCTION {format.crossref.editor}
+{ editor #1 "{vv~}{ll}" format.name$
+ editor num.names$ duplicate$
+ #2 >
+ { pop$ " et~al." * }
+ { #2 <
+ 'skip$
+ { editor #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" =
+ { " et~al." * }
+ { " and " * editor #2 "{vv~}{ll}" format.name$ * }
+ if$
+ }
+ if$
+ }
+ if$
+}
+
+% Push the cross-reference text for a book/inbook entry, ending in
+% " \cite{<crossref>}".
+% Prefix: "Volume <volume> of ", or "In " (with a warning) when volume
+% is empty.
+% What follows the prefix:
+%   editor set and different from author -> format.crossref.editor
+%   otherwise, key set                   -> the key
+%   otherwise, series set                -> "{\em <series>\/}"
+%   otherwise                            -> nothing, after a warning
+FUNCTION {format.book.crossref}
+{ volume empty$
+ { "empty volume in " cite$ * "'s crossref of " * crossref * warning$
+ "In "
+ }
+ { "Volume" volume tie.or.space.connect
+ " of " *
+ }
+ if$
+ editor empty$
+ editor field.or.null author field.or.null =
+ or
+ { key empty$
+ { series empty$
+ { "need editor, key, or series for " cite$ * " to crossref " *
+ crossref * warning$
+ "" *
+ }
+ { "{\em " * series * "\/}" * }
+ if$
+ }
+ { key * }
+ if$
+ }
+ { format.crossref.editor * }
+ if$
+ " \cite{" * crossref * "}" *
+}
+
+% Push the cross-reference text for incollection/inproceedings
+% entries, ending in " \cite{<crossref>}".  The prefix is:
+%   editor set and different from author -> "In <format.crossref.editor>"
+%   otherwise, key set                   -> "In <key>"
+%   otherwise, booktitle set             -> "In {\em <booktitle>\/}"
+%   otherwise                            -> "" after a warning
+FUNCTION {format.incoll.inproc.crossref}
+{ editor empty$
+ editor field.or.null author field.or.null =
+ or
+ { key empty$
+ { booktitle empty$
+ { "need editor, key, or booktitle for " cite$ * " to crossref " *
+ crossref * warning$
+ ""
+ }
+ { "In {\em " booktitle * "\/}" * }
+ if$
+ }
+ { "In " key * }
+ if$
+ }
+ { "In " format.crossref.editor * }
+ if$
+ " \cite{" * crossref * "}" *
+}
+
+% Entry type "article".  Emits authors and title (both checked), then
+% either the emphasized journal, volume/number/pages and date, or --
+% when crossref is present -- the article cross-reference and pages;
+% finally the note.
+FUNCTION {article}
+{ output.bibitem
+ format.authors "author" output.check
+ new.block
+ format.title "title" output.check
+ new.block
+ crossref missing$
+ { journal emphasize "journal" output.check
+ format.vol.num.pages output
+ format.date "year" output.check
+ }
+ { format.article.crossref output.nonnull
+ format.pages output
+ }
+ if$
+ new.block
+ note output
+ fin.entry
+}
+
+% Entry type "book".  Uses the editors when author is empty; when both
+% author and editor are set (and there is no crossref) a warning is
+% issued.  Then the emphasized title, followed by either
+% volume/series/publisher/address or the book cross-reference, then
+% edition, date and note.
+FUNCTION {book}
+{ output.bibitem
+ author empty$
+ { format.editors "author and editor" output.check }
+ { format.authors output.nonnull
+ crossref missing$
+ { "author and editor" editor either.or.check }
+ 'skip$
+ if$
+ }
+ if$
+ new.block
+ format.btitle "title" output.check
+ crossref missing$
+ { format.bvolume output
+ new.block
+ format.number.series output
+ new.sentence
+ publisher "publisher" output.check
+ address output
+ }
+ { new.block
+ format.book.crossref output.nonnull
+ }
+ if$
+ format.edition output
+ format.date "year" output.check
+ new.block
+ note output
+ fin.entry
+}
+
+% Entry type "booklet".  Emits optional authors, the checked title,
+% then howpublished, address and date (a new block is started only if
+% howpublished or address is set), and the note.
+FUNCTION {booklet}
+{ output.bibitem
+ format.authors output
+ new.block
+ format.title "title" output.check
+ howpublished address new.block.checkb
+ howpublished output
+ address output
+ format.date output
+ new.block
+ note output
+ fin.entry
+}
+
+% Entry type "inbook".  Same author/editor handling as book, then the
+% emphasized title and a checked chapter/pages locator.  Without a
+% crossref the volume, series, publisher and address follow; with one
+% the book cross-reference is used instead.  Ends with edition, date
+% and note.
+FUNCTION {inbook}
+{ output.bibitem
+ author empty$
+ { format.editors "author and editor" output.check }
+ { format.authors output.nonnull
+ crossref missing$
+ { "author and editor" editor either.or.check }
+ 'skip$
+ if$
+ }
+ if$
+ new.block
+ format.btitle "title" output.check
+ crossref missing$
+ { format.bvolume output
+ format.chapter.pages "chapter and pages" output.check
+ new.block
+ format.number.series output
+ new.sentence
+ publisher "publisher" output.check
+ address output
+ }
+ { format.chapter.pages "chapter and pages" output.check
+ new.block
+ format.book.crossref output.nonnull
+ }
+ if$
+ format.edition output
+ format.date "year" output.check
+ new.block
+ note output
+ fin.entry
+}
+
+% Entry type "incollection".  Emits authors and title (both checked),
+% then either "In <editors>, <booktitle>" with volume, series,
+% chapter/pages, publisher, address, edition and date, or -- when
+% crossref is present -- the cross-reference plus chapter/pages;
+% finally the note.
+FUNCTION {incollection}
+{ output.bibitem
+ format.authors "author" output.check
+ new.block
+ format.title "title" output.check
+ new.block
+ crossref missing$
+ { format.in.ed.booktitle "booktitle" output.check
+ format.bvolume output
+ format.number.series output
+ format.chapter.pages output
+ new.sentence
+ publisher "publisher" output.check
+ address output
+ format.edition output
+ format.date "year" output.check
+ }
+ { format.incoll.inproc.crossref output.nonnull
+ format.chapter.pages output
+ }
+ if$
+ new.block
+ note output
+ fin.entry
+}
+
+% Entry type "inproceedings".  Emits authors and title (both checked),
+% then -- without a crossref -- the booktitle, volume, series and
+% pages.  The ordering of what follows depends on address:
+%   no address -> organization, publisher, date
+%   address    -> address, date, then organization and publisher in a
+%                 new sentence
+% With a crossref the cross-reference plus pages is used instead.
+% Ends with the note.
+FUNCTION {inproceedings}
+{ output.bibitem
+ format.authors "author" output.check
+ new.block
+ format.title "title" output.check
+ new.block
+ crossref missing$
+ { format.in.ed.booktitle "booktitle" output.check
+ format.bvolume output
+ format.number.series output
+ format.pages output
+ address empty$
+ { organization publisher new.sentence.checkb
+ organization output
+ publisher output
+ format.date "year" output.check
+ }
+ { address output.nonnull
+ format.date "year" output.check
+ new.sentence
+ organization output
+ publisher output
+ }
+ if$
+ }
+ { format.incoll.inproc.crossref output.nonnull
+ format.pages output
+ }
+ if$
+ new.block
+ note output
+ fin.entry
+}
+
+% Entry type "conference" is handled exactly like "inproceedings".
+FUNCTION {conference} { inproceedings }
+
+% Entry type "manual".
+%   - With an author: authors, emphasized title, then organization
+%     and address.
+%   - Without an author but with an organization: organization and
+%     address come first, then the title.
+%   - With neither: the title, then the address.
+% Ends with edition, date and note.
+FUNCTION {manual}
+{ output.bibitem
+ author empty$
+ { organization empty$
+ 'skip$
+ { organization output.nonnull
+ address output
+ }
+ if$
+ }
+ { format.authors output.nonnull }
+ if$
+ new.block
+ format.btitle "title" output.check
+ author empty$
+ { organization empty$
+ { address new.block.checka
+ address output
+ }
+ 'skip$
+ if$
+ }
+ { organization address new.block.checkb
+ organization output
+ address output
+ }
+ if$
+ format.edition output
+ format.date output
+ new.block
+ note output
+ fin.entry
+}
+
+% mastersthesis -- entry-type function for "mastersthesis" entries.
+% Emits authors, title (via format.title), the thesis type with
+% "Master's thesis" passed to format.thesis.type, then school, address,
+% date and note.
+% NOTE(review): format.thesis.type is defined outside this excerpt;
+% presumably the literal is a default that a type field can override --
+% confirm against its definition.
+FUNCTION {mastersthesis}
+{ output.bibitem
+ format.authors "author" output.check
+ new.block
+ format.title "title" output.check
+ new.block
+ "Master's thesis" format.thesis.type output.nonnull
+ school "school" output.check
+ address output
+ format.date "year" output.check
+ new.block
+ note output
+ fin.entry
+}
+
+% misc -- entry-type function for "misc" entries (also used as the
+% default type).
+% Every field goes through plain `output` (none through output.check):
+% authors, title, howpublished, date, note. empty.misc.check runs after
+% fin.entry.
+% NOTE(review): new.block.checka/checkb and empty.misc.check are defined
+% outside this excerpt.
+FUNCTION {misc}
+{ output.bibitem
+ format.authors output
+ title howpublished new.block.checkb
+ format.title output
+ howpublished new.block.checka
+ howpublished output
+ format.date output
+ new.block
+ note output
+ fin.entry
+ empty.misc.check
+}
+
+% phdthesis -- entry-type function for "phdthesis" entries.
+% Same sequence as mastersthesis except that the title goes through
+% format.btitle (not format.title) and the literal passed to
+% format.thesis.type is "PhD thesis".
+% NOTE(review): format.btitle and format.thesis.type are defined outside
+% this excerpt.
+FUNCTION {phdthesis}
+{ output.bibitem
+ format.authors "author" output.check
+ new.block
+ format.btitle "title" output.check
+ new.block
+ "PhD thesis" format.thesis.type output.nonnull
+ school "school" output.check
+ address output
+ format.date "year" output.check
+ new.block
+ note output
+ fin.entry
+}
+
+% proceedings -- entry-type function for "proceedings" entries.
+% Lead element: the editors, or the organization when editor is empty.
+% After title, volume and number/series the order depends on address:
+%   no address:    [organization] publisher date
+%   address given: address date, then [organization] publisher
+% where [organization] is emitted only when editor is non-empty, i.e.
+% when organization was not already used as the lead element.
+% NOTE(review): output*, new.*, format.* and fin.entry are defined earlier
+% in this file, outside this excerpt.
+FUNCTION {proceedings}
+{ output.bibitem
+ editor empty$
+ { organization output }
+ { format.editors output.nonnull }
+ if$
+ new.block
+ format.btitle "title" output.check
+ format.bvolume output
+ format.number.series output
+ address empty$
+ { editor empty$
+ { publisher new.sentence.checka }
+ { organization publisher new.sentence.checkb
+ organization output
+ }
+ if$
+ publisher output
+ format.date "year" output.check
+ }
+ { address output.nonnull
+ format.date "year" output.check
+ new.sentence
+ editor empty$
+ 'skip$
+ { organization output }
+ if$
+ publisher output
+ }
+ if$
+ new.block
+ note output
+ fin.entry
+}
+
+% techreport -- entry-type function for "techreport" entries.
+% Emits authors, title, the report number (format.tr.number), institution,
+% address, date and note.
+% NOTE(review): format.tr.number and the output helpers are defined
+% outside this excerpt.
+FUNCTION {techreport}
+{ output.bibitem
+ format.authors "author" output.check
+ new.block
+ format.title "title" output.check
+ new.block
+ format.tr.number output.nonnull
+ institution "institution" output.check
+ address output
+ format.date "year" output.check
+ new.block
+ note output
+ fin.entry
+}
+
+% unpublished -- entry-type function for "unpublished" entries.
+% Emits authors, title, note and date. Unlike the other entry types here,
+% note goes through output.check while the date goes through plain output.
+% NOTE(review): the output helpers are defined outside this excerpt.
+FUNCTION {unpublished}
+{ output.bibitem
+ format.authors "author" output.check
+ new.block
+ format.title "title" output.check
+ new.block
+ note "note" output.check
+ format.date output
+ fin.entry
+}
+
+% default.type -- fallback for entry types with no function of their own:
+% format them as misc.
+FUNCTION {default.type} { misc }
+
+% Month macros, usable unquoted in a .bib file (e.g. month = jan).
+% May, June and July are spelled out; September is abbreviated "Sept.".
+MACRO {jan} {"Jan."}
+
+MACRO {feb} {"Feb."}
+
+MACRO {mar} {"Mar."}
+
+MACRO {apr} {"Apr."}
+
+MACRO {may} {"May"}
+
+MACRO {jun} {"June"}
+
+MACRO {jul} {"July"}
+
+MACRO {aug} {"Aug."}
+
+MACRO {sep} {"Sept."}
+
+MACRO {oct} {"Oct."}
+
+MACRO {nov} {"Nov."}
+
+MACRO {dec} {"Dec."}
+
+% Journal-name macros: abbreviated journal titles that a .bib file can
+% reference by macro name (e.g. journal = cacm).
+MACRO {acmcs} {"ACM Comput. Surv."}
+
+MACRO {acta} {"Acta Inf."}
+
+MACRO {cacm} {"Commun. ACM"}
+
+MACRO {ibmjrd} {"IBM J. Res. Dev."}
+
+MACRO {ibmsj} {"IBM Syst.~J."}
+
+MACRO {ieeese} {"IEEE Trans. Softw. Eng."}
+
+MACRO {ieeetc} {"IEEE Trans. Comput."}
+
+MACRO {ieeetcad}
+ {"IEEE Trans. Comput.-Aided Design Integrated Circuits"}
+
+MACRO {ipl} {"Inf. Process. Lett."}
+
+MACRO {jacm} {"J.~ACM"}
+
+MACRO {jcss} {"J.~Comput. Syst. Sci."}
+
+MACRO {scp} {"Sci. Comput. Programming"}
+
+MACRO {sicomp} {"SIAM J. Comput."}
+
+MACRO {tocs} {"ACM Trans. Comput. Syst."}
+
+MACRO {tods} {"ACM Trans. Database Syst."}
+
+MACRO {tog} {"ACM Trans. Gr."}
+
+MACRO {toms} {"ACM Trans. Math. Softw."}
+
+MACRO {toois} {"ACM Trans. Office Inf. Syst."}
+
+MACRO {toplas} {"ACM Trans. Prog. Lang. Syst."}
+
+MACRO {tcs} {"Theoretical Comput. Sci."}
+
+% Read the database entries; the sorting and output passes below operate
+% on the entries loaded here.
+READ
+
+% sortify -- normalise the string on top of the stack for use in a sort
+% key: run it through purify$ and then convert it to lower case.
+FUNCTION {sortify}
+{ purify$
+ "l" change.case$
+}
+
+% len: prefix length used by chop.word.
+INTEGERS { len }
+
+% chop.word -- drop a known leading word from a string.
+% Stack on entry (bottom to top): prefix string, prefix length, string.
+% Pops the string into s and the length into len; if the first len
+% characters of s equal the prefix, leaves s without those characters,
+% otherwise leaves s unchanged.
+% NOTE(review): s is a string variable declared outside this excerpt.
+FUNCTION {chop.word}
+{ 's :=
+ 'len :=
+ s #1 len substring$ =
+ { s len #1 + global.max$ substring$ }
+ 's
+ if$
+}
+
+% sort.format.names -- turn a name-list field into a sort string.
+% Pops the name list into s and walks its num.names$ names. Each name is
+% formatted with the von/last/first-initial/jr pattern below and passed
+% through sortify; a separator string is appended before every name after
+% the first. A final name that formats to "others" contributes "et al"
+% instead. The accumulated string is left on the stack.
+% NOTE(review): s, t, nameptr, numnames and namesleft are declared outside
+% this excerpt.
+FUNCTION {sort.format.names}
+{ 's :=
+ #1 'nameptr :=
+ ""
+ s num.names$ 'numnames :=
+ numnames 'namesleft :=
+ { namesleft #0 > }
+ { nameptr #1 >
+ { " " * }
+ 'skip$
+ if$
+ s nameptr "{vv{ } }{ll{ }}{ f{ }}{ jj{ }}" format.name$ 't :=
+ nameptr numnames = t "others" = and
+ { "et al" * }
+ { t sortify * }
+ if$
+ nameptr #1 + 'nameptr :=
+ namesleft #1 - 'namesleft :=
+ }
+ while$
+}
+
+% sort.format.title -- turn a title into a sort string.
+% Pops the title into t, then applies chop.word three times so that a
+% leading "The ", then "An ", then "A " is removed (the prefix/length
+% pairs are pushed in reverse order of use). The result is sortified and
+% cut to global.max$ characters.
+% NOTE(review): t is a string variable declared outside this excerpt.
+FUNCTION {sort.format.title}
+{ 't :=
+ "A " #2
+ "An " #3
+ "The " #4 t chop.word
+ chop.word
+ chop.word
+ sortify
+ #1 global.max$ substring$
+}
+
+% author.sort -- push the name part of the sort key.
+% Uses the author list when present; otherwise the sortified key field;
+% when both are empty it issues a warning naming the entry and pushes "".
+FUNCTION {author.sort}
+{ author empty$
+ { key empty$
+ { "to sort, need author or key in " cite$ * warning$
+ ""
+ }
+ { key sortify }
+ if$
+ }
+ { author sort.format.names }
+ if$
+}
+
+% author.editor.sort -- push the name part of the sort key.
+% Preference order: author list, editor list, sortified key field. When
+% all three are empty it issues a warning naming the entry and pushes "".
+FUNCTION {author.editor.sort}
+{ author empty$
+ { editor empty$
+ { key empty$
+ { "to sort, need author, editor, or key in " cite$ * warning$
+ ""
+ }
+ { key sortify }
+ if$
+ }
+ { editor sort.format.names }
+ if$
+ }
+ { author sort.format.names }
+ if$
+}
+
+% author.organization.sort -- push the name part of the sort key.
+% Preference order: author list, organization (with a leading "The "
+% removed by chop.word, then sortified), sortified key field. When all
+% three are empty it issues a warning naming the entry and pushes "".
+FUNCTION {author.organization.sort}
+{ author empty$
+ { organization empty$
+ { key empty$
+ { "to sort, need author, organization, or key in " cite$ * warning$
+ ""
+ }
+ { key sortify }
+ if$
+ }
+ { "The " #4 organization chop.word sortify }
+ if$
+ }
+ { author sort.format.names }
+ if$
+}
+
+% editor.organization.sort -- push the name part of the sort key.
+% Preference order: editor list, organization (with a leading "The "
+% removed by chop.word, then sortified), sortified key field. When all
+% three are empty it issues a warning naming the entry and pushes "".
+FUNCTION {editor.organization.sort}
+{ editor empty$
+ { organization empty$
+ { key empty$
+ { "to sort, need editor, organization, or key in " cite$ * warning$
+ ""
+ }
+ { key sortify }
+ if$
+ }
+ { "The " #4 organization chop.word sortify }
+ if$
+ }
+ { editor sort.format.names }
+ if$
+}
+
+% presort -- build sort.key$ for the current entry.
+% The name part is chosen by entry type:
+%   book, inbook  -> author.editor.sort
+%   proceedings   -> editor.organization.sort
+%   manual        -> author.organization.sort
+%   anything else -> author.sort
+% A separator, the sortified year, another separator and the
+% sort-formatted title are then appended, and the result is cut to
+% entry.max$ characters before being stored in sort.key$.
+% NOTE(review): `or` and field.or.null are defined outside this excerpt.
+FUNCTION {presort}
+{ type$ "book" =
+ type$ "inbook" =
+ or
+ 'author.editor.sort
+ { type$ "proceedings" =
+ 'editor.organization.sort
+ { type$ "manual" =
+ 'author.organization.sort
+ 'author.sort
+ if$
+ }
+ if$
+ }
+ if$
+ " "
+ *
+ year field.or.null sortify
+ *
+ " "
+ *
+ title field.or.null
+ sort.format.title
+ *
+ #1 entry.max$ substring$
+ 'sort.key$ :=
+}
+
+% Compute sort.key$ for every entry, then sort the entries by it.
+ITERATE {presort}
+
+SORT
+
+% Variables for the label-numbering pass that follows: the widest label
+% seen so far, the next label number, and that widest label's width.
+STRINGS { longest.label }
+
+INTEGERS { number.label longest.label.width }
+
+% initialize.longest.label -- reset the label-pass state: empty widest
+% label, numbering starting at 1, widest width 0.
+FUNCTION {initialize.longest.label}
+{ "" 'longest.label :=
+ #1 'number.label :=
+ #0 'longest.label.width :=
+}
+
+% longest.label.pass -- per-entry step of the label pass.
+% Assigns the current number (as a string) to label, advances the
+% counter, and records the label in longest.label whenever its width$ is
+% strictly greater than the widest seen so far.
+% NOTE(review): label is declared outside this excerpt (presumably an
+% entry string variable -- confirm).
+FUNCTION {longest.label.pass}
+{ number.label int.to.str$ 'label :=
+ number.label #1 + 'number.label :=
+ label width$ longest.label.width >
+ { label 'longest.label :=
+ label width$ 'longest.label.width :=
+ }
+ 'skip$
+ if$
+}
+
+% Number the entries (already sorted at this point) and find the widest
+% label.
+EXECUTE {initialize.longest.label}
+
+ITERATE {longest.label.pass}
+
+% begin.bib -- write the opening of the bibliography output.
+% Writes preamble$ on its own line when it is non-empty, then opens the
+% thebibliography environment with longest.label as its argument and, on
+% the same line, the LaTeX commands "\setlength{\itemsep}{-1ex}\small".
+FUNCTION {begin.bib}
+{ preamble$ empty$
+ 'skip$
+ { preamble$ write$ newline$ }
+ if$
+ "\begin{thebibliography}{" longest.label *
+ "}\setlength{\itemsep}{-1ex}\small" * write$ newline$
+}
+
+% Emit the bibliography header, initialise the output-state constants,
+% then format each entry with the function named after its entry type.
+% NOTE(review): init.state.consts is defined outside this excerpt.
+EXECUTE {begin.bib}
+
+EXECUTE {init.state.consts}
+
+ITERATE {call.type$}
+
+% end.bib -- finish the current output line and close the
+% thebibliography environment.
+FUNCTION {end.bib}
+{ newline$
+ "\end{thebibliography}" write$ newline$
+}
+
+% Close the bibliography once all entries have been written.
+EXECUTE {end.bib}
+
+% end of file ieee.bst
+% ---------------------------------------------------------------
--- /dev/null
+#FIG 3.2
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+6 150 1650 900 2025
+4 1 0 100 0 0 10 0.0000 0 135 735 525 1800 Unexpected\001
+4 1 0 100 0 0 10 0.0000 0 135 585 525 1995 Messages\001
+-6
+6 150 150 900 525
+4 1 0 100 0 0 10 0.0000 0 135 615 525 300 Preposted\001
+4 1 0 100 0 0 10 0.0000 0 105 525 525 495 Receives\001
+-6
+6 2550 4125 3150 4725
+4 1 0 100 0 0 10 0.0000 0 135 600 2850 4275 Length=0\001
+4 1 0 100 0 0 10 0.0000 0 105 540 2850 4470 Truncate\001
+4 1 0 100 0 0 10 0.0000 0 105 480 2850 4665 No Ack\001
+-6
+6 1050 1575 1950 1875
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 1050 1575 1950 1575 1950 1875 1050 1875 1050 1575
+4 1 0 100 0 0 10 0.0000 0 105 780 1500 1725 Match Short\001
+-6
+6 5400 1575 6300 2175
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 5400 1575 6300 1575 6300 2175 5400 2175 5400 1575
+4 1 0 100 0 0 10 0.0000 0 105 405 5850 1875 Buffer\001
+-6
+6 5400 2400 6300 3000
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 5400 2400 6300 2400 6300 3000 5400 3000 5400 2400
+4 1 0 100 0 0 10 0.0000 0 105 405 5850 2700 Buffer\001
+-6
+6 1050 2400 1950 2700
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 1050 2400 1950 2400 1950 2700 1050 2700 1050 2400
+4 1 0 100 0 0 10 0.0000 0 105 780 1500 2550 Match Short\001
+-6
+6 1050 825 1950 1125
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 1050 825 1950 825 1950 1125 1050 1125 1050 825
+4 1 0 100 0 0 10 0.0000 0 105 765 1500 975 Match None\001
+-6
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1500 1125 1500 1575
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 3225 2025 4050 3375
+2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2
+ 150 675 6600 675
+2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2
+ 150 1350 6600 1350
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 2400 4125 3300 4125 3300 4725 2400 4725 2400 4125
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 3225 4500 4050 3675
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 3225 1725 5400 1725
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 3225 2550 5400 2550
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 3225 2850 4050 3450
+2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1500 1800 1500 2400
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 2400 825 3300 825 3300 1275 2400 1275 2400 825
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1500 2625 1500 4125
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 1050 4125 1950 4125 1950 4425 1050 4425 1050 4125
+2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1500 300 1500 825
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1875 975 2400 975
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1875 1725 2400 1725
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1875 2550 2400 2550
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1875 4275 2400 4275
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 2400 1575 3300 1575 3300 2175 2400 2175 2400 1575
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 2400 2400 3300 2400 3300 3000 2400 3000 2400 2400
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 4050 3300 5250 3300 5250 3750 4050 3750 4050 3300
+4 1 0 100 0 0 10 0.0000 0 105 885 1500 150 Match Entries\001
+4 1 0 100 0 0 10 0.0000 0 135 1290 2850 150 Memory Descriptors\001
+4 1 0 100 0 0 10 0.0000 0 135 1065 5850 150 Memory Regions\001
+4 1 0 100 0 0 10 0.0000 0 135 825 4500 150 Event Queues\001
+4 1 0 100 0 0 10 0.0000 0 105 585 525 1050 RcvMark\001
+4 1 0 100 0 0 10 0.0000 0 105 330 2850 1102 None\001
+4 1 0 100 0 0 10 0.0000 0 135 705 1500 4275 Match Any\001
+4 1 0 50 0 0 10 0.0000 0 150 810 2850 1725 max_offset=\001
+4 1 0 50 0 0 10 0.0000 0 150 840 2850 1875 n - short_len\001
+4 1 0 50 0 0 10 0.0000 0 150 810 2850 2550 max_offset=\001
+4 1 0 50 0 0 10 0.0000 0 150 840 2850 2700 n - short_len\001
+4 1 0 50 0 0 10 0.0000 0 105 405 2850 2100 unlink\001
+4 1 0 50 0 0 10 0.0000 0 105 405 2850 2925 unlink\001
+4 1 0 100 0 0 10 0.0000 0 135 930 4650 3675 Message Queue\001
+4 1 0 100 0 0 10 0.0000 0 135 735 4650 3525 Unexpected\001
--- /dev/null
+#FIG 3.2
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 1350 900 1650 900 1650 1200 1350 1200 1350 900
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 1800 1350 2100 1350 2100 1650 1800 1650 1800 1350
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 2250 1800 2550 1800 2550 2100 2250 2100 2250 1800
+2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2
+ 4200 375 4200 2100
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 525 600 1125 600 1125 2100 525 2100 525 600
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 4425 1275 4875 1275 4875 1950 4425 1950 4425 1275
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 2550 1200 3150 1200 3150 1500 2550 1500 2550 1200
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 3000 1425 4425 1425
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 3600 825 3750 825 3750 1125 3600 1125 3600 825
+2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 2025 1425 2550 1425
+2 2 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5
+ 4425 750 4875 750 4875 1125 4425 1125 4425 750
+2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 3675 975 4425 975
+3 0 0 1 0 7 100 0 -1 0.000 0 1 0 2
+ 0 0 1.00 60.00 120.00
+ 825 1050 1350 1050
+ 0.000 0.000
+3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5
+ 0 0 1.00 60.00 120.00
+ 1500 1125 1500 1350 1500 1500 1650 1500 1800 1500
+ 0.000 1.000 1.000 1.000 0.000
+3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5
+ 0 0 1.00 60.00 120.00
+ 1950 1575 1950 1800 1950 1950 2100 1950 2250 1950
+ 0.000 1.000 1.000 1.000 0.000
+3 0 0 1 0 7 100 0 -1 0.000 0 0 0 2
+ 525 975 1125 975
+ 0.000 0.000
+3 0 0 1 0 7 100 0 -1 0.000 0 0 0 2
+ 525 1125 1125 1125
+ 0.000 0.000
+3 0 0 1 0 7 100 0 -1 0.000 0 1 0 7
+ 0 0 1.00 60.00 120.00
+ 3000 1275 3150 1275 3300 1275 3300 1125 3300 975 3450 975
+ 3600 975
+ 0.000 1.000 1.000 1.000 1.000 1.000 0.000
+4 0 0 100 0 0 10 0.0000 0 105 690 1275 750 Match List\001
+4 1 0 100 0 0 10 0.0000 0 105 780 825 525 Portal Table\001
+4 2 0 100 0 0 10 0.0000 0 135 825 4050 2025 Library Space\001
+4 0 0 100 0 0 10 0.0000 0 135 1110 4350 2175 Application Space\001
+4 1 0 100 0 0 10 0.0000 0 135 660 2850 1050 Descriptor\001
+4 1 0 100 0 0 10 0.0000 0 135 540 2850 825 Memory\001
+4 1 0 100 0 0 10 0.0000 0 135 765 3750 675 Event Queue\001
+4 1 0 100 0 0 10 0.0000 0 135 495 4650 675 Regions\001
+4 1 0 100 0 0 10 0.0000 0 135 540 4650 525 Memory\001
--- /dev/null
+@Article{ Cplant,
+ title = { {M}assively {P}arallel {C}omputing with
+ {C}ommodity {C}omponents },
+ author = { Ron Brightwell and David S. Greenberg and Arthur
+ B. Maccabe and Rolf Riesen },
+ journal = { Parallel Computing },
+ volume = { 26 },
+ month = { February },
+ pages = { 243-266 },
+ year = { 2000 }
+}
+
+@Manual{ Portals,
+ organization = { Sandia National Laboratories },
+ title = { {P}uma {P}ortals },
+ note = { http://www.cs.sandia.gov/puma/portals },
+ year = { 1997 }
+}
+
+@Techreport{ VIA,
+ title = { {V}irtual {I}nterface {A}rchitecture
+ {S}pecification {V}ersion 1.0 },
+ author = { {Compaq, Microsoft, and Intel} },
+ institution = { Compaq, Microsoft, and Intel },
+ month = { December },
+ year = { 1997 }
+}
+
+@Techreport{ ST,
+ title = { {I}nformation {T}echnology - {S}cheduled
+ {T}ransfer {P}rotocol - {W}orking {D}raft 2.0 },
+ author = { {Task Group of Technical Committee T11} },
+ institution = { Accredited Standards Committee NCITS },
+ month = { July },
+ year = { 1998 }
+}
+
+@Manual{ TFLOPS,
+ organization = { Sandia National Laboratories },
+ title = { ASCI Red },
+ note = { http://www.sandia.gov/ASCI/TFLOP },
+ year = { 1996 }
+}
+
+@Techreport{ GM,
+ title = { The {GM} {M}essage {P}assing {S}ystem },
+ author = { {Myricom, Inc.} },
+ institution = { {Myricom, Inc.} },
+ year = { 1997 },
+}
+
+@Article{ MPIstandard,
+ title = { {MPI}: {A} {M}essage-{P}assing {I}nterface standard },
+ author = { {Message Passing Interface Forum} },
+ journal = { The International Journal of Supercomputer Applications
+ and High Performance Computing },
+ volume = { 8 },
+ year = { 1994 }
+}
+
+@Inproceedings{ PumaOS,
+ author = "Lance Shuler and Chu Jong and Rolf Riesen and
+ David van Dresser and Arthur B. Maccabe and
+ Lee Ann Fisk and T. Mack Stallcup",
+ booktitle = "Proceedings of the 1995 Intel Supercomputer
+ User's Group Conference",
+ title = "The {P}uma Operating System for Massively Parallel Computers",
+ organization = "Intel Supercomputer User's Group",
+ year = 1995
+}
+
+@InProceedings{ SUNMOS,
+author = "Arthur B. Maccabe and Kevin S. McCurley and Rolf Riesen and
+ Stephen R. Wheat",
+title = "{SUNMOS} for the {Intel} {Paragon}: A Brief User's Guide",
+booktitle = "Proceedings of the {Intel} Supercomputer Users' Group. 1994
+ Annual North America Users' Conference.",
+year = 1994,
+pages = "245--251",
+month = "June",
+location = "ftp.cs.sandia.gov /pub/sunmos/papers/ISUG94-1.ps"
+}
+
+@InProceedings { PumaMPI,
+ title = { Design and Implementation of {MPI} on {P}uma Portals },
+ author = { Ron Brightwell and Lance Shuler },
+ booktitle = { Proceedings of the Second MPI Developer's Conference },
+ pages = { 18-25 },
+ month = { July },
+ year = { 1996 }
+}
+
+@Inproceedings{ FM2,
+ author = { Mario Lauria and Scott Pakin and Andrew Chien },
+ title = { {E}fficient {L}ayering for {H}igh {S}peed
+ {C}ommunication: {F}ast {M}essages 2.x },
+ Booktitle = { Proceedings of the IEEE International Symposium
+ on High Performance Distributed Computing },
+ year = { 1998 }
+}
+
+@Manual { CraySHMEM,
+ title = "SHMEM Technical Note for C, SG-2516 2.3",
+ organization = "Cray Research, Inc.",
+ month = "October",
+ year = 1994
+}
+
+@Manual { MPI2,
+ title = "{MPI}-2: {E}xtensions to the {M}essage-{P}assing {I}nterface",
+ organization = "Message Passing Interface Forum",
+ note = "http://www.mpi-forum.org/docs/mpi-20-html/mpi2-report.html",
+ month = "July",
+ year = 1997
+}
+
+@InProceedings { PMMPI,
+ title = { {The Design and Implementation of Zero Copy MPI Using
+ Commodity Hardware with a High Performance Network} },
+ author = { Francis O'Carroll and Hiroshi Tezuka and Atsushi Hori
+ and Yutaka Ishikawa },
+ booktitle = { Proceedings of the ICS },
+ year = { 1998 }
+}
--- /dev/null
+#LyX 1.2 created this file. For more info see http://www.lyx.org/
+\lyxformat 220
+\textclass report
+\begin_preamble
+\usepackage{fullpage}
+\renewenvironment{comment}%
+{\begin{quote}\textbf{Discussion}: \slshape}%
+{\end{quote}}
+\pagestyle{myheadings}
+\markboth{$Revision: 1.1.2.1 $\hfil$Date: 2003/05/19 04:25:30 $}%
+{$Date: 2003/05/19 04:25:30 $\hfil$Revision: 1.1.2.1 $}
+\end_preamble
+\language american
+\inputencoding auto
+\fontscheme pslatex
+\graphics default
+\paperfontsize 10
+\spacing single
+\papersize letterpaper
+\paperpackage a4
+\use_geometry 0
+\use_amsmath 0
+\use_natbib 0
+\use_numerical_citations 0
+\paperorientation portrait
+\secnumdepth 2
+\tocdepth 2
+\paragraph_separation indent
+\defskip medskip
+\quotes_language english
+\quotes_times 2
+\papercolumns 1
+\papersides 2
+\paperpagestyle headings
+
+\layout Title
+
+The Portals 3.2 Message Passing Interface
+\newline
+ Revision 1.1
+\layout Author
+
+Ron Brightwell
+\begin_inset Foot
+collapsed true
+
+\layout Standard
+
+R.
+ Brightwell and R.
+ Riesen are with the Scalable Computing Systems Department, Sandia National
+ Laboratories, P.O.
+ Box 5800, Albuquerque, NM\SpecialChar ~
+\SpecialChar ~
+87111-1110, bright@cs.sandia.gov, rolf@cs.sandia.gov.
+\end_inset
+
+, Arthur B.
+ Maccabe
+\begin_inset Foot
+collapsed true
+
+\layout Standard
+
+A.
+ B.
+ Maccabe is with the Computer Science Department, University of New Mexico,
+ Albuquerque, NM\SpecialChar ~
+\SpecialChar ~
+87131-1386, maccabe@cs.unm.edu.
+\end_inset
+
+, Rolf Riesen and Trammell Hudson
+\layout Abstract
+
+This report presents a specification for the Portals 3.2 message passing
+ interface.
+ Portals 3.2 is intended to allow scalable, high-performance network communicatio
+n between nodes of a parallel computing system.
+ Specifically, it is designed to support a parallel computing platform composed
+ of clusters of commodity workstations connected by a commodity system area
+ network fabric.
+ In addition, Portals 3.2 is well suited to massively parallel processing
+ and embedded systems.
+ Portals 3.2 represents an adaptation of the data movement layer developed
+ for massively parallel processing platforms, such as the 4500-node Intel
+ TeraFLOPS machine.
+
+\layout Standard
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+clearpage
+\backslash
+pagenumbering{roman}
+\backslash
+setcounter{page}{3}
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset LatexCommand \tableofcontents{}
+
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+cleardoublepage
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset FloatList figure
+
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+cleardoublepage
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset FloatList table
+
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+cleardoublepage
+\end_inset
+
+
+\layout Chapter*
+
+Summary of Changes for Revision 1.1
+\layout Enumerate
+
+Updated version number to 3.2 throughout the document
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sub:PtlGetId}
+
+\end_inset
+
+: added
+\family typewriter
+PTL_SEGV
+\family default
+ to error list for
+\shape italic
+PtlGetId
+\shape default
+.
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sec:meattach}
+
+\end_inset
+
+: added
+\family typewriter
+PTL_ML_TOOLONG
+\family default
+ to error list for
+\shape italic
+PtlMEAttach
+\shape default
+.
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sec:meunlink}
+
+\end_inset
+
+: removed text referring to a list of associated memory descriptors.
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sec:mdfree}
+
+\end_inset
+
+: added text to describe unlinking a free-floating memory descriptor.
+\layout Enumerate
+
+Table
+\begin_inset LatexCommand \ref{tab:types}
+
+\end_inset
+
+: added entry for
+\family typewriter
+ptl_seq_t
+\family default
+.
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sec:md-type}
+
+\end_inset
+
+:
+\begin_deeper
+\layout Enumerate
+
+added definition of
+\family typewriter
+max_offset
+\family default
+.
+\layout Enumerate
+
+added text to clarify
+\family typewriter
+PTL_MD_MANAGE_REMOTE
+\family default
+.
+\end_deeper
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sec:mdattach}
+
+\end_inset
+
+: modified text for
+\family typewriter
+unlink_op
+\family default
+.
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sec:niinit}
+
+\end_inset
+
+: added text to clarify multiple calls to
+\shape italic
+PtlNIInit
+\shape default
+.
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sec:mdattach}
+
+\end_inset
+
+: added text to clarify
+\family typewriter
+unlink_nofit
+\family default
+.
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sec:receiving}
+
+\end_inset
+
+: removed text indicating that an MD will reject a message if the associated
+ EQ is full.
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sec:mdfree}
+
+\end_inset
+
+: added
+\family typewriter
+PTL_MD_INUSE
+\family default
+ error code and text to indicate that only MDs with no pending operations
+ can be unlinked.
+\layout Enumerate
+
+Table
+\begin_inset LatexCommand \ref{tab:retcodes}
+
+\end_inset
+
+: added
+\family typewriter
+PTL_MD_INUSE
+\family default
+ return code.
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sec:event-type}
+
+\end_inset
+
+: added user id field, MD handle field, and NI specific failure field to
+ the
+\family typewriter
+ptl_event_t
+\family default
+ structure.
+\layout Enumerate
+
+Table
+\begin_inset LatexCommand \ref{tab:types}
+
+\end_inset
+
+: added
+\family typewriter
+ptl_ni_fail_t
+\family default
+.
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sec:event-type}
+
+\end_inset
+
+: added
+\family typewriter
+PTL_EVENT_UNLINK
+\family default
+ event type.
+\layout Enumerate
+
+Table
+\begin_inset LatexCommand \ref{tab:func}
+
+\end_inset
+
+: removed
+\shape slanted
+PtlTransId
+\shape default
+.
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sec:meattach}
+
+\end_inset
+
+, Section
+\begin_inset LatexCommand \ref{sec:meinsert}
+
+\end_inset
+
+, Section
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+: listed allowable constants with relevant fields.
+\layout Enumerate
+
+Table
+\begin_inset LatexCommand \ref{tab:func}
+
+\end_inset
+
+: added
+\shape italic
+PtlMEAttachAny
+\shape default
+ function.
+\layout Enumerate
+
+Table
+\begin_inset LatexCommand \ref{tab:retcodes}
+
+\end_inset
+
+: added
+\family typewriter
+PTL_PT_FULL
+\family default
+ return code for
+\shape italic
+PtlMEAttachAny
+\shape default
+.
+\layout Enumerate
+
+Table
+\begin_inset LatexCommand \ref{tab:oconsts}
+
+\end_inset
+
+: updated to reflect new event types.
+\layout Enumerate
+
+Section
+\begin_inset LatexCommand \ref{sec:id-type}
+
+\end_inset
+
+: added
+\family typewriter
+ptl_nid_t
+\family default
+,
+\family typewriter
+ptl_pid_t
+\family default
+, and
+\family typewriter
+ptl_uid_t
+\family default
+.
+\layout Chapter*
+
+Summary of Changes for Version 3.1
+\layout Section*
+
+Thread Issues
+\layout Standard
+
+The most significant change to the interface from version 3.0 to 3.1 involves
+ the clarification of how the interface interacts with multi-threaded applicatio
+ns.
+ We adopted a generic thread model in which processes define an address
+ space and threads share the address space.
+ Consideration of the API in the light of threads led to several clarifications
+ throughout the document:
+\layout Enumerate
+
+Glossary:
+\begin_deeper
+\layout Enumerate
+
+added a definition for
+\emph on
+thread
+\emph default
+,
+\layout Enumerate
+
+reworded the definition for
+\emph on
+process
+\emph default
+.
+
+\end_deeper
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:apiover}
+
+\end_inset
+
+: added section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:threads}
+
+\end_inset
+
+ to describe the multi-threading model used by the Portals API.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:ptlinit}
+
+\end_inset
+
+:
+\emph on
+PtlInit
+\emph default
+ must be called at least once and may be called any number of times.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:ptlfini}
+
+\end_inset
+
+:
+\emph on
+PtlFini
+\emph default
+ should be called once as the process is terminating and not as each thread
+ terminates.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:pid}
+
+\end_inset
+
+: Portals does not define thread ids.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:ni}
+
+\end_inset
+
+: network interfaces are associated with processes, not threads.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:niinit}
+
+\end_inset
+
+:
+\emph on
+PtlNIInit
+\emph default
+ must be called at least once and may be called any number of times.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:eqget}
+
+\end_inset
+
+:
+\emph on
+PtlEQGet
+\emph default
+ returns
+\family typewriter
+PTL_EQ_EMPTY
+\family default
+ if a thread is blocked on
+\emph on
+PtlEQWait
+\emph default
+.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:eqwait}
+
+\end_inset
+
+: waiting threads are awakened in FIFO order.
+
+\layout Standard
+
+Two functions,
+\emph on
+PtlNIBarrier
+\emph default
+ and
+\emph on
+PtlEQCount
+\emph default
+, were removed from the API.
+
+\emph on
+PtlNIBarrier
+\emph default
+ was defined to block the calling process until all of the processes in
+ the application group had invoked
+\emph on
+PtlNIBarrier
+\emph default
+.
+ We now consider this functionality, along with the concept of groups (see
+ the discussion under
+\begin_inset Quotes eld
+\end_inset
+
+other changes
+\begin_inset Quotes erd
+\end_inset
+
+), to be part of the runtime system, not part of the Portals API.
+
+\emph on
+PtlEQCount
+\emph default
+ was defined to return the number of events in an event queue.
+ Because external operations may lead to new events being added and other
+ threads may remove events, the value returned by
+\emph on
+PtlEQCount
+\emph default
+ would have to be a hint about the number of events in the event queue.
+\layout Section*
+
+Handling small, unexpected messages
+\layout Standard
+
+Another set of changes relates to handling small unexpected messages in
+ MPI.
+ In designing version 3.0, we assumed that each unexpected message would
+ be placed in a unique memory descriptor.
+ To avoid the need to process a long list of memory descriptors, we moved
+ the memory descriptors out of the match list and hung them off of a single
+ match list entry.
+ In this way, large unexpected messages would only encounter a single
+\begin_inset Quotes eld
+\end_inset
+
+short message
+\begin_inset Quotes erd
+\end_inset
+
+ match list entry before encountering the
+\begin_inset Quotes eld
+\end_inset
+
+long message
+\begin_inset Quotes erd
+\end_inset
+
+ match list entry.
+ Experience with this strategy identified resource management problems.
+ In particular, a long sequence of very short (or zero length) messages
+ could quickly exhaust the memory descriptors constructed for handling unexpecte
+d messages.
+ Our new strategy involves the use of several very large memory descriptors
+ for small unexpected messages.
+ Consecutive unexpected messages will be written into the first of these
+ memory descriptors until the memory descriptor fills up.
+ When the first of the
+\begin_inset Quotes eld
+\end_inset
+
+small memory
+\begin_inset Quotes erd
+\end_inset
+
+ descriptors fills up, it will be unlinked and subsequent short messages
+ will be written into the next
+\begin_inset Quotes eld
+\end_inset
+
+short message
+\begin_inset Quotes erd
+\end_inset
+
+ memory descriptor.
+ In this case, a
+\begin_inset Quotes eld
+\end_inset
+
+short message
+\begin_inset Quotes erd
+\end_inset
+
+ memory descriptor will be declared full when it does not have sufficient
+ space for the largest small unexpected message.
+\layout Standard
+
+This led to two significant changes.
+ First, each match list entry now has a single memory descriptor rather
+ than a list of memory descriptors.
+ Second, in addition to exceeding the operation threshold, a memory descriptor
+ can be unlinked when the local offset exceeds a specified value.
+ These changes have led to several changes in this document:
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{subsec:paddress}
+
+\end_inset
+
+:
+\begin_deeper
+\layout Enumerate
+
+removed references to the memory descriptor list,
+\layout Enumerate
+
+changed the portals address translation description to indicate that unlinking
+ a memory descriptor implies unlinking the associated match list entry--match
+ list entries can no longer be unlinked independently from the memory descriptor.
+
+\end_deeper
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:meattach}
+
+\end_inset
+
+:
+\begin_deeper
+\layout Enumerate
+
+removed unlink from argument list,
+\layout Enumerate
+
+removed description of
+\family typewriter
+ptl_unlink
+\family default
+ type,
+\layout Enumerate
+
+changed wording of the error condition when the Portal table index already
+ has an associated match list.
+
+\end_deeper
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:meinsert}
+
+\end_inset
+
+: removed unlink from argument list.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:md-type}
+
+\end_inset
+
+: added
+\family typewriter
+max_offset
+\family default
+.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:mdattach}
+
+\end_inset
+
+:
+\begin_deeper
+\layout Enumerate
+
+added description of
+\family typewriter
+ptl_unlink
+\family default
+ type,
+\layout Enumerate
+
+removed reference to memory descriptor lists,
+\layout Enumerate
+
+changed wording of the error condition when match list entry already has
+ an associated memory descriptor,
+\layout Enumerate
+
+changed the description of the
+\family typewriter
+unlink
+\family default
+ argument.
+
+\end_deeper
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:md}
+
+\end_inset
+
+: removed
+\family typewriter
+PtlMDInsert
+\family default
+ operation.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:mdbind}
+
+\end_inset
+
+: removed references to memory descriptor list.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:mdfree}
+
+\end_inset
+
+: removed reference to memory descriptor list.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:summary}
+
+\end_inset
+
+: removed references to PtlMDInsert.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:semantics}
+
+\end_inset
+
+: removed reference to memory descriptor list.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:exmpi}
+
+\end_inset
+
+: revised the MPI example to reflect the changes to the interface.
+
+\layout Standard
+
+Several changes have been made to improve the general documentation of the
+ interface.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:handle-type}
+
+\end_inset
+
+: documented the special value
+\family typewriter
+PTL_EQ_NONE
+\family default
+.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:id-type}
+
+\end_inset
+
+: documented the special value
+\family typewriter
+PTL_ID_ANY
+\family default
+.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:mdbind}
+
+\end_inset
+
+: documented the return value
+\family typewriter
+PTL_INV_EQ
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:mdupdate}
+
+\end_inset
+
+: clarified the description of the
+\emph on
+PtlMDUpdate
+\emph default
+ function.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:implvals}
+
+\end_inset
+
+: introduced a new section to document the implementation defined values.
+
+\layout Enumerate
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:summary}
+
+\end_inset
+
+: modified Table\SpecialChar ~
+
+\begin_inset LatexCommand \ref{tab:oconsts}
+
+\end_inset
+
+ to indicate where each constant is introduced and where it is used.
+
+\layout Section*
+
+Other changes
+\layout Subsection*
+
+Implementation defined limits (Section
+\begin_inset LatexCommand \ref{sec:niinit}
+
+\end_inset
+
+)
+\layout Standard
+
+The earlier version provided implementation defined limits for the maximum
+ number of match entries, the maximum number of memory descriptors, etc.
+ Rather than spanning the entire implementation, these limits are now associated
+ with individual network interfaces.
+\layout Subsection*
+
+Added User Ids (Section
+\begin_inset LatexCommand \ref{sec:uid}
+
+\end_inset
+
+)
+\layout Standard
+
+Group Ids had been used to simplify access control entries.
+ In particular, a process could allow access for all of the processes in
+ a group.
+ User Ids have been introduced to regain this functionality.
+ We use user ids to fill this role.
+\layout Subsection*
+
+Removed Group Ids and Rank Ids (Section
+\begin_inset LatexCommand \ref{sec:pid}
+
+\end_inset
+
+)
+\layout Standard
+
+The earlier version of Portals had two forms for addressing processes: <node
+ id, process id> and <group id, rank id>.
+ A process group was defined as the collection of processes created during
+ application launch.
+ Each process in the group was given a unique rank id in the range 0 to
+
+\begin_inset Formula $n-1$
+\end_inset
+
+ where
+\begin_inset Formula $n$
+\end_inset
+
+ was the number of processes in the group.
+ We removed groups because they are better handled in the runtime system.
+\layout Subsection*
+
+Match lists (Section
+\begin_inset LatexCommand \ref{sec:meattach}
+
+\end_inset
+
+)
+\layout Standard
+
+It is no longer illegal to have an existing match entry when calling PtlMEAttach.
+ A position argument was added to the list of arguments supplied to
+\emph on
+PtlMEAttach
+\emph default
+ to specify whether the new match entry is prepended or appended to the
+ existing list.
+ If there is no existing match list, the position argument is ignored.
+\layout Subsection*
+
+Unlinking Memory Descriptors (Section
+\begin_inset LatexCommand \ref{sec:md}
+
+\end_inset
+
+)
+\layout Standard
+
+Previously, a memory descriptor could be unlinked if the offset exceeded
+ a threshold upon the completion of an operation.
+ In this version, the unlinking is delayed until there is a matching operation
+ which requires more memory than is currently available in the descriptor.
+ In addition to changes in that section, this led to a revision of Figure\SpecialChar ~
+
+\begin_inset LatexCommand \ref{fig:flow}
+
+\end_inset
+
+.
+\layout Subsection*
+
+Split Phase Operations and Events (Section
+\begin_inset LatexCommand \ref{sec:eq}
+
+\end_inset
+
+)
+\layout Standard
+
+Previously, there were five types of events:
+\family typewriter
+PTL_EVENT_PUT
+\family default
+,
+\family typewriter
+PTL_EVENT_GET
+\family default
+,
+\family typewriter
+PTL_EVENT_REPLY
+\family default
+,
+\family typewriter
+PTL_EVENT_SENT
+\family default
+, and
+\family typewriter
+PTL_EVENT_ACK.
+
+\family default
+The first four of these reflected the completion of potentially long operations.
+ We have introduced new event types to reflect the fact that long operations
+ have a distinct starting point and a distinct completion point.
+ Moreover, the completion may be successful or unsuccessful.
+\layout Standard
+
+In addition to providing a mechanism for reporting failure to higher levels
+ of software, this split provides an opportunity for improved ordering
+ semantics.
+ Previously, if one process initiated two operations (e.g., two put operations)
+ on a remote process, these operations were guaranteed to complete in the
+ same order that they were initiated.
+ Now, we only guarantee that the initiation events are delivered in the
+ same order.
+ In particular, the operations do not need to complete in the order that
+ they were initiated.
+\layout Subsection*
+
+Well known process ids (Section
+\begin_inset LatexCommand \ref{sec:niinit}
+
+\end_inset
+
+)
+\layout Standard
+
+To support the notion of
+\begin_inset Quotes eld
+\end_inset
+
+well known process ids,
+\begin_inset Quotes erd
+\end_inset
+
+ we added a process id argument to the arguments for PtlNIInit.
+\layout Chapter*
+
+Glossary
+\layout Description
+
+API Application Programming Interface.
+ A definition of the functions and semantics provided by a library of functions.
+
+\layout Description
+
+Initiator A
+\emph on
+process
+\emph default
+ that initiates a message operation.
+
+\layout Description
+
+Message An application-defined unit of data that is exchanged between
+\emph on
+processes
+\emph default
+.
+
+\layout Description
+
+Message\SpecialChar ~
+Operation Either a put operation, which writes data, or a get operation,
+ which reads data.
+
+\layout Description
+
+Network A network provides point-to-point communication between
+\emph on
+nodes
+\emph default
+.
+ Internally, a network may provide multiple routes between endpoints (to
+ improve fault tolerance or to improve performance characteristics); however,
+ multiple paths will not be exposed outside of the network.
+
+\layout Description
+
+Node A node is an endpoint in a
+\emph on
+network
+\emph default
+.
+ Nodes provide processing capabilities and memory.
+ A node may provide multiple processors (an SMP node) or it may act as a
+
+\emph on
+gateway
+\emph default
+ between networks.
+
+\layout Description
+
+Process A context of execution.
+ A process defines a virtual memory (VM) context.
+ This context is not shared with other processes.
+ Several threads may share the VM context defined by a process.
+
+\layout Description
+
+Target A
+\emph on
+process
+\emph default
+ that is acted upon by a message operation.
+
+\layout Description
+
+Thread A context of execution that shares a VM context with other threads.
+
+\layout Standard
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+cleardoublepage
+\layout Standard
+
+\backslash
+setcounter{page}{1}
+\backslash
+pagenumbering{arabic}
+\end_inset
+
+
+\layout Chapter
+
+Introduction
+\begin_inset LatexCommand \label{sec:intro}
+
+\end_inset
+
+
+\layout Section
+
+Overview
+\layout Standard
+
+This document describes an application programming interface for message
+ passing between nodes in a system area network.
+ The goal of this interface is to improve the scalability and performance
+ of network communication by defining the functions and semantics of message
+ passing required for scaling a parallel computing system to ten thousand
+ nodes.
+ This goal is achieved by providing an interface that will allow a quality
+ implementation to take advantage of the inherently scalable design of Portals.
+\layout Standard
+
+This document is divided into several sections:
+\layout Description
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:intro}
+
+\end_inset
+
+---Introduction This section describes the purpose and scope of the Portals
+ API.
+
+\layout Description
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:apiover}
+
+\end_inset
+
+---An\SpecialChar ~
+Overview\SpecialChar ~
+of\SpecialChar ~
+the\SpecialChar ~
+Portals\SpecialChar ~
+3.2\SpecialChar ~
+API This section gives a brief overview of the
+ Portals API.
+ The goal is to introduce the key concepts and terminology used in the descripti
+on of the API.
+
+\layout Description
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:api}
+
+\end_inset
+
+---The\SpecialChar ~
+Portals\SpecialChar ~
+3.2\SpecialChar ~
+API This section describes the functions and semantics of
+ the Portals application programming interface.
+
+\layout Description
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:semantics}
+
+\end_inset
+
+---The\SpecialChar ~
+Semantics\SpecialChar ~
+of\SpecialChar ~
+Message\SpecialChar ~
+Transmission This section describes the semantics
+ of message transmission.
+ In particular, the information transmitted in each type of message and
+ the processing of incoming messages.
+
+\layout Description
+
+Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:examples}
+
+\end_inset
+
+---Examples This section presents several examples intended to illustrate
+ the use of the Portals API.
+
+\layout Section
+
+Purpose
+\layout Standard
+
+Existing message passing technologies available for commodity cluster networking
+ hardware do not meet the scalability goals required by the Cplant\SpecialChar ~
+
+\begin_inset LatexCommand \cite{Cplant}
+
+\end_inset
+
+ project at Sandia National Laboratories.
+ The goal of the Cplant project is to construct a commodity cluster that
+ can scale to the order of ten thousand nodes.
+ This number greatly exceeds the capacity for which existing message passing
+ technologies have been designed and implemented.
+\layout Standard
+
+In addition to the scalability requirements of the network, these technologies
+ must also be able to support a scalable implementation of the Message Passing
+ Interface (MPI)\SpecialChar ~
+
+\begin_inset LatexCommand \cite{MPIstandard}
+
+\end_inset
+
+ standard, which has become the
+\shape italic
+de facto
+\shape default
+ standard for parallel scientific computing.
+ While MPI does not impose any scalability limitations, existing message
+ passing technologies do not provide the functionality needed to allow implement
+ations of MPI to meet the scalability requirements of Cplant.
+\layout Standard
+
+The following are properties of a network architecture that do not impose
+ any inherent scalability limitations:
+\layout Itemize
+
+Connectionless - Many connection-oriented architectures, such as VIA\SpecialChar ~
+
+\begin_inset LatexCommand \cite{VIA}
+
+\end_inset
+
+ and TCP/IP sockets, have limitations on the number of peer connections
+ that can be established.
+
+\layout Itemize
+
+Network independence - Many communication systems depend on the host processor
+ to perform operations in order for messages in the network to be consumed.
+ Message consumption from the network should not be dependent on host processor
+ activity, such as the operating system scheduler or user-level thread scheduler.
+
+\layout Itemize
+
+User-level flow control - Many communication systems manage flow control
+ internally to avoid depleting resources, which can significantly impact
+ performance as the number of communicating processes increases.
+
+\layout Itemize
+
+OS Bypass - High performance network communication should not involve memory
+ copies into or out of a kernel-managed protocol stack.
+
+\layout Standard
+
+The following are properties of a network architecture that do not impose
+ scalability limitations for an implementation of MPI:
+\layout Itemize
+
+Receiver-managed - Sender-managed message passing implementations require
+ a persistent block of memory to be available for every process, requiring
+ memory resources to increase with job size and requiring user-level flow
+ control mechanisms to manage these resources.
+
+\layout Itemize
+
+User-level Bypass - While OS Bypass is necessary for high-performance, it
+ alone is not sufficient to support the Progress Rule of MPI asynchronous
+ operations.
+
+\layout Itemize
+
+Unexpected messages - Few communication systems have support for receiving
+ messages for which there is no prior notification.
+ Support for these types of messages is necessary to avoid flow control
+ and protocol overhead.
+
+\layout Section
+
+Background
+\layout Standard
+
+Portals was originally designed for and implemented on the nCube machine
+ as part of the SUNMOS (Sandia/UNM OS)\SpecialChar ~
+
+\begin_inset LatexCommand \cite{SUNMOS}
+
+\end_inset
+
+ and Puma\SpecialChar ~
+
+\begin_inset LatexCommand \cite{PumaOS}
+
+\end_inset
+
+ lightweight kernel development projects.
+ Portals went through two design phases, the latter of which is used on
+ the 4500-node Intel TeraFLOPS machine\SpecialChar ~
+
+\begin_inset LatexCommand \cite{TFLOPS}
+
+\end_inset
+
+.
+ Portals have been very successful in meeting the needs of such a large
+ machine, not only as a layer for a high-performance MPI implementation\SpecialChar ~
+
+\begin_inset LatexCommand \cite{PumaMPI}
+
+\end_inset
+
+, but also for implementing the scalable run-time environment and parallel
+ I/O capabilities of the machine.
+\layout Standard
+
+The second generation Portals implementation was designed to take full advantage
+ of the hardware architecture of large MPP machines.
+ However, efforts to implement this same design on commodity cluster technology
+ identified several limitations, due to the differences in network hardware
+ as well as to shortcomings in the design of Portals.
+\layout Section
+
+Scalability
+\layout Standard
+
+The primary goal in the design of Portals is scalability.
+ Portals are designed specifically for an implementation capable of supporting
+ a parallel job running on tens of thousands of nodes.
+ Performance is critical only in terms of scalability.
+ That is, the level of message passing performance is characterized by how
+ far it allows an application to scale and not by how it performs in micro-bench
+marks (e.g., a two node bandwidth or latency test).
+\layout Standard
+
+The Portals API is designed to allow for scalability, not to guarantee it.
+ Portals cannot overcome the shortcomings of a poorly designed application
+ program.
+ Applications that have inherent scalability limitations, either through
+ design or implementation, will not be transformed by Portals into scalable
+ applications.
+ Scalability must be addressed at all levels.
+ Portals do not inhibit scalability, but do not guarantee it either.
+\layout Standard
+
+To support scalability, the Portals interface maintains a minimal amount
+ of state.
+ Portals provide reliable, ordered delivery of messages between pairs of
+ processes.
+ They are connectionless: a process is not required to explicitly establish
+ a point-to-point connection with another process in order to communicate.
+ Moreover, all buffers used in the transmission of messages are maintained
+ in user space.
+ The target process determines how to respond to incoming messages, and
+ messages for which there are no buffers are discarded.
+\layout Section
+
+Communication Model
+\layout Standard
+
+Portals combine the characteristics of both one-sided and two-sided communication.
+ They define a
+\begin_inset Quotes eld
+\end_inset
+
+matching put
+\begin_inset Quotes erd
+\end_inset
+
+ operation and a
+\begin_inset Quotes eld
+\end_inset
+
+matching get
+\begin_inset Quotes erd
+\end_inset
+
+ operation.
+ The destination of a put (or send) is not an explicit address; instead,
+ each message contains a set of match bits that allow the receiver to determine
+ where incoming messages should be placed.
+ This flexibility allows Portals to support both traditional one-sided operation
+s and two-sided send/receive operations.
+\layout Standard
+
+Portals allows the target to determine whether incoming messages are acceptable.
+ A target process can choose to accept message operations from any specific
+ process or can choose to ignore message operations from any specific process.
+\layout Section
+
+Zero Copy, OS Bypass and Application Bypass
+\layout Standard
+
+In traditional system architectures, network packets arrive at the network
+ interface card (NIC), are passed through one or more protocol layers in
+ the operating system, and eventually copied into the address space of the
+ application.
+ As network bandwidth began to approach memory copy rates, reduction of
+ memory copies became a critical concern.
+ This concern led to the development of zero-copy message passing protocols
+ in which message copies are eliminated or pipelined to avoid the loss of
+ bandwidth.
+\layout Standard
+
+A typical zero-copy protocol has the NIC generate an interrupt for the CPU
+ when a message arrives from the network.
+ The interrupt handler then controls the transfer of the incoming message
+ into the address space of the appropriate application.
+ The interrupt latency, the time from the initiation of an interrupt until
+ the interrupt handler is running, is fairly significant.
+ To avoid this cost, some modern NICs have processors that can be programmed
+ to implement part of a message passing protocol.
+ Given a properly designed protocol, it is possible to program the NIC to
+ control the transfer of incoming messages, without needing to interrupt
+ the CPU.
+ Because this strategy does not need to involve the OS on every message
+ transfer, it is frequently called
+\begin_inset Quotes eld
+\end_inset
+
+OS Bypass.
+\begin_inset Quotes erd
+\end_inset
+
+ ST\SpecialChar ~
+
+\begin_inset LatexCommand \cite{ST}
+
+\end_inset
+
+, VIA\SpecialChar ~
+
+\begin_inset LatexCommand \cite{VIA}
+
+\end_inset
+
+, FM\SpecialChar ~
+
+\begin_inset LatexCommand \cite{FM2}
+
+\end_inset
+
+, GM\SpecialChar ~
+
+\begin_inset LatexCommand \cite{GM}
+
+\end_inset
+
+, and Portals are examples of OS Bypass protocols.
+\layout Standard
+
+Many protocols that support OS Bypass still require that the application
+ actively participate in the protocol to ensure progress.
+ As an example, the long message protocol of PM requires that the application
+ receive and reply to a request to put or get a long message.
+ This complicates the runtime environment, requiring a thread to process
+ incoming requests, and significantly increases the latency required to
+ initiate a long message protocol.
+ The Portals message passing protocol does not require activity on the part
+ of the application to ensure progress.
+ We use the term
+\begin_inset Quotes eld
+\end_inset
+
+Application Bypass
+\begin_inset Quotes erd
+\end_inset
+
+ to refer to this aspect of the Portals protocol.
+\layout Section
+
+Faults
+\layout Standard
+
+Given the number of components that we are dealing with and the fact that
+ we are interested in supporting applications that run for very long times,
+ failures are inevitable.
+ The Portals API recognizes that the underlying transport may not be able
+ to successfully complete an operation once it has been initiated.
+ This is reflected in the fact that the Portals API reports three types
+ of events: events indicating the initiation of an operation, events indicating
+ the successful completion of an operation, and events indicating the unsuccessf
+ul completion of an operation.
+ Every initiation event is eventually followed by a successful completion
+ event or an unsuccessful completion event.
+\layout Standard
+
+Between the time an operation is started and the time that the operation
+ completes (successfully or unsuccessfully), any memory associated with
+ the operation should be considered volatile.
+ That is, the memory may be changed in unpredictable ways while the operation
+ is progressing.
+ Once the operation completes, the memory associated with the operation
+ will not be subject to further modification (from this operation).
+ Notice that unsuccessful operations may alter memory in an essentially
+ unpredictable fashion.
+\layout Chapter
+
+An Overview of the Portals API
+\begin_inset LatexCommand \label{sec:apiover}
+
+\end_inset
+
+
+\layout Standard
+
+In this section, we give a conceptual overview of the Portals API.
+ The goal is to provide a context for understanding the detailed description
+ of the API presented in the next section.
+\layout Section
+
+Data Movement
+\begin_inset LatexCommand \label{sec:dmsemantics}
+
+\end_inset
+
+
+\layout Standard
+
+A Portal represents an opening in the address space of a process.
+ Other processes can use a Portal to read (get) or write (put) the memory
+ associated with the portal.
+ Every data movement operation involves two processes, the
+\series bold
+initiator
+\series default
+ and the
+\series bold
+target
+\series default
+.
+ The initiator is the process that initiates the data movement operation.
+ The target is the process that responds to the operation by either accepting
+ the data for a put operation, or replying with the data for a get operation.
+\layout Standard
+
+In this discussion, activities attributed to a process may refer to activities
+ that are actually performed by the process or
+\emph on
+on behalf of the process
+\emph default
+.
+ The inclusiveness of our terminology is important in the context of
+\emph on
+application bypass
+\emph default
+.
+ In particular, when we note that the target sends a reply in the case of
+ a get operation, it is possible that the reply will be generated by another
+ component in the system, bypassing the application.
+\layout Standard
+
+Figures\SpecialChar ~
+
+\begin_inset LatexCommand \ref{fig:put}
+
+\end_inset
+
+ and
+\begin_inset LatexCommand \ref{fig:get}
+
+\end_inset
+
+ present graphical interpretations of the Portal data movement operations:
+ put and get.
+ In the case of a put operation, the initiator sends a put request message
+ containing the data to the target.
+ The target translates the Portal addressing information in the request
+ using its local Portal structures.
+ When the request has been processed, the target optionally sends an acknowledge
+ment message.
+\layout Standard
+
+
+\begin_inset Float figure
+placement htbp
+wide false
+collapsed false
+
+\layout Standard
+\align center
+
+\begin_inset Graphics FormatVersion 1
+ filename put.eps
+ display color
+ size_type 0
+ rotateOrigin center
+ lyxsize_type 1
+ lyxwidth 218pt
+ lyxheight 119pt
+\end_inset
+
+
+\layout Caption
+
+Portal Put (Send)
+\begin_inset LatexCommand \label{fig:put}
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Standard
+
+In the case of a get operation, the initiator sends a get request to the
+ target.
+ As with the put operation, the target translates the Portal addressing
+ information in the request using its local Portal structures.
+ Once it has translated the Portal addressing information, the target sends
+ a reply that includes the requested data.
+\layout Standard
+
+
+\begin_inset Float figure
+placement htbp
+wide false
+collapsed false
+
+\layout Standard
+\align center
+
+\begin_inset Graphics FormatVersion 1
+ filename get.eps
+ display color
+ size_type 0
+ rotateOrigin center
+ lyxsize_type 1
+ lyxwidth 218pt
+ lyxheight 119pt
+\end_inset
+
+
+\layout Caption
+
+Portal Get
+\begin_inset LatexCommand \label{fig:get}
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Standard
+
+We should note that Portal address translations are only performed on nodes
+ that respond to operations initiated by other nodes.
+ Acknowledgements and replies to get operations bypass the portals address
+ translation structures.
+\layout Section
+
+Portal Addressing
+\begin_inset LatexCommand \label{subsec:paddress}
+
+\end_inset
+
+
+\layout Standard
+
+One-sided data movement models (e.g., shmem\SpecialChar ~
+
+\begin_inset LatexCommand \cite{CraySHMEM}
+
+\end_inset
+
+, ST\SpecialChar ~
+
+\begin_inset LatexCommand \cite{ST}
+
+\end_inset
+
+, MPI-2\SpecialChar ~
+
+\begin_inset LatexCommand \cite{MPI2}
+
+\end_inset
+
+) typically use a triple to address memory on a remote node.
+ This triple consists of a process id, memory buffer id, and offset.
+ The process id identifies the target process, the memory buffer id specifies
+ the region of memory to be used for the operation, and the offset specifies
+ an offset within the memory buffer.
+\layout Standard
+
+In addition to the standard address components (process id, memory buffer
+ id, and offset), a Portal address includes a set of match bits.
+ This addressing model is appropriate for supporting one-sided operations
+ as well as traditional two-sided message passing operations.
+ Specifically, the Portals API provides the flexibility needed for an efficient
+ implementation of MPI-1, which defines two-sided operations with one-sided
+ completion semantics.
+\layout Standard
+
+Figure\SpecialChar ~
+
+\begin_inset LatexCommand \ref{fig:portals}
+
+\end_inset
+
+ presents a graphical representation of the structures used by a target
+ in the interpretation of a Portal address.
+ The process id is used to route the message to the appropriate node and
+ is not reflected in this diagram.
+ The memory buffer id, called the
+\series bold
+portal id
+\series default
+, is used as an index into the Portal table.
+ Each element of the Portal table identifies a match list.
+ Each element of the match list specifies two bit patterns: a set of
+\begin_inset Quotes eld
+\end_inset
+
+don't care
+\begin_inset Quotes erd
+\end_inset
+
+ bits, and a set of
+\begin_inset Quotes eld
+\end_inset
+
+must match
+\begin_inset Quotes erd
+\end_inset
+
+ bits.
+ In addition to the two sets of match bits, each match list element has
+ at most one memory descriptor.
+ Each memory descriptor identifies a memory region and an optional event
+ queue.
+ The memory region specifies the memory to be used in the operation and
+ the event queue is used to record information about these operations.
+\layout Standard
+
+
+\begin_inset Float figure
+placement htbp
+wide false
+collapsed false
+
+\layout Standard
+\align center
+
+\begin_inset Graphics FormatVersion 1
+ filename portals.eps
+ display color
+ size_type 0
+ rotateOrigin center
+ lyxsize_type 1
+ lyxwidth 305pt
+ lyxheight 106pt
+\end_inset
+
+
+\layout Caption
+
+Portal Addressing Structures
+\begin_inset LatexCommand \label{fig:portals}
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Standard
+
+Figure\SpecialChar ~
+
+\begin_inset LatexCommand \ref{fig:flow}
+
+\end_inset
+
+ illustrates the steps involved in translating a Portal address, starting
+ from the first element in a match list.
+ If the match criteria specified in the match list entry are met and the
+ memory descriptor accepts the operation
+\begin_inset Foot
+collapsed true
+
+\layout Standard
+
+Memory descriptors can reject operations because a threshold has been exceeded
+ or because the memory region does not have sufficient space, see Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:md}
+
+\end_inset
+
+
+\end_inset
+
+, the operation (put or get) is performed using the memory region specified
+ in the memory descriptor.
+ If the memory descriptor specifies that it is to be unlinked when a threshold
+ has been exceeded, the match list entry is removed from the match list
+ and the resources associated with the memory descriptor and match list
+ entry are reclaimed.
+ Finally, if there is an event queue specified in the memory descriptor,
+ the operation is logged in the event queue.
+\layout Standard
+
+
+\begin_inset Float figure
+placement htbp
+wide false
+collapsed false
+
+\layout Standard
+\align center
+
+\begin_inset Graphics FormatVersion 1
+ filename flow_new.eps
+ display color
+ size_type 0
+ rotateOrigin center
+ lyxsize_type 1
+ lyxwidth 447pt
+ lyxheight 282pt
+\end_inset
+
+
+\layout Caption
+
+Portals Address Translation
+\begin_inset LatexCommand \label{fig:flow}
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Standard
+
+If the match criteria specified in the match list entry are not met, or
+ there is no memory descriptor associated with the match list entry, or
+ the memory descriptor associated with the match list entry rejects the
+ operation, the address translation continues with the next match list entry.
+ If the end of the match list has been reached, the address translation
+ is aborted and the incoming request is discarded.
+\layout Section
+
+Access Control
+\layout Standard
+
+A process can control access to its portals using an access control list.
+ Each entry in the access control list specifies a process id and a Portal
+ table index.
+ The access control list is actually an array of entries.
+ Each incoming request includes an index into the access control list (i.e.,
+ a
+\begin_inset Quotes eld
+\end_inset
+
+cookie
+\begin_inset Quotes erd
+\end_inset
+
+ or hint).
+ If the id of the process issuing the request doesn't match the id specified
+ in the access control list entry or the Portal table index specified in
+ the request doesn't match the Portal table index specified in the access
+ control list entry, the request is rejected.
+ Process identifiers and Portal table indexes may include wild card values
+ to increase the flexibility of this mechanism.
+
+\layout Standard
+
+Two aspects of this design merit further discussion.
+ First, the model assumes that the information in a message header, the
+ sender's id in particular, is trustworthy.
+ In most contexts, we assume that the entity that constructs the header
+ is trustworthy; however, using cryptographic techniques, we could easily
+ devise a protocol that would ensure the authenticity of the sender.
+\layout Standard
+
+Second, because the access check is performed by the receiver, it is possible
+ that a malicious process will generate thousands of messages that will
+ be denied by the receiver.
+ This could saturate the network and/or the receiver, resulting in a
+\emph on
+denial of service
+\emph default
+ attack.
+ Moving the check to the sender using capabilities would remove the potential
+ for this form of attack.
+ However, the solution introduces the complexities of capability management
+ (exchange of capabilities, revocation, protections, etc).
+\layout Section
+
+Multi-threaded Applications
+\begin_inset LatexCommand \label{sec:threads}
+
+\end_inset
+
+
+\layout Standard
+
+The Portals API supports a generic view of multi-threaded applications.
+ From the perspective of the Portals API, an application program is defined
+ by a set of processes.
+ Each process defines a unique address space.
+ The Portals API defines access to this address space from other processes
+ (using portals addressing and the data movement operations).
+ A process may have one or more
+\emph on
+threads
+\emph default
+ executing in its address space.
+
+\layout Standard
+
+With the exception of
+\emph on
+PtlEQWait
+\emph default
+, every function in the Portals API is non-blocking and atomic with respect
+ to both other threads and external operations that result from data movement
+ operations.
+ While individual operations are atomic, sequences of these operations may
+ be interleaved between different threads and with external operations.
+ The Portals API does not provide any mechanisms to control this interleaving.
+ It is expected that these mechanisms will be provided by the API used to
+ create threads.
+\layout Chapter
+
+The Portals API
+\begin_inset LatexCommand \label{sec:api}
+
+\end_inset
+
+
+\layout Section
+
+Naming Conventions
+\begin_inset LatexCommand \label{sec:conv}
+
+\end_inset
+
+
+\layout Standard
+
+The Portals API defines two types of entities: functions and types.
+ Functions always start with 
+\emph on
+Ptl
+\emph default
+ and use mixed upper and lower case.
+ When used in the body of this report, function names appear in italic face,
+ e.g.,
+\emph on
+PtlInit
+\emph default
+.
+ The functions associated with an object type will have names that start
+ with
+\emph on
+Ptl
+\emph default
+, followed by the two letter object type code shown in Table\SpecialChar ~
+
+\begin_inset LatexCommand \ref{tab:objcodes}
+
+\end_inset
+
+.
+ As an example, the function
+\emph on
+PtlEQAlloc
+\emph default
+ allocates resources for an event queue.
+\layout Standard
+
+
+\begin_inset Float table
+placement htbp
+wide false
+collapsed false
+
+\layout Caption
+
+Object Type Codes
+\begin_inset LatexCommand \label{tab:objcodes}
+
+\end_inset
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+medskip
+\newline
+
+\end_inset
+
+
+\layout Standard
+\align center
+
+\size small
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="5" columns="3">
+<features firstHeadEmpty="true">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<row bottomline="true">
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\emph on
+xx
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ Name
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ Section
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+EQ
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ Event Queue
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:eq}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ MD
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ Memory Descriptor
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:md}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ ME
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ Match list Entry
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:me}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ NI
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ Network Interface
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ni}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Standard
+
+Type names use lower case with underscores to separate words.
+ Each type name starts with
+\family typewriter
+ptl
+\family default
+_ and ends with
+\family typewriter
+_t
+\family default
+.
+ When used in the body of this report, type names appear in a fixed font,
+ e.g.,
+\family typewriter
+ptl_match_bits_t
+\family default
+.
+\layout Standard
+
+Names for constants use upper case with underscores to separate words.
+ Each constant name starts with
+\family typewriter
+PTL_
+\family default
+.
+ When used in the body of this report, constant names appear in a fixed font,
+ e.g.,
+\family typewriter
+PTL_OK
+\family default
+.
+\layout Section
+
+Base Types
+\layout Standard
+
+The Portals API defines a variety of base types.
+ These types represent a simple renaming of the base types provided by the
+ C programming language.
+ In most cases these new type names have been introduced to improve type
+ safety and to avoid issues arising from differences in representation sizes
+ (e.g., 16-bit or 32-bit integers).
+\layout Subsection
+
+Sizes
+\begin_inset LatexCommand \label{sec:size-t}
+
+\end_inset
+
+
+\layout Standard
+
+The type
+\family typewriter
+ptl_size_t
+\family default
+ is an unsigned 64-bit integral type used for representing sizes.
+\layout Subsection
+
+Handles
+\begin_inset LatexCommand \label{sec:handle-type}
+
+\end_inset
+
+
+\layout Standard
+
+Objects maintained by the API are accessed through handles.
+ Handle types have names of the form
+\family typewriter
+ptl_handle_
+\emph on
+xx
+\emph default
+_t
+\family default
+, where
+\emph on
+xx
+\emph default
+ is one of the two letter object type codes shown in Table\SpecialChar ~
+
+\begin_inset LatexCommand \ref{tab:objcodes}
+
+\end_inset
+
+.
+ For example, the type
+\family typewriter
+ptl_handle_ni_t
+\family default
+ is used for network interface handles.
+\layout Standard
+
+Each type of object is given a unique handle type to enhance type checking.
+ The type,
+\family typewriter
+ptl_handle_any_t
+\family default
+, can be used when a generic handle is needed.
+ Every handle value can be converted into a value of type
+\family typewriter
+ptl_handle_any_t
+\family default
+ without loss of information.
+\layout Standard
+
+Handles are not simple values.
+ Every portals object is associated with a specific network interface and
+ an identifier for this interface (along with an object identifier) is part
+ of the handle for the object.
+\layout Standard
+
+The special value
+\family typewriter
+PTL_EQ_NONE
+\family default
+, of type
+\family typewriter
+ptl_handle_eq_t
+\family default
+, is used to indicate the absence of an event queue.
+ See sections
+\begin_inset LatexCommand \ref{sec:mdfree}
+
+\end_inset
+
+ and\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:mdupdate}
+
+\end_inset
+
+ for uses of this value.
+\layout Subsection
+
+Indexes
+\begin_inset LatexCommand \label{sec:index-type}
+
+\end_inset
+
+
+\layout Standard
+
+The types
+\family typewriter
+ptl_pt_index_t
+\family default
+ and
+\family typewriter
+ptl_ac_index_t
+\family default
+ are integral types used for representing Portal table indexes and access
+ control table indexes, respectively.
+ See section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:niinit}
+
+\end_inset
+
+ for limits on values of these types.
+\layout Subsection
+
+Match Bits
+\begin_inset LatexCommand \label{sec:mb-type}
+
+\end_inset
+
+
+\layout Standard
+
+The type
+\family typewriter
+ptl_match_bits_t
+\family default
+ is capable of holding unsigned 64-bit integer values.
+\layout Subsection
+
+Network Interfaces
+\begin_inset LatexCommand \label{sec:ni-type}
+
+\end_inset
+
+
+\layout Standard
+
+The type
+\family typewriter
+ptl_interface_t
+\family default
+ is an integral type used for identifying different network interfaces.
+ Users will need to consult the local documentation to determine appropriate
+ values for the interfaces available.
+ The special value
+\family typewriter
+PTL_IFACE_DEFAULT
+\family default
+ identifies the default interface.
+\layout Subsection
+
+Identifiers
+\begin_inset LatexCommand \label{sec:id-type}
+
+\end_inset
+
+
+\layout Standard
+
+The type
+\family typewriter
+ptl_nid_t
+\family default
+ is an integral type used for representing node ids
+\family typewriter
+, ptl_pid_t
+\family default
+ is an integral type for representing process ids, and
+\family typewriter
+ptl_uid_t
+\family default
+ is an integral type for representing user ids.
+\layout Standard
+
+The special value 
+\family typewriter
+PTL_PID_ANY
+\family default
+ matches any process identifier, PTL_NID_ANY matches any node identifier,
+ and
+\family typewriter
+PTL_UID_ANY
+\family default
+ matches any user identifier.
+ See sections
+\begin_inset LatexCommand \ref{sec:meattach}
+
+\end_inset
+
+ and\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:acentry}
+
+\end_inset
+
+ for uses of these values.
+\layout Subsection
+
+Status Registers
+\begin_inset LatexCommand \label{sec:stat-type}
+
+\end_inset
+
+
+\layout Standard
+
+Each network interface maintains an array of status registers that can be
+ accessed using the
+\family typewriter
+PtlNIStatus
+\family default
+ function (see Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:nistatus}
+
+\end_inset
+
+).
+ The type
+\family typewriter
+ptl_sr_index_t
+\family default
+ defines the types of indexes that can be used to access the status registers.
+ The only index defined for all implementations is
+\family typewriter
+PTL_SR_DROP_COUNT
+\family default
+ which identifies the status register that counts the dropped requests for
+ the interface.
+ Other indexes (and registers) may be defined by the implementation.
+\layout Standard
+
+The type
+\family typewriter
+ptl_sr_value_t
+\family default
+ defines the types of values held in status registers.
+ This is a signed integer type.
+ The size is implementation dependent, but must be at least 32 bits.
+\layout Section
+
+Initialization and Cleanup
+\begin_inset LatexCommand \label{sec:init}
+
+\end_inset
+
+
+\layout Standard
+
+The Portals API includes a function,
+\emph on
+PtlInit
+\emph default
+, to initialize the library and a function,
+\emph on
+PtlFini
+\emph default
+, to clean up after the application is done using the library.
+\layout Subsection
+
+PtlInit
+\begin_inset LatexCommand \label{sec:ptlinit}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlInit( int *max_interfaces );
+\layout Standard
+\noindent
+The
+\emph on
+PtlInit
+\emph default
+ function initializes the Portals library.
+ PtlInit must be called at least once by a process before any thread makes
+ a Portals function call, but may be safely called more than once.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_FAIL Indicates an error during initialization.
+
+\layout Description
+
+PTL_SEGV Indicates that
+\family typewriter
+max_interfaces
+\family default
+ is not a legal address.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="1" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="5in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+max_interfaces
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold the maximum number of interfaces
+ that can be initialized.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Subsection
+
+PtlFini
+\begin_inset LatexCommand \label{sec:ptlfini}
+
+\end_inset
+
+
+\layout LyX-Code
+
+void PtlFini( void );
+\layout Standard
+\noindent
+The
+\emph on
+PtlFini
+\emph default
+ function cleans up after the Portals library is no longer needed by a process.
+ After this function is called, calls to any of the functions defined by
+ the Portals API or use of the structures set up by the Portals API will
+ result in undefined behavior.
+ This function should be called once and only once during termination by
+ a process.
+ Typically, this function will be called in the exit sequence of a process.
+ Individual threads should not call PtlFini when they terminate.
+\layout Section
+
+Network Interfaces
+\begin_inset LatexCommand \label{sec:ni}
+
+\end_inset
+
+
+\layout Standard
+
+The Portals API supports the use of multiple network interfaces.
+ However, each interface is treated as an independent entity.
+ Combining interfaces (e.g.,
+\begin_inset Quotes eld
+\end_inset
+
+bonding
+\begin_inset Quotes erd
+\end_inset
+
+ to create a higher bandwidth connection) must be implemented by the application
+ or embedded in the underlying network.
+ Interfaces are treated as independent entities to make it easier to cache
+ information on individual network interface cards.
+\layout Standard
+
+Once initialized, each interface provides a Portal table, an access control
+ table, and a collection of status registers.
+ See Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:me}
+
+\end_inset
+
+ for a discussion of updating Portal table entries using the
+\emph on
+PtlMEAttach
+\emph default
+ function.
+ See Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:ac}
+
+\end_inset
+
+ for a discussion of the initialization and updating of entries in the access
+ control table.
+ See Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:nistatus}
+
+\end_inset
+
+ for a discussion of the
+\emph on
+PtlNIStatus
+\emph default
+ function which can be used to determine the value of a status register.
+\layout Standard
+
+Every other type of Portal object (e.g., memory descriptor, event queue, or
+ match list entry) is associated with a specific network interface.
+ The association to a network interface is established when the object is
+ created and is encoded in the handle for the object.
+\layout Standard
+
+Each network interface is initialized and shut down independently.
+ The initialization routine,
+\emph on
+PtlNIInit
+\emph default
+, returns a handle for an interface object which is used in all subsequent
+ Portal operations.
+ The
+\emph on
+PtlNIFini
+\emph default
+ function is used to shut down an interface and release any resources that
+ are associated with the interface.
+ Network interface handles are associated with processes, not threads.
+ All threads in a process share all of the network interface handles.
+\layout Standard
+
+The Portals API also defines the
+\emph on
+PtlNIStatus
+\emph default
+ function to query the status registers for a network interface, the
+\emph on
+PtlNIDist
+\emph default
+ function to determine the
+\begin_inset Quotes eld
+\end_inset
+
+distance
+\begin_inset Quotes erd
+\end_inset
+
+ to another process, and the
+\emph on
+PtlNIHandle
+\emph default
+ function to determine the network interface that an object is associated
+ with.
+\layout Subsection
+
+PtlNIInit
+\begin_inset LatexCommand \label{sec:niinit}
+
+\end_inset
+
+
+\layout LyX-Code
+
+typedef struct {
+\newline
+ int max_match_entries;
+\newline
+ int max_mem_descriptors;
+\newline
+ int max_event_queues;
+\newline
+ ptl_ac_index_t max_atable_index;
+\newline
+ ptl_pt_index_t max_ptable_index;
+\newline
+} ptl_ni_limits_t;
+\newline
+
+\newline
+int PtlNIInit( ptl_interface_t interface,
+\newline
+ ptl_pid_t pid,
+\newline
+ ptl_ni_limits_t* desired,
+\newline
+ ptl_ni_limits_t* actual,
+\newline
+ ptl_handle_ni_t* handle );
+\layout Standard
+
+Values of type
+\family typewriter
+ptl_ni_limits_t
+\family default
+ include the following members:
+\layout Description
+
+max_match_entries Maximum number of match entries that can be allocated
+ at any one time.
+\layout Description
+
+max_mem_descriptors Maximum number of memory descriptors that can be allocated
+ at any one time.
+\layout Description
+
+max_event_queues Maximum number of event queues that can be allocated at
+ any one time.
+\layout Description
+
+max_atable_index Largest access control table index for this interface,
+ valid indexes range from zero to
+\family typewriter
+max_atable_index
+\family default
+, inclusive.
+\layout Description
+
+max_ptable_index Largest Portal table index for this interface, valid indexes
+ range from zero to
+\family typewriter
+max_ptable_index
+\family default
+, inclusive.
+\layout Standard
+\noindent
+The
+\emph on
+PtlNIInit
+\emph default
+ function is used to initialize the Portals API for a network interface.
+ This function must be called at least once by each process before any other
+ operations that apply to the interface by any process or thread.
+ For subsequent calls to
+\shape italic
+PtlNIInit
+\shape default
+ from within the same process (either by different threads or the same thread),
+ the desired limits will be ignored and the call will return the existing
+ NI handle.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INIT_DUP Indicates a duplicate initialization of
+\family typewriter
+interface
+\family default
+.
+
+\layout Description
+
+PTL_INIT_INV Indicates that
+\family typewriter
+interface
+\family default
+ is not a valid network interface.
+
+\layout Description
+
+PTL_NOSPACE Indicates that there is insufficient memory to initialize the
+ interface.
+
+\layout Description
+
+PTL_INV_PROC Indicates that
+\family typewriter
+pid
+\family default
+ is not a valid process id.
+\layout Description
+
+PTL_SEGV Indicates that
+\family typewriter
+actual
+\family default
+ or
+\family typewriter
+ handle
+\family default
+ is not a legal address.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="5" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+interface
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Identifies the network interface to be initialized.
+ (See section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:ni-type}
+
+\end_inset
+
+ for a discussion of values used to identify network interfaces.)
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+pid
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Identifies the desired process id (for well known process ids).
+ The value
+\family typewriter
+PTL_PID_ANY
+\family default
+ may be used to have the process id assigned by the underlying library.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+desired
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+If non-NULL, points to a structure that holds the desired limits.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+actual
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, the location pointed to by actual will hold the actual
+ limits.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+handle
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold a handle for the interface.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Comment
+
+The use of desired is implementation dependent.
+ In particular, an implementation may choose to ignore this argument.
+\layout Subsection
+
+PtlNIFini
+\begin_inset LatexCommand \label{sec:nifini}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlNIFini( ptl_handle_ni_t interface );
+\layout Standard
+\noindent
+The
+\emph on
+PtlNIFini
+\emph default
+ function is used to release the resources allocated for a network interface.
+ Once the
+\emph on
+PtlNIFini
+\emph default
+ operation has been started, the results of pending API operations (e.g.,
+ operations initiated by another thread) for this interface are undefined.
+ Similarly, the effects of incoming operations (puts and gets) or return
+ values (acknowledgements and replies) for this interface are undefined.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_NI Indicates that
+\family typewriter
+interface
+\family default
+ is not a valid network interface handle.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="1" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+interface
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+A handle for the interface to shut down.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Subsection
+
+PtlNIStatus
+\begin_inset LatexCommand \label{sec:nistatus}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlNIStatus( ptl_handle_ni_t interface,
+\newline
+ ptl_sr_index_t status_register,
+\newline
+ ptl_sr_value_t* status );
+\layout Standard
+\noindent
+The
+\emph on
+PtlNIStatus
+\emph default
+ function returns the value of a status register for the specified interface.
+ (See section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:stat-type}
+
+\end_inset
+
+ for more information on status register indexes and status register values.)
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_NI Indicates that
+\family typewriter
+interface
+\family default
+ is not a valid network interface handle.
+
+\layout Description
+
+PTL_INV_SR_INDX Indicates that
+\family typewriter
+status_register
+\family default
+ is not a valid status register.
+
+\layout Description
+
+PTL_SEGV Indicates that
+\family typewriter
+status
+\family default
+ is not a legal address.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="3" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+interface
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the interface to use.
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+status_register
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+An index for the status register to read.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+status
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold the current value of the status
+ register.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Comment
+
+The only status register that must be defined is a drop count register (
+\family typewriter
+PTL_SR_DROP_COUNT
+\family default
+).
+ Implementations may define additional status registers.
+ Identifiers for the indexes associated with these registers should start
+ with the prefix
+\family typewriter
+PTL_SR_
+\family default
+.
+\layout Subsection
+
+PtlNIDist
+\layout LyX-Code
+
+int PtlNIDist( ptl_handle_ni_t interface,
+\newline
+ ptl_process_id_t process,
+\newline
+ unsigned long* distance );
+\layout Standard
+\noindent
+The
+\emph on
+PtlNIDist
+\emph default
+ function returns the distance to another process using the specified interface.
+ Distances are only defined relative to an interface.
+ Distance comparisons between different interfaces on the same process may
+ be meaningless.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_NI Indicates that
+\family typewriter
+interface
+\family default
+ is not a valid network interface handle.
+
+\layout Description
+
+PTL_INV_PROC Indicates that
+\family typewriter
+process
+\family default
+ is not a valid process identifier.
+
+\layout Description
+
+PTL_SEGV Indicates that
+\family typewriter
+distance
+\family default
+ is not a legal address.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="3" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+interface
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the interface to use.
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+process
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+An identifier for the process whose distance is being requested.
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+distance
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold the distance to the remote
+ process.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Comment
+
+This function should return a static measure of distance.
+ Examples include minimum latency, the inverse of available bandwidth, or
+ the number of switches between the two endpoints.
+\layout Subsection
+
+PtlNIHandle
+\layout LyX-Code
+
+int PtlNIHandle( ptl_handle_any_t handle,
+\newline
+ ptl_handle_ni_t* interface );
+\layout Standard
+\noindent
+The
+\emph on
+PtlNIHandle
+\emph default
+ function returns a handle for the network interface with which the object
+ identified by
+\family typewriter
+handle
+\family default
+ is associated.
+ If the object identified by
+\family typewriter
+handle
+\family default
+ is a network interface, this function returns the same value it is passed.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_HANDLE Indicates that
+\family typewriter
+handle
+\family default
+ is not a valid handle.
+
+\layout Description
+
+PTL_SEGV Indicates that
+\family typewriter
+interface
+\family default
+ is not a legal address.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="2" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+handle
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the object.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+interface
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold a handle for the network interface
+ associated with
+\family typewriter
+handle
+\family default
+.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Comment
+
+Every handle should encode the network interface and the object id relative
+ to this interface.
+ Both are presumably encoded using integer values.
+\layout Section
+
+User Identification
+\begin_inset LatexCommand \label{sec:uid}
+
+\end_inset
+
+
+\layout Standard
+
+Every process runs on behalf of a user.
+
+\layout Subsection
+
+PtlGetUid
+\layout LyX-Code
+
+int PtlGetUid( ptl_handle_ni_t ni_handle,
+\newline
+ ptl_uid_t* uid );
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_INV_NI Indicates that
+\family typewriter
+ni_handle
+\family default
+ is not a valid network interface handle.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_SEGV Indicates that
+\family typewriter
+uid
+\family default
+ is not a legal address.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="2" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="5in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ni_handle
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A network interface handle.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+uid
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold the user id for the calling
+ process.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Comment
+
+Note that user identifiers are dependent on the network interface(s).
+ In particular, if a node has multiple interfaces, a process may have multiple
+ user identifiers.
+\layout Section
+
+Process Identification
+\begin_inset LatexCommand \label{sec:pid}
+
+\end_inset
+
+
+\layout Standard
+
+Processes that use the Portals API can be identified using a node id and
+ process id.
+ Every node accessible through a network interface has a unique node identifier
+ and every process running on a node has a unique process identifier.
+ As such, any process in the computing system can be identified by its node
+ id and process id.
+
+\layout Standard
+
+The Portals API defines a type,
+\family typewriter
+ptl_process_id_t
+\family default
+ for representing process ids and a function,
+\emph on
+PtlGetId
+\emph default
+, which can be used to obtain the id of the current process.
+\layout Comment
+
+The Portals API does not include thread identifiers.
+ Messages are delivered to processes (address spaces) not threads (contexts
+ of execution).
+\layout Subsection
+
+The Process Id Type
+\begin_inset LatexCommand \label{sec:pid-type}
+
+\end_inset
+
+
+\layout LyX-Code
+
+typedef struct {
+\newline
+ ptl_nid_t nid; /* node id */
+\newline
+ ptl_pid_t pid; /* process id */
+\newline
+} ptl_process_id_t;
+\layout Standard
+\noindent
+The
+\family typewriter
+ptl_process_id_t
+\family default
+ type uses two identifiers to represent a process id: a node id and a process
+ id.
+
+\layout Subsection
+
+PtlGetId
+\begin_inset LatexCommand \label{sub:PtlGetId}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlGetId( ptl_handle_ni_t ni_handle,
+\newline
+ ptl_process_id_t* id );
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_INV_NI Indicates that
+\family typewriter
+ni_handle
+\family default
+ is not a valid network interface handle.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_SEGV Indicates that
+\family typewriter
+id
+\family default
+ is not a legal address.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="2" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="5in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ni_handle
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A network interface handle.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+id
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold the id for the calling process.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Comment
+
+Note that process identifiers are dependent on the network interface(s).
+ In particular, if a node has multiple interfaces, it may have multiple
+ node identifiers.
+\layout Section
+
+Match List Entries and Match Lists
+\begin_inset LatexCommand \label{sec:me}
+
+\end_inset
+
+
+\layout Standard
+
+A match list is a chain of match list entries.
+ Each match list entry includes a memory descriptor and a set of match criteria.
+ The match criteria can be used to reject incoming requests based on process
+ id or the match bits provided in the request.
+ A match list is created using the
+\emph on
+PtlMEAttach
+\emph default
+ or
+\shape italic
+PtlMEAttachAny
+\shape default
+ functions, which create a match list consisting of a single match list
+ entry, attach the match list to the specified Portal index, and return
+ a handle for the match list entry.
+ Match entries can be dynamically inserted and removed from a match list
+ using the
+\emph on
+PtlMEInsert
+\emph default
+ and
+\emph on
+PtlMEUnlink
+\emph default
+ functions.
+\layout Subsection
+
+PtlMEAttach
+\begin_inset LatexCommand \label{sec:meattach}
+
+\end_inset
+
+
+\layout LyX-Code
+
+typedef enum { PTL_RETAIN, PTL_UNLINK } ptl_unlink_t;
+\newline
+
+\layout LyX-Code
+
+typedef enum { PTL_INS_BEFORE, PTL_INS_AFTER } ptl_ins_pos_t;
+\newline
+
+\layout LyX-Code
+
+int PtlMEAttach( ptl_handle_ni_t interface,
+\newline
+ ptl_pt_index_t index,
+\newline
+ ptl_process_id_t matchid,
+\newline
+ ptl_match_bits_t match_bits,
+\newline
+ ptl_match_bits_t ignorebits,
+\newline
+ ptl_unlink_t unlink,
+\newline
+ ptl_ins_pos_t position,
+\newline
+ ptl_handle_me_t* handle );
+\layout Standard
+\noindent
+Values of the type
+\family typewriter
+ptl_ins_pos_t
+\family default
+ are used to control where a new item is inserted.
+ The value
+\family typewriter
+PTL_INS_BEFORE
+\family default
+ is used to insert the new item before the current item or before the head
+ of the list.
+ The value
+\family typewriter
+PTL_INS_AFTER
+\family default
+ is used to insert the new item after the current item or after the last
+ item in the list.
+
+\layout Standard
+
+The
+\emph on
+PtlMEAttach
+\emph default
+ function creates a match list consisting of a single entry and attaches
+ this list to the Portal table for
+\family typewriter
+interface
+\family default
+.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_INV_NI Indicates that
+\family typewriter
+interface
+\family default
+ is not a valid network interface handle.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_PTINDEX Indicates that
+\family typewriter
+index
+\family default
+ is not a valid Portal table index.
+
+\layout Description
+
+PTL_INV_PROC Indicates that
+\family typewriter
+matchid
+\family default
+ is not a valid process identifier.
+
+\layout Description
+
+PTL_NOSPACE Indicates that there is insufficient memory to allocate the
+ match list entry.
+
+\layout Description
+
+PTL_ML_TOOLONG Indicates that the resulting match list is too long.
+ The maximum length for a match list is defined by the interface.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="7" columns="3">
+<features>
+<column alignment="left" valignment="top" width="0.8in">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.75in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+interface
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the interface to use.
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+index
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+The Portal table index where the match list should be attached.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+matchid
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Specifies the match criteria for the process id of the requestor.
+ The constants
+\family typewriter
+PTL_PID_ANY
+\family default
+ and
+\family typewriter
+PTL_NID_ANY
+\family default
+ can be used to wildcard either of the ids in the
+\family typewriter
+ptl_process_id_t
+\family default
+ structure.
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+match_bits, ignorebits
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Specify the match criteria to apply to the match bits in the incoming request.
+ The
+\family typewriter
+ignorebits
+\family default
+ are used to mask out insignificant bits in the incoming match bits.
+ The resulting bits are then compared to the match list entry's match
+ bits to determine if the incoming request meets the match criteria.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+unlink
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Indicates whether the match list entry should be unlinked when the last memory
+ descriptor associated with this match list entry is unlinked.
+ (Note, the check for unlinking a match entry only occurs when a memory
+ descriptor is unlinked.)
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+position
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Indicates whether the new match entry should be prepended or appended to
+ the existing match list.
+ If there is no existing list, this argument is ignored and the new match
+ entry becomes the only entry in the list.
+ Allowed constants:
+\family typewriter
+PTL_INS_BEFORE
+\family default
+,
+\family typewriter
+PTL_INS_AFTER
+\family default
+.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+handle
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold a handle for the newly created
+ match list entry.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Subsection
+
+PtlMEAttachAny
+\begin_inset LatexCommand \label{sec:attachany}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlMEAttachAny( ptl_handle_ni_t interface,
+\newline
+ ptl_pt_index_t *index,
+\newline
+ ptl_process_id_t matchid,
+\newline
+ ptl_match_bits_t match_bits,
+\newline
+ ptl_match_bits_t ignorebits,
+\newline
+ ptl_unlink_t unlink,
+\newline
+ ptl_handle_me_t* handle );
+\layout Standard
+
+The
+\emph on
+PtlMEAttachAny
+\emph default
+ function creates a match list consisting of a single entry and attaches
+ this list to an unused Portal table entry for
+\family typewriter
+interface
+\family default
+.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_INV_NI Indicates that
+\family typewriter
+interface
+\family default
+ is not a valid network interface handle.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_PROC Indicates that
+\family typewriter
+matchid
+\family default
+ is not a valid process identifier.
+
+\layout Description
+
+PTL_NOSPACE Indicates that there is insufficient memory to allocate the
+ match list entry.
+
+\layout Description
+
+PTL_PT_FULL Indicates that there are no free entries in the Portal table.
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="4" columns="3">
+<features>
+<column alignment="left" valignment="top" width="0.8in">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.75in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+interface
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the interface to use.
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+index
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold the Portal index where the
+ match list has been attached.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+matchid, match_bits, ignorebits, unlink
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+See the discussion for
+\shape italic
+PtlMEAttach
+\shape default
+.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+handle
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold a handle for the newly created
+ match list entry.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Subsection
+
+PtlMEInsert
+\begin_inset LatexCommand \label{sec:meinsert}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlMEInsert( ptl_handle_me_t current,
+\newline
+ ptl_process_id_t matchid,
+\newline
+ ptl_match_bits_t match_bits,
+\newline
+ ptl_match_bits_t ignorebits,
+\newline
+ ptl_ins_pos_t position,
+\newline
+ ptl_handle_me_t* handle );
+\layout Standard
+
+The
+\emph on
+PtlMEInsert
+\emph default
+ function creates a new match list entry and inserts this entry into the
+ match list containing
+\family typewriter
+current
+\family default
+.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_PROC Indicates that
+\family typewriter
+matchid
+\family default
+ is not a valid process identifier.
+
+\layout Description
+
+PTL_INV_ME Indicates that
+\family typewriter
+current
+\family default
+ is not a valid match entry handle.
+
+\layout Description
+
+PTL_ML_TOOLONG Indicates that the resulting match list is too long.
+ The maximum length for a match list is defined by the interface.
+
+\layout Description
+
+PTL_NOSPACE Indicates that there is insufficient memory to allocate the
+ match entry.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="4" columns="3">
+<features>
+<column alignment="left" valignment="top" width="0.8in">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="left" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+current
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for a match entry.
+ The new match entry will be inserted immediately before or immediately
+ after this match entry.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+matchid
+\family default
+,
+\family typewriter
+match_bits
+\family default
+,
+\family typewriter
+ignorebits
+\family default
+,
+\family typewriter
+unlink
+\family default
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+See the discussion for
+\emph on
+PtlMEAttach
+\emph default
+.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+position
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Indicates whether the new match entry should be inserted before or after
+ the
+\family typewriter
+current
+\family default
+ entry.
+ Allowed constants:
+\family typewriter
+PTL_INS_BEFORE
+\family default
+,
+\family typewriter
+PTL_INS_AFTER
+\family default
+.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\family typewriter
+handle
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+See the discussion for
+\emph on
+PtlMEAttach
+\emph default
+.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Subsection
+
+PtlMEUnlink
+\begin_inset LatexCommand \label{sec:meunlink}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlMEUnlink( ptl_handle_me_t entry );
+\layout Standard
+\noindent
+The
+\emph on
+PtlMEUnlink
+\emph default
+ function can be used to unlink a match entry from a match list.
+ This operation also releases any resources associated with the match entry
+ (including the associated memory descriptor).
+ It is an error to use the match entry handle after calling
+\emph on
+PtlMEUnlink
+\emph default
+.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_ME Indicates that
+\family typewriter
+entry
+\family default
+ is not a valid match entry handle.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="1" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+entry
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+A handle for the match entry to be unlinked.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Section
+
+Memory Descriptors
+\begin_inset LatexCommand \label{sec:md}
+
+\end_inset
+
+
+\layout Standard
+
+A memory descriptor contains information about a region of an application
+ process' memory and an event queue where information about the operations
+ performed on the memory descriptor is recorded.
+ The Portals API provides two operations to create memory descriptors:
+\emph on
+PtlMDAttach
+\emph default
+, and
+\emph on
+PtlMDBind
+\emph default
+; an operation to update a memory descriptor,
+\emph on
+PtlMDUpdate
+\emph default
+; and an operation to unlink and release the resources associated with a
+ memory descriptor,
+\emph on
+PtlMDUnlink
+\emph default
+.
+\layout Subsection
+
+The Memory Descriptor Type
+\begin_inset LatexCommand \label{sec:md-type}
+
+\end_inset
+
+
+\layout LyX-Code
+
+typedef struct {
+\newline
+ void* start;
+\newline
+ ptl_size_t length;
+\newline
+ int threshold;
+\newline
+ unsigned int max_offset;
+\newline
+ unsigned int options;
+\newline
+ void* user_ptr;
+\newline
+ ptl_handle_eq_t eventq;
+\newline
+} ptl_md_t;
+\layout Standard
+\noindent
+The
+\family typewriter
+ptl_md_t
+\family default
+ type defines the application view of a memory descriptor.
+ Values of this type are used to initialize and update the memory descriptors.
+\layout Subsubsection
+
+Members
+\layout Description
+
+start,\SpecialChar ~
+length Specify the memory region associated with the memory descriptor.
+ The
+\family typewriter
+start
+\family default
+ member specifies the starting address for the memory region and the
+\family typewriter
+length
+\family default
+ member specifies the length of the region.
+ The
+\family typewriter
+start member
+\family default
+ can be NULL provided that the
+\family typewriter
+length
+\family default
+ member is zero.
+ (Zero length buffers are useful to record events.) There are no alignment
+ restrictions on the starting address or the length of the region; however,
+ unaligned messages may be slower (i.e., lower bandwidth and/or longer latency)
+ on some implementations.
+
+\layout Description
+
+threshold Specifies the maximum number of operations that can be performed
+ on the memory descriptor.
+ An operation is any action that could possibly generate an event (see Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+ for the different types of events).
+ In the usual case, the threshold value is decremented for each operation
+ on the memory descriptor.
+ When the threshold value is zero, the memory descriptor is
+\emph on
+inactive
+\emph default
+, and does not respond to operations.
+ A memory descriptor can have an initial threshold value of zero to allow
+ for manipulation of an inactive memory descriptor by the local process.
+ A threshold value of
+\family typewriter
+PTL_MD_THRESH_INF
+\family default
+ indicates that there is no bound on the number of operations that may be
+ applied to a memory descriptor.
+ Note that local operations (e.g.,
+\emph on
+PtlMDUpdate
+\emph default
+) are not applied to the threshold count.
+
+\layout Description
+
+max_offset Specifies the maximum local offset of a memory descriptor.
+ When the local offset of a memory descriptor exceeds this maximum, the
+ memory descriptor becomes
+\shape italic
+inactive
+\shape default
+ and does not respond to further operations.
+\layout Description
+
+options Specifies the behavior of the memory descriptor.
+ There are five options that can be selected: enable put operations (yes
+ or no), enable get operations (yes or no), offset management (local or
+ remote), message truncation (yes or no), and acknowledgement (yes or no).
+ Values for this argument can be constructed using a bitwise or of the following
+ values:
+\begin_deeper
+\begin_deeper
+\layout Description
+
+PTL_MD_OP_PUT Specifies that the memory descriptor will respond to
+\emph on
+put
+\emph default
+ operations.
+ By default, memory descriptors reject
+\emph on
+put
+\emph default
+ operations.
+
+\layout Description
+
+PTL_MD_OP_GET Specifies that the memory descriptor will respond to
+\emph on
+get
+\emph default
+ operations.
+ By default, memory descriptors reject
+\emph on
+get
+\emph default
+ operations.
+
+\layout Description
+
+PTL_MD_MANAGE_REMOTE Specifies that the offset used in accessing the memory
+ region is provided by the incoming request.
+ By default, the offset is maintained locally.
+ When the offset is maintained locally, the offset is incremented by the
+ length of the request so that the next operation (put and/or get) will
+ access the next part of the memory region.
+\layout Description
+
+PTL_MD_TRUNCATE Specifies that the length provided in the incoming request
+ can be reduced to match the memory available in the region.
+ (The memory available in a memory region is determined by subtracting the
+ offset from the length of the memory region.) By default, if the length
+ in the incoming operation is greater than the amount of memory available,
+ the operation is rejected.
+
+\layout Description
+
+PTL_MD_ACK_DISABLE Specifies that an acknowledgement should
+\emph on
+not
+\emph default
+ be sent for incoming
+\emph on
+put
+\emph default
+ operations, even if requested.
+ By default, acknowledgements are sent for
+\emph on
+put
+\emph default
+ operations that request an acknowledgement.
+ Acknowledgements are never sent for
+\emph on
+get
+\emph default
+ operations.
+ The value sent in the reply serves as an implicit acknowledgement.
+
+\end_deeper
+\layout Standard
+
+
+\series bold
+Note
+\series default
+: It is not considered an error to have a memory descriptor that does not
+ respond to either
+\emph on
+put
+\emph default
+ or
+\emph on
+get
+\emph default
+ operations: Every memory descriptor responds to
+\emph on
+reply
+\emph default
+ operations.
+ Nor is it considered an error to have a memory descriptor that responds
+ to both
+\emph on
+put
+\emph default
+ and
+\emph on
+get
+\emph default
+ operations.
+
+\end_deeper
+\layout Description
+
+user_ptr A user-specified value that is associated with the memory descriptor.
+ The value does not need to be a pointer, but must fit in the space used
+ by a pointer.
+ This value (along with other values) is recorded in events associated with
+ operations on this memory descriptor.
+\begin_inset Foot
+collapsed true
+
+\layout Standard
+
+Tying the memory descriptor to a user-defined value can be useful when multiple
+ memory descriptors share the same event queue or when the memory descriptor
+ needs to be associated with a data structure maintained by the application.
+ For example, an MPI implementation can set the
+\family typewriter
+user_ptr
+\family default
+ argument to the value of an MPI Request.
+ This direct association allows for processing of memory descriptors by
+ the MPI implementation without a table lookup or a search for the appropriate
+ MPI Request.
+\end_inset
+
+
+\layout Description
+
+eventq A handle for the event queue used to log the operations performed
+ on the memory region.
+ If this argument is
+\family typewriter
+PTL_EQ_NONE
+\family default
+, operations performed on this memory descriptor are not logged.
+
+\layout Subsection
+
+PtlMDAttach
+\begin_inset LatexCommand \label{sec:mdattach}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlMDAttach( ptl_handle_me_t match,
+\newline
+ ptl_md_t mem_desc,
+\newline
+ ptl_unlink_t unlink_op,
+\newline
+ ptl_unlink_t unlink_nofit,
+\newline
+ ptl_handle_md_t* handle );
+\layout Standard
+\noindent
+Values of the type
+\family typewriter
+ptl_unlink_t
+\family default
+ are used to control whether an item is unlinked from a list.
+ The value
+\family typewriter
+PTL_UNLINK
+\family default
+ enables unlinking.
+ The value
+\family typewriter
+PTL_RETAIN
+\family default
+ disables unlinking.
+\layout Standard
+
+The
+\emph on
+PtlMDAttach
+\emph default
+ operation is used to create a memory descriptor and attach it to a match
+ list entry.
+ An error code is returned if this match list entry already has an associated
+ memory descriptor.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INUSE Indicates that
+\family typewriter
+match
+\family default
+ already has a memory descriptor attached.
+
+\layout Description
+
+PTL_INV_ME Indicates that
+\family typewriter
+match
+\family default
+ is not a valid match entry handle.
+
+\layout Description
+
+PTL_ILL_MD Indicates that
+\family typewriter
+mem_desc
+\family default
+ is not a legal memory descriptor.
+ This may happen because the memory region defined in
+\family typewriter
+mem_desc
+\family default
+ is invalid or because the network interface associated with the
+\family typewriter
+eventq
+\family default
+ in
+\family typewriter
+mem_desc
+\family default
+ is not the same as the network interface associated with
+\family typewriter
+match
+\family default
+.
+
+\layout Description
+
+PTL_NOSPACE Indicates that there is insufficient memory to allocate the
+ memory descriptor.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="5" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+match
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the match entry that the memory descriptor will be associated
+ with.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+mem_desc
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Provides initial values for the application visible parts of a memory descriptor.
+ Other than its use for initialization, there is no linkage between this
+ structure and the memory descriptor maintained by the API.
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+unlink_op
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A flag to indicate whether the memory descriptor is unlinked when it becomes
+ inactive, either because the operation threshold drops to zero or because
+ the maximum offset has been exceeded.
+ (Note, the check for unlinking a memory descriptor only occurs after
+ the completion of a successful operation.
+ If the threshold is set to zero during initialization or using
+\emph on
+PtlMDUpdate
+\emph default
+, the memory descriptor is
+\series bold
+not
+\series default
+ unlinked.)
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+unlink_nofit
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A flag to indicate whether the memory descriptor is unlinked when the space
+ remaining in the memory descriptor is not sufficient for a matching operation.
+ If an incoming message arrives at a memory descriptor that does
+ not have sufficient space and the
+\series bold
+PTL_MD_TRUNCATE
+\series default
+ option is not specified, the memory descriptor will be unlinked.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+handle
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold a handle for the newly created
+ memory descriptor.
+ The
+\family typewriter
+handle
+\family default
+ argument can be NULL, in which case the handle will not be returned.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Subsection
+
+PtlMDBind
+\begin_inset LatexCommand \label{sec:mdbind}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlMDBind( ptl_handle_ni_t interface,
+\newline
+ ptl_md_t mem_desc,
+\newline
+ ptl_handle_md_t* handle );
+\layout Standard
+\noindent
+The
+\emph on
+PtlMDBind
+\emph default
+ operation is used to create a
+\begin_inset Quotes eld
+\end_inset
+
+free floating
+\begin_inset Quotes erd
+\end_inset
+
+ memory descriptor, i.e., a memory descriptor that is not associated with
+ a match list entry.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_NI Indicates that
+\family typewriter
+interface
+\family default
+ is not a valid network interface handle.
+
+\layout Description
+
+PTL_ILL_MD Indicates that
+\family typewriter
+mem_desc
+\family default
+ is not a legal memory descriptor.
+ This may happen because the memory region defined in
+\family typewriter
+mem_desc
+\family default
+ is invalid or because the network interface associated with the
+\family typewriter
+eventq
+\family default
+ in
+\family typewriter
+mem_desc
+\family default
+ is not the same as the network interface,
+\family typewriter
+interface
+\family default
+.
+
+\layout Description
+
+PTL_INV_EQ Indicates that the event queue associated with
+\family typewriter
+mem_desc
+\family default
+ is not valid.
+
+\layout Description
+
+PTL_NOSPACE Indicates that there is insufficient memory to allocate the
+ memory descriptor.
+
+\layout Description
+
+PTL_SEGV Indicates that
+\family typewriter
+handle
+\family default
+ is not a legal address.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="3" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+interface
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the network interface with which the memory descriptor will
+ be associated.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+mem_desc
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Provides initial values for the application visible parts of a memory descriptor.
+ Other than its use for initialization, there is no linkage between this
+ structure and the memory descriptor maintained by the API.
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+handle
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold a handle for the newly created
+ memory descriptor.
+ The
+\family typewriter
+handle
+\family default
+ argument must be a valid address and cannot be NULL.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Subsection
+
+PtlMDUnlink
+\begin_inset LatexCommand \label{sec:mdfree}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlMDUnlink( ptl_handle_md_t mem_desc );
+\layout Standard
+\noindent
+The
+\emph on
+PtlMDUnlink
+\emph default
+ function unlinks the memory descriptor from any match list entry it may
+ be linked to and releases the resources associated with a memory descriptor.
+ (This function does not free the memory region associated with the memory
+ descriptor.) This function also releases the resources associated with a
+ floating memory descriptor.
+ Only memory descriptors with no pending operations may be unlinked.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_MD Indicates that
+\family typewriter
+mem_desc
+\family default
+ is not a valid memory descriptor handle.
+\layout Description
+
+PTL_MD_INUSE Indicates that
+\family typewriter
+mem_desc
+\family default
+ has pending operations and cannot be unlinked.
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="1" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+mem_desc
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the memory descriptor to be released.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Subsection
+
+PtlMDUpdate
+\begin_inset LatexCommand \label{sec:mdupdate}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlMDUpdate( ptl_handle_md_t mem_desc,
+\newline
+ ptl_md_t* old_md,
+\newline
+ ptl_md_t* new_md,
+\newline
+ ptl_handle_eq_t testq );
+\layout Standard
+\noindent
+The
+\emph on
+PtlMDUpdate
+\emph default
+ function provides a conditional, atomic update operation for memory descriptors.
+ The memory descriptor identified by
+\family typewriter
+mem_desc
+\family default
+ is only updated if the event queue identified by
+\family typewriter
+testq
+\family default
+ is empty.
+ The intent is to only enable updates to the memory descriptor when no new
+ messages have arrived since the last time the queue was checked.
+ See section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:exmpi}
+
+\end_inset
+
+ for an example of how this function can be used.
+\layout Standard
+
+If
+\family typewriter
+new_md
+\family default
+ is not NULL, the memory descriptor identified by
+\family typewriter
+mem_desc
+\family default
+ will be updated
+ to reflect the values in the structure pointed to by
+\family typewriter
+new_md
+\family default
+ if
+\family typewriter
+testq
+\family default
+ has the value
+\family typewriter
+PTL_EQ_NONE
+\family default
+ or if the event queue identified by
+\family typewriter
+testq
+\family default
+ is empty.
+ If
+\family typewriter
+old_md
+\family default
+ is not NULL, the current value of the memory descriptor identified by
+\family typewriter
+mem_desc
+\family default
+ is recorded in the location identified by
+\family typewriter
+old_md
+\family default
+.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_NOUPDATE Indicates that the update was not performed because
+\family typewriter
+testq
+\family default
+ was not empty.
+
+\layout Description
+
+PTL_INV_MD Indicates that
+\family typewriter
+mem_desc
+\family default
+ is not a valid memory descriptor handle.
+
+\layout Description
+
+PTL_ILL_MD Indicates that the value pointed to by
+\family typewriter
+new_md
+\family default
+ is not a legal memory descriptor (e.g., the memory region specified by the
+ memory descriptor may be invalid).
+
+\layout Description
+
+PTL_INV_EQ Indicates that
+\family typewriter
+testq
+\family default
+ is not a valid event queue handle.
+
+\layout Description
+
+PTL_SEGV Indicates that
+\family typewriter
+new_md
+\family default
+ or
+\family typewriter
+old_md
+\family default
+ is not a legal address.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="4" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+mem_desc
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the memory descriptor to update.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+old_md
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+If
+\family typewriter
+old_md
+\family default
+ is not the value
+\family typewriter
+NULL
+\family default
+, the current value of the memory descriptor will be stored in the location
+ identified by
+\family typewriter
+old_md
+\family default
+.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+new_md
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+If
+\family typewriter
+new_md
+\family default
+ is not the value
+\family typewriter
+NULL
+\family default
+, this argument provides the new values for the memory descriptor, if the
+ update is performed.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+testq
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for an event queue used to predicate the update.
+ If
+\family typewriter
+testq
+\family default
+ is equal to
+\family typewriter
+PTL_EQ_NONE
+\family default
+, the update is performed unconditionally.
+ Otherwise, the update is performed if and only if
+\family typewriter
+testq
+\family default
+ is empty.
+ If the update is not performed, the function returns the value
+\family typewriter
+PTL_NOUPDATE
+\family default
+.
+ (Note, the
+\family typewriter
+testq
+\family default
+ argument does not need to be the same as the event queue associated with
+ the memory descriptor.)
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Standard
+
+The conditional update can be used to ensure that the memory descriptor
+ has not changed between the time it was examined and the time it is updated.
+ In particular, it is needed to support an MPI implementation where the
+ activity of searching an unexpected message queue and posting a receive
+ must be atomic.
+\layout Section
+
+Events and Event Queues
+\begin_inset LatexCommand \label{sec:eq}
+
+\end_inset
+
+
+\layout Standard
+
+Event queues are used to log operations performed on memory descriptors.
+ They can also be used to hold acknowledgements for completed
+\emph on
+put
+\emph default
+ operations and to note when the data specified in a
+\emph on
+put
+\emph default
+ operation has been sent (i.e., when it is safe to reuse the buffer that holds
+ this data).
+ Multiple memory descriptors can share a single event queue.
+\layout Standard
+
+In addition to the
+\family typewriter
+ptl_handle_eq_t
+\family default
+ type, the Portals API defines two types associated with events: The
+\family typewriter
+
+\newline
+ptl_event_kind_t
+\family default
+ type defines the kinds of events that can be stored in an event queue.
+ The
+\family typewriter
+ptl_event_t
+\family default
+ type defines a structure that holds the information associated with an
+ event.
+\layout Standard
+
+The Portals API also provides four functions for dealing with event queues:
+ The
+\emph on
+PtlEQAlloc
+\emph default
+ function is used to allocate the API resources needed for an event queue,
+ the
+\emph on
+PtlEQFree
+\emph default
+ function is used to release these resources, the
+\emph on
+PtlEQGet
+\emph default
+ function can be used to get the next event from an event queue, and the
+
+\emph on
+PtlEQWait
+\emph default
+ function can be used to block a process (or thread) until an event queue
+ has at least one event.
+\layout Subsection
+
+Kinds of Events
+\begin_inset LatexCommand \label{sec:ek-type}
+
+\end_inset
+
+
+\layout LyX-Code
+
+typedef enum {
+\newline
+ PTL_EVENT_GET_START, PTL_EVENT_GET_END, PTL_EVENT_GET_FAIL,
+\newline
+ PTL_EVENT_PUT_START, PTL_EVENT_PUT_END, PTL_EVENT_PUT_FAIL,
+\newline
+ PTL_EVENT_REPLY_START, PTL_EVENT_REPLY_END, PTL_EVENT_REPLY_FAIL,
+\newline
+ PTL_EVENT_SEND_START, PTL_EVENT_SEND_END, PTL_EVENT_SEND_FAIL,
+\newline
+ PTL_EVENT_ACK,
+\newline
+ PTL_EVENT_UNLINK
+\newline
+} ptl_event_kind_t;
+\layout Standard
+\noindent
+The Portals API defines fourteen types of events that can be logged in an
+ event queue:
+\layout Description
+
+PTL_EVENT_GET_START A remote
+\emph on
+get
+\emph default
+ operation has been started on the memory descriptor.
+ The memory region associated with this descriptor should not be altered
+ until the corresponding END or FAIL event is logged.
+\layout Description
+
+PTL_EVENT_GET_END A previously initiated
+\emph on
+get
+\emph default
+ operation completed successfully.
+ This event is logged after the reply has been sent by the local node.
+ As such, the process could free the memory descriptor once it sees this
+ event.
+
+\layout Description
+
+PTL_EVENT_GET_FAIL A previously initiated
+\emph on
+get
+\emph default
+ operation completed unsuccessfully.
+ This event is logged after the reply has been sent by the local node.
+ As such, the process could free the memory descriptor once it sees this
+ event.
+
+\layout Description
+
+PTL_EVENT_PUT_START A remote
+\emph on
+put
+\emph default
+ operation has been started on the memory descriptor.
+ The memory region associated with this descriptor should be considered
+ volatile until the corresponding END or FAIL event is logged.
+\layout Description
+
+PTL_EVENT_PUT_END A previously initiated
+\emph on
+put
+\emph default
+ operation completed successfully.
+ The underlying layers will not alter the memory (on behalf of this operation)
+ once this event has been logged.
+
+\layout Description
+
+PTL_EVENT_PUT_FAIL A previously initiated
+\emph on
+put
+\emph default
+ operation completed unsuccessfully.
+ The underlying layers will not alter the memory (on behalf of this operation)
+ once this event has been logged.
+
+\layout Description
+
+PTL_EVENT_REPLY_START A
+\emph on
+reply
+\emph default
+ operation has been started on the memory descriptor.
+
+\layout Description
+
+PTL_EVENT_REPLY_END A previously initiated
+\emph on
+reply
+\emph default
+ operation has completed successfully.
+ This event is logged after the data (if any) from the reply has been written
+ into the memory descriptor.
+
+\layout Description
+
+PTL_EVENT_REPLY_FAIL A previously initiated
+\emph on
+reply
+\emph default
+ operation has completed unsuccessfully.
+ This event is logged after the data (if any) from the reply has been written
+ into the memory descriptor.
+
+\layout Description
+
+PTL_EVENT_ACK An
+\emph on
+acknowledgement
+\emph default
+ was received.
+ This event is logged when the acknowledgement is received.
+\layout Description
+
+PTL_EVENT_SEND_START An outgoing
+\emph on
+send
+\emph default
+ operation has been started.
+ The memory region associated with this descriptor should not be altered
+ until the corresponding END or FAIL event is logged.
+\layout Description
+
+PTL_EVENT_SEND_END A previously initiated
+\emph on
+send
+\emph default
+ operation has completed successfully.
+ This event is logged after the entire buffer has been sent and it is safe
+ for the application to reuse the buffer.
+
+\layout Description
+
+PTL_EVENT_SEND_FAIL A previously initiated
+\emph on
+send
+\emph default
+ operation has completed unsuccessfully.
+ The process can safely manipulate the memory or free the memory descriptor
+ once it sees this event.
+\layout Description
+
+PTL_EVENT_UNLINK A memory descriptor associated with this event queue has
+ been automatically unlinked.
+ This event is not generated when a memory descriptor is explicitly unlinked
+ by calling
+\shape italic
+PtlMDUnlink
+\shape default
+.
+ This event does not decrement the threshold count.
+\layout Subsection
+
+Event Ordering
+\layout Standard
+
+The Portals API guarantees that when a process initiates two operations
+ on a remote process, the operations will be initiated on the remote process
+ in the same order that they were initiated on the original process.
+ As an example, if process A initiates two
+\emph on
+put
+\emph default
+ operations,
+\emph on
+x
+\emph default
+ and
+\emph on
+y
+\emph default
+, on process B, the Portals API guarantees that process A will receive the
+
+\family typewriter
+PTL_EVENT_SEND_START
+\family default
+ events for
+\emph on
+x
+\emph default
+ and
+\emph on
+y
+\emph default
+ in the same order that process B receives the
+\family typewriter
+PTL_EVENT_PUT_START
+\family default
+ events for
+\emph on
+x
+\emph default
+ and
+\emph on
+y
+\emph default
+.
+ Notice that the API does not guarantee that the start events will be delivered
+ in the same order that process A initiated the
+\emph on
+x
+\emph default
+ and
+\emph on
+y
+\emph default
+ operations.
+ If process A needs to ensure the ordering of these operations, it should
+ include code to wait for the initiation of
+\emph on
+x
+\emph default
+ before it initiates
+\emph on
+y
+\emph default
+.
+\layout Subsection
+
+Failure Notification
+\layout Standard
+
+Operations may fail to complete successfully; however, unless the node itself
+ fails, every operation that is started will eventually complete.
+ While an operation is in progress, the memory associated with the operation
+ should not be viewed (in the case of a put or a reply) or altered (in the
+ case of a send or get).
+ Operation completion, whether successful or unsuccessful, is final.
+ That is, when an operation completes, the memory associated with the operation
+ will no longer be read or altered by the operation.
+ A network interface can use the
+\family typewriter
+ptl_ni_fail_t
+\family default
+ to define more specific information regarding the failure of the operation
+ and record this information in the
+\family typewriter
+ni_fail_type
+\family default
+ field of the event.
+\layout Subsection
+
+The Event Type
+\begin_inset LatexCommand \label{sec:event-type}
+
+\end_inset
+
+
+\layout LyX-Code
+
+typedef struct {
+\newline
+ ptl_event_kind_t type;
+\newline
+ ptl_process_id_t initiator;
+\newline
+ ptl_uid_t uid;
+\layout LyX-Code
+
+ ptl_pt_index_t portal;
+\newline
+ ptl_match_bits_t match_bits;
+\newline
+ ptl_size_t rlength;
+\newline
+ ptl_size_t mlength;
+\newline
+ ptl_size_t offset;
+\newline
+ ptl_handle_md_t md_handle;
+\newline
+ ptl_md_t mem_desc;
+\newline
+ ptl_hdr_data_t hdr_data;
+\newline
+ ptl_seq_t link;
+\newline
+ ptl_ni_fail_t ni_fail_type;
+\newline
+ volatile ptl_seq_t sequence;
+\newline
+} ptl_event_t;
+\layout Standard
+\noindent
+An event structure includes the following members:
+\layout Description
+
+type Indicates the type of the event.
+
+\layout Description
+
+initiator The id of the initiator.
+
+\layout Description
+
+portal The Portal table index specified in the request.
+
+\layout Description
+
+match_bits A copy of the match bits specified in the request.
+ See section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:me}
+
+\end_inset
+
+ for more information on match bits.
+
+\layout Description
+
+rlength The length (in bytes) specified in the request.
+
+\layout Description
+
+mlength The length (in bytes) of the data that was manipulated by the operation.
+ For truncated operations, the manipulated length will be the number of
+ bytes specified by the memory descriptor (possibly with an offset) operation.
+ For all other operations, the manipulated length will be the length of
+ the requested operation.
+
+\layout Description
+
+offset Is the displacement (in bytes) into the memory region that the operation
+ used.
+ The offset can be determined by the operation (see Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:datamovement}
+
+\end_inset
+
+) for a remote managed memory descriptor, or by the local memory descriptor
+ (see Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:md}
+
+\end_inset
+
+).
+
+\layout Description
+
+md_handle Is the handle to the memory descriptor associated with the event.
+\layout Description
+
+mem_desc Is the state of the memory descriptor immediately after the event
+ has been processed.
+
+\layout Description
+
+hdr_data 64 bits of out-of-band user data (see Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+).
+
+\layout Description
+
+link The
+\emph on
+link
+\emph default
+ member is used to link
+\family typewriter
+START
+\family default
+ events with the
+\family typewriter
+END
+\family default
+ or
+\family typewriter
+FAIL
+\family default
+ event that signifies completion of the operation.
+ The
+\emph on
+link
+\emph default
+ member will be the same for the two events associated with an operation.
+ The link member is also used to link an
+\family typewriter
+UNLINK
+\family default
+ event with the event that caused the memory descriptor to be unlinked.
+\layout Description
+
+sequence The sequence number for this event.
+ Sequence numbers are unique to each event.
+\layout Comment
+
+The
+\emph on
+sequence
+\emph default
+ member is the last member and is volatile to support SMP implementations.
+ When an event structure is filled in, the
+\emph on
+sequence
+\emph default
+ member should be written after all other members have been updated.
+ Moreover, a memory barrier should be inserted between the updating of other
+ members and the updating of the
+\emph on
+sequence
+\emph default
+ member.
+\layout Subsection
+
+PtlEQAlloc
+\begin_inset LatexCommand \label{sec:eqalloc}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlEQAlloc( ptl_handle_ni_t interface,
+\newline
+ ptl_size_t count,
+\newline
+ ptl_handle_eq_t* handle );
+\layout Standard
+\noindent
+The
+\emph on
+PtlEQAlloc
+\emph default
+ function is used to build an event queue.
+
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_NI Indicates that
+\family typewriter
+interface
+\family default
+ is not a valid network interface handle.
+
+\layout Description
+
+PTL_NOSPACE Indicates that there is insufficient memory to allocate the
+ event queue.
+
+\layout Description
+
+PTL_SEGV Indicates that
+\family typewriter
+handle
+\family default
+ is not a legal address.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="3" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+interface
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the interface with which the event queue will be associated.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+count
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+The number of events that can be stored in the event queue.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+handle
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold a handle for the newly created
+ event queue.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Subsection
+
+PtlEQFree
+\begin_inset LatexCommand \label{sec:eqfree}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlEQFree( ptl_handle_eq_t eventq );
+\layout Standard
+\noindent
+The
+\emph on
+PtlEQFree
+\emph default
+ function releases the resources associated with an event queue.
+ It is up to the user to ensure that no memory descriptors are associated
+ with the event queue once it is freed.
+
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_EQ Indicates that
+\family typewriter
+eventq
+\family default
+ is not a valid event queue handle.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="1" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+eventq
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+A handle for the event queue to be released.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Subsection
+
+PtlEQGet
+\begin_inset LatexCommand \label{sec:eqget}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlEQGet( ptl_handle_eq_t eventq,
+\newline
+ ptl_event_t* event );
+\layout Standard
+\noindent
+The
+\emph on
+PtlEQGet
+\emph default
+ function is a nonblocking function that can be used to get the next event
+ in an event queue.
+ The event is removed from the queue.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_EQ_DROPPED Indicates success (i.e., an event is returned) and that at
+ least one event between this event and the last event obtained (using
+\emph on
+PtlEQGet
+\emph default
+ or
+\emph on
+PtlEQWait
+\emph default
+) from this event queue has been dropped due to limited space in the event
+ queue.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_EQ_EMPTY Indicates that
+\family typewriter
+eventq
+\family default
+ is empty or another thread is waiting on
+\emph on
+PtlEQWait
+\emph default
+.
+
+\layout Description
+
+PTL_INV_EQ Indicates that
+\family typewriter
+eventq
+\family default
+ is not a valid event queue handle.
+
+\layout Description
+
+PTL_SEGV Indicates that
+\family typewriter
+event
+\family default
+ is not a legal address.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="2" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.5in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+eventq
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the event queue.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+event
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold the values associated with
+ the next event in the event queue.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Subsection
+
+PtlEQWait
+\begin_inset LatexCommand \label{sec:eqwait}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlEQWait( ptl_handle_eq_t eventq,
+\newline
+ ptl_event_t* event );
+\layout Standard
+\noindent
+The
+\emph on
+PtlEQWait
+\emph default
+ function can be used to block the calling process (thread) until there
+ is an event in an event queue.
+ This function also returns the next event in the event queue and removes
+ this event from the queue.
+ This is the only blocking operation in the Portals 3.2 API.
+ In the event that multiple threads are waiting on the same event queue,
+ PtlEQWait is guaranteed to wake exactly one thread, but the order in which
+ they are awakened is not specified.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_EQ_DROPPED Indicates success (i.e., an event is returned) and that at
+ least one event between this event and the last event obtained (using
+\emph on
+PtlEQGet
+\emph default
+ or
+\emph on
+PtlEQWait
+\emph default
+) from this event queue has been dropped due to limited space in the event
+ queue.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_EQ Indicates that
+\family typewriter
+eventq
+\family default
+ is not a valid event queue handle.
+
+\layout Description
+
+PTL_SEGV Indicates that
+\family typewriter
+event
+\family default
+ is not a legal address.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+\noindent
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="2" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+eventq
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the event queue to wait on.
+ The calling process (thread) will be blocked until
+\family typewriter
+eventq
+\family default
+ is not empty.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+event
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+output
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+On successful return, this location will hold the values associated with
+ the next event in the event queue.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Section
+
+The Access Control Table
+\begin_inset LatexCommand \label{sec:ac}
+
+\end_inset
+
+
+\layout Standard
+
+Processes can use the access control table to control which processes are
+ allowed to perform operations on Portal table entries.
+ Each communication interface has a Portal table and an access control table.
+ The access control table for the default interface contains an entry at
+ index zero that allows all processes with the same user id to communicate.
+ Entries in the access control table can be manipulated using the
+\emph on
+PtlACEntry
+\emph default
+ function.
+\layout Subsection
+
+PtlACEntry
+\begin_inset LatexCommand \label{sec:acentry}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlACEntry( ptl_handle_ni_t interface,
+\newline
+ ptl_ac_index_t index,
+\newline
+ ptl_process_id_t matchid,
+\newline
+ ptl_uid_t user_id,
+\newline
+ ptl_pt_index_t portal );
+\layout Standard
+\noindent
+The
+\emph on
+PtlACEntry
+\emph default
+ function can be used to update an entry in the access control table for
+ an interface.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_NI Indicates that
+\family typewriter
+interface
+\family default
+ is not a valid network interface handle.
+
+\layout Description
+
+PTL_AC_INV_INDEX Indicates that
+\family typewriter
+index
+\family default
+ is not a valid access control table index.
+
+\layout Description
+
+PTL_INV_PROC Indicates that
+\family typewriter
+matchid
+\family default
+ is not a valid process identifier.
+
+\layout Description
+
+PTL_PT_INV_INDEX Indicates that
+\family typewriter
+portal
+\family default
+ is not a valid Portal table index.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="5" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+interface
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Identifies the interface to use.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+index
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+The index of the entry in the access control table to update.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+matchid
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Identifies the process(es) that are allowed to perform operations.
+ The constants
+\family typewriter
+PTL_PID_ANY
+\family default
+ and
+\family typewriter
+PTL_NID_ANY
+\family default
+ can be used to wildcard either of the ids in the
+\family typewriter
+ptl_process_id_t
+\family default
+ structure.
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+user_id
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Identifies the user that is allowed to perform operations.
+ The value
+\family typewriter
+PTL_UID_ANY
+\family default
+ can be used to wildcard the user.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+portal
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Identifies the Portal index(es) that can be used.
+ The value
+\family typewriter
+PTL_PT_INDEX_ANY
+\family default
+ can be used to wildcard the Portal index.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Section
+
+Data Movement Operations
+\begin_inset LatexCommand \label{sec:datamovement}
+
+\end_inset
+
+
+\layout Standard
+
+The Portals API provides two data movement operations:
+\emph on
+PtlPut
+\emph default
+ and
+\emph on
+PtlGet
+\emph default
+.
+\layout Subsection
+
+PtlPut
+\begin_inset LatexCommand \label{sec:put}
+
+\end_inset
+
+
+\layout LyX-Code
+
+typedef enum { PTL_ACK_REQ, PTL_NOACK_REQ } ptl_ack_req_t;
+\newline
+
+\newline
+int PtlPut( ptl_handle_md_t mem_desc,
+\newline
+ ptl_ack_req_t ack_req,
+\newline
+ ptl_process_id_t target,
+\newline
+ ptl_pt_index_t portal,
+\newline
+ ptl_ac_index_t cookie,
+\newline
+ ptl_match_bits_t match_bits,
+\newline
+ ptl_size_t offset,
+\newline
+ ptl_hdr_data_t hdr_data );
+\layout Standard
+\noindent
+Values of the type
+\family typewriter
+ptl_ack_req_t
+\family default
+ are used to control whether an acknowledgement should be sent when the
+ operation completes (i.e., when the data has been written to a memory descriptor
+ of the
+\family typewriter
+target
+\family default
+ process).
+ The value
+\family typewriter
+PTL_ACK_REQ
+\family default
+ requests an acknowledgement; the value
+\family typewriter
+PTL_NOACK_REQ
+\family default
+ requests that no acknowledgement should be generated.
+\layout Standard
+
+The
+\emph on
+PtlPut
+\emph default
+ function initiates an asynchronous put operation.
+ There are several events associated with a put operation: initiation of
+ the send on the local node (
+\family typewriter
+PTL_EVENT_SEND_START
+\family default
+), completion of the send on the local node (
+\family typewriter
+PTL_EVENT_SEND_END
+\family default
+ or
+\family typewriter
+PTL_EVENT_SEND_FAIL
+\family default
+), and, when the send completes successfully, the receipt of an acknowledgement
+ (
+\family typewriter
+PTL_EVENT_ACK
+\family default
+) indicating that the operation was accepted by the target.
+ These events will be logged in the event queue associated with the memory
+ descriptor (
+\family typewriter
+mem_desc
+\family default
+) used in the put operation.
+ Using a memory descriptor that does not have an associated event queue
+ results in these events being discarded.
+ In this case, the application must have another mechanism (e.g., a higher
+ level protocol) for determining when it is safe to modify the memory region
+ associated with the memory descriptor.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_MD Indicates that
+\family typewriter
+mem_desc
+\family default
+ is not a valid memory descriptor.
+
+\layout Description
+
+PTL_INV_PROC Indicates that
+\family typewriter
+target
+\family default
+ is not a valid process id.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="8" columns="3">
+<features>
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+mem_desc
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the memory descriptor that describes the memory to be sent.
+ If the memory descriptor has an event queue associated with it, it will
+ be used to record events when the message has been sent (PTL_EVENT_SEND_START,
+ PTL_EVENT_SEND_END).
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ack_req
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+Controls whether an acknowledgement event is requested.
+ Acknowledgements are only sent when they are requested by the initiating
+ process
+\series bold
+and
+\series default
+ the memory descriptor has an event queue
+\series bold
+and
+\series default
+ the target memory descriptor enables them.
+ Allowed constants:
+\family typewriter
+PTL_ACK_REQ
+\family default
+,
+\family typewriter
+PTL_NOACK_REQ
+\family default
+.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+target
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A process id for the target process.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+portal
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+The index in the remote Portal table.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+cookie
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+The index into the access control table of the target process.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+match_bits
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+The match bits to use for message selection at the target process.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+offset
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+The offset into the target memory descriptor (only used when the target
+ memory descriptor has the
+\family typewriter
+PTL_MD_MANAGE_REMOTE
+\family default
+ option set).
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+hdr_data
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+64 bits of user data that can be included in the message header.
+ This data is written to an event queue entry at the target if an event
+ queue is present on the matching memory descriptor.
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Subsection
+
+PtlGet
+\begin_inset LatexCommand \label{sec:get}
+
+\end_inset
+
+
+\layout LyX-Code
+
+int PtlGet( ptl_handle_md_t mem_desc,
+\newline
+ ptl_process_id_t target,
+\newline
+ ptl_pt_index_t portal,
+\newline
+ ptl_ac_index_t cookie,
+\newline
+ ptl_match_bits_t match_bits,
+\newline
+ ptl_size_t offset );
+\layout Standard
+\noindent
+The
+\emph on
+PtlGet
+\emph default
+ function initiates a remote read operation.
+ There are two event pairs associated with a get operation: when the data
+ is sent from the remote node, a
+\family typewriter
+PTL_EVENT_GET_{START|END}
+\family default
+ event pair is registered on the remote node; and when the data is returned
+ from the remote node a
+\family typewriter
+PTL_EVENT_REPLY_{START|END}
+\family default
+ event pair is registered on the local node.
+\layout Subsubsection
+
+Return Codes
+\layout Description
+
+PTL_OK Indicates success.
+
+\layout Description
+
+PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
+
+\layout Description
+
+PTL_INV_MD Indicates that
+\family typewriter
+mem_desc
+\family default
+ is not a valid memory descriptor.
+
+\layout Description
+
+PTL_INV_PROC Indicates that
+\family typewriter
+target
+\family default
+ is not a valid process id.
+
+\layout Subsubsection
+
+Arguments
+\layout Standard
+
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="6" columns="3">
+<features>
+<column alignment="right" valignment="top" width="0pt">
+<column alignment="center" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="4.7in">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+mem_desc
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A handle for the memory descriptor that describes the memory into which
+ the requested data will be received.
+ The memory descriptor can have an event queue associated with it to record
+ events, such as when the message receive has started (
+\family typewriter
+PTL_EVENT_REPLY
+\family default
+_
+\family typewriter
+START
+\family default
+).
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+target
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+A process id for the target process.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+portal
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+The index in the remote Portal table.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+cookie
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+The index into the access control table of the target process.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+match_bits
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+The match bits to use for message selection at the target process.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+offset
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+input
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+The offset into the target memory descriptor (only used when the target
+ memory descriptor has the
+\family typewriter
+PTL_MD_MANAGE_REMOTE
+\family default
+ option set).
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\layout Section
+
+Summary
+\layout Standard
+
+
+\begin_inset LatexCommand \label{sec:summary}
+
+\end_inset
+
+ We conclude this section by summarizing the names introduced by the Portals
+ 3.2 API.
+ We start by summarizing the names of the types introduced by the API.
+ This is followed by a summary of the functions introduced by the API.
+ This is followed, in turn, by a summary of the function return codes.
+ Finally, we conclude with a summary of the other constant values introduced
+ by the API.
+\layout Standard
+
+Table\SpecialChar ~
+
+\begin_inset LatexCommand \ref{tab:types}
+
+\end_inset
+
+ presents a summary of the types defined by the Portals API.
+ The first column in this table gives the type name, the second column gives
+ a brief description of the type, the third column identifies the section
+ where the type is defined, and the fourth column lists the functions that
+ have arguments of this type.
+\layout Standard
+
+
+\begin_inset Float table
+placement htbp
+wide false
+collapsed false
+
+\layout Caption
+
+Types Defined by the Portals 3.2 API
+\begin_inset LatexCommand \label{tab:types}
+
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+medskip
+\end_inset
+
+
+\layout Standard
+\noindent
+
+\size small
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="25" columns="4">
+<features firstHeadEmpty="true">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="2in">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="2.2in">
+<row bottomline="true">
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+ Name
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+ Meaning
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+ Sect
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+ Functions
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_ac_index_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+indexes for an access control table
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:index-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlACEntry, PtlPut, PtlGet
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_ack_req_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+acknowledgement request types
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlPut
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+kinds of events
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlEQGet
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+information about events
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:event-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlEQGet
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_seq_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+event sequence number
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:event-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+PtlEQGet, PtlEQWait
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_handle_any_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+handles for any object
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:handle-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlNIHandle
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_handle_eq_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+handles for event queues
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:handle-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlEQAlloc, PtlEQFree, PtlEQGet, PtlEQWait, PtlMDUpdate
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_handle_md_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+handles for memory descriptors
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:handle-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlMDAttach, PtlMDBind, PtlMDUnlink, PtlMDUpdate, PtlMEAttach, PtlMEAttachAny, PtlMEInsert,
+ PtlPut, PtlGet
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_handle_me_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+handles for match entries
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:handle-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlMEUnlink
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_handle_ni_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+handles for network interfaces
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:handle-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlNIInit, PtlNIFini, PtlNIStatus, PtlNIDist, PtlEQAlloc, PtlACEntry, PtlPut,
+ PtlGet
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_nid_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+node identifiers
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:id-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+ PtlGetId, PtlACEntry
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_pid_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+process identifier
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:id-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+PtlGetId, PtlACEntry
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_uid_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+user identifier
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:id-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+PtlGetUid, PtlACEntry
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_ins_pos_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+insertion position (before or after)
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:meattach}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlMEAttach, PtlMEAttachAny, PtlMEInsert
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_interface_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+identifiers for network interfaces
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ni-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlNIInit
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_match_bits_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+match (and ignore) bits
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:mb-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlPut, PtlGet
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_md_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+memory descriptors
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:md-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlMDAttach, PtlMDUpdate
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_ni_fail_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+network interface-specific failures
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:eq}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+PtlEQGet, PtlEQWait
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_process_id_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+process identifiers
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:pid-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlGetId, PtlNIDist, PtlMEAttach, PtlMEAttachAny, PtlACEntry, PtlPut, PtlGet
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_pt_index_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+indexes for Portal tables
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:index-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlMEAttach, PtlMEAttachAny, PtlACEntry
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_size_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+sizes
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:size-t}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlEQAlloc, PtlPut, PtlGet
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_sr_index_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+indexes for status registers
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:stat-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlNIStatus
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_sr_value_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+values in status registers
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:stat-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlNIStatus
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_unlink_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+unlink options
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:meattach}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlMDAttach
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Standard
+
+Table\SpecialChar ~
+
+\begin_inset LatexCommand \ref{tab:func}
+
+\end_inset
+
+ presents a summary of the functions defined by the Portals API.
+ The first column in this table gives the name for the function, the second
+ column gives a brief description of the operation implemented by the function,
+ and the third column identifies the section where the function is defined.
+\layout Standard
+
+
+\begin_inset Float table
+placement htbp
+wide false
+collapsed false
+
+\layout Caption
+
+Functions Defined by the Portals 3.2 API
+\begin_inset LatexCommand \label{tab:func}
+
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+medskip
+\end_inset
+
+
+\layout Standard
+\align center
+
+\size small
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="24" columns="3">
+<features firstHeadEmpty="true">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<row bottomline="true">
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+Name
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ Operation
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ Section
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+PtlACEntry
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ update an entry in an access control table
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ac}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlEQAlloc
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ create an event queue
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:eq}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlEQGet
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ get the next event from an event queue
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:eq}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlEQFree
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ release the resources for an event queue
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:eq}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlEQWait
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ wait for a new event in an event queue
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:eq}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlFini
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ shut down the Portals API
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:init}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlGet
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ perform a get operation
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:datamovement}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlGetId
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ get the id for the current process
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:pid}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlInit
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ initialize the Portals API
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:init}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlMDAttach
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ create a memory descriptor and attach it to a match entry
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:md}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlMDBind
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ create a free-floating memory descriptor
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:mdbind}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlMDUnlink
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ remove a memory descriptor from a list and release its resources
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:md}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlMDUpdate
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ update a memory descriptor
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:md}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlMEAttach
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+create a match entry and attach it to a Portal table
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:me}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+PtlMEAttachAny
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+create a match entry and attach it to a free Portal table entry
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:attachany}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlMEInsert
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ create a match entry and insert it in a list
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:me}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlMEUnlink
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ remove a match entry from a list and release its resources
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:me}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlNIDist
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ get the distance to another process
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ni}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlNIFini
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ shut down a network interface
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ni}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlNIHandle
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ get the network interface handle for an object
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ni}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlNIInit
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ initialize a network interface
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ni}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlNIStatus
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ read a network interface status register
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ni}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ PtlPut
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ perform a put operation
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:datamovement}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Standard
+
+Table\SpecialChar ~
+
+\begin_inset LatexCommand \ref{tab:retcodes}
+
+\end_inset
+
+ summarizes the return codes used by functions defined by the Portals API.
+ All of these constants are integer values.
+ The first column of this table gives the symbolic name for the constant,
+ the second column gives a brief description of the value, and the third
+ column identifies the functions that can return this value.
+\layout Standard
+
+
+\begin_inset Float table
+placement htbp
+wide false
+collapsed false
+
+\layout Caption
+
+Function Return Codes for the Portals 3.2 API
+\begin_inset LatexCommand \label{tab:retcodes}
+
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+medskip
+\end_inset
+
+
+\layout Standard
+\align center
+
+\size small
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="27" columns="3">
+<features>
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="2.6in">
+<row bottomline="true">
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Name
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Meaning
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Functions
+\series default
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_AC_INV_INDEX
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+invalid access control table index
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+ PtlACEntry
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EQ_DROPPED
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+at least one event has been dropped
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+ PtlEQGet, PtlEQWait
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EQ_EMPTY
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+no events available in an event queue
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+ PtlEQGet
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_FAIL
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+error during initialization or cleanup
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+ PtlInit, PtlFini
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_ILL_MD
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+illegal memory descriptor values
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlMDAttach, PtlMDBind, PtlMDUpdate
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INIT_DUP
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+duplicate initialization of an interface
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlNIInit
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INIT_INV
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+initialization of an invalid interface
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlNIInit
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INUSE
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+the ME already has an MD
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlMDAttach
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INV_ASIZE
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+invalid access control table size
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlNIInit
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INV_EQ
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+invalid event queue handle
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlMDUpdate, PtlEQFree, PtlEQGet
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INV_HANDLE
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+invalid handle
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlNIHandle
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INV_MD
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+invalid memory descriptor handle
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlMDUnlink, PtlMDUpdate
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INV_ME
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+invalid match entry handle
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlMDAttach
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INV_NI
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+invalid network interface handle
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlNIDist, PtlNIFini, PtlMDBind, PtlEQAlloc
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INV_PROC
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+invalid process identifier
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlNIInit, PtlNIDist, PtlMEAttach, PtlMEInsert, PtlACEntry, PtlPut, PtlGet
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INV_PTINDEX
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+invalid Portal table index
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+ PtlMEAttach
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INV_REG
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+invalid status register
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+ PtlNIStatus
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INV_SR_INDX
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+invalid status register index
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+ PtlNIStatus
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_ML_TOOLONG
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+match list too long
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+ PtlMEAttach, PtlMEInsert
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_MD_INUSE
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+MD has pending operations
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+PtlMDUnlink
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_NOINIT
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+uninitialized API
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\emph on
+all
+\emph default
+, except PtlInit
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_NOSPACE
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+insufficient memory
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlNIInit, PtlMDAttach, PtlMDBind, PtlEQAlloc, PtlMEAttach, PtlMEInsert
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_NOUPDATE
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ no update was performed
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+ PtlMDUpdate
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_PT_FULL
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+Portal table is full
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+PtlMEAttachAny
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_OK
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ success
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+
+\emph on
+all
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_SEGV
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+addressing violation
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+\noindent
+PtlNIInit, PtlNIStatus, PtlNIDist, PtlNIHandle, PtlMDBind, PtlMDUpdate,
+ PtlEQAlloc, PtlEQGet, PtlEQWait
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Standard
+
+Table\SpecialChar ~
+
+\begin_inset LatexCommand \ref{tab:oconsts}
+
+\end_inset
+
+ summarizes the remaining constant values introduced by the Portals API.
+ The first column in this table presents the symbolic name for the constant,
+ the second column gives a brief description of the value, the third column
+ identifies the type for the value, the fourth column identifies the section
+ in which the value is introduced, and the fifth column identifies the sections
+ in which the value is referenced.
+\layout Standard
+
+
+\begin_inset Float table
+placement htbp
+wide false
+collapsed false
+
+\layout Caption
+
+Other Constants Defined by the Portals 3.2 API
+\begin_inset LatexCommand \label{tab:oconsts}
+
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+medskip
+\end_inset
+
+
+\layout Standard
+\align center
+
+\size small
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="36" columns="5">
+<features>
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<row bottomline="true">
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Name
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Meaning
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Base type
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Intr.
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Ref.
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_ACK_REQ
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+request an acknowledgement
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_ack_req_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EQ_NONE
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+a NULL event queue handle
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_handle_eq_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:handle-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:md}
+
+\end_inset
+
+,
+\begin_inset LatexCommand \ref{sec:mdupdate}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_GET_START
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+get event start
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:get}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_GET_END
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+get event end
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:get}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_GET_FAIL
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+get event fail
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:get}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_PUT_START
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+put event start
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_PUT_END
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+put event end
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_PUT_FAIL
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+put event fail
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_REPLY_START
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+reply event start
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:get}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_REPLY_END
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+reply event end
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:get}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_REPLY_FAIL
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+reply event fail
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:get}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_ACK_START
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+acknowledgement event start
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_ACK_END
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+acknowledgement event end
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_ACK_FAIL
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+acknowledgement event fail
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_SEND_START
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+send event start
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_SEND_END
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+send event end
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_SEND_FAIL
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+send event fail
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_EVENT_UNLINK
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+unlink event
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_event_kind_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ek-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:md-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_PID_ANY
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+wildcard for process id fields
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_pid_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:id-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:meattach}
+
+\end_inset
+
+,
+\begin_inset LatexCommand \ref{sec:acentry}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_NID_ANY
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+wildcard for node id fields
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_nid_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:id-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:meattach}
+
+\end_inset
+
+,
+\begin_inset LatexCommand \ref{sec:acentry}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_UID_ANY
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+wildcard for user id
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_uid_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:id-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:meattach}
+
+\end_inset
+
+,
+\begin_inset LatexCommand \ref{sec:acentry}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_IFACE_DEFAULT
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+default interface
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_interface_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:ni-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INS_AFTER
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+insert after
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_ins_pos_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:meinsert}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_INS_BEFORE
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+insert before
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_ins_pos_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:meinsert}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_MD_ACK_DISABLE
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+a flag to disable acknowledgements
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+int
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:md-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_MD_MANAGE_REMOTE
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+a flag to enable the use of remote offsets
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+int
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:md-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+,
+\begin_inset LatexCommand \ref{sec:get}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_MD_OP_GET
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+a flag to enable get operations
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+int
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:md-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_MD_OP_PUT
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+a flag to enable put operations
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+int
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:md-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_MD_THRESH_INF
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+infinite threshold for a memory descriptor
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+int
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:md-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_MD_TRUNCATE
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+a flag to enable truncation of a request
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+int
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:md-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_NOACK_REQ
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+request no acknowledgement
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_ack_req_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:put}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_PT_INDEX_ANY
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+wildcard for Portal indexes
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_pt_index_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:acentry}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_RETAIN
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+disable unlinking
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_unlink_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:mdattach}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_SR_DROP_COUNT
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+index for the dropped count register
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_sr_index_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:stat-type}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:nistatus}
+
+\end_inset
+
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+PTL_UNLINK
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+enable unlinking
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_unlink_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\begin_inset LatexCommand \ref{sec:mdattach}
+
+\end_inset
+
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Chapter
+
+The Semantics of Message Transmission
+\begin_inset LatexCommand \label{sec:semantics}
+
+\end_inset
+
+
+\layout Standard
+
+The portals API uses four types of messages: put requests, acknowledgements,
+ get requests, and replies.
+ In this chapter, we describe the information passed on the wire for each
+ type of message.
+ We also describe how this information is used to process incoming messages.
+\layout Section
+
+Sending Messages
+\layout Standard
+
+Table\SpecialChar ~
+
+\begin_inset LatexCommand \ref{tab:put-wire}
+
+\end_inset
+
+ summarizes the information that is transmitted for a put request.
+ The first column provides a descriptive name for the information, the second
+ column provides the type for this information, the third column identifies
+ the source of the information, and the fourth column provides additional
+ notes.
+ Most information that is transmitted is obtained directly from the
+\emph on
+PtlPut
+\emph default
+ operation.
+ Notice that the handle for the memory descriptor used in the
+\emph on
+PtlPut
+\emph default
+ operation is transmitted even though this value cannot be interpreted by
+ the target.
+ A value of anything other than
+\family typewriter
+PTL_MD_NONE
+\family default
+ is interpreted as a request for an acknowledgement.
+\layout Standard
+
+
+\begin_inset Float table
+placement htbp
+wide false
+collapsed false
+
+\layout Caption
+
+Information Passed in a Put Request
+\begin_inset LatexCommand \label{tab:put-wire}
+
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+medskip
+\end_inset
+
+
+\layout Standard
+\align center
+
+\size small
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="12" columns="4">
+<features firstHeadEmpty="true">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<row bottomline="true">
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Information
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Type
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+\emph on
+PtlPut
+\emph default
+ arg
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Notes
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+operation
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+int
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+indicates a put request
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+initiator
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_process_id_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+local information
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+user
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_uid_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+local information
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+target
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_process_id_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+target
+\family default
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+portal index
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_pt_index_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+portal
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+cookie
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_ac_index_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+cookie
+\family default
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+match bits
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_match_bits_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+match_bits
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+offset
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_size_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+offset
+\family default
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+memory desc
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_handle_md_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+mem_desc
+\family default
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+no ack if
+\family typewriter
+PTL_MD_NONE
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+length
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_size_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+mem_desc
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+length
+\family default
+ member
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+data
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family roman
+\emph on
+bytes
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+mem_desc
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+start
+\family default
+ and
+\family typewriter
+length
+\family default
+ members
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Standard
+
+Table\SpecialChar ~
+
+\begin_inset LatexCommand \ref{tab:ack-wire}
+
+\end_inset
+
+ summarizes the information transmitted in an acknowledgement.
+ Most of the information is simply echoed from the put request.
+ Notice that the initiator and target are obtained directly from the put
+ request, but are swapped in generating the acknowledgement.
+ The only new piece of information in the acknowledgement is the manipulated
+ length, which is determined as the put request is satisfied.
+\layout Standard
+
+
+\begin_inset Float table
+placement htbp
+wide false
+collapsed false
+
+\layout Caption
+
+Information Passed in an Acknowledgement
+\begin_inset LatexCommand \label{tab:ack-wire}
+
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+medskip
+\end_inset
+
+
+\layout Standard
+\align center
+
+\size small
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="10" columns="4">
+<features firstHeadEmpty="true">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<row bottomline="true">
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Information
+\series default
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Type
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Put Information
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Notes
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+operation
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+int
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ indicates an acknowledgement
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ initiator
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_process_id_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ target
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ target
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_process_id_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ initiator
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ portal index
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_pt_index_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ portal index
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ echo
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ match bits
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_match_bits_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ match bits
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ echo
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ offset
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_size_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ offset
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ echo
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ memory desc
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ ptl_handle_md_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ memory desc
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ echo
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ requested length
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ ptl_size_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ length
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ echo
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ manipulated length
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ ptl_size_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+ obtained from the operation
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Standard
+
+Table\SpecialChar ~
+
+\begin_inset LatexCommand \ref{tab:get-wire}
+
+\end_inset
+
+ summarizes the information that is transmitted for a get request.
+ Like the information transmitted in a put request, most of the information
+ transmitted in a get request is obtained directly from the
+\emph on
+PtlGet
+\emph default
+ operation.
+ Unlike put requests, get requests do not include the event queue handle.
+ In this case, the reply is generated whenever the operation succeeds and
+ the memory descriptor must not be unlinked until the reply is received.
+ As such, there is no advantage to explicitly sending the event queue handle.
+\layout Standard
+
+
+\begin_inset Float table
+placement htbp
+wide false
+collapsed false
+
+\layout Caption
+
+Information Passed in a Get Request
+\begin_inset LatexCommand \label{tab:get-wire}
+
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+medskip
+\end_inset
+
+
+\layout Standard
+\align center
+
+\size small
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="11" columns="4">
+<features firstHeadEmpty="true">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<row bottomline="true">
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Information
+\series default
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Type
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+\emph on
+PtlGet
+\emph default
+ argument
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Notes
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+operation
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+int
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+indicates a get operation
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+initiator
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_process_id_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+local information
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+user
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_uid_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+local information
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+target
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_process_id_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+target
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+portal index
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_pt_index_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+portal
+\family default
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+cookie
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_ac_index_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+cookie
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+match bits
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_match_bits_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+match_bits
+\family default
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+offset
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_size_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+offset
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+memory desc
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_handle_md_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+mem_desc
+\family default
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+length
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_size_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+mem_desc
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+length
+\family default
+ member
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Standard
+
+Table\SpecialChar ~
+
+\begin_inset LatexCommand \ref{tab:reply-wire}
+
+\end_inset
+
+ summarizes the information transmitted in a reply.
+ Like an acknowledgement, most of the information is simply echoed from
+ the get request.
+ The initiator and target are obtained directly from the get request, but
+ are swapped in generating the reply.
+ The only new information in the reply is the manipulated length
+ and the data, which are determined as the get request is satisfied.
+\layout Standard
+
+
+\begin_inset Float table
+placement htbp
+wide false
+collapsed false
+
+\layout Caption
+
+Information Passed in a Reply
+\begin_inset LatexCommand \label{tab:reply-wire}
+
+\end_inset
+
+
+\layout Standard
+
+
+\begin_inset ERT
+status Collapsed
+
+\layout Standard
+
+\backslash
+medskip
+\end_inset
+
+
+\layout Standard
+\align center
+
+\size small
+
+\begin_inset Tabular
+<lyxtabular version="3" rows="11" columns="4">
+<features firstHeadEmpty="true">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<column alignment="left" valignment="top" width="0pt">
+<row bottomline="true">
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Information
+\series default
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Type
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Get Information
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\series bold
+Notes
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+operation
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+int
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+indicates a reply
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+initiator
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_process_id_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+target
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+target
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_process_id_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+initiator
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+portal index
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_pt_index_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+portal index
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" bottomline="true" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+echo
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+match bits
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_match_bits_t
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+match bits
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+echo
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+offset
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_size_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+offset
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+echo
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+memory desc
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_handle_md_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+memory desc
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+echo
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+requested length
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_size_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+length
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+echo
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+manipulated length
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\family typewriter
+ptl_size_t
+\family default
+
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+obtained from the operation
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+data
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+
+\emph on
+bytes
+\end_inset
+</cell>
+<cell alignment="left" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+\end_inset
+</cell>
+<cell alignment="right" valignment="top" usebox="none">
+\begin_inset Text
+
+\layout Standard
+
+obtained from the operation
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Section
+
+Receiving Messages
+\begin_inset LatexCommand \label{sec:receiving}
+
+\end_inset
+
+
+\layout Standard
+
+When an incoming message arrives on a network interface, the communication
+ system first checks that the target process identified in the request is
+ a valid process that has initialized the network interface (i.e., that the
+ target process has a valid Portal table).
+ If this test fails, the communication system discards the message and increment
+s the dropped message count for the interface.
+ The remainder of the processing depends on the type of the incoming message.
+ Put and get messages are subject to access control checks and translation
+ (searching a match list), while acknowledgement and reply messages bypass
+ the access control checks and the translation step.
+\layout Standard
+
+Acknowledgement messages include a handle for the memory descriptor used
+ in the original
+\emph on
+PtlPut
+\emph default
+ operation.
+ This memory descriptor will identify the event queue where the event should
+ be recorded.
+ Upon receipt of an acknowledgement, the runtime system only needs to confirm
+ that the memory descriptor and event queue still exist and that there is
+ space for another event.
+ Should any of these conditions fail, the message is simply discarded
+ and the dropped message count for the interface is incremented.
+ Otherwise, the system builds an acknowledgement event from the information
+ in the acknowledgement message and adds it to the event queue.
+\layout Standard
+
+Reception of reply messages is also relatively straightforward.
+ Each reply message includes a handle for a memory descriptor.
+ If this descriptor exists, it is used to receive the message.
+ A reply message will be dropped if the memory descriptor identified in
+ the request doesn't exist.
+ In this case, the dropped message count for the interface is
+ incremented.
+ This is the only reason for dropping reply messages.
+ Every memory descriptor accepts and truncates incoming reply messages,
+ eliminating the other potential reasons for rejecting a reply message.
+\layout Standard
+
+The critical step in processing an incoming put or get request involves
+ mapping the request to a memory descriptor.
+ This step starts by using the Portal index in the incoming request to identify
+ a list of match entries.
+ This list of match entries is searched in order until a match entry is
+ found whose match criteria matches the match bits in the incoming request
+ and whose memory descriptor accepts the request.
+\layout Standard
+
+Because acknowledgement and reply messages are generated in response to requests
+ made by the process receiving these messages, the checks performed by the
+ runtime system for acknowledgements and replies are minimal.
+ In contrast, put and get messages are generated by remote processes and
+ the checks performed for these messages are more extensive.
+ Incoming put or get messages may be rejected because:
+\layout Itemize
+
+the Portal index supplied in the request is not valid;
+\layout Itemize
+
+the cookie supplied in the request is not a valid access control entry;
+
+\layout Itemize
+
+the access control entry identified by the cookie does not match the identifier
+ of the requesting process;
+\layout Itemize
+
+the access control entry identified by the cookie does not
+ match the Portal index supplied in the request; or
+\layout Itemize
+
+the match bits supplied in the request do not match any of the match entries
+ with a memory descriptor that accepts the request.
+
+\layout Standard
+
+In all cases, if the message is rejected, the incoming message is discarded
+ and the dropped message count for the interface is incremented.
+\layout Standard
+
+A memory descriptor may reject an incoming request for any of the following
+ reasons:
+\layout Itemize
+
+the
+\family typewriter
+PTL_MD_OP_PUT
+\family default
+ or
+\family typewriter
+PTL_MD_OP_GET
+\family default
+ option has not been enabled and the operation is put or get, respectively;
+
+\layout Itemize
+
+the length specified in the request is too long for the memory descriptor
+ and the
+\family typewriter
+PTL_MD_TRUNCATE
+\family default
+ option has not been enabled.
+\layout Chapter
+
+Examples
+\begin_inset LatexCommand \label{sec:examples}
+
+\end_inset
+
+
+\layout Comment
+
+The examples presented in this chapter have not been updated to reflect
+ the current API.
+\layout Standard
+
+In this section we present several examples to illustrate expected usage
+ patterns for the Portals 3.2 API.
+ The first example describes how to implement parallel servers using the
+ features of the Portals 3.2 API.
+ This example covers the access control list and the use of remote managed
+ offsets.
+ The second example presents an approach to dealing with dropped requests.
+ This example covers aspects of match lists and memory descriptors.
+ The final example covers message reception in MPI.
+ This example illustrates more sophisticated uses of matching and a procedure
+ to update a memory descriptor.
+\layout Section
+
+Parallel File Servers
+\begin_inset LatexCommand \label{sec:expfs}
+
+\end_inset
+
+
+\layout Standard
+
+Figure\SpecialChar ~
+
+\begin_inset LatexCommand \ref{fig:file}
+
+\end_inset
+
+ illustrates the logical structure of a parallel file server.
+ In this case, the parallel server consists of four servers that stripe
+ application data across four disks.
+ We would like to present applications with the illusion that the file server
+ is a single entity.
+ We will assume that all of the processes that constitute the parallel server
+ have the same user id.
+\layout Standard
+
+
+\begin_inset Float figure
+placement htbp
+wide false
+collapsed false
+
+\layout Standard
+\align center
+
+\begin_inset Graphics FormatVersion 1
+ filename file.eps
+ display color
+ size_type 0
+ rotateOrigin center
+ lyxsize_type 1
+ lyxwidth 196pt
+ lyxheight 147pt
+\end_inset
+
+
+\layout Caption
+
+Parallel File Server
+\begin_inset LatexCommand \label{fig:file}
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Standard
+
+When an application establishes a connection to the parallel file server,
+ it will allocate a Portal and access control list entry for communicating
+ with the server.
+ The access control list entry will include the Portal and match any process
+ in the parallel file server, so all of the file server processes will
+ have access to the portal.
+ The Portal information and access control entry will be sent to the file
+ server at this time.
+ If the application and server need to have multiple, concurrent I/O operations,
+ they can use additional portals or match entries to keep the operations
+ from interfering with one another.
+\layout Standard
+
+When an application initiates an I/O operation, it first builds a memory
+ descriptor that describes the memory region involved in the operation.
+ This memory descriptor will enable the appropriate operation (put for read
+ operations and get for write operations) and enable the use of remote offsets
+ (this lets the servers decide where their data should be placed in the
+ memory region).
+ After creating the memory descriptor and linking it into the appropriate
+ Portal entry, the application sends a read or write request (using
+\emph on
+PtlPut
+\emph default
+) to one of the file server processes.
+ The file server processes can then use put or get operations with the appropria
+te offsets to fill or retrieve the contents of the application's buffer.
+ To know when the operation has completed, the application can add an event
+ queue to the memory descriptor and add up the lengths of the remote operations
+ until the sum is the size of the requested I/O operation.
+\layout Section
+
+Dealing with Dropped Requests
+\begin_inset LatexCommand \label{sec:exdrop}
+
+\end_inset
+
+
+\layout Standard
+
+If a process does not anticipate unexpected requests, they will be discarded.
+ Applications using the Portals API can query the dropped count for the
+ interface to determine the number of requests that have been dropped (see
+ Section\SpecialChar ~
+
+\begin_inset LatexCommand \ref{sec:nistatus}
+
+\end_inset
+
+).
+ While this approach minimizes resource consumption, it does not provide
+ information that might be critical in debugging the implementation of a
+ higher level protocol.
+\layout Standard
+
+To keep track of more information about dropped requests, we use a memory
+ descriptor that truncates each incoming request to zero bytes and logs
+ the
+\begin_inset Quotes eld
+\end_inset
+
+dropped
+\begin_inset Quotes erd
+\end_inset
+
+ operations in an event queue.
+ Note that the operations are not dropped in the Portals sense, because
+ the operation succeeds.
+\layout Standard
+
+The following code fragment illustrates an implementation of this approach.
+ In this case, we assume that a thread is launched to execute the function
+
+\family typewriter
+watch_drop
+\family default
+.
+ This code starts by building an event queue to log truncated operations
+ and a memory descriptor to truncate the incoming requests.
+ This example only captures
+\begin_inset Quotes eld
+\end_inset
+
+dropped
+\begin_inset Quotes erd
+\end_inset
+
+ requests for a single portal.
+ In a more realistic situation, the memory descriptor would be appended
+ to the match list for every portal.
+ We also assume that the thread is capable of keeping up with the
+\begin_inset Quotes eld
+\end_inset
+
+dropped
+\begin_inset Quotes erd
+\end_inset
+
+ requests.
+ If this is not the case, we could use a finite threshold on the memory
+ descriptor to capture the first few dropped requests.
+\layout LyX-Code
+
+
+\size small
+#include <stdio.h>
+\newline
+#include <stdlib.h>
+\newline
+#include <portals.h>
+\newline
+
+\newline
+#define DROP_SIZE 32 /* number of dropped requests to track */
+\newline
+
+\newline
+int watch_drop( ptl_handle_ni_t ni, ptl_pt_index_t index ) {
+\newline
+ ptl_handle_eq_t drop_events;
+\newline
+ ptl_event_t event;
+\newline
+ ptl_handle_md_t drop_em;
+\newline
+ ptl_md_t drop_desc;
+\newline
+ ptl_process_id_t any_proc;
+\newline
+ ptl_handle_me_t match_any;
+\newline
+
+\newline
+ /* create the event queue */
+\newline
+ if( PtlEQAlloc(ni, DROP_SIZE, &drop_events) != PTL_OK ) {
+\newline
+ fprintf( stderr, "Couldn't create the event queue
+\backslash
+n" );
+\newline
+ exit( 1 );
+\newline
+ }
+\newline
+
+\newline
+ /* build a match entry */
+\newline
+ any_proc.nid = PTL_ID_ANY;
+\newline
+ any_proc.pid = PTL_ID_ANY;
+\newline
+ PtlMEAttach( index, any_proc, 0, ~(ptl_match_bits_t)0, PTL_RETAIN,
+\newline
+ &match_any );
+\newline
+
+\newline
+ /* create the memory descriptor */
+\newline
+ drop_desc.start = NULL;
+\newline
+ drop_desc.length = 0;
+\newline
+ drop_desc.threshold = PTL_MD_THRESH_INF;
+\newline
+ drop_desc.options = PTL_MD_OP_PUT | PTL_MD_OP_GET | PTL_MD_TRUNCATE;
+\newline
+ drop_desc.user_ptr = NULL;
+\newline
+ drop_desc.eventq = drop_events;
+\newline
+ if( PtlMDAttach(match_any, drop_desc, &drop_em) != PTL_OK ) {
+\newline
+ fprintf( stderr, "Couldn't create the memory descriptor
+\backslash
+n" );
+\newline
+ exit( 1 );
+\newline
+ }
+\newline
+
+\newline
+ /* watch for "dropped" requests */
+\newline
+ while( 1 ) {
+\newline
+ if( PtlEQWait( drop_events, &event ) != PTL_OK ) break;
+\newline
+ fprintf( stderr, "Dropped request from gid = %d, rid = %d
+\backslash
+n",
+ event.initiator.gid, event.initiator.rid );
+\newline
+ }
+\newline
+}
+\layout Section
+
+Message Transmission in MPI
+\begin_inset LatexCommand \label{sec:exmpi}
+
+\end_inset
+
+
+\layout Standard
+
+We conclude this section with a fairly extensive example that describes
+ an approach to implementing message transmission for MPI.
+ Like many MPI implementations, we distinguish two message transmission
+ protocols: a short message protocol and a long message protocol.
+ We use the constant
+\family typewriter
+MPI_LONG_LENGTH
+\family default
+ to determine the size of a long message.
+\layout Standard
+
+For small messages, the sender simply sends the message and presumes that
+ the message will be received (i.e., the receiver has allocated a memory region
+ to receive the message body).
+ For large messages, the sender also sends the message, but does not presume
+ that the message body will be saved.
+ Instead, the sender builds a memory descriptor for the message and enables
+ get operations on this descriptor.
+ If the target does not save the body of the message, it will record an
+ event for the put operation.
+ When the process later issues a matching MPI receive, it will perform a
+ get operation to retrieve the body of the message.
+\layout Standard
+
+To facilitate receive side matching based on the protocol, we use the most
+ significant bit in the match bits to indicate the protocol: 1 for long
+ messages and 0 for short messages.
+\layout Standard
+
+The following code presents a function that implements the send side of
+ the protocol.
+ The global variable
+\family typewriter
+EndGet
+\family default
+ is the last match entry attached to the Portal index used for posting long
+ messages.
+ This entry does not match any incoming requests (i.e., the memory descriptor
+ rejects all get operations) and is built during initialization of the MPI
+ library.
+ The other global variable,
+\family typewriter
+MPI_NI
+\family default
+, is a handle for the network interface used by the MPI implementation.
+\layout LyX-Code
+
+
+\size small
+extern ptl_handle_me_t EndGet;
+\newline
+extern ptl_handle_ni_t MPI_NI;
+\newline
+
+\newline
+void MPIsend( void *buf, ptl_size_t len, void *data, ptl_handle_eq_t eventq,
+\newline
+ ptl_process_id_t target, ptl_match_bits_t match )
+\newline
+{
+\newline
+ ptl_handle_md_t send_handle;
+\newline
+ ptl_md_t mem_desc;
+\newline
+ ptl_ack_req_t want_ack;
+\newline
+
+\newline
+ mem_desc.start = buf;
+\newline
+ mem_desc.length = len;
+\newline
+ mem_desc.threshold = 1;
+\newline
+ mem_desc.options = PTL_MD_OP_GET;
+\newline
+ mem_desc.user_ptr = data;
+\newline
+ mem_desc.eventq = eventq;
+\newline
+
+\newline
+ if( len >= MPI_LONG_LENGTH ) {
+\newline
+ ptl_handle_me_t me_handle;
+\newline
+
+\newline
+ /* add a match entry to the end of the get list */
+\newline
+ PtlMEInsert( target, match, 0, PTL_UNLINK, PTL_INS_BEFORE, EndGet,
+ &me_handle );
+\newline
+ PtlMDAttach( me_handle, mem_desc, PTL_UNLINK, NULL );
+\newline
+
+\newline
+ /* we want an ack for long messages */
+\newline
+ want_ack = PTL_ACK_REQ;
+\newline
+
+\newline
+ /* set the protocol bit to indicate that this is a long message
+ */
+\newline
+ match |= (ptl_match_bits_t)1<<63;
+\newline
+ } else {
+\newline
+ /* we don't want an ack for short messages */
+\newline
+ want_ack = PTL_NOACK_REQ;
+\newline
+
+\newline
+ /* set the protocol bit to indicate that this is a short message
+ */
+\newline
+ match &= ~((ptl_match_bits_t)1<<63);
+\newline
+ }
+\newline
+
+\newline
+ /* create a memory descriptor and send it */
+\newline
+ PtlMDBind( MPI_NI, mem_desc, &send_handle );
+\newline
+ PtlPut( send_handle, want_ack, target, MPI_SEND_PINDEX, MPI_AINDEX, match,
+ 0 );
+\newline
+}
+\layout Standard
+
+The
+\emph on
+MPIsend
+\emph default
+ function returns as soon as the message has been scheduled for transmission.
+ The event queue argument,
+\family typewriter
+eventq
+\family default
+, can be used to determine the disposition of the message.
+ Assuming that
+\family typewriter
+eventq
+\family default
+ is not
+\family typewriter
+PTL_EQ_NONE
+\family default
+, a
+\family typewriter
+PTL_EVENT_SENT
+\family default
+ event will be recorded for each message as the message is transmitted.
+ For small messages, this is the only event that will be recorded in
+\family typewriter
+eventq
+\family default
+.
+ In contrast, long messages include an explicit request for an acknowledgement.
+ If the
+\family typewriter
+target
+\family default
+ process has posted a matching receive, the acknowledgement will be sent
+ as the message is received.
+ If a matching receive has not been posted, the message will be discarded
+ and no acknowledgement will be sent.
+ When the
+\family typewriter
+target
+\family default
+ process later issues a matching receive, the receive will be translated
+ into a get operation and a
+\family typewriter
+PTL_EVENT_GET
+\family default
+ event will be recorded in
+\family typewriter
+eventq
+\family default
+.
+\layout Standard
+
+Figure\SpecialChar ~
+
+\begin_inset LatexCommand \ref{fig:mpi}
+
+\end_inset
+
+ illustrates the organization of the match list used for receiving MPI messages.
+ The initial entries (not shown in this figure) would be used to match the
+ MPI receives that have been preposted by the application.
+ The preposted receives are followed by a match entry,
+\emph on
+RcvMark
+\emph default
+, that marks the boundary between preposted receives and the memory descriptors
+ used for
+\begin_inset Quotes eld
+\end_inset
+
+unexpected
+\begin_inset Quotes erd
+\end_inset
+
+ messages.
+ The
+\emph on
+RcvMark
+\emph default
+ entry is followed by a small collection of match entries that match unexpected
+
+\begin_inset Quotes eld
+\end_inset
+
+short
+\begin_inset Quotes erd
+\end_inset
+
+ messages, i.e., messages that have a 0 in the most significant bit of their
+ match bits.
+ The memory descriptors associated with these match entries will append
+ the incoming message to the associated memory descriptor and record an
+ event in an event queue for unexpected messages.
+ The unexpected short message matching entries are followed by a match entry
+ that will match messages that were not matched by the preceding match entries,
+ i.e., the unexpected long messages.
+ The memory descriptor associated with this match entry truncates the message
+ body and records an event in the event queue for unexpected messages.
+ Note that all of the memory descriptors used for unexpected messages share
+ a common event queue.
+ This makes it possible to process the unexpected messages in the order
+ in which they arrived, regardless of whether they are short or long.
+\layout Standard
+
+
+\begin_inset Float figure
+placement htbp
+wide false
+collapsed false
+
+\layout Standard
+\align center
+
+\begin_inset Graphics FormatVersion 1
+ filename mpi.eps
+ display color
+ size_type 0
+ rotateOrigin center
+ lyxsize_type 1
+ lyxwidth 389pt
+ lyxheight 284pt
+\end_inset
+
+
+\layout Caption
+
+Message Reception in MPI
+\begin_inset LatexCommand \label{fig:mpi}
+
+\end_inset
+
+
+\end_inset
+
+
+\layout Standard
+
+When the local MPI process posts an MPI receive, we must first search the
+ events in the unexpected message queue to see if a matching message has
+ already arrived.
+ If no matching message is found, a match entry for the receive is inserted
+ before the
+\emph on
+RcvMark
+\emph default
+ entry--after the match entries for all of the previously posted receives
+ and before the match entries for the unexpected messages.
+ This ensures that preposted receives are matched in the order that they
+ were posted (a requirement of MPI).
+
+\layout Standard
+
+While this strategy respects the temporal semantics of MPI, it introduces
+ a race condition: a matching message might arrive after the events in the
+ unexpected message queue have been searched, but before the match entry
+ for the receive has been inserted in the match list.
+
+\layout Standard
+
+To avoid this race condition we start by setting the
+\family typewriter
+threshold
+\family default
+ of the memory descriptor to 0, making the descriptor inactive.
+ We then insert the match entry into the match list and proceed to search
+ the events in the unexpected message queue.
+ A matching message that arrives as we are searching the unexpected message
+ queue will not be accepted by the memory descriptor and, if not matched
+ by an earlier match list element, will add an event to the unexpected message
+ queue.
+ After searching the events in the unexpected message queue, we update the
+ memory descriptor, setting the threshold to 1 to activate the memory descriptor.
+ This update is predicated by the condition that the unexpected message
+ queue is empty.
+ We repeat the process of searching the unexpected message queue until the
+ update succeeds.
+\layout Standard
+
+The following code fragment illustrates this approach.
+ Because events must be removed from the unexpected message queue to be
+ examined, this code fragment assumes the existence of a user managed event
+ list,
+\family typewriter
+Rcvd
+\family default
+, for the events that have already been removed from the unexpected message
+ queue.
+ In an effort to keep the example focused on the basic protocol, we have
+ omitted the code that would be needed to manage the memory descriptors
+ used for unexpected short messages.
+ In particular, we simply leave messages in these descriptors until they
+ are received by the application.
+ In a robust implementation, we would introduce code to ensure that short
+ unexpected messages are removed from these memory descriptors so that they
+ can be re-used.
+\layout LyX-Code
+
+
+\size small
+extern ptl_handle_eq_t UnexpQueue;
+\newline
+extern ptl_handle_me_t RcvMark;
+\newline
+extern ptl_handle_me_t ShortMatch;
+\newline
+
+\newline
+typedef struct event_list_tag {
+\newline
+ ptl_event_t event;
+\newline
+ struct event_list_tag* next;
+\newline
+} event_list;
+\newline
+
+\newline
+extern event_list Rcvd;
+\newline
+
+\newline
+void AppendRcvd( ptl_event_t event )
+\newline
+{
+\newline
+ /* append an event onto the Rcvd list */
+\newline
+}
+\newline
+
+\newline
+int SearchRcvd( void *buf, ptl_size_t len, ptl_process_id_t sender, ptl_match_bi
+ts_t match,
+\newline
+ ptl_match_bits_t ignore, ptl_event_t *event )
+\newline
+{
+\newline
+ /* Search the Rcvd event queue, looking for a message that matches the
+ requested message.
+\newline
+ * If one is found, remove the event from the Rcvd list and return it.
+ */
+\newline
+}
+\newline
+
+\newline
+typedef enum { RECEIVED, POSTED } receive_state;
+\newline
+
+\newline
+receive_state CopyMsg( void *buf, ptl_size_t &length, ptl_event_t event,
+ ptl_md_t md_buf )
+\newline
+{
+\newline
+ ptl_handle_md_t md_handle;
+\newline
+ ptl_handle_me_t me_handle;
+\newline
+
+\newline
+ if( event.rlength >= MPI_LONG_LENGTH ) {
+\newline
+ PtlMDBind( MPI_NI, md_buf, &md_handle );
+\newline
+ PtlGet( event.initiator, MPI_GET_PINDEX, 0, event.match_bits, MPI_AINDEX,
+ md_handle );
+\newline
+ return POSTED;
+\newline
+ } else {
+\newline
+ /* copy the message */
+\newline
+ if( event.mlength < *length ) *length = event.mlength;
+\newline
+ memcpy( buf, (char*)event.md_desc.start+event.offset, *length );
+\newline
+ return RECEIVED;
+\newline
+ }
+\newline
+}
+\newline
+
+\newline
+receive_state MPIreceive( void *buf, ptl_size_t &len, void *MPI_data, ptl_handle
+_eq_t eventq,
+\newline
+ ptl_process_id_t sender, ptl_match_bits_t match,
+ ptl_match_bits_t ignore )
+\newline
+{
+\newline
+ ptl_md_t md_buf;
+\newline
+ ptl_handle_md_t md_handle;
+\newline
+ ptl_handle_me_t me_handle;
+\newline
+ ptl_event_t event;
+\newline
+
+\newline
+ /* build a memory descriptor for the receive */
+\newline
+ md_buf.start = buf;
+\newline
+ md_buf.length = *len;
+\newline
+ md_buf.threshold = 0; /* temporarily disabled */
+\newline
+ md_buf.options = PTL_MD_PUT_OP;
+\newline
+ md_buf.user_ptr = MPI_data;
+\newline
+ md_buf.eventq = eventq;
+\newline
+
+\newline
+ /* see if we have already received the message */
+\newline
+ if( SearchRcvd(buf, len, sender, match, ignore, &event) )
+\newline
+ return CopyMsg( buf, len, event, md_buf );
+\newline
+
+\newline
+ /* create the match entry and attach the memory descriptor */
+\newline
+ PtlMEInsert(sender, match, ignore, PTL_UNLINK, PTL_INS_BEFORE, RcvMark,
+ &me_handle);
+\newline
+ PtlMDAttach( me_handle, md_buf, PTL_UNLINK, &md_handle );
+\newline
+
+\newline
+ md_buf.threshold = 1;
+\newline
+ do
+\newline
+ if( PtlEQGet( UnexpQueue, &event ) != PTL_EQ_EMPTY ) {
+\newline
+ if( MPIMatch(event, match, ignore, sender) ) {
+\newline
+ return CopyMsg( buf, len, event, md_buf );
+\newline
+ } else {
+\newline
+ AppendRcvd( event );
+\newline
+ }
+\newline
+ }
+\newline
+ while( PtlMDUpdate(md_handle, NULL, &md_buf, UnexpQueue) == PTL_NOUPDATE
+ );
+\newline
+ return POSTED;
+\newline
+}
+\layout Chapter*
+
+Acknowledgments
+\layout Standard
+
+Several people have contributed to the philosophy, design, and implementation
+ of the Portals message passing architecture as it has evolved.
+ We acknowledge the following people for their contributions: Al Audette,
+ Lee Ann Fisk, David Greenberg, Tramm Hudson, Gabi Istrail, Chu Jong, Mike
+ Levenhagen, Jim Otto, Mark Sears, Lance Shuler, Mack Stallcup, Jeff VanDyke,
+ Dave van Dresser, Lee Ward, and Stephen Wheat.
+
+\layout Standard
+
+
+\begin_inset LatexCommand \BibTeX[ieee]{portals3}
+
+\end_inset
+
+
+\the_end
--- /dev/null
+#FIG 3.2
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+6 1350 900 2175 1200
+4 0 0 100 0 0 10 0.0000 0 105 825 1350 1200 Transmission\001
+4 0 0 100 0 0 10 0.0000 0 105 285 1620 1050 Data\001
+-6
+2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 2700 1275 2700 1725
+2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 900 525 2700 1200
+2 2 0 1 0 7 100 0 -1 3.000 0 0 7 0 0 5
+ 0 300 1200 300 1200 2250 0 2250 0 300
+2 2 0 1 0 7 100 0 -1 3.000 0 0 7 0 0 5
+ 2400 300 3600 300 3600 2250 2400 2250 2400 300
+2 1 1 1 0 7 100 0 -1 4.000 0 0 7 1 0 2
+ 0 0 1.00 60.00 120.00
+ 2699 1788 899 1938
+4 0 0 100 0 0 10 0.0000 0 105 720 2775 1650 Translation\001
+4 1 0 100 0 0 10 0.0000 0 135 555 1800 2025 Optional\001
+4 1 0 100 0 0 10 0.0000 0 135 1170 1800 2175 Acknowledgement\001
+4 0 0 100 0 0 10 0.0000 0 105 405 2850 1500 Portal\001
+4 1 0 100 0 0 10 0.0000 0 135 405 3000 525 Target\001
+4 1 0 100 0 0 10 0.0000 0 105 540 600 525 Initiator\001
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+SUBDIRS = portals linux
+EXTRA_DIST = config.h.in
+include $(top_srcdir)/Rules
--- /dev/null
+/* ../include/config.h.in. Generated automatically from configure.in by autoheader. */
+
+/* Define if you have the readline library (-lreadline). */
+#undef HAVE_LIBREADLINE
+
+/* Name of package */
+#undef PACKAGE
+
+/* Version number of package */
+#undef VERSION
+
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+include $(top_srcdir)/Rules
+
+linuxincludedir = $(includedir)/linux
+
+linuxinclude_HEADERS=kp30.h portals_lib.h
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _KP30_INCLUDED
+#define _KP30_INCLUDED
+
+
+#define PORTAL_DEBUG
+
+/* Fallback offsetof() for environments that do not already provide one:
+ * the address of 'memb' within a 'typ' placed at address 0, cast to int. */
+#ifndef offsetof
+# define offsetof(typ,memb) ((int)((char *)&(((typ *)0)->memb)))
+#endif
+
+/* Isolates the least significant set bit of x (yields 0 when x is 0).
+ * Note that x is evaluated twice. */
+#define LOWEST_BIT_SET(x) ((x) & ~((x) - 1))
+
+/* Without CONFIG_SMP the processor id is hard-wired to 0. */
+#ifndef CONFIG_SMP
+# define smp_processor_id() 0
+#endif
+
+/*
+ * Debugging
+ */
+extern unsigned int portal_subsystem_debug;
+extern unsigned int portal_stack;
+extern unsigned int portal_debug;
+extern unsigned int portal_printk;
+/* Debugging subsystems (8 bit ID)
+ *
+ * If you add debug subsystem #32, you need to send email to phil, because
+ * you're going to break kernel subsystem debug filtering. */
+#define S_UNDEFINED (0 << 24)
+#define S_MDC (1 << 24)
+#define S_MDS (2 << 24)
+#define S_OSC (3 << 24)
+#define S_OST (4 << 24)
+#define S_CLASS (5 << 24)
+#define S_OBDFS (6 << 24) /* obsolete */
+#define S_LLITE (7 << 24)
+#define S_RPC (8 << 24)
+#define S_EXT2OBD (9 << 24) /* obsolete */
+#define S_PORTALS (10 << 24)
+#define S_SOCKNAL (11 << 24)
+#define S_QSWNAL (12 << 24)
+#define S_PINGER (13 << 24)
+#define S_FILTER (14 << 24)
+#define S_TRACE (15 << 24) /* obsolete */
+#define S_ECHO (16 << 24)
+#define S_LDLM (17 << 24)
+#define S_LOV (18 << 24)
+#define S_GMNAL (19 << 24)
+#define S_PTLROUTER (20 << 24)
+#define S_COBD (21 << 24)
+#define S_PTLBD (22 << 24)
+#define S_LOG (23 << 24)
+
+/* If you change these values, please keep portals/linux/utils/debug.c
+ * up to date! */
+
+/* Debugging masks (24 bits, non-overlapping) */
+#define D_TRACE (1 << 0) /* ENTRY/EXIT markers */
+#define D_INODE (1 << 1)
+#define D_SUPER (1 << 2)
+#define D_EXT2 (1 << 3) /* anything from ext2_debug */
+#define D_MALLOC (1 << 4) /* print malloc, free information */
+#define D_CACHE (1 << 5) /* cache-related items */
+#define D_INFO (1 << 6) /* general information */
+#define D_IOCTL (1 << 7) /* ioctl related information */
+#define D_BLOCKS (1 << 8) /* ext2 block allocation */
+#define D_NET (1 << 9) /* network communications */
+#define D_WARNING (1 << 10)
+#define D_BUFFS (1 << 11)
+#define D_OTHER (1 << 12)
+#define D_DENTRY (1 << 13)
+#define D_PORTALS (1 << 14) /* ENTRY/EXIT markers */
+#define D_PAGE (1 << 15) /* bulk page handling */
+#define D_DLMTRACE (1 << 16)
+#define D_ERROR (1 << 17) /* CERROR(...) == CDEBUG (D_ERROR, ...) */
+#define D_EMERG (1 << 18) /* CEMERG(...) == CDEBUG (D_EMERG, ...) */
+#define D_HA (1 << 19) /* recovery and failover */
+#define D_RPCTRACE (1 << 20) /* for distributed debugging */
+#define D_VFSTRACE (1 << 21)
+
+#ifndef THREAD_SIZE
+#define THREAD_SIZE 8192
+#endif
+/* CDEBUG_STACK(var) yields an unsigned long stack figure for the caller;
+ * CDEBUG stores it in a local and hands it to CHECK_STACK and
+ * portals_debug_msg.
+ *
+ * - default: THREAD_SIZE minus the offset of the current frame address
+ *   within its THREAD_SIZE-aligned region ('var' is unused).
+ * - __arch_ia64__: the offset of 'var' itself within its THREAD_SIZE-aligned
+ *   region. The address must be converted to an integer before masking;
+ *   applying '&' directly to a pointer is not valid C.
+ *   NOTE(review): unlike the default variant this is not subtracted from
+ *   THREAD_SIZE; kept as in the original - confirm the intended meaning. */
+#ifdef __arch_ia64__
+#define CDEBUG_STACK(var) ((unsigned long)&(var) & (THREAD_SIZE - 1))
+#else
+#define CDEBUG_STACK(var) (THREAD_SIZE - \
+ ((unsigned long)__builtin_frame_address(0)& \
+ (THREAD_SIZE - 1)))
+#endif
+
+/* CHECK_STACK(stack): in kernel builds, when 'stack' exceeds both three
+ * quarters of THREAD_SIZE and the current portal_stack value, log a D_ERROR
+ * message and store 'stack' in portal_stack (the assignment is the argument
+ * printed by the "%u" conversion). Outside the kernel it is a no-op.
+ * 'stack' is evaluated several times. */
+#ifdef __KERNEL__
+#define CHECK_STACK(stack) \
+ do { \
+ if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) \
+ portals_debug_msg(DEBUG_SUBSYSTEM, D_ERROR, \
+ __FILE__, __FUNCTION__, __LINE__, \
+ (stack), \
+ "maximum lustre stack %u\n", \
+ portal_stack = (stack)); \
+ } while (0)
+#else
+#define CHECK_STACK(stack) do{}while(0)
+#endif
+
+/* CDEBUG(mask, format, ...): conditionally emit a debug message through
+ * portals_debug_msg, tagged with DEBUG_SUBSYSTEM, file, function and line.
+ *
+ * The message is emitted when any of the following holds:
+ *   - mask is 0;
+ *   - mask contains D_ERROR or D_EMERG;
+ *   - mask intersects portal_debug and bit (DEBUG_SUBSYSTEM >> 24) is set
+ *     in portal_subsystem_debug.
+ * CHECK_STACK runs on every invocation, whether or not a message is
+ * emitted.
+ *
+ * The expansion declares locals named 'stack' and 'match'; arguments that
+ * refer to caller variables with those names would bind to the macro's
+ * locals instead. 'mask' is evaluated more than once. */
+#define CDEBUG(mask, format, a...) \
+do { \
+ unsigned long stack = CDEBUG_STACK(stack); \
+ int match = 0; \
+ \
+ CHECK_STACK(stack); \
+ if (!(mask)) \
+ match = 1; \
+ else if ((mask) & (D_ERROR | D_EMERG)) \
+ match = 1; \
+ else if (portal_debug & (mask) && \
+ portal_subsystem_debug & (1 << (DEBUG_SUBSYSTEM >> 24))) \
+ match = 1; \
+ if (match) \
+ portals_debug_msg(DEBUG_SUBSYSTEM, mask, \
+ __FILE__, __FUNCTION__, __LINE__, \
+ stack, format , ## a); \
+} while (0)
+
+/* Shorthands for CDEBUG with a fixed mask. */
+#define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a)
+#define CERROR(format, a...) CDEBUG(D_ERROR, format, ## a)
+#define CEMERG(format, a...) CDEBUG(D_EMERG, format, ## a)
+
+/* GOTO(label, rc): evaluate rc once, log it under D_TRACE (as unsigned,
+ * signed and hex) together with the label name, then jump to 'label'.
+ * The value of rc is only logged; it is not stored anywhere. */
+#define GOTO(label, rc) \
+do { \
+ long GOTO__ret = (long)(rc); \
+ CDEBUG(D_TRACE,"Process leaving via %s (rc=%lu : %ld : %lx)\n", \
+ #label, (unsigned long)GOTO__ret, (signed long)GOTO__ret,\
+ (signed long)GOTO__ret); \
+ goto label; \
+} while (0)
+
+/* RETURN(rc): evaluate rc once, log it under D_TRACE and return it from the
+ * enclosing function. The logged value is rc converted to long. */
+#define RETURN(rc) \
+do { \
+ typeof(rc) RETURN__ret = (rc); \
+ long tmp = (long)RETURN__ret; \
+ CDEBUG(D_TRACE, "Process leaving (rc=%lu : %ld : %lx)\n", \
+ (unsigned long)tmp, (signed long)tmp, \
+ (signed long)tmp); \
+ return RETURN__ret; \
+} while (0)
+
+/* ENTRY: D_TRACE marker logged on function entry. */
+#define ENTRY \
+do { \
+ CDEBUG(D_TRACE, "Process entered\n"); \
+} while (0)
+
+/* EXIT: D_TRACE marker logged before leaving a function; it does not
+ * itself return. */
+#define EXIT \
+do { \
+ CDEBUG(D_TRACE, "Process leaving\n"); \
+} while(0)
+
+
+#ifdef __KERNEL__
+# include <linux/vmalloc.h>
+# include <linux/time.h>
+# include <linux/slab.h>
+# include <linux/interrupt.h>
+# include <linux/highmem.h>
+# include <linux/module.h>
+# include <linux/version.h>
+# include <portals/lib-nal.h>
+# include <linux/smp_lock.h>
+# include <asm/atomic.h>
+
+/* Kernel-version compatibility layer: gives the rest of the code one set of
+ * names on both sides of the 2.5.0 boundary. */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+/* Pre-2.5: map the work API names onto task queues. */
+#define schedule_work schedule_task
+#define prepare_work(wq,cb,cbdata) \
+do { \
+ INIT_TQUEUE((wq), 0, 0); \
+ PREPARE_TQUEUE((wq), (cb), (cbdata)); \
+} while (0)
+
+#define ll_invalidate_inode_pages invalidate_inode_pages
+#define PageUptodate Page_Uptodate
+#define our_recalc_sigpending(current) recalc_sigpending(current)
+#define num_online_cpus() smp_num_cpus
+/* Call schedule() if the current task has need_resched set. */
+static inline void our_cond_resched(void)
+{
+ if (current->need_resched)
+ schedule ();
+}
+
+#else
+
+/* 2.5 and later: initialize a work item with INIT_WORK. */
+#define prepare_work(wq,cb,cbdata) \
+do { \
+ INIT_WORK((wq), (void *)(cb), (void *)(cbdata)); \
+} while (0)
+#define ll_invalidate_inode_pages(inode) invalidate_inode_pages((inode)->i_mapping)
+#define wait_on_page wait_on_page_locked
+#define our_recalc_sigpending(current) recalc_sigpending()
+/* NOTE(review): strpbrk() only locates the first delimiter; it does not
+ * split the string the way strtok() does - confirm that callers cope. */
+#define strtok(a,b) strpbrk(a, b)
+/* Thin wrapper around cond_resched(). */
+static inline void our_cond_resched(void)
+{
+ cond_resched();
+}
+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */
+
+/* LASSERT(e): with PORTAL_DEBUG defined, call kportal_assertion_failed()
+ * with the text of the expression, file, function and line when 'e' is
+ * false; without PORTAL_DEBUG it expands to nothing, so 'e' is not
+ * evaluated. */
+#ifdef PORTAL_DEBUG
+extern void kportal_assertion_failed(char *expr,char *file,char *func,int line);
+#define LASSERT(e) ((e) ? 0 : kportal_assertion_failed( #e , __FILE__, \
+ __FUNCTION__, __LINE__))
+#else
+#define LASSERT(e)
+#endif
+
+/* LBUG(): log an emergency message, dump the debug log and run the LBUG
+ * upcall. Under __arch_um__ it then calls panic(); otherwise it marks the
+ * current task TASK_UNINTERRUPTIBLE and calls schedule(). */
+#ifdef __arch_um__
+#define LBUG() \
+do { \
+ CEMERG("LBUG - trying to dump log to /tmp/lustre-log\n"); \
+ portals_debug_dumplog(); \
+ portals_run_lbug_upcall(__FILE__, __FUNCTION__, __LINE__); \
+ panic("LBUG"); \
+} while (0)
+#else
+#define LBUG() \
+do { \
+ CEMERG("LBUG\n"); \
+ portals_debug_dumplog(); \
+ portals_run_lbug_upcall(__FILE__, __FUNCTION__, __LINE__); \
+ set_task_state(current, TASK_UNINTERRUPTIBLE); \
+ schedule(); \
+} while (0)
+#endif /* __arch_um__ */
+
+/*
+ * Memory
+ */
+/* Allocation accounting: with PORTAL_DEBUG, portal_kmem_inc/dec add or
+ * subtract 'size' on the global atomic counter portal_kmemory ('ptr' is
+ * unused); without PORTAL_DEBUG both are no-ops. */
+#ifdef PORTAL_DEBUG
+extern atomic_t portal_kmemory;
+
+# define portal_kmem_inc(ptr, size) \
+do { \
+ atomic_add(size, &portal_kmemory); \
+} while (0)
+
+# define portal_kmem_dec(ptr, size) do { \
+ atomic_sub(size, &portal_kmemory); \
+} while (0)
+
+#else
+# define portal_kmem_inc(ptr, size) do {} while (0)
+# define portal_kmem_dec(ptr, size) do {} while (0)
+#endif /* PORTAL_DEBUG */
+
+/* Requests larger than this many bytes are served by vmalloc()/vfree()
+ * instead of kmalloc()/kfree(). */
+#define PORTAL_VMALLOC_SIZE 16384
+
+/* PORTAL_ALLOC(ptr, size): allocate 'size' bytes into 'ptr'.
+ *   - asserts that the caller is not in interrupt context;
+ *   - uses vmalloc() above PORTAL_VMALLOC_SIZE, kmalloc(GFP_KERNEL)
+ *     otherwise;
+ *   - on success the memory is zero-filled and accounted with
+ *     portal_kmem_inc; on failure 'ptr' is NULL and an error is logged.
+ * A D_MALLOC trace line is issued in both cases. The trace reads
+ * portal_kmemory, which this header declares only under PORTAL_DEBUG.
+ * The expansion declares a local named 's'; 'ptr' must not be an expression
+ * that refers to a caller variable of that name. */
+#define PORTAL_ALLOC(ptr, size) \
+do { \
+ long s = size; \
+ LASSERT (!in_interrupt()); \
+ if (s > PORTAL_VMALLOC_SIZE) \
+ (ptr) = vmalloc(s); \
+ else \
+ (ptr) = kmalloc(s, GFP_KERNEL); \
+ if ((ptr) == NULL) \
+ CERROR("PORTALS: out of memory at %s:%d (tried to alloc" \
+ " '" #ptr "' = %ld)\n", __FILE__, __LINE__, s); \
+ else { \
+ portal_kmem_inc((ptr), s); \
+ memset((ptr), 0, s); \
+ } \
+ CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %ld at %p (tot %d).\n", \
+ s, (ptr), atomic_read (&portal_kmemory)); \
+} while (0)
+
+/* PORTAL_FREE(ptr, size): release memory obtained with PORTAL_ALLOC.
+ * 'size' selects vfree() (above PORTAL_VMALLOC_SIZE) or kfree(), so it has
+ * to be the size that was passed at allocation time. A NULL 'ptr' is
+ * reported with CERROR and otherwise ignored. 'ptr' is not reset after the
+ * free. */
+#define PORTAL_FREE(ptr, size) \
+do { \
+ long s = (size); \
+ if ((ptr) == NULL) { \
+ CERROR("PORTALS: free NULL '" #ptr "' (%ld bytes) at " \
+ "%s:%d\n", s, __FILE__, __LINE__); \
+ break; \
+ } \
+ if (s > PORTAL_VMALLOC_SIZE) \
+ vfree(ptr); \
+ else \
+ kfree(ptr); \
+ portal_kmem_dec((ptr), s); \
+ CDEBUG(D_MALLOC, "kfreed '" #ptr "': %ld at %p (tot %d).\n", \
+ s, (ptr), atomic_read (&portal_kmemory)); \
+} while (0)
+
+/* PORTAL_SLAB_ALLOC(ptr, slab, size): allocate one object from the cache
+ * 'slab' with SLAB_KERNEL into 'ptr'. Asserts that the caller is not in
+ * interrupt context. On success the first 'size' bytes are zeroed and
+ * accounted with portal_kmem_inc; on failure 'ptr' is NULL and an error is
+ * logged. 'size' is used only for zeroing, accounting and logging. */
+#define PORTAL_SLAB_ALLOC(ptr, slab, size) \
+do { \
+ long s = (size); \
+ LASSERT (!in_interrupt()); \
+ (ptr) = kmem_cache_alloc((slab), SLAB_KERNEL); \
+ if ((ptr) == NULL) { \
+ CERROR("PORTALS: out of memory at %s:%d (tried to alloc" \
+ " '" #ptr "' from slab '" #slab "')\n", __FILE__, \
+ __LINE__); \
+ } else { \
+ portal_kmem_inc((ptr), s); \
+ memset((ptr), 0, s); \
+ } \
+ CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %ld at %p (tot %d).\n", \
+ s, (ptr), atomic_read (&portal_kmemory)); \
+} while (0)
+
+/* PORTAL_SLAB_FREE(ptr, slab, size): return an object to the cache 'slab'.
+ * The first 'size' bytes are overwritten with 0x5a before the object is
+ * freed. A NULL 'ptr' is reported with CERROR and otherwise ignored. */
+#define PORTAL_SLAB_FREE(ptr, slab, size) \
+do { \
+ long s = (size); \
+ if ((ptr) == NULL) { \
+ CERROR("PORTALS: free NULL '" #ptr "' (%ld bytes) at " \
+ "%s:%d\n", s, __FILE__, __LINE__); \
+ break; \
+ } \
+ memset((ptr), 0x5a, s); \
+ kmem_cache_free((slab), ptr); \
+ portal_kmem_dec((ptr), s); \
+ CDEBUG(D_MALLOC, "kfreed '" #ptr "': %ld at %p (tot %d).\n", \
+ s, (ptr), atomic_read (&portal_kmemory)); \
+} while (0)
+
+/* ------------------------------------------------------------------- */
+
+/* Cross-module symbol sharing and module reference counting, expressed
+ * through one set of macros for both kernel generations. */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+
+/* Pre-2.5: symbols are published and looked up by name through the
+ * inter_module_* calls. */
+#define PORTAL_SYMBOL_REGISTER(x) inter_module_register(#x, THIS_MODULE, &x)
+#define PORTAL_SYMBOL_UNREGISTER(x) inter_module_unregister(#x)
+
+#define PORTAL_SYMBOL_GET(x) ((typeof(&x))inter_module_get(#x))
+#define PORTAL_SYMBOL_PUT(x) inter_module_put(#x)
+
+#define PORTAL_MODULE_USE MOD_INC_USE_COUNT
+#define PORTAL_MODULE_UNUSE MOD_DEC_USE_COUNT
+#else
+
+/* 2.5 and later: registration expands to nothing; lookups use
+ * symbol_get()/symbol_put(). */
+#define PORTAL_SYMBOL_REGISTER(x)
+#define PORTAL_SYMBOL_UNREGISTER(x)
+
+#define PORTAL_SYMBOL_GET(x) symbol_get(x)
+#define PORTAL_SYMBOL_PUT(x) symbol_put(x)
+
+/* NOTE(review): try_module_get() yields a result that is discarded when
+ * PORTAL_MODULE_USE is used as a statement - confirm callers do not need it. */
+#define PORTAL_MODULE_USE try_module_get(THIS_MODULE)
+#define PORTAL_MODULE_UNUSE module_put(THIS_MODULE)
+
+#endif
+
+/******************************************************************************/
+/* Kernel Portals Router interface */
+
+typedef void (*kpr_fwd_callback_t)(void *arg, int error); // completion callback
+
+/* space for routing targets to stash "stuff" in a forwarded packet */
+typedef union {
+ long long _alignment;
+ void *_space[16]; /* scale with CPU arch */
+} kprfd_scratch_t;
+
+/* Kernel Portals Routing Forwarded message Descriptor */
+typedef struct {
+ struct list_head kprfd_list; /* stash in queues (routing target can use) */
+ ptl_nid_t kprfd_target_nid; /* final destination NID */
+ ptl_nid_t kprfd_gateway_nid; /* gateway NID */
+ int kprfd_nob; /* # message bytes (including header) */
+ int kprfd_niov; /* # message frags (including header) */
+ struct iovec *kprfd_iov; /* message fragments */
+ void *kprfd_router_arg; // originating NAL's router arg
+ kpr_fwd_callback_t kprfd_callback; /* completion callback */
+ void *kprfd_callback_arg; /* completion callback arg */
+ kprfd_scratch_t kprfd_scratch; // scratchpad for routing targets
+} kpr_fwd_desc_t;
+
+typedef void (*kpr_fwd_t)(void *arg, kpr_fwd_desc_t *fwd);
+
+/* NAL's routing interface (Kernel Portals Routing Nal Interface) */
+typedef const struct {
+ int kprni_nalid; /* NAL's id */
+ void *kprni_arg; /* Arg to pass when calling into NAL */
+ kpr_fwd_t kprni_fwd; /* NAL's forwarding entrypoint */
+} kpr_nal_interface_t;
+
+/* Router's routing interface (Kernel Portals Routing Router Interface) */
+typedef const struct {
+ /* register the calling NAL with the router and get back the handle for
+ * subsequent calls */
+ int (*kprri_register) (kpr_nal_interface_t *nal_interface,
+ void **router_arg);
+
+ /* ask the router to find a gateway that forwards to 'nid' and is a peer
+ * of the calling NAL */
+ int (*kprri_lookup) (void *router_arg, ptl_nid_t nid,
+ ptl_nid_t *gateway_nid);
+
+ /* hand a packet over to the router for forwarding */
+ kpr_fwd_t kprri_fwd_start;
+
+ /* hand a packet back to the router for completion */
+ void (*kprri_fwd_done) (void *router_arg, kpr_fwd_desc_t *fwd,
+ int error);
+
+ /* the calling NAL is shutting down */
+ void (*kprri_shutdown) (void *router_arg);
+
+ /* deregister the calling NAL with the router */
+ void (*kprri_deregister) (void *router_arg);
+
+} kpr_router_interface_t;
+
+/* Convenient struct for NAL to stash router interface/args */
+typedef struct {
+ kpr_router_interface_t *kpr_interface;
+ void *kpr_arg;
+} kpr_router_t;
+
+/* Router's control interface (Kernel Portals Routing Control Interface) */
+typedef const struct {
+ int (*kprci_add_route)(int gateway_nal, ptl_nid_t gateway_nid,
+ ptl_nid_t lo_nid, ptl_nid_t hi_nid);
+ int (*kprci_del_route)(ptl_nid_t nid);
+ int (*kprci_get_route)(int index, int *gateway_nal,
+ ptl_nid_t *gateway, ptl_nid_t *lo_nid,
+ ptl_nid_t *hi_nid);
+} kpr_control_interface_t;
+
+extern kpr_control_interface_t kpr_control_interface;
+extern kpr_router_interface_t kpr_router_interface;
+
+/* Attach 'router' to the exported kpr_router_interface and register the NAL
+ * interface 'nalif' with it.
+ * Returns -ENOENT when the router interface symbol cannot be obtained;
+ * otherwise returns whatever the router's kprri_register() returned, and on
+ * a nonzero result router->kpr_interface is reset to NULL. The symbol
+ * reference taken here is dropped again before returning. */
+static inline int
+kpr_register (kpr_router_t *router, kpr_nal_interface_t *nalif)
+{
+        kpr_router_interface_t *iface;
+        int                     status;
+
+        iface = PORTAL_SYMBOL_GET (kpr_router_interface);
+        router->kpr_interface = iface;
+        if (iface == NULL)
+                return (-ENOENT);
+
+        status = iface->kprri_register (nalif, &router->kpr_arg);
+        if (status != 0)
+                router->kpr_interface = NULL;
+
+        PORTAL_SYMBOL_PUT (kpr_router_interface);
+        return (status);
+}
+
+/* Returns 1 when 'router' currently holds a router interface pointer,
+ * 0 otherwise. */
+static inline int
+kpr_routing (kpr_router_t *router)
+{
+        if (router->kpr_interface == NULL)
+                return (0);
+
+        return (1);
+}
+
+/* Ask the router for a gateway towards 'nid'; the answer is stored through
+ * 'gateway_nid' by the router's kprri_lookup().
+ * Returns -EHOSTUNREACH when 'router' has no interface attached, otherwise
+ * the router's own return value. */
+static inline int
+kpr_lookup (kpr_router_t *router, ptl_nid_t nid, ptl_nid_t *gateway_nid)
+{
+        int rc = -EHOSTUNREACH;
+
+        if (kpr_routing (router))
+                rc = router->kpr_interface->kprri_lookup(router->kpr_arg,
+                                                         nid, gateway_nid);
+        return (rc);
+}
+
+/* Fill in a forwarding descriptor: payload description (nob, niov, iov),
+ * completion callback and its argument, and the destination. Both the
+ * target NID and the gateway NID start out as 'nid'. Fields not listed
+ * here (list linkage, router arg, scratch space) are left untouched. */
+static inline void
+kpr_fwd_init (kpr_fwd_desc_t *fwd, ptl_nid_t nid,
+              int nob, int niov, struct iovec *iov,
+              kpr_fwd_callback_t callback, void *callback_arg)
+{
+        /* what to send */
+        fwd->kprfd_iov          = iov;
+        fwd->kprfd_niov         = niov;
+        fwd->kprfd_nob          = nob;
+
+        /* where to send it */
+        fwd->kprfd_gateway_nid  = nid;
+        fwd->kprfd_target_nid   = nid;
+
+        /* whom to tell when done */
+        fwd->kprfd_callback_arg = callback_arg;
+        fwd->kprfd_callback     = callback;
+}
+
+/* Hand 'fwd' to the router for forwarding. When no router interface is
+ * attached, the descriptor's completion callback is invoked right away with
+ * -EHOSTUNREACH instead. */
+static inline void
+kpr_fwd_start (kpr_router_t *router, kpr_fwd_desc_t *fwd)
+{
+        if (kpr_routing (router)) {
+                router->kpr_interface->kprri_fwd_start (router->kpr_arg, fwd);
+                return;
+        }
+
+        fwd->kprfd_callback (fwd->kprfd_callback_arg, -EHOSTUNREACH);
+}
+
+static inline void
+kpr_fwd_done (kpr_router_t *router, kpr_fwd_desc_t *fwd, int error)
+{
+ LASSERT (kpr_routing (router));
+ router->kpr_interface->kprri_fwd_done (router->kpr_arg, fwd, error);
+}
+
+/* Tell the router that the calling NAL is shutting down; does nothing when
+ * no router interface is attached. */
+static inline void
+kpr_shutdown (kpr_router_t *router)
+{
+        if (!kpr_routing (router))
+                return;
+
+        router->kpr_interface->kprri_shutdown (router->kpr_arg);
+}
+
+/* Deregister the calling NAL from the router and forget the interface
+ * pointer; does nothing when no router interface is attached. */
+static inline void
+kpr_deregister (kpr_router_t *router)
+{
+        if (kpr_routing (router)) {
+                router->kpr_interface->kprri_deregister (router->kpr_arg);
+                router->kpr_interface = NULL;
+        }
+}
+
+/******************************************************************************/
+
+#ifdef PORTALS_PROFILING
+#define prof_enum(FOO) PROF__##FOO
+enum {
+ prof_enum(our_recvmsg),
+ prof_enum(our_sendmsg),
+ prof_enum(socknal_recv),
+ prof_enum(lib_parse),
+ prof_enum(conn_list_walk),
+ prof_enum(memcpy),
+ prof_enum(lib_finalize),
+ prof_enum(pingcli_time),
+ prof_enum(gmnal_send),
+ prof_enum(gmnal_recv),
+ MAX_PROFS
+};
+
+struct prof_ent {
+ char *str;
+ /* hrmph. wrap-tastic. */
+ u32 starts;
+ u32 finishes;
+ cycles_t total_cycles;
+ cycles_t start;
+ cycles_t end;
+};
+
+extern struct prof_ent prof_ents[MAX_PROFS];
+
+#define PROF_START(FOO) \
+ do { \
+ struct prof_ent *pe = &prof_ents[PROF__##FOO]; \
+ pe->starts++; \
+ pe->start = get_cycles(); \
+ } while (0)
+
+#define PROF_FINISH(FOO) \
+ do { \
+ struct prof_ent *pe = &prof_ents[PROF__##FOO]; \
+ pe->finishes++; \
+ pe->end = get_cycles(); \
+ pe->total_cycles += (pe->end - pe->start); \
+ } while (0)
+#else /* !PORTALS_PROFILING */
+#define PROF_START(FOO) do {} while(0)
+#define PROF_FINISH(FOO) do {} while(0)
+#endif /* PORTALS_PROFILING */
+
+/* debug.c */
+void portals_run_lbug_upcall(char * file, char *fn, int line);
+void portals_debug_dumplog(void);
+int portals_debug_init(unsigned long bufsize);
+int portals_debug_cleanup(void);
+int portals_debug_clear_buffer(void);
+int portals_debug_mark_buffer(char *text);
+int portals_debug_set_daemon(unsigned int cmd, unsigned int length,
+ char *file, unsigned int size);
+__s32 portals_debug_copy_to_user(char *buf, unsigned long len);
+#if (__GNUC__)
+/* Use the special GNU C __attribute__ hack to have the compiler check the
+ * printf style argument string against the actual argument count and
+ * types.
+ */
+#ifdef printf
+# warning printf has been defined as a macro...
+# undef printf
+#endif
+void portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
+ unsigned long stack, const char *format, ...)
+ __attribute__ ((format (printf, 7, 8)));
+#else
+void portals_debug_msg (int subsys, int mask, char *file, char *fn,
+ int line, unsigned long stack,
+ const char *format, ...);
+#endif /* __GNUC__ */
+void portals_debug_set_level(unsigned int debug_level);
+
+# define fprintf(a, format, b...) CDEBUG(D_OTHER, format , ## b)
+# define printf(format, b...) CDEBUG(D_OTHER, format , ## b)
+# define time(a) CURRENT_TIME
+
+extern void kportal_daemonize (char *name);
+extern void kportal_blockallsigs (void);
+
+#else /* !__KERNEL__ */
+# include <stdio.h>
+# include <stdlib.h>
+#ifndef __CYGWIN__
+# include <stdint.h>
+#endif
+# include <unistd.h>
+# include <time.h>
+# include <asm/types.h>
+# ifndef DEBUG_SUBSYSTEM
+# define DEBUG_SUBSYSTEM S_UNDEFINED
+# endif
+# ifdef PORTAL_DEBUG
+# undef NDEBUG
+# include <assert.h>
+# define LASSERT(e) assert(e)
+# else
+# define LASSERT(e)
+# endif
+# define printk(format, args...) printf (format, ## args)
+/* User-space stand-ins for the kernel allocation and logging helpers.
+ *
+ * None of the expansions carries a trailing ';', so that
+ * "if (c) PORTAL_ALLOC(p, n); else ..." parses as intended; the caller
+ * supplies the semicolon, as with the kernel definitions.
+ *
+ * PORTAL_ALLOC zero-fills the memory (calloc), matching the kernel
+ * PORTAL_ALLOC; on failure 'ptr' is NULL. PORTAL_FREE ignores its size
+ * argument. portals_debug_msg prints subsystem id, mask, time, file,
+ * function, line, pid and stack figure ahead of the formatted message. */
+# define PORTAL_ALLOC(ptr, size) do { (ptr) = calloc(1, (size)); } while (0)
+# define PORTAL_FREE(a, b) do { free(a); } while (0)
+# define portals_debug_msg(subsys, mask, file, fn, line, stack, format, a...) \
+ printf ("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format, \
+ (subsys) >> 24, (mask), (long)time(0), file, fn, line, \
+ getpid() , stack, ## a)
+#endif
+
+#ifndef CURRENT_TIME
+# define CURRENT_TIME time(0)
+#endif
+
+#include <linux/portals_lib.h>
+
+/*
+ * USER LEVEL STUFF BELOW
+ */
+
+#define PORTAL_IOCTL_VERSION 0x00010007
+#define PING_SYNC 0
+#define PING_ASYNC 1
+
+/* Argument block exchanged between user space and the kernel for portal
+ * ioctls. The fixed part is followed by the inline buffers packed into
+ * ioc_bulk: inlbuf1 starts at ioc_bulk[0] and inlbuf2 at
+ * ioc_bulk + size_round(ioc_inllen1). ioc_len is the total packed length
+ * as computed by portal_ioctl_packlen(). */
+struct portal_ioctl_data {
+ __u32 ioc_len; /* total packed length, checked against packlen */
+ __u32 ioc_version; /* must equal PORTAL_IOCTL_VERSION */
+ __u64 ioc_nid;
+ __u64 ioc_nid2;
+ __u64 ioc_nid3;
+ __u32 ioc_count;
+ __u32 ioc_nal;
+ __u32 ioc_nal_cmd;
+ __u32 ioc_fd;
+ __u32 ioc_id;
+
+ __u32 ioc_flags;
+ __u32 ioc_size;
+
+ __u32 ioc_wait;
+ __u32 ioc_timeout;
+ __u32 ioc_misc;
+
+ /* inline buffers: contents travel inside ioc_bulk; each must be
+ * 0-terminated (checked by portal_ioctl_is_invalid) */
+ __u32 ioc_inllen1;
+ char *ioc_inlbuf1;
+ __u32 ioc_inllen2;
+ char *ioc_inlbuf2;
+
+ __u32 ioc_plen1; /* buffers in userspace */
+ char *ioc_pbuf1;
+ __u32 ioc_plen2; /* buffers in userspace */
+ char *ioc_pbuf2;
+
+ char ioc_bulk[0]; /* start of the packed inline buffers */
+};
+
+struct portal_ioctl_hdr {
+ __u32 ioc_len;
+ __u32 ioc_version;
+};
+
+struct portals_debug_ioctl_data
+{
+ struct portal_ioctl_hdr hdr;
+ unsigned int subs;
+ unsigned int debug;
+};
+
+#define PORTAL_IOC_INIT(data) \
+do { \
+ memset(&data, 0, sizeof(data)); \
+ data.ioc_version = PORTAL_IOCTL_VERSION; \
+ data.ioc_len = sizeof(data); \
+} while (0)
+
+/* FIXME check conflict with lustre_lib.h */
+#define PTL_IOC_DEBUG_MASK _IOWR('f', 250, long)
+
+/* Packed size of an ioctl request: the fixed structure plus both inline
+ * buffers, each rounded up with size_round(). */
+static inline int portal_ioctl_packlen(struct portal_ioctl_data *data)
+{
+        int total = sizeof(*data);
+
+        total += size_round(data->ioc_inllen1);
+        total += size_round(data->ioc_inllen2);
+
+        return total;
+}
+
+/* Sanity-check a packed ioctl request. Returns 1 (after logging the reason)
+ * when the request is malformed and 0 when it passes every check:
+ *   - ioc_len and both inline lengths are at most 1<<30;
+ *   - no buffer pointer is set with a zero length, and no user-space
+ *     buffer length is set without its pointer;
+ *   - ioc_len equals portal_ioctl_packlen();
+ *   - each non-empty inline buffer ends in a 0 byte.
+ * The order matters: the terminator checks index into ioc_bulk and are only
+ * reached once the lengths have been validated. */
+static inline int portal_ioctl_is_invalid(struct portal_ioctl_data *data)
+{
+        const __u32 inl1 = data->ioc_inllen1;
+        const __u32 inl2 = data->ioc_inllen2;
+
+        /* upper bounds on the lengths */
+        if (data->ioc_len > (1<<30)) {
+                CERROR ("PORTALS ioctl: ioc_len larger than 1<<30\n");
+                return 1;
+        }
+        if (inl1 > (1<<30)) {
+                CERROR ("PORTALS ioctl: ioc_inllen1 larger than 1<<30\n");
+                return 1;
+        }
+        if (inl2 > (1<<30)) {
+                CERROR ("PORTALS ioctl: ioc_inllen2 larger than 1<<30\n");
+                return 1;
+        }
+
+        /* pointer / length pairs have to agree */
+        if (data->ioc_inlbuf1 != NULL && inl1 == 0) {
+                CERROR ("PORTALS ioctl: inlbuf1 pointer but 0 length\n");
+                return 1;
+        }
+        if (data->ioc_inlbuf2 != NULL && inl2 == 0) {
+                CERROR ("PORTALS ioctl: inlbuf2 pointer but 0 length\n");
+                return 1;
+        }
+        if (data->ioc_pbuf1 != NULL && data->ioc_plen1 == 0) {
+                CERROR ("PORTALS ioctl: pbuf1 pointer but 0 length\n");
+                return 1;
+        }
+        if (data->ioc_pbuf2 != NULL && data->ioc_plen2 == 0) {
+                CERROR ("PORTALS ioctl: pbuf2 pointer but 0 length\n");
+                return 1;
+        }
+        if (data->ioc_plen1 != 0 && data->ioc_pbuf1 == NULL) {
+                CERROR ("PORTALS ioctl: plen1 nonzero but no pbuf1 pointer\n");
+                return 1;
+        }
+        if (data->ioc_plen2 != 0 && data->ioc_pbuf2 == NULL) {
+                CERROR ("PORTALS ioctl: plen2 nonzero but no pbuf2 pointer\n");
+                return 1;
+        }
+
+        /* overall length must match the packed layout */
+        if (portal_ioctl_packlen(data) != data->ioc_len ) {
+                CERROR ("PORTALS ioctl: packlen != ioc_len\n");
+                return 1;
+        }
+
+        /* inline buffers must be 0 terminated */
+        if (inl1 != 0 && data->ioc_bulk[inl1 - 1] != '\0') {
+                CERROR ("PORTALS ioctl: inlbuf1 not 0 terminated\n");
+                return 1;
+        }
+        if (inl2 != 0 &&
+            data->ioc_bulk[size_round(inl1) + inl2 - 1] != '\0') {
+                CERROR ("PORTALS ioctl: inlbuf2 not 0 terminated\n");
+                return 1;
+        }
+
+        return 0;
+}
+
+#ifndef __KERNEL__
+/* Pack 'data' and its inline buffers into one contiguous request.
+ *
+ * Sets data->ioc_len and data->ioc_version. If *pbuf is NULL a buffer of
+ * ioc_len bytes is malloc()ed and stored in *pbuf; otherwise the caller's
+ * buffer is used and 'max' is its capacity. The fixed part is copied first,
+ * then each inline buffer that is present is appended with LOGL().
+ *
+ * Returns 0 on success and 1 when the request does not fit in 'max', the
+ * allocation fails, or the packed result fails portal_ioctl_is_invalid().
+ * A buffer allocated here is left in *pbuf on failure. */
+static inline int portal_ioctl_pack(struct portal_ioctl_data *data, char **pbuf,
+                                    int max)
+{
+        struct portal_ioctl_data *overlay;
+        char *cursor;
+        int packlen = portal_ioctl_packlen(data);
+
+        data->ioc_len = packlen;
+        data->ioc_version = PORTAL_IOCTL_VERSION;
+
+        if (*pbuf != NULL) {
+                /* caller-supplied buffer: it has to be large enough */
+                if (packlen > max)
+                        return 1;
+        } else {
+                *pbuf = malloc(data->ioc_len);
+                if (*pbuf == NULL)
+                        return 1;
+        }
+
+        memcpy(*pbuf, data, sizeof(*data));
+        overlay = (struct portal_ioctl_data *)*pbuf;
+
+        cursor = overlay->ioc_bulk;
+        if (data->ioc_inlbuf1)
+                LOGL(data->ioc_inlbuf1, data->ioc_inllen1, cursor);
+        if (data->ioc_inlbuf2)
+                LOGL(data->ioc_inlbuf2, data->ioc_inllen2, cursor);
+
+        return portal_ioctl_is_invalid(overlay) ? 1 : 0;
+}
+#else
+#include <asm/uaccess.h>
+
+/* Copy a portal ioctl request from user space into the kernel buffer
+ * [buf, end) and validate it.
+ *
+ * buf/end: kernel buffer; it MUST be at least the size of portal_ioctl_hdr.
+ * arg:     user-space address of the packed struct portal_ioctl_data.
+ *
+ * The header is fetched first to learn the version and total length; the
+ * whole request is then fetched and checked with portal_ioctl_is_invalid().
+ * On success ioc_inlbuf1/ioc_inlbuf2 are pointed at their data inside
+ * ioc_bulk.
+ *
+ * Returns 0 on success, -EFAULT when the user memory cannot be read and
+ * -EINVAL when the request is malformed or does not fit in the buffer. */
+static inline int portal_ioctl_getdata(char *buf, char *end, void *arg)
+{
+        struct portal_ioctl_hdr *hdr;
+        struct portal_ioctl_data *data;
+        __u32 ioc_len;
+        ENTRY;
+
+        hdr = (struct portal_ioctl_hdr *)buf;
+        data = (struct portal_ioctl_data *)buf;
+
+        /* copy_from_user() returns the number of bytes it could NOT copy,
+         * which is not an errno; report the fault as -EFAULT */
+        if (copy_from_user(buf, (void *)arg, sizeof(*hdr))) {
+                EXIT;
+                return -EFAULT;
+        }
+
+        if (hdr->ioc_version != PORTAL_IOCTL_VERSION) {
+                CERROR ("PORTALS: version mismatch kernel vs application\n");
+                return -EINVAL;
+        }
+
+        /* remember the length that gets validated: the header is read from
+         * user memory a second time below */
+        ioc_len = hdr->ioc_len;
+
+        /* compare lengths, not pointers: 'buf + ioc_len' could wrap around
+         * for a huge user-supplied length */
+        if (end <= buf || ioc_len >= (unsigned long)(end - buf)) {
+                CERROR ("PORTALS: user buffer exceeds kernel buffer\n");
+                return -EINVAL;
+        }
+
+        if (ioc_len < sizeof(struct portal_ioctl_data)) {
+                CERROR ("PORTALS: user buffer too small for ioctl\n");
+                return -EINVAL;
+        }
+
+        if (copy_from_user(buf, (void *)arg, ioc_len)) {
+                EXIT;
+                return -EFAULT;
+        }
+
+        /* user space may have rewritten the header between the two copies;
+         * every later check relies on the length validated above */
+        if (data->ioc_len != ioc_len) {
+                CERROR ("PORTALS: ioctl length changed during copy\n");
+                return -EINVAL;
+        }
+
+        if (portal_ioctl_is_invalid(data)) {
+                CERROR ("PORTALS: ioctl not correctly formatted\n");
+                return -EINVAL;
+        }
+
+        /* point the inline buffer pointers at the copied-in data */
+        if (data->ioc_inllen1) {
+                data->ioc_inlbuf1 = &data->ioc_bulk[0];
+        }
+
+        if (data->ioc_inllen2) {
+                data->ioc_inlbuf2 = &data->ioc_bulk[0] +
+                        size_round(data->ioc_inllen1);
+        }
+
+        EXIT;
+        return 0;
+}
+#endif
+
+/* PORTALS ioctl commands: type 'e', command numbers 30-41
+ * (IOC_PORTAL_MIN_NR .. IOC_PORTAL_MAX_NR) */
+#define IOC_PORTAL_TYPE                   'e'
+#define IOC_PORTAL_MIN_NR                 30
+
+/* NOTE: the commands below spell out 'e' directly rather than using
+ * IOC_PORTAL_TYPE; keep the two in step */
+#define IOC_PORTAL_PING                    _IOWR('e', 30, long)
+#define IOC_PORTAL_GET_DEBUG               _IOWR('e', 31, long)
+#define IOC_PORTAL_CLEAR_DEBUG             _IOWR('e', 32, long)
+#define IOC_PORTAL_MARK_DEBUG              _IOWR('e', 33, long)
+#define IOC_PORTAL_PANIC                   _IOWR('e', 34, long)
+#define IOC_PORTAL_ADD_ROUTE               _IOWR('e', 35, long)
+#define IOC_PORTAL_DEL_ROUTE               _IOWR('e', 36, long)
+#define IOC_PORTAL_GET_ROUTE               _IOWR('e', 37, long)
+#define IOC_PORTAL_NAL_CMD                 _IOWR('e', 38, long)
+#define IOC_PORTAL_GET_NID                 _IOWR('e', 39, long)
+#define IOC_PORTAL_FAIL_NID                _IOWR('e', 40, long)
+#define IOC_PORTAL_SET_DAEMON              _IOWR('e', 41, long)
+
+/* must equal the highest command number above */
+#define IOC_PORTAL_MAX_NR                 41
+
+/*
+ * NAL type numbers.  Numbering starts at 1 and is consecutive;
+ * NAL_ENUM_END_MARKER must stay last because NAL_MAX_NR is derived
+ * from it.
+ */
+enum {
+        QSWNAL = 1,
+        SOCKNAL,
+        GMNAL,
+        TOENAL,
+        TCPNAL,
+        SCIMACNAL,
+        NAL_ENUM_END_MARKER
+};
+
+#ifdef __KERNEL__
+extern ptl_handle_ni_t kqswnal_ni;
+extern ptl_handle_ni_t ksocknal_ni;
+extern ptl_handle_ni_t ktoenal_ni;
+extern ptl_handle_ni_t kgmnal_ni;
+extern ptl_handle_ni_t kscimacnal_ni;
+#endif
+
+#define NAL_MAX_NR (NAL_ENUM_END_MARKER - 1)
+
+#define NAL_CMD_REGISTER_PEER_FD 100
+#define NAL_CMD_CLOSE_CONNECTION 101
+#define NAL_CMD_REGISTER_MYNID 102
+#define NAL_CMD_PUSH_CONNECTION 103
+
+enum {
+ DEBUG_DAEMON_START = 1,
+ DEBUG_DAEMON_STOP = 2,
+ DEBUG_DAEMON_PAUSE = 3,
+ DEBUG_DAEMON_CONTINUE = 4,
+};
+
+/* XXX remove to lustre ASAP */
+struct lustre_peer {
+ ptl_nid_t peer_nid;
+ ptl_handle_ni_t peer_ni;
+};
+
+/* module.c */
+typedef int (*nal_cmd_handler_t)(struct portal_ioctl_data *, void * private);
+int kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private);
+int kportal_nal_unregister(int nal);
+
+ptl_handle_ni_t *kportal_get_ni (int nal);
+void kportal_put_ni (int nal);
+
+#ifdef __CYGWIN__
+#ifndef BITS_PER_LONG
+#if (~0UL) == 0xffffffffUL
+#define BITS_PER_LONG 32
+#else
+#define BITS_PER_LONG 64
+#endif
+#endif
+#endif
+
+/*
+ * printf-style format strings selected by word size: LPU64/LPD64/LPX64 for
+ * 64-bit integers, LPSZ/LPSSZ presumably for size_t/ssize_t sized values
+ * (confirm at call sites).  Either BITS_PER_LONG or __WORDSIZE must
+ * identify a 32- or 64-bit build; if neither does, LPU64 stays undefined
+ * and the #error below stops the compile.
+ */
+#if (BITS_PER_LONG == 32 || __WORDSIZE == 32)
+# define LPU64 "%Lu"
+# define LPD64 "%Ld"
+# define LPX64 "%#Lx"
+# define LPSZ  "%u"
+# define LPSSZ "%d"
+#endif
+#if (BITS_PER_LONG == 64 || __WORDSIZE == 64)
+# define LPU64 "%lu"
+# define LPD64 "%ld"
+# define LPX64 "%#lx"
+# define LPSZ  "%lu"
+# define LPSSZ "%ld"
+#endif
+#ifndef LPU64
+# error "No word size defined"
+#endif
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines.
+ *
+ */
+
+#ifndef _PORTALS_LIB_H
+#define _PORTALS_LIB_H
+
+#ifndef __KERNEL__
+# include <string.h>
+#else
+# include <asm/types.h>
+#endif
+
+#undef MIN
+#define MIN(a,b) (((a)<(b)) ? (a): (b))
+#undef MAX
+#define MAX(a,b) (((a)>(b)) ? (a): (b))
+/* Map a NULL string pointer to "".  The whole expansion is parenthesised
+ * so the macro can be used inside a larger expression; ptr is evaluated
+ * twice, so avoid arguments with side effects. */
+#define MKSTR(ptr) (((ptr)) ? (ptr) : "")
+
+/* Round val up to the next multiple of 8; aligned values are unchanged. */
+static inline int size_round (int val)
+{
+        const int align_mask = 0x7;
+
+        return (val + align_mask) & ~align_mask;
+}
+
+/* Like size_round(), but one extra byte is added before rounding up to a
+ * multiple of 8.  A zero length stays zero. */
+static inline int size_round0(int val)
+{
+        return val ? ((val + 1 + 7) & (~0x7)) : 0;
+}
+
+/* Space needed to store the string fset including its terminating NUL,
+ * rounded up by size_round().  fset is only read, hence const. */
+static inline size_t round_strlen(const char *fset)
+{
+        return size_round(strlen(fset) + 1);
+}
+
+#ifdef __KERNEL__
+/* Kernel-side strdup(): returns a kmalloc(GFP_KERNEL) copy of str including
+ * its terminating NUL, or NULL if the allocation fails. */
+static inline char *strdup(const char *str)
+{
+        int nbytes = strlen(str) + 1;
+        char *copy;
+
+        copy = kmalloc(nbytes, GFP_KERNEL);
+        if (copy == NULL)
+                return NULL;
+
+        memcpy(copy, str, nbytes);
+        return copy;
+}
+#endif
+
+/*
+ * Conversion helpers pasted together by LLOGV/LUNLOGV (HTON##type and
+ * NTOH##type).  In the kernel they map to the little-endian helpers; in
+ * user space the 32-bit forms are the identity and the 64-bit forms only
+ * widen the value to __u64.
+ */
+#ifdef __KERNEL__
+# define NTOH__u32(var) le32_to_cpu(var)
+# define NTOH__u64(var) le64_to_cpu(var)
+# define HTON__u32(var) cpu_to_le32(var)
+# define HTON__u64(var) cpu_to_le64(var)
+#else
+/* Widen an unsigned value of 1, 2, 4 or 8 bytes to __u64 without sign
+ * extension.  The temporary has a deliberately unusual name so that an
+ * argument called "ret" is not captured, and it starts at 0 so the result
+ * is defined even if no case matches. */
+# define expansion_u64(var)                                     \
+        ({ __u64 exp_u64_ret_ = 0;                              \
+           switch (sizeof(var)) {                               \
+           case 8: exp_u64_ret_ = (var); break;                 \
+           case 4: exp_u64_ret_ = (__u32)(var); break;          \
+           case 2: exp_u64_ret_ = (__u16)(var); break;          \
+           case 1: exp_u64_ret_ = (__u8)(var); break;           \
+           default: break;                                      \
+           }                                                    \
+           exp_u64_ret_;                                        \
+        })
+# define NTOH__u32(var) (var)
+# define NTOH__u64(var) (expansion_u64(var))
+# define HTON__u32(var) (var)
+# define HTON__u64(var) (expansion_u64(var))
+#endif
+
+/*
+ * copy sizeof(type) bytes from pointer to var and move ptr forward.
+ * Returns -EFAULT from the ENCLOSING function if fewer than sizeof(type)
+ * bytes remain before end.  The check is made before the read, so nothing
+ * beyond end is ever dereferenced; on failure var and ptr are untouched.
+ */
+#define UNLOGV(var,type,ptr,end)                                        \
+do {                                                                    \
+        if ((char *)(ptr) > (char *)(end) ||                            \
+            (size_t)((char *)(end) - (char *)(ptr)) < sizeof(type))     \
+                return -EFAULT;                                         \
+        var = *(type *)(ptr);                                           \
+        ptr += sizeof(type);                                            \
+} while (0)
+
+/* the following two macros convert to little endian */
+/* type MUST be __u32 or __u64 (the name is pasted onto NTOH) */
+/* As UNLOGV, but the value read is passed through NTOH<type>.  The bounds
+ * check is made before the read, so nothing beyond end is dereferenced;
+ * on failure the enclosing function returns -EFAULT. */
+#define LUNLOGV(var,type,ptr,end)                                       \
+do {                                                                    \
+        if ((char *)(ptr) > (char *)(end) ||                            \
+            (size_t)((char *)(end) - (char *)(ptr)) < sizeof(type))     \
+                return -EFAULT;                                         \
+        var = NTOH##type(*(type *)(ptr));                               \
+        ptr += sizeof(type);                                            \
+} while (0)
+
+/* now log values: store var at ptr as a `type` and advance ptr by
+ * sizeof(type).  No bounds check is made; the caller must have sized the
+ * buffer. */
+#define LOGV(var,type,ptr)                              \
+do {                                                    \
+        *((type *)ptr) = var;                           \
+        ptr += sizeof(type);                            \
+} while (0)
+
+/* and in network order: as LOGV, but var is passed through HTON<type>
+ * before being stored.  type must be __u32 or __u64 so that the pasted
+ * macro name exists. */
+#define LLOGV(var,type,ptr)                             \
+do {                                                    \
+        *((type *)ptr) = HTON##type(var);               \
+        ptr += sizeof(type);                            \
+} while (0)
+
+
+/*
+ * set var to point at (type *)ptr, move ptr forward with sizeof(type)
+ * return from function with -EFAULT if ptr goes beyond end.
+ * Nothing is copied: var aliases the buffer.  var is assigned before the
+ * bounds check, so it is already set when the macro returns -EFAULT.
+ */
+#define UNLOGP(var,type,ptr,end)                        \
+do {                                                    \
+        var = (type *)ptr;                              \
+        ptr += sizeof(type);                            \
+        if (ptr > end )                                 \
+                return -EFAULT;                         \
+} while (0)
+
+/* copy the sizeof(type) bytes that var points at to ptr and advance ptr by
+ * sizeof(type).  No bounds check. */
+#define LOGP(var,type,ptr)                              \
+do {                                                    \
+        memcpy(ptr, var, sizeof(type));                 \
+        ptr += sizeof(type);                            \
+} while (0)
+
+/*
+ * set var to point at (type *)ptr, move ptr forward by
+ * size_round(len * sizeof(type));
+ * return from function with -EFAULT if ptr goes beyond end.
+ * len is parenthesised so that an expression such as `a + b` is multiplied
+ * as a whole.
+ */
+#define UNLOGL(var,type,len,ptr,end)                    \
+do {                                                    \
+        var = (type *)ptr;                              \
+        ptr += size_round((len) * sizeof(type));        \
+        if (ptr > end )                                 \
+                return -EFAULT;                         \
+} while (0)
+
+/*
+ * As UNLOGL, and additionally require the last byte of the len elements to
+ * be NUL; otherwise return -EFAULT from the enclosing function.  The byte
+ * is located relative to var (the start of the data), which is correct for
+ * any element size.  A zero len has no terminator to check, so the test is
+ * skipped rather than reading the byte in front of the buffer.
+ */
+#define UNLOGL0(var,type,len,ptr,end)                                   \
+do {                                                                    \
+        UNLOGL(var,type,(len),ptr,end);                                 \
+        if ((len) != 0 &&                                               \
+            ((const char *)(var))[(len) * sizeof(type) - 1] != '\0')    \
+                return -EFAULT;                                         \
+} while (0)
+
+/* copy len bytes from var to ptr (skipped when var is NULL) and advance
+ * ptr by size_round(len) either way.  No bounds check. */
+#define LOGL(var,len,ptr)                                       \
+do {                                                            \
+        if (var)                                                \
+                memcpy((char *)ptr, (const char *)var, len);    \
+        ptr += size_round(len);                                 \
+} while (0)
+
+/* reverse direction of LOGL: copy len bytes from ptr into var (skipped
+ * when var is NULL) and advance ptr by size_round(len) either way. */
+#define LOGU(var,len,ptr)                                       \
+do {                                                            \
+        if (var)                                                \
+                memcpy((char *)var, (const char *)ptr, len);    \
+        ptr += size_round(len);                                 \
+} while (0)
+
+/* copy len bytes from var to ptr, append a NUL byte and advance ptr by
+ * size_round(len + 1).  When len is 0 nothing is written and ptr does not
+ * move.  Unlike LOGL, var is not checked for NULL. */
+#define LOGL0(var,len,ptr)                              \
+do {                                                    \
+        if (!len)                                       \
+                break;                                  \
+        memcpy((char *)ptr, (const char *)var, len);    \
+        *((char *)(ptr) + len) = 0;                     \
+        ptr += size_round(len + 1);                     \
+} while (0)
+
+#endif /* _PORTALS_LIB_H */
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+SUBDIRS = base
+include $(top_srcdir)/Rules
+
+pkginclude_HEADERS=api-support.h api.h arg-blocks.h defines.h errno.h internal.h lib-dispatch.h lib-nal.h lib-p30.h lib-types.h myrnal.h nal.h p30.h ppid.h ptlctl.h stringtab.h types.h nalids.h list.h bridge.h ipmap.h procbridge.h lltrace.h
+
--- /dev/null
+# define DEBUG_SUBSYSTEM S_PORTALS
+# define PORTAL_DEBUG
+
+#ifndef __KERNEL__
+# include <stdio.h>
+# include <stdlib.h>
+# include <unistd.h>
+# include <time.h>
+
+/* Lots of POSIX dependencies to support PtlEQWait_timeout */
+# include <signal.h>
+# include <setjmp.h>
+# include <time.h>
+#endif
+
+#include <portals/types.h>
+#include <linux/kp30.h>
+#include <portals/p30.h>
+
+#include <portals/internal.h>
+#include <portals/nal.h>
+#include <portals/arg-blocks.h>
+
+/* Hack for 2.4.18 macro name collision */
+#ifdef yield
+#undef yield
+#endif
--- /dev/null
+#ifndef P30_API_H
+#define P30_API_H
+
+#include <portals/types.h>
+
+#ifndef PTL_NO_WRAP
+int PtlInit(void);
+int PtlInitialized(void);
+void PtlFini(void);
+
+int PtlNIInit(ptl_interface_t interface, ptl_pt_index_t ptl_size_in,
+ ptl_ac_index_t acl_size_in, ptl_pid_t requested_pid,
+ ptl_handle_ni_t * interface_out);
+
+int PtlNIInitialized(ptl_interface_t);
+
+int PtlNIFini(ptl_handle_ni_t interface_in);
+
+#endif
+
+int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id);
+
+
+/*
+ * Network interfaces
+ */
+
+#ifndef PTL_NO_WRAP
+int PtlNIBarrier(ptl_handle_ni_t interface_in);
+#endif
+
+int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in,
+ ptl_sr_value_t * status_out);
+
+int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in,
+ unsigned long *distance_out);
+
+#ifndef PTL_NO_WRAP
+int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * interface_out);
+#endif
+
+
+/*
+ * PtlNIDebug:
+ *
+ * This is not an official Portals 3 API call. It is provided
+ * by the reference implementation to allow the maintainers an
+ * easy way to turn on and off debugging information in the
+ * library. Do not use it in code that is not intended for use
+ * with any version other than the portable reference library.
+ */
+unsigned int PtlNIDebug(ptl_handle_ni_t ni, unsigned int mask_in);
+
+/*
+ * PtlFailNid
+ *
+ * Not an official Portals 3 API call. It provides a way of simulating
+ * communications failures to all (nid == PTL_NID_ANY), or specific peers
+ * (via multiple calls), either until further notice (threshold == -1), or
+ * for a specific number of messages. Passing a threshold of zero, "heals"
+ * the given peer.
+ */
+int PtlFailNid (ptl_handle_ni_t ni, ptl_nid_t nid, unsigned int threshold);
+
+
+/*
+ * Match entries
+ */
+
+int PtlMEAttach(ptl_handle_ni_t interface_in, ptl_pt_index_t index_in,
+ ptl_process_id_t match_id_in, ptl_match_bits_t match_bits_in,
+ ptl_match_bits_t ignore_bits_in, ptl_unlink_t unlink_in,
+ ptl_ins_pos_t pos_in, ptl_handle_me_t * handle_out);
+
+int PtlMEInsert(ptl_handle_me_t current_in, ptl_process_id_t match_id_in,
+ ptl_match_bits_t match_bits_in, ptl_match_bits_t ignore_bits_in,
+ ptl_unlink_t unlink_in, ptl_ins_pos_t position_in,
+ ptl_handle_me_t * handle_out);
+
+int PtlMEUnlink(ptl_handle_me_t current_in);
+
+int PtlMEUnlinkList(ptl_handle_me_t current_in);
+
+int PtlTblDump(ptl_handle_ni_t ni, int index_in);
+int PtlMEDump(ptl_handle_me_t current_in);
+
+
+
+/*
+ * Memory descriptors
+ */
+
+#ifndef PTL_NO_WRAP
+int PtlMDAttach(ptl_handle_me_t current_in, ptl_md_t md_in,
+ ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out);
+
+int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in,
+ ptl_handle_md_t * handle_out);
+
+int PtlMDUnlink(ptl_handle_md_t md_in);
+
+int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t * old_inout,
+ ptl_md_t * new_inout, ptl_handle_eq_t testq_in);
+
+#endif
+
+/* These should not be called by users */
+int PtlMDUpdate_internal(ptl_handle_md_t md_in, ptl_md_t * old_inout,
+ ptl_md_t * new_inout, ptl_handle_eq_t testq_in,
+ ptl_seq_t sequence_in);
+
+
+
+
+/*
+ * Event queues
+ */
+#ifndef PTL_NO_WRAP
+
+/* These should be called by users */
+int PtlEQAlloc(ptl_handle_ni_t ni_in, ptl_size_t count_in,
+ int (*callback) (ptl_event_t * event),
+ ptl_handle_eq_t * handle_out);
+int PtlEQFree(ptl_handle_eq_t eventq_in);
+
+int PtlEQCount(ptl_handle_eq_t eventq_in, ptl_size_t * count_out);
+
+int PtlEQGet(ptl_handle_eq_t eventq_in, ptl_event_t * event_out);
+
+
+int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t * event_out);
+
+int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out,
+ int timeout);
+#endif
+
+/*
+ * Access Control Table
+ */
+int PtlACEntry(ptl_handle_ni_t ni_in, ptl_ac_index_t index_in,
+ ptl_process_id_t match_id_in, ptl_pt_index_t portal_in);
+
+
+/*
+ * Data movement
+ */
+
+int PtlPut(ptl_handle_md_t md_in, ptl_ack_req_t ack_req_in,
+ ptl_process_id_t target_in, ptl_pt_index_t portal_in,
+ ptl_ac_index_t cookie_in, ptl_match_bits_t match_bits_in,
+ ptl_size_t offset_in, ptl_hdr_data_t hdr_data_in);
+
+int PtlGet(ptl_handle_md_t md_in, ptl_process_id_t target_in,
+ ptl_pt_index_t portal_in, ptl_ac_index_t cookie_in,
+ ptl_match_bits_t match_bits_in, ptl_size_t offset_in);
+
+
+
+#endif
--- /dev/null
+#ifndef PTL_BLOCKS_H
+#define PTL_BLOCKS_H
+
+/*
+ * blocks.h
+ *
+ * Argument block types for the Portals 3.0 library
+ * Generated by idl
+ *
+ */
+
+#include <portals/types.h>
+
+/* put LIB_MAX_DISPATCH last here -- these must match the
+   assignments to the dispatch table in lib-p30/dispatch.c */
+#define PTL_GETID 1
+#define PTL_NISTATUS 2
+#define PTL_NIDIST 3
+#define PTL_NIDEBUG 4
+#define PTL_MEATTACH 5
+#define PTL_MEINSERT 6
+// #define PTL_MEPREPEND 7
+#define PTL_MEUNLINK 8
+#define PTL_TBLDUMP 9
+#define PTL_MEDUMP 10
+#define PTL_MDATTACH 11
+// #define PTL_MDINSERT 12
+#define PTL_MDBIND 13
+#define PTL_MDUPDATE 14
+#define PTL_MDUNLINK 15
+#define PTL_EQALLOC 16
+#define PTL_EQFREE 17
+#define PTL_ACENTRY 18
+#define PTL_PUT 19
+#define PTL_GET 20
+#define PTL_FAILNID 21
+#define LIB_MAX_DISPATCH 21
+
+typedef struct PtlFailNid_in {
+ ptl_handle_ni_t interface;
+ ptl_nid_t nid;
+ unsigned int threshold;
+} PtlFailNid_in;
+
+typedef struct PtlFailNid_out {
+ int rc;
+} PtlFailNid_out;
+
+typedef struct PtlGetId_in {
+ ptl_handle_ni_t handle_in;
+} PtlGetId_in;
+
+typedef struct PtlGetId_out {
+ int rc;
+ ptl_process_id_t id_out;
+} PtlGetId_out;
+
+typedef struct PtlNIStatus_in {
+ ptl_handle_ni_t interface_in;
+ ptl_sr_index_t register_in;
+} PtlNIStatus_in;
+
+typedef struct PtlNIStatus_out {
+ int rc;
+ ptl_sr_value_t status_out;
+} PtlNIStatus_out;
+
+
+typedef struct PtlNIDist_in {
+ ptl_handle_ni_t interface_in;
+ ptl_process_id_t process_in;
+} PtlNIDist_in;
+
+typedef struct PtlNIDist_out {
+ int rc;
+ unsigned long distance_out;
+} PtlNIDist_out;
+
+
+typedef struct PtlNIDebug_in {
+ unsigned int mask_in;
+} PtlNIDebug_in;
+
+typedef struct PtlNIDebug_out {
+ unsigned int rc;
+} PtlNIDebug_out;
+
+
+typedef struct PtlMEAttach_in {
+ ptl_handle_ni_t interface_in;
+ ptl_pt_index_t index_in;
+ ptl_ins_pos_t position_in;
+ ptl_process_id_t match_id_in;
+ ptl_match_bits_t match_bits_in;
+ ptl_match_bits_t ignore_bits_in;
+ ptl_unlink_t unlink_in;
+} PtlMEAttach_in;
+
+typedef struct PtlMEAttach_out {
+ int rc;
+ ptl_handle_me_t handle_out;
+} PtlMEAttach_out;
+
+
+typedef struct PtlMEInsert_in {
+ ptl_handle_me_t current_in;
+ ptl_process_id_t match_id_in;
+ ptl_match_bits_t match_bits_in;
+ ptl_match_bits_t ignore_bits_in;
+ ptl_unlink_t unlink_in;
+ ptl_ins_pos_t position_in;
+} PtlMEInsert_in;
+
+typedef struct PtlMEInsert_out {
+ int rc;
+ ptl_handle_me_t handle_out;
+} PtlMEInsert_out;
+
+typedef struct PtlMEUnlink_in {
+ ptl_handle_me_t current_in;
+ ptl_unlink_t unlink_in;
+} PtlMEUnlink_in;
+
+typedef struct PtlMEUnlink_out {
+ int rc;
+} PtlMEUnlink_out;
+
+
+typedef struct PtlTblDump_in {
+ int index_in;
+} PtlTblDump_in;
+
+typedef struct PtlTblDump_out {
+ int rc;
+} PtlTblDump_out;
+
+
+typedef struct PtlMEDump_in {
+ ptl_handle_me_t current_in;
+} PtlMEDump_in;
+
+typedef struct PtlMEDump_out {
+ int rc;
+} PtlMEDump_out;
+
+
+typedef struct PtlMDAttach_in {
+ ptl_handle_me_t me_in;
+ ptl_handle_eq_t eq_in;
+ ptl_md_t md_in;
+ ptl_unlink_t unlink_in;
+} PtlMDAttach_in;
+
+typedef struct PtlMDAttach_out {
+ int rc;
+ ptl_handle_md_t handle_out;
+} PtlMDAttach_out;
+
+
+typedef struct PtlMDBind_in {
+ ptl_handle_ni_t ni_in;
+ ptl_handle_eq_t eq_in;
+ ptl_md_t md_in;
+} PtlMDBind_in;
+
+typedef struct PtlMDBind_out {
+ int rc;
+ ptl_handle_md_t handle_out;
+} PtlMDBind_out;
+
+
+typedef struct PtlMDUpdate_internal_in {
+ ptl_handle_md_t md_in;
+ ptl_handle_eq_t testq_in;
+ ptl_seq_t sequence_in;
+
+ ptl_md_t old_inout;
+ int old_inout_valid;
+ ptl_md_t new_inout;
+ int new_inout_valid;
+} PtlMDUpdate_internal_in;
+
+typedef struct PtlMDUpdate_internal_out {
+ int rc;
+ ptl_md_t old_inout;
+ ptl_md_t new_inout;
+} PtlMDUpdate_internal_out;
+
+
+typedef struct PtlMDUnlink_in {
+ ptl_handle_md_t md_in;
+} PtlMDUnlink_in;
+
+typedef struct PtlMDUnlink_out {
+ int rc;
+ ptl_md_t status_out;
+} PtlMDUnlink_out;
+
+
+typedef struct PtlEQAlloc_in {
+ ptl_handle_ni_t ni_in;
+ ptl_size_t count_in;
+ void *base_in;
+ int len_in;
+ int (*callback_in) (ptl_event_t * event);
+} PtlEQAlloc_in;
+
+typedef struct PtlEQAlloc_out {
+ int rc;
+ ptl_handle_eq_t handle_out;
+} PtlEQAlloc_out;
+
+
+typedef struct PtlEQFree_in {
+ ptl_handle_eq_t eventq_in;
+} PtlEQFree_in;
+
+typedef struct PtlEQFree_out {
+ int rc;
+} PtlEQFree_out;
+
+
+typedef struct PtlACEntry_in {
+ ptl_handle_ni_t ni_in;
+ ptl_ac_index_t index_in;
+ ptl_process_id_t match_id_in;
+ ptl_pt_index_t portal_in;
+} PtlACEntry_in;
+
+typedef struct PtlACEntry_out {
+ int rc;
+} PtlACEntry_out;
+
+
+typedef struct PtlPut_in {
+ ptl_handle_md_t md_in;
+ ptl_ack_req_t ack_req_in;
+ ptl_process_id_t target_in;
+ ptl_pt_index_t portal_in;
+ ptl_ac_index_t cookie_in;
+ ptl_match_bits_t match_bits_in;
+ ptl_size_t offset_in;
+ ptl_hdr_data_t hdr_data_in;
+} PtlPut_in;
+
+typedef struct PtlPut_out {
+ int rc;
+} PtlPut_out;
+
+
+typedef struct PtlGet_in {
+ ptl_handle_md_t md_in;
+ ptl_process_id_t target_in;
+ ptl_pt_index_t portal_in;
+ ptl_ac_index_t cookie_in;
+ ptl_match_bits_t match_bits_in;
+ ptl_size_t offset_in;
+} PtlGet_in;
+
+typedef struct PtlGet_out {
+ int rc;
+} PtlGet_out;
+
+
+#endif
--- /dev/null
+/*
+** $Id: defines.h,v 1.1.2.1 2003/05/19 04:25:31 braam Exp $
+**
+** This file contains definitions that are used throughout the cplant code.
+*/
+
+#ifndef CPLANT_H
+#define CPLANT_H
+
+#define TITLE(fname,zmig)
+
+
+/*
+** TRUE and FALSE
+*/
+#undef TRUE
+#define TRUE (1)
+#undef FALSE
+#define FALSE (0)
+
+
+/*
+** Return codes from functions
+*/
+#undef OK
+#define OK (0)
+#undef ERROR
+#define ERROR (-1)
+
+
+
+/*
+** max()/min() for any arithmetic type.  These are plain function-like
+** macros, not the GCC "safe" form: each argument may be evaluated twice,
+** so do not pass expressions with side effects.  The whole expansion is
+** parenthesised so that e.g. 2 * MAX(a, b) groups as intended.
+*/
+#ifndef MAX
+#define MAX(a, b)   (((a) > (b)) ? (a) : (b))
+#endif /* MAX */
+
+#ifndef MIN
+#define MIN(a, b)   (((a) < (b)) ? (a) : (b))
+#endif /* MIN */
+
+/*
+** The rest is from the old qkdefs.h
+*/
+
+#ifndef __linux__
+#define __inline__
+#endif
+
+#ifndef NULL
+#define NULL ((void *)0)
+#endif
+
+#ifndef __osf__
+#define PRIVATE static
+#define PUBLIC
+#endif
+
+#ifndef __osf__
+typedef unsigned char uchar;
+#endif
+
+typedef char CHAR;
+typedef unsigned char UCHAR;
+typedef char INT8;
+typedef unsigned char UINT8;
+typedef short int INT16;
+typedef unsigned short int UINT16;
+typedef int INT32;
+typedef unsigned int UINT32;
+typedef long LONG32;
+typedef unsigned long ULONG32;
+
+/* long may be 32 or 64, so we can't really append the size to the definition */
+typedef long LONG;
+typedef unsigned long ULONG;
+
+#ifdef __alpha__
+typedef long int_t;
+#ifndef __osf__
+typedef unsigned long uint_t;
+#endif
+#endif
+
+#ifdef __i386__
+typedef int int_t;
+typedef unsigned int uint_t;
+#endif
+
+typedef float FLOAT32;
+typedef double FLOAT64;
+typedef void VOID;
+typedef INT32 BOOLEAN;
+typedef void (*FCN_PTR)(void);
+
+#ifndef off64_t
+
+#if defined (__alpha__) || defined (__ia64__)
+typedef long off64_t;
+#else
+typedef long long off64_t;
+#endif
+
+#endif
+
+/*
+** Process related typedefs
+*/
+typedef UINT16 PID_TYPE; /* Type of Local process ID */
+typedef UINT16 NID_TYPE; /* Type of Physical node ID */
+typedef UINT16 GID_TYPE; /* Type of Group ID */
+typedef UINT16 RANK_TYPE; /* Type of Logical rank/process within a group */
+
+
+
+#endif /* CPLANT_H */
--- /dev/null
+#ifndef _P30_ERRNO_H_
+#define _P30_ERRNO_H_
+
+/*
+ * include/portals/errno.h
+ *
+ * Shared error number lists
+ */
+
+/* If you change these, you must update the string table in api-errno.c */
+typedef enum {
+ PTL_OK = 0,
+ PTL_SEGV = 1,
+
+ PTL_NOSPACE = 2,
+ PTL_INUSE = 3,
+ PTL_VAL_FAILED = 4,
+
+ PTL_NAL_FAILED = 5,
+ PTL_NOINIT = 6,
+ PTL_INIT_DUP = 7,
+ PTL_INIT_INV = 8,
+ PTL_AC_INV_INDEX = 9,
+
+ PTL_INV_ASIZE = 10,
+ PTL_INV_HANDLE = 11,
+ PTL_INV_MD = 12,
+ PTL_INV_ME = 13,
+ PTL_INV_NI = 14,
+/* If you change these, you must update the string table in api-errno.c */
+ PTL_ILL_MD = 15,
+ PTL_INV_PROC = 16,
+ PTL_INV_PSIZE = 17,
+ PTL_INV_PTINDEX = 18,
+ PTL_INV_REG = 19,
+
+ PTL_INV_SR_INDX = 20,
+ PTL_ML_TOOLONG = 21,
+ PTL_ADDR_UNKNOWN = 22,
+ PTL_INV_EQ = 23,
+ PTL_EQ_DROPPED = 24,
+
+ PTL_EQ_EMPTY = 25,
+ PTL_NOUPDATE = 26,
+ PTL_FAIL = 27,
+ PTL_NOT_IMPLEMENTED = 28,
+ PTL_NO_ACK = 29,
+
+ PTL_IOV_TOO_MANY = 30,
+ PTL_IOV_TOO_SMALL = 31,
+
+ PTL_EQ_INUSE = 32,
+ PTL_MD_INUSE = 33,
+
+ PTL_MAX_ERRNO = 33
+} ptl_err_t;
+/* If you change these, you must update the string table in api-errno.c */
+
+extern const char *ptl_err_str[];
+
+#endif
--- /dev/null
+#ifndef PTL_DISPATCH_H
+#define PTL_DISPATCH_H
+
+/*
+ * include/dispatch.h
+ *
+ * Dispatch table header and externs for remote side
+ * operations
+ *
+ * Generated by idl
+ *
+ */
+
+#include <portals/lib-p30.h>
+#include <portals/arg-blocks.h>
+
+extern int do_PtlGetId(nal_cb_t * nal, void *private, void *args, void *ret);
+extern int do_PtlNIStatus(nal_cb_t * nal, void *private, void *args, void *ret);
+extern int do_PtlNIDist(nal_cb_t * nal, void *private, void *args, void *ret);
+extern int do_PtlNIDebug(nal_cb_t * nal, void *private, void *args, void *ret);
+extern int do_PtlMEAttach(nal_cb_t * nal, void *private, void *args, void *ret);
+extern int do_PtlMEInsert(nal_cb_t * nal, void *private, void *args, void *ret);
+extern int do_PtlMEPrepend(nal_cb_t * nal, void *private, void *args,
+ void *ret);
+extern int do_PtlMEUnlink(nal_cb_t * nal, void *private, void *args, void *ret);
+extern int do_PtlTblDump(nal_cb_t * nal, void *private, void *args, void *ret);
+extern int do_PtlMEDump(nal_cb_t * nal, void *private, void *args, void *ret);
+extern int do_PtlMDAttach(nal_cb_t * nal, void *private, void *args,
+ void *ret);
+extern int do_PtlMDBind(nal_cb_t * nal, void *private, void *args,
+ void *ret);
+extern int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *args,
+ void *ret);
+extern int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *args,
+ void *ret);
+extern int do_PtlEQAlloc_internal(nal_cb_t * nal, void *private, void *args,
+ void *ret);
+extern int do_PtlEQFree_internal(nal_cb_t * nal, void *private, void *args,
+ void *ret);
+extern int do_PtlACEntry(nal_cb_t * nal, void *private, void *args, void *ret);
+extern int do_PtlPut(nal_cb_t * nal, void *private, void *args, void *ret);
+extern int do_PtlGet(nal_cb_t * nal, void *private, void *args, void *ret);
+extern int do_PtlFailNid (nal_cb_t *nal, void *private, void *args, void *ret);
+
+extern char *dispatch_name(int index);
+#endif
--- /dev/null
+#ifndef _LIB_NAL_H_
+#define _LIB_NAL_H_
+
+/*
+ * nal.h
+ *
+ * Library side headers that define the abstraction layer's
+ * responsibilities and interfaces
+ */
+
+#include <portals/lib-types.h>
+
+struct nal_cb_t {
+ /*
+ * Per interface portal table, access control table
+ * and NAL private data field;
+ */
+ lib_ni_t ni;
+ void *nal_data;
+ /*
+ * send: Sends a preformatted header and user data to a
+ * specified remote process.
+ * Can overwrite iov.
+ */
+ int (*cb_send) (nal_cb_t * nal, void *private, lib_msg_t * cookie,
+ ptl_hdr_t * hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+ unsigned int niov, struct iovec *iov, size_t mlen);
+
+ /* as send, but with a set of page fragments (NULL if not supported) */
+ int (*cb_send_pages) (nal_cb_t * nal, void *private, lib_msg_t * cookie,
+ ptl_hdr_t * hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+ unsigned int niov, ptl_kiov_t *iov, size_t mlen);
+ /*
+ * recv: Receives an incoming message from a remote process
+ * Type of iov depends on options. Can overwrite iov.
+ */
+ int (*cb_recv) (nal_cb_t * nal, void *private, lib_msg_t * cookie,
+ unsigned int niov, struct iovec *iov, size_t mlen,
+ size_t rlen);
+
+ /* as recv, but with a set of page fragments (NULL if not supported) */
+ int (*cb_recv_pages) (nal_cb_t * nal, void *private, lib_msg_t * cookie,
+ unsigned int niov, ptl_kiov_t *iov, size_t mlen,
+ size_t rlen);
+ /*
+ * read: Reads a block of data from a specified user address
+ */
+ int (*cb_read) (nal_cb_t * nal, void *private, void *dst_addr,
+ user_ptr src_addr, size_t len);
+
+ /*
+ * write: Writes a block of data into a specified user address
+ */
+ int (*cb_write) (nal_cb_t * nal, void *private, user_ptr dsr_addr,
+ void *src_addr, size_t len);
+
+ /*
+ * callback: Calls an event callback
+ */
+ int (*cb_callback) (nal_cb_t * nal, void *private, lib_eq_t *eq,
+ ptl_event_t *ev);
+
+ /*
+ * malloc: Acquire a block of memory in a system independent
+ * fashion.
+ */
+ void *(*cb_malloc) (nal_cb_t * nal, size_t len);
+
+ void (*cb_free) (nal_cb_t * nal, void *buf, size_t len);
+
+ /*
+ * (un)map: Tell the NAL about some memory it will access.
+ * *addrkey passed to cb_unmap() is what cb_map() set it to.
+ * type of *iov depends on options.
+ * Set to NULL if not required.
+ */
+ int (*cb_map) (nal_cb_t * nal, unsigned int niov, struct iovec *iov,
+ void **addrkey);
+ void (*cb_unmap) (nal_cb_t * nal, unsigned int niov, struct iovec *iov,
+ void **addrkey);
+
+ /* as (un)map, but with a set of page fragments */
+ int (*cb_map_pages) (nal_cb_t * nal, unsigned int niov, ptl_kiov_t *iov,
+ void **addrkey);
+ void (*cb_unmap_pages) (nal_cb_t * nal, unsigned int niov, ptl_kiov_t *iov,
+ void **addrkey);
+
+ void (*cb_printf) (nal_cb_t * nal, const char *fmt, ...);
+
+ /* Turn interrupts off (begin of protected area) */
+ void (*cb_cli) (nal_cb_t * nal, unsigned long *flags);
+
+ /* Turn interrupts on (end of protected area) */
+ void (*cb_sti) (nal_cb_t * nal, unsigned long *flags);
+
+ /*
+ * Calculate a network "distance" to given node
+ */
+ int (*cb_dist) (nal_cb_t * nal, ptl_nid_t nid, unsigned long *dist);
+};
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lib-p30.h
+ *
+ * Top level include for library side routines
+ */
+
+#ifndef _LIB_P30_H_
+#define _LIB_P30_H_
+
+#ifdef __KERNEL__
+# include <asm/page.h>
+# include <linux/string.h>
+#else
+# include <portals/list.h>
+# include <string.h>
+#endif
+#include <portals/types.h>
+#include <linux/kp30.h>
+#include <portals/p30.h>
+#include <portals/errno.h>
+#include <portals/lib-types.h>
+#include <portals/lib-nal.h>
+#include <portals/lib-dispatch.h>
+
+/* Returns 1 when both cookies of *wh equal those of PTL_WIRE_HANDLE_NONE,
+ * 0 otherwise. */
+static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh)
+{
+        if (wh->wh_interface_cookie != PTL_WIRE_HANDLE_NONE.wh_interface_cookie)
+                return 0;
+
+        return (wh->wh_object_cookie == PTL_WIRE_HANDLE_NONE.wh_object_cookie);
+}
+
+/*
+ * state_lock()/state_unlock(): bracket a region that touches library
+ * state.  In the kernel they call the NAL's cb_cli/cb_sti callbacks; in
+ * user space they only emit debug output.  Both are wrapped in
+ * do { } while (0) so that each behaves as a single statement, including
+ * in an unbraced if/else.
+ */
+#ifdef __KERNEL__
+#define state_lock(nal,flagsp)                          \
+do {                                                    \
+        CDEBUG(D_PORTALS, "taking state lock\n");       \
+        (nal)->cb_cli((nal), (flagsp));                 \
+} while (0)
+
+#define state_unlock(nal,flagsp)                        \
+do {                                                    \
+        CDEBUG(D_PORTALS, "releasing state lock\n");    \
+        (nal)->cb_sti((nal), (flagsp));                 \
+} while (0)
+#else
+/* not needed in user space until we thread there */
+#define state_lock(nal,flagsp)                          \
+do {                                                    \
+        CDEBUG(D_PORTALS, "taking state lock\n");       \
+        CDEBUG(D_PORTALS, "%p:%p\n", (nal), (flagsp));  \
+} while (0)
+
+#define state_unlock(nal,flagsp)                        \
+do {                                                    \
+        CDEBUG(D_PORTALS, "releasing state lock\n");    \
+        CDEBUG(D_PORTALS, "%p:%p\n", (nal), (flagsp));  \
+} while (0)
+#endif /* __KERNEL__ */
+
+#ifndef PTL_USE_SLAB_CACHE
+
+#define MAX_MES 2048
+#define MAX_MDS 2048
+#define MAX_MSGS 2048 /* Outstanding messages */
+#define MAX_EQS 512
+
+extern int lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int nobj, int objsize);
+extern void lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl);
+
+/* Take the first object off fl->fl_list and return a pointer to its
+ * contents, or NULL when the list is empty. */
+static inline void *
+lib_freelist_alloc (lib_freelist_t *fl)
+{
+        /* ALWAYS called with statelock held */
+        lib_freeobj_t *first;
+
+        if (!list_empty (&fl->fl_list)) {
+                first = list_entry (fl->fl_list.next, lib_freeobj_t, fo_list);
+                list_del (&first->fo_list);
+                return ((void *)&first->fo_contents);
+        }
+
+        return (NULL);
+}
+
+/* Return obj to the freelist: recover the enclosing lib_freeobj_t from the
+ * contents pointer and add it back to fl->fl_list. */
+static inline void
+lib_freelist_free (lib_freelist_t *fl, void *obj)
+{
+        /* ALWAYS called with statelock held */
+        lib_freeobj_t *wrapper;
+
+        wrapper = list_entry (obj, lib_freeobj_t, fo_contents);
+        list_add (&wrapper->fo_list, &fl->fl_list);
+}
+
+
+/* Take an event queue from the NI's EQ freelist under the state lock;
+ * NULL when the freelist is empty. */
+static inline lib_eq_t *
+lib_eq_alloc (nal_cb_t *nal)
+{
+        /* NEVER called with statelock held */
+        unsigned long  irq_flags;
+        void          *slot;
+
+        state_lock (nal, &irq_flags);
+        slot = lib_freelist_alloc (&nal->ni.ni_free_eqs);
+        state_unlock (nal, &irq_flags);
+
+        return ((lib_eq_t *)slot);
+}
+
+/* Put an event queue back on the NI's EQ freelist. */
+static inline void
+lib_eq_free (nal_cb_t *nal, lib_eq_t *eq)
+{
+        /* ALWAYS called with statelock held */
+        lib_freelist_t *pool = &nal->ni.ni_free_eqs;
+
+        lib_freelist_free (pool, eq);
+}
+
+/* Take a memory descriptor from the NI's MD freelist under the state lock;
+ * NULL when the freelist is empty. */
+static inline lib_md_t *
+lib_md_alloc (nal_cb_t *nal)
+{
+        /* NEVER called with statelock held */
+        unsigned long  irq_flags;
+        void          *slot;
+
+        state_lock (nal, &irq_flags);
+        slot = lib_freelist_alloc (&nal->ni.ni_free_mds);
+        state_unlock (nal, &irq_flags);
+
+        return ((lib_md_t *)slot);
+}
+
+/* Put a memory descriptor back on the NI's MD freelist. */
+static inline void
+lib_md_free (nal_cb_t *nal, lib_md_t *md)
+{
+        /* ALWAYS called with statelock held */
+        lib_freelist_t *pool = &nal->ni.ni_free_mds;
+
+        lib_freelist_free (pool, md);
+}
+
+/* Take a match entry from the NI's ME freelist under the state lock;
+ * NULL when the freelist is empty. */
+static inline lib_me_t *
+lib_me_alloc (nal_cb_t *nal)
+{
+        /* NEVER called with statelock held */
+        unsigned long  irq_flags;
+        void          *slot;
+
+        state_lock (nal, &irq_flags);
+        slot = lib_freelist_alloc (&nal->ni.ni_free_mes);
+        state_unlock (nal, &irq_flags);
+
+        return ((lib_me_t *)slot);
+}
+
+/* Put a match entry back on the NI's ME freelist. */
+static inline void
+lib_me_free (nal_cb_t *nal, lib_me_t *me)
+{
+        /* ALWAYS called with statelock held */
+        lib_freelist_t *pool = &nal->ni.ni_free_mes;
+
+        lib_freelist_free (pool, me);
+}
+
+/* Take a message descriptor from the NI's message freelist; NULL when the
+ * freelist is empty.  Takes no lock itself. */
+static inline lib_msg_t *
+lib_msg_alloc (nal_cb_t *nal)
+{
+        /* ALWAYS called with statelock held */
+        void *slot = lib_freelist_alloc (&nal->ni.ni_free_msgs);
+
+        return ((lib_msg_t *)slot);
+}
+
+/* Put a message descriptor back on the NI's message freelist. */
+static inline void
+lib_msg_free (nal_cb_t *nal, lib_msg_t *msg)
+{
+        /* ALWAYS called with statelock held */
+        lib_freelist_t *pool = &nal->ni.ni_free_msgs;
+
+        lib_freelist_free (pool, msg);
+}
+
+#else
+
+extern kmem_cache_t *ptl_md_slab;
+extern kmem_cache_t *ptl_msg_slab;
+extern kmem_cache_t *ptl_me_slab;
+extern kmem_cache_t *ptl_eq_slab;
+extern atomic_t md_in_use_count;
+extern atomic_t msg_in_use_count;
+extern atomic_t me_in_use_count;
+extern atomic_t eq_in_use_count;
+
+/* Slab variant: allocate an event queue from ptl_eq_slab with GFP_KERNEL
+ * and count it in eq_in_use_count; NULL if the allocation fails. */
+static inline lib_eq_t *
+lib_eq_alloc (nal_cb_t *nal)
+{
+        /* NEVER called with statelock held */
+        lib_eq_t *new_eq;
+
+        new_eq = kmem_cache_alloc(ptl_eq_slab, GFP_KERNEL);
+        if (new_eq != NULL)
+                atomic_inc (&eq_in_use_count);
+
+        return (new_eq);
+}
+
+/* Slab variant: drop the in-use count, then give the event queue back to
+ * ptl_eq_slab.  nal is not used here. */
+static inline void
+lib_eq_free (nal_cb_t *nal, lib_eq_t *eq)
+{
+        /* ALWAYS called with statelock held */
+        atomic_dec (&eq_in_use_count);
+
+        kmem_cache_free(ptl_eq_slab, eq);
+}
+
+/* Slab variant: allocate a memory descriptor from ptl_md_slab with
+ * GFP_KERNEL and count it in md_in_use_count; NULL if allocation fails. */
+static inline lib_md_t *
+lib_md_alloc (nal_cb_t *nal)
+{
+        /* NEVER called with statelock held */
+        lib_md_t *new_md;
+
+        new_md = kmem_cache_alloc(ptl_md_slab, GFP_KERNEL);
+        if (new_md != NULL)
+                atomic_inc (&md_in_use_count);
+
+        return (new_md);
+}
+
+/* Slab variant: drop the in-use count, then give the memory descriptor
+ * back to ptl_md_slab.  nal is not used here. */
+static inline void
+lib_md_free (nal_cb_t *nal, lib_md_t *md)
+{
+        /* ALWAYS called with statelock held */
+        atomic_dec (&md_in_use_count);
+
+        kmem_cache_free(ptl_md_slab, md);
+}
+
+/* Slab variant: allocate a match entry from ptl_me_slab with GFP_KERNEL
+ * and count it in me_in_use_count; NULL if the allocation fails. */
+static inline lib_me_t *
+lib_me_alloc (nal_cb_t *nal)
+{
+        /* NEVER called with statelock held */
+        lib_me_t *new_me;
+
+        new_me = kmem_cache_alloc(ptl_me_slab, GFP_KERNEL);
+        if (new_me != NULL)
+                atomic_inc (&me_in_use_count);
+
+        return (new_me);
+}
+
+/* Slab variant: drop the in-use count, then give the match entry back to
+ * ptl_me_slab.  nal is not used here. */
+static inline void
+lib_me_free(nal_cb_t *nal, lib_me_t *me)
+{
+        /* ALWAYS called with statelock held */
+        atomic_dec (&me_in_use_count);
+
+        kmem_cache_free(ptl_me_slab, me);
+}
+
+static inline lib_msg_t *
+lib_msg_alloc (nal_cb_t *nal)
+{
+        /* ALWAYS called with statelock held, hence GFP_ATOMIC rather than
+         * the GFP_KERNEL used by the other allocators in this group. */
+        lib_msg_t *msg;
+
+        msg = kmem_cache_alloc(ptl_msg_slab, GFP_ATOMIC);
+
+        /* Only successful allocations are counted as in use. */
+        if (msg != NULL)
+                atomic_inc (&msg_in_use_count);
+
+        return (msg);
+}
+
+static inline void
+lib_msg_free (nal_cb_t *nal, lib_msg_t *msg)
+{
+        /* ALWAYS called with statelock held.
+         * Drop the in-use count, then return the object to the message slab. */
+        atomic_dec (&msg_in_use_count);
+        kmem_cache_free (ptl_msg_slab, msg);
+}
+#endif
+
+/* Handle bookkeeping, implemented elsewhere.  lib_lookup_cookie() is used by
+ * the ptl_handle2*() helpers below, which treat a NULL result as "no such
+ * object". */
+extern lib_handle_t *lib_lookup_cookie (nal_cb_t *nal, __u64 cookie);
+extern void lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh);
+extern void lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh);
+
+static inline void
+ptl_eq2handle (ptl_handle_eq_t *handle, lib_eq_t *eq)
+{
+        /* Only the cookie is written; handle->nal_idx is left untouched. */
+        __u64 cookie = eq->eq_lh.lh_cookie;
+
+        handle->cookie = cookie;
+}
+
+static inline lib_eq_t *
+ptl_handle2eq (ptl_handle_eq_t *handle, nal_cb_t *nal)
+{
+        /* ALWAYS called with statelock held */
+        lib_handle_t *found = lib_lookup_cookie (nal, handle->cookie);
+
+        /* Unknown cookie => NULL; otherwise step back from the embedded
+         * eq_lh member to the lib_eq_t that contains it. */
+        return (found == NULL) ? NULL : lh_entry (found, lib_eq_t, eq_lh);
+}
+
+static inline void
+ptl_md2handle (ptl_handle_md_t *handle, lib_md_t *md)
+{
+        /* Only the cookie is written; handle->nal_idx is left untouched. */
+        __u64 cookie = md->md_lh.lh_cookie;
+
+        handle->cookie = cookie;
+}
+
+static inline lib_md_t *
+ptl_handle2md (ptl_handle_md_t *handle, nal_cb_t *nal)
+{
+        /* ALWAYS called with statelock held */
+        lib_handle_t *found = lib_lookup_cookie (nal, handle->cookie);
+
+        /* Unknown cookie => NULL; otherwise step back from the embedded
+         * md_lh member to the lib_md_t that contains it. */
+        return (found == NULL) ? NULL : lh_entry (found, lib_md_t, md_lh);
+}
+
+static inline lib_md_t *
+ptl_wire_handle2md (ptl_handle_wire_t *wh, nal_cb_t *nal)
+{
+        /* ALWAYS called with statelock held */
+        lib_handle_t *found = NULL;
+
+        /* The object cookie is only looked up when the wire handle's
+         * interface cookie matches this interface's current cookie. */
+        if (wh->wh_interface_cookie == nal->ni.ni_interface_cookie)
+                found = lib_lookup_cookie (nal, wh->wh_object_cookie);
+
+        if (found == NULL)
+                return (NULL);
+
+        return (lh_entry (found, lib_md_t, md_lh));
+}
+
+static inline void
+ptl_me2handle (ptl_handle_me_t *handle, lib_me_t *me)
+{
+        /* Only the cookie is written; handle->nal_idx is left untouched. */
+        __u64 cookie = me->me_lh.lh_cookie;
+
+        handle->cookie = cookie;
+}
+
+static inline lib_me_t *
+ptl_handle2me (ptl_handle_me_t *handle, nal_cb_t *nal)
+{
+        /* ALWAYS called with statelock held */
+        lib_handle_t *found = lib_lookup_cookie (nal, handle->cookie);
+
+        /* Unknown cookie => NULL; otherwise step back from the embedded
+         * me_lh member to the lib_me_t that contains it. */
+        return (found == NULL) ? NULL : lh_entry (found, lib_me_t, me_lh);
+}
+
+/* Library entry points implemented outside this header.
+ * NOTE(review): by their names these are interface setup/teardown and the
+ * API request dispatcher (dispatch_name() mapping an index to a printable
+ * name); the exact contracts are not visible here. */
+extern int lib_init(nal_cb_t * cb, ptl_nid_t nid, ptl_pid_t pid, int gsize,
+ ptl_pt_index_t tbl_size, ptl_ac_index_t ac_size);
+extern int lib_fini(nal_cb_t * cb);
+extern void lib_dispatch(nal_cb_t * cb, void *private, int index,
+ void *arg_block, void *ret_block);
+extern char *dispatch_name(int index);
+
+/*
+ * When the NAL detects an incoming message, it should call
+ * lib_parse() to decode it. The NAL callbacks will be handed
+ * the private cookie as a way for the NAL to maintain state
+ * about which transaction is being processed. An extra parameter,
+ * lib_cookie, will contain the necessary information for
+ * finalizing the message.
+ *
+ * After it has finished handling the message, it should
+ * call lib_finalize() with the lib_cookie parameter.
+ * Callbacks will be made to write events, send acks or
+ * replies and so on.
+ *
+ * NOTE(review): no parameter is literally named lib_cookie in the
+ * prototypes below; it presumably corresponds to the lib_msg_t
+ * pointer taken by lib_finalize() -- confirm.
+ */
+extern int lib_parse(nal_cb_t * nal, ptl_hdr_t * hdr, void *private);
+extern int lib_finalize(nal_cb_t * nal, void *private, lib_msg_t * msg);
+extern void print_hdr(nal_cb_t * nal, ptl_hdr_t * hdr);
+
+/* Helpers for fragment lists described by struct iovec arrays.
+ * NOTE(review): by name, lib_iov_nob() presumably returns the total number
+ * of bytes in the niov fragments, and the copy helpers move len bytes
+ * between a flat buffer and the fragments -- confirm against lib-move.c.
+ * The flat buffer of lib_copy_buf2iov() is the source of the copy, so it is
+ * named 'src' here, matching lib_copy_buf2kiov(). */
+extern ptl_size_t lib_iov_nob (int niov, struct iovec *iov);
+extern void lib_copy_iov2buf (char *dest, int niov, struct iovec *iov, ptl_size_t len);
+extern void lib_copy_buf2iov (int niov, struct iovec *iov, char *src, ptl_size_t len);
+
+/* The same three helpers for page-based ptl_kiov_t fragment lists. */
+extern ptl_size_t lib_kiov_nob (int niov, ptl_kiov_t *iov);
+extern void lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *iov, ptl_size_t len);
+extern void lib_copy_buf2kiov (int niov, ptl_kiov_t *iov, char *src, ptl_size_t len);
+
+/* Data movement and MD/ME teardown entry points, implemented elsewhere.
+ * NOTE(review): in lib_recv(), mlen/rlen presumably mirror the
+ * mlength/rlength fields of ptl_event_t -- confirm against the
+ * implementation. */
+extern void lib_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
+ ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen);
+extern int lib_send (nal_cb_t *nal, void *private, lib_msg_t *msg,
+ ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+ lib_md_t *md, ptl_size_t offset, ptl_size_t len);
+
+extern void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md_in,
+ ptl_md_t * md_out);
+extern void lib_md_unlink(nal_cb_t * nal, lib_md_t * md_in);
+extern void lib_me_unlink(nal_cb_t * nal, lib_me_t * me_in);
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * p30/lib-types.h
+ *
+ * Types used by the library side routines that do not need to be
+ * exposed to the user application
+ */
+
+#ifndef _LIB_TYPES_H_
+#define _LIB_TYPES_H_
+
+#include <portals/types.h>
+#ifdef __KERNEL__
+# define PTL_USE_SLAB_CACHE
+# include <linux/uio.h>
+# include <linux/smp_lock.h>
+# include <linux/types.h>
+#else
+# include <sys/types.h>
+#endif
+
+/* struct nal_cb_t is defined in lib-nal.h */
+typedef struct nal_cb_t nal_cb_t;
+
+/* Byte pointer type; used for the 'start' member of struct lib_md_t. */
+typedef char *user_ptr;
+/* Forward typedefs for the library-side structs defined later in this
+ * header, so that they can refer to one another by pointer. */
+typedef struct lib_msg_t lib_msg_t;
+typedef struct lib_ptl_t lib_ptl_t;
+typedef struct lib_ac_t lib_ac_t;
+typedef struct lib_me_t lib_me_t;
+typedef struct lib_md_t lib_md_t;
+typedef struct lib_eq_t lib_eq_t;
+
+/* The wire handle's interface cookie only matches one network interface in
+ * one epoch (i.e. new cookie when the interface restarts or the node
+ * reboots). The object cookie only matches one object on that interface
+ * during that object's lifetime (i.e. no cookie re-use). */
+typedef struct {
+ __u64 wh_interface_cookie;
+ __u64 wh_object_cookie;
+} ptl_handle_wire_t;
+
+/* "No handle" value: both cookies are -1 (all bits set), so the bit pattern
+ * is unchanged by a byte swap -- byte-flip insensitive! */
+#define PTL_WIRE_HANDLE_NONE \
+((const ptl_handle_wire_t) {.wh_interface_cookie = -1, .wh_object_cookie = -1})
+
+/* Message kinds; stored in the 'type' field of ptl_hdr_t (as a __u32).
+ * Numbering starts at 0 for PTL_MSG_ACK and increases by one per entry. */
+typedef enum {
+ PTL_MSG_ACK = 0,
+ PTL_MSG_PUT,
+ PTL_MSG_GET,
+ PTL_MSG_REPLY,
+ PTL_MSG_HELLO,
+} ptl_msg_type_t;
+
+/* Each of these structs should start with an odd number of
+ * __u32, or the compiler could add its own padding and confuse
+ * everyone.
+ *
+ * Also, "length" needs to be at offset 28 of each struct.
+ * With the typedefs in <portals/types.h>, the members declared before
+ * "length" add up to 28 bytes in every struct below.
+ * NOTE(review): that only gives offset 28 if the compiler inserts no
+ * padding in front of the __u64 members -- confirm for each target ABI.
+ */
+/* Message-specific part of the header for PTL_MSG_ACK (ptl_hdr_t.msg.ack). */
+typedef struct ptl_ack {
+ ptl_size_t mlength;
+ ptl_handle_wire_t dst_wmd;
+ ptl_match_bits_t match_bits;
+ ptl_size_t length; /* common length (0 for acks) moving out RSN */
+} ptl_ack_t;
+
+/* Message-specific part of the header for PTL_MSG_PUT (ptl_hdr_t.msg.put).
+ * Field order and sizes are part of the wire format; do not reorder. */
+typedef struct ptl_put {
+ ptl_pt_index_t ptl_index;
+ ptl_handle_wire_t ack_wmd;
+ ptl_match_bits_t match_bits;
+ ptl_size_t length; /* common length moving out RSN */
+ ptl_size_t offset;
+ ptl_hdr_data_t hdr_data;
+} ptl_put_t;
+
+/* Message-specific part of the header for PTL_MSG_GET (ptl_hdr_t.msg.get).
+ * Field order and sizes are part of the wire format; do not reorder. */
+typedef struct ptl_get {
+ ptl_pt_index_t ptl_index;
+ ptl_handle_wire_t return_wmd;
+ ptl_match_bits_t match_bits;
+ ptl_size_t length; /* common length (0 for gets) moving out RSN */
+ ptl_size_t src_offset;
+ ptl_size_t return_offset; /* unused: going RSN */
+ ptl_size_t sink_length;
+} ptl_get_t;
+
+/* Message-specific part of the header for PTL_MSG_REPLY (ptl_hdr_t.msg.reply).
+ * unused1/unused2 keep 28 bytes of members in front of 'length', as in the
+ * other message structs. */
+typedef struct ptl_reply {
+ __u32 unused1; /* unused fields going RSN */
+ ptl_handle_wire_t dst_wmd;
+ ptl_size_t dst_offset; /* unused: going RSN */
+ __u32 unused2;
+ ptl_size_t length; /* common length moving out RSN */
+} ptl_reply_t;
+
+/* Portals message header: addressing (NIDs/PIDs), the message type, and a
+ * union holding the type-specific part selected by 'type'. */
+typedef struct {
+ ptl_nid_t dest_nid;
+ ptl_nid_t src_nid;
+ ptl_pid_t dest_pid;
+ ptl_pid_t src_pid;
+ __u32 type; /* ptl_msg_type_t */
+ union {
+ ptl_ack_t ack;
+ ptl_put_t put;
+ ptl_get_t get;
+ ptl_reply_t reply;
+ } msg;
+} ptl_hdr_t;
+
+/* All length fields in individual unions at same offset */
+/* LASSERT for same in lib-move.c */
+/* Hence the length can be read through the 'ack' member whatever the type. */
+#define PTL_HDR_LENGTH(h) ((h)->msg.ack.length)
+
+/* A HELLO message contains the portals magic number and protocol version
+ * code in the header's dest_nid, the peer's NID in the src_nid, and
+ * PTL_MSG_HELLO in the type field. All other fields are zero (including
+ * PTL_HDR_LENGTH; i.e. no payload).
+ * This is for use by byte-stream NALs (e.g. TCP/IP) to check the peer is
+ * running the same protocol and to find out its NID, so that hosts with
+ * multiple IP interfaces can have a single NID. These NALs should exchange
+ * HELLO messages when a connection is first established. */
+/* Magic number plus protocol version: 4 + 2 + 2 = 8 bytes of members.
+ * NOTE(review): presumably this is what a HELLO overlays on the 8-byte
+ * dest_nid field of the header -- confirm in the NAL code. */
+typedef struct {
+ __u32 magic; /* PORTALS_PROTO_MAGIC */
+ __u16 version_major; /* increment on incompatible change */
+ __u16 version_minor; /* increment on compatible change */
+} ptl_magicversion_t;
+
+/* Expected value of ptl_magicversion_t.magic */
+#define PORTALS_PROTO_MAGIC 0xeebc0ded
+
+/* Protocol version implemented by this code (major.minor = 0.1) */
+#define PORTALS_PROTO_VERSION_MAJOR 0
+#define PORTALS_PROTO_VERSION_MINOR 1
+
+/* Statistics block; embedded in lib_ni_t as its 'counters' member.
+ * NOTE(review): judging by the names these are message counts and byte
+ * totals for receives, sends and drops, plus message-allocation figures --
+ * confirm where they are updated. */
+typedef struct {
+ long recv_count, recv_length, send_count, send_length, drop_count,
+ drop_length, msgs_alloc, msgs_max;
+} lib_counters_t;
+
+/* temporary expedient: limit number of entries in discontiguous MDs */
+/* Sizes the iov/kiov arrays embedded in lib_msg_t and lib_md_t below.
+ * If PTL_LARGE_MTU is not defined, the #if evaluates it as 0, giving 16. */
+#if PTL_LARGE_MTU
+# define PTL_MD_MAX_IOV 64
+#else
+# define PTL_MD_MAX_IOV 16
+#endif
+
+/* Library-side state for one message.
+ * NOTE(review): msg_list presumably links the message onto
+ * lib_ni_t.ni_active_msgs -- confirm against the users.
+ * msg_iov holds up to PTL_MD_MAX_IOV fragments in either iovec or kiov form. */
+struct lib_msg_t {
+ struct list_head msg_list;
+ int send_ack;
+ lib_md_t *md;
+ ptl_nid_t nid;
+ ptl_pid_t pid;
+ ptl_event_t ev;
+ ptl_handle_wire_t ack_wmd;
+ union {
+ struct iovec iov[PTL_MD_MAX_IOV];
+ ptl_kiov_t kiov[PTL_MD_MAX_IOV];
+ } msg_iov;
+};
+
+/* Portal table: 'tbl' points at list heads and 'size' is a portal-index
+ * count.  NOTE(review): presumably tbl has 'size' entries, one match list
+ * per portal index -- confirm in lib_init(). */
+struct lib_ptl_t {
+ ptl_pt_index_t size;
+ struct list_head *tbl;
+};
+
+/* Access-control table state; only a next-free index is kept here. */
+struct lib_ac_t {
+ int next_free;
+};
+
+/* Handle record embedded in every EQ, ME and MD (members eq_lh, me_lh,
+ * md_lh): a hash-chain link plus the 64-bit cookie that identifies the
+ * object. */
+typedef struct {
+ struct list_head lh_hash_chain;
+ __u64 lh_cookie;
+} lib_handle_t;
+
+/* container-of: given a pointer to the 'member' field (a lib_handle_t)
+ * inside a 'type', return a pointer to the enclosing 'type'. */
+#define lh_entry(ptr, type, member) \
+ ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+
+/* Library-side event queue.  eq_lh is the handle record that
+ * ptl_handle2eq() maps back to this struct.
+ * NOTE(review): eq_list presumably links onto lib_ni_t.ni_active_eqs, and
+ * base/size describe the array of event slots -- confirm in the users. */
+struct lib_eq_t {
+ struct list_head eq_list;
+ lib_handle_t eq_lh;
+ ptl_seq_t sequence;
+ ptl_size_t size;
+ ptl_event_t *base;
+ int eq_refcount;
+ int (*event_callback) (ptl_event_t * event);
+ void *eq_addrkey;
+};
+
+/* Library-side match entry.  me_lh is the handle record that
+ * ptl_handle2me() maps back to this struct; 'md' points at a memory
+ * descriptor (lib_md_t has the reverse pointer 'me'). */
+struct lib_me_t {
+ struct list_head me_list;
+ lib_handle_t me_lh;
+ ptl_process_id_t match_id;
+ ptl_match_bits_t match_bits, ignore_bits;
+ ptl_unlink_t unlink;
+ lib_md_t *md;
+};
+
+/* Library-side memory descriptor.  md_lh is the handle record that
+ * ptl_handle2md() / ptl_wire_handle2md() map back to this struct.
+ * md_iov holds up to PTL_MD_MAX_IOV fragments in iovec or kiov form.
+ * NOTE(review): which union member is valid presumably follows the
+ * PTL_MD_IOV / PTL_MD_KIOV bits in 'options' -- confirm. */
+struct lib_md_t {
+ struct list_head md_list;
+ lib_handle_t md_lh;
+ lib_me_t *me;
+ user_ptr start;
+ ptl_size_t offset;
+ ptl_size_t length;
+ ptl_size_t max_size;
+ int threshold;
+ int pending;
+ ptl_unlink_t unlink;
+ unsigned int options;
+ unsigned int md_flags;
+ void *user_ptr;
+ lib_eq_t *eq;
+ void *md_addrkey;
+ unsigned int md_niov; /* # frags */
+ union {
+ struct iovec iov[PTL_MD_MAX_IOV];
+ ptl_kiov_t kiov[PTL_MD_MAX_IOV];
+ } md_iov;
+};
+
+/* Bit values; NOTE(review): presumably stored in lib_md_t.md_flags. */
+#define PTL_MD_FLAG_UNLINK (1 << 0)
+#define PTL_MD_FLAG_AUTO_UNLINKED (1 << 1)
+
+/* Freelist types; only compiled when the slab cache is not in use
+ * (PTL_USE_SLAB_CACHE is defined above for __KERNEL__ builds). */
+#ifndef PTL_USE_SLAB_CACHE
+typedef struct
+{
+ void *fl_objs; /* single contiguous array of objects */
+ int fl_nobjs; /* the number of them */
+ int fl_objsize; /* the size (including overhead) of each of them */
+ struct list_head fl_list; /* where they are enqueued */
+} lib_freelist_t;
+
+/* Per-object header used while an object sits on a freelist. */
+typedef struct
+{
+ struct list_head fo_list; /* enqueue on fl_list */
+ void *fo_contents; /* aligned contents */
+} lib_freeobj_t;
+#endif
+
+/* One entry of the fault-injection list kept in lib_ni_t.ni_test_peers. */
+typedef struct {
+ /* info about peers we are trying to fail */
+ struct list_head tp_list; /* stash in ni.ni_test_peers */
+ ptl_nid_t tp_nid; /* matching nid */
+ unsigned int tp_threshold; /* # failures to simulate */
+} lib_test_peer_t;
+
+/* Library-side state of one network interface; reached as nal->ni by the
+ * inline helpers in lib-p30.h.  The four freelists exist only in builds
+ * without PTL_USE_SLAB_CACHE. */
+typedef struct {
+ int up;
+ int refcnt;
+ ptl_nid_t nid;
+ ptl_pid_t pid;
+ int num_nodes;
+ unsigned int debug;
+ lib_ptl_t tbl;
+ lib_ac_t ac;
+ lib_counters_t counters;
+
+ int ni_lh_hash_size; /* size of lib handle hash table */
+ struct list_head *ni_lh_hash_table; /* all extant lib handles, this interface */
+ __u64 ni_next_object_cookie; /* cookie generator */
+ __u64 ni_interface_cookie; /* uniquely identifies this ni in this epoch */
+
+ struct list_head ni_test_peers;
+
+#ifndef PTL_USE_SLAB_CACHE
+ lib_freelist_t ni_free_mes;
+ lib_freelist_t ni_free_msgs;
+ lib_freelist_t ni_free_mds;
+ lib_freelist_t ni_free_eqs;
+#endif
+ struct list_head ni_active_msgs;
+ struct list_head ni_active_mds;
+ struct list_head ni_active_eqs;
+} lib_ni_t;
+
+#endif
--- /dev/null
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
+
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+/* No-op stand-in for the kernel's prefetch(): just a void expression.  The
+ * argument is parenthesised so any expression (e.g. a ?: expression) is cast
+ * as a whole rather than only its first operand. */
+#define prefetch(a) ((void)(a))
+
+/* Link node for a circular doubly linked list.  An empty list is a head
+ * whose next and prev both point at itself (see the initialisers below). */
+struct list_head {
+ struct list_head *next, *prev;
+};
+
+/* Static initialiser for a list head called 'name': points at itself. */
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+/* Define and initialise an empty list head variable called 'name'. */
+#define LIST_HEAD(name) \
+ struct list_head name = LIST_HEAD_INIT(name)
+
+/* Run-time initialiser: make *ptr an empty list.  'ptr' is evaluated
+ * several times, so it must not have side effects. */
+#define INIT_LIST_HEAD(ptr) do { \
+ (ptr)->next = (ptr); (ptr)->prev = (ptr); \
+} while (0)
+
+/*
+ * Link @new in between @prev and @next, which the caller guarantees are
+ * adjacent entries (prev->next == next).
+ *
+ * Internal helper: use list_add() / list_add_tail() unless both
+ * neighbours are already known.
+ */
+static inline void __list_add(struct list_head * new,
+                              struct list_head * prev,
+                              struct list_head * next)
+{
+        /* hook up the successor side ... */
+        next->prev = new;
+        new->next = next;
+
+        /* ... then the predecessor side */
+        new->prev = prev;
+        prev->next = new;
+}
+
+/**
+ * list_add - insert an entry right after a list head
+ * @new: entry to insert
+ * @head: list head (or any entry) that @new will follow
+ *
+ * The new entry becomes the first element, which gives stack (LIFO)
+ * behaviour.
+ */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+        struct list_head *old_first = head->next;
+
+        __list_add(new, head, old_first);
+}
+
+/**
+ * list_add_tail - insert an entry right before a list head
+ * @new: entry to insert
+ * @head: list head (or any entry) that @new will precede
+ *
+ * The new entry becomes the last element, which gives queue (FIFO)
+ * behaviour.
+ */
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+        struct list_head *old_last = head->prev;
+
+        __list_add(new, old_last, head);
+}
+
+/*
+ * Unlink whatever lies between @prev and @next by pointing the two at
+ * each other.
+ *
+ * Internal helper: the caller already knows both neighbours of the
+ * entry being removed.
+ */
+static inline void __list_del(struct list_head * prev, struct list_head * next)
+{
+        next->prev = prev;
+        prev->next = next;
+}
+
+/**
+ * list_del - remove an entry from the list it is on
+ * @entry: the element to remove
+ *
+ * Note: @entry's own next/prev pointers are left untouched, so
+ * list_empty() on @entry does not return true afterwards.
+ */
+static inline void list_del(struct list_head *entry)
+{
+        struct list_head *before = entry->prev;
+        struct list_head *after = entry->next;
+
+        __list_del(before, after);
+}
+
+/**
+ * list_del_init - remove an entry and leave it as an empty list
+ * @entry: the element to remove
+ *
+ * Afterwards @entry points at itself, so list_empty(@entry) is true.
+ */
+static inline void list_del_init(struct list_head *entry)
+{
+        __list_del(entry->prev, entry->next);
+
+        /* same effect as INIT_LIST_HEAD(entry) */
+        entry->next = entry;
+        entry->prev = entry;
+}
+
+/**
+ * list_move - unlink an entry and re-insert it right after a head
+ * @list: the entry to move
+ * @head: the head that @list will follow
+ */
+static inline void list_move(struct list_head *list, struct list_head *head)
+{
+        /* take it off its current list ... */
+        __list_del(list->prev, list->next);
+
+        /* ... and link it in as the first element after @head */
+        __list_add(list, head, head->next);
+}
+
+/**
+ * list_move_tail - unlink an entry and re-insert it right before a head
+ * @list: the entry to move
+ * @head: the head that @list will precede
+ */
+static inline void list_move_tail(struct list_head *list,
+                                  struct list_head *head)
+{
+        /* take it off its current list ... */
+        __list_del(list->prev, list->next);
+
+        /* ... and link it in as the last element before @head */
+        __list_add(list, head->prev, head);
+}
+
+/**
+ * list_empty - report whether a list has no entries
+ * @head: the list head to test
+ *
+ * Returns non-zero when @head's next pointer refers back to @head.
+ */
+static inline int list_empty(struct list_head *head)
+{
+        return head == head->next;
+}
+
+/*
+ * Insert every entry of @list between @head and @head's current first
+ * entry.  @list itself is not modified and must not be empty; use
+ * list_splice() for the checked form.
+ */
+static inline void __list_splice(struct list_head *list,
+                                 struct list_head *head)
+{
+        struct list_head *src_first = list->next;
+        struct list_head *src_last = list->prev;
+        struct list_head *dst_first = head->next;
+
+        /* front of the spliced run attaches to @head */
+        src_first->prev = head;
+        head->next = src_first;
+
+        /* back of the spliced run attaches to @head's old first entry */
+        src_last->next = dst_first;
+        dst_first->prev = src_last;
+}
+
+/**
+ * list_splice - insert the entries of one list into another
+ * @list: the list whose entries are added
+ * @head: the position in the destination list after which they go
+ *
+ * Does nothing when @list is empty.  @list's own head is not
+ * reinitialised; see list_splice_init() for that.
+ */
+static inline void list_splice(struct list_head *list, struct list_head *head)
+{
+        if (list_empty(list))
+                return;
+
+        __list_splice(list, head);
+}
+
+/**
+ * list_splice_init - splice a list into another and empty the source
+ * @list: the list whose entries are added
+ * @head: the position in the destination list after which they go
+ *
+ * Does nothing when @list is empty; otherwise @list is reinitialised to
+ * an empty list after its entries have been moved.
+ */
+static inline void list_splice_init(struct list_head *list,
+                                    struct list_head *head)
+{
+        if (list_empty(list))
+                return;
+
+        __list_splice(list, head);
+        INIT_LIST_HEAD(list);
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr: the &struct list_head pointer.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Works by subtracting the byte offset of @member within @type from @ptr.
+ */
+#define list_entry(ptr, type, member) \
+ ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+
+/**
+ * list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop counter.
+ * @head: the head for your list.
+ *
+ * The step reads pos->next after the loop body has run, so the current
+ * entry must not be removed inside the loop; use list_for_each_safe().
+ */
+#define list_for_each(pos, head) \
+ for (pos = (head)->next, prefetch(pos->next); pos != (head); \
+ pos = pos->next, prefetch(pos->next))
+
+/**
+ * list_for_each_prev - iterate over a list in reverse order
+ * @pos: the &struct list_head to use as a loop counter.
+ * @head: the head for your list.
+ *
+ * As with list_for_each(), the current entry must not be removed inside
+ * the loop.
+ */
+#define list_for_each_prev(pos, head) \
+ for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \
+ pos = pos->prev, prefetch(pos->prev))
+
+/**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos: the &struct list_head to use as a loop counter.
+ * @n: another &struct list_head to use as temporary storage
+ * @head: the head for your list.
+ *
+ * @n caches the successor before the body runs, so the body may unlink @pos.
+ */
+#define list_for_each_safe(pos, n, head) \
+ for (pos = (head)->next, n = pos->next; pos != (head); \
+ pos = n, n = pos->next)
+
+#endif
+
+/* Defined outside the _LINUX_LIST_H guard and only if not already provided,
+ * so it can supplement a list.h that lacks this macro. */
+#ifndef list_for_each_entry
+/**
+ * list_for_each_entry - iterate over list of given type
+ * @pos: the type * to use as a loop counter.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Relies on the typeof extension.  The current entry must not be removed
+ * inside the loop; use list_for_each_entry_safe() for that.
+ */
+#define list_for_each_entry(pos, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member), \
+ prefetch(pos->member.next); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.next, typeof(*pos), member), \
+ prefetch(pos->member.next))
+#endif
+
+/* Defined only if not already provided. */
+#ifndef list_for_each_entry_safe
+/**
+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @pos: the type * to use as a loop counter.
+ * @n: the &struct list_head to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Note that in this version @n is a struct list_head pointer (it is
+ * assigned pos->member.next), not a pointer to the containing type.
+ */
+#define list_for_each_entry_safe(pos, n, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member), \
+ n = pos->member.next; \
+ &pos->member != (head); \
+ pos = list_entry(n, typeof(*pos), member), \
+ n = pos->member.next)
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Compile with:
+ * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl
+ */
+#ifndef __LTRACE_H_
+#define __LTRACE_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <portals/types.h>
+#include <portals/ptlctl.h>
+#include <linux/kp30.h>
+#include <linux/limits.h>
+#include <asm/page.h>
+#include <linux/version.h>
+
+/* Run the ptlctl command "debug_kernel <fname> 1" via jt_dbg_debug_kernel()
+ * and return its result.  The command is echoed to stderr first. */
+static inline int ltrace_write_file(char* fname)
+{
+        char *cmd[3] = { "debug_kernel", fname, "1" };
+
+        fprintf(stderr, "[ptlctl] %s %s %s\n", cmd[0], cmd[1], cmd[2]);
+
+        return jt_dbg_debug_kernel(3, cmd);
+}
+
+/* Run the ptlctl command "clear" via jt_dbg_clear_debug_buf() and return its
+ * result.  The command is echoed to stderr first. */
+static inline int ltrace_clear()
+{
+        char *cmd[1] = { "clear" };
+
+        fprintf(stderr, "[ptlctl] %s\n", cmd[0]);
+
+        return jt_dbg_clear_debug_buf(1, cmd);
+}
+
+/* Pass a marker of the form "====<indent_level>=<text>" to
+ * jt_dbg_mark_debug_buf() and return its result.  The marker is truncated
+ * to fit a PATH_MAX-byte buffer. */
+static inline int ltrace_mark(int indent_level, char* text)
+{
+        char marker[PATH_MAX];
+        char *cmd[2] = { "mark", marker };
+
+        snprintf(marker, sizeof(marker), "====%d=%s", indent_level, text);
+
+        return jt_dbg_mark_debug_buf(2, cmd);
+}
+
+/* Run the ptlctl command "list applymasks" via jt_dbg_list() and return its
+ * result.  The command is echoed to stderr first. */
+static inline int ltrace_applymasks()
+{
+        char *cmd[2] = { "list", "applymasks" };
+
+        fprintf(stderr, "[ptlctl] %s %s\n", cmd[0], cmd[1]);
+
+        return jt_dbg_list(2, cmd);
+}
+
+
+/* Run the ptlctl command "filter <subsys_or_mask>" via jt_dbg_filter() and
+ * return its result. */
+static inline int ltrace_filter(char* subsys_or_mask)
+{
+        char *cmd[2] = { "filter", subsys_or_mask };
+
+        return jt_dbg_filter(2, cmd);
+}
+
+/* Run the ptlctl command "show <subsys_or_mask>" via jt_dbg_show() and
+ * return its result. */
+static inline int ltrace_show(char* subsys_or_mask)
+{
+        char *cmd[2] = { "show", subsys_or_mask };
+
+        return jt_dbg_show(2, cmd);
+}
+
+/* Initialise the debug interface, register the portals ioctl device when
+ * PORTALS_DEV_ID is defined, then issue a fixed sequence of filter/show
+ * commands and apply the masks.
+ *
+ * Returns the result of register_ioc_dev(), or 0 when PORTALS_DEV_ID is not
+ * defined.  Results of the filter/show/applymasks calls are ignored. */
+static inline int ltrace_start()
+{
+        char *filters_before_show[] = {
+                "class", "socknal", "qswnal", "gmnal", "portals"
+        };
+        char *filters_after_show[] = {
+                "trace", "malloc", "net", "page", "other", "info"
+        };
+        unsigned int i;
+        int rc = 0;
+
+        dbg_initialize(0, NULL);
+#ifdef PORTALS_DEV_ID
+        rc = register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH);
+#endif
+        for (i = 0; i < sizeof(filters_before_show) /
+                        sizeof(filters_before_show[0]); i++)
+                ltrace_filter(filters_before_show[i]);
+
+        ltrace_show("all_types");
+
+        for (i = 0; i < sizeof(filters_after_show) /
+                        sizeof(filters_after_show[0]); i++)
+                ltrace_filter(filters_after_show[i]);
+
+        ltrace_applymasks();
+
+        return rc;
+}
+
+
+/* Counterpart of ltrace_start(): unregister the portals ioctl device.
+ * Does nothing when PORTALS_DEV_ID is not defined. */
+static inline void ltrace_stop()
+{
+#ifdef PORTALS_DEV_ID
+        unregister_ioc_dev(PORTALS_DEV_ID);
+#endif
+}
+
+/* Decide whether we are running outside UML by stat()ing /dev/ubd.
+ *
+ * Return Values:
+ *   0 when /dev/ubd exists (run under UML)
+ *   1 when it does not exist (run on host)
+ *   1 also when stat() fails for any other reason; in that case the
+ *     error is reported on stderr and the host is assumed.
+ */
+static inline int not_uml()
+{
+        struct stat st;
+        int rc = stat("/dev/ubd", &st);
+
+        if (rc >= 0)
+                return rc;
+
+        /* ENOENT is the expected "on host" answer; anything else is noisy */
+        if (errno != ENOENT)
+                fprintf(stderr, "Cannot stat /dev/ubd: %s\n", strerror(errno));
+
+        return 1;
+}
+
+#define LTRACE_MAX_NOB 256
+/*
+ * Append one line per running process to the file @fname by running
+ *     ps --no-headers -eo "<prefix> %p %c" >> fname
+ * through system(), where <prefix> is laid out like a CDEBUG line.
+ *
+ * The command is built in a fixed LTRACE_MAX_NOB-byte buffer with a single
+ * bounded snprintf().  If it does not fit (e.g. @fname is too long), an
+ * error is printed to stderr and nothing is executed, instead of writing
+ * past the end of the buffer.
+ *
+ * NOTE(review): @fname is pasted unquoted into a shell command line; callers
+ * must not pass untrusted or shell-special file names.
+ */
+static inline void ltrace_add_processnames(char* fname)
+{
+        char cmdbuf[LTRACE_MAX_NOB];
+        struct timeval tv;
+        struct timezone tz;
+        int nob;
+        int underuml = !not_uml();
+
+        gettimeofday(&tv, &tz);
+
+        /* Careful - these format strings need to match the CDEBUG
+         * formats in portals/linux/debug.c EXACTLY
+         */
+        if (underuml && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))) {
+                nob = snprintf(cmdbuf, sizeof(cmdbuf),
+                               "ps --no-headers -eo \""
+                               "%02x:%06x:%d:%lu.%06lu "
+                               "(%s:%d:%s() %d | %d+%lu): "
+                               " %%p %%c\" >> %s",
+                               S_RPC >> 24, D_VFSTRACE, 0,
+                               (unsigned long)tv.tv_sec,
+                               (unsigned long)tv.tv_usec,
+                               "lltrace.h", __LINE__, __FUNCTION__, 0, 0, 0L,
+                               fname);
+        } else {
+                nob = snprintf(cmdbuf, sizeof(cmdbuf),
+                               "ps --no-headers -eo \""
+                               "%02x:%06x:%d:%lu.%06lu "
+                               "(%s:%d:%s() %d+%lu): "
+                               " %%p %%c\" >> %s",
+                               S_RPC >> 24, D_VFSTRACE, 0,
+                               (unsigned long)tv.tv_sec,
+                               (unsigned long)tv.tv_usec,
+                               "lltrace.h", __LINE__, __FUNCTION__, 0, 0L,
+                               fname);
+        }
+
+        /* snprintf() returns the length it wanted to write; anything that
+         * does not fit means the command was truncated and must not run. */
+        if (nob < 0 || nob >= (int)sizeof(cmdbuf)) {
+                fprintf(stderr, "ltrace_add_processnames: command for %s "
+                        "does not fit in %d bytes\n", fname, LTRACE_MAX_NOB);
+                return;
+        }
+
+        if (system(cmdbuf) == -1)
+                fprintf(stderr, "ltrace_add_processnames: cannot run ps: %s\n",
+                        strerror(errno));
+}
+
+#endif
--- /dev/null
+/*
+** $Id: myrnal.h,v 1.1.2.1 2003/05/19 04:25:31 braam Exp $
+*/
+
+#ifndef MYRNAL_H
+#define MYRNAL_H
+
+/* Size limits.  NOTE(review): presumably upper bounds for the args/ret
+ * buffers of myrnal_forward_t and for the ACL / portal tables -- confirm
+ * against the users. */
+#define MAX_ARGS_LEN (256)
+#define MAX_RET_LEN (128)
+#define MYRNAL_MAX_ACL_SIZE (64)
+#define MYRNAL_MAX_PTL_SIZE (64)
+
+/* Request class codes.  NOTE(review): presumably the values carried in
+ * myrnal_forward_t.p3cmd -- confirm. */
+#define P3CMD (100)
+#define P3SYSCALL (200)
+#define P3REGISTER (300)
+
+enum { PTL_MLOCKALL };
+
+/* One forwarded request: an argument buffer and a return buffer, each with
+ * its length, plus a command code.
+ * NOTE(review): size_t is used here but this header includes nothing that
+ * declares it; it relies on the includer. */
+typedef struct {
+ void *args;
+ size_t args_len;
+ void *ret;
+ size_t ret_len;
+ int p3cmd;
+} myrnal_forward_t;
+
+#endif /* MYRNAL_H */
--- /dev/null
+/*
+** $Id: nal.h,v 1.1.2.1 2003/05/19 04:25:31 braam Exp $
+*/
+#ifndef _NAL_H_
+#define _NAL_H_
+
+/*
+ * p30/nal.h
+ *
+ * The API side NAL declarations
+ */
+
+#include <portals/types.h>
+
+/* struct nal_t below has a member called 'yield'; drop any macro of that
+ * name so the member declaration is not rewritten by the preprocessor. */
+#ifdef yield
+#undef yield
+#endif
+
+typedef struct nal_t nal_t;
+
+/* API-side view of a NAL: per-interface data plus a table of function
+ * pointers through which the API side calls into the NAL.  Every callback
+ * receives the nal_t itself as its first argument. */
+struct nal_t {
+ ptl_ni_t ni;
+ int refct;
+ void *nal_data;
+ int *timeout; /* for libp30api users */
+ /* forward a request: 'index' selects the function, args/ret are the
+  * argument and result buffers with their lengths */
+ int (*forward) (nal_t * nal, int index, /* Function ID */
+ void *args, size_t arg_len, void *ret, size_t ret_len);
+
+ int (*shutdown) (nal_t * nal, int interface);
+
+ int (*validate) (nal_t * nal, void *base, size_t extent);
+
+ void (*yield) (nal_t * nal);
+
+ /* NOTE(review): lock/unlock presumably bracket critical sections, with
+  * *flags used to save and restore state -- confirm per NAL */
+ void (*lock) (nal_t * nal, unsigned long *flags);
+
+ void (*unlock) (nal_t * nal, unsigned long *flags);
+};
+
+/* Function type of a NAL interface entry point: returns a nal_t pointer.
+ * PTL_IFACE_IP and PTL_IFACE_MYR are declared with the same signature. */
+typedef nal_t *(ptl_interface_t) (int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid);
+extern nal_t *PTL_IFACE_IP(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid);
+extern nal_t *PTL_IFACE_MYR(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid);
+
+/* Map an API handle to a nal_t; implemented elsewhere. */
+extern nal_t *ptl_hndl2nal(ptl_handle_any_t * any);
+
+/* Default interface, overridable by defining PTL_IFACE_DEFAULT beforehand. */
+#ifndef PTL_IFACE_DEFAULT
+#define PTL_IFACE_DEFAULT (PTL_IFACE_IP)
+#endif
+
+#endif
--- /dev/null
+/* Numeric NAL identifiers.  PTL_IFACE_MAX is one greater than the largest
+ * identifier defined here.
+ * NOTE(review): this header has no include guard; the definitions are
+ * identical on re-inclusion, so that is harmless. */
+#define PTL_IFACE_TCP 1
+#define PTL_IFACE_ER 2
+#define PTL_IFACE_SS 3
+#define PTL_IFACE_MAX 4
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _P30_H_
+#define _P30_H_
+
+/*
+ * p30.h
+ *
+ * User application interface file
+ */
+
+#if defined (__KERNEL__)
+#include <linux/uio.h>
+#include <linux/types.h>
+#else
+#include <sys/types.h>
+#include <sys/uio.h>
+#endif
+
+#include <portals/types.h>
+#include <portals/nal.h>
+#include <portals/api.h>
+#include <portals/errno.h>
+#include <portals/nalids.h>
+
+/* Global state exported for libraries and test programs; defined elsewhere. */
+extern int __p30_initialized; /* for libraries & test codes */
+extern int __p30_myr_initialized; /* that don't know if p30 */
+extern int __p30_ip_initialized; /* had been initialized yet */
+extern ptl_handle_ni_t __myr_ni_handle, __ip_ni_handle;
+
+extern int __p30_myr_timeout; /* in seconds, for PtlNIBarrier, */
+extern int __p30_ip_timeout; /* PtlReduce_all, & PtlBroadcast_all */
+
+/*
+ * Debugging flags reserved for the Portals reference library.
+ * These are not part of the API as described in the SAND report
+ * but are for the use of the maintainers of the reference implementation.
+ *
+ * It is not expected that the real implementations will export
+ * this functionality.
+ *
+ * PTL_DEBUG_ALL covers bits 0..11; the flags defined below use bits 0..9.
+ */
+#define PTL_DEBUG_NONE 0ul
+#define PTL_DEBUG_ALL (0x0FFFul) /* Only the Portals flags */
+
+/* Single-bit mask of type unsigned long with bit x set. */
+#define __bit(x) ((unsigned long) 1<<(x))
+#define PTL_DEBUG_PUT __bit(0)
+#define PTL_DEBUG_GET __bit(1)
+#define PTL_DEBUG_REPLY __bit(2)
+#define PTL_DEBUG_ACK __bit(3)
+#define PTL_DEBUG_DROP __bit(4)
+#define PTL_DEBUG_REQUEST __bit(5)
+#define PTL_DEBUG_DELIVERY __bit(6)
+#define PTL_DEBUG_UNLINK __bit(7)
+#define PTL_DEBUG_THRESHOLD __bit(8)
+#define PTL_DEBUG_API __bit(9)
+
+/*
+ * These eight are reserved for the NAL to define
+ * It should probably give them better names...
+ *
+ * PTL_DEBUG_NI_ALL is the union of the eight flags below (bits 24..31).
+ * It is spelled as a literal so it stays usable in #if expressions.
+ */
+#define PTL_DEBUG_NI_ALL (0xFF000000ul) /* Only the NAL flags */
+#define PTL_DEBUG_NI0 __bit(24)
+#define PTL_DEBUG_NI1 __bit(25)
+#define PTL_DEBUG_NI2 __bit(26)
+#define PTL_DEBUG_NI3 __bit(27)
+#define PTL_DEBUG_NI4 __bit(28)
+#define PTL_DEBUG_NI5 __bit(29)
+#define PTL_DEBUG_NI6 __bit(30)
+#define PTL_DEBUG_NI7 __bit(31)
+
+#endif
--- /dev/null
+/*
+ * TITLE(ppid_h, "@(#) $Id: ppid.h,v 1.1.2.1 2003/05/19 04:25:31 braam Exp $");
+ */
+
+#ifndef _INCppidh_
+#define _INCppidh_
+
+#include "defines.h"
+// #include "idtypes.h"
+
+
+/* Upper limits for portals process IDs (PPID) and group IDs (GID). */
+#define MAX_PPID 1000 /* this needs to fit into 16 bits so the
+ maximum value is 65535. having it "large"
+ can help w/ debugging process accounting
+ but there are reasons for making it
+ somewhat smaller than the maximum --
+ requiring storage for arrays that index
+ on the ppid, eg... */
+
+#define MAX_GID 1000 /* this needs to fit into 16 bits... */
+
+/* IDs up to MAX_FIXED_* are fixed (well-known) assignments; the floating
+ * area starts right after them.  The derived values are parenthesised so
+ * they expand correctly inside larger expressions
+ * (e.g. NUM_PTL_TASKS * sizeof(x)). */
+#define MAX_FIXED_PPID 100
+#define MAX_FIXED_GID 100
+#define PPID_FLOATING (MAX_FIXED_PPID+1) /* Floating area starts here */
+#define GID_FLOATING (MAX_FIXED_GID+1) /* Floating area starts here */
+#define NUM_PTL_TASKS (MAX_FIXED_PPID+80) /* Maximum no. portals tasks */
+
+/* NOTE(review): 0 lies below the minimum PPID of 1, so PPID_AUTO presumably
+ * requests automatic assignment rather than naming a process -- confirm. */
+#define PPID_AUTO 0
+
+/* Fixed PPID/GID assignments for well-known services.  All values here are
+ * below MAX_FIXED_PPID / MAX_FIXED_GID. */
+/* Minimum PPID is 1 */
+#define PPID_BEBOPD 1 /* bebopd */
+#define GID_BEBOPD 1 /* bebopd */
+
+#define PPID_PCT 2 /* pct */
+#define GID_PCT 2 /* pct */
+
+#define PPID_FYOD 3 /* fyod */
+#define GID_FYOD 3 /* fyod */
+
+#define PPID_GDBWRAP 11 /* portals proxy for gdb */
+#define GID_GDBWRAP 11 /* portals proxy for gdb */
+
+#define PPID_TEST 15 /* for portals tests */
+#define GID_TEST 15
+
+/* Group IDs that have no matching fixed PPID in this header. */
+#define GID_YOD 5 /* yod */
+#define GID_PINGD 6 /* pingd */
+#define GID_BT 7 /* bt */
+#define GID_PTLTEST 8 /* ptltest */
+#define GID_CGDB 9 /* cgdb */
+#define GID_TVDSVR 10 /* start-tvdsvr */
+
+#endif /* _INCppidh_ */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * header for libptlctl.a
+ */
+#ifndef _PTLCTL_H_
+#define _PTLCTL_H_
+
+/* ioctl device IDs and their device node paths, as passed to
+ * register_ioc_dev() (declared below). */
+#define PORTALS_DEV_ID 0
+#define PORTALS_DEV_PATH "/dev/portals"
+#define OBD_DEV_ID 1
+#define OBD_DEV_PATH "/dev/obd"
+
+/* NAL name / NID conversion helpers.
+ * NOTE(review): ptl_nid_t is used here but this header includes nothing
+ * that declares it; includers must pull in <portals/types.h> first. */
+int ptl_name2nal(char *str);
+int ptl_parse_nid (ptl_nid_t *nidp, char *str);
+char * ptl_nid2str (char *buffer, ptl_nid_t nid);
+
+/* Portals control commands.  Each jt_* function takes an argc/argv pair
+ * like main(); the ltrace_* wrappers in lltrace.h put the command name in
+ * argv[0]. */
+int ptl_initialize(int argc, char **argv);
+int jt_ptl_network(int argc, char **argv);
+int jt_ptl_connect(int argc, char **argv);
+int jt_ptl_disconnect(int argc, char **argv);
+int jt_ptl_push_connection(int argc, char **argv);
+int jt_ptl_ping(int argc, char **argv);
+int jt_ptl_mynid(int argc, char **argv);
+int jt_ptl_add_uuid(int argc, char **argv);
+int jt_ptl_add_uuid_old(int argc, char **argv); /* backwards compatibility */
+int jt_ptl_close_uuid(int argc, char **argv);
+int jt_ptl_del_uuid(int argc, char **argv);
+int jt_ptl_rxmem (int argc, char **argv);
+int jt_ptl_txmem (int argc, char **argv);
+int jt_ptl_nagle (int argc, char **argv);
+int jt_ptl_add_route (int argc, char **argv);
+int jt_ptl_del_route (int argc, char **argv);
+int jt_ptl_print_routes (int argc, char **argv);
+int jt_ptl_fail_nid (int argc, char **argv);
+
+/* Debug-log control commands, same argc/argv convention as above.  Several
+ * are wrapped by the ltrace_* helpers in lltrace.h. */
+int dbg_initialize(int argc, char **argv);
+int jt_dbg_filter(int argc, char **argv);
+int jt_dbg_show(int argc, char **argv);
+int jt_dbg_list(int argc, char **argv);
+int jt_dbg_debug_kernel(int argc, char **argv);
+int jt_dbg_debug_daemon(int argc, char **argv);
+int jt_dbg_debug_file(int argc, char **argv);
+int jt_dbg_clear_debug_buf(int argc, char **argv);
+int jt_dbg_mark_debug_buf(int argc, char **argv);
+int jt_dbg_modules(int argc, char **argv);
+int jt_dbg_panic(int argc, char **argv);
+
+/* l_ioctl.c */
+/* ioctl device registry and ioctl-dump helpers.  dev_id is one of the
+ * *_DEV_ID values defined at the top of this header. */
+int register_ioc_dev(int dev_id, const char * dev_name);
+void unregister_ioc_dev(int dev_id);
+int set_ioctl_dump(char * file);
+int l_ioctl(int dev_id, int opc, void *buf);
+/* ioc_func has the same parameter list as l_ioctl() */
+int parse_dump(char * dump_file, int (*ioc_func)(int dev_id, int opc, void *));
+int jt_ioc_dump(int argc, char **argv);
+
+#endif
--- /dev/null
+/*
+** $Id: stringtab.h,v 1.1.2.1 2003/05/19 04:25:31 braam Exp $
+*/
+/*
+ * stringtab.h
+ */
--- /dev/null
+#ifndef _P30_TYPES_H_
+#define _P30_TYPES_H_
+
+/* On Linux the __u32/__u64 and cycles_t definitions come from the asm
+ * headers; elsewhere minimal stand-ins are provided and get_cycles()
+ * always returns 0.
+ * NOTE(review): the non-Linux branch defines only __u32 and __u64; code
+ * that also needs __u16 (e.g. ptl_magicversion_t in lib-types.h) must get
+ * it from somewhere else -- confirm for non-Linux builds. */
+#ifdef __linux__
+#include <asm/types.h>
+#include <asm/timex.h>
+#else
+#include <sys/types.h>
+typedef u_int32_t __u32;
+typedef u_int64_t __u64;
+typedef unsigned long long cycles_t;
+static inline cycles_t get_cycles(void) { return 0; }
+#endif
+
+/* Basic Portals scalar types: node IDs, match bits and header data are
+ * 64 bits wide; process IDs, table indices and sizes are 32 bits wide. */
+typedef __u64 ptl_nid_t;
+typedef __u32 ptl_pid_t;
+typedef __u32 ptl_pt_index_t;
+typedef __u32 ptl_ac_index_t;
+typedef __u64 ptl_match_bits_t;
+typedef __u64 ptl_hdr_data_t;
+typedef __u32 ptl_size_t;
+
+/* Generic API handle: interface index plus a per-interface object cookie.
+ * The NI, EQ, MD and ME handle types are all aliases of this one type. */
+typedef struct {
+ unsigned long nal_idx; /* which network interface */
+ __u64 cookie; /* which thing on that interface */
+} ptl_handle_any_t;
+
+typedef ptl_handle_any_t ptl_handle_ni_t;
+typedef ptl_handle_any_t ptl_handle_eq_t;
+typedef ptl_handle_any_t ptl_handle_md_t;
+typedef ptl_handle_any_t ptl_handle_me_t;
+
+/* "No handle" value: both fields are -1 converted to their unsigned types
+ * (all bits set).  Written as a compound literal with designated
+ * initialisers. */
+#define PTL_HANDLE_NONE \
+((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1})
+#define PTL_EQ_NONE PTL_HANDLE_NONE
+
+/* Compare two handles (passed by value): returns 1 when both the interface
+ * index and the cookie match, 0 otherwise. */
+static inline int PtlHandleEqual (ptl_handle_any_t h1, ptl_handle_any_t h2)
+{
+        if (h1.nal_idx != h2.nal_idx)
+                return 0;
+
+        return (h1.cookie == h2.cookie);
+}
+
+/* Wildcard IDs: -1 converted to the (unsigned) ID type, i.e. all bits set. */
+#define PTL_NID_ANY ((ptl_nid_t) -1)
+#define PTL_PID_ANY ((ptl_pid_t) -1)
+
+/* A process address: node ID plus process ID. */
+typedef struct {
+ ptl_nid_t nid;
+ ptl_pid_t pid; /* node id / process id */
+} ptl_process_id_t;
+
+/* Unlink policy; stored in the 'unlink' members of lib_me_t and lib_md_t.
+ * PTL_RETAIN is explicitly 0. */
+typedef enum {
+ PTL_RETAIN = 0,
+ PTL_UNLINK
+} ptl_unlink_t;
+
+/* Insert position selector.  NOTE(review): presumably relative to an
+ * existing match entry -- confirm against the API that takes it. */
+typedef enum {
+ PTL_INS_BEFORE,
+ PTL_INS_AFTER
+} ptl_ins_pos_t;
+
+/* One page-based I/O vector fragment.
+ * NOTE(review): presumably kiov_len bytes starting kiov_offset bytes into
+ * kiov_page, used when PTL_MD_KIOV is set - confirm against lib-move. */
+typedef struct {
+ struct page *kiov_page;
+ unsigned int kiov_len;
+ unsigned int kiov_offset;
+} ptl_kiov_t;
+
+/* Memory descriptor as supplied by the API user.  'options' is a bitmask
+ * of the PTL_MD_* flags defined below this structure.
+ * NOTE(review): presumably 'threshold' accepts PTL_MD_THRESH_INF, and
+ * 'niov' is only meaningful with PTL_MD_IOV / PTL_MD_KIOV - confirm. */
+typedef struct {
+ void *start;
+ ptl_size_t length;
+ int threshold;
+ int max_size;
+ unsigned int options;
+ void *user_ptr;
+ ptl_handle_eq_t eventq;
+ unsigned int niov;
+} ptl_md_t;
+
+/* Options for the MD structure */
+#define PTL_MD_OP_PUT (1 << 0)
+#define PTL_MD_OP_GET (1 << 1)
+#define PTL_MD_MANAGE_REMOTE (1 << 2)
+#define PTL_MD_AUTO_UNLINK (1 << 3)
+#define PTL_MD_TRUNCATE (1 << 4)
+#define PTL_MD_ACK_DISABLE (1 << 5)
+#define PTL_MD_IOV (1 << 6)
+#define PTL_MD_MAX_SIZE (1 << 7)
+#define PTL_MD_KIOV (1 << 8)
+
+#define PTL_MD_THRESH_INF (-1)
+
+typedef enum {
+ PTL_EVENT_GET,
+ PTL_EVENT_PUT,
+ PTL_EVENT_REPLY,
+ PTL_EVENT_ACK,
+ PTL_EVENT_SENT
+} ptl_event_kind_t;
+
+/* Event sequence numbers are unsigned PTL_SEQ_BASETYPE.  PTL_SEQ_GT
+ * compares two of them through the sign of their difference rather than
+ * comparing the raw values directly. */
+#define PTL_SEQ_BASETYPE long
+typedef unsigned PTL_SEQ_BASETYPE ptl_seq_t;
+#define PTL_SEQ_GT(a,b) (((signed PTL_SEQ_BASETYPE)((a) - (b))) > 0)
+
+/* One event record.  'type' says which ptl_event_kind_t occurred and
+ * 'sequence' is declared volatile.
+ * NOTE(review): presumably 'sequence' is written last by the producer so
+ * a consumer polling the queue can tell when the record is complete -
+ * confirm against the event queue implementation. */
+typedef struct {
+ ptl_event_kind_t type;
+ ptl_process_id_t initiator;
+ ptl_pt_index_t portal;
+ ptl_match_bits_t match_bits;
+ ptl_size_t rlength, mlength, offset;
+ ptl_handle_me_t unlinked_me;
+ ptl_md_t mem_desc;
+ ptl_hdr_data_t hdr_data;
+ cycles_t arrival_time;
+ volatile ptl_seq_t sequence;
+} ptl_event_t;
+
+
+typedef enum {
+ PTL_ACK_REQ,
+ PTL_NOACK_REQ
+} ptl_ack_req_t;
+
+
+/* Event queue descriptor: 'base' points at ptl_event_t records.
+ * NOTE(review): presumably 'size' is the number of records at 'base' and
+ * 'sequence' is the next sequence number expected - confirm. */
+typedef struct {
+ volatile ptl_seq_t sequence;
+ ptl_size_t size;
+ ptl_event_t *base;
+ ptl_handle_any_t cb_eq_handle;
+} ptl_eq_t;
+
+typedef struct {
+ ptl_eq_t *eq;
+} ptl_ni_t;
+
+
+typedef struct {
+ int max_match_entries; /* max number of match entries */
+ int max_mem_descriptors; /* max number of memory descriptors */
+ int max_event_queues; /* max number of event queues */
+ int max_atable_index; /* maximum access control list table index */
+ int max_ptable_index; /* maximum portals table index */
+} ptl_ni_limits_t;
+
+/*
+ * Status registers
+ */
+typedef enum {
+ PTL_SR_DROP_COUNT,
+ PTL_SR_DROP_LENGTH,
+ PTL_SR_RECV_COUNT,
+ PTL_SR_RECV_LENGTH,
+ PTL_SR_SEND_COUNT,
+ PTL_SR_SEND_LENGTH,
+ PTL_SR_MSGS_MAX,
+} ptl_sr_index_t;
+
+typedef int ptl_sr_value_t;
+
+#endif
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+SUBDIRS= socknal toenal @QSWNAL@ @GMNAL@ @SCIMACNAL@
--- /dev/null
+include ../Kernelenv
+
+obj-y = socknal/
+# more coming...
\ No newline at end of file
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+include ../../Rules.linux
+
+MODULE = kgmnal
+modulenet_DATA = kgmnal.o
+EXTRA_PROGRAMS = kgmnal
+
+DEFS =
+kgmnal_SOURCES = gmnal.c gmnal_cb.c gmnal.h
--- /dev/null
+diff -ru gm-1.5.2.1_Linux/drivers/linux/gm/gm_arch.c gm-1.5.2.1_Linux-cfs/drivers/linux/gm/gm_arch.c
+--- gm-1.5.2.1_Linux/drivers/linux/gm/gm_arch.c Mon Jul 1 10:35:09 2002
++++ gm-1.5.2.1_Linux-cfs/drivers/linux/gm/gm_arch.c Thu Sep 19 14:19:38 2002
+@@ -30,6 +30,8 @@
+ *
+ ************************************************************************/
+
++#define EXPORT_SYMTAB
++
+ #include <linux/config.h>
+ #include <linux/module.h>
+
+@@ -4075,6 +4077,28 @@
+ return 0;
+ }
+
++EXPORT_SYMBOL(gm_blocking_receive_no_spin);
++EXPORT_SYMBOL(gm_close);
++EXPORT_SYMBOL(gm_dma_free);
++EXPORT_SYMBOL(gm_dma_malloc);
++EXPORT_SYMBOL(gm_drop_sends);
++EXPORT_SYMBOL(gm_finalize);
++EXPORT_SYMBOL(gm_get_node_id);
++EXPORT_SYMBOL(gm_init);
++EXPORT_SYMBOL(gm_initialize_alarm);
++EXPORT_SYMBOL(gm_max_node_id_in_use);
++EXPORT_SYMBOL(gm_min_size_for_length);
++EXPORT_SYMBOL(gm_num_receive_tokens);
++EXPORT_SYMBOL(gm_num_send_tokens);
++EXPORT_SYMBOL(gm_open);
++EXPORT_SYMBOL(gm_provide_receive_buffer);
++EXPORT_SYMBOL(gm_resume_sending);
++EXPORT_SYMBOL(gm_send_with_callback);
++EXPORT_SYMBOL(gm_set_acceptable_sizes);
++EXPORT_SYMBOL(gm_set_alarm);
++EXPORT_SYMBOL(gm_unknown);
++
++
+ /*
+ This file uses GM standard indentation.
+
+Only in gm-1.5.2.1_Linux-cfs/drivers/linux/gm: gm_arch.c~
+Only in gm-1.5.2.1_Linux-cfs/: trace
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Based on ksocknal and qswnal
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Robert Read <rread@datarithm.net>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "gmnal.h"
+
+ptl_handle_ni_t kgmnal_ni;
+nal_t kgmnal_api;
+
+kgmnal_data_t kgmnal_data;
+int gmnal_debug = 0;
+
+/* Router interface descriptor for this NAL: identifies it as GMNAL and
+ * names kgmnal_fwd_packet as the forwarding entry point.
+ * NOTE(review): the symbol has a "kqswnal_" prefix although it lives in
+ * the GM NAL; it looks copied from qswnal and would clash with the qswnal
+ * global of the same name if both were linked into one image - confirm
+ * and rename. */
+kpr_nal_interface_t kqswnal_router_interface = {
+ kprni_nalid: GMNAL,
+ kprni_arg: NULL,
+ kprni_fwd: kgmnal_fwd_packet,
+};
+
+/* API-side request entry point for the GM NAL.  Hands request 'id' and
+ * its argument/return buffers to lib_dispatch(); args_len and ret_len
+ * are not consulted here.  Always returns PTL_OK. */
+static int kgmnal_forward(nal_t *nal,
+ int id,
+ void *args, size_t args_len,
+ void *ret, size_t ret_len)
+{
+ kgmnal_data_t *k = nal->nal_data;
+ nal_cb_t *nal_cb = k->kgm_cb;
+
+ /* only the single static GM NAL instance is expected here */
+ LASSERT (nal == &kgmnal_api);
+ LASSERT (k == &kgmnal_data);
+ LASSERT (nal_cb == &kgmnal_lib);
+
+ lib_dispatch(nal_cb, k, id, args, ret); /* nal needs k */
+ return PTL_OK;
+}
+
+/* API-side lock: delegates to the library callback cb_cli, passing
+ * 'flags' through to it. */
+static void kgmnal_lock(nal_t *nal, unsigned long *flags)
+{
+ kgmnal_data_t *k = nal->nal_data;
+ nal_cb_t *nal_cb = k->kgm_cb;
+
+
+ LASSERT (nal == &kgmnal_api);
+ LASSERT (k == &kgmnal_data);
+ LASSERT (nal_cb == &kgmnal_lib);
+
+ nal_cb->cb_cli(nal_cb,flags);
+}
+
+/* API-side unlock: delegates to the library callback cb_sti, passing
+ * 'flags' through to it. */
+static void kgmnal_unlock(nal_t *nal, unsigned long *flags)
+{
+ kgmnal_data_t *k = nal->nal_data;
+ nal_cb_t *nal_cb = k->kgm_cb;
+
+
+ LASSERT (nal == &kgmnal_api);
+ LASSERT (k == &kgmnal_data);
+ LASSERT (nal_cb == &kgmnal_lib);
+
+ nal_cb->cb_sti(nal_cb,flags);
+}
+
+/* API shutdown hook.  Does no work beyond checking it was handed the GM
+ * NAL; 'ni' is unused.  Always returns 0. */
+static int kgmnal_shutdown(nal_t *nal, int ni)
+{
+ LASSERT (nal == &kgmnal_api);
+ return 0;
+}
+
+/* API yield hook: give up the CPU, but only if the scheduler has flagged
+ * the current task for rescheduling. */
+static void kgmnal_yield( nal_t *nal )
+{
+        LASSERT (nal == &kgmnal_api);
+
+        if (!current->need_resched)
+                return;
+
+        schedule();
+}
+
+/* Allocate a receive descriptor and link it onto data->kgm_list.  Only
+ * the list linkage is set up here; 'ndx' is currently unused.  Returns
+ * the new descriptor, or NULL (after logging) if allocation fails. */
+kgmnal_rx_t *kgm_add_recv(kgmnal_data_t *data,int ndx)
+{
+        kgmnal_rx_t *conn;
+
+        PORTAL_ALLOC(conn, sizeof(*conn));
+        if (conn != NULL) {
+                list_add(&conn->krx_item, &data->kgm_list);
+                return conn;
+        }
+
+        /* out of memory */
+        printk("kgm_add_recv: memory alloc failed\n");
+        return NULL;
+}
+
+/* NAL init callback handed to PtlNIInit().  Asks GM for the highest node
+ * id in use, initialises the Portals library with this node's nid and
+ * the requested table sizes, and returns the API-side NAL.  'interface'
+ * and 'requested_pid' are not used. */
+static nal_t *kgmnal_init(int interface, ptl_pt_index_t ptl_size,
+ ptl_ac_index_t ac_size, ptl_pid_t requested_pid)
+{
+ unsigned int nnids;
+
+ gm_max_node_id_in_use(kgmnal_data.kgm_port, &nnids);
+
+ CDEBUG(D_NET, "calling lib_init with nid 0x%Lx of %d\n",
+ kgmnal_data.kgm_nid, nnids);
+ lib_init(&kgmnal_lib, kgmnal_data.kgm_nid, 0, nnids,ptl_size, ac_size);
+ return &kgmnal_api;
+}
+
+/* Module exit: unregister the NI symbol, shut down the Portals interface
+ * and library, close the GM port and free the transmit descriptor array
+ * and every receive descriptor still on kgm_list. */
+static void __exit
+kgmnal_finalize(void)
+{
+        PORTAL_SYMBOL_UNREGISTER (kgmnal_ni);
+        PtlNIFini(kgmnal_ni);
+        /* lib_init() was given kgmnal_lib, so that is what gets finalised */
+        lib_fini(&kgmnal_lib);
+
+        if (kgmnal_data.kgm_port) {
+                gm_close(kgmnal_data.kgm_port);
+        }
+
+        /* FIXME: free dma buffers */
+        /* FIXME: kill receiver thread */
+
+        PORTAL_FREE (kgmnal_data.kgm_trans, sizeof(kgmnal_tx_t)*TXMSGS);
+
+        /* Pop entries off the head until the list is empty; iterating with
+         * list_for_each() while deleting would walk through freed nodes
+         * and skip entries. */
+        while (!list_empty(&kgmnal_data.kgm_list)) {
+                kgmnal_rx_t *conn;
+
+                conn = list_entry(kgmnal_data.kgm_list.next,
+                                  kgmnal_rx_t, krx_item);
+                CDEBUG(D_IOCTL, "freeing conn %p\n",conn);
+                list_del(&conn->krx_item);
+                PORTAL_FREE(conn, sizeof(*conn));
+        }
+
+        CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read (&portal_kmemory));
+}
+
+/* Allocate 'nbufs' DMA-able receive buffers of 'len' bytes each and hand
+ * every one to GM as a low-priority receive buffer of GM size 'size'.
+ * Returns 0 on success or -ENOMEM if an allocation fails; buffers already
+ * handed to GM stay with the port. */
+static int __init
+kgmnal_post_rx_buffers(int nbufs, unsigned long len, unsigned int size)
+{
+        while (nbufs-- > 0) {
+                void *buffer = gm_dma_malloc(kgmnal_data.kgm_port, len);
+
+                if (buffer == NULL) {
+                        CERROR("gm_dma_malloc of %lu bytes failed\n", len);
+                        return -ENOMEM;
+                }
+                CDEBUG(D_NET, " add buffer: port %p buf %p len %lu size %u "
+                       "pri %d\n ", kgmnal_data.kgm_port, buffer,
+                       len, size, GM_LOW_PRIORITY);
+
+                gm_provide_receive_buffer(kgmnal_data.kgm_port, buffer,
+                                          size, GM_LOW_PRIORITY);
+        }
+
+        return 0;
+}
+
+/* Module init: wire up the API and library NAL structures, allocate the
+ * transmit descriptors, open the GM port, post receive buffers, bring up
+ * the Portals network interface and start the receiver thread.
+ *
+ * Returns 0 on success or a negative errno.  On failure everything
+ * acquired so far (descriptor array, GM port, GM library reference,
+ * Portals NI) is released again before returning. */
+static int __init
+kgmnal_initialize(void)
+{
+        int rc;
+        int ntok;
+        unsigned long sizemask;
+        unsigned int nid;
+
+        CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read (&portal_kmemory));
+
+        kgmnal_api.forward = kgmnal_forward;
+        kgmnal_api.shutdown = kgmnal_shutdown;
+        kgmnal_api.yield = kgmnal_yield;
+        kgmnal_api.validate = NULL;                /* our api validate is a NOOP */
+        kgmnal_api.lock= kgmnal_lock;
+        kgmnal_api.unlock= kgmnal_unlock;
+        kgmnal_api.nal_data = &kgmnal_data;
+
+        kgmnal_lib.nal_data = &kgmnal_data;
+
+        memset(&kgmnal_data, 0, sizeof(kgmnal_data));
+
+        INIT_LIST_HEAD(&kgmnal_data.kgm_list);
+        kgmnal_data.kgm_cb = &kgmnal_lib;
+
+        /* Allocate transmit descriptors */
+        PORTAL_ALLOC (kgmnal_data.kgm_trans, sizeof(kgmnal_tx_t)*TXMSGS);
+        if (kgmnal_data.kgm_trans==NULL) {
+                printk("kgmnal: init: failed to allocate transmit "
+                       "descriptors\n");
+                return -ENOMEM;
+        }
+        memset(kgmnal_data.kgm_trans,-1,sizeof(kgmnal_tx_t)*(TXMSGS));
+
+        spin_lock_init(&kgmnal_data.kgm_dispatch_lock);
+        spin_lock_init(&kgmnal_data.kgm_update_lock);
+        spin_lock_init(&kgmnal_data.kgm_send_lock);
+
+        /* Do the receiver and xmtr allocation */
+
+        rc = gm_init();
+        if (rc != GM_SUCCESS) {
+                CERROR("gm_init failed: %d\n", rc);
+                rc = -EIO;
+                goto failed_trans;
+        }
+
+        rc = gm_open(&kgmnal_data.kgm_port, 0 , KGM_PORT_NUM, KGM_HOSTNAME,
+                     GM_API_VERSION_1_1);
+        if (rc != GM_SUCCESS) {
+                CERROR("gm_open failed: %d\n", rc);
+                kgmnal_data.kgm_port = NULL;
+                rc = -EIO;
+                goto failed_gm;
+        }
+        gm_get_node_id(kgmnal_data.kgm_port, &nid);
+        kgmnal_data.kgm_nid = nid;
+
+        /* Allocate 2 different sizes of buffers.  For now, use half
+           the tokens for each. */
+        ntok = gm_num_receive_tokens(kgmnal_data.kgm_port)/2;
+        CDEBUG(D_NET, "gmnal_init: creating %d large %d byte recv buffers\n",
+               ntok, (int)(MSG_LEN_LARGE));
+        rc = kgmnal_post_rx_buffers(ntok, MSG_LEN_LARGE, MSG_SIZE_LARGE);
+        if (rc != 0)
+                goto failed_port;
+
+        ntok = gm_num_receive_tokens(kgmnal_data.kgm_port)/2;
+        CDEBUG(D_NET, "gmnal_init: creating %d small %d byte recv buffers\n",
+               ntok, (int)(MSG_LEN_SMALL));
+        rc = kgmnal_post_rx_buffers(ntok, MSG_LEN_SMALL, MSG_SIZE_SMALL);
+        if (rc != 0)
+                goto failed_port;
+
+        /* only the two sizes posted above are acceptable, low priority only */
+        sizemask = (1UL << MSG_SIZE_LARGE) | (1UL << MSG_SIZE_SMALL);
+        CDEBUG(D_NET, "gm_set_acceptable_sizes port %p pri %d mask 0x%lx\n",
+               kgmnal_data.kgm_port, GM_LOW_PRIORITY, sizemask);
+        gm_set_acceptable_sizes(kgmnal_data.kgm_port, GM_LOW_PRIORITY,
+                                sizemask);
+        gm_set_acceptable_sizes(kgmnal_data.kgm_port, GM_HIGH_PRIORITY, 0);
+
+        /* Initialize Network Interface */
+        rc = PtlNIInit(kgmnal_init, 32, 4, 0, &kgmnal_ni);
+        if (rc) {
+                CERROR("PtlNIInit failed %d\n", rc);
+                rc = -ENOMEM;
+                goto failed_port;
+        }
+
+        /* Start receiver thread */
+        rc = kernel_thread(kgmnal_recv_thread, &kgmnal_data, 0);
+        if (rc < 0) {
+                CERROR("can't spawn receiver thread: %d\n", rc);
+                PtlNIFini(kgmnal_ni);
+                lib_fini(&kgmnal_lib);
+                goto failed_port;
+        }
+
+        PORTAL_SYMBOL_REGISTER(kgmnal_ni);
+
+        kgmnal_data.kgm_init = 1;
+
+        return 0;
+
+ failed_port:
+        /* NOTE(review): receive buffers already handed to GM are not freed
+         * one by one here; this relies on gm_close() reclaiming them -
+         * confirm against the GM documentation. */
+        gm_close(kgmnal_data.kgm_port);
+        kgmnal_data.kgm_port = NULL;
+ failed_gm:
+        gm_finalize();
+ failed_trans:
+        PORTAL_FREE (kgmnal_data.kgm_trans, sizeof(kgmnal_tx_t)*TXMSGS);
+        kgmnal_data.kgm_trans = NULL;
+        return rc;
+}
+
+MODULE_AUTHOR("Robert Read <rread@datarithm.net>");
+MODULE_DESCRIPTION("Kernel Myrinet GM NAL v0.1");
+MODULE_LICENSE("GPL");
+
+module_init (kgmnal_initialize);
+module_exit (kgmnal_finalize);
+
+EXPORT_SYMBOL (kgmnal_ni);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _GMNAL_H
+#define _GMNAL_H
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/locks.h>
+#include <linux/unistd.h>
+#include <linux/init.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/list.h>
+#include <asm/uaccess.h>
+#include <asm/segment.h>
+
+#define DEBUG_SUBSYSTEM S_GMNAL
+
+#include <linux/kp30.h>
+#include <portals/p30.h>
+#include <portals/lib-p30.h>
+
+#include <gm.h>
+
+
+/*
+ * Myrinet GM NAL
+ */
+/* Receive buffers come in two sizes, given here in pages. */
+#define NPAGES_LARGE 16
+#define NPAGES_SMALL 1
+/* Buffer lengths in bytes.  Parenthesised so the product expands safely
+ * inside a larger expression (e.g. next to a division or a cast). */
+#define MSG_LEN_LARGE (NPAGES_LARGE*PAGE_SIZE)
+#define MSG_LEN_SMALL (NPAGES_SMALL*PAGE_SIZE)
+/* GM "size" value covering each buffer length, as computed by GM itself */
+#define MSG_SIZE_LARGE (gm_min_size_for_length(MSG_LEN_LARGE))
+#define MSG_SIZE_SMALL (gm_min_size_for_length(MSG_LEN_SMALL))
+
+#define TXMSGS 64 /* Number of Transmit Messages */
+#define ENVELOPES 8 /* Number of outstanding receive msgs */
+
+#define KGM_PORT_NUM 3
+#define KGM_HOSTNAME "kgmnal"
+
+
+/* Receive descriptor: one GM receive buffer plus what is needed to hand
+ * it back to GM (krx_size and krx_priority are the arguments passed to
+ * gm_provide_receive_buffer on requeue). */
+typedef struct {
+ char *krx_buffer;
+ unsigned long krx_len;
+ unsigned int krx_size;
+ unsigned int krx_priority;
+ struct list_head krx_item;
+} kgmnal_rx_t;
+
+
+/* Transmit descriptor, filled in by kgmnal_send and consumed by the send
+ * completion callback.  ktx_nal/ktx_private/ktx_cookie are what
+ * lib_finalize is called with; ktx_buffer is the DMA buffer holding the
+ * coalesced header and payload; ktx_tgt_node/ktx_tgt_port_id/ktx_priority
+ * identify the GM destination for error recovery calls. */
+typedef struct {
+ nal_cb_t *ktx_nal;
+ void *ktx_private;
+ lib_msg_t *ktx_cookie;
+ char *ktx_buffer;
+ size_t ktx_len;
+ unsigned long ktx_size;
+ int ktx_ndx;
+ unsigned int ktx_priority;
+ unsigned int ktx_tgt_node;
+ unsigned int ktx_tgt_port_id;
+} kgmnal_tx_t;
+
+
+/* Global state of the GM NAL (a single static instance, kgmnal_data).
+ * kgm_init is set to 1 at the end of module init; kgm_shuttingdown is set
+ * to stop the receiver thread; kgm_port is the open GM port; kgm_list
+ * holds receive descriptors; kgm_nid is this node's GM node id; kgm_cb
+ * points at the library-side NAL; kgm_dispatch_lock backs the cb_cli /
+ * cb_sti callbacks.
+ * NOTE(review): kgm_trans is declared as 'struct kgm_trans *', a type not
+ * defined in this header, yet it is allocated as an array of TXMSGS
+ * kgmnal_tx_t - presumably it should be 'kgmnal_tx_t *'; confirm. */
+typedef struct {
+ char kgm_init;
+ char kgm_shuttingdown;
+ struct gm_port *kgm_port;
+ struct list_head kgm_list;
+ ptl_nid_t kgm_nid;
+ nal_cb_t *kgm_cb;
+ struct kgm_trans *kgm_trans;
+ struct tq_struct kgm_ready_tq;
+ spinlock_t kgm_dispatch_lock;
+ spinlock_t kgm_update_lock;
+ spinlock_t kgm_send_lock;
+} kgmnal_data_t;
+
+int kgm_init(kgmnal_data_t *kgm_data);
+int kgmnal_recv_thread(void *);
+int gm_return_mynid(void);
+void kgmnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd);
+
+extern kgmnal_data_t kgmnal_data;
+extern nal_t kgmnal_api;
+extern nal_cb_t kgmnal_lib;
+
+#endif /* _GMNAL_H */
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Based on ksocknal and qswnal
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Robert Read <rread@datarithm.net>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* TODO
+ * preallocate send buffers, store on list
+ * put receive buffers on queue, handle with receive threads
+ * use routing
+ */
+
+#include "gmnal.h"
+
+extern kgmnal_rx_t *kgm_add_recv(kgmnal_data_t *,int);
+
+/* Allocate a single transmit descriptor.  The result is whatever
+ * PORTAL_ALLOC left in the pointer, so callers must check it for NULL. */
+static kgmnal_tx_t *
+get_trans(void)
+{
+        kgmnal_tx_t *t;
+
+        PORTAL_ALLOC(t, sizeof(*t));
+
+        return t;
+}
+
+/* Release a transmit descriptor obtained from get_trans(). */
+static void
+put_trans(kgmnal_tx_t *t)
+{
+        PORTAL_FREE(t, sizeof(*t));
+}
+
+/* Decide whether 'nid' names a node on this GM network: it must fit in a
+ * GM node id without losing high bits and be below the node count GM
+ * reports.  Returns non-zero for a peer, 0 otherwise. */
+int
+kgmnal_ispeer (ptl_nid_t nid)
+{
+        unsigned int gmnid = (unsigned int)nid;
+        unsigned int nnids;
+
+        gm_max_node_id_in_use(kgmnal_data.kgm_port, &nnids);
+
+        /* high bits lost on conversion: cannot be a GM node id */
+        if ((ptl_nid_t)gmnid != nid)
+                return 0;
+
+        /* it's in this machine */
+        return (gmnid < nnids);
+}
+
+/*
+ * LIB functions follow
+ *
+ */
+/* cb_read callback: copy 'len' bytes from src_addr to dst_addr with a
+ * plain memcpy (the user_ptr is dereferenced directly).  'private' is
+ * unused.  Always returns 0. */
+static int
+kgmnal_read (nal_cb_t *nal, void *private, void *dst_addr, user_ptr src_addr,
+ size_t len)
+{
+ CDEBUG(D_NET, "0x%Lx: reading %ld bytes from %p -> %p\n",
+ nal->ni.nid, (long)len, src_addr, dst_addr );
+ memcpy( dst_addr, src_addr, len );
+ return 0;
+}
+
+/* cb_write callback: copy 'len' bytes from src_addr to dst_addr with a
+ * plain memcpy (the user_ptr is dereferenced directly).  'private' is
+ * unused.  Always returns 0. */
+static int
+kgmnal_write(nal_cb_t *nal, void *private, user_ptr dst_addr, void *src_addr,
+ size_t len)
+{
+ CDEBUG(D_NET, "0x%Lx: writing %ld bytes from %p -> %p\n",
+ nal->ni.nid, (long)len, src_addr, dst_addr );
+ memcpy( dst_addr, src_addr, len );
+ return 0;
+}
+
+/* cb_malloc callback: allocate 'len' bytes via PORTAL_ALLOC and return
+ * whatever pointer it produced.  'nal' is unused. */
+static void *
+kgmnal_malloc(nal_cb_t *nal, size_t len)
+{
+ void *buf;
+
+ PORTAL_ALLOC(buf, len);
+ return buf;
+}
+
+/* cb_free callback: release a buffer of 'len' bytes via PORTAL_FREE.
+ * 'nal' is unused. */
+static void
+kgmnal_free(nal_cb_t *nal, void *buf, size_t len)
+{
+ PORTAL_FREE(buf, len);
+}
+
+/* cb_printf callback: when D_NET debugging is enabled, format the message
+ * into a 256-byte stack buffer (longer output is truncated by vsnprintf)
+ * and print it prefixed with the current CPU id.  Silent otherwise. */
+static void
+kgmnal_printf(nal_cb_t *nal, const char *fmt, ...)
+{
+        va_list args;
+        char text[256];
+
+        if ((portal_debug & D_NET) == 0)
+                return;
+
+        va_start(args, fmt);
+        vsnprintf(text, sizeof(text), fmt, args);
+        va_end(args);
+
+        printk("CPUId: %d %s",smp_processor_id(), text);
+}
+
+
+/* cb_cli callback: take the NAL dispatch spinlock with interrupts
+ * disabled, saving the previous interrupt state in *flags. */
+static void
+kgmnal_cli(nal_cb_t *nal, unsigned long *flags)
+{
+        kgmnal_data_t *kgm = nal->nal_data;
+        spinlock_t *lock = &kgm->kgm_dispatch_lock;
+
+        spin_lock_irqsave(lock, *flags);
+}
+
+
+/* cb_sti callback: drop the NAL dispatch spinlock and restore the
+ * interrupt state previously saved in *flags. */
+static void
+kgmnal_sti(nal_cb_t *nal, unsigned long *flags)
+{
+        kgmnal_data_t *kgm = nal->nal_data;
+        spinlock_t *lock = &kgm->kgm_dispatch_lock;
+
+        spin_unlock_irqrestore(lock, *flags);
+}
+
+
+/* cb_dist callback.  Network distance doesn't mean much for this NAL:
+ * report 0 for our own nid and 1 for any other.  Always returns 0. */
+static int
+kgmnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
+{
+        *dist = (nal->ni.nid == nid) ? 0 : 1;
+
+        return 0;
+}
+
+/* FIXME rmr: add routing code here */
+/* Finish a transmit: complete the Portals message, give the DMA buffer
+ * back to GM, clear the descriptor's buffer fields and free the
+ * descriptor.  'error' is accepted but not acted upon here. */
+static void
+kgmnal_tx_done(kgmnal_tx_t *trans, int error)
+{
+        lib_finalize(trans->ktx_nal, trans->ktx_private, trans->ktx_cookie);
+
+        gm_dma_free(kgmnal_data.kgm_port, trans->ktx_buffer);
+
+        /* scrub the descriptor before it goes back to the allocator */
+        trans->ktx_len = 0;
+        trans->ktx_buffer = NULL;
+
+        put_trans(trans);
+}
+/* Printable names for the GM send status codes handled in this file,
+ * indexed by status code.  Codes without an initialiser are NULL. */
+static char * gm_error_strings[GM_NUM_STATUS_CODES] = {
+ [GM_SUCCESS] = "GM_SUCCESS",
+ [GM_SEND_TIMED_OUT] = "GM_SEND_TIMED_OUT",
+ [GM_SEND_REJECTED] = "GM_SEND_REJECTED",
+ [GM_SEND_TARGET_PORT_CLOSED] = "GM_SEND_TARGET_PORT_CLOSED",
+ [GM_SEND_TARGET_NODE_UNREACHABLE] = "GM_SEND_TARGET_NODE_UNREACHABLE",
+ [GM_SEND_DROPPED] = "GM_SEND_DROPPED",
+ [GM_SEND_PORT_CLOSED] = "GM_SEND_PORT_CLOSED",
+};
+
+/* Map a GM status code to a printable name.  Codes that fall outside the
+ * table, or that have no entry in it, yield "Unknown error" instead of
+ * indexing past the end of the array. */
+inline char * get_error(int status)
+{
+        if (status < 0 || status >= GM_NUM_STATUS_CODES)
+                return "Unknown error";
+
+        if (gm_error_strings[status] == NULL)
+                return "Unknown error";
+
+        return gm_error_strings[status];
+}
+
+/* Completion callback passed to gm_resume_sending / gm_drop_sends by the
+ * send handler; it only logs the context pointer and status. */
+static void
+kgmnal_errhandler(struct gm_port *p, void *context, gm_status_t status)
+{
+ CDEBUG(D_NET,"error callback: ktx %p status %d\n", context, status);
+}
+
+/* Send completion callback registered with gm_send_with_callback();
+ * 'context' is the transmit descriptor.  Logs failures, asks GM to
+ * resume or drop sends to the target where the status calls for it, and
+ * then completes the descriptor with 0 on success or -EIO otherwise. */
+static void
+kgmnal_txhandler(struct gm_port *p, void *context, gm_status_t status)
+{
+        kgmnal_tx_t *ktx = (kgmnal_tx_t *)context;
+        int err = -EIO;         /* anything but GM_SUCCESS is an I/O error */
+
+        LASSERT (p != NULL);
+        LASSERT (ktx != NULL);
+
+        CDEBUG(D_NET,"ktx %p status %d nid 0x%x pid %d\n", ktx, status,
+               ktx->ktx_tgt_node, ktx->ktx_tgt_port_id);
+
+        switch((int)status) {
+        case GM_SUCCESS:                        /* normal */
+                err = 0;
+                break;
+
+        case GM_SEND_TIMED_OUT:                 /* application error */
+        case GM_SEND_REJECTED:                  /* size of msg unacceptable */
+        case GM_SEND_TARGET_PORT_CLOSED:
+                CERROR("%s (%d):\n", get_error(status), status);
+                gm_resume_sending(kgmnal_data.kgm_port, ktx->ktx_priority,
+                                  ktx->ktx_tgt_node, ktx->ktx_tgt_port_id,
+                                  kgmnal_errhandler, NULL);
+                break;
+
+        case GM_SEND_TARGET_NODE_UNREACHABLE:
+        case GM_SEND_PORT_CLOSED:
+                CERROR("%s (%d):\n", get_error(status), status);
+                gm_drop_sends(kgmnal_data.kgm_port, ktx->ktx_priority,
+                              ktx->ktx_tgt_node, ktx->ktx_tgt_port_id,
+                              kgmnal_errhandler, NULL);
+                break;
+
+        case GM_SEND_DROPPED:
+                CERROR("%s (%d):\n", get_error(status), status);
+                break;
+
+        default:
+                CERROR("Unknown status: %d\n", status);
+                break;
+        }
+
+        kgmnal_tx_done(ktx, err);
+}
+
+/*
+ */
+
+/* cb_send callback: transmit a Portals header plus 'len' bytes of payload
+ * described by iov/niov to GM node 'nid' on port KGM_PORT_NUM.
+ *
+ * GM has no vectored send, so header and payload are coalesced into one
+ * freshly allocated DMA buffer.  A transmit descriptor records what the
+ * completion callback needs to finalise the message and free the buffer.
+ * 'type' and 'pid' are not used.
+ *
+ * Returns 0 once the send has been queued, -ENOMEM if the descriptor or
+ * the DMA buffer cannot be allocated, or -1 if header plus payload exceed
+ * MSG_LEN_LARGE.  On every failure path the descriptor is released. */
+static int
+kgmnal_send(nal_cb_t *nal,
+            void *private,
+            lib_msg_t *cookie,
+            ptl_hdr_t *hdr,
+            int type,
+            ptl_nid_t nid,
+            ptl_pid_t pid,
+            int options,
+            unsigned int niov,
+            lib_md_iov_t *iov,
+            size_t len)
+{
+        /*
+         * ipnal assumes that this is the private as passed to lib_dispatch..
+         * so do we :/
+         */
+        kgmnal_tx_t *ktx=NULL;
+        int rc=0;
+        void * buf;
+        int buf_len = sizeof(ptl_hdr_t) + len;
+        int buf_size = 0;
+
+        LASSERT ((options & PTL_MD_KIOV) == 0);
+
+        PROF_START(gmnal_send);
+
+        /* len is a size_t; cast so it matches the %d conversion */
+        CDEBUG(D_NET, "sending %d bytes from %p to nid: 0x%Lx pid %d\n",
+               (int)len, iov, nid, KGM_PORT_NUM);
+
+        /* save transaction info to trans for later finalize and cleanup */
+        ktx = get_trans();
+        if (ktx == NULL) {
+                rc = -ENOMEM;
+                goto send_exit;
+        }
+
+        /* hmmm... GM doesn't support vectored write, so need to allocate
+           buffer to coalesce header and data.
+           Also, memory must be dma'able or registered with GM. */
+
+        if (buf_len <= MSG_LEN_SMALL) {
+                buf_size = MSG_SIZE_SMALL;
+        } else if (buf_len <= MSG_LEN_LARGE) {
+                buf_size = MSG_SIZE_LARGE;
+        } else {
+                printk("kgmnal:request exceeds TX MTU size (%d).\n",
+                       MSG_SIZE_LARGE);
+                rc = -1;
+                goto send_free_trans;
+        }
+
+        buf = gm_dma_malloc(kgmnal_data.kgm_port, buf_len);
+        if (buf == NULL) {
+                rc = -ENOMEM;
+                goto send_free_trans;
+        }
+        memcpy(buf, hdr, sizeof(ptl_hdr_t));
+
+        if (len != 0)
+                lib_copy_iov2buf(((char *)buf) + sizeof (ptl_hdr_t),
+                                 options, niov, iov, len);
+
+        ktx->ktx_nal = nal;
+        ktx->ktx_private = private;
+        ktx->ktx_cookie = cookie;
+        ktx->ktx_len = buf_len;
+        ktx->ktx_size = buf_size;
+        ktx->ktx_buffer = buf;
+        ktx->ktx_priority = GM_LOW_PRIORITY;
+        ktx->ktx_tgt_node = nid;
+        ktx->ktx_tgt_port_id = KGM_PORT_NUM;
+
+        CDEBUG(D_NET, "gm_send %d bytes (size %d) from %p to nid: 0x%Lx "
+               "pid %d pri %d\n", buf_len, buf_size, iov, nid, KGM_PORT_NUM,
+               GM_LOW_PRIORITY);
+
+        /* from here on the completion callback owns ktx and buf */
+        gm_send_with_callback(kgmnal_data.kgm_port, buf, buf_size,
+                              buf_len, GM_LOW_PRIORITY,
+                              nid, KGM_PORT_NUM,
+                              kgmnal_txhandler, ktx);
+
+        PROF_FINISH(gmnal_send);
+        return rc;
+
+ send_free_trans:
+        /* nothing was queued, so the descriptor must be released here */
+        put_trans(ktx);
+ send_exit:
+        return rc;
+}
+/* Router forwarding entry point (named in the router interface
+ * descriptor).  Forwarding is not implemented: it only logs an error and
+ * does nothing with 'fwd'. */
+void
+kgmnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
+{
+ CERROR ("forwarding not implemented\n");
+}
+
+/* Forwarding completion stub: only logs that forwarding is not
+ * implemented.
+ * NOTE(review): this global carries a "kqswnal_" prefix inside the GM
+ * NAL; presumably copied from qswnal and liable to clash with a qswnal
+ * symbol of the same name - confirm and rename. */
+void
+kqswnal_fwd_callback (void *arg, int error)
+{
+ CERROR ("forwarding not implemented\n");
+}
+
+
+/* Hand a receive buffer back to GM using the size and priority recorded
+ * in its descriptor. */
+static inline void
+kgmnal_requeue_rx(kgmnal_rx_t *krx)
+{
+ gm_provide_receive_buffer(kgmnal_data.kgm_port, krx->krx_buffer,
+ krx->krx_size, krx->krx_priority);
+}
+
+/* Process one received GM message; called from the receiver thread.
+ *
+ * 'buf' holds 'len' received bytes; 'size' and 'pri' are the GM size and
+ * priority needed to hand the buffer back to GM.  Messages shorter than a
+ * Portals header are logged and requeued (or silently dropped while
+ * shutting down).  Messages addressed to this node go to lib_parse(),
+ * with an on-stack descriptor as the private argument; anything else is
+ * logged and its buffer requeued, since forwarding is not implemented. */
+static void kgmnal_rx(kgmnal_data_t *kgm, unsigned long len, unsigned int size,
+                      void * buf, unsigned int pri)
+{
+        ptl_hdr_t *hdr = buf;
+        kgmnal_rx_t krx;
+
+        CDEBUG(D_NET,"buf %p, len %ld\n", buf, len);
+
+        if ( len < sizeof( ptl_hdr_t ) ) {
+                /* XXX what's this for? */
+                if (kgm->kgm_shuttingdown)
+                        return;
+                CERROR("kgmnal: did not receive complete portal header, "
+                       "len= %ld\n", len);
+                gm_provide_receive_buffer(kgm->kgm_port, buf, size, pri);
+                return;
+        }
+
+        /* might want to use separate threads to handle receive */
+        krx.krx_buffer = buf;
+        krx.krx_len = len;
+        krx.krx_size = size;
+        krx.krx_priority = pri;
+
+        if ( hdr->dest_nid == kgmnal_lib.ni.nid ) {
+                PROF_START(lib_parse);
+                lib_parse(&kgmnal_lib, (ptl_hdr_t *)krx.krx_buffer, &krx);
+                PROF_FINISH(lib_parse);
+        } else if (kgmnal_ispeer(hdr->dest_nid)) {
+                /* should have gone direct to peer */
+                CERROR("dropping packet from 0x%llx to 0x%llx: target is "
+                       "a peer\n", hdr->src_nid, hdr->dest_nid);
+                kgmnal_requeue_rx(&krx);
+        } else {
+                /* forward to gateway */
+                CERROR("forwarding not implemented yet\n");
+                kgmnal_requeue_rx(&krx);
+        }
+
+        return;
+}
+
+
+/* cb_recv callback: deliver the payload of a received message.
+ *
+ * 'private' is the receive descriptor handed to lib_parse().  Copies the
+ * first 'mlen' payload bytes (those following the Portals header) into
+ * the iov, finalises the message, gives the buffer back to GM and
+ * returns 'rlen'.  Page-based (PTL_MD_KIOV) descriptors are not
+ * supported. */
+static int kgmnal_recv(nal_cb_t *nal,
+                       void *private,
+                       lib_msg_t *cookie,
+                       int options,
+                       unsigned int niov,
+                       lib_md_iov_t *iov,
+                       size_t mlen,
+                       size_t rlen)
+{
+        kgmnal_rx_t *krx = private;
+
+        LASSERT ((options & PTL_MD_KIOV) == 0);
+
+        CDEBUG(D_NET,"mlen=%d, rlen=%d\n", mlen, rlen);
+
+        /* What was actually received must be >= what sender claims to
+         * have sent.  This is an LASSERT, since lib-move doesn't
+         * check cb return code yet. */
+        LASSERT (krx->krx_len >= sizeof (ptl_hdr_t) + rlen);
+        LASSERT (mlen <= rlen);
+
+        PROF_START(gmnal_recv);
+
+        if (mlen > 0) {
+                char *payload = krx->krx_buffer + sizeof (ptl_hdr_t);
+
+                PROF_START(memcpy);
+                lib_copy_buf2iov (options, niov, iov, payload, mlen);
+                PROF_FINISH(memcpy);
+        }
+
+        PROF_START(lib_finalize);
+        lib_finalize(nal, private, cookie);
+        PROF_FINISH(lib_finalize);
+
+        /* the buffer is no longer needed: hand it back to GM */
+        gm_provide_receive_buffer(kgmnal_data.kgm_port, krx->krx_buffer,
+                                  krx->krx_size, krx->krx_priority);
+
+        PROF_FINISH(gmnal_recv);
+
+        return rlen;
+}
+
+
+/* Alarm callback installed by recv_shutdown(); it only logs that it was
+ * called.  The argument is unused. */
+static void kgmnal_shutdown(void * none)
+{
+ CERROR("called\n");
+ return;
+}
+
+/*
+ * Set terminate and use alarm to wake up the recv thread.
+ *
+ * The alarm object is handed to GM by address and is still referenced
+ * after this function returns (until the alarm fires), so it must not
+ * live on this function's stack; it is given static storage instead.
+ */
+static void recv_shutdown(kgmnal_data_t *kgm)
+{
+        static gm_alarm_t alarm;
+
+        kgm->kgm_shuttingdown = 1;
+        gm_initialize_alarm(&alarm);
+        gm_set_alarm(kgm->kgm_port, &alarm, 1, kgmnal_shutdown, NULL);
+}
+
+/* Begin shutting the NAL down.  Currently this only asks the receiver
+ * thread to stop via recv_shutdown(); the other steps listed below are
+ * not implemented.  Always returns 0. */
+int kgmnal_end(kgmnal_data_t *kgm)
+{
+
+ /* wait for sends to finish ? */
+ /* remove receive buffers */
+ /* shutdown receive thread */
+
+ recv_shutdown(kgm);
+
+ return 0;
+}
+
+/* Receiver thread body; 'arg' is the NAL's kgmnal_data_t.
+ *
+ * Blocks in GM waiting for events until kgm_shuttingdown is set or GM
+ * returns no event.  Receive events are passed to kgmnal_rx() with the
+ * matching priority; alarm and unrecognised events are handed back to GM
+ * through gm_unknown().  Always returns 0. */
+int kgmnal_recv_thread(void *arg)
+{
+        kgmnal_data_t *kgm = arg;
+
+        LASSERT(kgm != NULL);
+
+        kportal_daemonize("kgmnal_rx");
+
+        while (!kgm->kgm_shuttingdown) {
+                gm_recv_event_t *ev;
+                int priority = GM_LOW_PRIORITY;
+
+                ev = gm_blocking_receive_no_spin(kgm->kgm_port);
+                if (ev == NULL) {
+                        CERROR("gm_blocking_receive returned NULL\n");
+                        break;
+                }
+
+                switch(gm_ntohc(ev->recv.type)) {
+                case GM_HIGH_RECV_EVENT:
+                        priority = GM_HIGH_PRIORITY;
+                        /* fall through */
+                case GM_RECV_EVENT:
+                        kgmnal_rx(kgm, gm_ntohl(ev->recv.length),
+                                  gm_ntohc(ev->recv.size),
+                                  gm_ntohp(ev->recv.buffer), priority);
+                        break;
+
+                case GM_ALARM_EVENT:
+                        CERROR("received alarm");
+                        gm_unknown(kgm->kgm_port, ev);
+                        break;
+
+                case GM_BAD_SEND_DETECTED_EVENT: /* ?? */
+                        CERROR("received bad send!\n");
+                        break;
+
+                default:
+                        gm_unknown(kgm->kgm_port, ev);
+                }
+        }
+
+        CERROR("shuttting down.\n");
+        return 0;
+}
+
+/* Library-side NAL descriptor for GM: binds the Portals library callbacks
+ * to the kgmnal_* implementations in this file and carries the NAL's
+ * private data pointer. */
+nal_cb_t kgmnal_lib = {
+ nal_data: &kgmnal_data, /* NAL private data */
+ cb_send: kgmnal_send,
+ cb_recv: kgmnal_recv,
+ cb_read: kgmnal_read,
+ cb_write: kgmnal_write,
+ cb_malloc: kgmnal_malloc,
+ cb_free: kgmnal_free,
+ cb_printf: kgmnal_printf,
+ cb_cli: kgmnal_cli,
+ cb_sti: kgmnal_sti,
+ cb_dist: kgmnal_dist
+};
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+include ../../Rules.linux
+
+MODULE = kqswnal
+modulenet_DATA = kqswnal.o
+EXTRA_PROGRAMS = kqswnal
+
+
+#CFLAGS:= @KCFLAGS@
+#CPPFLAGS:=@KCPPFLAGS@
+DEFS =
+kqswnal_SOURCES = qswnal.c qswnal_cb.c qswnal.h
--- /dev/null
+/*
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ *
+ * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
+ * W. Marcus Miller - Based on ksocknal
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include "qswnal.h"
+
+ptl_handle_ni_t kqswnal_ni;
+nal_t kqswnal_api;
+kqswnal_data_t kqswnal_data;
+
+/* Router interface descriptor for this NAL: identifies it as QSWNAL and
+ * names kqswnal_fwd_packet as the forwarding entry point. */
+kpr_nal_interface_t kqswnal_router_interface = {
+ kprni_nalid: QSWNAL,
+ kprni_arg: NULL,
+ kprni_fwd: kqswnal_fwd_packet,
+};
+
+
+/* API-side request entry point for the QSW NAL.  Hands request 'id' and
+ * its argument/return buffers to lib_dispatch(); args_len and ret_len
+ * are not consulted here.  Always returns PTL_OK. */
+static int
+kqswnal_forward(nal_t *nal,
+ int id,
+ void *args, size_t args_len,
+ void *ret, size_t ret_len)
+{
+ kqswnal_data_t *k = nal->nal_data;
+ nal_cb_t *nal_cb = k->kqn_cb;
+
+ /* only the single static QSW NAL instance is expected here */
+ LASSERT (nal == &kqswnal_api);
+ LASSERT (k == &kqswnal_data);
+ LASSERT (nal_cb == &kqswnal_lib);
+
+ lib_dispatch(nal_cb, k, id, args, ret); /* nal needs k */
+ return (PTL_OK);
+}
+
+/* API-side lock: delegates to the library callback cb_cli, passing
+ * 'flags' through to it. */
+static void
+kqswnal_lock (nal_t *nal, unsigned long *flags)
+{
+ kqswnal_data_t *k = nal->nal_data;
+ nal_cb_t *nal_cb = k->kqn_cb;
+
+ LASSERT (nal == &kqswnal_api);
+ LASSERT (k == &kqswnal_data);
+ LASSERT (nal_cb == &kqswnal_lib);
+
+ nal_cb->cb_cli(nal_cb,flags);
+}
+
+/* API-side unlock: delegates to the library callback cb_sti, passing
+ * 'flags' through to it. */
+static void
+kqswnal_unlock(nal_t *nal, unsigned long *flags)
+{
+ kqswnal_data_t *k = nal->nal_data;
+ nal_cb_t *nal_cb = k->kqn_cb;
+
+ LASSERT (nal == &kqswnal_api);
+ LASSERT (k == &kqswnal_data);
+ LASSERT (nal_cb == &kqswnal_lib);
+
+ nal_cb->cb_sti(nal_cb,flags);
+}
+
+/* API shutdown hook.  Logs the call and checks it was handed the QSW
+ * NAL; 'ni' is unused.  Always returns 0. */
+static int
+kqswnal_shutdown(nal_t *nal, int ni)
+{
+ CDEBUG (D_NET, "shutdown\n");
+
+ LASSERT (nal == &kqswnal_api);
+ return (0);
+}
+
+/* API yield hook: log the call, then give up the CPU only if the
+ * scheduler has flagged the current task for rescheduling. */
+static void
+kqswnal_yield( nal_t *nal )
+{
+        CDEBUG (D_NET, "yield\n");
+
+        if (!current->need_resched)
+                return;
+
+        schedule();
+}
+
+/* NAL init callback: look up this node's id and the node count from the
+ * Elan device, initialise the Portals library with them and the requested
+ * table sizes, and return the API-side NAL.  'interface' and
+ * 'requested_pid' are not used. */
+static nal_t *
+kqswnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
+             ptl_pid_t requested_pid)
+{
+        ptl_nid_t mynid;
+        int nnids;
+
+        mynid = ep_nodeid (kqswnal_data.kqn_epdev);
+        nnids = ep_numnodes (kqswnal_data.kqn_epdev);
+
+        CDEBUG(D_NET, "calling lib_init with nid "LPX64" of %d\n", mynid,nnids);
+
+        lib_init(&kqswnal_lib, mynid, 0, nnids, ptl_size, ac_size);
+
+        return (&kqswnal_api);
+}
+
+/* Tear the QSW NAL down.  How much is undone depends on how far
+ * initialisation got, as recorded in kqswnal_data.kqn_init: the switch
+ * below falls through from the most complete stage to the least.  After
+ * that the router is shut down, worker threads are stopped, Elan comms
+ * are closed, pending forwards are failed, and all mappings, descriptors
+ * and buffers are released before the global state is zeroed. */
+void __exit
+kqswnal_finalise (void)
+{
+ switch (kqswnal_data.kqn_init)
+ {
+ default:
+ LASSERT (0);
+
+ case KQN_INIT_ALL:
+ PORTAL_SYMBOL_UNREGISTER (kqswnal_ni);
+ /* fall through */
+
+ case KQN_INIT_PTL:
+ PtlNIFini (kqswnal_ni);
+ lib_fini (&kqswnal_lib);
+ /* fall through */
+
+ case KQN_INIT_DATA:
+ break;
+
+ case KQN_INIT_NOTHING:
+ /* nothing was set up, so there is nothing to undo */
+ return;
+ }
+
+ /**********************************************************************/
+ /* Make the router stop calling me and fail any further call-ins */
+ kpr_shutdown (&kqswnal_data.kqn_router);
+
+ /**********************************************************************/
+ /* flag threads to terminate, wake them and wait for them to die */
+
+ kqswnal_data.kqn_shuttingdown = 1;
+ wake_up_all (&kqswnal_data.kqn_sched_waitq);
+
+ /* poll once a second until every thread has gone */
+ while (atomic_read (&kqswnal_data.kqn_nthreads) != 0) {
+ CDEBUG(D_NET, "waiting for %d threads to terminate\n",
+ atomic_read (&kqswnal_data.kqn_nthreads));
+ set_current_state (TASK_UNINTERRUPTIBLE);
+ schedule_timeout (HZ);
+ }
+
+ /**********************************************************************/
+ /* close elan comms */
+
+ if (kqswnal_data.kqn_eprx_small != NULL)
+ ep_remove_large_rcvr (kqswnal_data.kqn_eprx_small);
+
+ if (kqswnal_data.kqn_eprx_large != NULL)
+ ep_remove_large_rcvr (kqswnal_data.kqn_eprx_large);
+
+ if (kqswnal_data.kqn_eptx != NULL)
+ ep_free_large_xmtr (kqswnal_data.kqn_eptx);
+
+ /**********************************************************************/
+ /* No more threads. No more portals, router or comms callbacks!
+ * I control the horizontals and the verticals...
+ */
+
+ /**********************************************************************/
+ /* Complete any blocked forwarding packets with error
+ */
+
+ while (!list_empty (&kqswnal_data.kqn_idletxd_fwdq))
+ {
+ kpr_fwd_desc_t *fwd = list_entry (kqswnal_data.kqn_idletxd_fwdq.next,
+ kpr_fwd_desc_t, kprfd_list);
+ list_del (&fwd->kprfd_list);
+ kpr_fwd_done (&kqswnal_data.kqn_router, fwd, -EHOSTUNREACH);
+ }
+
+ while (!list_empty (&kqswnal_data.kqn_delayedfwds))
+ {
+ kpr_fwd_desc_t *fwd = list_entry (kqswnal_data.kqn_delayedfwds.next,
+ kpr_fwd_desc_t, kprfd_list);
+ list_del (&fwd->kprfd_list);
+ kpr_fwd_done (&kqswnal_data.kqn_router, fwd, -EHOSTUNREACH);
+ }
+
+ /**********************************************************************/
+ /* Wait for router to complete any packets I sent her
+ */
+
+ kpr_deregister (&kqswnal_data.kqn_router);
+
+
+ /**********************************************************************/
+ /* Unmap message buffers and free all descriptors and buffers
+ */
+
+ if (kqswnal_data.kqn_eprxdmahandle != NULL)
+ {
+ elan3_dvma_unload(kqswnal_data.kqn_epdev->DmaState,
+ kqswnal_data.kqn_eprxdmahandle, 0,
+ KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL +
+ KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE);
+
+ elan3_dma_release(kqswnal_data.kqn_epdev->DmaState,
+ kqswnal_data.kqn_eprxdmahandle);
+ }
+
+ if (kqswnal_data.kqn_eptxdmahandle != NULL)
+ {
+ elan3_dvma_unload(kqswnal_data.kqn_epdev->DmaState,
+ kqswnal_data.kqn_eptxdmahandle, 0,
+ KQSW_NTXMSGPAGES * (KQSW_NTXMSGS +
+ KQSW_NNBLK_TXMSGS));
+
+ elan3_dma_release(kqswnal_data.kqn_epdev->DmaState,
+ kqswnal_data.kqn_eptxdmahandle);
+ }
+
+ if (kqswnal_data.kqn_txds != NULL)
+ {
+ int i;
+
+ /* free each descriptor's buffer, then the descriptor array */
+ for (i = 0; i < KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS; i++)
+ {
+ kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];
+
+ if (ktx->ktx_buffer != NULL)
+ PORTAL_FREE(ktx->ktx_buffer,
+ KQSW_TX_BUFFER_SIZE);
+ }
+
+ PORTAL_FREE(kqswnal_data.kqn_txds,
+ sizeof (kqswnal_tx_t) * (KQSW_NTXMSGS +
+ KQSW_NNBLK_TXMSGS));
+ }
+
+ if (kqswnal_data.kqn_rxds != NULL)
+ {
+ int i;
+ int j;
+
+ /* free each descriptor's pages, then the descriptor array */
+ for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
+ {
+ kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
+
+ for (j = 0; j < krx->krx_npages; j++)
+ if (krx->krx_pages[j] != NULL)
+ __free_page (krx->krx_pages[j]);
+ }
+
+ PORTAL_FREE(kqswnal_data.kqn_rxds,
+ sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL +
+ KQSW_NRXMSGS_LARGE));
+ }
+
+ /* resets flags, pointers to NULL etc */
+ memset(&kqswnal_data, 0, sizeof (kqswnal_data));
+
+ CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read(&portal_kmemory));
+
+ printk (KERN_INFO "Routing QSW NAL unloaded (final mem %d)\n",
+ atomic_read(&portal_kmemory));
+}
+
+/*
+ * Module entry point: bring the QSW (Elan) NAL up.
+ *
+ * In order: wire up the API/lib tables, zero and initialise kqswnal_data,
+ * find Elan device 0, allocate the transmitter and both receivers, reserve
+ * Elan address space for the tx and rx buffers, allocate and pre-map the
+ * tx/rx descriptors, initialise the Portals NI, post all receives, spawn
+ * smp_num_cpus scheduler threads and finally try to register with the
+ * router.
+ *
+ * Returns 0 on success.  Every failure after the Elan device has been
+ * found tears down via kqswnal_finalise() and returns -ENOMEM (or the
+ * return code of kqswnal_thread_start()).  A failed router registration
+ * is only logged; the NAL still loads.
+ */
+static int __init
+kqswnal_initialise (void)
+{
+        ELAN3_DMA_REQUEST dmareq;
+        int               rc;
+        int               i;
+        int               elan_page_idx;
+        int               pkmem = atomic_read(&portal_kmemory);
+
+        LASSERT (kqswnal_data.kqn_init == KQN_INIT_NOTHING);
+
+        CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&portal_kmemory));
+
+        kqswnal_api.forward  = kqswnal_forward;
+        kqswnal_api.shutdown = kqswnal_shutdown;
+        kqswnal_api.yield    = kqswnal_yield;
+        kqswnal_api.validate = NULL;         /* our api validate is a NOOP */
+        kqswnal_api.lock     = kqswnal_lock;
+        kqswnal_api.unlock   = kqswnal_unlock;
+        kqswnal_api.nal_data = &kqswnal_data;
+
+        kqswnal_lib.nal_data = &kqswnal_data;
+
+        /* ensure all pointers NULL etc */
+        memset (&kqswnal_data, 0, sizeof (kqswnal_data));
+
+        kqswnal_data.kqn_cb = &kqswnal_lib;
+
+        INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds);
+        INIT_LIST_HEAD (&kqswnal_data.kqn_nblk_idletxds);
+        spin_lock_init (&kqswnal_data.kqn_idletxd_lock);
+        init_waitqueue_head (&kqswnal_data.kqn_idletxd_waitq);
+        INIT_LIST_HEAD (&kqswnal_data.kqn_idletxd_fwdq);
+
+        INIT_LIST_HEAD (&kqswnal_data.kqn_delayedfwds);
+        INIT_LIST_HEAD (&kqswnal_data.kqn_delayedtxds);
+        INIT_LIST_HEAD (&kqswnal_data.kqn_readyrxds);
+
+        spin_lock_init (&kqswnal_data.kqn_sched_lock);
+        init_waitqueue_head (&kqswnal_data.kqn_sched_waitq);
+
+        spin_lock_init (&kqswnal_data.kqn_statelock);
+
+        /* pointers/lists/locks initialised */
+        kqswnal_data.kqn_init = KQN_INIT_DATA;
+
+        /**********************************************************************/
+        /* Find the first Elan device */
+
+        kqswnal_data.kqn_epdev = ep_device (0);
+        if (kqswnal_data.kqn_epdev == NULL)
+        {
+                /* NB nothing beyond kqswnal_data has been set up yet, so
+                 * this path returns without calling kqswnal_finalise() */
+                CERROR ("Can't get elan device 0\n");
+                return (-ENOMEM);
+        }
+
+        /**********************************************************************/
+        /* Get the transmitter */
+
+        kqswnal_data.kqn_eptx = ep_alloc_large_xmtr (kqswnal_data.kqn_epdev);
+        if (kqswnal_data.kqn_eptx == NULL)
+        {
+                CERROR ("Can't allocate transmitter\n");
+                kqswnal_finalise ();
+                return (-ENOMEM);
+        }
+
+        /**********************************************************************/
+        /* Get the receivers */
+
+        kqswnal_data.kqn_eprx_small = ep_install_large_rcvr (kqswnal_data.kqn_epdev,
+                                                             EP_SVC_LARGE_PORTALS_SMALL,
+                                                             KQSW_EP_ENVELOPES_SMALL);
+        if (kqswnal_data.kqn_eprx_small == NULL)
+        {
+                CERROR ("Can't install small msg receiver\n");
+                kqswnal_finalise ();
+                return (-ENOMEM);
+        }
+
+        kqswnal_data.kqn_eprx_large = ep_install_large_rcvr (kqswnal_data.kqn_epdev,
+                                                             EP_SVC_LARGE_PORTALS_LARGE,
+                                                             KQSW_EP_ENVELOPES_LARGE);
+        if (kqswnal_data.kqn_eprx_large == NULL)
+        {
+                CERROR ("Can't install large msg receiver\n");
+                kqswnal_finalise ();
+                return (-ENOMEM);
+        }
+
+        /**********************************************************************/
+        /* Reserve Elan address space for transmit buffers */
+
+        dmareq.Waitfn   = DDI_DMA_SLEEP;
+        dmareq.ElanAddr = (E3_Addr) 0;
+        dmareq.Attr     = PTE_LOAD_LITTLE_ENDIAN;
+        dmareq.Perm     = ELAN_PERM_REMOTEREAD;
+
+        rc = elan3_dma_reserve(kqswnal_data.kqn_epdev->DmaState,
+                               KQSW_NTXMSGPAGES*(KQSW_NTXMSGS+KQSW_NNBLK_TXMSGS),
+                               &dmareq, &kqswnal_data.kqn_eptxdmahandle);
+        if (rc != DDI_SUCCESS)
+        {
+                /* this is the transmit reservation: say so in the log */
+                CERROR ("Can't reserve tx dma space\n");
+                kqswnal_finalise ();
+                return (-ENOMEM);
+        }
+
+        /**********************************************************************/
+        /* Reserve Elan address space for receive buffers */
+
+        dmareq.Waitfn   = DDI_DMA_SLEEP;
+        dmareq.ElanAddr = (E3_Addr) 0;
+        dmareq.Attr     = PTE_LOAD_LITTLE_ENDIAN;
+        dmareq.Perm     = ELAN_PERM_REMOTEWRITE;
+
+        rc = elan3_dma_reserve (kqswnal_data.kqn_epdev->DmaState,
+                                KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL +
+                                KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE,
+                                &dmareq, &kqswnal_data.kqn_eprxdmahandle);
+        if (rc != DDI_SUCCESS)
+        {
+                CERROR ("Can't reserve rx dma space\n");
+                kqswnal_finalise ();
+                return (-ENOMEM);
+        }
+
+        /**********************************************************************/
+        /* Allocate/Initialise transmit descriptors */
+
+        PORTAL_ALLOC(kqswnal_data.kqn_txds,
+                     sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS));
+        if (kqswnal_data.kqn_txds == NULL)
+        {
+                kqswnal_finalise ();
+                return (-ENOMEM);
+        }
+
+        /* clear flags, null pointers etc */
+        memset(kqswnal_data.kqn_txds, 0,
+               sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS));
+        for (i = 0; i < (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS); i++)
+        {
+                int           premapped_pages;
+                kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];
+                int           basepage = i * KQSW_NTXMSGPAGES;
+
+                PORTAL_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE);
+                if (ktx->ktx_buffer == NULL)
+                {
+                        kqswnal_finalise ();
+                        return (-ENOMEM);
+                }
+
+                /* Map pre-allocated buffer NOW, to save latency on transmit */
+                premapped_pages = kqswnal_pages_spanned(ktx->ktx_buffer,
+                                                        KQSW_TX_BUFFER_SIZE);
+
+                elan3_dvma_kaddr_load (kqswnal_data.kqn_epdev->DmaState,
+                                       kqswnal_data.kqn_eptxdmahandle,
+                                       ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE,
+                                       basepage, &ktx->ktx_ebuffer);
+
+                ktx->ktx_basepage = basepage + premapped_pages; /* message mapping starts here */
+                ktx->ktx_npages = KQSW_NTXMSGPAGES - premapped_pages; /* for this many pages */
+
+                /* the first KQSW_NTXMSGS descriptors form the normal pool;
+                 * the rest are the reserve for senders that can't block */
+                if (i < KQSW_NTXMSGS)
+                        ktx->ktx_idle = &kqswnal_data.kqn_idletxds;
+                else
+                        ktx->ktx_idle = &kqswnal_data.kqn_nblk_idletxds;
+
+                list_add_tail (&ktx->ktx_list, ktx->ktx_idle);
+        }
+
+        /**********************************************************************/
+        /* Allocate/Initialise receive descriptors */
+
+        PORTAL_ALLOC (kqswnal_data.kqn_rxds,
+                      sizeof (kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE));
+        if (kqswnal_data.kqn_rxds == NULL)
+        {
+                kqswnal_finalise ();
+                return (-ENOMEM);
+        }
+
+        memset(kqswnal_data.kqn_rxds, 0, /* clear flags, null pointers etc */
+               sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL+KQSW_NRXMSGS_LARGE));
+
+        elan_page_idx = 0;
+        for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
+        {
+                E3_Addr       elanaddr;
+                int           j;
+                kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
+
+                /* small descriptors come first, then the large ones */
+                if (i < KQSW_NRXMSGS_SMALL)
+                {
+                        krx->krx_npages = KQSW_NRXMSGPAGES_SMALL;
+                        krx->krx_eprx   = kqswnal_data.kqn_eprx_small;
+                }
+                else
+                {
+                        krx->krx_npages = KQSW_NRXMSGPAGES_LARGE;
+                        krx->krx_eprx   = kqswnal_data.kqn_eprx_large;
+                }
+
+                LASSERT (krx->krx_npages > 0);
+                for (j = 0; j < krx->krx_npages; j++)
+                {
+                        krx->krx_pages[j] = alloc_page (GFP_KERNEL);
+                        if (krx->krx_pages[j] == NULL)
+                        {
+                                kqswnal_finalise ();
+                                return (-ENOMEM);
+                        }
+
+                        LASSERT(page_address(krx->krx_pages[j]) != NULL);
+
+                        elan3_dvma_kaddr_load(kqswnal_data.kqn_epdev->DmaState,
+                                              kqswnal_data.kqn_eprxdmahandle,
+                                              page_address(krx->krx_pages[j]),
+                                              PAGE_SIZE, elan_page_idx,
+                                              &elanaddr);
+                        elan_page_idx++;
+
+                        if (j == 0)
+                                krx->krx_elanaddr = elanaddr;
+
+                        /* NB we assume the pages of one buffer are mapped
+                         * contiguously in Elan address space */
+                        LASSERT (elanaddr == krx->krx_elanaddr + j * PAGE_SIZE);
+                }
+        }
+        LASSERT (elan_page_idx ==
+                 (KQSW_NRXMSGS_SMALL * KQSW_NRXMSGPAGES_SMALL) +
+                 (KQSW_NRXMSGS_LARGE * KQSW_NRXMSGPAGES_LARGE));
+
+        /**********************************************************************/
+        /* Network interface ready to initialise */
+
+        rc = PtlNIInit(kqswnal_init, 32, 4, 0, &kqswnal_ni);
+        if (rc != 0)
+        {
+                CERROR ("PtlNIInit failed %d\n", rc);
+                kqswnal_finalise ();
+                return (-ENOMEM);
+        }
+
+        kqswnal_data.kqn_init = KQN_INIT_PTL;
+
+        /**********************************************************************/
+        /* Queue receives, now that it's OK to run their completion callbacks */
+
+        for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
+        {
+                kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
+
+                /* NB this enqueue can allocate/sleep (attr == 0) */
+                rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
+                                      krx->krx_elanaddr,
+                                      krx->krx_npages * PAGE_SIZE, 0);
+                if (rc != 0)
+                {
+                        CERROR ("failed ep_queue_receive %d\n", rc);
+                        kqswnal_finalise ();
+                        return (-ENOMEM);
+                }
+        }
+
+        /**********************************************************************/
+        /* Spawn scheduling threads */
+        for (i = 0; i < smp_num_cpus; i++)
+        {
+                rc = kqswnal_thread_start (kqswnal_scheduler, NULL);
+                if (rc != 0)
+                {
+                        CERROR ("failed to spawn scheduling thread: %d\n", rc);
+                        kqswnal_finalise ();
+                        return (rc);
+                }
+        }
+
+        /**********************************************************************/
+        /* Connect to the router; failure is not fatal, we just don't route */
+        rc = kpr_register (&kqswnal_data.kqn_router, &kqswnal_router_interface);
+        if (rc != 0)
+                CDEBUG(D_NET, "Can't initialise routing interface (rc = %d): not routing\n",rc);
+
+        PORTAL_SYMBOL_REGISTER(kqswnal_ni);
+        kqswnal_data.kqn_init = KQN_INIT_ALL;
+
+        printk(KERN_INFO "Routing QSW NAL loaded on node %d of %d "
+               "(Routing %s, initial mem %d)\n",
+               ep_nodeid (kqswnal_data.kqn_epdev),
+               ep_numnodes (kqswnal_data.kqn_epdev),
+               kpr_routing (&kqswnal_data.kqn_router) ? "enabled" : "disabled",
+               pkmem);
+
+        return (0);
+}
+
+
+/* Module metadata */
+MODULE_AUTHOR("W. Marcus Miller <marcusm@llnl.gov>");
+MODULE_DESCRIPTION("Kernel Quadrics Switch NAL v1.00");
+MODULE_LICENSE("GPL");
+
+/* Load/unload hooks: kqswnal_initialise() on load, kqswnal_finalise() on unload */
+module_init (kqswnal_initialise);
+module_exit (kqswnal_finalise);
+
+/* The network interface handle filled in by PtlNIInit() in kqswnal_initialise() */
+EXPORT_SYMBOL (kqswnal_ni);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines.
+ *
+ */
+
+#ifndef _QSWNAL_H
+#define _QSWNAL_H
+#define EXPORT_SYMTAB
+
+#ifdef PROPRIETARY_ELAN
+# include <qsw/kernel.h>
+#else
+# include <qsnet/kernel.h>
+#endif
+
+#undef printf /* nasty QSW #define */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <elan3/elanregs.h>
+#include <elan3/elandev.h>
+#include <elan3/elanvp.h>
+#include <elan3/elan3mmu.h>
+#include <elan3/elanctxt.h>
+#include <elan3/elandebug.h>
+#include <elan3/urom_addrs.h>
+#include <elan3/busops.h>
+#include <elan3/kcomm.h>
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/locks.h>
+#include <linux/unistd.h>
+#include <net/sock.h>
+#include <linux/uio.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/list.h>
+#include <asm/uaccess.h>
+#include <asm/segment.h>
+
+#define DEBUG_SUBSYSTEM S_QSWNAL
+
+#include <linux/kp30.h>
+#include <portals/p30.h>
+#include <portals/lib-p30.h>
+
+/* Compile-time switch for the optional checksum support.  When it is 0,
+ * KQSW_CSUM_SIZE is 0 and KQSW_HDR_SIZE is just sizeof (ptl_hdr_t); when
+ * non-zero, room for two kqsw_csum_t values is added to the header size
+ * (presumably one for the header and one for the payload - TODO confirm
+ * against the send/receive paths). */
+#define KQSW_CHECKSUM 0
+#if KQSW_CHECKSUM
+typedef unsigned long kqsw_csum_t;
+#define KQSW_CSUM_SIZE (2 * sizeof (kqsw_csum_t))
+#else
+#define KQSW_CSUM_SIZE 0
+#endif
+#define KQSW_HDR_SIZE (sizeof (ptl_hdr_t) + KQSW_CSUM_SIZE)
+
+/*
+ * Elan NAL
+ *
+ * EP service numbers for the two receivers installed by
+ * kqswnal_initialise(): _SMALL is used for kqn_eprx_small and _LARGE for
+ * kqn_eprx_large.
+ */
+#define EP_SVC_LARGE_PORTALS_SMALL (0x10) /* Portals over elan port number (small payloads) */
+#define EP_SVC_LARGE_PORTALS_LARGE (0x11) /* Portals over elan port number (large payloads) */
+/* NB small/large message sizes are GLOBAL constants */
+
+/*
+ * Performance Tuning defines
+ * NB no mention of PAGE_SIZE for interoperability
+ */
+#if PTL_LARGE_MTU
+# define KQSW_MAXPAYLOAD (256<<10) /* biggest message this NAL will cope with */
+#else
+# define KQSW_MAXPAYLOAD (64<<10) /* biggest message this NAL will cope with */
+#endif
+
+#define KQSW_SMALLPAYLOAD ((4<<10) - KQSW_HDR_SIZE) /* small/large ep receiver breakpoint */
+
+#define KQSW_TX_MAXCONTIG (1<<10) /* largest payload that gets made contiguous on transmit */
+
+#define KQSW_NTXMSGS 8 /* # normal transmit messages */
+#define KQSW_NNBLK_TXMSGS 128 /* # reserved transmit messages if can't block */
+
+#define KQSW_NRXMSGS_LARGE 64 /* # large receive buffers */
+#define KQSW_EP_ENVELOPES_LARGE 128 /* # large ep envelopes */
+
+#define KQSW_NRXMSGS_SMALL 256 /* # small receive buffers */
+#define KQSW_EP_ENVELOPES_SMALL 2048 /* # small ep envelopes */
+
+#define KQSW_RESCHED 100 /* # busy loops that forces scheduler to yield */
+
+/*
+ * derived constants
+ *
+ * NB in this section each explanatory comment FOLLOWS the #define it
+ * describes.  btopr() is not defined in this file; presumably it converts
+ * a byte count to pages, rounding up - TODO confirm.
+ */
+
+#define KQSW_TX_BUFFER_SIZE (KQSW_HDR_SIZE + KQSW_TX_MAXCONTIG)
+/* The pre-allocated tx buffer (hdr + small payload) */
+
+#define KQSW_NTXMSGPAGES (btopr(KQSW_TX_BUFFER_SIZE) + 1 + btopr(KQSW_MAXPAYLOAD) + 1)
+/* Reserve elan address space for pre-allocated and pre-mapped transmit
+ * buffer and a full payload too. Extra pages allow for page alignment */
+
+#define KQSW_NRXMSGPAGES_SMALL (btopr(KQSW_HDR_SIZE + KQSW_SMALLPAYLOAD))
+/* receive hdr/payload always contiguous and page aligned */
+#define KQSW_NRXMSGBYTES_SMALL (KQSW_NRXMSGPAGES_SMALL * PAGE_SIZE)
+
+#define KQSW_NRXMSGPAGES_LARGE (btopr(KQSW_HDR_SIZE + KQSW_MAXPAYLOAD))
+/* receive hdr/payload always contiguous and page aligned;
+ * NB this also sizes the krx_pages[] and krx_iov[] arrays in kqswnal_rx_t */
+#define KQSW_NRXMSGBYTES_LARGE (KQSW_NRXMSGPAGES_LARGE * PAGE_SIZE)
+/* biggest complete packet we can receive (or transmit) */
+
+
+/*
+ * Receive descriptor: one per receive buffer.  kqswnal_initialise()
+ * allocates KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE of these, gives each
+ * krx_npages freshly allocated pages mapped into Elan address space, and
+ * posts it on its receiver.  The page and iovec arrays are sized for the
+ * large case; small descriptors only use the first krx_npages entries.
+ */
+typedef struct
+{
+ struct list_head krx_list; /* enqueue -> thread */
+ EP_RCVR *krx_eprx; /* port to post receives to */
+ EP_RXD *krx_rxd; /* receive descriptor (for repost) */
+ E3_Addr krx_elanaddr; /* Elan address of buffer (contiguous in elan vm) */
+ int krx_npages; /* # pages in receive buffer */
+ int krx_nob; /* Number Of Bytes received into buffer */
+ kpr_fwd_desc_t krx_fwd; /* embedded forwarding descriptor */
+ struct page *krx_pages[KQSW_NRXMSGPAGES_LARGE]; /* pages allocated */
+ struct iovec krx_iov[KQSW_NRXMSGPAGES_LARGE]; /* iovec for forwarding */
+} kqswnal_rx_t;
+
+/*
+ * Transmit descriptor.  Each one owns a slice of KQSW_NTXMSGPAGES pages of
+ * the reserved Elan tx address space: the leading pages hold the permanent
+ * mapping of ktx_buffer (set up in kqswnal_initialise()), the remaining
+ * ktx_npages starting at ktx_basepage are used for per-message mappings
+ * made by kqswnal_map_tx_iov()/kqswnal_map_tx_kiov() and released by
+ * kqswnal_unmap_tx().
+ */
+typedef struct
+{
+ struct list_head ktx_list; /* enqueue idle/delayed */
+ struct list_head *ktx_idle; /* where to put when idle */
+ char ktx_state; /* What I'm doing */
+ uint32_t ktx_basepage; /* page offset in reserved elan tx vaddrs for mapping pages */
+ int ktx_npages; /* pages reserved for mapping messages */
+ int ktx_nmappedpages; /* # pages mapped for current message */
+ EP_IOVEC ktx_iov[EP_MAXFRAG]; /* msg frags (elan vaddrs) */
+ int ktx_niov; /* # message frags */
+ int ktx_port; /* destination ep port */
+ ptl_nid_t ktx_nid; /* destination node */
+ void *ktx_args[2]; /* completion passthru */
+ E3_Addr ktx_ebuffer; /* elan address of ktx_buffer */
+ char *ktx_buffer; /* pre-allocated contiguous buffer for hdr + small payloads */
+} kqswnal_tx_t;
+
+/* ktx_state values; kqswnal_tx_done() dispatches completion on these */
+#define KTX_IDLE 0 /* MUST BE ZERO (so zeroed ktx is idle) */
+#define KTX_SENDING 1 /* local send */
+#define KTX_FORWARDING 2 /* routing a packet */
+
+/*
+ * All global state of the NAL (the single instance is kqswnal_data).
+ * It is zeroed and its lists/locks initialised at the start of
+ * kqswnal_initialise(), and zeroed again at the end of kqswnal_finalise().
+ */
+typedef struct
+{
+ char kqn_init; /* what's been initialised */
+ char kqn_shuttingdown; /* I'm trying to shut down */
+ atomic_t kqn_nthreads; /* # threads still running */
+
+ kqswnal_rx_t *kqn_rxds; /* all the receive descriptors */
+ kqswnal_tx_t *kqn_txds; /* all the transmit descriptors */
+
+ struct list_head kqn_idletxds; /* transmit descriptors free to use */
+ struct list_head kqn_nblk_idletxds; /* reserve of transmit descriptors for senders that can't block */
+ spinlock_t kqn_idletxd_lock; /* serialise idle txd access */
+ wait_queue_head_t kqn_idletxd_waitq; /* sender blocks here waiting for idle txd */
+ struct list_head kqn_idletxd_fwdq; /* forwarded packets block here waiting for idle txd */
+
+ spinlock_t kqn_sched_lock; /* serialise packet schedulers */
+ wait_queue_head_t kqn_sched_waitq; /* scheduler blocks here */
+
+ struct list_head kqn_readyrxds; /* rxds full of data */
+ struct list_head kqn_delayedfwds; /* delayed forwards */
+ struct list_head kqn_delayedtxds; /* delayed transmits */
+
+ spinlock_t kqn_statelock; /* cb_cli/cb_sti */
+ nal_cb_t *kqn_cb; /* -> kqswnal_lib */
+ EP_DEV *kqn_epdev; /* elan device */
+ EP_XMTR *kqn_eptx; /* elan transmitter */
+ EP_RCVR *kqn_eprx_small; /* elan receiver (small messages) */
+ EP_RCVR *kqn_eprx_large; /* elan receiver (large messages) */
+ ELAN3_DMA_HANDLE *kqn_eptxdmahandle; /* elan reserved tx vaddrs */
+ ELAN3_DMA_HANDLE *kqn_eprxdmahandle; /* elan reserved rx vaddrs */
+ kpr_router_t kqn_router; /* connection to Kernel Portals Router module */
+} kqswnal_data_t;
+
+/* kqn_init state: how far kqswnal_initialise() got */
+#define KQN_INIT_NOTHING 0 /* MUST BE ZERO so zeroed state is initialised OK */
+#define KQN_INIT_DATA 1 /* pointers/lists/locks initialised */
+#define KQN_INIT_PTL 2 /* PtlNIInit() succeeded */
+#define KQN_INIT_ALL 3 /* fully up */
+
+/* Globals shared between the source files of this NAL */
+extern nal_cb_t kqswnal_lib;
+extern nal_t kqswnal_api;
+extern kqswnal_data_t kqswnal_data;
+
+/* Cross-file entry points (their definitions are not all visible from here) */
+extern int kqswnal_thread_start (int (*fn)(void *arg), void *arg);
+extern void kqswnal_rxhandler(EP_RXD *rxd); /* receive completion callback passed to ep_queue_receive() */
+extern int kqswnal_scheduler (void *); /* thread body started by kqswnal_initialise() */
+extern void kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd);
+
+/*
+ * Re-post a receive descriptor's whole buffer (krx_npages pages at
+ * krx_elanaddr) via its EP receive descriptor, with kqswnal_rxhandler as
+ * the completion callback.
+ */
+static inline void
+kqswnal_requeue_rx (kqswnal_rx_t *krx)
+{
+        unsigned long buffer_nob = krx->krx_npages * PAGE_SIZE;
+
+        ep_requeue_receive (krx->krx_rxd, kqswnal_rxhandler, krx,
+                            krx->krx_elanaddr, buffer_nob);
+}
+
+/*
+ * Number of pages touched by the 'nob' bytes starting at 'base', i.e. the
+ * page holding the first byte through the page holding the last byte,
+ * inclusive.  Asserts that the range does not wrap the address space.
+ */
+static inline int
+kqswnal_pages_spanned (void *base, int nob)
+{
+        unsigned long start_addr = (unsigned long)base;
+        unsigned long end_addr   = start_addr + (nob - 1); /* last byte */
+        unsigned long page_lo    = start_addr >> PAGE_SHIFT;
+        unsigned long page_hi    = end_addr >> PAGE_SHIFT;
+
+        LASSERT (page_hi >= page_lo); /* can't wrap address space */
+        return (page_hi - page_lo + 1);
+}
+
+#if KQSW_CHECKSUM
+/*
+ * Simple additive checksum: returns 'sum' plus the value of each of the
+ * 'nob' bytes at 'base' (treated as unsigned).  A non-positive 'nob'
+ * returns 'sum' unchanged.
+ */
+static inline kqsw_csum_t kqsw_csum (kqsw_csum_t sum, void *base, int nob)
+{
+        unsigned char *bytes = (unsigned char *)base;
+        int            idx;
+
+        for (idx = 0; idx < nob; idx++)
+                sum += bytes[idx];
+
+        return (sum);
+}
+#endif
+
+#endif /* _QSWNAL_H */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ *
+ * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
+ * W. Marcus Miller - Based on ksocknal
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include "qswnal.h"
+
+/* Packet counters.  'launched' is bumped by kqswnal_launch() when
+ * ep_transmit_large() returns 0 and 'transmitted' by kqswnal_txhandler()
+ * on EP_SUCCESS; 'received' is updated outside the code visible here. */
+atomic_t kqswnal_packets_launched;
+atomic_t kqswnal_packets_transmitted;
+atomic_t kqswnal_packets_received;
+
+
+/*
+ * LIB functions follow
+ *
+ */
+/*
+ * Copy 'len' bytes from src_addr to dst_addr with a plain memcpy(),
+ * logging the transfer at D_NET.  'private' is unused.
+ * Always returns 0.
+ */
+static int
+kqswnal_read(nal_cb_t *nal, void *private, void *dst_addr, user_ptr src_addr,
+             size_t len)
+{
+        CDEBUG (D_NET, LPX64": reading "LPSZ" bytes from %p -> %p\n",
+                nal->ni.nid, len, src_addr, dst_addr);
+
+        memcpy (dst_addr, src_addr, len);
+        return (0);
+}
+
+/*
+ * Copy 'len' bytes from src_addr to dst_addr with a plain memcpy(),
+ * logging the transfer at D_NET.  'private' is unused.
+ * Always returns 0.
+ */
+static int
+kqswnal_write(nal_cb_t *nal, void *private, user_ptr dst_addr, void *src_addr,
+              size_t len)
+{
+        CDEBUG (D_NET, LPX64": writing "LPSZ" bytes from %p -> %p\n",
+                nal->ni.nid, len, src_addr, dst_addr);
+
+        memcpy (dst_addr, src_addr, len);
+        return (0);
+}
+
+/*
+ * Allocate 'len' bytes with PORTAL_ALLOC and return the result
+ * (whatever PORTAL_ALLOC left in the pointer, including on failure).
+ */
+static void *
+kqswnal_malloc(nal_cb_t *nal, size_t len)
+{
+        void *mem;
+
+        PORTAL_ALLOC(mem, len);
+
+        return (mem);
+}
+
+/*
+ * Release a buffer of 'len' bytes with PORTAL_FREE; the counterpart of
+ * kqswnal_malloc().
+ */
+static void
+kqswnal_free(nal_cb_t *nal, void *buf, size_t len)
+{
+        PORTAL_FREE(buf, len);
+}
+
+/*
+ * printf-style logging hook: formats into a 256 byte stack buffer
+ * (longer output is truncated) and emits the result at D_NET.
+ */
+static void
+kqswnal_printf (nal_cb_t * nal, const char *fmt, ...)
+{
+        char    text[256];
+        va_list args;
+
+        va_start (args, fmt);
+        vsnprintf (text, sizeof (text), fmt, args); /* bounded format */
+        va_end (args);
+
+        /* force termination whatever vsnprintf did */
+        text[sizeof (text) - 1] = 0;
+
+        CDEBUG (D_NET, "%s", text);
+}
+
+
+/*
+ * Take the NAL state lock (kqn_statelock) with interrupts disabled,
+ * saving the previous interrupt state in *flags.
+ */
+static void
+kqswnal_cli(nal_cb_t *nal, unsigned long *flags)
+{
+        kqswnal_data_t *qsw = nal->nal_data;
+
+        spin_lock_irqsave(&qsw->kqn_statelock, *flags);
+}
+
+
+/*
+ * Drop the NAL state lock (kqn_statelock) and restore the interrupt
+ * state saved in *flags by kqswnal_cli().
+ */
+static void
+kqswnal_sti(nal_cb_t *nal, unsigned long *flags)
+{
+        kqswnal_data_t *qsw = nal->nal_data;
+
+        spin_unlock_irqrestore(&qsw->kqn_statelock, *flags);
+}
+
+
+/*
+ * Report the "distance" to 'nid' in *dist: 0 for our own nid, 1 for
+ * anything else.  Always returns 0.
+ */
+static int
+kqswnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
+{
+        /* network distance doesn't mean much for this nal */
+        if (nid == nal->ni.nid)
+                *dist = 0;
+        else
+                *dist = 1;
+
+        return (0);
+}
+
+/*
+ * Non-zero iff 'nid' fits in an unsigned int and is below the node count
+ * reported by ep_numnodes() for our Elan device.
+ */
+int
+kqswnal_ispeer (ptl_nid_t nid)
+{
+        unsigned int elanid = (unsigned int)nid;
+
+        /* lost high bits on conversion => can't be an elan id */
+        if ((ptl_nid_t)elanid != nid)
+                return (0);
+
+        /* is it in this machine? */
+        return (elanid < ep_numnodes (kqswnal_data.kqn_epdev));
+}
+
+/*
+ * Unload the per-message Elan mappings of a transmit descriptor (the
+ * ktx_nmappedpages pages starting at ktx_basepage) and reset the mapped
+ * page count.  Does nothing if no pages are currently mapped.
+ */
+void
+kqswnal_unmap_tx (kqswnal_tx_t *ktx)
+{
+        if (ktx->ktx_nmappedpages != 0) {
+                CDEBUG (D_NET, "%p[%d] unloading pages %d for %d\n",
+                        ktx, ktx->ktx_niov, ktx->ktx_basepage,
+                        ktx->ktx_nmappedpages);
+
+                /* mapped pages must lie inside this descriptor's slice... */
+                LASSERT (ktx->ktx_nmappedpages <= ktx->ktx_npages);
+                /* ...and inside the reserved tx address space */
+                LASSERT (ktx->ktx_basepage + ktx->ktx_nmappedpages <=
+                         kqswnal_data.kqn_eptxdmahandle->NumDvmaPages);
+
+                elan3_dvma_unload(kqswnal_data.kqn_epdev->DmaState,
+                                  kqswnal_data.kqn_eptxdmahandle,
+                                  ktx->ktx_basepage, ktx->ktx_nmappedpages);
+                ktx->ktx_nmappedpages = 0;
+        }
+}
+
+/*
+ * Map 'nob' bytes of payload, described by the page fragments in 'kiov',
+ * into this descriptor's reserved Elan tx address space and append them to
+ * ktx->ktx_iov.  Mapping continues from whatever is already mapped
+ * (ktx_nmappedpages / ktx_niov).  Each kiov entry uses exactly one page
+ * slot; a fragment whose Elan address directly follows the previous one is
+ * merged into it rather than taking a new ktx_iov entry.
+ *
+ * Returns 0 on success, or -EMSGSIZE if the message needs more pages than
+ * ktx_npages or more fragments than ktx_iov holds.  On failure
+ * ktx_nmappedpages still counts the pages loaded so far, so a later
+ * kqswnal_unmap_tx() releases them; ktx_niov is only updated on success.
+ */
+int
+kqswnal_map_tx_kiov (kqswnal_tx_t *ktx, int nob, int niov, ptl_kiov_t *kiov)
+{
+ int nfrags = ktx->ktx_niov;
+ const int maxfrags = sizeof (ktx->ktx_iov)/sizeof (ktx->ktx_iov[0]);
+ int nmapped = ktx->ktx_nmappedpages;
+ int maxmapped = ktx->ktx_npages;
+ uint32_t basepage = ktx->ktx_basepage + nmapped;
+ char *ptr;
+
+ LASSERT (nmapped <= maxmapped);
+ LASSERT (nfrags <= maxfrags);
+ LASSERT (niov > 0);
+ LASSERT (nob > 0);
+
+ do {
+ int fraglen = kiov->kiov_len;
+
+ /* nob exactly spans the iovs */
+ LASSERT (fraglen <= nob);
+ /* each frag fits in a page */
+ LASSERT (kiov->kiov_offset + kiov->kiov_len <= PAGE_SIZE);
+
+ /* check limits BEFORE loading anything for this fragment */
+ nmapped++;
+ if (nmapped > maxmapped) {
+ CERROR("Can't map message in %d pages (max %d)\n",
+ nmapped, maxmapped);
+ return (-EMSGSIZE);
+ }
+
+ if (nfrags == maxfrags) {
+ CERROR("Message too fragmented in Elan VM (max %d frags)\n",
+ maxfrags);
+ return (-EMSGSIZE);
+ }
+
+ /* XXX this is really crap, but we'll have to kmap until
+ * EKC has a page (rather than vaddr) mapping interface */
+
+ ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset;
+
+ CDEBUG(D_NET,
+ "%p[%d] loading %p for %d, page %d, %d total\n",
+ ktx, nfrags, ptr, fraglen, basepage, nmapped);
+
+ elan3_dvma_kaddr_load (kqswnal_data.kqn_epdev->DmaState,
+ kqswnal_data.kqn_eptxdmahandle,
+ ptr, fraglen,
+ basepage, &ktx->ktx_iov[nfrags].Base);
+
+ kunmap (kiov->kiov_page);
+
+ /* keep in loop for failure case */
+ ktx->ktx_nmappedpages = nmapped;
+
+ if (nfrags > 0 && /* previous frag mapped */
+ ktx->ktx_iov[nfrags].Base == /* contiguous with this one */
+ (ktx->ktx_iov[nfrags-1].Base + ktx->ktx_iov[nfrags-1].Len))
+ /* just extend previous */
+ ktx->ktx_iov[nfrags - 1].Len += fraglen;
+ else {
+ ktx->ktx_iov[nfrags].Len = fraglen;
+ nfrags++; /* new frag */
+ }
+
+ basepage++;
+ kiov++;
+ niov--;
+ nob -= fraglen;
+
+ /* iov must not run out before end of data */
+ LASSERT (nob == 0 || niov > 0);
+
+ } while (nob > 0);
+
+ ktx->ktx_niov = nfrags;
+ CDEBUG (D_NET, "%p got %d frags over %d pages\n",
+ ktx, ktx->ktx_niov, ktx->ktx_nmappedpages);
+
+ return (0);
+}
+
+/*
+ * Map 'nob' bytes of payload, described by the kernel-virtual fragments in
+ * 'iov', into this descriptor's reserved Elan tx address space and append
+ * them to ktx->ktx_iov.  Mapping continues from whatever is already mapped
+ * (ktx_nmappedpages / ktx_niov).  Each iov entry uses as many page slots as
+ * kqswnal_pages_spanned() reports for it; a fragment whose Elan address
+ * directly follows the previous one is merged into it rather than taking a
+ * new ktx_iov entry.
+ *
+ * Returns 0 on success, or -EMSGSIZE if the message needs more pages than
+ * ktx_npages or more fragments than ktx_iov holds.  On failure
+ * ktx_nmappedpages still counts the pages loaded so far, so a later
+ * kqswnal_unmap_tx() releases them; ktx_niov is only updated on success.
+ */
+int
+kqswnal_map_tx_iov (kqswnal_tx_t *ktx, int nob, int niov, struct iovec *iov)
+{
+ int nfrags = ktx->ktx_niov;
+ const int maxfrags = sizeof (ktx->ktx_iov)/sizeof (ktx->ktx_iov[0]);
+ int nmapped = ktx->ktx_nmappedpages;
+ int maxmapped = ktx->ktx_npages;
+ uint32_t basepage = ktx->ktx_basepage + nmapped;
+
+ LASSERT (nmapped <= maxmapped);
+ LASSERT (nfrags <= maxfrags);
+ LASSERT (niov > 0);
+ LASSERT (nob > 0);
+
+ do {
+ int fraglen = iov->iov_len;
+ long npages = kqswnal_pages_spanned (iov->iov_base, fraglen);
+
+ /* nob exactly spans the iovs */
+ LASSERT (fraglen <= nob);
+
+ /* check limits BEFORE loading anything for this fragment */
+ nmapped += npages;
+ if (nmapped > maxmapped) {
+ CERROR("Can't map message in %d pages (max %d)\n",
+ nmapped, maxmapped);
+ return (-EMSGSIZE);
+ }
+
+ if (nfrags == maxfrags) {
+ CERROR("Message too fragmented in Elan VM (max %d frags)\n",
+ maxfrags);
+ return (-EMSGSIZE);
+ }
+
+ CDEBUG(D_NET,
+ "%p[%d] loading %p for %d, pages %d for %ld, %d total\n",
+ ktx, nfrags, iov->iov_base, fraglen, basepage, npages,
+ nmapped);
+
+ elan3_dvma_kaddr_load (kqswnal_data.kqn_epdev->DmaState,
+ kqswnal_data.kqn_eptxdmahandle,
+ iov->iov_base, fraglen,
+ basepage, &ktx->ktx_iov[nfrags].Base);
+ /* keep in loop for failure case */
+ ktx->ktx_nmappedpages = nmapped;
+
+ if (nfrags > 0 && /* previous frag mapped */
+ ktx->ktx_iov[nfrags].Base == /* contiguous with this one */
+ (ktx->ktx_iov[nfrags-1].Base + ktx->ktx_iov[nfrags-1].Len))
+ /* just extend previous */
+ ktx->ktx_iov[nfrags - 1].Len += fraglen;
+ else {
+ ktx->ktx_iov[nfrags].Len = fraglen;
+ nfrags++; /* new frag */
+ }
+
+ basepage += npages;
+ iov++;
+ niov--;
+ nob -= fraglen;
+
+ /* iov must not run out before end of data */
+ LASSERT (nob == 0 || niov > 0);
+
+ } while (nob > 0);
+
+ ktx->ktx_niov = nfrags;
+ CDEBUG (D_NET, "%p got %d frags over %d pages\n",
+ ktx, ktx->ktx_niov, ktx->ktx_nmappedpages);
+
+ return (0);
+}
+
+/*
+ * Return a transmit descriptor to its idle pool (ktx->ktx_idle) after
+ * releasing its per-message mappings and marking it KTX_IDLE.
+ *
+ * Descriptors from the non-blocking reserve are simply put back.  For the
+ * normal pool, anything waiting for a descriptor is also kicked: one
+ * blocked forwarded packet (if any) is taken off kqn_idletxd_fwdq and
+ * moved to kqn_delayedfwds for the scheduler, and local senders sleeping
+ * on kqn_idletxd_waitq are woken.
+ */
+void
+kqswnal_put_idle_tx (kqswnal_tx_t *ktx)
+{
+ kpr_fwd_desc_t *fwd = NULL;
+ struct list_head *idle = ktx->ktx_idle;
+ unsigned long flags;
+
+ kqswnal_unmap_tx (ktx); /* release temporary mappings */
+ ktx->ktx_state = KTX_IDLE;
+
+ spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags);
+
+ list_add (&ktx->ktx_list, idle);
+
+ /* reserved for non-blocking tx */
+ if (idle == &kqswnal_data.kqn_nblk_idletxds) {
+ spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
+ return;
+ }
+
+ /* anything blocking for a tx descriptor? */
+ if (!list_empty(&kqswnal_data.kqn_idletxd_fwdq)) /* forwarded packet? */
+ {
+ CDEBUG(D_NET,"wakeup fwd\n");
+
+ fwd = list_entry (kqswnal_data.kqn_idletxd_fwdq.next,
+ kpr_fwd_desc_t, kprfd_list);
+ list_del (&fwd->kprfd_list);
+ }
+
+ if (waitqueue_active (&kqswnal_data.kqn_idletxd_waitq)) /* process? */
+ {
+ /* local sender waiting for tx desc */
+ CDEBUG(D_NET,"wakeup process\n");
+ wake_up (&kqswnal_data.kqn_idletxd_waitq);
+ }
+
+ spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
+
+ if (fwd == NULL)
+ return;
+
+ /* schedule packet for forwarding again */
+ /* NB kqn_idletxd_lock has been dropped; kqn_sched_lock is taken on its own */
+ spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
+
+ list_add_tail (&fwd->kprfd_list, &kqswnal_data.kqn_delayedfwds);
+ if (waitqueue_active (&kqswnal_data.kqn_sched_waitq))
+ wake_up (&kqswnal_data.kqn_sched_waitq);
+
+ spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
+}
+
+/*
+ * Get an idle transmit descriptor.
+ *
+ * fwd != NULL: the caller is forwarding a packet.  If the normal pool is
+ *              empty, 'fwd' is queued on kqn_idletxd_fwdq (to be retried
+ *              when kqswnal_put_idle_tx() frees a descriptor) and NULL is
+ *              returned.
+ * fwd == NULL: a local send.  If the normal pool is empty and 'may_block'
+ *              is zero, a descriptor from the non-blocking reserve is
+ *              used, or NULL is returned if that is exhausted too.  If
+ *              'may_block' is non-zero the caller sleeps until the normal
+ *              pool is non-empty and then retries.
+ *
+ * Returns the descriptor (already removed from its idle list) or NULL.
+ */
+kqswnal_tx_t *
+kqswnal_get_idle_tx (kpr_fwd_desc_t *fwd, int may_block)
+{
+ unsigned long flags;
+ kqswnal_tx_t *ktx = NULL;
+
+ /* NB every 'break' below leaves the loop with kqn_idletxd_lock HELD;
+ * it is dropped once after the loop */
+ for (;;) {
+ spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags);
+
+ /* "normal" descriptor is free */
+ if (!list_empty (&kqswnal_data.kqn_idletxds)) {
+ ktx = list_entry (kqswnal_data.kqn_idletxds.next,
+ kqswnal_tx_t, ktx_list);
+ list_del (&ktx->ktx_list);
+ break;
+ }
+
+ /* "normal" descriptor pool is empty */
+
+ if (fwd != NULL) { /* forwarded packet => queue for idle txd */
+ CDEBUG (D_NET, "blocked fwd [%p]\n", fwd);
+ list_add_tail (&fwd->kprfd_list,
+ &kqswnal_data.kqn_idletxd_fwdq);
+ break;
+ }
+
+ /* doing a local transmit */
+ if (!may_block) {
+ if (list_empty (&kqswnal_data.kqn_nblk_idletxds)) {
+ CERROR ("intr tx desc pool exhausted\n");
+ break;
+ }
+
+ ktx = list_entry (kqswnal_data.kqn_nblk_idletxds.next,
+ kqswnal_tx_t, ktx_list);
+ list_del (&ktx->ktx_list);
+ break;
+ }
+
+ /* block for idle tx */
+
+ spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
+
+ CDEBUG (D_NET, "blocking for tx desc\n");
+ /* the list is re-checked under the lock at the top of the loop */
+ wait_event (kqswnal_data.kqn_idletxd_waitq,
+ !list_empty (&kqswnal_data.kqn_idletxds));
+ }
+
+ spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
+
+ /* Idle descs can't have any mapped (as opposed to pre-mapped) pages */
+ LASSERT (ktx == NULL || ktx->ktx_nmappedpages == 0);
+ return (ktx);
+}
+
+/*
+ * Complete a transmit: notify whoever originated the packet, then put the
+ * descriptor back on its idle list.
+ *
+ * A forwarded packet is completed to the router with 'error'; a locally
+ * sourced packet is completed with lib_finalize() ('error' is not passed
+ * on in that case).  Any other state is a bug.
+ */
+void
+kqswnal_tx_done (kqswnal_tx_t *ktx, int error)
+{
+        int state = ktx->ktx_state;
+
+        if (state == KTX_FORWARDING) {
+                /* router asked me to forward this packet */
+                kpr_fwd_done (&kqswnal_data.kqn_router,
+                              (kpr_fwd_desc_t *)ktx->ktx_args[0], error);
+        } else if (state == KTX_SENDING) {
+                /* packet sourced locally */
+                lib_finalize (&kqswnal_lib, ktx->ktx_args[0],
+                              (lib_msg_t *)ktx->ktx_args[1]);
+        } else {
+                LASSERT (0);
+        }
+
+        kqswnal_put_idle_tx (ktx);
+}
+
+/*
+ * EP transmit completion callback (passed to ep_transmit_large() by
+ * kqswnal_launch()).  'arg' is the kqswnal_tx_t being completed.
+ * Counts successful transmits; any failure is logged and reported to
+ * kqswnal_tx_done() as -EIO.
+ */
+static void
+kqswnal_txhandler(EP_TXD *txd, void *arg, int status)
+{
+        kqswnal_tx_t *ktx = (kqswnal_tx_t *)arg;
+
+        LASSERT (txd != NULL);
+        LASSERT (ktx != NULL);
+
+        CDEBUG(D_NET, "txd %p, arg %p status %d\n", txd, arg, status);
+
+        if (status == EP_SUCCESS) {
+                atomic_inc (&kqswnal_packets_transmitted);
+        } else {
+                CERROR ("kqswnal: Transmit failed with %d\n", status);
+                status = -EIO;
+        }
+
+        kqswnal_tx_done (ktx, status);
+}
+
+/*
+ * Hand a fully set up transmit descriptor to EP.
+ *
+ * Returns 0 if the packet was launched, or if EP had no memory for it
+ * (rc == ENOMEM) and it has been queued on kqn_delayedtxds for the
+ * scheduler to retry.  Any other ep_transmit_large() return code is
+ * passed straight back to the caller.
+ */
+int
+kqswnal_launch (kqswnal_tx_t *ktx)
+{
+        /* Don't block for transmit descriptor if we're in interrupt context */
+        int attr = in_interrupt() ? (EP_NO_SLEEP | EP_NO_ALLOC) : 0;
+        int rc = ep_transmit_large(kqswnal_data.kqn_eptx, ktx->ktx_nid,
+                                   ktx->ktx_port, attr, kqswnal_txhandler,
+                                   ktx, ktx->ktx_iov, ktx->ktx_niov);
+        /* spin_lock_irqsave() needs an unsigned long, as used by every
+         * other lock user in this file */
+        unsigned long flags;
+
+        if (rc == 0)
+                atomic_inc (&kqswnal_packets_launched);
+
+        if (rc != ENOMEM)
+                return (rc);
+
+        /* can't allocate ep txd => queue for later */
+
+        LASSERT (in_interrupt());      /* not called by thread (not looping) */
+
+        spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
+
+        list_add_tail (&ktx->ktx_list, &kqswnal_data.kqn_delayedtxds);
+        if (waitqueue_active (&kqswnal_data.kqn_sched_waitq))
+                wake_up (&kqswnal_data.kqn_sched_waitq);
+
+        spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
+
+        return (0);
+}
+
+
+/*
+ * Printable name for the message type in a portals header; returns
+ * "<UNKNOWN>" for any value that is not ACK, PUT, GET or REPLY.
+ * NB hdr->type is compared as stored, without byte-order conversion.
+ */
+static char *
+hdr_type_string (ptl_hdr_t *hdr)
+{
+        char *name;
+
+        switch (hdr->type) {
+        case PTL_MSG_ACK:
+                name = "ACK";
+                break;
+        case PTL_MSG_PUT:
+                name = "PUT";
+                break;
+        case PTL_MSG_GET:
+                name = "GET";
+                break;
+        case PTL_MSG_REPLY:
+                name = "REPLY";
+                break;
+        default:
+                name = "<UNKNOWN>";
+                break;
+        }
+
+        return (name);
+}
+
+/*
+ * Dump a portals header to the error log: its type, source and
+ * destination nid/pid, and the type-specific fields for PUT, GET, ACK and
+ * REPLY messages.  Other types get only the common lines.
+ */
+static void
+kqswnal_cerror_hdr(ptl_hdr_t * hdr)
+{
+        char *type_str = hdr_type_string (hdr);
+
+        CERROR("P3 Header at %p of type %s\n", hdr, type_str);
+        /* every CERROR is its own log record, so each needs its newline */
+        CERROR(" From nid/pid "LPU64"/%u\n", NTOH__u64(hdr->src_nid),
+               NTOH__u32(hdr->src_pid));
+        CERROR(" To nid/pid "LPU64"/%u\n", NTOH__u64(hdr->dest_nid),
+               NTOH__u32(hdr->dest_pid));
+
+        switch (NTOH__u32(hdr->type)) {
+        case PTL_MSG_PUT:
+                CERROR(" Ptl index %d, ack md "LPX64"."LPX64", "
+                       "match bits "LPX64"\n",
+                       NTOH__u32 (hdr->msg.put.ptl_index),
+                       hdr->msg.put.ack_wmd.wh_interface_cookie,
+                       hdr->msg.put.ack_wmd.wh_object_cookie,
+                       NTOH__u64 (hdr->msg.put.match_bits));
+                CERROR(" Length %d, offset %d, hdr data "LPX64"\n",
+                       NTOH__u32(PTL_HDR_LENGTH(hdr)),
+                       NTOH__u32(hdr->msg.put.offset),
+                       hdr->msg.put.hdr_data);
+                break;
+
+        case PTL_MSG_GET:
+                /* match bits are converted exactly as in the PUT case */
+                CERROR(" Ptl index %d, return md "LPX64"."LPX64", "
+                       "match bits "LPX64"\n",
+                       NTOH__u32 (hdr->msg.get.ptl_index),
+                       hdr->msg.get.return_wmd.wh_interface_cookie,
+                       hdr->msg.get.return_wmd.wh_object_cookie,
+                       NTOH__u64 (hdr->msg.get.match_bits));
+                CERROR(" Length %d, src offset %d\n",
+                       NTOH__u32 (hdr->msg.get.sink_length),
+                       NTOH__u32 (hdr->msg.get.src_offset));
+                break;
+
+        case PTL_MSG_ACK:
+                CERROR(" dst md "LPX64"."LPX64", manipulated length %d\n",
+                       hdr->msg.ack.dst_wmd.wh_interface_cookie,
+                       hdr->msg.ack.dst_wmd.wh_object_cookie,
+                       NTOH__u32 (hdr->msg.ack.mlength));
+                break;
+
+        case PTL_MSG_REPLY:
+                CERROR(" dst md "LPX64"."LPX64", length %d\n",
+                       hdr->msg.reply.dst_wmd.wh_interface_cookie,
+                       hdr->msg.reply.dst_wmd.wh_object_cookie,
+                       NTOH__u32 (PTL_HDR_LENGTH(hdr)));
+                break;
+
+        default:
+                /* unknown type: nothing type-specific to print */
+                break;
+        }
+
+} /* end of kqswnal_cerror_hdr() */
+
+/* Common send path behind kqswnal_send() and kqswnal_send_pages().
+ *
+ * The payload is described either by 'payload_iov' (virtual addresses) or by
+ * 'payload_kiov' (pages), never both.  The header is copied into the tx
+ * descriptor's pre-mapped buffer; a payload of at most KQSW_TX_MAXCONTIG
+ * bytes is copied in after it, a larger one is mapped fragment by fragment.
+ * If 'nid' is not a direct peer the message is sent to the gateway returned
+ * by kpr_lookup().
+ *
+ * Returns 0 once the message has been handed to kqswnal_launch(), -1 on any
+ * failure.  On every failure path lib_finalize() is called here and any tx
+ * descriptor already obtained is returned with kqswnal_put_idle_tx(). */
+static int
+kqswnal_sendmsg (nal_cb_t *nal,
+                 void *private,
+                 lib_msg_t *cookie,
+                 ptl_hdr_t *hdr,
+                 int type,
+                 ptl_nid_t nid,
+                 ptl_pid_t pid,
+                 unsigned int payload_niov,
+                 struct iovec *payload_iov,
+                 ptl_kiov_t *payload_kiov,
+                 size_t payload_nob)
+{
+        kqswnal_tx_t *ktx;
+        int rc;
+        ptl_nid_t gatewaynid;
+#if KQSW_CHECKSUM
+        int i;
+        kqsw_csum_t csum;
+        int sumnob;
+#endif
+
+        /* NB, the return code from this procedure is ignored.
+         * If we can't send, we must still complete with lib_finalize().
+         * We'll have to wait for 3.2 to return an error event.
+         */
+
+        CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid: "LPX64
+               " pid %u\n", payload_nob, payload_niov, nid, pid);
+
+        LASSERT (payload_nob == 0 || payload_niov > 0);
+        LASSERT (payload_niov <= PTL_MD_MAX_IOV);
+
+        /* It must be OK to kmap() if required */
+        LASSERT (payload_kiov == NULL || !in_interrupt ());
+        /* payload is either all vaddrs or all pages */
+        LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
+
+        if (payload_nob > KQSW_MAXPAYLOAD) {
+                CERROR ("request exceeds MTU size "LPSZ" (max %u).\n",
+                        payload_nob, KQSW_MAXPAYLOAD);
+                lib_finalize (&kqswnal_lib, private, cookie);
+                return (-1);
+        }
+
+        if (!kqswnal_ispeer (nid)) { /* Can't send direct: find gateway? */
+                rc = kpr_lookup (&kqswnal_data.kqn_router, nid, &gatewaynid);
+                if (rc != 0) {
+                        CERROR("Can't route to "LPX64": router error %d\n",
+                               nid, rc);
+                        lib_finalize (&kqswnal_lib, private, cookie);
+                        return (-1);
+                }
+                if (!kqswnal_ispeer (gatewaynid)) {
+                        CERROR("Bad gateway "LPX64" for "LPX64"\n",
+                               gatewaynid, nid);
+                        lib_finalize (&kqswnal_lib, private, cookie);
+                        return (-1);
+                }
+                nid = gatewaynid;
+        }
+
+        /* I may not block for a transmit descriptor if I might block the
+         * receiver, or an interrupt handler. */
+        ktx = kqswnal_get_idle_tx(NULL, !(type == PTL_MSG_ACK ||
+                                          type == PTL_MSG_REPLY ||
+                                          in_interrupt()));
+        if (ktx == NULL) {
+                kqswnal_cerror_hdr (hdr);
+                lib_finalize (&kqswnal_lib, private, cookie);
+                /* no descriptor: must not fall through and dereference it */
+                return (-1);
+        }
+
+        memcpy (ktx->ktx_buffer, hdr, sizeof (*hdr)); /* copy hdr from caller's stack */
+
+#if KQSW_CHECKSUM
+        csum = kqsw_csum (0, (char *)hdr, sizeof (*hdr));
+        memcpy (ktx->ktx_buffer + sizeof (*hdr), &csum, sizeof (csum));
+        for (csum = 0, i = 0, sumnob = payload_nob; sumnob > 0; i++) {
+                if (payload_kiov != NULL) {
+                        ptl_kiov_t *kiov = &payload_kiov[i];
+                        char *addr = ((char *)kmap (kiov->kiov_page)) +
+                                     kiov->kiov_offset;
+
+                        csum = kqsw_csum (csum, addr, MIN (sumnob, kiov->kiov_len));
+                        /* drop the temporary mapping taken for the checksum */
+                        kunmap (kiov->kiov_page);
+                        sumnob -= kiov->kiov_len;
+                } else {
+                        struct iovec *iov = &payload_iov[i];
+
+                        csum = kqsw_csum (csum, iov->iov_base, MIN (sumnob, iov->iov_len));
+                        sumnob -= iov->iov_len;
+                }
+        }
+        memcpy(ktx->ktx_buffer +sizeof(*hdr) +sizeof(csum), &csum,sizeof(csum));
+#endif
+
+        /* Set up first frag from pre-mapped buffer (it's at least the
+         * portals header) */
+        ktx->ktx_iov[0].Base = ktx->ktx_ebuffer;
+        ktx->ktx_iov[0].Len = KQSW_HDR_SIZE;
+        ktx->ktx_niov = 1;
+
+        if (payload_nob > 0) { /* got some payload (something more to do) */
+                /* make a single contiguous message? */
+                if (payload_nob <= KQSW_TX_MAXCONTIG) {
+                        /* copy payload to ktx_buffer, immediately after hdr */
+                        if (payload_kiov != NULL)
+                                lib_copy_kiov2buf (ktx->ktx_buffer + KQSW_HDR_SIZE,
+                                                   payload_niov, payload_kiov, payload_nob);
+                        else
+                                lib_copy_iov2buf (ktx->ktx_buffer + KQSW_HDR_SIZE,
+                                                  payload_niov, payload_iov, payload_nob);
+                        /* first frag includes payload */
+                        ktx->ktx_iov[0].Len += payload_nob;
+                } else {
+                        if (payload_kiov != NULL)
+                                rc = kqswnal_map_tx_kiov (ktx, payload_nob,
+                                                          payload_niov, payload_kiov);
+                        else
+                                rc = kqswnal_map_tx_iov (ktx, payload_nob,
+                                                         payload_niov, payload_iov);
+                        if (rc != 0) {
+                                kqswnal_put_idle_tx (ktx);
+                                lib_finalize (&kqswnal_lib, private, cookie);
+                                return (-1);
+                        }
+                }
+        }
+
+        ktx->ktx_port = (payload_nob <= KQSW_SMALLPAYLOAD) ?
+                        EP_SVC_LARGE_PORTALS_SMALL : EP_SVC_LARGE_PORTALS_LARGE;
+        ktx->ktx_nid = nid;
+        ktx->ktx_state = KTX_SENDING; /* => lib_finalize() on completion */
+        ktx->ktx_args[0] = private;
+        ktx->ktx_args[1] = cookie;
+
+        rc = kqswnal_launch (ktx);
+        if (rc != 0) { /* failed? */
+                CERROR ("Failed to send packet to "LPX64": %d\n", nid, rc);
+                /* launch neither sent nor queued it: give the descriptor
+                 * back, as the forwarding path does on the same failure */
+                kqswnal_put_idle_tx (ktx);
+                lib_finalize (&kqswnal_lib, private, cookie);
+                return (-1);
+        }
+
+        CDEBUG(D_NET, "send to "LPSZ" bytes to "LPX64"\n", payload_nob, nid);
+        return (0);
+}
+
+/* cb_send entry point: the payload is a vector of virtual addresses, so the
+ * page-vector argument of kqswnal_sendmsg() is left NULL. */
+static int
+kqswnal_send (nal_cb_t *nal,
+              void *private,
+              lib_msg_t *cookie,
+              ptl_hdr_t *hdr,
+              int type,
+              ptl_nid_t nid,
+              ptl_pid_t pid,
+              unsigned int payload_niov,
+              struct iovec *payload_iov,
+              size_t payload_nob)
+{
+        ptl_kiov_t *no_pages = NULL;
+        int         result;
+
+        result = kqswnal_sendmsg (nal, private, cookie, hdr, type, nid, pid,
+                                  payload_niov, payload_iov, no_pages,
+                                  payload_nob);
+        return (result);
+}
+
+/* cb_send_pages entry point: the payload is a vector of pages, so the
+ * iovec argument of kqswnal_sendmsg() is left NULL. */
+static int
+kqswnal_send_pages (nal_cb_t *nal,
+                    void *private,
+                    lib_msg_t *cookie,
+                    ptl_hdr_t *hdr,
+                    int type,
+                    ptl_nid_t nid,
+                    ptl_pid_t pid,
+                    unsigned int payload_niov,
+                    ptl_kiov_t *payload_kiov,
+                    size_t payload_nob)
+{
+        struct iovec *no_vaddrs = NULL;
+        int           result;
+
+        result = kqswnal_sendmsg (nal, private, cookie, hdr, type, nid, pid,
+                                  payload_niov, no_vaddrs, payload_kiov,
+                                  payload_nob);
+        return (result);
+}
+
+/* When non-zero, kqswnal_fwd_packet() copies even single-fragment packets
+ * into the tx descriptor's contiguous buffer (if they fit) instead of
+ * mapping them for zero-copy. */
+int kqswnal_fwd_copy_contig = 0;
+
+/* Forward a packet on behalf of the router (also called by the scheduler
+ * for forwards taken off kqn_delayedfwds).
+ *
+ * 'arg' is not used.  'fwd' describes the packet (iov, fragment count, byte
+ * count, gateway and target nids).  If no tx descriptor is available the
+ * function just returns.  On any other failure the descriptor is returned
+ * with kqswnal_put_idle_tx() and the forward is completed immediately with
+ * the error via kpr_fwd_done(). */
+void
+kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
+{
+ int rc;
+ kqswnal_tx_t *ktx;
+ struct iovec *iov = fwd->kprfd_iov;
+ int niov = fwd->kprfd_niov;
+ int nob = fwd->kprfd_nob;
+ ptl_nid_t nid = fwd->kprfd_gateway_nid;
+
+#if KQSW_CHECKSUM
+ CERROR ("checksums for forwarded packets not implemented\n");
+ LBUG ();
+#endif
+ /* The router wants this NAL to forward a packet */
+ CDEBUG (D_NET, "forwarding [%p] to "LPX64", %d frags %d bytes\n",
+ fwd, nid, niov, nob);
+
+ LASSERT (niov > 0);
+
+ ktx = kqswnal_get_idle_tx (fwd, FALSE);
+ if (ktx == NULL) /* can't get txd right now */
+ return; /* fwd will be scheduled when tx desc freed */
+
+ if (nid == kqswnal_lib.ni.nid) /* gateway is me */
+ nid = fwd->kprfd_target_nid; /* target is final dest */
+
+ if (!kqswnal_ispeer (nid)) {
+ CERROR("Can't forward [%p] to "LPX64": not a peer\n", fwd, nid);
+ rc = -EHOSTUNREACH;
+ goto failed;
+ }
+
+ if (nob > KQSW_NRXMSGBYTES_LARGE) {
+ CERROR ("Can't forward [%p] to "LPX64
+ ": size %d bigger than max packet size %ld\n",
+ fwd, nid, nob, (long)KQSW_NRXMSGBYTES_LARGE);
+ rc = -EMSGSIZE;
+ goto failed;
+ }
+
+ /* Copy when the packet is fragmented (or copying is forced) and it fits
+ * in the pre-mapped buffer; otherwise map the caller's fragments. */
+ if ((kqswnal_fwd_copy_contig || niov > 1) &&
+ nob <= KQSW_TX_BUFFER_SIZE)
+ {
+ /* send from ktx's pre-allocated/mapped contiguous buffer? */
+ lib_copy_iov2buf (ktx->ktx_buffer, niov, iov, nob);
+ ktx->ktx_iov[0].Base = ktx->ktx_ebuffer; /* already mapped */
+ ktx->ktx_iov[0].Len = nob;
+ ktx->ktx_niov = 1;
+ }
+ else
+ {
+ /* zero copy */
+ ktx->ktx_niov = 0; /* no frags mapped yet */
+ rc = kqswnal_map_tx_iov (ktx, nob, niov, iov);
+ if (rc != 0)
+ goto failed;
+ }
+
+ /* 'nob' here includes the portals header, hence the sizeof() term */
+ ktx->ktx_port = (nob <= (sizeof (ptl_hdr_t) + KQSW_SMALLPAYLOAD)) ?
+ EP_SVC_LARGE_PORTALS_SMALL : EP_SVC_LARGE_PORTALS_LARGE;
+ ktx->ktx_nid = nid;
+ ktx->ktx_state = KTX_FORWARDING; /* kpr_put_packet() on completion */
+ ktx->ktx_args[0] = fwd;
+
+ rc = kqswnal_launch (ktx);
+ if (rc == 0)
+ return;
+
+ /* every path reaching this label has set rc to a non-zero error */
+ failed:
+ LASSERT (rc != 0);
+ CERROR ("Failed to forward [%p] to "LPX64": %d\n", fwd, nid, rc);
+
+ kqswnal_put_idle_tx (ktx);
+ /* complete now (with failure) */
+ kpr_fwd_done (&kqswnal_data.kqn_router, fwd, rc);
+}
+
+/* Completion callback registered with kpr_fwd_init(): the router is done
+ * with the receive buffer 'arg'.  A forwarding error is logged using the nids
+ * from the packet header; the buffer is requeued either way. */
+void
+kqswnal_fwd_callback (void *arg, int error)
+{
+        kqswnal_rx_t *rxbuf = (kqswnal_rx_t *)arg;
+        ptl_hdr_t    *pkt_hdr;
+
+        /* The router has finished forwarding this packet */
+
+        if (error != 0) {
+                pkt_hdr = (ptl_hdr_t *)page_address (rxbuf->krx_pages[0]);
+
+                CERROR("Failed to route packet from "LPX64" to "LPX64": %d\n",
+                       NTOH__u64(pkt_hdr->src_nid),
+                       NTOH__u64(pkt_hdr->dest_nid), error);
+        }
+
+        kqswnal_requeue_rx (rxbuf);
+}
+
+/* Process one received packet (called from kqswnal_scheduler()).
+ * Packets addressed to this node go to lib_parse(); packets for a direct
+ * peer are dropped and the buffer requeued; anything else is handed to the
+ * router for forwarding. */
+void
+kqswnal_rx (kqswnal_rx_t *krx)
+{
+        ptl_hdr_t *hdr      = (ptl_hdr_t *) page_address (krx->krx_pages[0]);
+        ptl_nid_t  dest_nid = NTOH__u64 (hdr->dest_nid);
+        int        remaining;
+        int        nfrags;
+
+        if (dest_nid == kqswnal_lib.ni.nid) { /* It's for me :) */
+                /* NB krx requeued when lib_parse() calls back kqswnal_recv */
+                lib_parse (&kqswnal_lib, hdr, krx);
+                return;
+        }
+
+#if KQSW_CHECKSUM
+        CERROR ("checksums for forwarded packets not implemented\n");
+        LBUG ();
+#endif
+        if (kqswnal_ispeer (dest_nid)) { /* should have gone direct to peer */
+                CERROR("dropping packet from "LPX64" for "LPX64
+                       ": target is peer\n", NTOH__u64(hdr->src_nid), dest_nid);
+                kqswnal_requeue_rx (krx);
+                return;
+        }
+
+        /* NB forwarding may destroy iov; rebuild every time */
+        remaining = krx->krx_nob;
+        nfrags = 0;
+        while (remaining > 0) {
+                /* one iov entry per receive page, last one may be short */
+                LASSERT (nfrags < krx->krx_npages);
+                krx->krx_iov[nfrags].iov_base = page_address(krx->krx_pages[nfrags]);
+                krx->krx_iov[nfrags].iov_len = MIN(PAGE_SIZE, remaining);
+
+                remaining -= PAGE_SIZE;
+                nfrags++;
+        }
+
+        kpr_fwd_init (&krx->krx_fwd, dest_nid,
+                      krx->krx_nob, nfrags, krx->krx_iov,
+                      kqswnal_fwd_callback, krx);
+
+        kpr_fwd_start (&kqswnal_data.kqn_router, &krx->krx_fwd);
+}
+
+/* Receive Interrupt Handler: posts to schedulers
+ *
+ * Records the ep descriptor and byte count in the krx.  A failed receive, or
+ * one too short to hold a portals header, is logged and the buffer requeued
+ * (or simply abandoned while shutting down).  Good packets are appended to
+ * kqn_readyrxds and the scheduler wait queue is woken. */
+void
+kqswnal_rxhandler(EP_RXD *rxd)
+{
+        unsigned long flags;    /* spin_lock_irqsave() state is an unsigned long */
+        int nob = ep_rxd_len (rxd);
+        int status = ep_rxd_status (rxd);
+        kqswnal_rx_t *krx = (kqswnal_rx_t *)ep_rxd_arg (rxd);
+
+        CDEBUG(D_NET, "kqswnal_rxhandler: rxd %p, krx %p, nob %d, status %d\n",
+               rxd, krx, nob, status);
+
+        LASSERT (krx != NULL);
+
+        krx->krx_rxd = rxd;
+        krx->krx_nob = nob;
+
+        /* must receive a whole header to be able to parse.
+         * Compare as signed: against an unsigned sizeof a negative 'nob'
+         * would be promoted to a huge value and slip through. */
+        if (status != EP_SUCCESS || nob < (int)sizeof (ptl_hdr_t))
+        {
+                /* receives complete with failure when receiver is removed */
+                if (kqswnal_data.kqn_shuttingdown)
+                        return;
+
+                CERROR("receive status failed with status %d nob %d\n",
+                       ep_rxd_status(rxd), nob);
+                kqswnal_requeue_rx (krx);
+                return;
+        }
+
+        atomic_inc (&kqswnal_packets_received);
+
+        spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
+
+        list_add_tail (&krx->krx_list, &kqswnal_data.kqn_readyrxds);
+        if (waitqueue_active (&kqswnal_data.kqn_sched_waitq))
+                wake_up (&kqswnal_data.kqn_sched_waitq);
+
+        spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
+}
+
+#if KQSW_CHECKSUM
+/* Log a checksum mismatch for a received packet.  'ishdr' selects whether
+ * the header or the payload checksum failed; the header fields are dumped
+ * to help identify the sender and message.
+ *
+ * Wire-handle field names and the length accessor follow
+ * kqswnal_cerror_hdr() in this file.
+ * NOTE(review): msg.ack.match_bits is not referenced anywhere else in this
+ * file - confirm it still exists in ptl_hdr_t before enabling KQSW_CHECKSUM. */
+void
+kqswnal_csum_error (kqswnal_rx_t *krx, int ishdr)
+{
+        ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_pages[0]);
+
+        CERROR ("%s checksum mismatch %p: dnid "LPX64", snid "LPX64
+                ", dpid %d, spid %d, type %d\n",
+                ishdr ? "Header" : "Payload", krx,
+                NTOH__u64(hdr->dest_nid), NTOH__u64(hdr->src_nid),
+                NTOH__u32(hdr->dest_pid), NTOH__u32(hdr->src_pid),
+                NTOH__u32(hdr->type));
+
+        switch (NTOH__u32 (hdr->type))
+        {
+        case PTL_MSG_ACK:
+                CERROR("ACK: mlen %d dmd "LPX64"."LPX64" match "LPX64
+                       " len %u\n",
+                       NTOH__u32(hdr->msg.ack.mlength),
+                       hdr->msg.ack.dst_wmd.wh_interface_cookie,
+                       hdr->msg.ack.dst_wmd.wh_object_cookie,
+                       NTOH__u64(hdr->msg.ack.match_bits),
+                       NTOH__u32(PTL_HDR_LENGTH(hdr)));
+                break;
+        case PTL_MSG_PUT:
+                CERROR("PUT: ptl %d amd "LPX64"."LPX64" match "LPX64
+                       " len %u off %u data "LPX64"\n",
+                       NTOH__u32(hdr->msg.put.ptl_index),
+                       hdr->msg.put.ack_wmd.wh_interface_cookie,
+                       hdr->msg.put.ack_wmd.wh_object_cookie,
+                       NTOH__u64(hdr->msg.put.match_bits),
+                       NTOH__u32(PTL_HDR_LENGTH(hdr)),
+                       NTOH__u32(hdr->msg.put.offset),
+                       hdr->msg.put.hdr_data);
+                break;
+        case PTL_MSG_GET:
+                CERROR ("GET: <>\n");
+                break;
+        case PTL_MSG_REPLY:
+                CERROR ("REPLY: <>\n");
+                break;
+        default:
+                CERROR ("TYPE?: <>\n");
+        }
+}
+#endif
+
+/* Common receive path behind kqswnal_recv() and kqswnal_recv_pages().
+ *
+ * 'private' is the kqswnal_rx_t holding the received packet.  'mlen' bytes
+ * of payload, which start KQSW_HDR_SIZE bytes into the first receive page,
+ * are copied into the destination described by 'iov' (virtual addresses) or
+ * 'kiov' (pages, kmap()ed one at a time); only one of the two may be set.
+ * Afterwards the message is completed with lib_finalize() and the receive
+ * buffer is requeued.  Always returns 'rlen'. */
+static int
+kqswnal_recvmsg (nal_cb_t *nal,
+ void *private,
+ lib_msg_t *cookie,
+ unsigned int niov,
+ struct iovec *iov,
+ ptl_kiov_t *kiov,
+ size_t mlen,
+ size_t rlen)
+{
+ kqswnal_rx_t *krx = (kqswnal_rx_t *)private;
+ int page;
+ char *page_ptr;
+ int page_nob;
+ char *iov_ptr;
+ int iov_nob;
+ int frag;
+#if KQSW_CHECKSUM
+ kqsw_csum_t senders_csum;
+ kqsw_csum_t payload_csum = 0;
+ kqsw_csum_t hdr_csum = kqsw_csum(0, page_address(krx->krx_pages[0]),
+ sizeof(ptl_hdr_t));
+ size_t csum_len = mlen;
+ int csum_frags = 0;
+ int csum_nob = 0;
+ static atomic_t csum_counter;
+ /* print the checksum summary only when the call counter is a multiple of 1000001 */
+ int csum_verbose = (atomic_read(&csum_counter)%1000001) == 0;
+
+ atomic_inc (&csum_counter);
+
+ /* sender's header checksum is stored right after the portals header */
+ memcpy (&senders_csum, ((char *)page_address (krx->krx_pages[0])) +
+ sizeof (ptl_hdr_t), sizeof (kqsw_csum_t));
+ if (senders_csum != hdr_csum)
+ kqswnal_csum_error (krx, 1);
+#endif
+ CDEBUG(D_NET,"kqswnal_recv, mlen="LPSZ", rlen="LPSZ"\n", mlen, rlen);
+
+ /* What was actually received must be >= payload.
+ * This is an LASSERT, as lib_finalize() doesn't have a completion status. */
+ LASSERT (krx->krx_nob >= KQSW_HDR_SIZE + mlen);
+ LASSERT (mlen <= rlen);
+
+ /* It must be OK to kmap() if required */
+ LASSERT (kiov == NULL || !in_interrupt ());
+ /* Either all pages or all vaddrs */
+ LASSERT (!(kiov != NULL && iov != NULL));
+
+ if (mlen != 0)
+ {
+ /* source cursor: first page, just past the header */
+ page = 0;
+ page_ptr = ((char *) page_address(krx->krx_pages[0])) +
+ KQSW_HDR_SIZE;
+ page_nob = PAGE_SIZE - KQSW_HDR_SIZE;
+
+ /* destination cursor: first fragment of the caller's vector */
+ LASSERT (niov > 0);
+ if (kiov != NULL) {
+ iov_ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset;
+ iov_nob = kiov->kiov_len;
+ } else {
+ iov_ptr = iov->iov_base;
+ iov_nob = iov->iov_len;
+ }
+
+ for (;;)
+ {
+ /* We expect the iov to exactly match mlen */
+ LASSERT (iov_nob <= mlen);
+
+ /* copy as much as both the current page and fragment allow */
+ frag = MIN (page_nob, iov_nob);
+ memcpy (iov_ptr, page_ptr, frag);
+#if KQSW_CHECKSUM
+ payload_csum = kqsw_csum (payload_csum, iov_ptr, frag);
+ csum_nob += frag;
+ csum_frags++;
+#endif
+ mlen -= frag;
+ if (mlen == 0)
+ break;
+
+ /* advance the source, moving to the next receive page when this one is used up */
+ page_nob -= frag;
+ if (page_nob != 0)
+ page_ptr += frag;
+ else
+ {
+ page++;
+ LASSERT (page < krx->krx_npages);
+ page_ptr = page_address(krx->krx_pages[page]);
+ page_nob = PAGE_SIZE;
+ }
+
+ /* advance the destination, moving to the next fragment when this one is full */
+ iov_nob -= frag;
+ if (iov_nob != 0)
+ iov_ptr += frag;
+ else if (kiov != NULL) {
+ kunmap (kiov->kiov_page);
+ kiov++;
+ niov--;
+ LASSERT (niov > 0);
+ iov_ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset;
+ iov_nob = kiov->kiov_len;
+ } else {
+ iov++;
+ niov--;
+ LASSERT (niov > 0);
+ iov_ptr = iov->iov_base;
+ iov_nob = iov->iov_len;
+ }
+ }
+
+ /* the loop exits with the last destination page still mapped */
+ if (kiov != NULL)
+ kunmap (kiov->kiov_page);
+ }
+
+#if KQSW_CHECKSUM
+ /* sender's payload checksum follows its header checksum */
+ memcpy (&senders_csum, ((char *)page_address (krx->krx_pages[0])) +
+ sizeof(ptl_hdr_t) + sizeof(kqsw_csum_t), sizeof(kqsw_csum_t));
+
+ /* a truncated receive (mlen < rlen) cannot be compared with the sender's sum */
+ if (csum_len != rlen)
+ CERROR("Unable to checksum data in user's buffer\n");
+ else if (senders_csum != payload_csum)
+ kqswnal_csum_error (krx, 0);
+
+ if (csum_verbose)
+ CERROR("hdr csum %lx, payload_csum %lx, csum_frags %d, "
+ "csum_nob %d\n",
+ hdr_csum, payload_csum, csum_frags, csum_nob);
+#endif
+ lib_finalize(nal, private, cookie);
+
+ kqswnal_requeue_rx (krx);
+
+ return (rlen);
+}
+
+/* cb_recv entry point: the destination is a vector of virtual addresses,
+ * so the page-vector argument of kqswnal_recvmsg() is left NULL. */
+static int
+kqswnal_recv(nal_cb_t *nal,
+             void *private,
+             lib_msg_t *cookie,
+             unsigned int niov,
+             struct iovec *iov,
+             size_t mlen,
+             size_t rlen)
+{
+        ptl_kiov_t *no_pages = NULL;
+        int         result;
+
+        result = kqswnal_recvmsg (nal, private, cookie, niov, iov, no_pages,
+                                  mlen, rlen);
+        return (result);
+}
+
+/* cb_recv_pages entry point: the destination is a vector of pages, so the
+ * iovec argument of kqswnal_recvmsg() is left NULL. */
+static int
+kqswnal_recv_pages (nal_cb_t *nal,
+                    void *private,
+                    lib_msg_t *cookie,
+                    unsigned int niov,
+                    ptl_kiov_t *kiov,
+                    size_t mlen,
+                    size_t rlen)
+{
+        struct iovec *no_vaddrs = NULL;
+        int           result;
+
+        result = kqswnal_recvmsg (nal, private, cookie, niov, no_vaddrs, kiov,
+                                  mlen, rlen);
+        return (result);
+}
+
+/* Spawn a kernel thread running fn(arg).  On success the NAL's thread count
+ * is incremented and 0 is returned; otherwise the negative value returned by
+ * kernel_thread() is passed back. */
+int
+kqswnal_thread_start (int (*fn)(void *arg), void *arg)
+{
+        long thread_pid = kernel_thread (fn, arg, 0);
+
+        if (thread_pid >= 0) {
+                atomic_inc (&kqswnal_data.kqn_nthreads);
+                return (0);
+        }
+
+        return ((int)thread_pid);
+}
+
+/* Called by a NAL thread as it exits: undoes the thread-count increment
+ * made by kqswnal_thread_start(). */
+void
+kqswnal_thread_fini (void)
+{
+ atomic_dec (&kqswnal_data.kqn_nthreads);
+}
+
+/* Scheduler thread body.
+ *
+ * Until kqn_shuttingdown is set, each pass handles at most one entry from
+ * each of the three work lists: received packets (kqn_readyrxds), transmits
+ * delayed for lack of an ep txd (kqn_delayedtxds) and delayed forwards
+ * (kqn_delayedfwds).  kqn_sched_lock protects the lists and is dropped
+ * around the actual work.  With nothing to do the thread sleeps on
+ * kqn_sched_waitq; after KQSW_RESCHED busy passes it offers to yield the CPU.
+ * 'arg' is not used.  Returns 0 on shutdown. */
+int
+kqswnal_scheduler (void *arg)
+{
+        kqswnal_rx_t *krx;
+        kqswnal_tx_t *ktx;
+        kpr_fwd_desc_t *fwd;
+        unsigned long flags;    /* spin_lock_irqsave() state is an unsigned long */
+        int rc;
+        int counter = 0;
+        int did_something;
+
+        kportal_daemonize ("kqswnal_sched");
+        kportal_blockallsigs ();
+
+        spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
+
+        while (!kqswnal_data.kqn_shuttingdown)
+        {
+                did_something = FALSE;
+
+                if (!list_empty (&kqswnal_data.kqn_readyrxds))
+                {
+                        krx = list_entry(kqswnal_data.kqn_readyrxds.next,
+                                         kqswnal_rx_t, krx_list);
+                        list_del (&krx->krx_list);
+                        spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
+                                               flags);
+
+                        kqswnal_rx (krx);
+
+                        did_something = TRUE;
+                        spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags);
+                }
+
+                if (!list_empty (&kqswnal_data.kqn_delayedtxds))
+                {
+                        ktx = list_entry(kqswnal_data.kqn_delayedtxds.next,
+                                         kqswnal_tx_t, ktx_list);
+                        list_del (&ktx->ktx_list);
+                        spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
+                                               flags);
+
+                        rc = kqswnal_launch (ktx);
+                        if (rc != 0) /* failed: ktx_nid down? */
+                        {
+                                CERROR("Failed delayed transmit to "LPX64
+                                       ": %d\n", ktx->ktx_nid, rc);
+                                kqswnal_tx_done (ktx, rc);
+                        }
+
+                        did_something = TRUE;
+                        spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
+                }
+
+                if (!list_empty (&kqswnal_data.kqn_delayedfwds))
+                {
+                        fwd = list_entry (kqswnal_data.kqn_delayedfwds.next, kpr_fwd_desc_t, kprfd_list);
+                        list_del (&fwd->kprfd_list);
+                        spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
+
+                        kqswnal_fwd_packet (NULL, fwd);
+
+                        did_something = TRUE;
+                        spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
+                }
+
+                /* nothing to do or hogging CPU */
+                if (!did_something || counter++ == KQSW_RESCHED) {
+                        spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
+                                               flags);
+
+                        counter = 0;
+
+                        if (!did_something) {
+                                rc = wait_event_interruptible (kqswnal_data.kqn_sched_waitq,
+                                                               kqswnal_data.kqn_shuttingdown ||
+                                                               !list_empty(&kqswnal_data.kqn_readyrxds) ||
+                                                               !list_empty(&kqswnal_data.kqn_delayedtxds) ||
+                                                               !list_empty(&kqswnal_data.kqn_delayedfwds));
+                                /* signals were blocked above, so the wait is
+                                 * not expected to be interrupted */
+                                LASSERT (rc == 0);
+                        } else if (current->need_resched)
+                                schedule ();
+
+                        spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
+                }
+        }
+
+        spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
+
+        kqswnal_thread_fini ();
+        return (0);
+}
+
+/* Callback table handed to the portals library for this NAL.  The send and
+ * receive entries are the wrappers defined in this file, each in an iovec
+ * and a page-vector flavour; the remaining callbacks are not part of this
+ * section of the file. */
+nal_cb_t kqswnal_lib =
+{
+ nal_data: &kqswnal_data, /* NAL private data */
+ cb_send: kqswnal_send,
+ cb_send_pages: kqswnal_send_pages,
+ cb_recv: kqswnal_recv,
+ cb_recv_pages: kqswnal_recv_pages,
+ cb_read: kqswnal_read,
+ cb_write: kqswnal_write,
+ cb_malloc: kqswnal_malloc,
+ cb_free: kqswnal_free,
+ cb_printf: kqswnal_printf,
+ cb_cli: kqswnal_cli,
+ cb_sti: kqswnal_sti,
+ cb_dist: kqswnal_dist
+};
--- /dev/null
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+include ../../Rules.linux
+
+MODULE = kscimacnal
+modulenet_DATA = kscimacnal.o
+EXTRA_PROGRAMS = kscimacnal
+
+DEFS =
+kscimacnal_SOURCES = scimacnal.c scimacnal_cb.c scimacnal.h
--- /dev/null
+
+scimacnal - A NAL for the Scali ScaMAC midlayer.
+
+The ScaMAC midlayer is a simplified API to the SCI high performance
+interconnect.
+
+In order to use this NAL you'll need to tune scimac to use larger buffers.
+See scimac.conf in this directory for an example.
+
+Overall performance and stability aren't great, but this can be attributed
+to the scimac driver, which apparently needs further development.
+
+TODO:
+Routing isn't yet implemented.
--- /dev/null
+# Configuration file for the scimac driver - lustre friendly settings
+#
+
+# The maximal number of message headers to use in the system.
+scimac_max_no_hdrs = 32
+
+# The maximal number of eager buffers to use in the system.
+scimac_max_no_ebufs = 8
+
+# The maximal size in bytes of each eager buffer.
+scimac_max_ebuf_size = 65536
+
+# Enable use of a kernel thread to defer reception of packets.
+# Default is to use a tasklet (sw interrupt).
+scimac_use_ulevel_recv = 1
+
+# The maximal number of packets queued for transfer per path at any one time.
+scimac_max_send_queuelen = 2000
+
+# The packet retransmit time in milliseconds.
+# The time that elapses between an attempt to send a packet and its retransmission.
+scimac_pkt_rexmit_time = 200
+
+# The packet's maximal retransmit time in milliseconds.
+# The total time during which sending a packet is attempted before it is dropped.
+scimac_max_rexmit_time = 5000
+
+# The lowest valid node identifier in the system.
+scimac_min_nodeid_number = 0x100
+
+# The largest valid node identifier in the system.
+scimac_max_nodeid_number = 0xff00
+
+# The incremental nodeid step in the system.
+scimac_nodeid_increment = 0x100
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:cindent:
+ *
+ * Copyright (C) 2003 High Performance Computing Center North (HPC2N)
+ * Author: Niklas Edmundsson <nikke@hpc2n.umu.se>
+
+ * Based on gmnal, which is based on ksocknal and qswnal
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+
+#include "scimacnal.h"
+
+/* Network interface handle filled in by PtlNIInit() at module load and
+ * exported to other modules. */
+ptl_handle_ni_t kscimacnal_ni;
+/* API-side NAL descriptor; its function pointers are set in
+ * kscimacnal_initialize(). */
+nal_t kscimacnal_api;
+
+/* Per-module state shared by the API and library sides of the NAL. */
+kscimacnal_data_t kscimacnal_data;
+
+/* Router interface descriptor naming this NAL and its forwarding entry point.
+ * NOTE(review): nothing in this file registers it with the router, which
+ * matches the README's "Routing isn't yet implemented". */
+kpr_nal_interface_t kscimacnal_router_interface = {
+ kprni_nalid: SCIMACNAL,
+ kprni_arg: NULL,
+ kprni_fwd: kscimacnal_fwd_packet,
+};
+
+
+/* nal_t 'forward' method: pass an API request straight to the library
+ * dispatcher.  'args_len' and 'ret_len' are not used.  Always returns PTL_OK. */
+static int kscimacnal_forward(nal_t *nal,
+                              int id,
+                              void *args, size_t args_len,
+                              void *ret, size_t ret_len)
+{
+        kscimacnal_data_t *state;
+        nal_cb_t *cb;
+
+        state = nal->nal_data;
+        cb = state->ksci_cb;
+
+        /* there is exactly one instance of each of these objects */
+        LASSERT (nal == &kscimacnal_api);
+        LASSERT (state == &kscimacnal_data);
+        LASSERT (cb == &kscimacnal_lib);
+
+        lib_dispatch(cb, state, id, args, ret); /* nal needs ksci */
+        return PTL_OK;
+}
+
+
+/* nal_t 'lock' method: delegates to the library's cb_cli callback. */
+static void kscimacnal_lock(nal_t *nal, unsigned long *flags)
+{
+        kscimacnal_data_t *state;
+        nal_cb_t *cb;
+
+        state = nal->nal_data;
+        cb = state->ksci_cb;
+
+        /* there is exactly one instance of each of these objects */
+        LASSERT (nal == &kscimacnal_api);
+        LASSERT (state == &kscimacnal_data);
+        LASSERT (cb == &kscimacnal_lib);
+
+        cb->cb_cli(cb,flags);
+}
+
+
+/* nal_t 'unlock' method: delegates to the library's cb_sti callback. */
+static void kscimacnal_unlock(nal_t *nal, unsigned long *flags)
+{
+        kscimacnal_data_t *state;
+        nal_cb_t *cb;
+
+        state = nal->nal_data;
+        cb = state->ksci_cb;
+
+        /* there is exactly one instance of each of these objects */
+        LASSERT (nal == &kscimacnal_api);
+        LASSERT (state == &kscimacnal_data);
+        LASSERT (cb == &kscimacnal_lib);
+
+        cb->cb_sti(cb,flags);
+}
+
+
+/* nal_t 'shutdown' method.  Does nothing beyond checking that it was called
+ * on this NAL; 'ni' is not used.  Always returns 0. */
+static int kscimacnal_shutdown(nal_t *nal, int ni)
+{
+ LASSERT (nal == &kscimacnal_api);
+ return 0;
+}
+
+
+/* nal_t 'yield' method: give up the CPU, but only if the current task has
+ * been flagged for rescheduling. */
+static void kscimacnal_yield( nal_t *nal )
+{
+        LASSERT (nal == &kscimacnal_api);
+
+        if (!current->need_resched)
+                return;
+
+        schedule();
+}
+
+
+/* NAL init routine passed to PtlNIInit(): initialises the portals library
+ * with this node's nid and returns the API-side NAL descriptor.
+ * 'interface' and 'requested_pid' are not used.
+ * NOTE(review): the result of lib_init() is not checked here. */
+static nal_t *kscimacnal_init(int interface, ptl_pt_index_t ptl_size,
+ ptl_ac_index_t ac_size, ptl_pid_t requested_pid)
+{
+ int nnids = 512; /* FIXME: Need ScaMac function to get #nodes */
+
+ CDEBUG(D_NET, "calling lib_init with nid 0x%Lx nnids %d\n", kscimacnal_data.ksci_nid, nnids);
+ lib_init(&kscimacnal_lib, kscimacnal_data.ksci_nid, 0, nnids,ptl_size, ac_size);
+ return &kscimacnal_api;
+}
+
+
+/* Called by kernel at module unload time.
+ * Tears down in roughly the reverse order of kscimacnal_initialize(): flag
+ * shutdown, withdraw the exported symbol, finalise the network interface and
+ * the portals library, then release the ScaMAC handle. */
+static void __exit
+kscimacnal_finalize(void)
+{
+ /* FIXME: How should the shutdown procedure really look? */
+ kscimacnal_data.ksci_shuttingdown=1;
+
+ PORTAL_SYMBOL_UNREGISTER(kscimacnal_ni);
+
+ PtlNIFini(kscimacnal_ni);
+ lib_fini(&kscimacnal_lib);
+
+ mac_finish(kscimacnal_data.ksci_machandle);
+
+ /* report the portals memory counter, paired with the "start kmem" message at load */
+ CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read (&portal_kmemory));
+
+ return;
+}
+
+
+/* Called by kernel at module insertion time.
+ *
+ * Wires up the API descriptor, clears the module state, opens ScaMAC adapter
+ * 0 with kscimacnal_rx as the receive callback, checks the ScaMAC MTU,
+ * derives the node id from the adapter's physical address and finally
+ * initialises the portals network interface.
+ *
+ * Returns 0 on success, -1 if a ScaMAC step fails, or -ENOMEM if PtlNIInit()
+ * fails.  The ScaMAC handle is released on every failure after mac_init(). */
+static int __init
+kscimacnal_initialize(void)
+{
+ int rc;
+ unsigned long nid=0;
+ mac_handle_t *machandle = NULL;
+
+
+ CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read (&portal_kmemory));
+
+ kscimacnal_api.forward = kscimacnal_forward;
+ kscimacnal_api.shutdown = kscimacnal_shutdown;
+ kscimacnal_api.yield = kscimacnal_yield;
+ kscimacnal_api.validate = NULL; /* our api validate is a NOOP */
+ kscimacnal_api.lock= kscimacnal_lock;
+ kscimacnal_api.unlock= kscimacnal_unlock;
+ kscimacnal_api.nal_data = &kscimacnal_data;
+
+ kscimacnal_lib.nal_data = &kscimacnal_data;
+
+ /* clears ksci_init too, so the RX callback can tell setup is unfinished */
+ memset(&kscimacnal_data, 0, sizeof(kscimacnal_data));
+
+ kscimacnal_data.ksci_cb = &kscimacnal_lib;
+
+ /* We're not using this, but cli/sti callbacks does... ??? */
+ spin_lock_init(&kscimacnal_data.ksci_dispatch_lock);
+
+ /* FIXME: We only support one adapter for now */
+ machandle = mac_init(0, MAC_SAPID_LUSTRE, kscimacnal_rx,
+ &kscimacnal_data);
+
+ if(!machandle) {
+ CERROR("mac_init() failed\n");
+ return -1;
+ }
+
+ kscimacnal_data.ksci_machandle = machandle;
+
+ /* Make sure the scimac MTU is tuned */
+ if(mac_get_mtusize(machandle) < SCIMACNAL_MTU) {
+ CERROR("scimac mtu of %ld smaller than SCIMACNAL MTU of %d\n",
+ mac_get_mtusize(machandle), SCIMACNAL_MTU);
+ CERROR("Consult README.scimacnal for more information\n");
+ mac_finish(machandle);
+ return -1;
+ }
+
+ /* Get the node ID */
+ /* mac_get_physaddrlen() is a function instead of define, sigh */
+ LASSERT(mac_get_physaddrlen(machandle) <= sizeof(nid));
+ if(mac_get_physaddr(machandle, (mac_physaddr_t *) &nid)) {
+ CERROR("mac_get_physaddr() failed\n");
+ mac_finish(machandle);
+ return -1;
+ }
+ /* NOTE(review): ntohl() on an unsigned long assumes the address fits in
+ * the low 32 bits of 'nid' - confirm for 64-bit and big-endian hosts */
+ nid = ntohl(nid);
+ kscimacnal_data.ksci_nid = nid;
+
+
+ /* Initialize Network Interface */
+ /* FIXME: What do the magic numbers mean? Documentation anyone? */
+ rc = PtlNIInit(kscimacnal_init, 32, 4, 0, &kscimacnal_ni);
+ if (rc) {
+ CERROR("PtlNIInit failed %d\n", rc);
+ mac_finish(machandle);
+ return (-ENOMEM);
+ }
+
+ PORTAL_SYMBOL_REGISTER(kscimacnal_ni);
+
+ /* We're done now, it's OK for the RX callback to do stuff */
+ kscimacnal_data.ksci_init = 1;
+
+ return 0;
+}
+
+
+/* Module metadata and load/unload entry points. */
+MODULE_AUTHOR("Niklas Edmundsson <nikke@hpc2n.umu.se>");
+MODULE_DESCRIPTION("Kernel Scali ScaMAC SCI NAL v0.0");
+MODULE_LICENSE("GPL");
+
+module_init (kscimacnal_initialize);
+module_exit (kscimacnal_finalize);
+
+/* The network interface handle is the only symbol exported by this module. */
+EXPORT_SYMBOL(kscimacnal_ni);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:cindent:
+ *
+ * Copyright (C) 2003 High Performance Computing Center North (HPC2N)
+ * Author: Niklas Edmundsson <nikke@hpc2n.umu.se>
+ */
+
+
+#ifndef _SCIMACNAL_H
+#define _SCIMACNAL_H
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/locks.h>
+#include <linux/unistd.h>
+#include <linux/init.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/list.h>
+#include <asm/uaccess.h>
+#include <asm/segment.h>
+#include <asm/page.h> /* For PAGE_SIZE */
+
+#define DEBUG_SUBSYSTEM S_UNDEFINED
+
+#include <linux/kp30.h>
+#include <portals/p30.h>
+#include <portals/lib-p30.h>
+
+#include <scamac.h>
+
+#ifndef MAC_SAPID_LUSTRE
+#define MAC_SAPID_LUSTRE MAC_SAPID_TEST1
+#endif /* MAC_SAPID_LUSTRE */
+
+#define SCIMACNAL_MTU 65536
+/* FIXME: What is really the MTU of lustre? */
+#if PTL_MD_MAX_IOV*PAGE_SIZE > SCIMACNAL_MTU
+#error Max MTU of ScaMAC is 64k, PTL_MD_MAX_IOV*PAGE_SIZE is bigger.
+#endif
+
+/* Receive descriptor.  Its fields mirror the four arguments of the
+ * kscimacnal_rx() callback declared below.
+ * NOTE(review): how it is filled in is not visible in this header. */
+typedef struct {
+ mac_handle_t *handle;
+ mac_mblk_t *msg;
+ mac_msg_type_t type;
+ void *userdata;
+} kscimacnal_rx_t;
+
+
+/* Per-message transmit context, kept until ScaMAC releases the message. */
+typedef struct {
+ nal_cb_t *ktx_nal; /* passed to lib_finalize() on completion */
+ void *ktx_private; /* passed to lib_finalize() on completion */
+ lib_msg_t *ktx_cookie; /* passed to lib_finalize() on completion */
+ ptl_hdr_t ktx_hdr; /* presumably a private copy of the portals header - TODO confirm */
+} kscimacnal_tx_t;
+
+
+/* Module-wide state of the scimac NAL (single instance: kscimacnal_data). */
+typedef struct {
+ char ksci_init; /* set to 1 once module initialisation has completed */
+ char ksci_shuttingdown; /* set to 1 when module unload begins */
+ ptl_nid_t ksci_nid; /* this node's id, from the ScaMAC physical address */
+ nal_cb_t *ksci_cb; /* library-side callback table (kscimacnal_lib) */
+ spinlock_t ksci_dispatch_lock; /* taken by the cli/sti callbacks */
+ mac_handle_t *ksci_machandle; /* handle returned by mac_init() */
+} kscimacnal_data_t;
+
+extern kscimacnal_data_t kscimacnal_data;
+extern nal_t kscimacnal_api;
+extern nal_cb_t kscimacnal_lib;
+
+void kscimacnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd);
+void kscimacnal_rx(mac_handle_t *handle, mac_mblk_t *msg, mac_msg_type_t type, void *userdata);
+
+
+#endif /* _SCIMACNAL_H */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:cindent:
+ *
+ * Copyright (C) 2003 High Performance Computing Center North (HPC2N)
+ * Author: Niklas Edmundsson <nikke@hpc2n.umu.se>
+
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include "scimacnal.h"
+
+/* cb_read callback: copy 'len' bytes from 'src_addr' to 'dst_addr' with a
+ * plain memcpy().  'private' is not used.  Always returns 0. */
+static int
+kscimacnal_read (nal_cb_t *nal, void *private,
+ void *dst_addr, user_ptr src_addr, size_t len)
+{
+ CDEBUG(D_NET, "0x%Lx: reading %ld bytes from %p -> %p\n",
+ nal->ni.nid, (long)len, src_addr, dst_addr );
+ memcpy( dst_addr, src_addr, len );
+ return 0;
+}
+
+
+/* cb_write callback: copy 'len' bytes from 'src_addr' to 'dst_addr' with a
+ * plain memcpy().  'private' is not used.  Always returns 0. */
+static int
+kscimacnal_write(nal_cb_t *nal, void *private,
+ user_ptr dst_addr, void *src_addr, size_t len)
+{
+ CDEBUG(D_NET, "0x%Lx: writing %ld bytes from %p -> %p\n",
+ nal->ni.nid, (long)len, src_addr, dst_addr );
+ memcpy( dst_addr, src_addr, len );
+ return 0;
+}
+
+
+/* cb_malloc callback: allocate 'len' bytes with PORTAL_ALLOC and return
+ * whatever pointer the macro stored in 'buf'.  'nal' is not used. */
+static void *
+kscimacnal_malloc(nal_cb_t *nal, size_t len)
+{
+ void *buf;
+
+ PORTAL_ALLOC(buf, len);
+ return buf;
+}
+
+
+/* cb_free callback: release a buffer of 'len' bytes with PORTAL_FREE.
+ * 'nal' is not used. */
+static void
+kscimacnal_free(nal_cb_t *nal, void *buf, size_t len)
+{
+ PORTAL_FREE(buf, len);
+}
+
+
+/* cb_printf callback: when D_NET debugging is enabled, format the message
+ * into a 256-byte buffer (longer output is truncated) and print it prefixed
+ * with the current CPU id.  'nal' is not used. */
+static void
+kscimacnal_printf(nal_cb_t *nal, const char *fmt, ...)
+{
+        va_list args;
+        char    text[256];
+
+        /* nothing to do unless network debugging is switched on */
+        if (!(portal_debug & D_NET))
+                return;
+
+        va_start( args, fmt );
+        vsnprintf( text, sizeof(text), fmt, args );
+        va_end( args );
+
+        printk("CPUId: %d %s",smp_processor_id(), text);
+}
+
+
+/* cb_cli callback: take the dispatch lock with interrupts disabled, saving
+ * the previous interrupt state in *flags. */
+static void
+kscimacnal_cli(nal_cb_t *nal, unsigned long *flags)
+{
+        kscimacnal_data_t *ksci;
+
+        ksci = nal->nal_data;
+        spin_lock_irqsave(&ksci->ksci_dispatch_lock,*flags);
+}
+
+
+/* cb_sti callback: release the dispatch lock and restore the interrupt state
+ * saved in *flags. */
+static void
+kscimacnal_sti(nal_cb_t *nal, unsigned long *flags)
+{
+        kscimacnal_data_t *ksci;
+
+        ksci = nal->nal_data;
+        spin_unlock_irqrestore(&ksci->ksci_dispatch_lock,*flags);
+}
+
+
+/* cb_dist callback: report the distance to 'nid' as 0 for this node and 1
+ * for every other node.  Always returns 0. */
+static int
+kscimacnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
+{
+        /* FIXME: Network distance has a meaning, but is there no easy
+         * way to figure it out (depends on routing) */
+
+        *dist = (nal->ni.nid == nid) ? 0 : 1;
+
+        return 0;
+}
+
+
+/* Translate a ScaMAC message status into the name of its constant, for use
+ * in log messages.  Values not listed yield "Unknown error".
+ * NOTE(review): the parameter is declared mac_status_t while the caller in
+ * this file passes a mac_msg_status_t - confirm the two are compatible. */
+static
+char * get_mac_error(mac_status_t status)
+{
+ switch(status) {
+ case MAC_MSG_STAT_OK:
+ return "MAC_MSG_STAT_OK";
+ case MAC_MSG_STAT_FREED:
+ return "MAC_MSG_STAT_FREED";
+ case MAC_MSG_STAT_ABORTED:
+ return "MAC_MSG_STAT_ABORTED";
+ case MAC_MSG_STAT_TIMEDOUT:
+ return "MAC_MSG_STAT_TIMEDOUT";
+ case MAC_MSG_STAT_NODEUNREACH:
+ return "MAC_MSG_STAT_NODEUNREACH";
+ case MAC_MSG_STAT_NETDOWN:
+ return "MAC_MSG_STAT_NETDOWN";
+ case MAC_MSG_STAT_RESET:
+ return "MAC_MSG_STAT_RESET";
+ case MAC_MSG_STAT_INITFAILED:
+ return "MAC_MSG_STAT_INITFAILED";
+ case MAC_MSG_STAT_SYNCFAILED:
+ return "MAC_MSG_STAT_SYNCFAILED";
+ case MAC_MSG_STAT_BADPROTO:
+ return "MAC_MSG_STAT_BADPROTO";
+ case MAC_MSG_STAT_NOBUFSPACE:
+ return "MAC_MSG_STAT_NOBUFSPACE";
+ case MAC_MSG_STAT_CONGESTION:
+ return "MAC_MSG_STAT_CONGESTION";
+ case MAC_MSG_STAT_OTHER:
+ return "MAC_MSG_STAT_OTHER";
+ default:
+ return "Unknown error";
+ }
+}
+
+
+/* FIXME add routing code here ? */
+
+/* Called by ScaMac when transmission is complete (ie. message is released).
+ *
+ * msg      the released message (not touched here)
+ * status   ScaMAC completion status; anything other than MAC_MSG_STAT_OK
+ *          is logged
+ * context  the kscimacnal_tx_t that kscimacnal_send() registered together
+ *          with this callback
+ *
+ * Finalizes the portals message recorded in the descriptor and then frees
+ * the descriptor, so 'context' must not be used after this returns. */
+static void
+kscimacnal_txrelease(mac_mblk_t *msg, mac_msg_status_t status, void *context)
+{
+        kscimacnal_tx_t *ktx = (kscimacnal_tx_t *)context;
+
+        LASSERT (ktx != NULL);
+
+        /* Euh, there is no feedback when transmission fails?!
+         * lib_finalize() is called the same way whatever the status, so a
+         * failed transmission can only be reported in the log. */
+        if (status != MAC_MSG_STAT_OK)
+                CERROR("%s (%d):\n", get_mac_error(status), status);
+
+        lib_finalize(ktx->ktx_nal, ktx->ktx_private, ktx->ktx_cookie);
+
+        PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t)));
+}
+
+
+/* Called by portals when it wants to send a message.
+ * Since ScaMAC has its own TX thread we don't bother setting up our own.
+ *
+ * Builds one ScaMAC message out of a chain of mblks: the first mblk wraps a
+ * private copy of the portals header, followed by one mblk per payload iov
+ * entry (no payload data is copied here).  The message is then handed to
+ * mac_send().  A kscimacnal_tx_t records nal/private/cookie so that
+ * kscimacnal_txrelease() can call lib_finalize() when ScaMAC releases the
+ * message.
+ *
+ * Returns 0 on success, -EINVAL if header + payload exceeds the MTU
+ * reported by mac_get_mtusize(), -ENOMEM if a descriptor or mblk cannot be
+ * allocated, or the non-zero return code of mac_send().  'type' and 'pid'
+ * are not used. */
+static int
+kscimacnal_send(nal_cb_t *nal,
+ void *private,
+ lib_msg_t *cookie,
+ ptl_hdr_t *hdr,
+ int type,
+ ptl_nid_t nid,
+ ptl_pid_t pid,
+ unsigned int payload_niov,
+ struct iovec *payload_iov,
+ size_t payload_len)
+{
+ kscimacnal_tx_t *ktx=NULL;
+ kscimacnal_data_t *ksci = nal->nal_data;
+ int rc=0;
+ int buf_len = sizeof(ptl_hdr_t) + payload_len;
+ mac_mblk_t *msg=NULL, *lastblk, *newblk;
+ unsigned long physaddr;
+
+
+ /* NOTE(review): payload_len is a size_t but is formatted with %d */
+ CDEBUG(D_NET, "sending %d bytes from %p to nid 0x%Lx niov: %d\n",
+ payload_len, payload_iov, nid, payload_niov);
+
+ LASSERT(ksci != NULL);
+
+ LASSERT(hdr != NULL);
+
+ /* Do real check if we can send this */
+ if (buf_len > mac_get_mtusize(ksci->ksci_machandle)) {
+ CERROR("kscimacnal:request exceeds TX MTU size (%ld).\n",
+ mac_get_mtusize(ksci->ksci_machandle));
+ return -EINVAL;
+ }
+
+
+ /* save transaction info for later finalize and cleanup */
+ PORTAL_ALLOC(ktx, (sizeof(kscimacnal_tx_t)));
+ if (!ktx) {
+ return -ENOMEM;
+ }
+
+ /* *SIGH* hdr is a stack variable in the calling function, so we
+ * need to copy it to a buffer. Zerocopy magic (or is it just
+ * deferred memcpy?) is annoying sometimes. */
+ memcpy(&ktx->ktx_hdr, hdr, sizeof(ptl_hdr_t));
+
+ /* First, put the header in the main message mblk */
+ /* The head mblk carries the release callback and ktx as its context */
+ msg = mac_alloc_mblk(&ktx->ktx_hdr, sizeof(ptl_hdr_t),
+ kscimacnal_txrelease, ktx);
+ if (!msg) {
+ PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t)));
+ return -ENOMEM;
+ }
+ mac_put_mblk(msg, sizeof(ptl_hdr_t));
+ lastblk=msg;
+
+ /* Allocate additional mblks for each iov as needed.
+ * Essentially lib_copy_iov2buf with a twist or two */
+ while (payload_len > 0)
+ {
+ ptl_size_t nob;
+
+ LASSERT (payload_niov > 0);
+
+ /* never take more from this iov entry than is left to send */
+ nob = MIN (payload_iov->iov_len, payload_len);
+
+ /* We don't need a callback on the additional mblks, since
+ * all release callbacks seems to be called when the entire
+ * message has been sent */
+ newblk=mac_alloc_mblk(payload_iov->iov_base, nob, NULL, NULL);
+ if(!newblk) {
+ /* NOTE(review): msg was created with kscimacnal_txrelease/ktx as
+ * its release callback; if mac_free_msg() invokes that callback,
+ * ktx would be finalized and freed there and freed again below.
+ * Confirm against the ScaMAC API. */
+ mac_free_msg(msg);
+ PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t)));
+ return -ENOMEM;
+ }
+ mac_put_mblk(newblk, nob);
+ mac_link_mblk(lastblk, newblk);
+ lastblk=newblk;
+
+ payload_len -= nob;
+ payload_niov--;
+ payload_iov++;
+ }
+
+ /* Everything kscimacnal_txrelease() needs for lib_finalize() */
+ ktx->ktx_nal = nal;
+ ktx->ktx_private = private;
+ ktx->ktx_cookie = cookie;
+
+ CDEBUG(D_NET, "mac_send %d bytes to nid: 0x%Lx\n", buf_len, nid);
+
+ /* NOTE(review): nid is printed with %Lx above but goes through htonl()
+ * here, which presumably keeps only 32 bits of it - confirm that the
+ * ScaMAC physical address is meant to be the low 32 bits of the nid. */
+ physaddr = htonl(nid);
+
+ if((rc=mac_send(ksci->ksci_machandle, msg,
+ (mac_physaddr_t *) &physaddr))) {
+ CERROR("kscimacnal: mac_send() failed, rc=%d\n", rc);
+ /* NOTE(review): same possible double release of ktx as in the mblk
+ * allocation failure path above - confirm. */
+ mac_free_msg(msg);
+ PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t)));
+ return rc;
+ }
+
+ return 0;
+}
+
+
+/* Packet forwarding entry point.  Forwarding is not implemented for this
+ * NAL: the call only logs an error; arg and fwd are left untouched. */
+void
+kscimacnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
+{
+ CERROR ("forwarding not implemented\n");
+}
+
+
+/* Process a received portals packet.
+ * Called by the ScaMac RX thread when a packet is received.
+ *
+ * handle    ScaMAC handle the message arrived on
+ * msg       the received message; this function releases it with
+ *           mac_free_msg() on every path
+ * type      ScaMAC message type; control and "other" messages are dropped
+ * userdata  the kscimacnal_data_t of this NAL
+ *
+ * Messages addressed to the local nid are passed to lib_parse(), which
+ * calls back into kscimacnal_recv() with a pointer to the on-stack krx
+ * descriptor.  Messages for any other nid are logged and dropped, since
+ * forwarding is not implemented. */
+void
+kscimacnal_rx(mac_handle_t *handle, mac_mblk_t *msg, mac_msg_type_t type,
+              void *userdata)
+{
+        ptl_hdr_t         *hdr = NULL;
+        kscimacnal_rx_t    krx;
+        mac_size_t         size;
+        kscimacnal_data_t *ksci = userdata;
+
+        LASSERT(ksci != NULL);
+
+        if ( !ksci->ksci_init || ksci->ksci_shuttingdown ||
+             type == MAC_MSG_TYPE_CTRL || type == MAC_MSG_TYPE_OTHER ) {
+                /* We're not interested in messages not for us, ignore */
+                mac_free_msg(msg);
+                return;
+        }
+
+        size = mac_msg_size(msg);
+
+        CDEBUG(D_NET,"msg %p type %d, size %ld bytes (%ld mblks)\n",
+               msg, type, size, mac_msg_mblks(msg));
+
+        if( size < sizeof( ptl_hdr_t ) ) {
+                /* Too short to hold a portals header.  Stay quiet if a
+                 * shutdown started in the meantime, but always release the
+                 * message: every other exit path frees it, so returning
+                 * without doing so would leak it. */
+                if (!ksci->ksci_shuttingdown)
+                        CERROR("kscimacnal: did not receive complete portal header,"
+                               "size= %ld\n", size);
+                mac_free_msg(msg);
+                return;
+        }
+
+        /* Provide everything we know */
+        krx.handle = handle;
+        krx.msg = msg;
+        krx.type = type;
+        krx.userdata = userdata;
+
+        /* mac_msg_next returns the next mblk with unread data */
+        hdr = mac_get_mblk(mac_msg_next(msg), sizeof(ptl_hdr_t) );
+
+        if(!hdr) {
+                CERROR("kscimacnal: no data block in message %p\n", msg);
+                mac_free_msg(msg);
+                return;
+        }
+
+        if ( hdr->dest_nid == kscimacnal_lib.ni.nid ) {
+                PROF_START(lib_parse);
+                /* sets wanted_len, iovs etc and calls our callback */
+                lib_parse(&kscimacnal_lib, hdr, &krx);
+                PROF_FINISH(lib_parse);
+#if 0 /* FIXME: Is it possible to detect this? */
+        } else if (kgmnal_ispeer(hdr->dest_nid)) {
+                /* should have gone direct to peer */
+                CERROR("dropping packet from 0x%llx to 0x%llx:"
+                       "target is a peer\n",
+                       hdr->src_nid, hdr->dest_nid);
+                kgmnal_requeue_rx(&krx);
+#endif /* if 0 FIXME */
+        } else {
+                /* forward to gateway */
+                CERROR("forwarding not implemented, mynid=0x%llx dest=0x%llx\n",
+                       kscimacnal_lib.ni.nid, hdr->dest_nid);
+        }
+
+        mac_free_msg(msg);
+
+        CDEBUG(D_NET, "msg %p: Done\n", msg);
+}
+
+
+/* Called by portals to process a received packet.
+ *
+ * private is the kscimacnal_rx_t that kscimacnal_rx() handed to lib_parse().
+ * When mlen is non-zero, the unread mblks of krx->msg are copied into the
+ * niov entries of iov, continuing across iov boundaries as needed.
+ * lib_finalize() is then called for cookie in every case, and rlen is
+ * returned. */
+static int kscimacnal_recv(nal_cb_t *nal,
+ void *private,
+ lib_msg_t *cookie,
+ unsigned int niov,
+ struct iovec *iov,
+ size_t mlen,
+ size_t rlen)
+{
+ kscimacnal_rx_t *krx = private;
+ mac_mblk_t *mblk;
+ void *src;
+ mac_size_t pkt_len;
+ ptl_size_t iovused=0;
+
+ LASSERT (krx != NULL);
+ LASSERT (krx->msg != NULL);
+
+ /* NOTE(review): mlen and rlen are size_t but are formatted with %d */
+ CDEBUG(D_NET,"msg %p: mlen=%d, rlen=%d, niov=%d\n",
+ krx->msg, mlen, rlen, niov);
+
+ /* What was actually received must be >= what sender claims to have
+ * sent. This is an LASSERT, since lib-move doesn't check cb return
+ * code yet. Also, rlen seems to be negative when mlen==0 so don't
+ * assert on that.
+ */
+ LASSERT (mlen==0 || mac_msg_size(krx->msg) >= sizeof(ptl_hdr_t)+rlen);
+ LASSERT (mlen==0 || mlen <= rlen);
+
+ PROF_START(memcpy);
+
+ /* mac_msg_next returns next mblk with unread data (ie. can
+ * be same mblk */
+ /* NOTE(review): mlen is only tested against zero; the amount copied is
+ * driven by the mblk lengths, not by mlen - confirm this is intended. */
+ while (mlen != 0 && (mblk = mac_msg_next(krx->msg))) {
+ pkt_len = mac_mblk_len(mblk);
+ src = mac_get_mblk(mblk, pkt_len); /* Next unread block */
+
+ CDEBUG(D_NET,"msg %p: mblk: %p pkt_len: %ld src: %p\n",
+ krx->msg, mblk, pkt_len, src);
+
+ LASSERT(src != NULL);
+
+ /* Essentially lib_copy_buf2iov but with continuation support,
+ * we "gracefully" thrash the argument vars ;) */
+ while (pkt_len > 0) {
+ ptl_size_t nob;
+
+ LASSERT (niov > 0);
+
+ LASSERT(iovused < iov->iov_len);
+
+ /* copy as much as fits in the rest of the current iov entry */
+ nob = MIN (iov->iov_len-iovused, pkt_len);
+ CDEBUG(D_NET, "iovbase: %p iovlen: %d src: %p nob: %d "
+ "iovused: %d\n",
+ iov->iov_base, iov->iov_len,
+ src, nob, iovused);
+
+ memcpy (iov->iov_base+iovused, src, nob);
+ pkt_len -= nob;
+ /* src is a void *, so this relies on byte-wise void pointer
+ * arithmetic (a GNU C extension) */
+ src += nob;
+
+ if(nob+iovused < iov->iov_len) {
+ /* We didn't use all of the iov */
+ iovused+=nob;
+ }
+ else {
+ /* current iov entry is full: move on to the next one */
+ niov--;
+ iov++;
+ iovused=0;
+ }
+ }
+ }
+ PROF_FINISH(memcpy);
+
+ CDEBUG(D_NET, "Calling lib_finalize.\n");
+
+ PROF_START(lib_finalize);
+ lib_finalize(nal, private, cookie);
+ PROF_FINISH(lib_finalize);
+
+ CDEBUG(D_NET, "Done.\n");
+
+ return rlen;
+}
+
+
+/* Callback table that plugs the ScaMAC NAL into the portals library.
+ * The page-based send/receive callbacks are left unset. */
+nal_cb_t kscimacnal_lib = {
+        .nal_data      = &kscimacnal_data,      /* NAL private data */
+        .cb_send       = kscimacnal_send,
+        .cb_send_pages = NULL,                  /* Ignore for now */
+        .cb_recv       = kscimacnal_recv,
+        .cb_recv_pages = NULL,
+        .cb_read       = kscimacnal_read,
+        .cb_write      = kscimacnal_write,
+        .cb_malloc     = kscimacnal_malloc,
+        .cb_free       = kscimacnal_free,
+        .cb_printf     = kscimacnal_printf,
+        .cb_cli        = kscimacnal_cli,
+        .cb_sti        = kscimacnal_sti,
+        .cb_dist       = kscimacnal_dist
+};
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+# Shared module build rules; Rules.linux provides the rule that links
+# $(MODULE)_OBJECTS into $(MODULE).o.
+include ../../Rules.linux
+
+# Module name; selects the $(MODULE).o rule from Rules.linux.
+MODULE = ksocknal
+modulenet_DATA = ksocknal.o
+EXTRA_PROGRAMS = ksocknal
+
+# DEFS is deliberately set to nothing here.
+DEFS =
+# Sources that make up the ksocknal module.
+ksocknal_SOURCES = socknal.c socknal_cb.c socknal.h
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+# Common settings for the in-kernel build.
+include ../../Kernelenv
+
+# ksocknal.o is a composite object built from the objects listed in
+# ksocknal-objs.
+obj-y += ksocknal.o
+ksocknal-objs := socknal.o socknal_cb.o
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Author: Zach Brown <zab@zabbo.net>
+ * Author: Peter J. Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "socknal.h"
+
+/* Handle of the socknal network interface: filled in by PtlNIInit() in
+ * ksocknal_module_init() and exported at the bottom of this file. */
+ptl_handle_ni_t ksocknal_ni;
+/* API-side NAL object; its method pointers are set in
+ * ksocknal_module_init(). */
+static nal_t ksocknal_api;
+/* Global socknal state.  File-local on kernels older than 2.5.0, externally
+ * visible on 2.5.0 and later. */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+ksock_nal_data_t ksocknal_data;
+#else
+static ksock_nal_data_t ksocknal_data;
+#endif
+
+/* Description of this NAL handed to kpr_register(): its NAL id, the
+ * argument value (the global socknal state) and the forwarding entry
+ * point. */
+kpr_nal_interface_t ksocknal_router_interface = {
+        .kprni_nalid = SOCKNAL,
+        .kprni_arg   = &ksocknal_data,
+        .kprni_fwd   = ksocknal_fwd_packet,
+};
+
+
+/* API 'forward' method: hand request 'id' with its argument and return
+ * buffers to lib_dispatch() on the library-side NAL.  args_len and ret_len
+ * are not used.  Always returns PTL_OK. */
+int
+ksocknal_api_forward(nal_t *nal, int id, void *args, size_t args_len,
+                     void *ret, size_t ret_len)
+{
+        ksock_nal_data_t *data = nal->nal_data;
+
+        /* the NAL data doubles as the private argument: ksocknal_send
+         * needs it */
+        lib_dispatch(data->ksnd_nal_cb, data, id, args, ret);
+
+        return PTL_OK;
+}
+
+/* API 'shutdown' method: close every connection by calling
+ * ksocknal_close_sock() with a zero nid, and return its result.  nal and ni
+ * are not used. */
+int
+ksocknal_api_shutdown(nal_t *nal, int ni)
+{
+ CDEBUG (D_NET, "closing all connections\n");
+
+ return ksocknal_close_sock(0); /* close all sockets */
+}
+
+/* API 'yield' method: give other tasks a chance to run via
+ * our_cond_resched().  nal is not used. */
+void
+ksocknal_api_yield(nal_t *nal)
+{
+        our_cond_resched();
+}
+
+/* API 'lock' method: delegate to the library-side NAL's cb_cli callback,
+ * passing flags through unchanged. */
+void
+ksocknal_api_lock(nal_t *nal, unsigned long *flags)
+{
+        ksock_nal_data_t *data = nal->nal_data;
+        nal_cb_t         *cb = data->ksnd_nal_cb;
+
+        cb->cb_cli(cb, flags);
+}
+
+/* API 'unlock' method: delegate to the library-side NAL's cb_sti callback,
+ * passing flags through unchanged. */
+void
+ksocknal_api_unlock(nal_t *nal, unsigned long *flags)
+{
+        ksock_nal_data_t *data = nal->nal_data;
+        nal_cb_t         *cb = data->ksnd_nal_cb;
+
+        cb->cb_sti(cb, flags);
+}
+
+/* NAL startup hook passed to PtlNIInit() by ksocknal_module_init().
+ * Calls lib_init() on ksocknal_lib with the currently configured nid
+ * (ksnd_mynid) together with ptl_size and ac_size, and returns the API-side
+ * NAL object.  interface and requested_pid are not used, and the return
+ * value of lib_init() is not checked. */
+nal_t *
+ksocknal_init(int interface, ptl_pt_index_t ptl_size,
+ ptl_ac_index_t ac_size, ptl_pid_t requested_pid)
+{
+ CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n",
+ ksocknal_data.ksnd_mynid);
+ lib_init(&ksocknal_lib, ksocknal_data.ksnd_mynid, 0, 10, ptl_size,
+ ac_size);
+ return (&ksocknal_api);
+}
+
+/*
+ * EXTRA functions follow
+ */
+
+/* Kernels older than 2.5.0 get a local SOCKET_I(): there the socket is
+ * embedded in the inode as u.socket_i. */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+#define SOCKET_I(inode) (&(inode)->u.socket_i)
+#endif
+/* Return the struct socket that belongs to a socket inode. */
+static __inline__ struct socket *
+socki_lookup(struct inode *inode)
+{
+ return SOCKET_I(inode);
+}
+
+/* Set the local node id, both in the socknal state (ksnd_mynid) and in the
+ * library NAL (ksocknal_lib.ni.nid).  Always returns 0. */
+int
+ksocknal_set_mynid(ptl_nid_t nid)
+{
+        /* FIXME: we have to do this because we call lib_init() at module
+         * insertion time, which is before we have 'mynid' available.  lib_init
+         * sets the NAL's nid, which it uses to tell other nodes where packets
+         * are coming from.  This is not a very graceful solution to this
+         * problem. */
+
+        CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n",
+               nid, ksocknal_lib.ni.nid);
+
+        ksocknal_lib.ni.nid = nid;
+        ksocknal_data.ksnd_mynid = nid;
+
+        return 0;
+}
+
+/* Bind interrupt 'irq' to CPU 'cpu' by running a shell command, through the
+ * usermode helper, that writes the CPU mask to /proc/irq/<irq>/smp_affinity.
+ *
+ * Compiles to a no-op unless CONFIG_SMP is defined and CPU_AFFINITY is
+ * non-zero.  The result of the helper is not checked: binding is best
+ * effort. */
+void
+ksocknal_bind_irq (unsigned int irq, int cpu)
+{
+#if (defined(CONFIG_SMP) && CPU_AFFINITY)
+        char  cmdline[64];
+        char *argv[] = {"/bin/sh",
+                        "-c",
+                        cmdline,
+                        NULL};
+        char *envp[] = {"HOME=/",
+                        "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
+                        NULL};
+
+        /* smp_affinity is parsed as a hexadecimal CPU bitmask, so the mask
+         * must be written in hex: in decimal, CPU 4 would be echoed as "16"
+         * and read back as 0x16 (CPUs 1, 2 and 4). */
+        snprintf (cmdline, sizeof (cmdline),
+                  "echo %x > /proc/irq/%u/smp_affinity", 1U << cpu, irq);
+
+        printk (KERN_INFO "Binding irq %u to CPU %d with cmd: %s\n",
+                irq, cpu, cmdline);
+
+        /* FIXME: Find a better method of setting IRQ affinity...
+         */
+
+        call_usermodehelper (argv[0], argv, envp);
+#endif
+}
+
+/* Register the socket behind file descriptor 'fd' as the connection to
+ * peer 'nid'.
+ *
+ * Takes a reference on the file, allocates and initialises a ksock_conn_t,
+ * picks a scheduler for it, adds it to the global socket list and finally
+ * installs the socknal data_ready/write_space callbacks on the socket.
+ *
+ * Scheduler choice: a connection whose device has a usable IRQ that is
+ * already associated with a scheduler reuses that scheduler; otherwise the
+ * scheduler with the fewest connections is chosen (and remembered for the
+ * IRQ, if there is one).  If bind_irq is set and the IRQ has not been bound
+ * before, the IRQ is bound to the chosen scheduler's index.
+ *
+ * Must not be called in interrupt context.
+ *
+ * Returns 0 on success, -EINVAL if fd has no file or no socket, -ENOMEM if
+ * the connection descriptor cannot be allocated.  On failure the file
+ * reference is dropped again. */
+int
+ksocknal_add_sock (ptl_nid_t nid, int fd, int bind_irq)
+{
+        unsigned long      flags;
+        ksock_conn_t      *conn;
+        struct file       *file = NULL;
+        struct socket     *sock = NULL;
+        ksock_sched_t     *sched = NULL;
+        unsigned int       irq = 0;
+        struct net_device *dev = NULL;
+        int                ret;
+        int                idx;
+        ENTRY;
+
+        LASSERT (!in_interrupt());
+
+        file = fget(fd);
+        if (file == NULL)
+                RETURN(-EINVAL);
+
+        ret = -EINVAL;
+        sock = socki_lookup(file->f_dentry->d_inode);
+        if (sock == NULL)
+                GOTO(error, ret);
+
+        ret = -ENOMEM;
+        PORTAL_ALLOC(conn, sizeof(*conn));
+        if (!conn)
+                GOTO(error, ret);
+
+        /* zero for consistency.  NB sizeof (*conn), not sizeof (conn): the
+         * whole descriptor must be cleared, not just a pointer's worth */
+        memset (conn, 0, sizeof (*conn));
+
+        conn->ksnc_file = file;
+        conn->ksnc_sock = sock;
+        conn->ksnc_saved_data_ready = sock->sk->data_ready;
+        conn->ksnc_saved_write_space = sock->sk->write_space;
+        conn->ksnc_peernid = nid;
+        atomic_set (&conn->ksnc_refcount, 1);   /* 1 ref for socklist */
+
+        conn->ksnc_rx_ready = 0;
+        conn->ksnc_rx_scheduled = 0;
+        ksocknal_new_packet (conn, 0);
+
+        INIT_LIST_HEAD (&conn->ksnc_tx_queue);
+        conn->ksnc_tx_ready = 0;
+        conn->ksnc_tx_scheduled = 0;
+
+#warning check it is OK to derefence sk->dst_cache->dev like this...
+        lock_sock (conn->ksnc_sock->sk);
+
+        /* Find the IRQ of the device this socket is routed through;
+         * irq stays 0 if there is none or it is out of range */
+        if (conn->ksnc_sock->sk->dst_cache != NULL) {
+                dev = conn->ksnc_sock->sk->dst_cache->dev;
+                if (dev != NULL) {
+                        irq = dev->irq;
+                        if (irq >= NR_IRQS) {
+                                CERROR ("Unexpected IRQ %x\n", irq);
+                                irq = 0;
+                        }
+                }
+        }
+
+        release_sock (conn->ksnc_sock->sk);
+
+        write_lock_irqsave (&ksocknal_data.ksnd_socklist_lock, flags);
+
+        if (irq == 0 ||
+            ksocknal_data.ksnd_irq_info[irq] == SOCKNAL_IRQ_UNASSIGNED) {
+                /* This is a software NIC, or we haven't associated it with
+                 * a CPU yet */
+
+                /* Choose the CPU with the fewest connections */
+                sched = ksocknal_data.ksnd_schedulers;
+                for (idx = 1; idx < SOCKNAL_N_SCHED; idx++)
+                        if (sched->kss_nconns >
+                            ksocknal_data.ksnd_schedulers[idx].kss_nconns)
+                                sched = &ksocknal_data.ksnd_schedulers[idx];
+
+                if (irq != 0) {                 /* Hardware NIC */
+                        /* Remember which scheduler we chose */
+                        idx = sched - ksocknal_data.ksnd_schedulers;
+
+                        LASSERT (idx < SOCKNAL_IRQ_SCHED_MASK);
+
+                        if (bind_irq)       /* remember if we will bind below */
+                                idx |= SOCKNAL_IRQ_BOUND;
+
+                        ksocknal_data.ksnd_irq_info[irq] = idx;
+                }
+        } else {
+                /* This is a hardware NIC, associated with a CPU */
+                idx = ksocknal_data.ksnd_irq_info[irq];
+
+                /* Don't bind again if we've bound already */
+                if ((idx & SOCKNAL_IRQ_BOUND) != 0)
+                        bind_irq = 0;
+
+                sched = &ksocknal_data.ksnd_schedulers[idx & SOCKNAL_IRQ_SCHED_MASK];
+        }
+
+        sched->kss_nconns++;
+        conn->ksnc_scheduler = sched;
+
+        list_add(&conn->ksnc_list, &ksocknal_data.ksnd_socklist);
+
+        write_unlock_irqrestore (&ksocknal_data.ksnd_socklist_lock, flags);
+
+        if (bind_irq &&                         /* irq binding required */
+            irq != 0)                           /* hardware NIC */
+                ksocknal_bind_irq (irq, sched - ksocknal_data.ksnd_schedulers);
+
+        /* NOW it's safe to get called back when socket is ready... */
+        sock->sk->user_data = conn;
+        sock->sk->data_ready = ksocknal_data_ready;
+        sock->sk->write_space = ksocknal_write_space;
+
+        /* ...which I call right now to get things going */
+        ksocknal_data_ready (sock->sk, 0);
+        ksocknal_write_space (sock->sk);
+
+        CDEBUG(D_IOCTL, "conn [%p] registered for nid "LPX64"\n",
+               conn, conn->ksnc_peernid);
+
+        /* Can't unload while connection active */
+        PORTAL_MODULE_USE;
+        RETURN(0);
+
+error:
+        fput(file);
+        return (ret);
+}
+
+/* Close the connection to peer 'nid'.
+ * Passing in a zero nid will close all connections.
+ *
+ * The affected connections are first unlinked from the global socket list
+ * under the list lock and collected on a private list.  Each one then has
+ * its original socket callbacks restored, its user_data cleared and its
+ * scheduler's connection count decremented, and finally loses the
+ * reference held on behalf of the socket list.
+ *
+ * Must not be called in interrupt context.
+ *
+ * Returns 0 on success, or -ENOENT if a non-zero nid matched no
+ * connection. */
+int
+ksocknal_close_sock(ptl_nid_t nid)
+{
+        /* the irqsave/irqrestore primitives need an unsigned long */
+        unsigned long     flags;
+        ksock_conn_t     *conn;
+        LIST_HEAD        (death_row);
+        struct list_head *tmp;
+
+        LASSERT (!in_interrupt());
+        write_lock_irqsave (&ksocknal_data.ksnd_socklist_lock, flags);
+
+        if (nid == 0) {                         /* close ALL connections */
+                /* insert 'death row' into the socket list... */
+                list_add (&death_row, &ksocknal_data.ksnd_socklist);
+                /* ...extract and reinitialise the socket list itself... */
+                list_del_init (&ksocknal_data.ksnd_socklist);
+                /* ...and voila, death row is the proud owner of all conns */
+        } else list_for_each (tmp, &ksocknal_data.ksnd_socklist) {
+
+                conn = list_entry (tmp, ksock_conn_t, ksnc_list);
+
+                if (conn->ksnc_peernid == nid) {
+                        /* only the first match is closed; stop walking the
+                         * list as soon as the entry has been moved */
+                        list_del (&conn->ksnc_list);
+                        list_add (&conn->ksnc_list, &death_row);
+                        break;
+                }
+        }
+
+        write_unlock_irqrestore (&ksocknal_data.ksnd_socklist_lock, flags);
+
+        if (nid && list_empty (&death_row))
+                return (-ENOENT);
+
+        while (!list_empty (&death_row)) {
+                conn = list_entry (death_row.next, ksock_conn_t, ksnc_list);
+                list_del (&conn->ksnc_list);
+
+                /* NB I _have_ to restore the callback, rather than storing
+                 * a noop, since the socket could survive past this module
+                 * being unloaded!! */
+                conn->ksnc_sock->sk->data_ready = conn->ksnc_saved_data_ready;
+                conn->ksnc_sock->sk->write_space = conn->ksnc_saved_write_space;
+
+                /* OK; no more callbacks, but they could be in progress now,
+                 * so wait for them to complete... */
+                write_lock_irqsave (&ksocknal_data.ksnd_socklist_lock, flags);
+
+                /* ...however if I get the lock before a callback gets it,
+                 * this will make them noop
+                 */
+                conn->ksnc_sock->sk->user_data = NULL;
+
+                /* And drop the scheduler's connection count while I've got
+                 * the exclusive lock */
+                conn->ksnc_scheduler->kss_nconns--;
+
+                write_unlock_irqrestore(&ksocknal_data.ksnd_socklist_lock,
+                                        flags);
+
+                ksocknal_put_conn (conn);       /* drop ref for ksnd_socklist */
+        }
+
+        return (0);
+}
+
+/* Return the TCP option block of a socket.  Its location depends on the
+ * kernel version: before 2.5.0 it is embedded in struct sock as
+ * tp_pinfo.af_tcp; from 2.5.0 on, sk is cast to a struct tcp_sock and the
+ * address of its 'tcp' member is returned. */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+struct tcp_opt *sock2tcp_opt(struct sock *sk)
+{
+ return &(sk->tp_pinfo.af_tcp);
+}
+#else
+struct tcp_opt *sock2tcp_opt(struct sock *sk)
+{
+ struct tcp_sock *s = (struct tcp_sock *)sk;
+ return &s->tcp;
+}
+#endif
+
+/* "Push" a connection: temporarily force tp->nonagle to 1, issue a
+ * TCP_NODELAY setsockopt on the socket, then put the saved nonagle value
+ * back.  The intent appears to be to flush data TCP is holding back without
+ * permanently changing the socket's setting - TODO confirm.  The setsockopt
+ * is asserted to succeed. */
+void
+ksocknal_push_conn (ksock_conn_t *conn)
+{
+ struct sock *sk = conn->ksnc_sock->sk;
+ struct tcp_opt *tp = sock2tcp_opt(sk);
+ int nonagle;
+ int val = 1;
+ int rc;
+ mm_segment_t oldmm;
+
+ /* save the current setting and force nonagle on, under the socket lock */
+ lock_sock (sk);
+ nonagle = tp->nonagle;
+ tp->nonagle = 1;
+ release_sock (sk);
+
+ /* &val is a kernel address, so widen the address limit for the call */
+ oldmm = get_fs ();
+ set_fs (KERNEL_DS);
+
+ rc = sk->prot->setsockopt (sk, SOL_TCP, TCP_NODELAY,
+ (char *)&val, sizeof (val));
+ LASSERT (rc == 0);
+
+ set_fs (oldmm);
+
+ /* restore the setting saved above */
+ lock_sock (sk);
+ tp->nonagle = nonagle;
+ release_sock (sk);
+}
+
+/* Push the connection to peer 'nid'.
+ * Passing in a zero nid pushes all connections.
+ *
+ * Returns 0 on success, or -ENOENT if a non-zero nid has no connection. */
+int
+ksocknal_push_sock (ptl_nid_t nid)
+{
+        ksock_conn_t     *conn;
+        struct list_head *pos;
+        int               target = 0;
+        int               seen;
+
+        if (nid != 0) {
+                /* single peer: look it up, push it, drop the lookup's ref */
+                conn = ksocknal_get_conn (nid);
+                if (conn == NULL)
+                        return (-ENOENT);
+
+                ksocknal_push_conn (conn);
+                ksocknal_put_conn (conn);
+                return (0);
+        }
+
+        /* NB we can't remove connections from the socket list so we have to
+         * cope with them being removed from under us...
+         *
+         * The list lock is dropped around every push, so each pass walks the
+         * list again from its head to the target'th entry and pins that
+         * entry with a reference before unlocking.
+         */
+        for (;;) {
+                conn = NULL;
+                seen = 0;
+
+                read_lock (&ksocknal_data.ksnd_socklist_lock);
+
+                list_for_each (pos, &ksocknal_data.ksnd_socklist) {
+                        if (seen == target) {
+                                conn = list_entry(pos, ksock_conn_t, ksnc_list);
+                                atomic_inc (&conn->ksnc_refcount); // take a ref
+                                break;
+                        }
+                        seen++;
+                }
+
+                read_unlock (&ksocknal_data.ksnd_socklist_lock);
+
+                /* ran off the end of the list: everything has been pushed */
+                if (conn == NULL)
+                        return (0);
+
+                ksocknal_push_conn (conn);
+                ksocknal_put_conn (conn);
+
+                target++;
+        }
+}
+
+/* Look up the connection to peer 'nid' in the global socket list.
+ *
+ * Returns the first matching connection with its reference count
+ * incremented on behalf of the caller (to be dropped with
+ * ksocknal_put_conn()), or NULL if there is no connection to that peer. */
+ksock_conn_t *
+ksocknal_get_conn (ptl_nid_t nid)
+{
+        struct list_head *pos;
+        ksock_conn_t     *found = NULL;
+
+        PROF_START(conn_list_walk);
+
+        read_lock (&ksocknal_data.ksnd_socklist_lock);
+
+        list_for_each(pos, &ksocknal_data.ksnd_socklist) {
+                ksock_conn_t *candidate =
+                        list_entry(pos, ksock_conn_t, ksnc_list);
+
+                if (candidate->ksnc_peernid != nid)
+                        continue;
+
+                /* caller is referencing */
+                atomic_inc (&candidate->ksnc_refcount);
+                found = candidate;
+                break;
+        }
+
+        read_unlock (&ksocknal_data.ksnd_socklist_lock);
+
+        if (found != NULL)
+                CDEBUG(D_NET, "got conn [%p] -> "LPX64" (%d)\n",
+                       found, nid, atomic_read (&found->ksnc_refcount));
+        else
+                CDEBUG(D_NET, "No connection found when looking for nid "LPX64"\n",
+                       nid);
+
+        PROF_FINISH(conn_list_walk);
+        return (found);
+}
+
+/* Release everything a connection holds: drop the file reference taken in
+ * ksocknal_add_sock(), free the descriptor and drop the module use count
+ * that was taken when the connection was registered.  conn must not be
+ * used afterwards. */
+void
+ksocknal_close_conn (ksock_conn_t *conn)
+{
+ CDEBUG (D_NET, "connection [%p] closed \n", conn);
+
+ fput (conn->ksnc_file);
+ PORTAL_FREE (conn, sizeof (*conn));
+
+ /* One less connection keeping us hanging on */
+ PORTAL_MODULE_UNUSE;
+}
+
+/* Dispose of a connection whose reference count has dropped to zero.
+ *
+ * Asserts that the connection is fully detached: refcount zero, socket
+ * callbacks no longer pointing at socknal, user_data cleared and no receive
+ * scheduled.  Outside interrupt context the connection is closed
+ * immediately; in interrupt context it is queued on ksnd_reaper_list and
+ * the reaper wait queue is woken so that the close happens elsewhere
+ * (presumably in the reaper thread - TODO confirm). */
+void
+_ksocknal_put_conn (ksock_conn_t *conn)
+{
+ unsigned long flags;
+
+ CDEBUG (D_NET, "connection [%p] handed the black spot\n", conn);
+
+ /* "But what is the black spot, captain?" I asked.
+ * "That's a summons, mate..." */
+
+ LASSERT (atomic_read (&conn->ksnc_refcount) == 0);
+ LASSERT (conn->ksnc_sock->sk->data_ready != ksocknal_data_ready);
+ LASSERT (conn->ksnc_sock->sk->write_space != ksocknal_write_space);
+ LASSERT (conn->ksnc_sock->sk->user_data == NULL);
+ LASSERT (!conn->ksnc_rx_scheduled);
+
+ if (!in_interrupt()) {
+ ksocknal_close_conn (conn);
+ return;
+ }
+
+ /* in interrupt context: defer the close via the reaper list */
+ spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags);
+
+ list_add (&conn->ksnc_list, &ksocknal_data.ksnd_reaper_list);
+ wake_up (&ksocknal_data.ksnd_reaper_waitq);
+
+ spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags);
+}
+
+/* NAL command handler (registered with kportal_nal_register()): dispatch
+ * on data->ioc_nal_cmd to the matching socknal operation and return its
+ * result.  Unrecognised commands return -EINVAL.  'private' is not used. */
+int
+ksocknal_cmd(struct portal_ioctl_data * data, void * private)
+{
+        LASSERT (data != NULL);
+
+        switch (data->ioc_nal_cmd) {
+        case NAL_CMD_REGISTER_PEER_FD:
+                return ksocknal_add_sock(data->ioc_nid, data->ioc_fd,
+                                         data->ioc_flags);
+
+        case NAL_CMD_CLOSE_CONNECTION:
+                return ksocknal_close_sock(data->ioc_nid);
+
+        case NAL_CMD_REGISTER_MYNID:
+                return ksocknal_set_mynid (data->ioc_nid);
+
+        case NAL_CMD_PUSH_CONNECTION:
+                return ksocknal_push_sock (data->ioc_nid);
+
+        default:
+                break;
+        }
+
+        return -EINVAL;
+}
+
+/* Free the buffers allocated by ksocknal_module_init(): the forwarding
+ * message buffers with their pages, the transmit descriptors and the
+ * scheduler array.  Each of the three is skipped if its pointer is NULL, so
+ * this is safe to call after a partial initialisation. */
+void
+ksocknal_free_buffers (void)
+{
+        if (ksocknal_data.ksnd_fmbs != NULL) {
+                int          nfmbs = SOCKNAL_SMALL_FWD_NMSGS +
+                                     SOCKNAL_LARGE_FWD_NMSGS;
+                ksock_fmb_t *fmbs = (ksock_fmb_t *)ksocknal_data.ksnd_fmbs;
+                int          m;
+                int          p;
+
+                /* release whichever pages were actually allocated */
+                for (m = 0; m < nfmbs; m++) {
+                        ksock_fmb_t *fmb = &fmbs[m];
+
+                        for (p = 0; p < fmb->fmb_npages; p++) {
+                                if (fmb->fmb_pages[p] == NULL)
+                                        continue;
+
+                                __free_page (fmb->fmb_pages[p]);
+                        }
+                }
+
+                PORTAL_FREE (ksocknal_data.ksnd_fmbs,
+                             sizeof (ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS +
+                                                     SOCKNAL_LARGE_FWD_NMSGS));
+        }
+
+        if (ksocknal_data.ksnd_ltxs != NULL) {
+                PORTAL_FREE (ksocknal_data.ksnd_ltxs,
+                             sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS +
+                                                     SOCKNAL_NNBLK_LTXS));
+        }
+
+        if (ksocknal_data.ksnd_schedulers != NULL) {
+                PORTAL_FREE (ksocknal_data.ksnd_schedulers,
+                             sizeof (ksock_sched_t) * SOCKNAL_N_SCHED);
+        }
+}
+
+/* Module teardown.  Also called by ksocknal_module_init() to unwind a
+ * partial initialisation.
+ *
+ * ksnd_init records how far initialisation got; the switch enters at that
+ * stage and falls through every earlier stage, undoing them in reverse
+ * order.
+ *
+ * NOTE(review): this function is marked __exit but is called from the
+ * __init function on its error paths - confirm that this is safe for
+ * built-in (non-modular) builds. */
+void __exit
+ksocknal_module_fini (void)
+{
+ int i;
+
+ CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
+ atomic_read (&portal_kmemory));
+
+ switch (ksocknal_data.ksnd_init) {
+ default:
+ LASSERT (0);
+
+ case SOCKNAL_INIT_ALL:
+ /* undo the command interface and symbol registration */
+ kportal_nal_unregister(SOCKNAL);
+ PORTAL_SYMBOL_UNREGISTER (ksocknal_ni);
+ /* fall through */
+
+ case SOCKNAL_INIT_PTL:
+ /* undo PtlNIInit() */
+ PtlNIFini(ksocknal_ni);
+ lib_fini(&ksocknal_lib);
+ /* fall through */
+
+ case SOCKNAL_INIT_DATA:
+ /* Module refcount only gets to zero when all connections
+ * have been closed so all lists must be empty */
+ LASSERT (list_empty (&ksocknal_data.ksnd_socklist));
+ LASSERT (list_empty (&ksocknal_data.ksnd_reaper_list));
+ LASSERT (list_empty (&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns));
+ LASSERT (list_empty (&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns));
+
+ if (ksocknal_data.ksnd_schedulers != NULL)
+ for (i = 0; i < SOCKNAL_N_SCHED; i++) {
+ ksock_sched_t *kss =
+ &ksocknal_data.ksnd_schedulers[i];
+
+ LASSERT (list_empty (&kss->kss_tx_conns));
+ LASSERT (list_empty (&kss->kss_rx_conns));
+ LASSERT (kss->kss_nconns == 0);
+ }
+
+ /* stop router calling me */
+ kpr_shutdown (&ksocknal_data.ksnd_router);
+
+ /* flag threads to terminate; wake and wait for them to die */
+ ksocknal_data.ksnd_shuttingdown = 1;
+ wake_up_all (&ksocknal_data.ksnd_reaper_waitq);
+
+ /* NOTE(review): unlike the assertion loop above, this loop uses
+ * ksnd_schedulers without a NULL check. */
+ for (i = 0; i < SOCKNAL_N_SCHED; i++)
+ wake_up_all(&ksocknal_data.ksnd_schedulers[i].kss_waitq);
+
+ /* poll once per second (HZ ticks) until every thread has gone */
+ while (atomic_read (&ksocknal_data.ksnd_nthreads) != 0) {
+ CDEBUG (D_NET, "waitinf for %d threads to terminate\n",
+ atomic_read (&ksocknal_data.ksnd_nthreads));
+ set_current_state (TASK_UNINTERRUPTIBLE);
+ schedule_timeout (HZ);
+ }
+
+ kpr_deregister (&ksocknal_data.ksnd_router);
+
+ ksocknal_free_buffers();
+ /* fall through */
+
+ case SOCKNAL_INIT_NOTHING:
+ break;
+ }
+
+ CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
+ atomic_read (&portal_kmemory));
+
+ printk(KERN_INFO "Routing socket NAL unloaded (final mem %d)\n",
+ atomic_read(&portal_kmemory));
+}
+
+
+/* Module initialisation.
+ *
+ * Sets up the API and library NAL objects and the global socknal state,
+ * allocates the schedulers and transmit descriptors, initialises the
+ * portals network interface, starts one scheduler thread per scheduler plus
+ * the reaper thread, registers with the router (allocating forwarding
+ * buffers only if that succeeds) and finally registers the NAL command
+ * interface.
+ *
+ * ksnd_init records how far initialisation has got (NOTHING -> DATA -> PTL
+ * -> ALL); on failure ksocknal_module_fini() uses it to unwind whatever has
+ * been set up so far.
+ *
+ * Returns 0 on success, -ENOMEM if an allocation fails, or the error code
+ * of the step that failed.  Failure to register with the router is not an
+ * error: the NAL then simply does not route. */
+int __init
+ksocknal_module_init (void)
+{
+        int   pkmem = atomic_read(&portal_kmemory);
+        int   rc;
+        int   i;
+        int   j;
+
+        /* packet descriptor must fit in a router descriptor's scratchpad */
+        LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t));
+
+        LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
+
+        ksocknal_api.forward  = ksocknal_api_forward;
+        ksocknal_api.shutdown = ksocknal_api_shutdown;
+        ksocknal_api.yield    = ksocknal_api_yield;
+        ksocknal_api.validate = NULL;           /* our api validate is a NOOP */
+        ksocknal_api.lock     = ksocknal_api_lock;
+        ksocknal_api.unlock   = ksocknal_api_unlock;
+        ksocknal_api.nal_data = &ksocknal_data;
+
+        ksocknal_lib.nal_data = &ksocknal_data;
+
+        memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */
+
+        INIT_LIST_HEAD(&ksocknal_data.ksnd_socklist);
+        rwlock_init(&ksocknal_data.ksnd_socklist_lock);
+
+        ksocknal_data.ksnd_nal_cb = &ksocknal_lib;
+        spin_lock_init (&ksocknal_data.ksnd_nal_cb_lock);
+
+        spin_lock_init(&ksocknal_data.ksnd_small_fmp.fmp_lock);
+        INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_idle_fmbs);
+        INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns);
+
+        spin_lock_init(&ksocknal_data.ksnd_large_fmp.fmp_lock);
+        INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs);
+        INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns);
+
+        spin_lock_init(&ksocknal_data.ksnd_idle_ltx_lock);
+        INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_nblk_ltx_list);
+        INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_ltx_list);
+        init_waitqueue_head(&ksocknal_data.ksnd_idle_ltx_waitq);
+
+        spin_lock_init (&ksocknal_data.ksnd_reaper_lock);
+        INIT_LIST_HEAD (&ksocknal_data.ksnd_reaper_list);
+        init_waitqueue_head(&ksocknal_data.ksnd_reaper_waitq);
+
+        memset (&ksocknal_data.ksnd_irq_info, SOCKNAL_IRQ_UNASSIGNED,
+                sizeof (ksocknal_data.ksnd_irq_info));
+
+        /* flag lists/ptrs/locks initialised */
+        ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA;
+
+        PORTAL_ALLOC(ksocknal_data.ksnd_schedulers,
+                     sizeof(ksock_sched_t) * SOCKNAL_N_SCHED);
+        if (ksocknal_data.ksnd_schedulers == NULL)
+                RETURN(-ENOMEM);
+
+        for (i = 0; i < SOCKNAL_N_SCHED; i++) {
+                ksock_sched_t *kss = &ksocknal_data.ksnd_schedulers[i];
+
+                spin_lock_init (&kss->kss_lock);
+                INIT_LIST_HEAD (&kss->kss_rx_conns);
+                INIT_LIST_HEAD (&kss->kss_tx_conns);
+#if SOCKNAL_ZC
+                INIT_LIST_HEAD (&kss->kss_zctxdone_list);
+#endif
+                init_waitqueue_head (&kss->kss_waitq);
+        }
+
+        CERROR ("ltx "LPSZ", total "LPSZ"\n", sizeof (ksock_ltx_t),
+                sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
+
+        PORTAL_ALLOC(ksocknal_data.ksnd_ltxs,
+                     sizeof(ksock_ltx_t) * (SOCKNAL_NLTXS +SOCKNAL_NNBLK_LTXS));
+        if (ksocknal_data.ksnd_ltxs == NULL) {
+                ksocknal_module_fini ();
+                return (-ENOMEM);
+        }
+
+        /* Deterministic bugs please */
+        memset (ksocknal_data.ksnd_ltxs, 0xeb,
+                sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
+
+        /* The first SOCKNAL_NLTXS descriptors go on the normal idle list,
+         * the rest on the list reserved for callers that can't block */
+        for (i = 0; i < SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS; i++) {
+                ksock_ltx_t *ltx = &((ksock_ltx_t *)ksocknal_data.ksnd_ltxs)[i];
+
+                ltx->ltx_idle = i < SOCKNAL_NLTXS ?
+                                &ksocknal_data.ksnd_idle_ltx_list :
+                                &ksocknal_data.ksnd_idle_nblk_ltx_list;
+                list_add (&ltx->ltx_tx.tx_list, ltx->ltx_idle);
+        }
+
+        rc = PtlNIInit(ksocknal_init, 32, 4, 0, &ksocknal_ni);
+        if (rc != 0) {
+                CERROR("ksocknal: PtlNIInit failed: error %d\n", rc);
+                ksocknal_module_fini ();
+                RETURN (rc);
+        }
+        PtlNIDebug(ksocknal_ni, ~0);
+
+        ksocknal_data.ksnd_init = SOCKNAL_INIT_PTL; // flag PtlNIInit() called
+
+        for (i = 0; i < SOCKNAL_N_SCHED; i++) {
+                rc = ksocknal_thread_start (ksocknal_scheduler,
+                                            &ksocknal_data.ksnd_schedulers[i]);
+                if (rc != 0) {
+                        CERROR("Can't spawn socknal scheduler[%d]: %d\n",
+                               i, rc);
+                        ksocknal_module_fini ();
+                        RETURN (rc);
+                }
+        }
+
+        rc = ksocknal_thread_start (ksocknal_reaper, NULL);
+        if (rc != 0) {
+                CERROR("Can't spawn socknal reaper: %d\n", rc);
+                ksocknal_module_fini ();
+                RETURN (rc);
+        }
+
+        rc = kpr_register(&ksocknal_data.ksnd_router,
+                          &ksocknal_router_interface);
+        if (rc != 0) {
+                CDEBUG(D_NET, "Can't initialise routing interface "
+                       "(rc = %d): not routing\n", rc);
+        } else {
+                /* Only allocate forwarding buffers if I'm on a gateway */
+
+                PORTAL_ALLOC(ksocknal_data.ksnd_fmbs,
+                             sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS +
+                                                    SOCKNAL_LARGE_FWD_NMSGS));
+                if (ksocknal_data.ksnd_fmbs == NULL) {
+                        ksocknal_module_fini ();
+                        RETURN(-ENOMEM);
+                }
+
+                /* NULL out buffer pointers etc */
+                memset(ksocknal_data.ksnd_fmbs, 0,
+                       sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS +
+                                              SOCKNAL_LARGE_FWD_NMSGS));
+
+                for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS +
+                                 SOCKNAL_LARGE_FWD_NMSGS); i++) {
+                        ksock_fmb_t *fmb =
+                                &((ksock_fmb_t *)ksocknal_data.ksnd_fmbs)[i];
+
+                        if (i < SOCKNAL_SMALL_FWD_NMSGS) {
+                                fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES;
+                                fmb->fmb_pool = &ksocknal_data.ksnd_small_fmp;
+                        } else {
+                                fmb->fmb_npages = SOCKNAL_LARGE_FWD_PAGES;
+                                fmb->fmb_pool = &ksocknal_data.ksnd_large_fmp;
+                        }
+
+                        LASSERT (fmb->fmb_npages > 0);
+                        for (j = 0; j < fmb->fmb_npages; j++) {
+                                fmb->fmb_pages[j] = alloc_page (GFP_KERNEL);
+
+                                /* pages allocated so far are released by
+                                 * ksocknal_free_buffers(), which skips
+                                 * NULL entries */
+                                if (fmb->fmb_pages[j] == NULL) {
+                                        ksocknal_module_fini ();
+                                        return (-ENOMEM);
+                                }
+
+                                LASSERT(page_address (fmb->fmb_pages[j]) !=
+                                        NULL);
+                        }
+
+                        list_add(&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs);
+                }
+        }
+
+        rc = kportal_nal_register(SOCKNAL, &ksocknal_cmd, NULL);
+        if (rc != 0) {
+                CERROR ("Can't initialise command interface (rc = %d)\n", rc);
+                ksocknal_module_fini ();
+                return (rc);
+        }
+
+        PORTAL_SYMBOL_REGISTER(ksocknal_ni);
+
+        /* flag everything initialised */
+        ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
+
+        printk(KERN_INFO "Routing socket NAL loaded (Routing %s, initial "
+               "mem %d)\n",
+               kpr_routing (&ksocknal_data.ksnd_router) ?
+               "enabled" : "disabled", pkmem);
+
+        return (0);
+}
+
+/* Module metadata */
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("Kernel TCP Socket NAL v0.01");
+MODULE_LICENSE("GPL");
+
+/* Module load/unload entry points */
+module_init(ksocknal_module_init);
+module_exit(ksocknal_module_fini);
+
+/* Make the socknal network interface handle available to other modules */
+EXPORT_SYMBOL (ksocknal_ni);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Author: Zach Brown <zab@zabbo.net>
+ * Author: Peter J. Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#define DEBUG_PORTAL_ALLOC
+#define EXPORT_SYMTAB
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/smp_lock.h>
+#include <linux/unistd.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <linux/uio.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/list.h>
+#include <linux/kmod.h>
+#include <asm/uaccess.h>
+#include <asm/segment.h>
+
+#define DEBUG_SUBSYSTEM S_SOCKNAL
+
+#include <linux/kp30.h>
+#include <portals/p30.h>
+#include <portals/lib-p30.h>
+
+/* Compile-time tunables for the socket NAL. */
+
+/* NB expands to a function call, so it is re-evaluated at every use */
+#define SOCKNAL_N_SCHED num_online_cpus() /* # socknal schedulers */
+
+#if PTL_LARGE_MTU
+# define SOCKNAL_MAX_FWD_PAYLOAD (256<<10) /* biggest payload I can forward (256KB) */
+#else
+# define SOCKNAL_MAX_FWD_PAYLOAD (64<<10) /* biggest payload I can forward (64KB) */
+#endif
+
+#define SOCKNAL_NLTXS 128 /* # normal transmit messages */
+#define SOCKNAL_NNBLK_LTXS 128 /* # transmit messages reserved if can't block */
+
+#define SOCKNAL_SMALL_FWD_NMSGS 128 /* # small messages I can be forwarding at any time */
+#define SOCKNAL_LARGE_FWD_NMSGS 64 /* # large messages I can be forwarding at any time */
+
+#define SOCKNAL_SMALL_FWD_PAGES 1 /* # pages in a small message fwd buffer */
+
+/* enough whole pages to hold a portals header plus the biggest payload */
+#define SOCKNAL_LARGE_FWD_PAGES (PAGE_ALIGN (sizeof (ptl_hdr_t) + SOCKNAL_MAX_FWD_PAYLOAD) >> PAGE_SHIFT)
+ /* # pages in a large message fwd buffer */
+
+#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */
+
+/* 8/10ths of the socket's send buffer size */
+#define SOCKNAL_TX_LOW_WATER(sk) (((sk)->sndbuf*8)/10)
+
+/* A pool of forwarding message buffers (ksock_fmb_t).  There is one pool
+ * for small and one for large buffers in ksock_nal_data_t. */
+typedef struct /* pool of forwarding buffers */
+{
+ spinlock_t fmp_lock; /* serialise */
+ struct list_head fmp_idle_fmbs; /* buffers waiting for a connection */
+ struct list_head fmp_blocked_conns; /* connections waiting for a buffer */
+} ksock_fmb_pool_t;
+
+
+/* State private to one scheduler; connections are queued here (via their
+ * ksnc_rx_list / ksnc_tx_list members) when they have work to do. */
+typedef struct /* per scheduler state */
+{
+ spinlock_t kss_lock; /* serialise */
+ struct list_head kss_rx_conns; /* conn waiting to be read */
+ struct list_head kss_tx_conns; /* conn waiting to be written */
+#if SOCKNAL_ZC
+ struct list_head kss_zctxdone_list; /* completed ZC transmits */
+#endif
+ wait_queue_head_t kss_waitq; /* where scheduler sleeps */
+ int kss_nconns; /* # connections assigned to this scheduler */
+} ksock_sched_t;
+
+/* Global state of the socket NAL; the single instance is 'ksocknal_data'
+ * (declared extern at the end of this header). */
+typedef struct {
+ int ksnd_init; /* initialisation state (one of SOCKNAL_INIT_*) */
+
+ struct list_head ksnd_socklist; /* all my connections */
+ rwlock_t ksnd_socklist_lock; /* stabilise add/find/remove */
+
+ ptl_nid_t ksnd_mynid;
+ nal_cb_t *ksnd_nal_cb;
+ spinlock_t ksnd_nal_cb_lock; /* lib cli/sti lock */
+
+ atomic_t ksnd_nthreads; /* # live threads */
+ int ksnd_shuttingdown; /* tell threads to exit */
+ ksock_sched_t *ksnd_schedulers; /* scheduler state */
+
+ kpr_router_t ksnd_router; /* THE router */
+
+ void *ksnd_fmbs; /* all the pre-allocated FMBs */
+ ksock_fmb_pool_t ksnd_small_fmp; /* small message forwarding buffers */
+ ksock_fmb_pool_t ksnd_large_fmp; /* large message forwarding buffers */
+
+ void *ksnd_ltxs; /* all the pre-allocated LTXs */
+ spinlock_t ksnd_idle_ltx_lock; /* serialise ltx alloc/free */
+ struct list_head ksnd_idle_ltx_list; /* where to get an idle LTX */
+ struct list_head ksnd_idle_nblk_ltx_list; /* where to get an idle LTX if you can't block */
+ wait_queue_head_t ksnd_idle_ltx_waitq; /* where to block for an idle LTX */
+
+ struct list_head ksnd_reaper_list; /* conn waiting to be reaped */
+ wait_queue_head_t ksnd_reaper_waitq; /* reaper sleeps here */
+ spinlock_t ksnd_reaper_lock; /* serialise */
+ /* NOTE(review): entries presumably hold SOCKNAL_IRQ_* encoded values; confirm */
+ unsigned char ksnd_irq_info[NR_IRQS]; /* irq->scheduler lookup */
+} ksock_nal_data_t;
+
+/* Values of ksock_nal_data_t::ksnd_init, in increasing order of
+ * completeness. */
+#define SOCKNAL_INIT_NOTHING 0
+#define SOCKNAL_INIT_DATA 1
+#define SOCKNAL_INIT_PTL 2
+#define SOCKNAL_INIT_ALL 3
+
+/* Bit layout of an irq info byte: top bit is the 'bound' flag, the low
+ * 7 bits are a scheduler number, and all-ones means 'unassigned'. */
+#define SOCKNAL_IRQ_BOUND 0x80 /* flag we _did_ bind already */
+#define SOCKNAL_IRQ_SCHED_MASK 0x7f /* we assume < 127 CPUs */
+#define SOCKNAL_IRQ_UNASSIGNED 0xff /* flag unassigned */
+
+/* A packet just assembled for transmission is represented by 1 or more
+ * struct iovec fragments and 0 or more ptl_kiov_t fragments. Forwarded
+ * messages, or messages from an MD with PTL_MD_KIOV _not_ set have 0
+ * ptl_kiov_t fragments. Messages from an MD with PTL_MD_KIOV set, have 1
+ * struct iovec fragment (the header) and up to PTL_MD_MAX_IOV ptl_kiov_t
+ * fragments.
+ *
+ * On the receive side, initially 1 struct iovec fragment is posted for
+ * receive (the header). Once the header has been received, if the message
+ * requires forwarding or will be received into mapped memory, up to
+ * PTL_MD_MAX_IOV struct iovec fragments describe the target memory.
+ * Otherwise up to PTL_MD_MAX_IOV ptl_kiov_t fragments are used.
+ */
+
+/* Descriptor for one packet queued for transmission.  The send routines
+ * advance tx_iov/tx_kiov and decrement tx_niov/tx_nkiov/tx_nob as
+ * fragments are written, so these fields describe what is still to go. */
+typedef struct /* transmit packet */
+{
+ struct list_head tx_list; /* queue on conn for transmission etc */
+ char tx_isfwd; /* forwarding / sourced here */
+ int tx_nob; /* # packet bytes */
+ int tx_niov; /* # packet iovec frags */
+ struct iovec *tx_iov; /* packet iovec frags */
+ int tx_nkiov; /* # packet page frags */
+ ptl_kiov_t *tx_kiov; /* packet page frags */
+#if SOCKNAL_ZC
+ ksock_sched_t *tx_sched; /* who to wake on callback */
+ zccd_t tx_zccd; /* zero copy callback descriptor */
+#endif
+} ksock_tx_t;
+
+/* Map a zccd_t pointer back to the ksock_tx_t that embeds it.
+ * NB tx_zccd only exists when SOCKNAL_ZC is set, so this macro may only
+ * be expanded in SOCKNAL_ZC code. */
+#define KSOCK_ZCCD_2_TX(ptr) list_entry (ptr, ksock_tx_t, tx_zccd)
+/* network zero copy callback descriptor embedded in ksock_tx_t */
+
+/* space for the tx frag descriptors: hdr is always 1 iovec
+ * and payload is up to PTL_MD_MAX_IOV frags of either type.
+ * NB 'hdr' must stay immediately before 'payload': ksocknal_send() copies
+ * payload iovecs to (tx_iov + 1) with tx_iov pointing at 'hdr'. */
+typedef struct
+{
+ struct iovec hdr;
+ union {
+ struct iovec iov[PTL_MD_MAX_IOV];
+ ptl_kiov_t kiov[PTL_MD_MAX_IOV];
+ } payload;
+} ksock_txiovspace_t;
+
+/* Transmit descriptor for a packet sourced on this node.  It embeds the
+ * generic ksock_tx_t plus private storage for the header and the frag
+ * descriptors, so nothing needs allocating at send time. */
+typedef struct /* locally transmitted packet */
+{
+ ksock_tx_t ltx_tx; /* send info */
+ struct list_head *ltx_idle; /* where to put when idle */
+ void *ltx_private; /* lib_finalize() callback arg */
+ void *ltx_cookie; /* lib_finalize() callback arg */
+ ksock_txiovspace_t ltx_iov_space; /* where to stash frag descriptors */
+ ptl_hdr_t ltx_hdr; /* buffer for packet header */
+} ksock_ltx_t;
+
+/* Recover the router's forwarding descriptor from a ksock_tx_t that lives
+ * in its scratch area (only valid when tx_isfwd is set). */
+#define KSOCK_TX_2_KPR_FWD_DESC(ptr) list_entry ((kprfd_scratch_t *)ptr, kpr_fwd_desc_t, kprfd_scratch)
+/* forwarded packets (router->socknal) embedded in kpr_fwd_desc_t::kprfd_scratch */
+
+/* Recover the enclosing ksock_ltx_t from its ltx_tx member (only valid
+ * when tx_isfwd is clear). */
+#define KSOCK_TX_2_KSOCK_LTX(ptr) list_entry (ptr, ksock_ltx_t, ltx_tx)
+/* local packets (lib->socknal) embedded in ksock_ltx_t::ltx_tx */
+
+/* NB list_entry() is used here as a convenient macro for calculating a
+ * pointer to a struct from the address of a member.
+ */
+
+/* NB the page/iovec arrays are sized for the large case; small buffers
+ * use only the first fmb_npages entries. */
+typedef struct /* Kernel portals Socket Forwarding message buffer */
+{ /* (socknal->router) */
+ struct list_head fmb_list; /* queue idle */
+ kpr_fwd_desc_t fmb_fwd; /* router's descriptor */
+ int fmb_npages; /* # pages allocated */
+ ksock_fmb_pool_t *fmb_pool; /* owning pool */
+ struct page *fmb_pages[SOCKNAL_LARGE_FWD_PAGES];
+ struct iovec fmb_iov[SOCKNAL_LARGE_FWD_PAGES];
+} ksock_fmb_t;
+
+/* space for the rx frag descriptors; we either read a single contiguous
+ * header, or up to PTL_MD_MAX_IOV frags of payload of either type, so a
+ * union of the two descriptor arrays is sufficient. */
+typedef union {
+ struct iovec iov[PTL_MD_MAX_IOV];
+ ptl_kiov_t kiov[PTL_MD_MAX_IOV];
+} ksock_rxiovspace_t;
+
+/* Values of ksock_conn_t::ksnc_rx_state */
+#define SOCKNAL_RX_HEADER 1 /* reading header */
+#define SOCKNAL_RX_BODY 2 /* reading body (to deliver here) */
+#define SOCKNAL_RX_BODY_FWD 3 /* reading body (to forward) */
+#define SOCKNAL_RX_SLOP 4 /* skipping body */
+#define SOCKNAL_RX_GET_FMB 5 /* scheduled for forwarding */
+#define SOCKNAL_RX_FMB_SLEEP 6 /* blocked waiting for a fwd desc */
+
+/* One connection (socket) to a peer.  Reference counted via
+ * ksnc_refcount; drop references with ksocknal_put_conn(). */
+typedef struct
+{
+ struct list_head ksnc_list; /* stash on global socket list */
+ struct file *ksnc_file; /* socket filp */
+ struct socket *ksnc_sock; /* actual socket */
+ void *ksnc_saved_data_ready; /* socket's original data_ready() callback */
+ void *ksnc_saved_write_space; /* socket's original write_space() callback */
+ ptl_nid_t ksnc_peernid; /* who's on the other end */
+ atomic_t ksnc_refcount; /* # users */
+ ksock_sched_t *ksnc_scheduler; /* who schedules this connection */
+
+ /* READER */
+ struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */
+ volatile int ksnc_rx_ready; /* data ready to read */
+ int ksnc_rx_scheduled; /* being progressed */
+ int ksnc_rx_state; /* what is being read (SOCKNAL_RX_*) */
+ int ksnc_rx_nob_left; /* # bytes to next hdr/body */
+ int ksnc_rx_nob_wanted; /* bytes actually wanted */
+ int ksnc_rx_niov; /* # iovec frags */
+ struct iovec *ksnc_rx_iov; /* the iovec frags */
+ int ksnc_rx_nkiov; /* # page frags */
+ ptl_kiov_t *ksnc_rx_kiov; /* the page frags */
+ ksock_rxiovspace_t ksnc_rx_iov_space; /* space for frag descriptors */
+ void *ksnc_cookie; /* rx lib_finalize passthru arg */
+ ptl_hdr_t ksnc_hdr; /* where I read headers into */
+
+ /* WRITER */
+ struct list_head ksnc_tx_list; /* where I enq waiting for output space */
+ struct list_head ksnc_tx_queue; /* packets waiting to be sent */
+ volatile int ksnc_tx_ready; /* write space */
+ int ksnc_tx_scheduled; /* being progressed */
+
+} ksock_conn_t;
+
+/* Connection management entry points (defined outside this header). */
+extern int ksocknal_add_sock (ptl_nid_t nid, int fd, int client);
+extern int ksocknal_close_sock(ptl_nid_t nid);
+extern int ksocknal_set_mynid(ptl_nid_t nid);
+extern int ksocknal_push_sock(ptl_nid_t nid);
+/* NOTE(review): callers treat a non-NULL result as holding a reference that
+ * must be released with ksocknal_put_conn(); confirm against the definition */
+extern ksock_conn_t *ksocknal_get_conn (ptl_nid_t nid);
+/* called by ksocknal_put_conn() when the last reference is dropped */
+extern void _ksocknal_put_conn (ksock_conn_t *conn);
+extern void ksocknal_close_conn (ksock_conn_t *conn);
+
+/*
+ * Drop one reference on 'conn'.  The debug trace reports the reference
+ * count as it was _before_ the decrement.  Whoever drops the last
+ * reference hands the connection to _ksocknal_put_conn().
+ */
+static inline void
+ksocknal_put_conn (ksock_conn_t *conn)
+{
+        CDEBUG (D_OTHER, "putting conn[%p] -> "LPX64" (%d)\n",
+                conn, conn->ksnc_peernid, atomic_read (&conn->ksnc_refcount));
+
+        if (!atomic_dec_and_test (&conn->ksnc_refcount))
+                return;                         /* still in use */
+
+        _ksocknal_put_conn (conn);
+}
+
+/* Thread helpers, packet handling and socket callbacks implemented in the
+ * socknal .c files. */
+extern int ksocknal_thread_start (int (*fn)(void *arg), void *arg);
+extern int ksocknal_new_packet (ksock_conn_t *conn, int skip);
+extern void ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd);
+extern int ksocknal_scheduler (void *arg);
+extern int ksocknal_reaper (void *arg);
+extern void ksocknal_data_ready(struct sock *sk, int n);
+extern void ksocknal_write_space(struct sock *sk);
+
+
+/* The NAL's library callback table and its global state */
+extern nal_cb_t ksocknal_lib;
+extern ksock_nal_data_t ksocknal_data;
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Author: Zach Brown <zab@zabbo.net>
+ * Author: Peter J. Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "socknal.h"
+
+/* Packet counters.  Within this part of the file 'launched' is bumped by
+ * ksocknal_launch_packet() and 'being_sent' is incremented in
+ * ksocknal_process_transmit() and decremented in ksocknal_tx_done(). */
+atomic_t ksocknal_packets_received;
+atomic_t ksocknal_packets_launched;
+atomic_t ksocknal_packets_being_sent;
+
+#if SOCKNAL_ZC
+/* Zero-copy send controls: enable flag, and the smallest fragment (bytes)
+ * for which the zero-copy path is attempted. */
+int ksocknal_do_zc = 1;
+int ksocknal_zc_min_frag = 2048;
+#endif
+
+/*
+ * LIB functions follow
+ *
+ */
+/* Library 'read' callback: copy 'len' bytes from 'src_addr' to 'dst_addr'
+ * with a plain memcpy().  'private' is unused.  Always returns 0. */
+int
+ksocknal_read(nal_cb_t *nal, void *private, void *dst_addr,
+ user_ptr src_addr, size_t len)
+{
+ CDEBUG(D_NET, LPX64": reading %ld bytes from %p -> %p\n",
+ nal->ni.nid, (long)len, src_addr, dst_addr);
+
+ memcpy( dst_addr, src_addr, len );
+ return 0;
+}
+
+/* Library 'write' callback: copy 'len' bytes from 'src_addr' to 'dst_addr'
+ * with a plain memcpy().  'private' is unused.  Always returns 0. */
+int
+ksocknal_write(nal_cb_t *nal, void *private, user_ptr dst_addr,
+ void *src_addr, size_t len)
+{
+ CDEBUG(D_NET, LPX64": writing %ld bytes from %p -> %p\n",
+ nal->ni.nid, (long)len, src_addr, dst_addr);
+
+ memcpy( dst_addr, src_addr, len );
+ return 0;
+}
+
+/*
+ * Library event callback: invoke the event queue's handler for 'ev', if
+ * the queue has one.  'private' is unused.  Always returns 0.
+ */
+int
+ksocknal_callback (nal_cb_t * nal, void *private, lib_eq_t *eq,
+                   ptl_event_t *ev)
+{
+        CDEBUG(D_NET, LPX64": callback eq %p ev %p\n",
+               nal->ni.nid, eq, ev);
+
+        if (eq->event_callback == NULL)         /* nobody to tell */
+                return 0;
+
+        eq->event_callback(ev);
+        return 0;
+}
+
+/*
+ * Library allocator callback: allocate 'len' bytes with PORTAL_ALLOC and
+ * zero them.  Returns NULL if the allocation failed.
+ */
+void *
+ksocknal_malloc(nal_cb_t *nal, size_t len)
+{
+        void *mem;
+
+        PORTAL_ALLOC(mem, len);
+        if (mem == NULL)
+                return (mem);
+
+        memset(mem, 0, len);
+        return (mem);
+}
+
+/* Library free callback: release a buffer of 'len' bytes via PORTAL_FREE.
+ * 'nal' is unused. */
+void
+ksocknal_free(nal_cb_t *nal, void *buf, size_t len)
+{
+ PORTAL_FREE(buf, len);
+}
+
+/*
+ * Library printf callback: format into a fixed 256 byte buffer (longer
+ * output is truncated) and emit it through CDEBUG at D_NET level.
+ */
+void
+ksocknal_printf(nal_cb_t *nal, const char *fmt, ...)
+{
+        char    text[256];
+        va_list args;
+
+        va_start (args, fmt);
+        vsnprintf (text, sizeof (text), fmt, args);     /* bounded format */
+        va_end (args);
+
+        /* belt and braces: force termination */
+        text[sizeof (text) - 1] = 0;
+
+        CDEBUG (D_NET, "%s", text);
+}
+
+/*
+ * Library 'cli' callback: take the NAL callback lock.
+ * NB this is a plain spin_lock(); 'flags' is not used.
+ */
+void
+ksocknal_cli(nal_cb_t *nal, unsigned long *flags)
+{
+        ksock_nal_data_t *data;
+
+        data = nal->nal_data;
+        spin_lock(&data->ksnd_nal_cb_lock);
+}
+
+/*
+ * Library 'sti' callback: release the NAL callback lock taken by
+ * ksocknal_cli().  'flags' is not used.
+ */
+void
+ksocknal_sti(nal_cb_t *nal, unsigned long *flags)
+{
+        ksock_nal_data_t *data = nal->nal_data;
+
+        spin_unlock(&data->ksnd_nal_cb_lock);
+}
+
+/*
+ * Library distance callback: report 0 for my own NID and 1 for any
+ * other NID.  Always returns 0.
+ */
+int
+ksocknal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
+{
+        /* I would guess that if ksocknal_get_conn(nid) == NULL,
+           and we're not routing, then 'nid' is very distant :) */
+        *dist = (nal->ni.nid == nid) ? 0 : 1;
+
+        return 0;
+}
+
+/*
+ * Get an idle local transmit descriptor.
+ *
+ * The normal idle list is always tried first.  If it is empty and
+ * 'may_block' is zero, the list reserved for non-blocking callers is
+ * tried and NULL is returned if that is empty too.  If 'may_block' is
+ * non-zero, the caller sleeps until the normal idle list is non-empty
+ * and then retries.
+ *
+ * Returns the descriptor (removed from its idle list), or NULL.
+ */
+ksock_ltx_t *
+ksocknal_get_ltx (int may_block)
+{
+        unsigned long  flags;   /* spin_lock_irqsave() needs unsigned long */
+        ksock_ltx_t   *ltx = NULL;
+
+        for (;;) {
+                spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags);
+
+                if (!list_empty (&ksocknal_data.ksnd_idle_ltx_list)) {
+                        ltx = list_entry(ksocknal_data.ksnd_idle_ltx_list.next,
+                                         ksock_ltx_t, ltx_tx.tx_list);
+                        list_del (&ltx->ltx_tx.tx_list);
+                        break;                  /* NB lock still held */
+                }
+
+                if (!may_block) {
+                        if (!list_empty(&ksocknal_data.ksnd_idle_nblk_ltx_list)) {
+                                ltx = list_entry(ksocknal_data.ksnd_idle_nblk_ltx_list.next,
+                                                 ksock_ltx_t, ltx_tx.tx_list);
+                                list_del (&ltx->ltx_tx.tx_list);
+                        }
+                        break;                  /* NB lock still held */
+                }
+
+                spin_unlock_irqrestore(&ksocknal_data.ksnd_idle_ltx_lock,
+                                       flags);
+
+                /* sleep without the lock; re-check under it on the way round */
+                wait_event (ksocknal_data.ksnd_idle_ltx_waitq,
+                            !list_empty (&ksocknal_data.ksnd_idle_ltx_list));
+        }
+
+        /* every 'break' above leaves the loop with the lock held */
+        spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags);
+
+        return (ltx);
+}
+
+#if SOCKNAL_ZC
+/* Translate a kernel virtual address into its struct page for zero-copy
+ * sends.  vmalloc (and, with CONFIG_HIGHMEM, kmap) addresses are looked up
+ * with vmalloc_to_page(); anything else goes through virt_to_page().
+ * Returns NULL if no valid page was found. */
+struct page *
+ksocknal_kvaddr_to_page (unsigned long vaddr)
+{
+ struct page *page;
+
+ if (vaddr >= VMALLOC_START &&
+ vaddr < VMALLOC_END)
+ page = vmalloc_to_page ((void *)vaddr);
+#if CONFIG_HIGHMEM
+ else if (vaddr >= PKMAP_BASE &&
+ vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE))
+ page = vmalloc_to_page ((void *)vaddr);
+ /* in 2.4 ^ just walks the page tables */
+#endif
+ else
+ page = virt_to_page (vaddr);
+
+ if (page == NULL ||
+ !VALID_PAGE (page))
+ return (NULL);
+
+ return (page);
+}
+#endif
+
+/* Try to send the first iovec fragment of 'tx' on 'sock' without blocking.
+ * 'more' is non-zero if the caller has further data to follow.
+ *
+ * Returns: 1 if the whole fragment went (tx_iov/tx_niov advanced),
+ * -EAGAIN if only part of it went (the fragment is trimmed in place),
+ * otherwise the socket's own return value (<= 0).  tx_nob is reduced by
+ * the number of bytes sent. */
+int
+ksocknal_send_iov (struct socket *sock, ksock_tx_t *tx, int more)
+{
+ struct iovec *iov = tx->tx_iov;
+ int fragsize = iov->iov_len;
+ unsigned long vaddr = (unsigned long)iov->iov_base;
+#if SOCKNAL_ZC
+ /* a zero-copy send may not cross a page boundary */
+ int offset = vaddr & (PAGE_SIZE - 1);
+ int zcsize = MIN (fragsize, PAGE_SIZE - offset);
+ struct page *page;
+#endif
+ int rc;
+
+ /* NB we can't trust socket ops to either consume our iovs
+ * or leave them alone, so we only send 1 frag at a time. */
+ LASSERT (fragsize <= tx->tx_nob);
+ LASSERT (tx->tx_niov > 0);
+ more |= (tx->tx_niov > 1);
+
+#if SOCKNAL_ZC
+ if (ksocknal_do_zc &&
+ (sock->sk->route_caps & NETIF_F_SG) &&
+ (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) &&
+ zcsize >= ksocknal_zc_min_frag &&
+ (page = ksocknal_kvaddr_to_page (vaddr)) != NULL) {
+
+ CDEBUG(D_NET, "vaddr %p, page %p->%p + offset %x for %d\n",
+ (void *)vaddr, page, page_address(page), offset, zcsize);
+
+ /* rest of this frag still to come */
+ more |= (zcsize < fragsize);
+
+ rc = tcp_sendpage_zccd(sock, page, offset, zcsize,
+ more ? (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT,
+ &tx->tx_zccd);
+ } else
+#endif
+ {
+ /* NB don't pass tx's iov; sendmsg may or may not update it */
+ struct iovec fragiov = { .iov_base = (void *)vaddr,
+ .iov_len = fragsize};
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = &fragiov,
+ .msg_iovlen = 1,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = more ? (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT
+ };
+ mm_segment_t oldmm = get_fs();
+
+ /* the buffer is a kernel address */
+ set_fs (KERNEL_DS);
+ rc = sock->sk->prot->sendmsg(sock->sk, &msg, fragsize);
+ set_fs (oldmm);
+ }
+
+ if (rc <= 0)
+ return (rc);
+
+ tx->tx_nob -= rc;
+
+ if (rc < fragsize) {
+ /* didn't send whole frag */
+ iov->iov_base = (void *)(vaddr + rc);
+ iov->iov_len = fragsize - rc;
+ return (-EAGAIN);
+ }
+
+ /* everything went */
+ LASSERT (rc == fragsize);
+ tx->tx_iov++;
+ tx->tx_niov--;
+ return (1);
+}
+
+/* Try to send the first page fragment of 'tx' on 'sock' without blocking.
+ * 'more' is non-zero if the caller has further data to follow.
+ *
+ * Returns: 1 if the whole fragment went (tx_kiov/tx_nkiov advanced),
+ * -EAGAIN if only part of it went (the fragment is trimmed in place),
+ * otherwise the socket's own return value (<= 0).  tx_nob is reduced by
+ * the number of bytes sent. */
+int
+ksocknal_send_kiov (struct socket *sock, ksock_tx_t *tx, int more)
+{
+ ptl_kiov_t *kiov = tx->tx_kiov;
+ int fragsize = kiov->kiov_len;
+ struct page *page = kiov->kiov_page;
+ int offset = kiov->kiov_offset;
+ int rc;
+
+ /* NB we can't trust socket ops to either consume our iovs
+ * or leave them alone, so we only send 1 frag at a time. */
+ LASSERT (fragsize <= tx->tx_nob);
+ LASSERT (offset + fragsize <= PAGE_SIZE);
+ LASSERT (tx->tx_nkiov > 0);
+ more |= (tx->tx_nkiov > 1);
+
+#if SOCKNAL_ZC
+ if (ksocknal_do_zc &&
+ (sock->sk->route_caps & NETIF_F_SG) &&
+ (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) &&
+ fragsize >= ksocknal_zc_min_frag) {
+
+ CDEBUG(D_NET, "page %p + offset %x for %d\n",
+ page, offset, fragsize);
+
+ rc = tcp_sendpage_zccd(sock, page, offset, fragsize,
+ more ? (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT,
+ &tx->tx_zccd);
+ } else
+#endif
+ {
+ /* copying send: map the page for the duration of the call */
+ char *addr = ((char *)kmap (page)) + offset;
+ struct iovec fragiov = {.iov_base = addr,
+ .iov_len = fragsize};
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = &fragiov,
+ .msg_iovlen = 1,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = more ? (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT
+ };
+ mm_segment_t oldmm = get_fs();
+
+ /* the buffer is a kernel address */
+ set_fs (KERNEL_DS);
+ rc = sock->sk->prot->sendmsg(sock->sk, &msg, fragsize);
+ set_fs (oldmm);
+ kunmap (page);
+ }
+
+ if (rc <= 0)
+ return (rc);
+
+ tx->tx_nob -= rc;
+
+ if (rc < fragsize) {
+ /* didn't send whole frag */
+ kiov->kiov_offset = offset + rc;
+ kiov->kiov_len = fragsize - rc;
+ return (-EAGAIN);
+ }
+
+ /* everything went */
+ LASSERT (rc == fragsize);
+ tx->tx_kiov++;
+ tx->tx_nkiov--;
+ return (1);
+}
+
+/*
+ * Push as much of 'tx' as the socket will take right now, one fragment
+ * at a time (mapped iovec frags first, then page frags).
+ *
+ * Returns 0 if everything was sent, or if the socket stopped accepting
+ * data (-EAGAIN) or failed after at least one fragment went; the caller
+ * tells these apart by checking tx->tx_nob.  Otherwise returns the
+ * result (<= 0) of the very first fragment send.
+ */
+int
+ksocknal_sendmsg (struct socket *sock, ksock_tx_t *tx, int more)
+{
+        int progressed = 0;
+        ENTRY;
+
+        LASSERT (!in_interrupt());
+
+        while (1) {
+                int rc;
+
+                if (tx->tx_niov == 0)
+                        rc = ksocknal_send_kiov (sock, tx, more);
+                else
+                        rc = ksocknal_send_iov (sock, tx, more || tx->tx_nkiov != 0);
+
+                /* Interpret a zero rc the same as -EAGAIN (Adaptech TOE) */
+                if (rc <= 0) {                  /* error or partial send */
+                        if (progressed || rc == -EAGAIN)
+                                RETURN (0);
+                        RETURN (rc);
+                }
+
+                if (tx->tx_nob == 0)            /* sent everything */
+                        RETURN (0);
+
+                progressed = 1;
+        }
+}
+
+/* Try to fill the first iovec fragment posted on 'conn' without blocking.
+ *
+ * Returns: 1 if the fragment was filled (ksnc_rx_iov/ksnc_rx_niov
+ * advanced), -EAGAIN if only part of it arrived (the fragment is trimmed
+ * in place), otherwise sock_recvmsg()'s own return value (<= 0).
+ * ksnc_rx_nob_wanted and ksnc_rx_nob_left are reduced by the bytes read. */
+int
+ksocknal_recv_iov (ksock_conn_t *conn)
+{
+ struct iovec *iov = conn->ksnc_rx_iov;
+ int fragsize = iov->iov_len;
+ unsigned long vaddr = (unsigned long)iov->iov_base;
+ /* private copy of the frag, so conn's iov is only changed by us */
+ struct iovec fragiov = { .iov_base = (void *)vaddr,
+ .iov_len = fragsize};
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = &fragiov,
+ .msg_iovlen = 1,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = 0
+ };
+ mm_segment_t oldmm = get_fs();
+ int rc;
+
+ /* NB we can't trust socket ops to either consume our iovs
+ * or leave them alone, so we only receive 1 frag at a time. */
+ LASSERT (conn->ksnc_rx_niov > 0);
+ LASSERT (fragsize <= conn->ksnc_rx_nob_wanted);
+
+ set_fs (KERNEL_DS);
+ rc = sock_recvmsg (conn->ksnc_sock, &msg, fragsize, MSG_DONTWAIT);
+ /* NB this is just a boolean............................^ */
+ set_fs (oldmm);
+
+ if (rc <= 0)
+ return (rc);
+
+ conn->ksnc_rx_nob_wanted -= rc;
+ conn->ksnc_rx_nob_left -= rc;
+
+ if (rc < fragsize) {
+ /* short read: remember where to carry on */
+ iov->iov_base = (void *)(vaddr + rc);
+ iov->iov_len = fragsize - rc;
+ return (-EAGAIN);
+ }
+
+ LASSERT (rc == fragsize);
+ conn->ksnc_rx_iov++;
+ conn->ksnc_rx_niov--;
+ return (1);
+}
+
+/* Try to fill the first page fragment posted on 'conn' without blocking.
+ * The page is kmap()ed for the duration of the receive.
+ *
+ * Returns: 1 if the fragment was filled (ksnc_rx_kiov/ksnc_rx_nkiov
+ * advanced), -EAGAIN if only part of it arrived (the fragment is trimmed
+ * in place), otherwise sock_recvmsg()'s own return value (<= 0).
+ * ksnc_rx_nob_wanted and ksnc_rx_nob_left are reduced by the bytes read. */
+int
+ksocknal_recv_kiov (ksock_conn_t *conn)
+{
+ ptl_kiov_t *kiov = conn->ksnc_rx_kiov;
+ struct page *page = kiov->kiov_page;
+ int offset = kiov->kiov_offset;
+ int fragsize = kiov->kiov_len;
+ /* NB the page is mapped here, before the assertions below run */
+ unsigned long vaddr = ((unsigned long)kmap (page)) + offset;
+ struct iovec fragiov = { .iov_base = (void *)vaddr,
+ .iov_len = fragsize};
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = &fragiov,
+ .msg_iovlen = 1,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = 0
+ };
+ mm_segment_t oldmm = get_fs();
+ int rc;
+
+ /* NB we can't trust socket ops to either consume our iovs
+ * or leave them alone, so we only receive 1 frag at a time. */
+ LASSERT (fragsize <= conn->ksnc_rx_nob_wanted);
+ LASSERT (conn->ksnc_rx_nkiov > 0);
+ LASSERT (offset + fragsize <= PAGE_SIZE);
+
+ set_fs (KERNEL_DS);
+ rc = sock_recvmsg (conn->ksnc_sock, &msg, fragsize, MSG_DONTWAIT);
+ /* NB this is just a boolean............................^ */
+ set_fs (oldmm);
+ kunmap (page);
+
+ if (rc <= 0)
+ return (rc);
+
+ conn->ksnc_rx_nob_wanted -= rc;
+ conn->ksnc_rx_nob_left -= rc;
+
+ if (rc < fragsize) {
+ /* short read: remember where to carry on */
+ kiov->kiov_offset = offset + rc;
+ kiov->kiov_len = fragsize - rc;
+ return (-EAGAIN);
+ }
+
+ LASSERT (rc == fragsize);
+ conn->ksnc_rx_kiov++;
+ conn->ksnc_rx_nkiov--;
+ return (1);
+}
+
+/*
+ * Read as much of the data currently wanted on 'conn' as the socket can
+ * supply right now, one fragment at a time (mapped iovec frags first,
+ * then page frags).
+ *
+ * CAVEAT EMPTOR: unlike ksocknal_sendmsg() this returns > 0 for success
+ * (all wanted data read, or a short read) and <= 0 for error (0 == EOF).
+ *
+ * NOTE(review): 'progressed' is only ever assigned 0, so an error or EOF
+ * is always passed straight back even when earlier fragments were read
+ * in this call.  Confirm that is intended before "fixing" it to 1, since
+ * that would hide an EOF/error from the caller.
+ */
+int
+ksocknal_recvmsg (ksock_conn_t *conn)
+{
+        int progressed = 0;
+        ENTRY;
+
+        LASSERT (!in_interrupt ());
+
+        while (1) {
+                int rc;
+
+                LASSERT (conn->ksnc_rx_nob_wanted > 0);
+
+                if (conn->ksnc_rx_niov == 0)
+                        rc = ksocknal_recv_kiov (conn);
+                else
+                        rc = ksocknal_recv_iov (conn);
+
+                if (rc <= 0) {          /* error/EOF or partial receive */
+                        if (progressed || rc == -EAGAIN)
+                                RETURN (1);
+                        RETURN (rc);
+                }
+
+                if (conn->ksnc_rx_nob_wanted == 0)
+                        RETURN (1);
+
+                progressed = 0;
+        }
+}
+
+#if SOCKNAL_ZC
+/* Zero-copy completion callback (registered with zccd_init() in
+ * ksocknal_launch_packet()).  Queues the finished tx on its scheduler's
+ * kss_zctxdone_list and wakes the scheduler, which does the real cleanup. */
+void
+ksocknal_zc_callback (zccd_t *zcd)
+{
+ ksock_tx_t *tx = KSOCK_ZCCD_2_TX(zcd);
+ ksock_sched_t *sched = tx->tx_sched;
+ unsigned long flags;
+ ENTRY;
+
+ /* Schedule tx for cleanup (can't do it now due to lock conflicts) */
+
+ spin_lock_irqsave (&sched->kss_lock, flags);
+
+ list_add_tail (&tx->tx_list, &sched->kss_zctxdone_list);
+ if (waitqueue_active (&sched->kss_waitq))
+ wake_up (&sched->kss_waitq);
+
+ spin_unlock_irqrestore (&sched->kss_lock, flags);
+ EXIT;
+}
+#endif
+
+/*
+ * Finish with a transmit descriptor once its packet has gone.
+ *
+ * Forwarded packets are handed back to the router with kpr_fwd_done().
+ * Locally sourced packets are completed with lib_finalize() and their
+ * descriptor is returned to the idle list it came from (ltx_idle);
+ * anyone blocked waiting for a normal descriptor is woken.
+ */
+void
+ksocknal_tx_done (ksock_tx_t *tx)
+{
+        unsigned long  flags;   /* spin_lock_irqsave() needs unsigned long */
+        ksock_ltx_t   *ltx;
+        ENTRY;
+
+        atomic_dec (&ksocknal_packets_being_sent);
+
+        if (tx->tx_isfwd) {             /* was a forwarded packet? */
+                kpr_fwd_done (&ksocknal_data.ksnd_router,
+                              KSOCK_TX_2_KPR_FWD_DESC (tx), 0);
+                EXIT;
+                return;
+        }
+
+        /* local send */
+        ltx = KSOCK_TX_2_KSOCK_LTX (tx);
+
+        lib_finalize (&ksocknal_lib, ltx->ltx_private, ltx->ltx_cookie);
+
+        spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags);
+
+        list_add_tail (&ltx->ltx_tx.tx_list, ltx->ltx_idle);
+
+        /* normal tx desc => wakeup anyone blocking for one */
+        if (ltx->ltx_idle == &ksocknal_data.ksnd_idle_ltx_list &&
+            waitqueue_active (&ksocknal_data.ksnd_idle_ltx_waitq))
+                wake_up (&ksocknal_data.ksnd_idle_ltx_waitq);
+
+        spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags);
+        EXIT;
+}
+
+/* Progress the first connection on 'sched's transmit list by trying to
+ * send the packet at the head of its tx queue.
+ *
+ * Called with sched->kss_lock held (irq state saved in *irq_flags).  The
+ * lock is dropped around the actual send and is held again on return. */
+void
+ksocknal_process_transmit (ksock_sched_t *sched, long *irq_flags)
+{
+ ksock_conn_t *conn;
+ ksock_tx_t *tx;
+ int rc;
+
+ LASSERT (!list_empty (&sched->kss_tx_conns));
+ conn = list_entry(sched->kss_tx_conns.next, ksock_conn_t, ksnc_tx_list);
+ list_del (&conn->ksnc_tx_list);
+
+ LASSERT (conn->ksnc_tx_scheduled);
+ LASSERT (conn->ksnc_tx_ready);
+ LASSERT (!list_empty (&conn->ksnc_tx_queue));
+ tx = list_entry (conn->ksnc_tx_queue.next, ksock_tx_t, tx_list);
+ /* assume transmit will complete now, so dequeue while I've got lock */
+ list_del (&tx->tx_list);
+
+ spin_unlock_irqrestore (&sched->kss_lock, *irq_flags);
+
+ LASSERT (tx->tx_nob > 0);
+
+ conn->ksnc_tx_ready = 0;/* write_space may race with me and set ready */
+ mb(); /* => clear BEFORE trying to write */
+
+ rc = ksocknal_sendmsg (conn->ksnc_sock, tx,
+ !list_empty (&conn->ksnc_tx_queue)); /* more to come? */
+
+ CDEBUG (D_NET, "send(%d) %d\n", tx->tx_nob, rc);
+
+ if (rc != 0) {
+#warning FIXME: handle socket errors properly
+ CERROR("Error socknal send(%d) %p: %d\n", tx->tx_nob, conn, rc);
+ /* kid on for now the whole packet went.
+ * NB when we handle the error better, we'll still need to
+ * block for zccd completion.
+ */
+ tx->tx_nob = 0;
+ }
+
+ if (tx->tx_nob == 0) /* nothing left to send */
+ {
+ /* everything went; assume more can go, so prevent write_space locking */
+ conn->ksnc_tx_ready = 1;
+
+ /* NB conn stays valid below: the scheduler's ref is still held */
+ ksocknal_put_conn (conn); /* release packet's ref */
+ atomic_inc (&ksocknal_packets_being_sent);
+#if SOCKNAL_ZC
+ if (atomic_read (&tx->tx_zccd.zccd_count) != 1) {
+ /* zccd skbufs are still in-flight. Release my
+ * initial ref on zccd, so callback can occur */
+ zccd_put (&tx->tx_zccd);
+ } else
+#endif
+ ksocknal_tx_done (tx);
+
+ spin_lock_irqsave (&sched->kss_lock, *irq_flags);
+ } else {
+ spin_lock_irqsave (&sched->kss_lock, *irq_flags);
+
+ /* back onto HEAD of tx_queue */
+ list_add (&tx->tx_list, &conn->ksnc_tx_queue);
+ }
+
+ if (!conn->ksnc_tx_ready || /* no space to write now */
+ list_empty (&conn->ksnc_tx_queue)) {/* nothing to write */
+ conn->ksnc_tx_scheduled = 0; /* not being scheduled */
+ ksocknal_put_conn (conn); /* release scheduler's ref */
+ } else /* let scheduler call me again */
+ list_add_tail (&conn->ksnc_tx_list, &sched->kss_tx_conns);
+}
+
+/* Queue 'tx' for transmission on 'conn' and, if the connection can send
+ * and is not already scheduled, put it on its scheduler's transmit list
+ * (taking an extra conn ref for the scheduler) and wake the scheduler.
+ * The caller's reference on 'conn' passes to the packet; it is released
+ * in ksocknal_process_transmit() once the packet has been sent. */
+void
+ksocknal_launch_packet (ksock_conn_t *conn, ksock_tx_t *tx)
+{
+ unsigned long flags;
+ ksock_sched_t *sched = conn->ksnc_scheduler;
+
+ /* Ensure the frags we've been given EXACTLY match the number of
+ * bytes we want to send. Many TCP/IP stacks disregard any total
+ * size parameters passed to them and just look at the frags.
+ *
+ * We always expect at least 1 mapped fragment containing the
+ * complete portals header.
+ */
+ LASSERT (lib_iov_nob (tx->tx_niov, tx->tx_iov) +
+ lib_kiov_nob (tx->tx_nkiov, tx->tx_kiov) == tx->tx_nob);
+ LASSERT (tx->tx_niov >= 1);
+ LASSERT (tx->tx_iov[0].iov_len >= sizeof (ptl_hdr_t));
+
+ CDEBUG (D_NET, "type %d, nob %d niov %d nkiov %d\n",
+ ((ptl_hdr_t *)tx->tx_iov[0].iov_base)->type, tx->tx_nob,
+ tx->tx_niov, tx->tx_nkiov);
+
+#if SOCKNAL_ZC
+ zccd_init (&tx->tx_zccd, ksocknal_zc_callback);
+ /* NB this sets 1 ref on zccd, so the callback can only occur
+ * after I've released this ref */
+ tx->tx_sched = sched;
+#endif
+ spin_lock_irqsave (&sched->kss_lock, flags);
+
+ list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue);
+
+ if (conn->ksnc_tx_ready && /* able to send */
+ !conn->ksnc_tx_scheduled) { /* not scheduled to send */
+ list_add_tail (&conn->ksnc_tx_list, &sched->kss_tx_conns);
+ conn->ksnc_tx_scheduled = 1;
+ atomic_inc (&conn->ksnc_refcount); /* extra ref for scheduler */
+ if (waitqueue_active (&sched->kss_waitq))
+ wake_up (&sched->kss_waitq);
+ }
+
+ spin_unlock_irqrestore (&sched->kss_lock, flags);
+
+ atomic_inc (&ksocknal_packets_launched);
+}
+
+/*
+ * Find the connection to use to reach 'nid': the direct connection if
+ * 'nid' is a peer, otherwise the connection to the gateway the router
+ * nominates.  Returns NULL (after logging why) if there is no route.
+ */
+ksock_conn_t *
+ksocknal_send_target (ptl_nid_t nid)
+{
+        ptl_nid_t     gatewaynid;
+        ksock_conn_t *conn;
+        int           rc;
+
+        conn = ksocknal_get_conn (nid);
+        if (conn != NULL)                       /* direct peer */
+                return (conn);
+
+        /* It's not a peer; try to find a gateway */
+        rc = kpr_lookup (&ksocknal_data.ksnd_router, nid, &gatewaynid);
+        if (rc != 0) {
+                CERROR("Can't route to "LPX64": router error %d\n",
+                       nid, rc);
+                return (NULL);
+        }
+
+        conn = ksocknal_get_conn (gatewaynid);
+        if (conn == NULL)
+                CERROR ("Can't route to "LPX64": gateway "LPX64
+                        " is not a peer\n", nid, gatewaynid);
+
+        return (conn);
+}
+
+/*
+ * Get a local transmit descriptor and initialise it to send just the
+ * portals header '*hdr'; callers add payload fragments afterwards.
+ *
+ * 'private' and 'cookie' are stashed for the eventual lib_finalize().
+ * ACKs, REPLYs and anything sent from interrupt context must not block
+ * for a descriptor.  'nal' is unused.
+ *
+ * Returns the descriptor, or NULL if none could be obtained.
+ */
+ksock_ltx_t *
+ksocknal_setup_hdr (nal_cb_t *nal, void *private, lib_msg_t *cookie,
+                    ptl_hdr_t *hdr, int type)
+{
+        ksock_ltx_t *ltx;
+
+        /* I may not block for a transmit descriptor if I might block the
+         * receiver, or an interrupt handler. */
+        ltx = ksocknal_get_ltx (!(type == PTL_MSG_ACK ||
+                                  type == PTL_MSG_REPLY ||
+                                  in_interrupt ()));
+        if (ltx == NULL) {
+                CERROR ("Can't allocate tx desc\n");
+                return (NULL);
+        }
+
+        /* Init local send packet (storage for hdr, finalize() args) */
+        ltx->ltx_hdr = *hdr;
+        ltx->ltx_private = private;
+        ltx->ltx_cookie = cookie;
+
+        /* Init common ltx_tx */
+        ltx->ltx_tx.tx_isfwd = 0;
+        ltx->ltx_tx.tx_nob = sizeof (*hdr);
+
+        /* We always have 1 mapped frag for the header; it points at my
+         * private copy, so the caller's 'hdr' need not outlive this call */
+        ltx->ltx_tx.tx_niov = 1;
+        ltx->ltx_tx.tx_iov = &ltx->ltx_iov_space.hdr;
+        ltx->ltx_tx.tx_iov[0].iov_base = &ltx->ltx_hdr;
+        ltx->ltx_tx.tx_iov[0].iov_len = sizeof (ltx->ltx_hdr);
+
+        ltx->ltx_tx.tx_kiov = NULL;
+        ltx->ltx_tx.tx_nkiov = 0;
+
+        return (ltx);
+}
+
+/*
+ * Library send callback for payloads described by mapped (struct iovec)
+ * fragments.
+ *
+ * Returns 0 once the packet has been queued, or -1 if there is no route
+ * to 'nid' or no transmit descriptor is available.  In the failure cases
+ * the message is still completed with lib_finalize().
+ */
+int
+ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie,
+               ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+               unsigned int payload_niov, struct iovec *payload_iov, size_t payload_len)
+{
+        ksock_ltx_t  *ltx;
+        ksock_conn_t *conn;
+
+        /* NB 'private' is different depending on what we're sending.
+         * Just ignore it until we can rely on it
+         *
+         * Also, the return code from this procedure is ignored.
+         * If we can't send, we must still complete with lib_finalize().
+         * We'll have to wait for 3.2 to return an error event.
+         */
+
+        CDEBUG(D_NET,
+               "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64" pid %d\n",
+               payload_len, payload_niov, nid, pid);
+
+        conn = ksocknal_send_target (nid);
+        if (conn == NULL) {
+                lib_finalize (&ksocknal_lib, private, cookie);
+                return (-1);
+        }
+
+        ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type);
+        if (ltx == NULL) {
+                ksocknal_put_conn (conn);
+                lib_finalize (&ksocknal_lib, private, cookie);
+                return (-1);
+        }
+
+        /* append the payload_iovs to the one pointing at the header */
+        LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0);
+        LASSERT (payload_niov <= PTL_MD_MAX_IOV);
+
+        /* NB tx_iov points at ltx_iov_space.hdr, so (tx_iov + 1) relies on
+         * the payload iovec array following it directly in memory */
+        memcpy (ltx->ltx_tx.tx_iov + 1, payload_iov,
+                payload_niov * sizeof (*payload_iov));
+        ltx->ltx_tx.tx_niov = 1 + payload_niov;
+        ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len;
+
+        /* my ref on conn now belongs to the packet */
+        ksocknal_launch_packet (conn, &ltx->ltx_tx);
+        return (0);
+}
+
+/*
+ * Library send callback for payloads described by page (ptl_kiov_t)
+ * fragments.
+ *
+ * Returns 0 once the packet has been queued, or -1 if there is no route
+ * to 'nid' or no transmit descriptor is available.  As in ksocknal_send(),
+ * the return code is ignored by the library, so the message is completed
+ * with lib_finalize() in the failure cases too.
+ */
+int
+ksocknal_send_pages (nal_cb_t *nal, void *private, lib_msg_t *cookie,
+                     ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+                     unsigned int payload_niov, ptl_kiov_t *payload_iov, size_t payload_len)
+{
+        ksock_ltx_t  *ltx;
+        ksock_conn_t *conn;
+
+        /* NB 'private' is different depending on what we're sending.
+         * Just ignore it until we can rely on it */
+
+        CDEBUG(D_NET,
+               "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64" pid %d\n",
+               payload_len, payload_niov, nid, pid);
+
+        conn = ksocknal_send_target (nid);
+        if (conn == NULL) {
+                /* can't send: must still complete the message */
+                lib_finalize (&ksocknal_lib, private, cookie);
+                return (-1);
+        }
+
+        ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type);
+        if (ltx == NULL) {
+                ksocknal_put_conn (conn);
+                /* can't send: must still complete the message */
+                lib_finalize (&ksocknal_lib, private, cookie);
+                return (-1);
+        }
+
+        LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0);
+        LASSERT (payload_niov <= PTL_MD_MAX_IOV);
+
+        /* header stays in the single iovec; payload goes in the kiov array */
+        ltx->ltx_tx.tx_kiov = ltx->ltx_iov_space.payload.kiov;
+        memcpy (ltx->ltx_tx.tx_kiov, payload_iov,
+                payload_niov * sizeof (*payload_iov));
+        ltx->ltx_tx.tx_nkiov = payload_niov;
+        ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len;
+
+        /* my ref on conn now belongs to the packet */
+        ksocknal_launch_packet (conn, &ltx->ltx_tx);
+        return (0);
+}
+
+/*
+ * Router callback: forward the packet described by 'fwd'.
+ *
+ * The transmit descriptor is built in the forwarding descriptor's scratch
+ * area.  The packet goes to the gateway NID, unless I am that gateway, in
+ * which case it goes straight to the target.  If the next hop is not a
+ * peer, the forward is completed with -EHOSTUNREACH.  'arg' is unused.
+ */
+void
+ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
+{
+        ksock_tx_t   *tx = (ksock_tx_t *)&fwd->kprfd_scratch;
+        ptl_nid_t     nid = fwd->kprfd_gateway_nid;
+        ksock_conn_t *conn;
+
+        CDEBUG (D_NET, "Forwarding [%p] -> "LPX64" ("LPX64"))\n", fwd,
+                fwd->kprfd_gateway_nid, fwd->kprfd_target_nid);
+
+        /* I'm the gateway; must be the last hop */
+        if (nid == ksocknal_lib.ni.nid)
+                nid = fwd->kprfd_target_nid;
+
+        conn = ksocknal_get_conn (nid);
+        if (conn == NULL) {
+                CERROR ("[%p] fwd to "LPX64" isn't a peer\n", fwd, nid);
+                kpr_fwd_done (&ksocknal_data.ksnd_router, fwd, -EHOSTUNREACH);
+                return;
+        }
+
+        /* This forward has now got a ref on conn */
+
+        /* forwarded packets only ever have mapped (iovec) frags */
+        tx->tx_nkiov = 0;
+        tx->tx_kiov = NULL;
+        tx->tx_niov = fwd->kprfd_niov;
+        tx->tx_iov = fwd->kprfd_iov;
+        tx->tx_nob = fwd->kprfd_nob;
+        tx->tx_isfwd = 1;               /* This is a forwarding packet */
+
+        ksocknal_launch_packet (conn, tx);
+}
+
+/*
+ * Spawn a kernel thread running fn(arg).
+ *
+ * On success the NAL's thread count is incremented and 0 is returned;
+ * otherwise the negative value from kernel_thread() is returned.
+ */
+int
+ksocknal_thread_start (int (*fn)(void *arg), void *arg)
+{
+        long rc = kernel_thread (fn, arg, 0);
+
+        if (rc >= 0) {
+                atomic_inc (&ksocknal_data.ksnd_nthreads);
+                return (0);
+        }
+
+        return ((int)rc);
+}
+
+/*
+ * Drop the NAL's thread count by one; the counterpart of the increment
+ * done in ksocknal_thread_start().  Called by the scheduler and reaper
+ * threads just before they return.
+ */
+void
+ksocknal_thread_fini (void)
+{
+ atomic_dec (&ksocknal_data.ksnd_nthreads);
+}
+
+/*
+ * Completion callback for a forwarded packet (registered via
+ * kpr_fwd_init() in ksocknal_init_fmb()).
+ *
+ * 'arg' is the forwarding message buffer, 'error' the forwarding result
+ * (0 on success).  The buffer goes back on its pool's idle list.  If a
+ * connection is blocked waiting for a buffer from this pool, it is taken
+ * off the blocked list, put back in the SOCKNAL_RX_GET_FMB state and
+ * queued on its scheduler's rx list.
+ */
+void
+ksocknal_fmb_callback (void *arg, int error)
+{
+        ksock_fmb_t       *fmb = (ksock_fmb_t *)arg;
+        ksock_fmb_pool_t  *fmp = fmb->fmb_pool;
+        ptl_hdr_t         *hdr = (ptl_hdr_t *) page_address(fmb->fmb_pages[0]);
+        ksock_conn_t      *conn = NULL;
+        ksock_sched_t     *sched;
+        unsigned long      flags;   /* irqsave state must be unsigned long */
+
+        if (error != 0)
+                CERROR("Failed to route packet from "LPX64" to "LPX64": %d\n",
+                       NTOH__u64(hdr->src_nid), NTOH__u64(hdr->dest_nid),
+                       error);
+        else
+                CDEBUG (D_NET, "routed packet from "LPX64" to "LPX64": OK\n",
+                        NTOH__u64 (hdr->src_nid), NTOH__u64 (hdr->dest_nid));
+
+        spin_lock_irqsave (&fmp->fmp_lock, flags);
+
+        /* buffer is idle again */
+        list_add (&fmb->fmb_list, &fmp->fmp_idle_fmbs);
+
+        /* pick the first connection waiting on this pool, if any */
+        if (!list_empty (&fmp->fmp_blocked_conns)) {
+                conn = list_entry (fmp->fmp_blocked_conns.next,
+                                   ksock_conn_t, ksnc_rx_list);
+                list_del (&conn->ksnc_rx_list);
+        }
+
+        spin_unlock_irqrestore (&fmp->fmp_lock, flags);
+
+        if (conn == NULL)
+                return;
+
+        CDEBUG (D_NET, "Scheduling conn %p\n", conn);
+        LASSERT (conn->ksnc_rx_scheduled);
+        LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_FMB_SLEEP);
+
+        /* have it retry the buffer allocation */
+        conn->ksnc_rx_state = SOCKNAL_RX_GET_FMB;
+
+        sched = conn->ksnc_scheduler;
+
+        spin_lock_irqsave (&sched->kss_lock, flags);
+
+        list_add_tail (&conn->ksnc_rx_list, &sched->kss_rx_conns);
+
+        if (waitqueue_active (&sched->kss_waitq))
+                wake_up (&sched->kss_waitq);
+
+        spin_unlock_irqrestore (&sched->kss_lock, flags);
+}
+
+/*
+ * Take an idle forwarding message buffer for the packet 'conn' is about
+ * to receive.
+ *
+ * The small pool is used when header plus payload fit in
+ * SOCKNAL_SMALL_FWD_PAGES pages, the large pool otherwise.  If the chosen
+ * pool has an idle buffer, it is removed from the idle list and returned.
+ * Otherwise the connection is put in the SOCKNAL_RX_FMB_SLEEP state,
+ * queued on the pool's blocked list and NULL is returned;
+ * ksocknal_fmb_callback() reschedules it when a buffer is freed.
+ */
+ksock_fmb_t *
+ksocknal_get_idle_fmb (ksock_conn_t *conn)
+{
+        int               payload_nob = conn->ksnc_rx_nob_left;
+        int               packet_nob = sizeof (ptl_hdr_t) + payload_nob;
+        unsigned long     flags;   /* irqsave state must be unsigned long */
+        ksock_fmb_pool_t *pool;
+        ksock_fmb_t      *fmb;
+
+        LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
+        LASSERT (ksocknal_data.ksnd_fmbs != NULL);
+
+        if (packet_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE)
+                pool = &ksocknal_data.ksnd_small_fmp;
+        else
+                pool = &ksocknal_data.ksnd_large_fmp;
+
+        spin_lock_irqsave (&pool->fmp_lock, flags);
+
+        if (!list_empty (&pool->fmp_idle_fmbs)) {
+                fmb = list_entry(pool->fmp_idle_fmbs.next,
+                                 ksock_fmb_t, fmb_list);
+                list_del (&fmb->fmb_list);
+                spin_unlock_irqrestore (&pool->fmp_lock, flags);
+
+                return (fmb);
+        }
+
+        /* deschedule until fmb free */
+
+        conn->ksnc_rx_state = SOCKNAL_RX_FMB_SLEEP;
+
+        list_add_tail (&conn->ksnc_rx_list,
+                       &pool->fmp_blocked_conns);
+
+        spin_unlock_irqrestore (&pool->fmp_lock, flags);
+        return (NULL);
+}
+
+
+/*
+ * Prepare forwarding buffer 'fmb' for the packet whose header has just
+ * been read into conn->ksnc_hdr.
+ *
+ * The header is copied into the first page of the buffer.  With no payload
+ * the packet is handed to the router at once, the conn is reset for the
+ * next header and 1 is returned.  Otherwise fmb->fmb_iov is set up to span
+ * the whole packet, the router descriptor is stashed in conn->ksnc_cookie,
+ * the conn's rx iov is pointed at the payload part of the buffer, the
+ * state becomes SOCKNAL_RX_BODY_FWD and 0 is returned.
+ */
+int
+ksocknal_init_fmb (ksock_conn_t *conn, ksock_fmb_t *fmb)
+{
+ int payload_nob = conn->ksnc_rx_nob_left;
+ int packet_nob = sizeof (ptl_hdr_t) + payload_nob;
+ ptl_nid_t dest_nid = NTOH__u64 (conn->ksnc_hdr.dest_nid);
+ int niov; /* at least the header */
+ int nob;
+
+ LASSERT (conn->ksnc_rx_scheduled);
+ LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
+ LASSERT (conn->ksnc_rx_nob_wanted == conn->ksnc_rx_nob_left);
+ LASSERT (payload_nob >= 0);
+ LASSERT (packet_nob <= fmb->fmb_npages * PAGE_SIZE);
+ LASSERT (sizeof (ptl_hdr_t) < PAGE_SIZE);
+
+ /* Got a forwarding buffer; copy the header we just read into the
+ * forwarding buffer. If there's payload start reading it
+ * into the buffer, otherwise the forwarding buffer can be kicked
+ * off immediately.
+ *
+ * NB fmb->fmb_iov spans the WHOLE packet.
+ * conn->ksnc_rx_iov spans just the payload.
+ */
+
+ fmb->fmb_iov[0].iov_base = page_address (fmb->fmb_pages[0]);
+
+ /* copy header */
+ memcpy (fmb->fmb_iov[0].iov_base, &conn->ksnc_hdr, sizeof (ptl_hdr_t));
+
+ if (payload_nob == 0) { /* got complete packet already */
+ atomic_inc (&ksocknal_packets_received);
+
+ CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (immediate)\n",
+ conn, NTOH__u64 (conn->ksnc_hdr.src_nid),
+ dest_nid, packet_nob);
+
+ fmb->fmb_iov[0].iov_len = sizeof (ptl_hdr_t);
+
+ kpr_fwd_init (&fmb->fmb_fwd, dest_nid,
+ packet_nob, 1, fmb->fmb_iov,
+ ksocknal_fmb_callback, fmb);
+
+ /* forward it now */
+ kpr_fwd_start (&ksocknal_data.ksnd_router, &fmb->fmb_fwd);
+
+ ksocknal_new_packet (conn, 0); /* on to next packet */
+ return (1);
+ }
+
+ /* describe the whole packet, one iov entry per buffer page */
+ niov = 1;
+ if (packet_nob <= PAGE_SIZE) { /* whole packet fits in first page */
+ fmb->fmb_iov[0].iov_len = packet_nob;
+ } else {
+ fmb->fmb_iov[0].iov_len = PAGE_SIZE;
+ nob = packet_nob - PAGE_SIZE;
+
+ do {
+ LASSERT (niov < fmb->fmb_npages);
+ fmb->fmb_iov[niov].iov_base =
+ page_address (fmb->fmb_pages[niov]);
+ fmb->fmb_iov[niov].iov_len = MIN (PAGE_SIZE, nob);
+ nob -= PAGE_SIZE;
+ niov++;
+ } while (nob > 0);
+ }
+
+ kpr_fwd_init (&fmb->fmb_fwd, dest_nid,
+ packet_nob, niov, fmb->fmb_iov,
+ ksocknal_fmb_callback, fmb);
+
+ /* stash router's descriptor ready for call to kpr_fwd_start */
+ conn->ksnc_cookie = &fmb->fmb_fwd;
+
+ conn->ksnc_rx_state = SOCKNAL_RX_BODY_FWD; /* read in the payload */
+
+ /* payload is desc's iov-ed buffer, but skipping the hdr */
+ LASSERT (niov <= sizeof (conn->ksnc_rx_iov_space) /
+ sizeof (struct iovec));
+
+ /* first rx fragment starts just after the header copied above */
+ conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space;
+ conn->ksnc_rx_iov[0].iov_base =
+ (void *)(((unsigned long)fmb->fmb_iov[0].iov_base) +
+ sizeof (ptl_hdr_t));
+ conn->ksnc_rx_iov[0].iov_len =
+ fmb->fmb_iov[0].iov_len - sizeof (ptl_hdr_t);
+
+ /* remaining fragments are identical to the forwarding iov */
+ if (niov > 1)
+ memcpy(&conn->ksnc_rx_iov[1], &fmb->fmb_iov[1],
+ (niov - 1) * sizeof (struct iovec));
+
+ conn->ksnc_rx_niov = niov;
+
+ CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d reading body\n", conn,
+ NTOH__u64 (conn->ksnc_hdr.src_nid), dest_nid, payload_nob);
+ return (0);
+}
+
+/*
+ * Decide what to do with a freshly read header that is not addressed to
+ * this node.
+ *
+ * On return conn->ksnc_rx_state says what happens next:
+ *   SOCKNAL_RX_HEADER  - packet dropped, nothing to skip
+ *   SOCKNAL_RX_SLOP    - packet dropped, its body must be skipped
+ *   SOCKNAL_RX_GET_FMB - packet will be forwarded; the body length is
+ *                        stashed in ksnc_rx_nob_left/ksnc_rx_nob_wanted
+ *
+ * Packets are dropped when the length is corrupt, when forwarding
+ * buffers are not allocated, when the body exceeds
+ * SOCKNAL_MAX_FWD_PAYLOAD, or when the destination is a direct peer.
+ *
+ * All nids are converted from wire order once, up front, and the
+ * host-order values are used for both the peer lookup and the messages.
+ */
+void
+ksocknal_fwd_parse (ksock_conn_t *conn)
+{
+        ksock_conn_t *conn2;
+        ptl_nid_t     src_nid = NTOH__u64 (conn->ksnc_hdr.src_nid);
+        ptl_nid_t     dest_nid = NTOH__u64 (conn->ksnc_hdr.dest_nid);
+        int           body_len = NTOH__u32 (PTL_HDR_LENGTH(&conn->ksnc_hdr));
+
+        CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d parsing header\n", conn,
+                src_nid, dest_nid, conn->ksnc_rx_nob_left);
+
+        LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER);
+        LASSERT (conn->ksnc_rx_scheduled);
+
+        if (body_len < 0) {                 /* length corrupt (overflow) */
+                CERROR("dropping packet from "LPX64" for "LPX64": packet "
+                       "size %d illegal\n", src_nid, dest_nid, body_len);
+                ksocknal_new_packet (conn, 0);          /* on to new packet */
+                return;
+        }
+
+        if (ksocknal_data.ksnd_fmbs == NULL) {          /* not forwarding */
+                CERROR("dropping packet from "LPX64" for "LPX64": not "
+                       "forwarding\n", src_nid, dest_nid);
+                /* on to new packet (skip this one's body) */
+                ksocknal_new_packet (conn, body_len);
+                return;
+        }
+
+        if (body_len > SOCKNAL_MAX_FWD_PAYLOAD) {      /* too big to forward */
+                CERROR ("dropping packet from "LPX64" for "LPX64
+                        ": packet size %d too big\n", src_nid,
+                        dest_nid, body_len);
+                /* on to new packet (skip this one's body) */
+                ksocknal_new_packet (conn, body_len);
+                return;
+        }
+
+        /* should have gone direct; connections are keyed by host-order nid */
+        conn2 = ksocknal_get_conn (dest_nid);
+        if (conn2 != NULL) {
+                CERROR ("dropping packet from "LPX64" for "LPX64
+                        ": target is a peer\n", src_nid, dest_nid);
+                ksocknal_put_conn (conn2);  /* drop ref from get above */
+
+                /* on to next packet (skip this one's body) */
+                ksocknal_new_packet (conn, body_len);
+                return;
+        }
+
+        conn->ksnc_rx_state = SOCKNAL_RX_GET_FMB;       /* Getting FMB now */
+        conn->ksnc_rx_nob_left = body_len;              /* stash packet size */
+        conn->ksnc_rx_nob_wanted = body_len;            /* (no slop) */
+}
+
+/*
+ * Set up 'conn' to receive whatever comes next on the wire.
+ *
+ * With nob_to_skip == 0 the conn is at a packet boundary: it is primed to
+ * read a header into conn->ksnc_hdr and 1 is returned.  Otherwise the conn
+ * is primed to discard up to nob_to_skip bytes into a shared slop buffer
+ * (as many as the conn's rx iov space can describe) and 0 is returned.
+ * The full amount is left in ksnc_rx_nob_left.
+ */
+int
+ksocknal_new_packet (ksock_conn_t *conn, int nob_to_skip)
+{
+        static char ksocknal_slop_buffer[4096];
+
+        struct iovec *iov = (struct iovec *)&conn->ksnc_rx_iov_space;
+        int           chunk;
+        int           used;
+        int           total;
+
+        if (nob_to_skip != 0) {
+                /* Skip as much as possible now.  If entries run out
+                 * before the skip is complete we get called again */
+                conn->ksnc_rx_state = SOCKNAL_RX_SLOP;
+                conn->ksnc_rx_nob_left = nob_to_skip;
+                conn->ksnc_rx_iov = iov;
+                total = 0;
+                used = 0;
+
+                do {
+                        chunk = MIN (nob_to_skip, sizeof (ksocknal_slop_buffer));
+
+                        /* every entry targets the same throw-away buffer */
+                        iov[used].iov_base = ksocknal_slop_buffer;
+                        iov[used].iov_len  = chunk;
+                        used++;
+                        total += chunk;
+                        nob_to_skip -= chunk;
+
+                } while (nob_to_skip != 0 &&    /* mustn't overflow conn's rx iov */
+                         used < sizeof(conn->ksnc_rx_iov_space) / sizeof (struct iovec));
+
+                conn->ksnc_rx_niov = used;
+                conn->ksnc_rx_kiov = NULL;
+                conn->ksnc_rx_nkiov = 0;
+                conn->ksnc_rx_nob_wanted = total;
+                return (0);
+        }
+
+        /* right at next packet boundary now: read a header */
+        conn->ksnc_rx_state = SOCKNAL_RX_HEADER;
+        conn->ksnc_rx_nob_wanted = sizeof (ptl_hdr_t);
+        conn->ksnc_rx_nob_left = sizeof (ptl_hdr_t);
+
+        conn->ksnc_rx_iov = iov;
+        iov[0].iov_base = (char *)&conn->ksnc_hdr;
+        iov[0].iov_len  = sizeof (ptl_hdr_t);
+        conn->ksnc_rx_niov = 1;
+
+        conn->ksnc_rx_kiov = NULL;
+        conn->ksnc_rx_nkiov = 0;
+        return (1);
+}
+
+/*
+ * Progress the first connection on the scheduler's rx list.
+ *
+ * Called with sched->kss_lock held (irq state in *irq_flags); the lock is
+ * dropped while the connection is worked on and re-taken before return.
+ *
+ * The connection is driven by ksnc_rx_state: read a header, then
+ * either hand it to lib_parse() (packet for this node), skip its body, or
+ * acquire a forwarding buffer and read the body into it for the router.
+ * On exit the connection is either requeued on the rx list (more data
+ * assumed ready), descheduled with the scheduler's ref dropped, or left
+ * on a forwarding pool's blocked list by ksocknal_get_idle_fmb().
+ *
+ * NOTE(review): ksocknal_scheduler() passes the address of an
+ * 'unsigned long' for irq_flags, which is declared 'long *' here.
+ */
+void
+ksocknal_process_receive (ksock_sched_t *sched, long *irq_flags)
+{
+ ksock_conn_t *conn;
+ ksock_fmb_t *fmb;
+ int rc;
+
+ /* NB: sched->kss_lock held on entry */
+
+ LASSERT (!list_empty (&sched->kss_rx_conns));
+ conn = list_entry(sched->kss_rx_conns.next, ksock_conn_t, ksnc_rx_list);
+ list_del (&conn->ksnc_rx_list);
+
+ spin_unlock_irqrestore (&sched->kss_lock, *irq_flags);
+
+ CDEBUG(D_NET, "sched %p conn %p\n", sched, conn);
+ LASSERT (atomic_read (&conn->ksnc_refcount) > 0);
+ LASSERT (conn->ksnc_rx_scheduled);
+ LASSERT (conn->ksnc_rx_ready);
+
+ /* doesn't need a forwarding buffer */
+ if (conn->ksnc_rx_state != SOCKNAL_RX_GET_FMB)
+ goto try_read;
+
+ get_fmb:
+ fmb = ksocknal_get_idle_fmb (conn);
+ if (fmb == NULL) { /* conn descheduled waiting for idle fmb */
+ spin_lock_irqsave (&sched->kss_lock, *irq_flags);
+ return;
+ }
+
+ if (ksocknal_init_fmb (conn, fmb)) /* packet forwarded ? */
+ goto out; /* come back later for next packet */
+
+ try_read:
+ /* NB: sched lock NOT held */
+ LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER ||
+ conn->ksnc_rx_state == SOCKNAL_RX_BODY ||
+ conn->ksnc_rx_state == SOCKNAL_RX_BODY_FWD ||
+ conn->ksnc_rx_state == SOCKNAL_RX_SLOP);
+
+ LASSERT (conn->ksnc_rx_nob_wanted > 0);
+
+ conn->ksnc_rx_ready = 0;/* data ready may race with me and set ready */
+ mb(); /* => clear BEFORE trying to read */
+
+ rc = ksocknal_recvmsg(conn);
+
+ if (rc == 0)
+ goto out;
+ if (rc < 0) {
+#warning FIXME: handle socket errors properly
+ CERROR ("Error socknal read %p: %d\n", conn, rc);
+ goto out;
+ }
+
+ if (conn->ksnc_rx_nob_wanted != 0) /* short read */
+ goto out; /* try again later */
+
+ /* got all I wanted, assume there's more - prevent data_ready locking */
+ conn->ksnc_rx_ready = 1;
+
+ /* everything wanted for the current state has arrived; advance it */
+ switch (conn->ksnc_rx_state) {
+ case SOCKNAL_RX_HEADER:
+ /* It's not for me */
+ if (conn->ksnc_hdr.type != PTL_MSG_HELLO &&
+ NTOH__u64(conn->ksnc_hdr.dest_nid) != ksocknal_lib.ni.nid) {
+ ksocknal_fwd_parse (conn);
+ switch (conn->ksnc_rx_state) {
+ case SOCKNAL_RX_HEADER: /* skipped (zero payload) */
+ goto out; /* => come back later */
+ case SOCKNAL_RX_SLOP: /* skipping packet's body */
+ goto try_read; /* => go read it */
+ case SOCKNAL_RX_GET_FMB: /* forwarding */
+ goto get_fmb; /* => go get a fwd msg buffer */
+ default:
+ LBUG ();
+ }
+ /* Not Reached */
+ }
+
+ PROF_START(lib_parse);
+ /* sets wanted_len, iovs etc */
+ lib_parse(&ksocknal_lib, &conn->ksnc_hdr, conn);
+ PROF_FINISH(lib_parse);
+
+ if (conn->ksnc_rx_nob_wanted != 0) { /* need to get payload? */
+ conn->ksnc_rx_state = SOCKNAL_RX_BODY;
+ goto try_read; /* go read the payload */
+ }
+ /* Fall through (completed packet for me) */
+
+ case SOCKNAL_RX_BODY:
+ atomic_inc (&ksocknal_packets_received);
+ /* packet is done now */
+ lib_finalize(&ksocknal_lib, NULL, conn->ksnc_cookie);
+ /* Fall through */
+
+ case SOCKNAL_RX_SLOP:
+ /* starting new packet? */
+ if (ksocknal_new_packet (conn, conn->ksnc_rx_nob_left))
+ goto out; /* come back later */
+ goto try_read; /* try to finish reading slop now */
+
+ case SOCKNAL_RX_BODY_FWD:
+ CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (got body)\n",
+ conn, NTOH__u64 (conn->ksnc_hdr.src_nid),
+ NTOH__u64 (conn->ksnc_hdr.dest_nid),
+ conn->ksnc_rx_nob_left);
+
+ atomic_inc (&ksocknal_packets_received);
+
+ /* ksocknal_init_fmb() put router desc. in conn->ksnc_cookie */
+ kpr_fwd_start (&ksocknal_data.ksnd_router,
+ (kpr_fwd_desc_t *)conn->ksnc_cookie);
+
+ /* no slop in forwarded packets */
+ LASSERT (conn->ksnc_rx_nob_left == 0);
+
+ ksocknal_new_packet (conn, 0); /* on to next packet */
+ goto out; /* (later) */
+
+ default:
+ }
+
+ /* Not Reached */
+ LBUG ();
+
+ out:
+ /* re-take the scheduler lock; the caller expects it held on return */
+ spin_lock_irqsave (&sched->kss_lock, *irq_flags);
+
+ /* no data there to read? */
+ if (!conn->ksnc_rx_ready) {
+ /* let socket callback schedule again */
+ conn->ksnc_rx_scheduled = 0;
+ ksocknal_put_conn (conn); /* release scheduler's ref */
+ } else /* let scheduler call me again */
+ list_add_tail (&conn->ksnc_rx_list, &sched->kss_rx_conns);
+}
+
+/*
+ * NAL receive callback for iovec-described buffers.
+ *
+ * 'private' is the receiving connection.  Records that 'mlen' of the
+ * 'rlen' incoming bytes are wanted, copies the caller's 'niov' iovec
+ * entries into the connection's rx iov space and clears its kiov.
+ * Always returns rlen.
+ */
+int
+ksocknal_recv (nal_cb_t *nal, void *private, lib_msg_t *msg,
+               unsigned int niov, struct iovec *iov, size_t mlen, size_t rlen)
+{
+        ksock_conn_t *rx_conn = private;
+        struct iovec *rx_iov;
+
+        LASSERT (mlen <= rlen);
+        LASSERT (niov <= PTL_MD_MAX_IOV);
+
+        /* byte accounting for this message */
+        rx_conn->ksnc_cookie = msg;
+        rx_conn->ksnc_rx_nob_wanted = mlen;
+        rx_conn->ksnc_rx_nob_left = rlen;
+
+        /* iovec receive: no page fragments */
+        rx_conn->ksnc_rx_kiov = NULL;
+        rx_conn->ksnc_rx_nkiov = 0;
+
+        rx_iov = rx_conn->ksnc_rx_iov_space.iov;
+        rx_conn->ksnc_rx_iov = rx_iov;
+        rx_conn->ksnc_rx_niov = niov;
+        memcpy (rx_iov, iov, niov * sizeof (*iov));
+
+        /* the fragments must cover exactly what is wanted */
+        LASSERT (mlen ==
+                 lib_iov_nob (rx_conn->ksnc_rx_niov, rx_conn->ksnc_rx_iov) +
+                 lib_kiov_nob (rx_conn->ksnc_rx_nkiov, rx_conn->ksnc_rx_kiov));
+
+        return (rlen);
+}
+
+/*
+ * NAL receive callback for page-fragment (kiov) buffers.
+ *
+ * 'private' is the receiving connection.  Records that 'mlen' of the
+ * 'rlen' incoming bytes are wanted, copies the caller's 'niov' kiov
+ * entries into the connection's rx iov space and clears its iovec.
+ * Always returns rlen.
+ */
+int
+ksocknal_recv_pages (nal_cb_t *nal, void *private, lib_msg_t *msg,
+                     unsigned int niov, ptl_kiov_t *kiov, size_t mlen, size_t rlen)
+{
+        ksock_conn_t *rx_conn = private;
+        ptl_kiov_t   *rx_kiov;
+
+        LASSERT (mlen <= rlen);
+        LASSERT (niov <= PTL_MD_MAX_IOV);
+
+        /* byte accounting for this message */
+        rx_conn->ksnc_cookie = msg;
+        rx_conn->ksnc_rx_nob_wanted = mlen;
+        rx_conn->ksnc_rx_nob_left = rlen;
+
+        /* page receive: no iovec fragments */
+        rx_conn->ksnc_rx_iov = NULL;
+        rx_conn->ksnc_rx_niov = 0;
+
+        rx_kiov = rx_conn->ksnc_rx_iov_space.kiov;
+        rx_conn->ksnc_rx_kiov = rx_kiov;
+        rx_conn->ksnc_rx_nkiov = niov;
+        memcpy (rx_kiov, kiov, niov * sizeof (*kiov));
+
+        /* the fragments must cover exactly what is wanted */
+        LASSERT (mlen ==
+                 lib_iov_nob (rx_conn->ksnc_rx_niov, rx_conn->ksnc_rx_iov) +
+                 lib_kiov_nob (rx_conn->ksnc_rx_nkiov, rx_conn->ksnc_rx_kiov));
+
+        return (rlen);
+}
+
+/*
+ * Scheduler thread body; 'arg' is this thread's ksock_sched_t.
+ *
+ * Until ksnd_shuttingdown is set, each loop iteration progresses at most
+ * one receiving connection, one transmitting connection and (with
+ * SOCKNAL_ZC) one completed zero-copy tx.  When nothing was done, the
+ * thread sleeps on kss_waitq.  After SOCKNAL_RESCHED iterations it
+ * yields the CPU instead.  kss_lock is held at the top of every iteration
+ * and dropped around anything that may block.  Returns 0 on shutdown.
+ */
+int ksocknal_scheduler (void *arg)
+{
+ ksock_sched_t *sched = (ksock_sched_t *)arg;
+ unsigned long flags;
+ int rc;
+ int nloops = 0;
+ int id = sched - ksocknal_data.ksnd_schedulers; /* index of this scheduler */
+ char name[16];
+#if (CONFIG_SMP && CPU_AFFINITY)
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ int cpu = cpu_logical_map(id % num_online_cpus());
+#else
+#warning "Take care of architecure specific logical APIC map"
+ int cpu = 1; /* Have to change later. */
+#endif /* LINUX_VERSION_CODE */
+
+ /* bind this thread to the chosen CPU and name it after that CPU */
+ set_cpus_allowed (current, 1 << cpu);
+ id = cpu;
+#endif /* CONFIG_SMP && CPU_AFFINITY */
+
+ snprintf (name, sizeof (name),"ksocknald[%d]", id);
+ kportal_daemonize (name);
+ kportal_blockallsigs ();
+
+ spin_lock_irqsave (&sched->kss_lock, flags);
+
+ while (!ksocknal_data.ksnd_shuttingdown) {
+ int did_something = 0;
+
+ /* Ensure I progress everything semi-fairly */
+
+ if (!list_empty (&sched->kss_rx_conns)) {
+ did_something = 1;
+ /* drops & regains kss_lock */
+ ksocknal_process_receive (sched, &flags);
+ }
+
+ if (!list_empty (&sched->kss_tx_conns)) {
+ did_something = 1;
+ /* drops and regains kss_lock */
+ ksocknal_process_transmit (sched, &flags);
+ }
+#if SOCKNAL_ZC
+ if (!list_empty (&sched->kss_zctxdone_list)) {
+ ksock_tx_t *tx =
+ list_entry(sched->kss_zctxdone_list.next,
+ ksock_tx_t, tx_list);
+ did_something = 1;
+
+ list_del (&tx->tx_list);
+ spin_unlock_irqrestore (&sched->kss_lock, flags);
+
+ ksocknal_tx_done (tx);
+
+ spin_lock_irqsave (&sched->kss_lock, flags);
+ }
+#endif
+ if (!did_something || /* nothing to do */
+ ++nloops == SOCKNAL_RESCHED) { /* hogging CPU? */
+ spin_unlock_irqrestore (&sched->kss_lock, flags);
+
+ nloops = 0;
+
+ if (!did_something) { /* wait for something to do */
+#if SOCKNAL_ZC
+ rc = wait_event_interruptible (sched->kss_waitq,
+ ksocknal_data.ksnd_shuttingdown ||
+ !list_empty(&sched->kss_rx_conns) ||
+ !list_empty(&sched->kss_tx_conns) ||
+ !list_empty(&sched->kss_zctxdone_list));
+#else
+ rc = wait_event_interruptible (sched->kss_waitq,
+ ksocknal_data.ksnd_shuttingdown ||
+ !list_empty(&sched->kss_rx_conns) ||
+ !list_empty(&sched->kss_tx_conns));
+#endif
+ /* signals were blocked above, so the wait is not expected to be interrupted */
+ LASSERT (rc == 0);
+ } else
+ our_cond_resched();
+
+ spin_lock_irqsave (&sched->kss_lock, flags);
+ }
+ }
+
+ spin_unlock_irqrestore (&sched->kss_lock, flags);
+ ksocknal_thread_fini ();
+ return (0);
+}
+
+/*
+ * Socket data_ready callback.
+ *
+ * Runs under a read lock on ksnd_socklist_lock.  If the socket no longer
+ * has a connection attached (sk->user_data == NULL), the socket's current
+ * data_ready handler is called instead.  Otherwise, if the connection was
+ * not already marked rx-ready, it is marked ready.  If it is not currently
+ * scheduled, it is queued on its scheduler's rx list with an extra
+ * reference and the scheduler is woken.
+ */
+void
+ksocknal_data_ready (struct sock *sk, int n)
+{
+ unsigned long flags;
+ ksock_conn_t *conn;
+ ksock_sched_t *sched;
+ ENTRY;
+
+ /* interleave correctly with closing sockets... */
+ read_lock (&ksocknal_data.ksnd_socklist_lock);
+
+ conn = sk->user_data;
+ if (conn == NULL) { /* raced with ksocknal_close_sock */
+ /* the callback must already have been changed, or this would recurse */
+ LASSERT (sk->data_ready != &ksocknal_data_ready);
+ sk->data_ready (sk, n);
+ } else if (!conn->ksnc_rx_ready) { /* new news */
+ /* Set ASAP in case of concurrent calls to me */
+ conn->ksnc_rx_ready = 1;
+
+ sched = conn->ksnc_scheduler;
+
+ spin_lock_irqsave (&sched->kss_lock, flags);
+
+ /* Set again (process_receive may have cleared while I blocked for the lock) */
+ conn->ksnc_rx_ready = 1;
+
+ if (!conn->ksnc_rx_scheduled) { /* not being progressed */
+ list_add_tail(&conn->ksnc_rx_list,
+ &sched->kss_rx_conns);
+ conn->ksnc_rx_scheduled = 1;
+ /* extra ref for scheduler */
+ atomic_inc (&conn->ksnc_refcount);
+
+ if (waitqueue_active (&sched->kss_waitq))
+ wake_up (&sched->kss_waitq);
+ }
+
+ spin_unlock_irqrestore (&sched->kss_lock, flags);
+ }
+
+ read_unlock (&ksocknal_data.ksnd_socklist_lock);
+
+ EXIT;
+}
+
+/*
+ * Socket write_space callback.
+ *
+ * Runs under a read lock on ksnd_socklist_lock.  If the socket no longer
+ * has a connection attached, the socket's current write_space handler is
+ * called instead.  Otherwise, once the send space reaches
+ * SOCKNAL_TX_LOW_WATER(sk), SOCK_NOSPACE is cleared and the connection is
+ * marked tx-ready.  If it is not already scheduled and has packets
+ * queued, it is put on its scheduler's tx list with an extra reference
+ * and the scheduler is woken.
+ */
+void
+ksocknal_write_space (struct sock *sk)
+{
+ unsigned long flags;
+ ksock_conn_t *conn;
+ ksock_sched_t *sched;
+
+ /* interleave correctly with closing sockets... */
+ read_lock (&ksocknal_data.ksnd_socklist_lock);
+
+ conn = sk->user_data;
+
+ /* the three trailing %s describe the conn's tx state; all empty when conn is NULL */
+ CDEBUG(D_NET, "sk %p wspace %d low water %d conn %p%s%s%s\n",
+ sk, tcp_wspace(sk), SOCKNAL_TX_LOW_WATER(sk), conn,
+ (conn == NULL) ? "" : (test_bit (0, &conn->ksnc_tx_ready) ?
+ " ready" : " blocked"),
+ (conn == NULL) ? "" : (conn->ksnc_tx_scheduled ?
+ " scheduled" : " idle"),
+ (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ?
+ " empty" : " queued"));
+
+ if (conn == NULL) { /* raced with ksocknal_close_sock */
+ /* the callback must already have been changed, or this would recurse */
+ LASSERT (sk->write_space != &ksocknal_write_space);
+ sk->write_space (sk);
+ } else if (tcp_wspace(sk) >= SOCKNAL_TX_LOW_WATER(sk)) { /* got enough space */
+ clear_bit (SOCK_NOSPACE, &sk->socket->flags);
+
+ if (!conn->ksnc_tx_ready) { /* new news */
+ /* Set ASAP in case of concurrent calls to me */
+ conn->ksnc_tx_ready = 1;
+
+ sched = conn->ksnc_scheduler;
+
+ spin_lock_irqsave (&sched->kss_lock, flags);
+
+ /* Set again (process_transmit may have
+ cleared while I blocked for the lock) */
+ conn->ksnc_tx_ready = 1;
+
+ if (!conn->ksnc_tx_scheduled && // not being progressed
+ !list_empty(&conn->ksnc_tx_queue)){//packets to send
+ list_add_tail (&conn->ksnc_tx_list,
+ &sched->kss_tx_conns);
+ conn->ksnc_tx_scheduled = 1;
+ /* extra ref for scheduler */
+ atomic_inc (&conn->ksnc_refcount);
+
+ if (waitqueue_active (&sched->kss_waitq))
+ wake_up (&sched->kss_waitq);
+ }
+
+ spin_unlock_irqrestore (&sched->kss_lock, flags);
+ }
+ }
+
+ read_unlock (&ksocknal_data.ksnd_socklist_lock);
+}
+
+/*
+ * Reaper thread body: closes connections that were queued on
+ * ksnd_reaper_list.
+ *
+ * Until ksnd_shuttingdown is set, takes one connection at a time off the
+ * list (under ksnd_reaper_lock) and closes it with the lock dropped.
+ * When the list is empty, sleeps on ksnd_reaper_waitq.  Returns 0.
+ */
+int
+ksocknal_reaper (void *arg)
+{
+        unsigned long flags;
+        ksock_conn_t *victim;
+        int           rc;
+
+        kportal_daemonize ("ksocknal_reaper");
+        kportal_blockallsigs ();
+
+        while (!ksocknal_data.ksnd_shuttingdown) {
+                victim = NULL;
+
+                spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags);
+
+                if (!list_empty (&ksocknal_data.ksnd_reaper_list)) {
+                        victim = list_entry (ksocknal_data.ksnd_reaper_list.next,
+                                             ksock_conn_t, ksnc_list);
+                        list_del (&victim->ksnc_list);
+                }
+
+                spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags);
+
+                if (victim == NULL) {
+                        /* nothing queued: sleep until work or shutdown */
+                        rc = wait_event_interruptible (ksocknal_data.ksnd_reaper_waitq,
+                                                       ksocknal_data.ksnd_shuttingdown ||
+                                                       !list_empty(&ksocknal_data.ksnd_reaper_list));
+                        LASSERT (rc == 0);
+                        continue;
+                }
+
+                ksocknal_close_conn (victim);
+        }
+
+        ksocknal_thread_fini ();
+        return (0);
+}
+
+/* Callback table through which the portals library drives this NAL */
+nal_cb_t ksocknal_lib = {
+        .nal_data      = &ksocknal_data,        /* NAL private data */
+        .cb_send       = ksocknal_send,
+        .cb_send_pages = ksocknal_send_pages,
+        .cb_recv       = ksocknal_recv,
+        .cb_recv_pages = ksocknal_recv_pages,
+        .cb_read       = ksocknal_read,
+        .cb_write      = ksocknal_write,
+        .cb_callback   = ksocknal_callback,
+        .cb_malloc     = ksocknal_malloc,
+        .cb_free       = ksocknal_free,
+        .cb_printf     = ksocknal_printf,
+        .cb_cli        = ksocknal_cli,
+        .cb_sti        = ksocknal_sti,
+        .cb_dist       = ksocknal_dist
+};
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+# Shared module build rules (presumably the Rules.linux that defines the
+# $(MODULE).o link rule -- verify the relative path)
+include ../../Rules.linux
+
+# Name of the module; ktoenal.o is the object listed for installation below
+MODULE = ktoenal
+modulenet_DATA = ktoenal.o
+EXTRA_PROGRAMS = ktoenal
+
+# DEFS is deliberately left empty
+DEFS =
+# Sources that make up the ktoenal module
+ktoenal_SOURCES = toenal.c toenal_cb.c toenal.h
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Author: Zach Brown <zab@zabbo.net>
+ * Author: Peter J. Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ * Author: Kedar Sovani <kedar@calsoftinc.com>
+ * Author: Amey Inamdar <amey@calsoftinc.com>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+#include <linux/poll.h>
+#include "toenal.h"
+
+/* NI handle for this NAL; finalised with PtlNIFini() and unregistered as a
+ * portal symbol in ktoenal_module_fini() */
+ptl_handle_ni_t ktoenal_ni;
+/* API-side NAL object: its methods are filled in by ktoenal_module_init()
+ * and its address is what ktoenal_init() returns */
+static nal_t ktoenal_api;
+/* Module-wide NAL state (socket list, reaper list, forwarding buffers...) */
+static ksock_nal_data_t ktoenal_data;
+
+/*
+ksocknal_interface_t ktoenal_interface = {
+ ksni_add_sock: ktoenal_add_sock,
+ ksni_close_sock: ktoenal_close_sock,
+ ksni_set_mynid: ktoenal_set_mynid,
+};
+*/
+
+/* How the router reaches this NAL: its id, private data and forward hook */
+kpr_nal_interface_t ktoenal_router_interface = {
+        .kprni_nalid = TOENAL,
+        .kprni_arg   = &ktoenal_data,
+        .kprni_fwd   = ktoenal_fwd_packet,
+};
+
+
+/*
+ * API 'forward' method: dispatch API call 'id' to the library side.
+ * The NAL's private data is passed as the dispatch private argument.
+ * args_len and ret_len are not used.  Always returns PTL_OK.
+ */
+int
+ktoenal_api_forward(nal_t *nal, int id, void *args, size_t args_len,
+                    void *ret, size_t ret_len)
+{
+        ksock_nal_data_t *nal_state = nal->nal_data;
+
+        /* ktoenal_send needs the NAL state */
+        lib_dispatch(nal_state->ksnd_nal_cb, nal_state, id, args, ret);
+
+        return PTL_OK;
+}
+
+/*
+ * API 'shutdown' method: close every connection by calling
+ * ktoenal_close_sock() with a zero nid, and return its result.
+ * 'nal' and 'ni' are not used.
+ */
+int
+ktoenal_api_shutdown(nal_t *nal, int ni)
+{
+ CDEBUG (D_NET, "closing all connections\n");
+
+ return ktoenal_close_sock(0); /* close all sockets */
+}
+
+/* API 'yield' method: give other tasks a chance to run */
+void
+ktoenal_api_yield(nal_t *nal)
+{
+        our_cond_resched();
+}
+
+/*
+ * API 'lock' method: delegate to the library callback table's cb_cli,
+ * passing 'flags' through.
+ */
+void
+ktoenal_api_lock(nal_t *nal, unsigned long *flags)
+{
+        ksock_nal_data_t *nal_state = nal->nal_data;
+        nal_cb_t         *cb = nal_state->ksnd_nal_cb;
+
+        cb->cb_cli(cb, flags);
+}
+
+/*
+ * API 'unlock' method: delegate to the library callback table's cb_sti,
+ * passing 'flags' through.
+ */
+void
+ktoenal_api_unlock(nal_t *nal, unsigned long *flags)
+{
+        ksock_nal_data_t *nal_state = nal->nal_data;
+        nal_cb_t         *cb = nal_state->ksnd_nal_cb;
+
+        cb->cb_sti(cb, flags);
+}
+
+/*
+ * NAL init entry point: initialise the library side with the currently
+ * configured nid and the given table sizes, and return the API object.
+ * 'interface' and 'requested_pid' are not used.
+ *
+ * NOTE(review): the result of lib_init() is ignored, and the meaning of
+ * its literal 0 and 10 arguments is not visible here -- confirm against
+ * lib_init()'s prototype.
+ */
+nal_t *
+ktoenal_init(int interface, ptl_pt_index_t ptl_size,
+ ptl_ac_index_t ac_size, ptl_pid_t requested_pid)
+{
+ CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n",
+ ktoenal_data.ksnd_mynid);
+ lib_init(&ktoenal_lib, ktoenal_data.ksnd_mynid, 0, 10, ptl_size,
+ ac_size);
+ return (&ktoenal_api);
+}
+
+/*
+ * EXTRA functions follow
+ */
+/* Kernels before 2.5.0 get a local SOCKET_I(): the socket is the
+ * u.socket_i member embedded in the inode */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+#define SOCKET_I(inode) (&(inode)->u.socket_i)
+#endif
+/* Return the struct socket associated with 'inode' via SOCKET_I() */
+static __inline__ struct socket *
+socki_lookup(struct inode *inode)
+{
+ return SOCKET_I(inode);
+}
+
+/*
+ * Record 'nid' as this node's id, both in the NAL state and in the
+ * library's NI.  Always returns 0.
+ */
+int
+ktoenal_set_mynid(ptl_nid_t nid)
+{
+        /* FIXME: we have to do this because we call lib_init() at module
+         * insertion time, which is before we have 'mynid' available.  lib_init
+         * sets the NAL's nid, which it uses to tell other nodes where packets
+         * are coming from.  This is not a very graceful solution to this
+         * problem. */
+
+        CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n",
+               nid, ktoenal_lib.ni.nid);
+
+        ktoenal_data.ksnd_mynid = nid;
+        ktoenal_lib.ni.nid = nid;
+
+        return (0);
+}
+
+/*
+ * Register the socket behind file descriptor 'fd' as the connection to
+ * peer 'nid'.
+ *
+ * Takes a reference on the file, allocates and initialises a connection,
+ * adds it to ksnd_socklist, primes it via ktoenal_data_ready() and
+ * ktoenal_write_space(), and wakes the poll thread so it notices the new
+ * socket.  A module use count is taken for the connection.
+ *
+ * Returns 0 on success, -EINVAL if 'fd' has no file or socket, or -ENOMEM
+ * if the connection cannot be allocated.  On failure after the fget() the
+ * file reference is dropped again.
+ */
+int
+ktoenal_add_sock (ptl_nid_t nid, int fd)
+{
+        unsigned long      flags;
+        ksock_conn_t      *conn;
+        struct file       *file = NULL;
+        struct socket     *sock = NULL;
+        int                ret;
+        ENTRY;
+
+        file = fget(fd);
+        if (file == NULL)
+                RETURN(-EINVAL);
+
+        ret = -EINVAL;
+        sock = socki_lookup(file->f_dentry->d_inode);
+        if (sock == NULL)
+                GOTO(error, ret);
+
+        ret = -ENOMEM;
+        PORTAL_ALLOC(conn, sizeof(*conn));
+        if (!conn)
+                GOTO(error, ret);
+
+        /* zero the whole structure (sizeof (conn) would only cover a
+         * pointer's worth of it) */
+        memset (conn, 0, sizeof (*conn));
+        file->f_flags |= O_NONBLOCK;  /*  Does this have any conflicts */
+        conn->ksnc_file = file;
+        conn->ksnc_sock = sock;
+        conn->ksnc_peernid = nid;
+        atomic_set (&conn->ksnc_refcount, 1);    /* 1 ref for socklist */
+
+        conn->ksnc_rx_ready = 0;
+        conn->ksnc_rx_scheduled = 0;
+        ktoenal_new_packet (conn, 0);
+
+        INIT_LIST_HEAD (&conn->ksnc_tx_queue);
+        conn->ksnc_tx_ready = 0;
+        conn->ksnc_tx_scheduled = 0;
+
+        LASSERT (!in_interrupt());
+        write_lock_irqsave (&ktoenal_data.ksnd_socklist_lock, flags);
+
+        list_add(&conn->ksnc_list, &ktoenal_data.ksnd_socklist);
+        write_unlock_irqrestore (&ktoenal_data.ksnd_socklist_lock, flags);
+
+        ktoenal_data_ready(conn);
+        ktoenal_write_space(conn);
+
+        /* Schedule pollthread so that it will poll
+         * for newly created socket
+         */
+        ktoenal_data.ksnd_slistchange = 1;
+        wake_up_process(ktoenal_data.ksnd_pollthread_tsk);
+
+        CDEBUG(D_IOCTL, "conn [%p] registered for nid "LPX64"\n",
+               conn, conn->ksnc_peernid);
+
+        /* Can't unload while connection active */
+        PORTAL_MODULE_USE;
+        RETURN(0);
+
+error:
+        fput(file);
+        return (ret);
+}
+
+/*
+ * Close the connection to 'nid', or every connection when 'nid' is zero.
+ *
+ * Matching connections are moved from ksnd_socklist to a private list
+ * under the socklist write lock.  With the lock dropped, the socklist's
+ * reference on each is released.  The poll thread is then woken so it
+ * notices the change.
+ *
+ * Returns 0 if at least one connection was removed, -ENOENT otherwise.
+ */
+int
+ktoenal_close_sock(ptl_nid_t nid)
+{
+        unsigned long      flags;   /* irqsave state must be unsigned long */
+        ksock_conn_t      *conn;
+        LIST_HEAD         (death_row);
+        struct list_head  *tmp;
+
+        LASSERT (!in_interrupt());
+        write_lock_irqsave (&ktoenal_data.ksnd_socklist_lock, flags);
+
+        if (nid == 0)                           /* close ALL connections */
+        {
+                /* insert 'death row' into the socket list... */
+                list_add (&death_row, &ktoenal_data.ksnd_socklist);
+                /* ...extract and reinitialise the socket list itself... */
+                list_del_init (&ktoenal_data.ksnd_socklist);
+                /* ...and voila, death row is the proud owner of all conns */
+        } else list_for_each (tmp, &ktoenal_data.ksnd_socklist) {
+
+                conn = list_entry (tmp, ksock_conn_t, ksnc_list);
+
+                if (conn->ksnc_peernid == nid)
+                {
+                        /* safe to unlink mid-walk: the walk stops here */
+                        list_del (&conn->ksnc_list);
+                        list_add (&conn->ksnc_list, &death_row);
+                        break;
+                }
+        }
+
+        write_unlock_irqrestore (&ktoenal_data.ksnd_socklist_lock, flags);
+
+        if (list_empty (&death_row))
+                return (-ENOENT);
+
+        do {
+                conn = list_entry (death_row.next, ksock_conn_t, ksnc_list);
+                list_del (&conn->ksnc_list);
+                ktoenal_put_conn (conn);       /* drop ref for ksnd_socklist */
+        } while (!list_empty (&death_row));
+
+        /* tell the poll thread the socket list changed */
+        ktoenal_data.ksnd_slistchange = 1;
+        wake_up_process(ktoenal_data.ksnd_pollthread_tsk);
+
+        return (0);
+}
+
+
+/*
+ * Find the connection to peer 'nid'.
+ *
+ * Walks ksnd_socklist under its read lock.  On a match, the connection's
+ * refcount is incremented on behalf of the caller and the connection is
+ * returned; otherwise NULL is returned.
+ */
+ksock_conn_t *
+ktoenal_get_conn (ptl_nid_t nid)
+{
+        struct list_head  *pos;
+        ksock_conn_t      *cur;
+        ksock_conn_t      *found = NULL;
+
+        PROF_START(conn_list_walk);
+
+        read_lock (&ktoenal_data.ksnd_socklist_lock);
+
+        list_for_each(pos, &ktoenal_data.ksnd_socklist) {
+                cur = list_entry(pos, ksock_conn_t, ksnc_list);
+
+                if (cur->ksnc_peernid != nid)
+                        continue;
+
+                /* caller is referencing */
+                atomic_inc (&cur->ksnc_refcount);
+                found = cur;
+                break;
+        }
+
+        read_unlock (&ktoenal_data.ksnd_socklist_lock);
+
+        if (found != NULL)
+                CDEBUG(D_NET, "got conn [%p] -> "LPX64" (%d)\n",
+                       found, nid, atomic_read (&found->ksnc_refcount));
+        else
+                CDEBUG(D_NET, "No connection found when looking for nid "LPX64"\n", nid);
+
+        PROF_FINISH(conn_list_walk);
+        return (found);
+}
+
+/*
+ * Destroy a connection: drop the file reference it holds, free the
+ * connection structure and release the module use count that was taken
+ * when the connection was registered in ktoenal_add_sock().
+ */
+void
+ktoenal_close_conn (ksock_conn_t *conn)
+{
+ CDEBUG (D_NET, "connection [%p] closed \n", conn);
+
+ fput (conn->ksnc_file);
+ PORTAL_FREE (conn, sizeof (*conn));
+ /* One less connection keeping us hanging on */
+ PORTAL_MODULE_UNUSE;
+}
+
+/*
+ * Dispose of a connection whose refcount has reached zero.
+ *
+ * Outside interrupt context the connection is closed directly.  In
+ * interrupt context it is queued on ksnd_reaper_list and the reaper is
+ * woken.
+ */
+void
+_ktoenal_put_conn (ksock_conn_t *conn)
+{
+        unsigned long flags;
+
+        CDEBUG (D_NET, "connection [%p] handed the black spot\n", conn);
+
+        /* "But what is the black spot, captain?" I asked.
+         * "That's a summons, mate..." */
+
+        LASSERT (atomic_read (&conn->ksnc_refcount) == 0);
+        LASSERT (!conn->ksnc_rx_scheduled);
+
+        if (in_interrupt()) {
+                /* hand the connection over to the reaper */
+                spin_lock_irqsave (&ktoenal_data.ksnd_reaper_lock, flags);
+
+                list_add (&conn->ksnc_list, &ktoenal_data.ksnd_reaper_list);
+                wake_up (&ktoenal_data.ksnd_reaper_waitq);
+
+                spin_unlock_irqrestore (&ktoenal_data.ksnd_reaper_lock, flags);
+                return;
+        }
+
+        ktoenal_close_conn (conn);
+}
+
+/*
+ * Release the NAL's preallocated buffers: every page attached to every
+ * forwarding message buffer, then the buffer array itself, then the
+ * array of local tx descriptors.  Arrays that were never allocated are
+ * skipped.
+ */
+void
+ktoenal_free_buffers (void)
+{
+        ksock_fmb_t *fmbs = (ksock_fmb_t *)ktoenal_data.ksnd_fmbs;
+        int          idx;
+        int          pg;
+
+        if (fmbs != NULL) {
+                for (idx = 0; idx < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); idx++) {
+                        for (pg = 0; pg < fmbs[idx].fmb_npages; pg++) {
+                                /* slots that never got a page stay NULL */
+                                if (fmbs[idx].fmb_pages[pg] == NULL)
+                                        continue;
+
+                                __free_page (fmbs[idx].fmb_pages[pg]);
+                        }
+                }
+
+                PORTAL_FREE (ktoenal_data.ksnd_fmbs,
+                             sizeof (ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS));
+        }
+
+        if (ktoenal_data.ksnd_ltxs != NULL)
+                PORTAL_FREE (ktoenal_data.ksnd_ltxs,
+                             sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
+}
+
+/*
+ * NAL command handler.  Dispatches on data->ioc_nal_cmd:
+ *   NAL_CMD_REGISTER_PEER_FD  -> ktoenal_add_sock(ioc_nid, ioc_fd)
+ *   NAL_CMD_CLOSE_CONNECTION  -> ktoenal_close_sock(ioc_nid)
+ *   NAL_CMD_REGISTER_MYNID    -> ktoenal_set_mynid(ioc_nid)
+ * and returns the handler's result; any other command yields -EINVAL.
+ * 'private' is not used.
+ */
+int
+ktoenal_cmd(struct portal_ioctl_data * data, void * private)
+{
+        LASSERT (data != NULL);
+
+        switch(data->ioc_nal_cmd) {
+        case NAL_CMD_REGISTER_PEER_FD:
+                return ktoenal_add_sock(data->ioc_nid, data->ioc_fd);
+
+        case NAL_CMD_CLOSE_CONNECTION:
+                return ktoenal_close_sock(data->ioc_nid);
+
+        case NAL_CMD_REGISTER_MYNID:
+                return ktoenal_set_mynid (data->ioc_nid);
+        }
+
+        /* unrecognised command */
+        return -EINVAL;
+}
+
+
+/*
+ * Module teardown.
+ *
+ * Undoes initialisation in reverse, starting from however far
+ * ktoenal_data.ksnd_init says it got; each case deliberately falls through
+ * to the next.  After the NAL is unregistered and the portals interface
+ * finalised, the router is shut down, all threads are told to exit and
+ * waited for, and the preallocated buffers are freed.
+ */
+void __exit
+ktoenal_module_fini (void)
+{
+ CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
+ atomic_read (&portal_kmemory));
+
+ switch (ktoenal_data.ksnd_init)
+ {
+ default:
+ /* unknown init state: assert, then continue into the full teardown */
+ LASSERT (0);
+
+ case SOCKNAL_INIT_ALL:
+ kportal_nal_unregister(TOENAL);
+ PORTAL_SYMBOL_UNREGISTER (ktoenal_ni);
+ /* fall through */
+
+ case SOCKNAL_INIT_PTL:
+ PtlNIFini(ktoenal_ni);
+ lib_fini(&ktoenal_lib);
+ /* fall through */
+
+ case SOCKNAL_INIT_DATA:
+ /* Module refcount only gets to zero when all connections
+ * have been closed so all lists must be empty */
+ LASSERT (list_empty (&ktoenal_data.ksnd_socklist));
+ LASSERT (list_empty (&ktoenal_data.ksnd_reaper_list));
+ LASSERT (list_empty (&ktoenal_data.ksnd_rx_conns));
+ LASSERT (list_empty (&ktoenal_data.ksnd_tx_conns));
+ LASSERT (list_empty (&ktoenal_data.ksnd_small_fmp.fmp_blocked_conns));
+ LASSERT (list_empty (&ktoenal_data.ksnd_large_fmp.fmp_blocked_conns));
+
+ kpr_shutdown (&ktoenal_data.ksnd_router); /* stop router calling me */
+
+ /* flag threads to terminate; wake and wait for them to die */
+ ktoenal_data.ksnd_shuttingdown = 1;
+ wake_up_all (&ktoenal_data.ksnd_reaper_waitq);
+ wake_up_all (&ktoenal_data.ksnd_sched_waitq);
+ wake_up_process(ktoenal_data.ksnd_pollthread_tsk);
+
+ /* poll once a second (HZ ticks) until every thread has exited */
+ while (atomic_read (&ktoenal_data.ksnd_nthreads) != 0)
+ {
+ CDEBUG (D_NET, "waitinf for %d threads to terminate\n",
+ atomic_read (&ktoenal_data.ksnd_nthreads));
+ set_current_state (TASK_UNINTERRUPTIBLE);
+ schedule_timeout (HZ);
+ }
+
+ kpr_deregister (&ktoenal_data.ksnd_router);
+
+ ktoenal_free_buffers();
+ /* fall through */
+
+ case SOCKNAL_INIT_NOTHING:
+ break;
+ }
+
+ CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
+ atomic_read (&portal_kmemory));
+
+ printk(KERN_INFO "Routing socket NAL unloaded (final mem %d)\n",
+ atomic_read(&portal_kmemory));
+}
+
+/*
+ * Module entry point.
+ *
+ * Initialises ktoenal_data (lists, locks, wait queues), pre-allocates the
+ * forwarding buffers and transmit descriptors, brings up the Portals network
+ * interface, starts the scheduler, reaper and poll threads, and registers
+ * with the router and the NAL command interface.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the error code of
+ * the step that failed.  Apart from the very first allocation (nothing to
+ * undo yet), failures unwind through ktoenal_module_fini().  Failure to
+ * register with the router or the command interface is only logged.
+ */
+int __init
+ktoenal_module_init (void)
+{
+        int   pkmem = atomic_read(&portal_kmemory);
+        int   rc;
+        int   i;
+        int   j;
+
+        /* packet descriptor must fit in a router descriptor's scratchpad */
+        LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t));
+
+        LASSERT (ktoenal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
+
+        ktoenal_api.forward  = ktoenal_api_forward;
+        ktoenal_api.shutdown = ktoenal_api_shutdown;
+        ktoenal_api.yield    = ktoenal_api_yield;
+        ktoenal_api.validate = NULL;         /* our api validate is a NOOP */
+        ktoenal_api.lock     = ktoenal_api_lock;
+        ktoenal_api.unlock   = ktoenal_api_unlock;
+        ktoenal_api.nal_data = &ktoenal_data;
+
+        ktoenal_lib.nal_data = &ktoenal_data;
+
+        memset (&ktoenal_data, 0, sizeof (ktoenal_data)); /* zero pointers */
+
+        INIT_LIST_HEAD(&ktoenal_data.ksnd_socklist);
+        rwlock_init(&ktoenal_data.ksnd_socklist_lock);
+
+        ktoenal_data.ksnd_nal_cb = &ktoenal_lib;
+        spin_lock_init (&ktoenal_data.ksnd_nal_cb_lock);
+
+        spin_lock_init (&ktoenal_data.ksnd_sched_lock);
+
+        init_waitqueue_head (&ktoenal_data.ksnd_sched_waitq);
+
+        INIT_LIST_HEAD (&ktoenal_data.ksnd_rx_conns);
+        INIT_LIST_HEAD (&ktoenal_data.ksnd_tx_conns);
+
+        INIT_LIST_HEAD(&ktoenal_data.ksnd_small_fmp.fmp_idle_fmbs);
+        INIT_LIST_HEAD(&ktoenal_data.ksnd_small_fmp.fmp_blocked_conns);
+        INIT_LIST_HEAD(&ktoenal_data.ksnd_large_fmp.fmp_idle_fmbs);
+        INIT_LIST_HEAD(&ktoenal_data.ksnd_large_fmp.fmp_blocked_conns);
+
+        INIT_LIST_HEAD(&ktoenal_data.ksnd_idle_nblk_ltx_list);
+        INIT_LIST_HEAD(&ktoenal_data.ksnd_idle_ltx_list);
+        init_waitqueue_head(&ktoenal_data.ksnd_idle_ltx_waitq);
+
+        INIT_LIST_HEAD (&ktoenal_data.ksnd_reaper_list);
+        init_waitqueue_head(&ktoenal_data.ksnd_reaper_waitq);
+        spin_lock_init (&ktoenal_data.ksnd_reaper_lock);
+
+        ktoenal_data.ksnd_init = SOCKNAL_INIT_DATA; /* flag lists/ptrs/locks initialised */
+
+        PORTAL_ALLOC(ktoenal_data.ksnd_fmbs,
+                     sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS));
+        if (ktoenal_data.ksnd_fmbs == NULL)
+                RETURN(-ENOMEM);
+
+        /* NULL out buffer pointers etc */
+        memset(ktoenal_data.ksnd_fmbs, 0,
+               sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS));
+
+        /* the first SOCKNAL_SMALL_FWD_NMSGS descriptors form the small
+         * pool, the remainder the large pool */
+        for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); i++)
+        {
+                ksock_fmb_t *fmb = &((ksock_fmb_t *)ktoenal_data.ksnd_fmbs)[i];
+
+                if (i < SOCKNAL_SMALL_FWD_NMSGS)
+                {
+                        fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES;
+                        fmb->fmb_pool = &ktoenal_data.ksnd_small_fmp;
+                }
+                else
+                {
+                        fmb->fmb_npages = SOCKNAL_LARGE_FWD_PAGES;
+                        fmb->fmb_pool = &ktoenal_data.ksnd_large_fmp;
+                }
+
+                LASSERT (fmb->fmb_npages > 0);
+                for (j = 0; j < fmb->fmb_npages; j++)
+                {
+                        fmb->fmb_pages[j] = alloc_page (GFP_KERNEL);
+
+                        if (fmb->fmb_pages[j] == NULL)
+                        {
+                                ktoenal_module_fini ();
+                                return (-ENOMEM);
+                        }
+
+                        LASSERT (page_address (fmb->fmb_pages[j]) != NULL);
+                }
+
+                list_add (&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs);
+        }
+
+        PORTAL_ALLOC(ktoenal_data.ksnd_ltxs,
+                     sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
+        if (ktoenal_data.ksnd_ltxs == NULL)
+        {
+                ktoenal_module_fini ();
+                return (-ENOMEM);
+        }
+
+        /* Deterministic bugs please */
+        memset (ktoenal_data.ksnd_ltxs, 0xeb,
+                sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
+
+        /* the first SOCKNAL_NLTXS descriptors go on the normal idle list,
+         * the rest on the list reserved for callers that can't block */
+        for (i = 0; i < SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS; i++)
+        {
+                ksock_ltx_t *ltx = &((ksock_ltx_t *)ktoenal_data.ksnd_ltxs)[i];
+
+                ltx->ltx_idle = i < SOCKNAL_NLTXS ?
+                                &ktoenal_data.ksnd_idle_ltx_list :
+                                &ktoenal_data.ksnd_idle_nblk_ltx_list;
+                list_add (&ltx->ltx_tx.tx_list, ltx->ltx_idle);
+        }
+
+        rc = PtlNIInit(ktoenal_init, 32, 4, 0, &ktoenal_ni);
+        if (rc != 0)
+        {
+                CERROR("ktoenal: PtlNIInit failed: error %d\n", rc);
+                ktoenal_module_fini ();
+                RETURN (rc);
+        }
+        PtlNIDebug(ktoenal_ni, ~0);
+
+        ktoenal_data.ksnd_init = SOCKNAL_INIT_PTL; /* flag PtlNIInit() called */
+
+        ktoenal_data.ksnd_slistchange = 1;
+        for (i = 0; i < TOENAL_N_SCHED; i++)
+        {
+                rc = ktoenal_thread_start (ktoenal_scheduler, NULL);
+                if (rc != 0)
+                {
+                        CERROR("Can't spawn socknal scheduler[%d]: %d\n", i, rc);
+                        ktoenal_module_fini ();
+                        RETURN (rc);
+                }
+        }
+
+        rc = ktoenal_thread_start (ktoenal_reaper, NULL);
+        if (rc != 0)
+        {
+                CERROR("Can't spawn socknal reaper: %d\n", rc);
+                ktoenal_module_fini ();
+                RETURN (rc);
+        }
+
+        rc = ktoenal_thread_start (ktoenal_pollthread, NULL);
+        if (rc != 0)
+        {
+                CERROR("Can't spawn socknal pollthread: %d\n", rc);
+                ktoenal_module_fini ();
+                RETURN (rc);
+        }
+
+        /* failing to register with the router is not fatal: we just
+         * don't route */
+        rc = kpr_register(&ktoenal_data.ksnd_router,
+                          &ktoenal_router_interface);
+        if (rc != 0)
+                CDEBUG (D_NET, "Can't initialise routing interface (rc = %d): not routing\n", rc);
+
+        rc = kportal_nal_register(TOENAL, &ktoenal_cmd, NULL);
+        if (rc != 0)
+                CDEBUG(D_NET, "Can't initialise command interface (rc = %d)\n",
+                       rc);
+
+        PORTAL_SYMBOL_REGISTER(ktoenal_ni);
+
+        /* flag everything initialised */
+        ktoenal_data.ksnd_init = SOCKNAL_INIT_ALL;
+
+        printk(KERN_INFO"Routing TOE NAL loaded (Routing %s, initial mem %d)\n",
+               kpr_routing(&ktoenal_data.ksnd_router) ? "enabled" : "disabled",
+               pkmem);
+
+        return (0);
+}
+
+/* Module metadata */
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("Kernel TCP Socket NAL v0.01");
+MODULE_LICENSE("GPL");
+
+/* Module load/unload entry points */
+module_init(ktoenal_module_init);
+module_exit(ktoenal_module_fini);
+
+/* Network interface handle filled in by PtlNIInit() in ktoenal_module_init() */
+EXPORT_SYMBOL (ktoenal_ni);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Author: Zach Brown <zab@zabbo.net>
+ * Author: Peter J. Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ * Author: Kedar Sovani <kedar@calsoftinc.com>
+ * Author: Amey Inamdar <amey@calsoftinc.com>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#define DEBUG_PORTAL_ALLOC
+#define EXPORT_SYMTAB
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/smp_lock.h>
+#include <linux/unistd.h>
+#include <net/tcp.h>
+#include <linux/uio.h>
+#include <linux/sched.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/list.h>
+#include <asm/uaccess.h>
+#include <asm/segment.h>
+
+#define DEBUG_SUBSYSTEM S_SOCKNAL
+
+#include <linux/kp30.h>
+#include <portals/p30.h>
+#include <portals/lib-p30.h>
+
+/* Compile-time tunables for the TOE NAL */
+#define SOCKNAL_MAX_FWD_PAYLOAD (64<<10) /* biggest payload I can forward */
+
+#define SOCKNAL_NLTXS 128 /* # normal transmit messages */
+#define SOCKNAL_NNBLK_LTXS 128 /* # transmit messages reserved if can't block */
+
+#define SOCKNAL_SMALL_FWD_NMSGS 128 /* # small messages I can be forwarding at any time */
+#define SOCKNAL_LARGE_FWD_NMSGS 32 /* # large messages I can be forwarding at any time */
+
+#define SOCKNAL_SMALL_FWD_PAGES 1 /* # pages in a small message fwd buffer */
+
+#define SOCKNAL_LARGE_FWD_PAGES (PAGE_ALIGN (sizeof (ptl_hdr_t) + SOCKNAL_MAX_FWD_PAYLOAD) >> PAGE_SHIFT)
+ /* # pages in a large message fwd buffer */
+
+#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */
+
+/* 80% of the socket's send buffer size ((sk)->sndbuf) */
+#define SOCKNAL_TX_LOW_WATER(sk) (((sk)->sndbuf*8)/10)
+
+/* # scheduler threads started by ktoenal_module_init() */
+#define TOENAL_N_SCHED 1
+
+/* One pool exists per buffer size class (ksnd_small_fmp / ksnd_large_fmp
+ * in ksock_nal_data_t). */
+typedef struct /* pool of forwarding buffers */
+{
+ struct list_head fmp_idle_fmbs; /* buffers waiting for a connection */
+ struct list_head fmp_blocked_conns; /* connections waiting for a buffer */
+} ksock_fmb_pool_t;
+
+/* Global state of the TOE NAL; the single instance is 'ktoenal_data'. */
+typedef struct {
+ int ksnd_init; /* initialisation state */
+
+ struct list_head ksnd_socklist; /* all my connections */
+ rwlock_t ksnd_socklist_lock; /* stabilise add/find/remove */
+
+
+ ptl_nid_t ksnd_mynid; /* NOTE(review): presumably this node's NID - confirm */
+ nal_cb_t *ksnd_nal_cb; /* lib callbacks (&ktoenal_lib, set at module init) */
+ spinlock_t ksnd_nal_cb_lock; /* lib cli/sti lock */
+
+ atomic_t ksnd_nthreads; /* # live threads */
+ int ksnd_shuttingdown; /* tell threads to exit */
+
+ kpr_router_t ksnd_router; /* THE router */
+
+ spinlock_t ksnd_sched_lock; /* serialise packet scheduling */
+ wait_queue_head_t ksnd_sched_waitq; /* where scheduler(s) wait */
+
+ struct list_head ksnd_rx_conns; /* conn waiting to be read */
+ struct list_head ksnd_tx_conns; /* conn waiting to be written */
+
+ void *ksnd_fmbs; /* all the pre-allocated FMBs */
+ ksock_fmb_pool_t ksnd_small_fmp; /* small message forwarding buffers */
+ ksock_fmb_pool_t ksnd_large_fmp; /* large message forwarding buffers */
+
+ void *ksnd_ltxs; /* all the pre-allocated LTXs */
+ struct list_head ksnd_idle_ltx_list; /* where to get an idle LTX */
+ struct list_head ksnd_idle_nblk_ltx_list; /* where to get an idle LTX if you can't block */
+ wait_queue_head_t ksnd_idle_ltx_waitq; /* where to block for an idle LTX */
+
+ struct list_head ksnd_reaper_list; /* conn waiting to be reaped */
+ wait_queue_head_t ksnd_reaper_waitq; /* reaper sleeps here */
+ spinlock_t ksnd_reaper_lock; /* serialise */
+
+ struct task_struct *ksnd_pollthread_tsk;/* task_struct for the poll thread */
+ poll_table ksnd_pwait; /* poll wait table for the socket */
+ int ksnd_slistchange; /* informs the pollthread that
+ * the socklist has changed */
+} ksock_nal_data_t;
+
+/* Values of ksock_nal_data_t::ksnd_init: how far module initialisation got
+ * (ktoenal_module_fini() unwinds from the recorded stage downwards) */
+#define SOCKNAL_INIT_NOTHING 0
+#define SOCKNAL_INIT_DATA 1
+#define SOCKNAL_INIT_PTL 2
+#define SOCKNAL_INIT_ALL 3
+
+/* Common part of every outgoing packet, whether sent locally (embedded in
+ * ksock_ltx_t) or forwarded (stored in the router descriptor's scratchpad). */
+typedef struct /* transmit packet */
+{
+ struct list_head tx_list; /* queue on conn for transmission etc */
+ char tx_isfwd; /* forwarding / sourced here */
+ int tx_nob; /* # packet bytes */
+ int tx_niov; /* # packet frags */
+ struct iovec *tx_iov; /* packet frags */
+} ksock_tx_t;
+
+/* Pre-allocated in ktoenal_module_init(); idle descriptors sit on the list
+ * that ltx_idle points to. */
+typedef struct /* locally transmitted packet */
+{
+ ksock_tx_t ltx_tx; /* send info */
+ struct list_head *ltx_idle; /* where to put when idle */
+ void *ltx_private; /* lib_finalize() callback arg */
+ void *ltx_cookie; /* lib_finalize() callback arg */
+ struct iovec ltx_iov[1 + PTL_MD_MAX_IOV]; /* msg frags */
+ ptl_hdr_t ltx_hdr; /* buffer for packet header */
+} ksock_ltx_t;
+
+#define KSOCK_TX_2_KPR_FWD_DESC(ptr) list_entry (ptr, kpr_fwd_desc_t, kprfd_scratch)
+/* forwarded packets (router->socknal) embedded in kpr_fwd_desc_t::kprfd_scratch */
+
+#define KSOCK_TX_2_KSOCK_LTX(ptr) list_entry (ptr, ksock_ltx_t, ltx_tx)
+/* local packets (lib->socknal) embedded in ksock_ltx_t::ltx_tx */
+
+/* NB list_entry() is used here as a convenient macro for calculating a
+ * pointer to a struct from the address of a member.
+ */
+
+/* Both page arrays are sized for the large pool; small-pool buffers use only
+ * the first fmb_npages entries. */
+typedef struct /* Kernel portals Socket Forwarding message buffer */
+{ /* (socknal->router) */
+ struct list_head fmb_list; /* queue idle */
+ kpr_fwd_desc_t fmb_fwd; /* router's descriptor */
+ int fmb_npages; /* # pages allocated */
+ ksock_fmb_pool_t *fmb_pool; /* owning pool */
+ struct page *fmb_pages[SOCKNAL_LARGE_FWD_PAGES];
+ struct iovec fmb_iov[SOCKNAL_LARGE_FWD_PAGES];
+} ksock_fmb_t;
+
+/* Values of ksock_conn_t::ksnc_rx_state */
+#define SOCKNAL_RX_HEADER 1 /* reading header */
+#define SOCKNAL_RX_BODY 2 /* reading body (to deliver here) */
+#define SOCKNAL_RX_BODY_FWD 3 /* reading body (to forward) */
+#define SOCKNAL_RX_SLOP 4 /* skipping body */
+#define SOCKNAL_RX_GET_FMB 5 /* scheduled for forwarding */
+#define SOCKNAL_RX_FMB_SLEEP 6 /* blocked waiting for a fwd desc */
+
+/* Per-connection state: one socket to one peer NID, reference counted
+ * (see ktoenal_put_conn()), with separate reader and writer sections. */
+typedef struct
+{
+ struct list_head ksnc_list; /* stash on global socket list */
+ struct file *ksnc_file; /* socket filp */
+ struct socket *ksnc_sock; /* socket */
+ ptl_nid_t ksnc_peernid; /* who's on the other end */
+ atomic_t ksnc_refcount; /* # users */
+
+ /* READER */
+ struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */
+ unsigned long ksnc_rx_ready; /* data ready to read */
+ int ksnc_rx_scheduled; /* being progressed */
+ int ksnc_rx_state; /* what is being read */
+ int ksnc_rx_nob_left; /* # bytes to next hdr/body */
+ int ksnc_rx_nob_wanted; /* bytes actually wanted */
+ int ksnc_rx_niov; /* # frags */
+ struct iovec ksnc_rx_iov[1 + PTL_MD_MAX_IOV]; /* the frags */
+
+ void *ksnc_cookie; /* rx lib_finalize passthru arg */
+ ptl_hdr_t ksnc_hdr; /* where I read headers into */
+
+ /* WRITER */
+ struct list_head ksnc_tx_list; /* where I enq waiting for output space */
+ struct list_head ksnc_tx_queue; /* packets waiting to be sent */
+ unsigned long ksnc_tx_ready; /* write space */
+ int ksnc_tx_scheduled; /* being progressed */
+
+} ksock_conn_t;
+
+/* Connection management entry points (defined outside this header) */
+extern int ktoenal_add_sock (ptl_nid_t nid, int fd);
+extern int ktoenal_close_sock(ptl_nid_t nid);
+extern int ktoenal_set_mynid(ptl_nid_t nid);
+extern int ktoenal_push_sock(ptl_nid_t nid);
+extern ksock_conn_t *ktoenal_get_conn (ptl_nid_t nid);
+extern void _ktoenal_put_conn (ksock_conn_t *conn); /* called when the last ref is dropped */
+extern void ktoenal_close_conn (ksock_conn_t *conn);
+
+/*
+ * Drop one reference on 'conn'.  Whoever releases the final reference
+ * hands the connection over to _ktoenal_put_conn().
+ */
+static inline void
+ktoenal_put_conn (ksock_conn_t *conn)
+{
+        CDEBUG (D_OTHER, "putting conn[%p] -> "LPX64" (%d)\n",
+                conn, conn->ksnc_peernid, atomic_read (&conn->ksnc_refcount));
+
+        if (!atomic_dec_and_test (&conn->ksnc_refcount))
+                return;                         /* still in use */
+
+        _ktoenal_put_conn (conn);
+}
+
+/* Thread bodies, packet handling entry points and socket event hooks */
+extern int ktoenal_thread_start (int (*fn)(void *arg), void *arg);
+extern int ktoenal_new_packet (ksock_conn_t *conn, int skip);
+extern void ktoenal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd);
+extern int ktoenal_scheduler (void *arg);
+extern int ktoenal_reaper (void *arg);
+extern int ktoenal_pollthread (void *arg);
+extern void ktoenal_data_ready(ksock_conn_t *conn);
+extern void ktoenal_write_space(ksock_conn_t *conn);
+
+
+/* Shared NAL state */
+extern nal_cb_t ktoenal_lib;
+extern ksock_nal_data_t ktoenal_data;
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Author: Zach Brown <zab@zabbo.net>
+ * Author: Peter J. Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ * Author: Kedar Sovani <kedar@calsoftinc.com>
+ * Author: Amey Inamdar <amey@calsoftinc.com>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <linux/poll.h>
+#include "toenal.h"
+
+/* Packet statistics */
+atomic_t ktoenal_packets_received; /* bumped in ktoenal_init_fmb() for payload-less forwarded packets */
+long ktoenal_packets_launched; /* bumped in ktoenal_launch_packet() under the sched lock */
+long ktoenal_packets_transmitted; /* bumped in ktoenal_process_transmit() once a packet has gone */
+
+/*
+ * LIB functions follow
+ *
+ */
+/*
+ * lib callback: copy 'len' bytes from src_addr to dst_addr.
+ * Both addresses are treated as directly addressable memory (plain memcpy);
+ * 'private' is unused.  Always returns 0.
+ */
+int
+ktoenal_read(nal_cb_t *nal, void *private, void *dst_addr,
+             user_ptr src_addr, size_t len)
+{
+        CDEBUG(D_NET, LPX64": reading %ld bytes from %p -> %p\n",
+               nal->ni.nid, (long)len, src_addr, dst_addr);
+
+        memcpy(dst_addr, src_addr, len);
+
+        return (0);
+}
+
+/*
+ * lib callback: copy 'len' bytes from src_addr to dst_addr.
+ * Mirror image of ktoenal_read(): a plain memcpy, 'private' is unused.
+ * Always returns 0.
+ */
+int
+ktoenal_write(nal_cb_t *nal, void *private, user_ptr dst_addr,
+              void *src_addr, size_t len)
+{
+        CDEBUG(D_NET, LPX64": writing %ld bytes from %p -> %p\n",
+               nal->ni.nid, (long)len, src_addr, dst_addr);
+
+        memcpy(dst_addr, src_addr, len);
+
+        return (0);
+}
+
+/*
+ * lib callback: deliver event 'ev' to the event queue's handler, if the
+ * queue has one.  'private' is unused.  Always returns 0.
+ */
+int
+ktoenal_callback (nal_cb_t * nal, void *private, lib_eq_t *eq,
+                  ptl_event_t *ev)
+{
+        CDEBUG(D_NET, LPX64": callback eq %p ev %p\n",
+               nal->ni.nid, eq, ev);
+
+        if (eq->event_callback == NULL)
+                return 0;               /* nobody to notify */
+
+        eq->event_callback(ev);
+        return 0;
+}
+
+/*
+ * lib callback: allocate 'len' bytes of zero-filled memory with
+ * PORTAL_ALLOC.  Returns NULL if the allocation fails.
+ */
+void *
+ktoenal_malloc(nal_cb_t *nal, size_t len)
+{
+        void *buf;
+
+        PORTAL_ALLOC(buf, len);
+        if (buf == NULL)
+                return (NULL);
+
+        memset(buf, 0, len);
+        return (buf);
+}
+
+/*
+ * lib callback: release a buffer of 'len' bytes obtained from
+ * ktoenal_malloc().
+ */
+void
+ktoenal_free(nal_cb_t *nal, void *buf, size_t len)
+{
+        PORTAL_FREE(buf, len);
+
+        return;
+}
+
+/*
+ * lib callback: printf-style logging.  The message is formatted into a
+ * fixed 256 byte buffer (longer output is truncated) and emitted at
+ * D_NET debug level.
+ */
+void
+ktoenal_printf(nal_cb_t *nal, const char *fmt, ...)
+{
+        char    msg[256];
+        va_list ap;
+
+        va_start (ap, fmt);
+        vsnprintf (msg, sizeof (msg), fmt, ap);         /* sprint safely */
+        va_end (ap);
+
+        /* ensure terminated */
+        msg[sizeof (msg) - 1] = 0;
+
+        CDEBUG (D_NET, "%s", msg);
+}
+
+/*
+ * lib callback: enter the lib critical section by taking ksnd_nal_cb_lock.
+ * Interrupt state is not saved; 'flags' is left untouched.
+ */
+void
+ktoenal_cli(nal_cb_t *nal, unsigned long *flags)
+{
+        ksock_nal_data_t *nal_state;
+
+        nal_state = nal->nal_data;
+        spin_lock(&nal_state->ksnd_nal_cb_lock);
+}
+
+/*
+ * lib callback: leave the lib critical section entered by ktoenal_cli()
+ * by releasing ksnd_nal_cb_lock.  'flags' is left untouched.
+ */
+void
+ktoenal_sti(nal_cb_t *nal, unsigned long *flags)
+{
+        ksock_nal_data_t *nal_state = nal->nal_data;
+
+        spin_unlock(&nal_state->ksnd_nal_cb_lock);
+}
+
+/*
+ * lib callback: report the "distance" to 'nid' in *dist:
+ * 0 for this node's own NID, 1 for anything else.  Always returns 0.
+ */
+int
+ktoenal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
+{
+        /* I would guess that if ktoenal_get_conn(nid) == NULL,
+           and we're not routing, then 'nid' is very distant :) */
+        *dist = (nal->ni.nid == nid) ? 0 : 1;
+
+        return 0;
+}
+
+/*
+ * Grab an idle local transmit descriptor.
+ *
+ * The normal idle list is always tried first.  If it is empty and
+ * 'may_block' is zero, the reserved non-blocking list is tried instead and
+ * NULL is returned when that is empty too.  If 'may_block' is non-zero the
+ * caller sleeps on ksnd_idle_ltx_waitq until the normal list is non-empty
+ * and then retries.
+ *
+ * The returned descriptor has been removed from its idle list.
+ */
+ksock_ltx_t *
+ktoenal_get_ltx (int may_block)
+{
+        unsigned long  flags;           /* irqsave state must be unsigned long */
+        ksock_ltx_t   *ltx = NULL;
+
+        for (;;)
+        {
+                spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags);
+
+                if (!list_empty (&ktoenal_data.ksnd_idle_ltx_list))
+                {
+                        ltx = list_entry (ktoenal_data.ksnd_idle_ltx_list.next, ksock_ltx_t, ltx_tx.tx_list);
+                        list_del (&ltx->ltx_tx.tx_list);
+                        break;
+                }
+
+                if (!may_block)
+                {
+                        if (!list_empty (&ktoenal_data.ksnd_idle_nblk_ltx_list))
+                        {
+                                ltx = list_entry (ktoenal_data.ksnd_idle_nblk_ltx_list.next,
+                                                  ksock_ltx_t, ltx_tx.tx_list);
+                                list_del (&ltx->ltx_tx.tx_list);
+                        }
+                        break;
+                }
+
+                /* must not sleep holding the lock; the list is re-checked
+                 * under the lock on the next iteration */
+                spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
+
+                wait_event (ktoenal_data.ksnd_idle_ltx_waitq,
+                            !list_empty (&ktoenal_data.ksnd_idle_ltx_list));
+        }
+
+        /* every 'break' above leaves the loop with the lock held */
+        spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
+
+        return (ltx);
+}
+
+/*
+ * Write the fragments described by iov[0..niov-1] to the socket file
+ * 'sock' using its writev method, with the address limit temporarily
+ * raised to KERNEL_DS.
+ *
+ * Returns whatever writev returned.  When that is positive, the iov array
+ * is advanced in place past the bytes that were sent.  'nob' is only used
+ * for the PORTAL_DEBUG consistency check; 'flags' is not used.
+ */
+int
+ktoenal_sendmsg (struct file *sock, struct iovec *iov, int niov, int nob, int flags)
+{
+        /* NB This procedure "consumes" iov (actually we do, tcp_sendmsg doesn't)
+         */
+        mm_segment_t  oldmm;
+        int           rc;
+
+        LASSERT (niov > 0);
+        LASSERT (nob > 0);
+
+        oldmm = get_fs();
+        set_fs (KERNEL_DS);
+
+#ifdef PORTAL_DEBUG
+        {
+                int total_nob;
+                int i;
+
+                for (i = total_nob = 0; i < niov; i++)
+                        total_nob += iov[i].iov_len;
+
+                LASSERT (nob == total_nob);
+        }
+#endif
+        LASSERT (!in_interrupt());
+
+        rc = sock->f_op->writev(sock, iov, niov, NULL);
+
+        set_fs (oldmm);
+
+        if (rc <= 0)                            /* nothing sent */
+                return (rc);
+
+        /* consume iov: zero out fully sent frags, trim the partial one */
+        nob = rc;
+        LASSERT (niov > 0);
+        while (iov->iov_len < nob)
+        {
+                nob -= iov->iov_len;
+                iov->iov_len = 0;
+                iov++;
+                niov--;
+                LASSERT (niov > 0);
+        }
+
+        iov->iov_len -= nob;
+        iov->iov_base = (void *)(((unsigned long)iov->iov_base) + nob);
+
+        return (rc);
+}
+
+/*
+ * Read up to 'toread' bytes from the socket file 'sock' into iov, using
+ * its readv method with the address limit temporarily raised to KERNEL_DS.
+ *
+ * The fragment in which the running total reaches 'toread' is temporarily
+ * shortened so that no more than 'toread' bytes are requested; its
+ * original length is restored afterwards.  Returns readv's result.
+ */
+int
+ktoenal_recvmsg(struct file *sock, struct iovec *iov, int niov, int toread)
+{
+        /* NB This procedure "consumes" iov (actually tcp_recvmsg does)
+         */
+        mm_segment_t oldmm;
+        int nread;
+        int last = 0;           /* index of the last frag handed to readv */
+        int span = 0;           /* bytes covered by frags 0..last */
+        int saved_len = 0;      /* original length of the trimmed frag */
+
+        PROF_START(our_recvmsg);
+        while (last < niov) {
+                span += iov[last].iov_len;
+                if (span >= toread)
+                        break;
+                last++;
+        }
+
+        if (span < toread) {
+                /* ran off the end: use every frag */
+                last = niov - 1;
+        } else {
+                saved_len = iov[last].iov_len;
+                iov[last].iov_len -= (span - toread);
+        }
+
+        oldmm = get_fs();
+        set_fs(KERNEL_DS);
+
+        nread = sock->f_op->readv(sock, iov, last + 1, NULL);
+
+        set_fs(oldmm);
+
+        if (saved_len)
+                iov[last].iov_len = saved_len;
+
+        PROF_FINISH(our_recvmsg);
+        return nread;
+}
+
+/*
+ * Try to send the packet at the head of conn's transmit queue.
+ *
+ * Called with ksnd_sched_lock held (IRQ state in *irq_flags); the lock is
+ * dropped around the actual send and is held again on return.
+ *
+ * A completely sent packet is finalised: forwarded packets are reported to
+ * the router with kpr_fwd_done(), local packets via lib_finalize() and
+ * their descriptor is returned to its idle list.  A partially sent packet
+ * goes back to the head of the queue.  Afterwards the connection is either
+ * descheduled (no write space, or nothing left to send) or requeued on
+ * ksnd_tx_conns so the scheduler calls in again.
+ */
+void
+ktoenal_process_transmit (ksock_conn_t *conn, long *irq_flags)
+{
+        ksock_tx_t *tx = list_entry (conn->ksnc_tx_queue.next, ksock_tx_t, tx_list);
+        int         rc;
+
+        LASSERT (conn->ksnc_tx_scheduled);
+        LASSERT (conn->ksnc_tx_ready);
+        LASSERT (!list_empty (&conn->ksnc_tx_queue));
+
+        /* assume transmit will complete now, so dequeue while I've got the lock */
+        list_del (&tx->tx_list);
+
+        spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, *irq_flags);
+
+        LASSERT (tx->tx_nob > 0);
+
+        conn->ksnc_tx_ready = 0;                /* write_space may race with me and set ready */
+        mb();                                   /* => clear BEFORE trying to write */
+
+        rc = ktoenal_sendmsg (conn->ksnc_file,
+                              tx->tx_iov, tx->tx_niov, tx->tx_nob,
+                              list_empty (&conn->ksnc_tx_queue) ?
+                              MSG_DONTWAIT : (MSG_DONTWAIT | MSG_MORE));
+
+        CDEBUG (D_NET, "send(%d) %d\n", tx->tx_nob, rc);
+
+        if (rc < 0)                             /* error */
+        {
+                if (rc == -EAGAIN)              /* socket full => */
+                        rc = 0;                 /* nothing sent */
+                else
+                {
+#warning FIXME: handle socket errors properly
+                        CERROR ("Error socknal send(%d) %p: %d\n", tx->tx_nob, conn, rc);
+                        rc = tx->tx_nob;        /* kid on for now whole packet went */
+                }
+        }
+
+        if (rc == tx->tx_nob)                   /* everything went */
+        {
+                conn->ksnc_tx_ready = 1;        /* assume more can go (ASAP) */
+                ktoenal_put_conn (conn);        /* release packet's ref */
+
+                if (tx->tx_isfwd)               /* was a forwarded packet? */
+                {
+                        kpr_fwd_done (&ktoenal_data.ksnd_router,
+                                      KSOCK_TX_2_KPR_FWD_DESC (tx), 0);
+
+                        spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
+                }
+                else                            /* local send */
+                {
+                        ksock_ltx_t *ltx = KSOCK_TX_2_KSOCK_LTX (tx);
+
+                        lib_finalize (&ktoenal_lib, ltx->ltx_private, ltx->ltx_cookie);
+
+                        spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
+
+                        /* descriptor is idle again */
+                        list_add (&ltx->ltx_tx.tx_list, ltx->ltx_idle);
+
+                        /* normal tx desc => wakeup anyone blocking for one */
+                        if (ltx->ltx_idle == &ktoenal_data.ksnd_idle_ltx_list &&
+                            waitqueue_active (&ktoenal_data.ksnd_idle_ltx_waitq))
+                                wake_up (&ktoenal_data.ksnd_idle_ltx_waitq);
+                }
+                ktoenal_packets_transmitted++;
+        }
+        else
+        {
+                /* partial (or no) progress: account for what went */
+                tx->tx_nob -= rc;
+
+                spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
+
+                /* back onto HEAD of tx_queue */
+                list_add (&tx->tx_list, &conn->ksnc_tx_queue);
+        }
+
+        if (!conn->ksnc_tx_ready ||             /* no space to write now */
+            list_empty (&conn->ksnc_tx_queue))  /* nothing to write */
+        {
+                conn->ksnc_tx_scheduled = 0;    /* not being scheduled */
+                ktoenal_put_conn (conn);        /* release scheduler's ref */
+        }
+        else                                    /* let scheduler call me again */
+                list_add_tail (&conn->ksnc_tx_list, &ktoenal_data.ksnd_tx_conns);
+}
+
+/*
+ * Queue packet 'tx' for transmission on 'conn'.
+ *
+ * The packet's iov is first truncated so that it describes exactly
+ * tx->tx_nob bytes (tx->tx_niov is updated to match).  The packet is then
+ * appended to the connection's transmit queue and, if the connection has
+ * write space and is not already scheduled, the connection is put on
+ * ksnd_tx_conns with an extra reference for the scheduler, which is woken.
+ */
+void
+ktoenal_launch_packet (ksock_conn_t *conn, ksock_tx_t *tx)
+{
+        unsigned long  flags;           /* irqsave state must be unsigned long */
+        int            nob = tx->tx_nob;
+        struct iovec  *iov = tx->tx_iov;
+        int            niov = 1;
+
+        LASSERT (nob >= sizeof (ptl_hdr_t));
+
+        /* Truncate iov to exactly match total packet length
+         * since socket sendmsg pays no attention to requested length.
+         */
+        for (;;)
+        {
+                LASSERT (niov <= tx->tx_niov);
+                LASSERT (iov->iov_len >= 0);
+
+                if (iov->iov_len >= nob)
+                {
+                        iov->iov_len = nob;
+                        break;
+                }
+                nob -= iov->iov_len;
+                iov++;
+                niov++;
+        }
+        tx->tx_niov = niov;
+
+        spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags);
+        list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue);
+
+        if (conn->ksnc_tx_ready &&              /* able to send */
+            !conn->ksnc_tx_scheduled)           /* not scheduled to send */
+        {
+                list_add_tail (&conn->ksnc_tx_list, &ktoenal_data.ksnd_tx_conns);
+                conn->ksnc_tx_scheduled = 1;
+                atomic_inc (&conn->ksnc_refcount); /* extra ref for scheduler */
+                if (waitqueue_active (&ktoenal_data.ksnd_sched_waitq))
+                        wake_up (&ktoenal_data.ksnd_sched_waitq);
+        }
+
+        ktoenal_packets_launched++;
+        spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
+}
+
+/*
+ * lib callback: send a message (header plus optional payload) to 'nid'.
+ *
+ * The connection to 'nid' is looked up; if 'nid' is not a direct peer the
+ * router is asked for a gateway and the gateway's connection is used.  A
+ * local transmit descriptor is filled with a copy of the header and the
+ * payload fragments, then handed to ktoenal_launch_packet().  'private'
+ * and 'cookie' are stashed in the descriptor for lib_finalize().
+ *
+ * The caller is only allowed to block for a descriptor if the message is
+ * neither an ACK nor a REPLY and we are not in interrupt context.
+ *
+ * Returns 0 once the packet is queued, -1 if no route/peer or no transmit
+ * descriptor is available.
+ */
+int
+ktoenal_send(nal_cb_t *nal, void *private, lib_msg_t *cookie,
+             ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+             unsigned int payload_niov, struct iovec *payload_iov, size_t payload_len)
+{
+        ptl_nid_t     gatewaynid;
+        ksock_conn_t *conn;
+        ksock_ltx_t  *ltx;
+        int           rc;
+        int           i;
+
+        /* By this point, as it happens, we have absolutely no idea what
+         * 'private' is.  It might be ksock_nal_data or it might be ksock_conn.
+         * Ha ha, isn't that a funny joke?
+         *
+         * FIXME: this is not the right way to fix this; the right way is to
+         * always pass in the same kind of structure.  This is hard right now.
+         * To revisit this issue, set a breakpoint in here and watch for when
+         * it's called from lib_finalize.  I think this occurs when we send a
+         * packet as a side-effect of another packet, such as when an ACK has
+         * been requested. -phil */
+
+        /* iov_len is a size_t; cast so it matches the %d conversion */
+        CDEBUG(D_NET, "sending "LPSZ" bytes from [%d](%p,%d)... to nid: "LPX64" pid %d\n",
+               payload_len, payload_niov,
+               payload_niov > 0 ? payload_iov[0].iov_base : NULL,
+               payload_niov > 0 ? (int)payload_iov[0].iov_len : 0,
+               nid, pid);
+
+        if ((conn = ktoenal_get_conn (nid)) == NULL)
+        {
+                /* It's not a peer; try to find a gateway */
+                rc = kpr_lookup (&ktoenal_data.ksnd_router, nid, &gatewaynid);
+                if (rc != 0)
+                {
+                        CERROR ("Can't route to "LPX64": router error %d\n", nid, rc);
+                        return (-1);
+                }
+
+                if ((conn = ktoenal_get_conn (gatewaynid)) == NULL)
+                {
+                        CERROR ("Can't route to "LPX64": gateway "LPX64" is not a peer\n",
+                                nid, gatewaynid);
+                        return (-1);
+                }
+        }
+
+        /* This transmit has now got a ref on conn */
+
+        /* I may not block for a transmit descriptor if I might block the
+         * receiver, or an interrupt handler. */
+        ltx = ktoenal_get_ltx (!(type == PTL_MSG_ACK ||
+                                 type == PTL_MSG_REPLY ||
+                                 in_interrupt ()));
+        if (ltx == NULL)
+        {
+                CERROR ("Can't allocate tx desc\n");
+                ktoenal_put_conn (conn);
+                return (-1);
+        }
+
+        /* Init common (to sends and forwards) packet part */
+        ltx->ltx_tx.tx_isfwd = 0;
+        ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len;
+        ltx->ltx_tx.tx_niov = 1 + payload_niov;
+        ltx->ltx_tx.tx_iov = ltx->ltx_iov;
+
+        /* Init local send packet (storage for hdr, finalize() args, iov) */
+        ltx->ltx_hdr = *hdr;
+        ltx->ltx_private = private;
+        ltx->ltx_cookie = cookie;
+
+        /* frag 0 is the private copy of the header */
+        ltx->ltx_iov[0].iov_base = &ltx->ltx_hdr;
+        ltx->ltx_iov[0].iov_len = sizeof (ltx->ltx_hdr);
+
+        LASSERT (payload_niov <= PTL_MD_MAX_IOV);
+
+        for (i = 0; i < payload_niov; i++)
+        {
+                ltx->ltx_iov[1 + i].iov_base = payload_iov[i].iov_base;
+                ltx->ltx_iov[1 + i].iov_len = payload_iov[i].iov_len;
+        }
+
+        ktoenal_launch_packet (conn, &ltx->ltx_tx);
+        return (0);
+}
+
+/*
+ * Router callback: forward the packet described by 'fwd'.
+ *
+ * The next hop is the descriptor's gateway NID, unless that is this node,
+ * in which case the packet goes straight to its target NID.  If there is
+ * no connection to the next hop the forward is completed immediately with
+ * -EHOSTUNREACH; otherwise a transmit descriptor is built in the router
+ * descriptor's scratchpad and launched.  'arg' is unused.
+ */
+void
+ktoenal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
+{
+        ksock_tx_t   *tx = (ksock_tx_t *)&fwd->kprfd_scratch;
+        ksock_conn_t *conn;
+        ptl_nid_t     nid;
+
+        CDEBUG (D_NET, "Forwarding [%p] -> "LPX64" ("LPX64"))\n", fwd,
+                fwd->kprfd_gateway_nid, fwd->kprfd_target_nid);
+
+        /* I'm the gateway; must be the last hop */
+        nid = (fwd->kprfd_gateway_nid == ktoenal_lib.ni.nid) ?
+              fwd->kprfd_target_nid : fwd->kprfd_gateway_nid;
+
+        conn = ktoenal_get_conn (nid);
+        if (conn == NULL)
+        {
+                CERROR ("[%p] fwd to "LPX64" isn't a peer\n", fwd, nid);
+                kpr_fwd_done (&ktoenal_data.ksnd_router, fwd, -EHOSTUNREACH);
+                return;
+        }
+
+        /* This forward has now got a ref on conn */
+
+        tx->tx_isfwd = 1;                       /* This is a forwarding packet */
+        tx->tx_nob   = fwd->kprfd_nob;
+        tx->tx_niov  = fwd->kprfd_niov;
+        tx->tx_iov   = fwd->kprfd_iov;
+
+        ktoenal_launch_packet (conn, tx);
+}
+
+/*
+ * Spawn a kernel thread running fn(arg) and count it in ksnd_nthreads.
+ * Returns 0 on success or kernel_thread()'s negative error code.
+ */
+int
+ktoenal_thread_start (int (*fn)(void *arg), void *arg)
+{
+        long pid = kernel_thread (fn, arg, 0);
+
+        if (pid >= 0)
+        {
+                atomic_inc (&ktoenal_data.ksnd_nthreads);
+                return (0);
+        }
+
+        return ((int)pid);
+}
+
+/*
+ * Counterpart of ktoenal_thread_start(): drop the live thread count that
+ * ktoenal_module_fini() waits on.
+ */
+void
+ktoenal_thread_fini (void)
+{
+        atomic_dec (&ktoenal_data.ksnd_nthreads);
+
+        return;
+}
+
+/*
+ * Completion callback for a forwarding buffer (installed with
+ * kpr_fwd_init() in ktoenal_init_fmb()).
+ *
+ * 'arg' is the ksock_fmb_t, 'error' the forwarding result (logged when
+ * non-zero).  The buffer is returned to its pool's idle list and, if a
+ * connection is blocked waiting for a buffer from that pool, the first
+ * such connection is moved to SOCKNAL_RX_GET_FMB state and rescheduled.
+ */
+void
+ktoenal_fmb_callback (void *arg, int error)
+{
+        ksock_fmb_t   *fmb = (ksock_fmb_t *)arg;
+        ptl_hdr_t     *hdr = (ptl_hdr_t *) page_address(fmb->fmb_pages[0]);
+        ksock_conn_t  *conn;
+        unsigned long  flags;           /* irqsave state must be unsigned long */
+
+        CDEBUG (D_NET, "routed packet from "LPX64" to "LPX64": %d\n",
+                hdr->src_nid, hdr->dest_nid, error);
+
+        if (error != 0)
+                CERROR ("Failed to route packet from "LPX64" to "LPX64": %d\n",
+                        hdr->src_nid, hdr->dest_nid, error);
+
+        spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags);
+
+        /* buffer is idle again */
+        list_add (&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs);
+
+        if (!list_empty (&fmb->fmb_pool->fmp_blocked_conns))
+        {
+                conn = list_entry (fmb->fmb_pool->fmp_blocked_conns.next, ksock_conn_t, ksnc_rx_list);
+                list_del (&conn->ksnc_rx_list);
+
+                CDEBUG (D_NET, "Scheduling conn %p\n", conn);
+                LASSERT (conn->ksnc_rx_scheduled);
+                LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_FMB_SLEEP);
+
+                /* let the scheduler retry getting a buffer for it */
+                conn->ksnc_rx_state = SOCKNAL_RX_GET_FMB;
+                list_add_tail (&conn->ksnc_rx_list, &ktoenal_data.ksnd_rx_conns);
+
+                if (waitqueue_active (&ktoenal_data.ksnd_sched_waitq))
+                        wake_up (&ktoenal_data.ksnd_sched_waitq);
+        }
+
+        spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
+}
+
+/*
+ * Take an idle forwarding buffer big enough for the packet 'conn' is
+ * receiving (header plus ksnc_rx_nob_left payload bytes): the small pool
+ * is used if the packet fits in SOCKNAL_SMALL_FWD_PAGES pages, otherwise
+ * the large pool.
+ *
+ * Returns the buffer, removed from its idle list.  If the pool is empty
+ * the connection is parked on the pool's blocked list in
+ * SOCKNAL_RX_FMB_SLEEP state and NULL is returned.
+ */
+ksock_fmb_t *
+ktoenal_get_idle_fmb (ksock_conn_t *conn)
+{
+        /* NB called with sched lock held */
+        int               packet_nob = sizeof (ptl_hdr_t) + conn->ksnc_rx_nob_left;
+        ksock_fmb_pool_t *pool;
+        ksock_fmb_t      *fmb;
+
+        LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
+
+        pool = (packet_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE) ?
+               &ktoenal_data.ksnd_small_fmp :
+               &ktoenal_data.ksnd_large_fmp;
+
+        if (list_empty (&pool->fmp_idle_fmbs))
+        {
+                /* deschedule until fmb free */
+                conn->ksnc_rx_state = SOCKNAL_RX_FMB_SLEEP;
+
+                list_add_tail (&conn->ksnc_rx_list,
+                               &pool->fmp_blocked_conns);
+                return (NULL);
+        }
+
+        fmb = list_entry (pool->fmp_idle_fmbs.next, ksock_fmb_t, fmb_list);
+        list_del (&fmb->fmb_list);
+        return (fmb);
+}
+
+
+/*
+ * Prepare forwarding buffer 'fmb' for the packet currently being received
+ * on 'conn' (whose header has already been read into conn->ksnc_hdr).
+ *
+ * The header is copied into the first page of the buffer.  If the packet
+ * has no payload it is handed to the router straight away and the
+ * connection moves on to the next packet.  Otherwise the buffer's iov is
+ * set up to span the whole packet, the router descriptor is initialised
+ * and stashed in conn->ksnc_cookie, and the connection's receive iov is
+ * pointed at the payload part of the buffer.
+ *
+ * Returns 1 if the packet was forwarded immediately, 0 if the payload
+ * still has to be read (connection now in SOCKNAL_RX_BODY_FWD state).
+ */
+int
+ktoenal_init_fmb (ksock_conn_t *conn, ksock_fmb_t *fmb)
+{
+ int payload_nob = conn->ksnc_rx_nob_left;
+ int packet_nob = sizeof (ptl_hdr_t) + payload_nob;
+ int niov; /* at least the header */
+ int nob;
+
+ LASSERT (conn->ksnc_rx_scheduled);
+ LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
+ LASSERT (conn->ksnc_rx_nob_wanted == conn->ksnc_rx_nob_left);
+ LASSERT (payload_nob >= 0);
+ LASSERT (packet_nob <= fmb->fmb_npages * PAGE_SIZE);
+ LASSERT (sizeof (ptl_hdr_t) < PAGE_SIZE);
+
+ /* Got a forwarding buffer; copy the header we just read into the
+ * forwarding buffer. If there's payload start reading it
+ * into the buffer, otherwise the forwarding buffer can be kicked
+ * off immediately.
+ *
+ * NB fmb->fmb_iov spans the WHOLE packet.
+ * conn->ksnc_rx_iov spans just the payload.
+ */
+
+ fmb->fmb_iov[0].iov_base = page_address (fmb->fmb_pages[0]);
+
+ memcpy (fmb->fmb_iov[0].iov_base, &conn->ksnc_hdr, sizeof (ptl_hdr_t)); /* copy header */
+
+ if (payload_nob == 0) /* got complete packet already */
+ {
+ atomic_inc (&ktoenal_packets_received);
+
+ CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (immediate)\n", conn,
+ conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, packet_nob);
+
+ fmb->fmb_iov[0].iov_len = sizeof (ptl_hdr_t);
+
+ kpr_fwd_init (&fmb->fmb_fwd, conn->ksnc_hdr.dest_nid,
+ packet_nob, 1, fmb->fmb_iov,
+ ktoenal_fmb_callback, fmb);
+
+ kpr_fwd_start (&ktoenal_data.ksnd_router, &fmb->fmb_fwd); /* forward it now */
+
+ ktoenal_new_packet (conn, 0); /* on to next packet */
+ return (1);
+ }
+
+ /* one iov entry per buffer page the packet touches */
+ niov = 1;
+ if (packet_nob <= PAGE_SIZE) /* whole packet fits in first page */
+ fmb->fmb_iov[0].iov_len = packet_nob;
+ else
+ {
+ fmb->fmb_iov[0].iov_len = PAGE_SIZE;
+ nob = packet_nob - PAGE_SIZE;
+
+ do
+ {
+ LASSERT (niov < fmb->fmb_npages);
+ fmb->fmb_iov[niov].iov_base = page_address (fmb->fmb_pages[niov]);
+ fmb->fmb_iov[niov].iov_len = MIN (PAGE_SIZE, nob);
+ nob -= PAGE_SIZE;
+ niov++;
+ } while (nob > 0);
+ }
+
+ kpr_fwd_init (&fmb->fmb_fwd, conn->ksnc_hdr.dest_nid,
+ packet_nob, niov, fmb->fmb_iov,
+ ktoenal_fmb_callback, fmb);
+
+ /* stash router's descriptor ready for call to kpr_fwd_start */
+ conn->ksnc_cookie = &fmb->fmb_fwd;
+
+ conn->ksnc_rx_state = SOCKNAL_RX_BODY_FWD; /* read in the payload */
+
+ /* payload is desc's iov-ed buffer, but skipping the hdr */
+ LASSERT (niov <= sizeof (conn->ksnc_rx_iov) / sizeof (conn->ksnc_rx_iov[0]));
+
+ conn->ksnc_rx_iov[0].iov_base = (void *)(((unsigned long)fmb->fmb_iov[0].iov_base) + sizeof (ptl_hdr_t));
+ conn->ksnc_rx_iov[0].iov_len = fmb->fmb_iov[0].iov_len - sizeof (ptl_hdr_t);
+
+ /* remaining frags are shared verbatim with the buffer's iov */
+ if (niov > 1)
+ memcpy (&conn->ksnc_rx_iov[1], &fmb->fmb_iov[1], (niov - 1) * sizeof (struct iovec));
+
+ conn->ksnc_rx_niov = niov;
+
+ CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d reading body\n", conn,
+ conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, payload_nob);
+ return (0);
+}
+
+/*
+ * Inspect the header of a packet that is addressed to another node and
+ * decide how the connection proceeds.
+ *
+ * On return conn->ksnc_rx_state is one of:
+ *   SOCKNAL_RX_HEADER   packet dropped and there is no payload to skip
+ *   SOCKNAL_RX_SLOP     packet dropped, its payload must be read and discarded
+ *   SOCKNAL_RX_GET_FMB  packet accepted; a forwarding buffer is needed next
+ */
+void
+ktoenal_fwd_parse (ksock_conn_t *conn)
+{
+        ksock_conn_t *peer;
+        int           payload_nob;
+
+        CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d parsing header\n", conn,
+                conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, conn->ksnc_rx_nob_left);
+
+        LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER);
+        LASSERT (conn->ksnc_rx_scheduled);
+
+        /* work out how much payload follows this header */
+        switch (conn->ksnc_hdr.type) {
+        case PTL_MSG_PUT:
+                payload_nob = conn->ksnc_hdr.msg.put.length;
+                break;
+
+        case PTL_MSG_REPLY:
+                payload_nob = conn->ksnc_hdr.msg.reply.length;
+                break;
+
+        case PTL_MSG_GET:
+        case PTL_MSG_ACK:
+                payload_nob = 0;
+                break;
+
+        default:
+                /* unknown type: forget this header and read a fresh one */
+                CERROR ("Unrecognised packet type %d from "LPX64" for "LPX64"\n",
+                        conn->ksnc_hdr.type, conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid);
+                ktoenal_new_packet (conn, 0);
+                return;
+        }
+
+        if (payload_nob < 0) {
+                /* corrupt length: nothing sensible to skip */
+                CERROR ("dropping packet from "LPX64" for "LPX64": packet size %d illegal\n",
+                        conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, payload_nob);
+                ktoenal_new_packet (conn, 0);
+                return;
+        }
+
+        if (payload_nob > SOCKNAL_MAX_FWD_PAYLOAD) {
+                /* too big to forward: skip over the body */
+                CERROR ("dropping packet from "LPX64" for "LPX64": packet size %d too big\n",
+                        conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, payload_nob);
+                ktoenal_new_packet (conn, payload_nob);
+                return;
+        }
+
+        /* a packet for a directly connected peer should not come via us */
+        peer = ktoenal_get_conn (conn->ksnc_hdr.dest_nid);
+        if (peer != NULL) {
+                CERROR ("dropping packet from "LPX64" for "LPX64": target is a peer\n",
+                        conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid);
+                ktoenal_put_conn (peer);        /* release the ref taken by the lookup */
+
+                ktoenal_new_packet (conn, payload_nob); /* skip the body */
+                return;
+        }
+
+        /* accepted for forwarding: next step is to obtain a buffer */
+        conn->ksnc_rx_state = SOCKNAL_RX_GET_FMB;
+        conn->ksnc_rx_nob_left = payload_nob;   /* stash packet size */
+        conn->ksnc_rx_nob_wanted = payload_nob; /* (no slop) */
+}
+
+/*
+ * Prepare 'conn' for whatever follows the current packet.
+ *
+ * With nob_to_skip == 0 the connection is set up to read a new header.
+ * Otherwise the rx iov is filled with as many references to a shared
+ * throw-away buffer as fit, so the unwanted bytes can be read and discarded;
+ * if that does not cover everything the caller invokes us again later.
+ *
+ * Returns 1 when the connection is now at a packet boundary (ready to read
+ * a header), 0 when slop remains to be read.
+ */
+int
+ktoenal_new_packet (ksock_conn_t *conn, int nob_to_skip)
+{
+        static char ktoenal_slop_buffer[4096];
+
+        int chunk;
+        int nfrags = 0;
+        int total = 0;
+
+        if (nob_to_skip == 0) {
+                /* right at next packet boundary now */
+                conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_hdr;
+                conn->ksnc_rx_iov[0].iov_len = sizeof (ptl_hdr_t);
+                conn->ksnc_rx_niov = 1;
+
+                conn->ksnc_rx_nob_wanted = sizeof (ptl_hdr_t);
+                conn->ksnc_rx_nob_left = sizeof (ptl_hdr_t);
+                conn->ksnc_rx_state = SOCKNAL_RX_HEADER;
+                return (1);
+        }
+
+        /* set up to skip as much as possible now */
+        conn->ksnc_rx_state = SOCKNAL_RX_SLOP;
+        conn->ksnc_rx_nob_left = nob_to_skip;
+
+        for (;;) {
+                chunk = MIN (nob_to_skip, sizeof (ktoenal_slop_buffer));
+
+                /* every fragment points at the same scratch buffer */
+                conn->ksnc_rx_iov[nfrags].iov_base = ktoenal_slop_buffer;
+                conn->ksnc_rx_iov[nfrags].iov_len = chunk;
+                nfrags++;
+
+                total += chunk;
+                nob_to_skip -= chunk;
+
+                /* stop when done or when conn's rx iov is full */
+                if (nob_to_skip == 0 ||
+                    nfrags >= sizeof (conn->ksnc_rx_iov)/sizeof (conn->ksnc_rx_iov[0]))
+                        break;
+        }
+
+        conn->ksnc_rx_niov = nfrags;
+        conn->ksnc_rx_nob_wanted = total;
+        return (0);
+}
+
+/*
+ * Make receive progress on 'conn' on behalf of the scheduler.
+ *
+ * Called with ktoenal_data.ksnd_sched_lock held (saved IRQ state in
+ * *irq_flags) and returns with it held; the lock is dropped while reading
+ * from the socket and while packets are parsed.
+ *
+ * Depending on conn->ksnc_rx_state this obtains a forwarding buffer, reads
+ * header/body/slop bytes from the socket and, whenever the wanted number of
+ * bytes has arrived, advances the state machine (parse the header, finalize
+ * a local packet, start forwarding, or skip slop).
+ *
+ * On exit the connection is either put back on ksnd_rx_conns (more data may
+ * be ready), descheduled with the scheduler's reference dropped, or left
+ * parked by ktoenal_get_idle_fmb() waiting for a forwarding buffer.
+ */
+void
+ktoenal_process_receive (ksock_conn_t *conn, long *irq_flags)
+{
+        ksock_fmb_t *fmb;
+        int len;
+
+        LASSERT (atomic_read (&conn->ksnc_refcount) > 0);
+        LASSERT (conn->ksnc_rx_scheduled);
+        LASSERT (conn->ksnc_rx_ready);
+
+        /* NB: sched lock held */
+        CDEBUG(D_NET, "conn %p\n", conn);
+
+        if (conn->ksnc_rx_state != SOCKNAL_RX_GET_FMB) /* doesn't need a forwarding buffer */
+        {
+                spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, *irq_flags);
+                goto try_read;
+        }
+
+ get_fmb:
+        /* NB: sched lock held */
+        fmb = ktoenal_get_idle_fmb (conn);
+        if (fmb == NULL) /* conn descheduled waiting for idle fmb */
+                return;
+
+        spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, *irq_flags);
+
+        if (ktoenal_init_fmb (conn, fmb)) /* packet forwarded ? */
+                goto out; /* come back later for next packet */
+
+ try_read:
+        /* NB: sched lock NOT held */
+        LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER ||
+                 conn->ksnc_rx_state == SOCKNAL_RX_BODY ||
+                 conn->ksnc_rx_state == SOCKNAL_RX_BODY_FWD ||
+                 conn->ksnc_rx_state == SOCKNAL_RX_SLOP);
+
+        LASSERT (conn->ksnc_rx_niov > 0);
+        LASSERT (conn->ksnc_rx_nob_wanted > 0);
+
+        conn->ksnc_rx_ready = 0; /* data ready may race with me and set ready */
+        mb(); /* => clear BEFORE trying to read */
+
+        /* NB ktoenal_recvmsg "consumes" the iov passed to it */
+        len = ktoenal_recvmsg(conn->ksnc_file,
+                              conn->ksnc_rx_iov, conn->ksnc_rx_niov,
+                              conn->ksnc_rx_nob_wanted);
+        CDEBUG (D_NET, "%p read(%d) %d\n", conn, conn->ksnc_rx_nob_wanted, len);
+
+        if (len <= 0) /* nothing ready (EAGAIN) or EOF or error */
+        {
+                if (len != -EAGAIN && /* ! nothing to read now */
+                    len != 0) /* ! nothing to read ever */
+                {
+#warning FIXME: handle socket errors properly
+                        CERROR ("Error socknal read(%d) %p: %d\n",
+                                conn->ksnc_rx_nob_wanted, conn, len);
+                }
+                goto out; /* come back when there's data ready */
+        }
+
+        LASSERT (len <= conn->ksnc_rx_nob_wanted);
+        conn->ksnc_rx_nob_wanted -= len;
+        conn->ksnc_rx_nob_left -= len;
+
+        if (conn->ksnc_rx_nob_wanted != 0) /* short read */
+                goto out; /* try again later */
+
+        conn->ksnc_rx_ready = 1; /* assume there's more to be had */
+
+        switch (conn->ksnc_rx_state)
+        {
+        case SOCKNAL_RX_HEADER:
+                if (conn->ksnc_hdr.dest_nid != ktoenal_lib.ni.nid) /* It's not for me */
+                {
+                        ktoenal_fwd_parse (conn);
+                        switch (conn->ksnc_rx_state)
+                        {
+                        case SOCKNAL_RX_HEADER: /* skipped this packet (zero payload) */
+                                goto out; /* => come back later */
+                        case SOCKNAL_RX_SLOP: /* skipping this packet's body */
+                                goto try_read; /* => go read it */
+                        case SOCKNAL_RX_GET_FMB: /* forwarding */
+                                spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
+                                goto get_fmb; /* => go get a fwd msg buffer */
+                        default:
+                                /* a label must be followed by a statement;
+                                 * fall out of the switch to the LBUG below */
+                                break;
+                        }
+                        /* Not Reached */
+                        LBUG ();
+                }
+
+                PROF_START(lib_parse);
+                lib_parse(&ktoenal_lib, &conn->ksnc_hdr, conn); /* sets wanted_len, iovs etc */
+                PROF_FINISH(lib_parse);
+
+                if (conn->ksnc_rx_nob_wanted != 0) /* need to get some payload? */
+                {
+                        conn->ksnc_rx_state = SOCKNAL_RX_BODY;
+                        goto try_read; /* go read the payload */
+                }
+                /* Fall through (completed packet for me) */
+
+        case SOCKNAL_RX_BODY:
+                atomic_inc (&ktoenal_packets_received);
+                lib_finalize(&ktoenal_lib, NULL, conn->ksnc_cookie); /* packet is done now */
+                /* Fall through */
+
+        case SOCKNAL_RX_SLOP:
+                if (ktoenal_new_packet (conn, conn->ksnc_rx_nob_left)) /* starting new packet? */
+                        goto out; /* come back later */
+                goto try_read; /* try to finish reading slop now */
+
+        case SOCKNAL_RX_BODY_FWD:
+                CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (got body)\n", conn,
+                        conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, conn->ksnc_rx_nob_left);
+
+                atomic_inc (&ktoenal_packets_received);
+
+                /* ktoenal_init_fmb() stashed router descriptor in conn->ksnc_cookie */
+                kpr_fwd_start (&ktoenal_data.ksnd_router, (kpr_fwd_desc_t *)conn->ksnc_cookie);
+
+                LASSERT (conn->ksnc_rx_nob_left == 0); /* no slop in forwarded packets */
+
+                ktoenal_new_packet (conn, 0); /* on to next packet */
+                goto out; /* (later) */
+
+        default:
+                /* unknown state: fall out of the switch to the LBUG below */
+                break;
+        }
+
+        /* Not Reached */
+        LBUG ();
+
+ out:
+        spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
+
+        if (!conn->ksnc_rx_ready) /* no data there to read? */
+        {
+                conn->ksnc_rx_scheduled = 0; /* let socket callback schedule again */
+                ktoenal_put_conn (conn); /* release scheduler's ref */
+        }
+        else /* let scheduler call me again */
+                list_add_tail (&conn->ksnc_rx_list, &ktoenal_data.ksnd_rx_conns);
+}
+
+/*
+ * NAL receive callback: record where the payload of the packet currently
+ * being received on the connection ('private') is to be placed.
+ *
+ * The caller's iov is copied into the connection's rx iov, 'msg' is kept in
+ * ksnc_cookie, and the byte counters are set to 'mlen' (bytes wanted) and
+ * 'rlen' (bytes left in the packet).  No data is read here.
+ *
+ * Returns rlen.
+ */
+int
+ktoenal_recv(nal_cb_t *nal, void *private, lib_msg_t *msg,
+             unsigned int niov, struct iovec *iov, size_t mlen, size_t rlen)
+{
+        ksock_conn_t *conn = (ksock_conn_t *)private;
+        unsigned int  frag;
+
+        conn->ksnc_cookie = msg;
+
+        LASSERT (niov <= PTL_MD_MAX_IOV);
+
+        for (frag = 0; frag < niov; frag++) {
+                conn->ksnc_rx_iov[frag].iov_base = iov[frag].iov_base;
+                conn->ksnc_rx_iov[frag].iov_len = iov[frag].iov_len;
+        }
+        conn->ksnc_rx_niov = niov;
+
+        conn->ksnc_rx_nob_left = rlen;
+        conn->ksnc_rx_nob_wanted = mlen;
+
+        return (rlen);
+}
+
+/*
+ * Scheduler kernel thread.
+ *
+ * Until ksnd_shuttingdown is set, it alternately takes one connection off
+ * ksnd_rx_conns and one off ksnd_tx_conns and progresses each.  When there
+ * was nothing to do it sleeps on ksnd_sched_waitq; after SOCKNAL_RESCHED
+ * busy iterations it yields the CPU.
+ *
+ * ksnd_sched_lock is held around the list manipulation; the process_*
+ * helpers drop and retake it using 'flags'.
+ *
+ * Returns 0 when the thread exits.
+ */
+int
+ktoenal_scheduler (void *arg)
+{
+ unsigned long flags;
+ ksock_conn_t *conn;
+ int rc;
+ int nloops = 0;
+
+ kportal_daemonize ("ktoenal_sched");
+ kportal_blockallsigs ();
+
+ spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags);
+
+ while (!ktoenal_data.ksnd_shuttingdown)
+ {
+ int did_something = 0;
+
+ /* Ensure I progress everything semi-fairly */
+
+ if (!list_empty (&ktoenal_data.ksnd_rx_conns))
+ {
+ did_something = 1;
+ conn = list_entry (ktoenal_data.ksnd_rx_conns.next,
+ ksock_conn_t, ksnc_rx_list);
+ list_del (&conn->ksnc_rx_list);
+
+ ktoenal_process_receive (conn, &flags); /* drops & regains ksnd_sched_lock */
+ }
+
+ if (!list_empty (&ktoenal_data.ksnd_tx_conns))
+ {
+ did_something = 1;
+ conn = list_entry (ktoenal_data.ksnd_tx_conns.next,
+ ksock_conn_t, ksnc_tx_list);
+
+ list_del (&conn->ksnc_tx_list);
+ ktoenal_process_transmit (conn, &flags); /* drops and regains ksnd_sched_lock */
+ }
+
+ if (!did_something || /* nothing to do */
+ ++nloops == SOCKNAL_RESCHED) /* hogging CPU? */
+ {
+ /* must not sleep or reschedule with the spinlock held */
+ spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
+
+ nloops = 0;
+
+ if (!did_something) { /* wait for something to do */
+ rc = wait_event_interruptible (ktoenal_data.ksnd_sched_waitq,
+ ktoenal_data.ksnd_shuttingdown ||
+ !list_empty (&ktoenal_data.ksnd_rx_conns) ||
+ !list_empty (&ktoenal_data.ksnd_tx_conns));
+ /* signals were blocked above, so an interrupted wait is unexpected */
+ LASSERT (rc == 0);
+ } else
+ our_cond_resched();
+
+ spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags);
+ }
+ }
+
+ spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
+ ktoenal_thread_fini ();
+ return (0);
+}
+
+
+/*
+ * Reaper kernel thread.
+ *
+ * Until ksnd_shuttingdown is set, it removes connections one at a time from
+ * ksnd_reaper_list (under ksnd_reaper_lock) and closes them with
+ * ktoenal_close_conn(); when the list is empty it sleeps on
+ * ksnd_reaper_waitq.
+ *
+ * Returns 0 when the thread exits.
+ */
+int
+ktoenal_reaper (void *arg)
+{
+        unsigned long irqflags;
+        ksock_conn_t *victim;
+        int           rc;
+
+        kportal_daemonize ("ktoenal_reaper");
+        kportal_blockallsigs ();
+
+        while (!ktoenal_data.ksnd_shuttingdown) {
+                victim = NULL;
+
+                /* detach the first queued connection, if any */
+                spin_lock_irqsave (&ktoenal_data.ksnd_reaper_lock, irqflags);
+                if (!list_empty (&ktoenal_data.ksnd_reaper_list)) {
+                        victim = list_entry (ktoenal_data.ksnd_reaper_list.next,
+                                             ksock_conn_t, ksnc_list);
+                        list_del (&victim->ksnc_list);
+                }
+                spin_unlock_irqrestore (&ktoenal_data.ksnd_reaper_lock, irqflags);
+
+                if (victim == NULL) {
+                        /* nothing queued: wait for work or shutdown */
+                        rc = wait_event_interruptible (ktoenal_data.ksnd_reaper_waitq,
+                                                       ktoenal_data.ksnd_shuttingdown ||
+                                                       !list_empty(&ktoenal_data.ksnd_reaper_list));
+                        LASSERT (rc == 0);
+                        continue;
+                }
+
+                ktoenal_close_conn (victim);
+        }
+
+        ktoenal_thread_fini ();
+        return (0);
+}
+
+/* poll() event masks treated as "readable" and "writable" respectively */
+#define POLLREAD (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)
+#define POLLWRITE (POLLOUT | POLLWRNORM | POLLWRBAND)
+
+/*
+ * Poll kernel thread.
+ *
+ * Until ksnd_shuttingdown is set, it walks ksnd_socklist, calls the poll
+ * method of every connection's file and turns the returned mask into
+ * ktoenal_data_ready() / ktoenal_write_space() calls, then sleeps until
+ * woken.  The poll table (ksnd_pwait) is only passed to poll() while
+ * ksnd_slistchange is set, and is rebuilt after a wakeup that finds
+ * ksnd_slistchange set.
+ *
+ * Returns 0 when the thread exits.
+ *
+ * NOTE(review): ksnd_socklist_lock is dropped inside list_for_each() while
+ * 'tmp' is still used to advance; this looks unsafe if the list can change
+ * or the connection can be freed meanwhile -- confirm against the code that
+ * modifies ksnd_socklist.
+ */
+int
+ktoenal_pollthread(void *arg)
+{
+ unsigned int mask;
+ struct list_head *tmp;
+ ksock_conn_t *conn;
+
+ /* Save the task struct for waking it up */
+ ktoenal_data.ksnd_pollthread_tsk = current;
+
+ kportal_daemonize ("ktoenal_pollthread");
+ kportal_blockallsigs ();
+
+ poll_initwait(&ktoenal_data.ksnd_pwait);
+
+ while(!ktoenal_data.ksnd_shuttingdown) {
+
+ /* go INTERRUPTIBLE before polling so a wakeup arriving during the
+ * scan is not lost by the schedule_timeout() below */
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ read_lock (&ktoenal_data.ksnd_socklist_lock);
+ list_for_each(tmp, &ktoenal_data.ksnd_socklist) {
+
+ conn = list_entry(tmp, ksock_conn_t, ksnc_list);
+ /* hold a ref on conn while the list lock is dropped */
+ atomic_inc(&conn->ksnc_refcount);
+ read_unlock (&ktoenal_data.ksnd_socklist_lock);
+
+ /* only (re)register on the wait queues when the list changed */
+ mask = conn->ksnc_file->f_op->poll(conn->ksnc_file,
+ ktoenal_data.ksnd_slistchange ?
+ &ktoenal_data.ksnd_pwait : NULL);
+
+ if(mask & POLLREAD) {
+ ktoenal_data_ready(conn);
+
+ }
+ if (mask & POLLWRITE) {
+ ktoenal_write_space(conn);
+
+ }
+ if (mask & (POLLERR | POLLHUP)) {
+ /* Do error processing */
+ }
+
+ read_lock (&ktoenal_data.ksnd_socklist_lock);
+ /* NOTE(review): _ktoenal_put_conn() runs here with the read lock
+ * held when the last ref goes away -- verify that is permitted */
+ if(atomic_dec_and_test(&conn->ksnc_refcount))
+ _ktoenal_put_conn(conn);
+ }
+ ktoenal_data.ksnd_slistchange = 0;
+ read_unlock (&ktoenal_data.ksnd_socklist_lock);
+
+ schedule_timeout(MAX_SCHEDULE_TIMEOUT);
+ /* socket list changed while asleep: rebuild the poll table */
+ if(ktoenal_data.ksnd_slistchange) {
+ poll_freewait(&ktoenal_data.ksnd_pwait);
+ poll_initwait(&ktoenal_data.ksnd_pwait);
+ }
+ }
+ poll_freewait(&ktoenal_data.ksnd_pwait);
+ ktoenal_thread_fini();
+ return (0);
+}
+
+/*
+ * Note that 'conn' has data to read and, unless it is already being
+ * progressed, queue it on ksnd_rx_conns for the scheduler (taking an extra
+ * reference for the scheduler) and wake the scheduler up.
+ *
+ * Nothing is done if bit 0 of ksnc_rx_ready was already set.
+ */
+void
+ktoenal_data_ready (ksock_conn_t *conn)
+{
+        unsigned long irqflags;
+        ENTRY;
+
+        /* bit already set: nothing to do */
+        if (test_and_set_bit (0, &conn->ksnc_rx_ready))
+                goto done;
+
+        spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, irqflags);
+
+        if (!conn->ksnc_rx_scheduled) {
+                /* not being progressed: hand it to the scheduler */
+                list_add_tail (&conn->ksnc_rx_list,
+                               &ktoenal_data.ksnd_rx_conns);
+                conn->ksnc_rx_scheduled = 1;
+
+                /* extra ref for scheduler */
+                atomic_inc (&conn->ksnc_refcount);
+
+                /* This is done to avoid the effects of a sequence
+                 * of events in which the rx_ready is lost
+                 */
+                conn->ksnc_rx_ready=1;
+
+                if (waitqueue_active (&ktoenal_data.ksnd_sched_waitq))
+                        wake_up (&ktoenal_data.ksnd_sched_waitq);
+        }
+
+        spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, irqflags);
+
+ done:
+        EXIT;
+}
+
+/*
+ * Note that 'conn' can accept more data for sending.  If it has packets
+ * queued and is not already being progressed, queue it on ksnd_tx_conns
+ * (taking an extra reference for the scheduler) and wake the scheduler up.
+ *
+ * Nothing is done if bit 0 of ksnc_tx_ready was already set.
+ */
+void
+ktoenal_write_space (ksock_conn_t *conn)
+{
+        unsigned long irqflags;
+
+        CDEBUG (D_NET, "conn %p%s%s%s\n",
+                conn,
+                (conn == NULL) ? "" : (test_bit (0, &conn->ksnc_tx_ready) ? " ready" : " blocked"),
+                (conn == NULL) ? "" : (conn->ksnc_tx_scheduled ? " scheduled" : " idle"),
+                (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ? " empty" : " queued"));
+
+        /* bit already set: nothing to do */
+        if (test_and_set_bit (0, &conn->ksnc_tx_ready))
+                return;
+
+        spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, irqflags);
+
+        if (!list_empty (&conn->ksnc_tx_queue) &&       /* packets to send */
+            !conn->ksnc_tx_scheduled) {                 /* not being progressed */
+                list_add_tail (&conn->ksnc_tx_list,
+                               &ktoenal_data.ksnd_tx_conns);
+                conn->ksnc_tx_scheduled = 1;
+
+                /* extra ref for scheduler */
+                atomic_inc (&conn->ksnc_refcount);
+
+                if (waitqueue_active (&ktoenal_data.ksnd_sched_waitq))
+                        wake_up (&ktoenal_data.ksnd_sched_waitq);
+        }
+
+        spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, irqflags);
+}
+
+/* Callback table through which the portals library drives this NAL */
+nal_cb_t ktoenal_lib = {
+        .nal_data    = &ktoenal_data,           /* NAL private data */
+        .cb_send     = ktoenal_send,
+        .cb_recv     = ktoenal_recv,
+        .cb_read     = ktoenal_read,
+        .cb_write    = ktoenal_write,
+        .cb_callback = ktoenal_callback,
+        .cb_malloc   = ktoenal_malloc,
+        .cb_free     = ktoenal_free,
+        .cb_printf   = ktoenal_printf,
+        .cb_cli      = ktoenal_cli,
+        .cb_sti      = ktoenal_sti,
+        .cb_dist     = ktoenal_dist
+};
--- /dev/null
+# Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+
+# Name of the module to build; used by the rules in Rules.linux below.
+MODULE = portals
+modulenet_DATA = portals.o
+EXTRA_PROGRAMS = portals
+
+# Sources that live in ../portals and are symlinked into this directory.
+LIBLINKS := lib-dispatch.c lib-eq.c lib-init.c lib-md.c lib-me.c lib-move.c lib-msg.c lib-ni.c lib-not-impl.c lib-pid.c
+APILINKS := api-eq.c api-errno.c api-init.c api-md.c api-me.c api-ni.c api-wrap.c
+LINKS = $(APILINKS) $(LIBLINKS)
+DISTCLEANFILES = $(LINKS) link-stamp *.orig *.rej
+
+# Every linked source depends on link-stamp, whose rule creates all symlinks
+# in one go and then records a timestamp.
+$(LINKS): link-stamp
+link-stamp:
+ -list='$(LIBLINKS)'; for f in $$list; do echo $$f ; ln -sf $(srcdir)/../portals/$$f .; done
+ -list='$(APILINKS)'; for f in $$list; do echo $$f ; ln -sf $(srcdir)/../portals/$$f .; done
+ echo timestamp > link-stamp
+
+DEFS =
+portals_SOURCES = $(LINKS) module.c proc.c debug.c
+
+# Don't distribute any patched files.
+# NOTE(review): EXT2C is not defined anywhere in this file, so this loop
+# presumably removes nothing -- confirm whether it is a leftover.
+dist-hook:
+ list='$(EXT2C)'; for f in $$list; do rm -f $(distdir)/$$f; done
+
+include ../Rules.linux
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+include fs/lustre/portals/Kernelenv
+
+# libcfs.o is a composite object: kbuild links it from the objects listed in
+# the variable named <object>-objs, so the prefix must match "libcfs".
+obj-y += libcfs.o
+libcfs-objs := module.o proc.o debug.o
\ No newline at end of file
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Phil Schwan <phil@clusterfs.com>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define EXPORT_SYMTAB
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/notifier.h>
+#include <linux/kernel.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/smp_lock.h>
+#include <linux/unistd.h>
+#include <linux/interrupt.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <linux/completion.h>
+
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <asm/uaccess.h>
+#include <asm/segment.h>
+#include <linux/miscdevice.h>
+
+# define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <linux/kp30.h>
+
+/* extra bytes allocated beyond the requested size of the debug buffer */
+#define DEBUG_OVERFLOW 1024
+/* the in-memory debug buffer (vmalloc'ed by portals_debug_init) */
+static char *debug_buf = NULL;
+/* usable size of debug_buf in bytes, as requested at init time */
+static unsigned long debug_size = 0;
+/* current write offset into debug_buf */
+static atomic_t debug_off_a = ATOMIC_INIT(0);
+/* non-zero tells the dump code to write the part after the offset first */
+static int debug_wrapped;
+/* portals_debug_dumplog() sleeps here until the dump thread is done */
+wait_queue_head_t debug_ctlwq;
+/* the daemon writes unflushed data once at least this much is pending */
+#define DAEMON_SND_SIZE (64 << 10)
+
+/*
+ * Used by the daemon to keep track of the offset into debug_buf for the next
+ * write to the file. Usually, the daemon writes out the buffer
+ * from debug_daemon_next_write up to debug_off.
+ * variable usage
+ * Reader - portals_debug_msg()
+ * Writer - portals_debug_daemon()
+ * portals_debug_daemon_start() during daemon init time
+ * portals_debug_daemon_continue() to reset to debug_off
+ * portals_debug_clear_buffer() reset to debug_off for clear
+ * Note that *_start(), *_continue() & *clear_buffer() should be serialized;
+ */
+static atomic_t debug_daemon_next_write;
+
+/*
+ * A debug_daemon can be in following states
+ * stopped - stopped state means there is no debug_daemon running.
+ * accordingly, it must be in paused state
+ * a daemon is in !stopped && !paused state after
+ * "lctl debug_daemon start" creates debug_daemon successfully
+ * Variable Usage
+ * Reader - portals_debug_daemon()
+ * portals_debug_set_daemon() routines
+ * Writer - portals_debug_set_daemon() routines
+ * portals_debug_daemon() on IO error
+ * paused - a debug_daemon state is changed from !paused into paused
+ * when "lctl debug_daemon paused" is issued
+ * "lctl debug_daemon continue" gets a daemon into !paused mode
+ * Reader - portals_debug_set_daemon() routines
+ * portals_debug_msg()
+ * Writer - portals_debug_set_daemon() on init
+ * portals_debug_daemon()
+ *
+ * Daemon state diagram.
+ * (stopped, paused)
+ * | <-- debug_daemon start
+ * V
+ * (!stopped, !paused)
+ * | <-- debug_daemon pause
+ * V
+ * (!stopped, paused)
+ * | <-- debug_daemon continue
+ * V
+ * (!stopped, !paused)
+ * | <-- debug_daemon stop
+ * V
+ * (stopped, paused)
+ * Overlapped - this is the state when CDEBUG is too fast for the daemon to
+ * write out the debug buffer. That is, debug_off
+ * overlaps debug_daemon_next_write;
+ * Reader - portals_debug_msg()
+ * Writer - portals_debug_msg()
+ */
+
+/*
+ * Description on Trace Daemon Synchronization
+ *
+ * Three categories of code are synchronizing between each other
+ * 1. lctl, portals_debug_set_daemon(), the user debug control code,
+ * as well as portals_debug_clear_buffer()
+ * 2. CDEBUG, portals_debug_msg(), the debug put messages routine
+ * 3. Daemon, portals_debug_daemon(), to write out debug log file
+ *
+ *
+ * Three different controls for synchronizations
+ *
+ * 1. debug_daemon_semaphore
+ * This semaphore is used to serialize multiple lctl controls
+ * manipulating the debug daemon state. The semaphore serves as the
+ * gatekeeper that allows only one user control thread, at any given time,
+ * to access the debug daemon state and keeps the other user control requests
+ * waiting until the current control request has been serviced.
+ *
+ * 2. wait_queue_head_t lctl (paired with lctl_event flag)
+ * Lctl event is the event between portals_debug_set_daemon() and
+ * portals_debug_daemon(). Lctl is an indicator for portals_debug_daemon()
+ * to flush data out to file. portals_debug_daemon() is to use lctl event
+ * as signal channel to wakeup portals_debug_set_daemon() upon flush
+ * operation is done.
+ *
+ * Producer :
+ * portals_debug_daemon() uses to wake up
+ * portals_debug_set_daemon(), pause and stop, routines
+ * Consumer :
+ * portals_debug_set_daemon(), stop and pause operations,
+ * wait and sleep on the event
+ *
+ * 3. wait_queue_head_t daemon (paired with daemon_event flag)
+ * This is an event channel to wakeup portals_debug_daemon. Daemon
+ * wakes up to run whenever there is an event posted. Daemon handles
+ * 2 types of operations . 1. Writes data out to debug file, 2. Flushes
+ * file and terminates base on lctl event.
+ * File operation -
+ * Daemon is normally in a sleep state.
+ * Daemon is woken up through daemon event whenever CDEBUG is
+ * putting data over any 64K boundary.
+ * File flush and termination -
+ * On portals_debug_daemon_stop/pause() operations, lctl control
+ * is to wake up daemon through daemon event.
+ *
+ * We can't use sleep_on() and wake_up() to replace the daemon event because
+ * portals_debug_daemon() must catch the wakeup operation posted by
+ * portals_debug_daemon_stop/pause(). Otherwise, stop and pause may
+ * get stuck in the lctl wait event.
+ *
+ * Producer :
+ * a. portals_debug_daemon_pause() and portals_debug_daemon_stop()
+ * uses the event to wake up portals_debug_daemon()
+ * b. portals_debug_msg() uses the event to wake up
+ * portals_debug_daemon() whenever the data output is acrossing
+ * a 64K bytes boundary.
+ * Consumer :
+ * portals_debug_daemon() wakes up upon daemon event.
+ *
+ * Sequence for portals_debug_daemon_stop() operation
+ *
+ * _Portals_debug_daemon_stop()_ _Daemon_
+ * Wait_event(daemon) or running
+ * Paused = 1;
+ * Wakeup_event (daemon)
+ * Wait_event(lctl)
+ * Set force_flush flag if lctlevnt
+ * Flush data
+ * Wakeup_event (lctl)
+ * Wait_event(daemon)
+ * Stopped = 1;
+ * Wakeup_event (daemon)
+ * Wait_event(lctl)
+ * Exit daemon loop if (Stopped)
+ * Wakeup_event (lctl)
+ * Exit
+ * Return to user application
+ *
+ *
+ * _Portals_debug_msg()_ _Daemon_
+ * Wait_event(daemon) or running
+ * If (WriteStart<64K<WriteEnd)
+ * Wakeup_event(daemon)
+ * Do file IO
+ * Wait_event(daemon)
+ */
+/* State shared between the debug daemon thread, lctl control requests and
+ * the debug message path. */
+struct debug_daemon_state {
+ unsigned long overlapped; /* see "Overlapped" above; reset by daemon start/continue/clear */
+ unsigned long stopped; /* non-zero when no daemon is running */
+ atomic_t paused; /* non-zero while the daemon is paused */
+ unsigned long lctl_event; /* event for lctl */
+ wait_queue_head_t lctl; /* lctl control requests wait here for the daemon */
+ unsigned long daemon_event; /* event for daemon */
+ wait_queue_head_t daemon; /* the daemon waits here for daemon_event */
+};
+static struct debug_daemon_state debug_daemon_state;
+/* serializes the lctl control operations on the daemon */
+static DECLARE_MUTEX(debug_daemon_semaphore);
+
+/* maximum size of the daemon's log file in bytes; 0 means no limit */
+static loff_t daemon_file_size_limit;
+/* file the daemon writes to; emptied when the daemon is stopped */
+char debug_daemon_file_path[1024] = "";
+
+spinlock_t portals_debug_lock = SPIN_LOCK_UNLOCKED;
+/* prefix of the file name used by portals_do_debug_dumplog() */
+char debug_file_path[1024] = "/tmp/lustre-log";
+/* full name of the most recent dump file ("<debug_file_path>.<time>") */
+char debug_file_name[1024];
+int handled_panic; /* to avoid recursive calls to notifiers */
+char portals_upcall[1024] = "/usr/lib/lustre/portals_upcall";
+
+
+/*
+ * Kernel thread body: write the contents of the debug buffer to a new file
+ * named "<debug_file_path>.<current time>" and then wake up whoever sleeps
+ * on debug_ctlwq.
+ *
+ * If debug_wrapped is set the part of the buffer after the current offset is
+ * written first, followed by the part before it; otherwise only the bytes up
+ * to the current offset are written.
+ *
+ * 'arg' is unused.  Always returns 0; failures are only logged.
+ */
+int portals_do_debug_dumplog(void *arg)
+{
+        struct file *file;
+        void *journal_info;
+        int rc;
+        mm_segment_t oldfs;
+        unsigned long debug_off;
+
+        kportal_daemonize("");
+
+        reparent_to_init();
+        /* file I/O below must not see the caller's journal handle */
+        journal_info = current->journal_info;
+        current->journal_info = NULL;
+
+        /* debug_file_path can itself be as large as debug_file_name, so the
+         * name has to be built with a bounded write */
+        snprintf(debug_file_name, sizeof(debug_file_name), "%s.%ld",
+                 debug_file_path, CURRENT_TIME);
+        file = filp_open(debug_file_name, O_CREAT|O_TRUNC|O_RDWR, 0644);
+
+        if (!file || IS_ERR(file)) {
+                CERROR("cannot open %s for dumping", debug_file_name);
+                GOTO(out, PTR_ERR(file));
+        } else {
+                printk(KERN_ALERT "dumping log to %s ... writing ...\n",
+                       debug_file_name);
+        }
+
+        debug_off = atomic_read(&debug_off_a);
+        /* the buffer is kernel memory: lift the user-space address check */
+        oldfs = get_fs();
+        set_fs(get_ds());
+        if (debug_wrapped) {
+                rc = file->f_op->write(file, debug_buf + debug_off + 1,
+                                       debug_size-debug_off-1, &file->f_pos);
+                rc += file->f_op->write(file, debug_buf, debug_off + 1,
+                                        &file->f_pos);
+        } else {
+                rc = file->f_op->write(file, debug_buf, debug_off,&file->f_pos);
+        }
+        printk("wrote %d bytes\n", rc);
+        set_fs(oldfs);
+
+        rc = file->f_op->fsync(file, file->f_dentry, 1);
+        if (rc)
+                CERROR("sync returns %d\n", rc);
+        filp_close(file, 0);
+out:
+        current->journal_info = journal_info;
+        wake_up(&debug_ctlwq);
+        return 0;
+}
+
+/*
+ * Debug daemon kernel thread: continuously writes new contents of the debug
+ * buffer to debug_daemon_file_path.
+ *
+ * Each pass writes the data between debug_daemon_next_write and the current
+ * buffer offset, provided a flush was requested, the data wraps past the end
+ * of the buffer, or at least DAEMON_SND_SIZE bytes are pending.  After a
+ * requested flush the file is synced and the requester is woken through the
+ * lctl event.  The thread then sleeps until daemon_event is raised.
+ *
+ * The loop ends when 'stopped' is found set after a flush, or on a write or
+ * sync error.  On exit the state is set to (stopped, paused) and the lctl
+ * event is raised so that a waiting control request is released.
+ *
+ * 'arg' is unused.  Always returns 0.
+ */
+int portals_debug_daemon(void *arg)
+{
+ struct file *file;
+ void *journal_info;
+ mm_segment_t oldfs;
+ unsigned long force_flush = 0;
+ unsigned long size;
+ int rc;
+
+ kportal_daemonize("ldebug_daemon");
+ reparent_to_init();
+ journal_info = current->journal_info;
+ current->journal_info = NULL;
+
+ file = filp_open(debug_daemon_file_path,
+ O_CREAT|O_TRUNC|O_RDWR|O_LARGEFILE, 0644);
+
+ if (!file || IS_ERR(file)) {
+ CERROR("cannot open %s for logging", debug_daemon_file_path);
+ GOTO(out1, PTR_ERR(file));
+ } else {
+ printk(KERN_ALERT "daemon dumping log to %s ... writing ...\n",
+ debug_daemon_file_path);
+ }
+
+ /* daemon is now running: (!stopped, !paused) */
+ debug_daemon_state.overlapped = 0;
+ debug_daemon_state.stopped = 0;
+ atomic_set(&debug_daemon_state.paused, 0);
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+ while (1) {
+ unsigned long ending;
+ unsigned long start, tail;
+ long delta;
+
+ /* consume the event that woke us before sampling the offsets */
+ debug_daemon_state.daemon_event = 0;
+
+ ending = atomic_read(&debug_off_a);
+ start = atomic_read(&debug_daemon_next_write);
+
+ /* check if paused is imposed by lctl ? */
+ /* lctl_event is cleared by start/pause/stop before they wait for us */
+ force_flush = !debug_daemon_state.lctl_event;
+
+ /* delta < 0 means the writer has wrapped: first write up to the
+ * end of the buffer ('tail' bytes), then continue from offset 0 */
+ delta = ending - start;
+ tail = debug_size - start;
+ size = (delta >= 0) ? delta : tail;
+ while (size && (force_flush || (delta < 0) ||
+ (size >= DAEMON_SND_SIZE))) {
+ if (daemon_file_size_limit) {
+ /* never write past the configured file size */
+ int ssize = daemon_file_size_limit - file->f_pos;
+ if (size > ssize)
+ size = ssize;
+ }
+
+ rc = file->f_op->write(file, debug_buf+start,
+ size, &file->f_pos);
+ if (rc < 0) {
+ printk(KERN_ALERT
+ "Debug_daemon write error %d\n", rc);
+ goto out;
+ }
+ /* advance by what was actually written and recompute */
+ start += rc;
+ delta = ending - start;
+ tail = debug_size - start;
+ if (tail == 0)
+ start = 0;
+ if (delta >= 0)
+ size = delta;
+ else
+ size = (tail == 0) ? ending : tail;
+ if (daemon_file_size_limit == file->f_pos) {
+ // file wrapped around
+ file->f_pos = 0;
+ }
+ }
+ atomic_set(&debug_daemon_next_write, start);
+ if (force_flush) {
+ rc = file->f_op->fsync(file, file->f_dentry, 1);
+ if (rc < 0) {
+ printk(KERN_ALERT
+ "Debug_daemon sync error %d\n", rc);
+ goto out;
+ }
+ if (debug_daemon_state.stopped)
+ break;
+ /* tell the waiting control request that the flush is done */
+ debug_daemon_state.lctl_event = 1;
+ wake_up(&debug_daemon_state.lctl);
+ }
+ wait_event(debug_daemon_state.daemon,
+ debug_daemon_state.daemon_event);
+ }
+out:
+ atomic_set(&debug_daemon_state.paused, 1);
+ debug_daemon_state.stopped = 1;
+ set_fs(oldfs);
+ filp_close(file, 0);
+ current->journal_info = journal_info;
+out1:
+ /* release whoever is waiting for us, also when the open failed */
+ debug_daemon_state.lctl_event = 1;
+ wake_up(&debug_daemon_state.lctl);
+ return 0;
+}
+
+/*
+ * Print the most recent 64K of the debug buffer with printk(), in pieces of
+ * at most 1024 bytes.
+ *
+ * If the 64K window starts before the beginning of the buffer it is treated
+ * as wrapped: the end of the buffer is printed first, then the beginning up
+ * to the current offset.
+ */
+void portals_debug_print(void)
+{
+        unsigned long dumplen = 64 * 1024;
+        char *start1, *start2;
+        char *end1, *end2;
+        unsigned long debug_off = atomic_read(&debug_off_a);
+
+        start1 = debug_buf + debug_off - dumplen;
+        if (start1 < debug_buf) {
+                /* window wraps: [start1, end of buffer) then [0, debug_off) */
+                start1 += debug_size;
+                end1 = debug_buf + debug_size - 1;
+                start2 = debug_buf;
+                end2 = debug_buf + debug_off;
+        } else {
+                /* window is contiguous; the second range is empty */
+                end1 = debug_buf + debug_off;
+                start2 = debug_buf + debug_off;
+                end2 = debug_buf + debug_off;
+        }
+
+        /* "%.*s" limits each printk to 'count' bytes; a plain "%*s" would
+         * only set a minimum width and keep printing up to the next NUL */
+        while (start1 < end1) {
+                int count = MIN(1024, end1 - start1);
+                printk("%.*s", count, start1);
+                start1 += 1024;
+        }
+        while (start2 < end2) {
+                int count = MIN(1024, end2 - start2);
+                printk("%.*s", count, start2);
+                start2 += 1024;
+        }
+}
+
+/*
+ * Dump the debug buffer to a file: start a kernel thread running
+ * portals_do_debug_dumplog() and sleep on debug_ctlwq until that thread
+ * wakes us up.  If the thread cannot be started an error is printed and the
+ * function returns without dumping.
+ *
+ * NOTE(review): the thread is started before sleep_on() is called; if it
+ * reaches its wake_up() first the wakeup appears to be lost and this caller
+ * would sleep indefinitely -- confirm.
+ */
+void portals_debug_dumplog(void)
+{
+ int rc;
+ ENTRY;
+
+ init_waitqueue_head(&debug_ctlwq);
+
+ rc = kernel_thread(portals_do_debug_dumplog,
+ NULL, CLONE_VM | CLONE_FS | CLONE_FILES);
+ if (rc < 0) {
+ printk(KERN_ERR "cannot start dump thread\n");
+ return;
+ }
+ sleep_on(&debug_ctlwq);
+}
+
+/*
+ * Start the debug daemon.
+ *
+ * file: path of the log file, or NULL to keep the current
+ *       debug_daemon_file_path
+ * size: log file size limit in megabytes (0 = unlimited)
+ *
+ * Starts the daemon thread and waits until it raises the lctl event.
+ *
+ * Returns 0 once the daemon thread has signalled, -EALREADY if a daemon is
+ * already running, or the negative error from kernel_thread().
+ */
+int portals_debug_daemon_start(char *file, unsigned int size)
+{
+        int rc;
+
+        if (!debug_daemon_state.stopped)
+                return -EALREADY;
+
+        if (file != NULL) {
+                /* strncpy() does not terminate the copy when the source fills
+                 * the buffer, so reserve the last byte for the NUL */
+                strncpy(debug_daemon_file_path, file,
+                        sizeof(debug_daemon_file_path) - 1);
+                debug_daemon_file_path[sizeof(debug_daemon_file_path) - 1] = '\0';
+        }
+
+        init_waitqueue_head(&debug_daemon_state.lctl);
+        init_waitqueue_head(&debug_daemon_state.daemon);
+
+        /* the daemon only logs what is produced from now on */
+        atomic_set(&debug_daemon_next_write, atomic_read(&debug_off_a));
+
+        /* widen before shifting: in 32-bit arithmetic limits of 4096MB and
+         * above would be truncated */
+        daemon_file_size_limit = (loff_t)size << 20;
+
+        debug_daemon_state.lctl_event = 0;
+        rc = kernel_thread(portals_debug_daemon, NULL, 0);
+        if (rc < 0) {
+                printk(KERN_ERR "cannot start debug daemon thread\n");
+                debug_daemon_file_path[0] = '\0';
+                return rc;
+        }
+        /* wait until the daemon has signalled (first flush done or failure) */
+        wait_event(debug_daemon_state.lctl, debug_daemon_state.lctl_event);
+        return 0;
+}
+
+/*
+ * Pause the debug daemon: mark it paused, wake it so that it flushes, and
+ * wait until it raises the lctl event.
+ *
+ * Returns 0 on success or -EALREADY if the daemon is already paused.
+ */
+int portals_debug_daemon_pause(void)
+{
+        int rc = -EALREADY;
+
+        if (!atomic_read(&debug_daemon_state.paused)) {
+                atomic_set(&debug_daemon_state.paused, 1);
+
+                /* clear our event before raising the daemon's */
+                debug_daemon_state.lctl_event = 0;
+                debug_daemon_state.daemon_event = 1;
+                wake_up(&debug_daemon_state.daemon);
+
+                wait_event(debug_daemon_state.lctl,
+                           debug_daemon_state.lctl_event);
+                rc = 0;
+        }
+
+        return rc;
+}
+
+/*
+ * Resume a paused debug daemon.  Logging restarts from the current buffer
+ * offset.
+ *
+ * Returns 0 on success or -EINVAL if the daemon is not paused or is stopped.
+ */
+int portals_debug_daemon_continue(void)
+{
+        /* only a running-but-paused daemon can be continued */
+        if (!atomic_read(&debug_daemon_state.paused) ||
+            debug_daemon_state.stopped)
+                return -EINVAL;
+
+        debug_daemon_state.overlapped = 0;
+        atomic_set(&debug_daemon_next_write, atomic_read(&debug_off_a));
+        atomic_set(&debug_daemon_state.paused, 0);
+
+        return 0;
+}
+
+/*
+ * Stop the debug daemon: pause it first if necessary, then set 'stopped',
+ * wake the daemon and wait until it raises the lctl event.  The daemon file
+ * path is emptied afterwards.
+ *
+ * Returns 0 on success or -EALREADY if no daemon is running.
+ */
+int portals_debug_daemon_stop(void)
+{
+        if (debug_daemon_state.stopped)
+                return -EALREADY;
+
+        /* make sure pending data has been flushed before stopping */
+        if (atomic_read(&debug_daemon_state.paused) == 0)
+                portals_debug_daemon_pause();
+
+        debug_daemon_state.lctl_event = 0;
+        debug_daemon_state.stopped = 1;
+
+        debug_daemon_state.daemon_event = 1;
+        wake_up(&debug_daemon_state.daemon);
+        wait_event(debug_daemon_state.lctl, debug_daemon_state.lctl_event);
+
+        *debug_daemon_file_path = '\0';
+        return 0;
+}
+
+/*
+ * Entry point for debug daemon control requests.  Requests are serialized
+ * with debug_daemon_semaphore.
+ *
+ * cmd:      DEBUG_DAEMON_START, _STOP, _PAUSE or _CONTINUE
+ * length:   length of 'filename' including its terminating NUL (START only);
+ *           0 means no length check is done
+ * filename: log file path (START only)
+ * size:     log file size limit passed on to the start routine (START only)
+ *
+ * Returns the result of the selected operation, or -EINVAL for an unknown
+ * command or a filename that is not NUL-terminated.
+ */
+int portals_debug_set_daemon(unsigned int cmd, unsigned int length,
+                             char *filename, unsigned int size)
+{
+        int rc = -EINVAL;
+
+        down(&debug_daemon_semaphore);
+
+        switch (cmd) {
+        case DEBUG_DAEMON_START:
+                if (length == 0 || filename[length - 1] == '\0')
+                        rc = portals_debug_daemon_start(filename, size);
+                else
+                        /* rc keeps its initial -EINVAL */
+                        CERROR("Invalid filename for debug_daemon\n");
+                break;
+
+        case DEBUG_DAEMON_STOP:
+                rc = portals_debug_daemon_stop();
+                break;
+
+        case DEBUG_DAEMON_PAUSE:
+                rc = portals_debug_daemon_pause();
+                break;
+
+        case DEBUG_DAEMON_CONTINUE:
+                rc = portals_debug_daemon_continue();
+                break;
+
+        default:
+                CERROR("unknown set_daemon cmd\n");
+        }
+
+        up(&debug_daemon_semaphore);
+        return rc;
+}
+
+/*
+ * Panic notifier callback: get the debug log out, once.
+ *
+ * In interrupt context the tail of the buffer is printed to the console;
+ * otherwise the big kernel lock is released completely and the buffer is
+ * dumped to a file.  Always returns 0.
+ */
+static int panic_dumplog(struct notifier_block *self, unsigned long unused1,
+                         void *unused2)
+{
+        if (!handled_panic) {
+                /* guard against being re-entered through the notifier */
+                handled_panic = 1;
+
+                if (in_interrupt()) {
+                        portals_debug_print();
+                } else {
+                        while (current->lock_depth >= 0)
+                                unlock_kernel();
+                        portals_debug_dumplog();
+                }
+        }
+
+        return 0;
+}
+
+/* Notifier block that hooks panic_dumplog() into panic_notifier_list;
+ * registered in portals_debug_init() and removed in portals_debug_cleanup(). */
+static struct notifier_block lustre_panic_notifier = {
+ notifier_call : panic_dumplog,
+ next : NULL,
+ priority : 10000
+};
+
+/*
+ * Allocate and publish the in-memory debug buffer.
+ *
+ * bufsize: usable size of the buffer in bytes; DEBUG_OVERFLOW extra bytes
+ *          are allocated behind it.
+ *
+ * Returns 0 on success, -EALREADY if a buffer already exists and -ENOMEM
+ * if the allocation fails.  The daemon state is initialised to
+ * paused+stopped and the panic notifier is registered.
+ *
+ * The whole allocation is zeroed using the size actually requested (the
+ * previous code used debug_size before it had been assigned), and
+ * debug_size is set before debug_buf is published so that a logger which
+ * sees a non-NULL debug_buf also sees a matching size.
+ */
+int portals_debug_init(unsigned long bufsize)
+{
+        unsigned long debug_off = atomic_read(&debug_off_a);
+        void *buf;
+
+        if (debug_buf != NULL)
+                return -EALREADY;
+
+        atomic_set(&debug_daemon_state.paused, 1);
+        debug_daemon_state.stopped = 1;
+
+        buf = vmalloc(bufsize + DEBUG_OVERFLOW);
+        if (buf == NULL)
+                return -ENOMEM;
+        memset(buf, 0, bufsize + DEBUG_OVERFLOW);
+
+        debug_wrapped = 0;
+        debug_size = bufsize;
+        debug_buf = buf;
+
+        printk(KERN_INFO "Portals: allocated %lu byte debug buffer at %p.\n",
+               bufsize, debug_buf);
+        atomic_set(&debug_off_a, debug_off);
+        notifier_chain_register(&panic_notifier_list, &lustre_panic_notifier);
+
+        return 0;
+}
+
+/*
+ * Tear down the debug buffer.
+ *
+ * Unregisters the panic notifier, stops the debug daemon and frees the
+ * buffer.  Returns -EINVAL if no buffer was allocated, 0 otherwise.
+ *
+ * debug_buf is cleared before the memory is released so that later
+ * logging hits the "debug_buf is NULL" path instead of writing into freed
+ * memory, and so that portals_debug_init() can be called again.
+ */
+int portals_debug_cleanup(void)
+{
+        void *buf;
+
+        notifier_chain_unregister(&panic_notifier_list, &lustre_panic_notifier);
+        if (debug_buf == NULL)
+                return -EINVAL;
+
+        down(&debug_daemon_semaphore);
+        /* result ignored: -EALREADY simply means the daemon never ran */
+        portals_debug_daemon_stop();
+
+        buf = debug_buf;
+        debug_buf = NULL;
+        vfree(buf);
+        atomic_set(&debug_off_a, 0);
+        up(&debug_daemon_semaphore);
+
+        return 0;
+}
+
+/*
+ * Discard everything currently held in the debug buffer.
+ *
+ * Returns -EINVAL if no buffer exists, 0 otherwise.  A running daemon is
+ * paused for the duration of the reset and un-paused afterwards; a daemon
+ * that was already paused is left paused.
+ */
+int portals_debug_clear_buffer(void)
+{
+        unsigned long irq_flags;
+        unsigned long was_paused;
+
+        if (!debug_buf)
+                return -EINVAL;
+
+        down(&debug_daemon_semaphore);
+
+        was_paused = atomic_read(&debug_daemon_state.paused);
+        if (was_paused == 0)
+                portals_debug_daemon_pause();
+
+        /* rewind both the writer and the daemon's read position */
+        spin_lock_irqsave(&portals_debug_lock, irq_flags);
+        atomic_set(&debug_off_a, 0);
+        debug_wrapped = 0;
+        atomic_set(&debug_daemon_next_write, 0);
+        debug_daemon_state.overlapped = 0;
+        spin_unlock_irqrestore(&portals_debug_lock, irq_flags);
+
+        if (was_paused == 0)
+                atomic_set(&debug_daemon_state.paused, 0);
+
+        up(&debug_daemon_semaphore);
+        return 0;
+}
+
+/* Debug markers, although printed by S_PORTALS,
+ * should not be marked as such.
+ */
+#undef DEBUG_SUBSYSTEM
+#define DEBUG_SUBSYSTEM S_UNDEFINED
+/*
+ * Write a visible marker containing 'text' into the debug log, framed by
+ * two separator lines.  Returns -EINVAL when there is no debug buffer,
+ * 0 otherwise.
+ */
+int portals_debug_mark_buffer(char *text)
+{
+        if (!debug_buf)
+                return -EINVAL;
+
+        CDEBUG(0, "*******************************************************************************\n");
+        CDEBUG(0, "DEBUG MARKER: %s\n", text);
+        CDEBUG(0, "*******************************************************************************\n");
+
+        return 0;
+}
+#undef DEBUG_SUBSYSTEM
+#define DEBUG_SUBSYSTEM S_PORTALS
+
+/*
+ * Copy the contents of the debug buffer to user space, oldest data first.
+ *
+ * buf: user-space destination
+ * len: size of the destination; must be at least debug_size
+ *
+ * Returns the number of bytes made available to the caller (debug_size if
+ * the log has wrapped, otherwise the current offset), -EINVAL if there is
+ * no debug buffer, -ENOSPC if 'len' is too small, or -EFAULT if any part
+ * of the copy to user space failed.
+ *
+ * The write offset is sampled under portals_debug_lock so that it is
+ * consistent with debug_wrapped.
+ *
+ * NOTE(review): copy_to_user() is still called with the spinlock held and
+ * interrupts disabled, as in the original code; if it can fault and sleep
+ * here this needs a bounce buffer -- confirm and fix separately.
+ */
+__s32 portals_debug_copy_to_user(char *buf, unsigned long len)
+{
+        int rc;
+        unsigned long debug_off;
+        unsigned long flags;
+        unsigned long uncopied;
+
+        if (debug_buf == NULL)
+                return -EINVAL;
+
+        if (len < debug_size)
+                return -ENOSPC;
+
+        spin_lock_irqsave(&portals_debug_lock, flags);
+        debug_off = atomic_read(&debug_off_a);
+        if (debug_wrapped) {
+                /* All of this juggling with the 1s is to keep the trailing nul
+                 * (which falls at debug_buf + debug_off) at the end of what we
+                 * copy into user space */
+                uncopied = copy_to_user(buf, debug_buf + debug_off + 1,
+                                        debug_size - debug_off - 1);
+                if (uncopied == 0)
+                        uncopied = copy_to_user(buf + debug_size -
+                                                debug_off - 1,
+                                                debug_buf, debug_off + 1);
+                rc = debug_size;
+        } else {
+                uncopied = copy_to_user(buf, debug_buf, debug_off);
+                rc = debug_off;
+        }
+        spin_unlock_irqrestore(&portals_debug_lock, flags);
+
+        if (uncopied != 0)
+                rc = -EFAULT;
+
+        return rc;
+}
+
+/* FIXME: I'm not very smart; someone smarter should make this better. */
+/*
+ * Format one debug message and append it to the circular debug buffer.
+ *
+ * subsys, mask:       subsystem and debug-mask bits recorded in the prefix;
+ *                     mask also selects console printing (D_EMERG/D_ERROR)
+ * file, fn, line:     source location of the caller
+ * stack:              stack usage figure supplied by the caller
+ * format, ...:        printf-style message
+ *
+ * The whole operation runs under portals_debug_lock with interrupts
+ * disabled.  Each record is "<prefix> (<location>): <message>"; the prefix
+ * is skipped when the record is echoed to the console.  Messages may run
+ * past debug_size into the DEBUG_OVERFLOW area; the overrun is then copied
+ * back to the start of the buffer and debug_wrapped is set.
+ */
+void
+portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
+ unsigned long stack, const char *format, ...)
+{
+ va_list ap;
+ unsigned long flags;
+ int max_nob;
+ int prefix_nob;
+ int msg_nob;
+ struct timeval tv;
+ unsigned long base_offset;
+ unsigned long debug_off;
+
+ if (debug_buf == NULL) {
+ printk("portals_debug_msg: debug_buf is NULL!\n");
+ return;
+ }
+
+ spin_lock_irqsave(&portals_debug_lock, flags);
+ debug_off = atomic_read(&debug_off_a);
+ /* While the daemon is running, make sure we do not overwrite data it
+ * has not written out yet. */
+ if (!atomic_read(&debug_daemon_state.paused)) {
+ unsigned long available;
+ long delta;
+ long v = atomic_read(&debug_daemon_next_write);
+
+ /* distance between our write offset and the daemon's position,
+ * taking wrap-around into account */
+ delta = debug_off - v;
+ available = (delta>=0) ? debug_size-delta : -delta;
+ // Check if we still have enough debug buffer for CDEBUG
+ if (available < DAEMON_SND_SIZE) {
+ /* Drop CDEBUG packets until enough debug_buffer is
+ * available */
+ if (debug_daemon_state.overlapped)
+ goto out;
+ /* If this is the first time, leave a marker in the
+ * output */
+ debug_daemon_state.overlapped = 1;
+ /* NOTE(review): assigning NULL to a va_list only compiles where
+ * va_list is a pointer type; ap is re-initialised by va_start()
+ * below before use, and the replacement format has no
+ * conversions. */
+ ap = NULL;
+ format = "DEBUG MARKER: Debug buffer overlapped\n";
+ } else /* More space just became available */
+ debug_daemon_state.overlapped = 0;
+ }
+
+ /* room left up to the end of the overflow area */
+ max_nob = debug_size - debug_off + DEBUG_OVERFLOW;
+ if (max_nob <= 0) {
+ spin_unlock_irqrestore(&portals_debug_lock, flags);
+ printk("logic error in portals_debug_msg: <0 bytes to write\n");
+ return;
+ }
+
+ /* NB since we pass a non-zero sized buffer (at least) on the first
+ * print, we can be assured that by the end of all the snprinting,
+ * we _do_ have a terminated buffer, even if our message got truncated.
+ */
+
+ do_gettimeofday(&tv);
+
+ /* prefix: subsystem, mask, cpu and timestamp */
+ /* NOTE(review): the snprintf/vsnprintf return values are added to the
+ * offsets unchecked; if they report the untruncated length, a truncated
+ * message would push the offsets past the space actually written --
+ * confirm against this kernel's snprintf semantics. */
+ prefix_nob = snprintf(debug_buf + debug_off, max_nob,
+ "%02x:%06x:%d:%lu.%06lu ",
+ subsys >> 24, mask, smp_processor_id(),
+ tv.tv_sec, tv.tv_usec);
+ max_nob -= prefix_nob;
+
+ /* location header; the UML variants also record the host pid */
+#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
+ msg_nob = snprintf(debug_buf + debug_off + prefix_nob, max_nob,
+ "(%s:%d:%s() %d | %d+%lu): ",
+ file, line, fn, current->pid,
+ current->thread.extern_pid, stack);
+#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ msg_nob = snprintf(debug_buf + debug_off + prefix_nob, max_nob,
+ "(%s:%d:%s() %d | %d+%lu): ",
+ file, line, fn, current->pid,
+ current->thread.mode.tt.extern_pid, stack);
+#else
+ msg_nob = snprintf(debug_buf + debug_off + prefix_nob, max_nob,
+ "(%s:%d:%s() %d+%lu): ",
+ file, line, fn, current->pid, stack);
+#endif
+ max_nob -= msg_nob;
+
+ /* the caller's message; msg_nob now covers header + message */
+ va_start(ap, format);
+ msg_nob += vsnprintf(debug_buf + debug_off + prefix_nob + msg_nob,
+ max_nob, format, ap);
+ /* this subtracts the header length a second time, but max_nob is not
+ * used again below */
+ max_nob -= msg_nob;
+ va_end(ap);
+
+ /* Print to console, while msg is contiguous in debug_buf */
+ /* NB safely terminated see above */
+ /* a message with both D_EMERG and D_ERROR set is printed twice */
+ if ((mask & D_EMERG) != 0)
+ printk(KERN_EMERG "%s", debug_buf + debug_off + prefix_nob);
+ if ((mask & D_ERROR) != 0)
+ printk(KERN_ERR "%s", debug_buf + debug_off + prefix_nob);
+ else if (portal_printk)
+ printk("<%d>%s", portal_printk, debug_buf+debug_off+prefix_nob);
+ /* position of this record within its 64KB window, used for the daemon
+ * wake-up test below */
+ base_offset = debug_off & 0xFFFF;
+
+ debug_off += prefix_nob + msg_nob;
+ if (debug_off > debug_size) {
+ /* record spilled into the overflow area: move the spill (and its
+ * terminating nul) to the front of the buffer */
+ memcpy(debug_buf, debug_buf + debug_size,
+ debug_off - debug_size + 1);
+ debug_off -= debug_size;
+ debug_wrapped = 1;
+ }
+
+ atomic_set(&debug_off_a, debug_off);
+ /* kick the daemon once this record crosses a DAEMON_SND_SIZE boundary */
+ if (!atomic_read(&debug_daemon_state.paused) &&
+ ((base_offset+prefix_nob+msg_nob) >= DAEMON_SND_SIZE)) {
+ debug_daemon_state.daemon_event = 1;
+ wake_up(&debug_daemon_state.daemon);
+ }
+out:
+ spin_unlock_irqrestore(&portals_debug_lock, flags);
+}
+
+/* Set the global debug mask (portal_debug) and announce the new value. */
+void portals_debug_set_level(unsigned int debug_level)
+{
+        portal_debug = debug_level;
+        printk("Setting portals debug level to %08x\n", debug_level);
+}
+
+/*
+ * Run the user-mode upcall program (portals_upcall) to report an LBUG.
+ *
+ * The helper is invoked as: <upcall> LBUG <file> <fn> <line>, with a
+ * minimal HOME/PATH environment.  The outcome of the invocation is logged
+ * through CERROR in both the success and the failure case.
+ */
+void portals_run_lbug_upcall(char * file, char *fn, int line)
+{
+        char  line_str[32];
+        char *argv[6];
+        char *envp[3];
+        int   rc;
+
+        ENTRY;
+        snprintf(line_str, sizeof(line_str), "%d", line);
+
+        envp[0] = "HOME=/";
+        envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
+        envp[2] = NULL;
+
+        argv[0] = portals_upcall;
+        argv[1] = "LBUG";
+        argv[2] = file;
+        argv[3] = fn;
+        argv[4] = line_str;
+        argv[5] = NULL;
+
+        rc = call_usermodehelper(argv[0], argv, envp);
+        if (rc >= 0) {
+                CERROR("Invoked upcall %s %s %s %s %s\n",
+                       argv[0], argv[1], argv[2], argv[3], argv[4]);
+        } else {
+                CERROR("Error invoking lbug upcall %s %s %s %s %s: %d; check "
+                       "/proc/sys/portals/upcall\n",
+                       argv[0], argv[1], argv[2], argv[3], argv[4], rc);
+        }
+}
+
+
+/* Debug-log entry points made available to other kernel modules. */
+EXPORT_SYMBOL(portals_debug_dumplog);
+EXPORT_SYMBOL(portals_debug_msg);
+EXPORT_SYMBOL(portals_debug_set_level);
+EXPORT_SYMBOL(portals_run_lbug_upcall);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define EXPORT_SYMTAB
+#define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/smp_lock.h>
+#include <linux/unistd.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <asm/uaccess.h>
+#include <asm/segment.h>
+#include <linux/miscdevice.h>
+
+#include <portals/lib-p30.h>
+#include <portals/p30.h>
+#include <linux/kp30.h>
+
+/* minor number of the "portals" misc device */
+#define PORTAL_MINOR 240
+
+/* ping client entry point; looked up at run time with PORTAL_SYMBOL_GET */
+extern void (kping_client)(struct portal_ioctl_data *);
+
+/* One registered NAL command handler together with its private argument. */
+struct nal_cmd_handler {
+ nal_cmd_handler_t nch_handler;
+ void * nch_private;
+};
+
+/* Handler table indexed by NAL id (ids 1..NAL_MAX_NR are accepted by the
+ * register/unregister/dispatch code); protected by nal_cmd_sem. */
+static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1];
+struct semaphore nal_cmd_sem;
+
+#ifdef PORTAL_DEBUG
+/*
+ * Report a failed assertion: log "ASSERTION(expr) failed" with D_EMERG at
+ * the given source location, then LBUG().
+ * Only compiled when PORTAL_DEBUG is defined.
+ */
+void
+kportal_assertion_failed (char *expr, char *file, char *func, int line)
+{
+ /* NOTE(review): CDEBUG_STACK is handed the variable being initialised;
+ * its definition is not visible here -- presumably it only takes the
+ * variable's address to estimate stack usage. */
+ unsigned long stack = CDEBUG_STACK(stack);
+ portals_debug_msg(0, D_EMERG, file, func, line, stack,
+ "ASSERTION(%s) failed\n", expr);
+ LBUG();
+}
+#endif
+
+/*
+ * Turn the calling kernel thread into a daemon named 'str'.
+ *
+ * Kernels before 2.5.63 have a daemonize() without arguments, so the
+ * thread name is written into current->comm by hand there; newer kernels
+ * take the name directly.
+ */
+void
+kportal_daemonize (char *str)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,63))
+        daemonize();
+        snprintf (current->comm, sizeof (current->comm), "%s", str);
+#else
+        daemonize(str);
+#endif
+}
+
+/*
+ * Block every signal for the calling task.
+ *
+ * The blocked set is initialised to "all signals" (inverse of the empty
+ * mask) under the task's sigmask_lock and the pending state is
+ * recalculated.
+ */
+void
+kportal_blockallsigs (void)
+{
+        unsigned long flags;
+
+        spin_lock_irqsave (&current->sigmask_lock, flags);
+        siginitsetinv (&current->blocked, 0);
+        recalc_sigpending (current);
+        spin_unlock_irqrestore (&current->sigmask_lock, flags);
+}
+
+/*
+ * open() handler for the portals device: takes a module use reference.
+ * Returns -EINVAL when no inode is supplied, 0 otherwise.
+ */
+static int kportal_psdev_open(struct inode * inode, struct file * file)
+{
+        int rc = -EINVAL;
+
+        ENTRY;
+
+        if (inode != NULL) {
+                PORTAL_MODULE_USE;
+                rc = 0;
+        }
+
+        RETURN(rc);
+}
+
+/*
+ * release() handler for the portals device: drops the module use
+ * reference taken at open time.
+ * Returns -EINVAL when no inode is supplied, 0 otherwise.
+ */
+static int kportal_psdev_release(struct inode * inode, struct file * file)
+{
+        int rc = -EINVAL;
+
+        ENTRY;
+
+        if (inode != NULL) {
+                PORTAL_MODULE_UNUSE;
+                rc = 0;
+        }
+
+        RETURN(rc);
+}
+
+/* Release 'len' bytes at 'data' through PORTAL_FREE. */
+static inline void freedata(void *data, int len)
+{
+ PORTAL_FREE(data, len);
+}
+
+/*
+ * Add a route for the NID range [lo_nid, hi_nid] via the given gateway.
+ *
+ * The router's control interface is looked up at run time; -ENODEV is
+ * returned when it is not available, otherwise the result of its
+ * kprci_add_route() method.
+ */
+static int
+kportal_add_route(int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid,
+                  ptl_nid_t hi_nid)
+{
+        kpr_control_interface_t *router;
+        int                      result;
+
+        router = (kpr_control_interface_t *) PORTAL_SYMBOL_GET (kpr_control_interface);
+        if (!router)
+                return -ENODEV;
+
+        result = router->kprci_add_route (gateway_nalid, gateway_nid,
+                                          lo_nid, hi_nid);
+
+        /* drop the reference taken by the lookup above */
+        PORTAL_SYMBOL_PUT(kpr_control_interface);
+        return result;
+}
+
+/*
+ * Remove the route to 'target'.
+ *
+ * Returns -ENODEV when the router's control interface is not available,
+ * otherwise the result of its kprci_del_route() method.
+ */
+static int
+kportal_del_route(ptl_nid_t target)
+{
+        kpr_control_interface_t *router;
+        int                      result;
+
+        router = (kpr_control_interface_t *)PORTAL_SYMBOL_GET(kpr_control_interface);
+        if (!router)
+                return -ENODEV;
+
+        result = router->kprci_del_route (target);
+
+        /* drop the reference taken by the lookup above */
+        PORTAL_SYMBOL_PUT(kpr_control_interface);
+        return result;
+}
+
+/*
+ * Look up routing table entry number 'index'.
+ *
+ * On success (return 0) the gateway NAL id, gateway NID and the NID range
+ * are stored through the output pointers; on failure the outputs are left
+ * untouched.  Returns -ENODEV when the router's control interface is not
+ * available, otherwise the result of its kprci_get_route() method.
+ *
+ * NIDs are copied at full width: ptl_nid_t is printed with LPX64 below,
+ * so the former (__u32) casts dropped the upper half of each NID.
+ */
+static int
+kportal_get_route(int index, __u32 *gateway_nalidp, ptl_nid_t *gateway_nidp,
+                  ptl_nid_t *lo_nidp, ptl_nid_t *hi_nidp)
+{
+        int       gateway_nalid;
+        ptl_nid_t gateway_nid;
+        ptl_nid_t lo_nid;
+        ptl_nid_t hi_nid;
+        int       rc;
+        kpr_control_interface_t *ci;
+
+        ci = (kpr_control_interface_t *) PORTAL_SYMBOL_GET(kpr_control_interface);
+        if (ci == NULL)
+                return (-ENODEV);
+
+        rc = ci->kprci_get_route(index, &gateway_nalid, &gateway_nid, &lo_nid,
+                                 &hi_nid);
+
+        if (rc == 0) {
+                CDEBUG(D_IOCTL, "got route [%d] %d "LPX64":"LPX64" - "LPX64"\n",
+                       index, gateway_nalid, gateway_nid, lo_nid, hi_nid);
+
+                *gateway_nalidp = (__u32)gateway_nalid;
+                *gateway_nidp   = gateway_nid;
+                *lo_nidp        = lo_nid;
+                *hi_nidp        = hi_nid;
+        }
+
+        /* drop the reference taken by the lookup above */
+        PORTAL_SYMBOL_PUT (kpr_control_interface);
+        return (rc);
+}
+
+/*
+ * Forward an ioctl request to the command handler registered for 'nal'.
+ *
+ * Returns -EINVAL when the NAL id is out of range or has no handler,
+ * otherwise whatever the handler returns.  The handler table is protected
+ * by nal_cmd_sem for the duration of the call.
+ */
+static int
+kportal_nal_cmd(int nal, struct portal_ioctl_data *data)
+{
+        struct nal_cmd_handler *nch = NULL;
+        int                     rc = -EINVAL;
+
+        ENTRY;
+
+        down(&nal_cmd_sem);
+
+        if (nal > 0 && nal <= NAL_MAX_NR)
+                nch = &nal_cmd[nal];
+
+        if (nch != NULL && nch->nch_handler) {
+                CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal, data->ioc_nal_cmd);
+                rc = nch->nch_handler(data, nch->nch_private);
+        }
+
+        up(&nal_cmd_sem);
+        RETURN(rc);
+}
+
+/*
+ * Obtain a reference to the network interface handle exported by the
+ * kernel NAL identified by 'nal'.
+ *
+ * Returns NULL for the userspace TCPNAL, for an unknown NAL id (which is
+ * also logged), or when the symbol lookup itself yields NULL.  A non-NULL
+ * result must be released with kportal_put_ni().
+ */
+ptl_handle_ni_t *
+kportal_get_ni (int nal)
+{
+        ptl_handle_ni_t *ni = NULL;
+
+        switch (nal) {
+        case QSWNAL:
+                ni = PORTAL_SYMBOL_GET(kqswnal_ni);
+                break;
+        case SOCKNAL:
+                ni = PORTAL_SYMBOL_GET(ksocknal_ni);
+                break;
+        case TOENAL:
+                ni = PORTAL_SYMBOL_GET(ktoenal_ni);
+                break;
+        case GMNAL:
+                ni = PORTAL_SYMBOL_GET(kgmnal_ni);
+                break;
+        case SCIMACNAL:
+                ni = PORTAL_SYMBOL_GET(kscimacnal_ni);
+                break;
+        case TCPNAL:
+                /* userspace NAL */
+                break;
+        default:
+                /* A warning to a naive caller */
+                CERROR ("unknown nal: %d\n", nal);
+                break;
+        }
+
+        return ni;
+}
+
+/*
+ * Release a network interface reference obtained with kportal_get_ni().
+ *
+ * Calling this for TCPNAL is a bug (kportal_get_ni() never hands out a
+ * reference for it) and triggers LBUG(); an unknown NAL id is logged.
+ */
+void
+kportal_put_ni (int nal)
+{
+
+        switch (nal)
+        {
+        case QSWNAL:
+                PORTAL_SYMBOL_PUT(kqswnal_ni);
+                break;
+        case SOCKNAL:
+                PORTAL_SYMBOL_PUT(ksocknal_ni);
+                break;
+        case TOENAL:
+                PORTAL_SYMBOL_PUT(ktoenal_ni);
+                break;
+        case GMNAL:
+                PORTAL_SYMBOL_PUT(kgmnal_ni);
+                break;
+        case TCPNAL:
+                /* A lesson to a malicious caller */
+                LBUG ();
+                /* never drop into the SCIMACNAL put should LBUG() return */
+                break;
+        case SCIMACNAL:
+                PORTAL_SYMBOL_PUT(kscimacnal_ni);
+                break;
+        default:
+                CERROR ("unknown nal: %d\n", nal);
+                break;
+        }
+}
+
+/*
+ * Register 'handler' (with its 'private' argument) as the command handler
+ * for NAL id 'nal'.
+ *
+ * Returns 0 on success, -EBUSY if the NAL already has a handler, and
+ * -EINVAL if 'nal' is outside 1..NAL_MAX_NR.  An out-of-range id used to
+ * be reported as success although nothing had been registered.
+ */
+int
+kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private)
+{
+        int rc = -EINVAL;
+
+        CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler);
+
+        if (nal > 0 && nal <= NAL_MAX_NR) {
+                down(&nal_cmd_sem);
+                if (nal_cmd[nal].nch_handler != NULL) {
+                        rc = -EBUSY;
+                } else {
+                        nal_cmd[nal].nch_handler = handler;
+                        nal_cmd[nal].nch_private = private;
+                        rc = 0;
+                }
+                up(&nal_cmd_sem);
+        }
+        return rc;
+}
+
+/*
+ * Remove the command handler registered for NAL id 'nal'.
+ *
+ * Ids outside 1..NAL_MAX_NR are ignored.  Always returns 0.
+ */
+int
+kportal_nal_unregister(int nal)
+{
+        CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal);
+
+        if (nal <= 0 || nal > NAL_MAX_NR)
+                return 0;
+
+        down(&nal_cmd_sem);
+        nal_cmd[nal].nch_private = NULL;
+        nal_cmd[nal].nch_handler = NULL;
+        up(&nal_cmd_sem);
+
+        return 0;
+}
+
+
+/*
+ * ioctl() handler for the portals device.
+ *
+ * cmd: an IOC_PORTAL_* request; anything outside the portals ioctl type
+ *      and number range is rejected with -EINVAL
+ * arg: user-space address of a struct portal_ioctl_data
+ *
+ * The request is unpacked into an on-stack buffer by
+ * portal_ioctl_getdata() and then dispatched.  Requests that return data
+ * copy the updated header back to 'arg'.  Returns 0 or a negative errno.
+ *
+ * Fixes relative to the previous version:
+ *  - IOC_PORTAL_FAIL_NID released neither on success nor on failure the
+ *    NI reference taken with kportal_get_ni();
+ *  - IOC_PORTAL_GET_DEBUG returned the positive "bytes not copied" count
+ *    of copy_to_user() instead of -EFAULT;
+ *  - IOC_PORTAL_MARK_DEBUG indexed ioc_inlbuf1[-1] for a zero length.
+ */
+static int kportal_ioctl(struct inode *inode, struct file *file,
+                         unsigned int cmd, unsigned long arg)
+{
+        int err = 0;
+        char buf[1024];
+        struct portal_ioctl_data *data;
+
+        ENTRY;
+
+        if ( _IOC_TYPE(cmd) != IOC_PORTAL_TYPE ||
+             _IOC_NR(cmd) < IOC_PORTAL_MIN_NR  ||
+             _IOC_NR(cmd) > IOC_PORTAL_MAX_NR ) {
+                CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n",
+                       _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
+                RETURN(-EINVAL);
+        }
+
+        /* NOTE(review): the 800-byte limit handed to the unpacker is a
+         * magic number relative to sizeof(buf); confirm against
+         * portal_ioctl_getdata(). */
+        if (portal_ioctl_getdata(buf, buf + 800, (void *)arg)) {
+                CERROR("PORTALS ioctl: data error\n");
+                RETURN(-EINVAL);
+        }
+
+        data = (struct portal_ioctl_data *)buf;
+
+        switch (cmd) {
+        case IOC_PORTAL_SET_DAEMON:
+                RETURN (portals_debug_set_daemon (
+                                        (unsigned int) data->ioc_count,
+                                        (unsigned int) data->ioc_inllen1,
+                                        (char *) data->ioc_inlbuf1,
+                                        (unsigned int) data->ioc_misc));
+        case IOC_PORTAL_GET_DEBUG: {
+                __s32 size = portals_debug_copy_to_user(data->ioc_pbuf1,
+                                                        data->ioc_plen1);
+
+                if (size < 0)
+                        RETURN(size);
+
+                data->ioc_size = size;
+                if (copy_to_user((char *)arg, data, sizeof(*data)))
+                        RETURN(-EFAULT);
+                RETURN(0);
+        }
+        case IOC_PORTAL_CLEAR_DEBUG:
+                portals_debug_clear_buffer();
+                RETURN(0);
+        case IOC_PORTAL_PANIC:
+                if (!capable (CAP_SYS_BOOT))
+                        RETURN (-EPERM);
+                panic("debugctl-invoked panic");
+                RETURN(0);
+        case IOC_PORTAL_MARK_DEBUG:
+                /* the marker text must be present and NUL terminated */
+                if (data->ioc_inlbuf1 == NULL ||
+                    data->ioc_inllen1 == 0 ||
+                    data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0')
+                        RETURN(-EINVAL);
+                portals_debug_mark_buffer(data->ioc_inlbuf1);
+                RETURN(0);
+        case IOC_PORTAL_PING: {
+                void (*ping)(struct portal_ioctl_data *);
+
+                CDEBUG(D_IOCTL, "doing %d pings to nid "LPU64"\n",
+                       data->ioc_count, data->ioc_nid);
+                ping = PORTAL_SYMBOL_GET(kping_client);
+                if (!ping)
+                        CERROR("PORTAL_SYMBOL_GET failed\n");
+                else {
+                        ping(data);
+                        PORTAL_SYMBOL_PUT(kping_client);
+                }
+                RETURN(0);
+        }
+
+        case IOC_PORTAL_ADD_ROUTE:
+                CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n",
+                       data->ioc_nal, data->ioc_nid, data->ioc_nid2,
+                       data->ioc_nid3);
+                /* accept the range bounds in either order */
+                err = kportal_add_route(data->ioc_nal, data->ioc_nid,
+                                        MIN (data->ioc_nid2, data->ioc_nid3),
+                                        MAX (data->ioc_nid2, data->ioc_nid3));
+                break;
+
+        case IOC_PORTAL_DEL_ROUTE:
+                CDEBUG (D_IOCTL, "Removing route to "LPU64"\n", data->ioc_nid);
+                err = kportal_del_route (data->ioc_nid);
+                break;
+
+        case IOC_PORTAL_GET_ROUTE:
+                CDEBUG (D_IOCTL, "Getting route [%d]\n", data->ioc_count);
+                err = kportal_get_route(data->ioc_count, &data->ioc_nal,
+                                        &data->ioc_nid, &data->ioc_nid2,
+                                        &data->ioc_nid3);
+                if (err == 0)
+                        if (copy_to_user((char *)arg, data, sizeof (*data)))
+                                err = -EFAULT;
+                break;
+
+        case IOC_PORTAL_GET_NID: {
+                const ptl_handle_ni_t *nip;
+                ptl_process_id_t       pid;
+
+                CDEBUG (D_IOCTL, "Getting nid [%d]\n", data->ioc_nal);
+
+                nip = kportal_get_ni (data->ioc_nal);
+                if (nip == NULL)
+                        RETURN (-EINVAL);
+
+                err = PtlGetId (*nip, &pid);
+                LASSERT (err == PTL_OK);
+                kportal_put_ni (data->ioc_nal);
+
+                data->ioc_nid = pid.nid;
+                if (copy_to_user ((char *)arg, data, sizeof (*data)))
+                        err = -EFAULT;
+                break;
+        }
+
+        case IOC_PORTAL_NAL_CMD:
+                CDEBUG (D_IOCTL, "nal command nal %d cmd %d\n", data->ioc_nal,
+                        data->ioc_nal_cmd);
+                err = kportal_nal_cmd(data->ioc_nal, data);
+                if (err == 0)
+                        if (copy_to_user((char *)arg, data, sizeof (*data)))
+                                err = -EFAULT;
+                break;
+
+        case IOC_PORTAL_FAIL_NID: {
+                const ptl_handle_ni_t *nip;
+
+                CDEBUG (D_IOCTL, "fail nid: [%d] "LPU64" count %d\n",
+                        data->ioc_nal, data->ioc_nid, data->ioc_count);
+
+                nip = kportal_get_ni (data->ioc_nal);
+                if (nip == NULL)
+                        RETURN (-EINVAL);
+
+                err = PtlFailNid (*nip, data->ioc_nid, data->ioc_count);
+                /* balance the kportal_get_ni() above */
+                kportal_put_ni (data->ioc_nal);
+                break;
+        }
+
+        default:
+                err = -EINVAL;
+                break;
+        }
+
+        RETURN(err);
+}
+
+
+/* File operations of the portals control device: only open, release and
+ * ioctl are implemented. */
+static struct file_operations portalsdev_fops = {
+ ioctl: kportal_ioctl,
+ open: kportal_psdev_open,
+ release: kportal_psdev_release
+};
+
+
+/* Misc device "portals" on minor PORTAL_MINOR, served by portalsdev_fops;
+ * registered in init_kportals_module(). */
+static struct miscdevice portal_dev = {
+ PORTAL_MINOR,
+ "portals",
+ &portalsdev_fops
+};
+
+extern int insert_proc(void);
+extern void remove_proc(void);
+MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
+MODULE_DESCRIPTION("Portals v3.1");
+MODULE_LICENSE("GPL");
+
+/*
+ * Module initialisation.
+ *
+ * Brings up, in order: the 5MB debug buffer, the NAL command semaphore,
+ * the misc device, the Portals library (PtlInit) and the /proc + sysctl
+ * entries.  If a step fails, everything set up before it is torn down in
+ * reverse order and the failing step's error code is returned.
+ */
+static int init_kportals_module(void)
+{
+        int rc;
+
+        rc = portals_debug_init(5 * 1024 * 1024);
+        if (rc < 0) {
+                printk(KERN_ERR "portals_debug_init: %d\n", rc);
+                return rc;
+        }
+
+        sema_init(&nal_cmd_sem, 1);
+
+        rc = misc_register(&portal_dev);
+        if (rc != 0) {
+                CERROR("misc_register: error %d\n", rc);
+                goto out_debug;
+        }
+
+        rc = PtlInit();
+        if (rc != 0) {
+                CERROR("PtlInit: error %d\n", rc);
+                goto out_misc;
+        }
+
+        rc = insert_proc();
+        if (rc != 0) {
+                CERROR("insert_proc: error %d\n", rc);
+                goto out_ptl;
+        }
+
+        CDEBUG (D_OTHER, "portals setup OK\n");
+        return 0;
+
+        /* unwind in the reverse order of the setup above */
+ out_ptl:
+        PtlFini();
+ out_misc:
+        misc_deregister(&portal_dev);
+ out_debug:
+        portals_debug_cleanup();
+        return rc;
+}
+
+/*
+ * Module unload.
+ *
+ * Removes the /proc + sysctl entries, shuts down the Portals library,
+ * deregisters the misc device, reports any memory still accounted in
+ * portal_kmemory and finally frees the debug buffer.  Failures are only
+ * logged.
+ */
+static void exit_kportals_module(void)
+{
+        int status;
+
+        remove_proc();
+        PtlFini();
+
+        CDEBUG(D_MALLOC, "before Portals cleanup: kmem %d\n",
+               atomic_read(&portal_kmemory));
+
+        status = misc_deregister(&portal_dev);
+        if (status != 0)
+                CERROR("misc_deregister error %d\n", status);
+
+        if (atomic_read(&portal_kmemory) != 0)
+                CERROR("Portals memory leaked: %d bytes\n",
+                       atomic_read(&portal_kmemory));
+
+        /* last: nothing can be logged to the buffer after this point */
+        status = portals_debug_cleanup();
+        if (status != 0)
+                printk(KERN_ERR "portals_debug_cleanup: %d\n", status);
+}
+
+/* Symbols made available to other kernel modules: the Portals API (Ptl*),
+ * the library-side entry points (lib_*), debug tunables, and the kportal_*
+ * helpers defined in this file. */
+EXPORT_SYMBOL(lib_dispatch);
+EXPORT_SYMBOL(PtlMEAttach);
+EXPORT_SYMBOL(PtlMEInsert);
+EXPORT_SYMBOL(PtlMEUnlink);
+EXPORT_SYMBOL(PtlEQAlloc);
+EXPORT_SYMBOL(PtlMDAttach);
+EXPORT_SYMBOL(PtlMDUnlink);
+EXPORT_SYMBOL(PtlNIInit);
+EXPORT_SYMBOL(PtlNIFini);
+EXPORT_SYMBOL(PtlNIDebug);
+EXPORT_SYMBOL(PtlInit);
+EXPORT_SYMBOL(PtlFini);
+EXPORT_SYMBOL(PtlPut);
+EXPORT_SYMBOL(PtlGet);
+EXPORT_SYMBOL(ptl_err_str);
+EXPORT_SYMBOL(portal_subsystem_debug);
+EXPORT_SYMBOL(portal_debug);
+EXPORT_SYMBOL(portal_stack);
+EXPORT_SYMBOL(portal_printk);
+EXPORT_SYMBOL(PtlEQWait);
+EXPORT_SYMBOL(PtlEQFree);
+EXPORT_SYMBOL(PtlEQGet);
+EXPORT_SYMBOL(PtlGetId);
+EXPORT_SYMBOL(PtlMDBind);
+EXPORT_SYMBOL(lib_iov_nob);
+EXPORT_SYMBOL(lib_copy_iov2buf);
+EXPORT_SYMBOL(lib_copy_buf2iov);
+EXPORT_SYMBOL(lib_kiov_nob);
+EXPORT_SYMBOL(lib_copy_kiov2buf);
+EXPORT_SYMBOL(lib_copy_buf2kiov);
+EXPORT_SYMBOL(lib_finalize);
+EXPORT_SYMBOL(lib_parse);
+EXPORT_SYMBOL(lib_init);
+EXPORT_SYMBOL(lib_fini);
+EXPORT_SYMBOL(portal_kmemory);
+EXPORT_SYMBOL(kportal_daemonize);
+EXPORT_SYMBOL(kportal_blockallsigs);
+EXPORT_SYMBOL(kportal_nal_register);
+EXPORT_SYMBOL(kportal_nal_unregister);
+/* NOTE(review): kportal_assertion_failed is only defined under
+ * #ifdef PORTAL_DEBUG in this file but is exported unconditionally. */
+EXPORT_SYMBOL(kportal_assertion_failed);
+EXPORT_SYMBOL(dispatch_name);
+EXPORT_SYMBOL(kportal_get_ni);
+EXPORT_SYMBOL(kportal_put_ni);
+
+/* module load / unload entry points */
+module_init(init_kportals_module);
+module_exit (exit_kportals_module);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Author: Zach Brown <zab@zabbo.net>
+ * Author: Peter J. Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define EXPORT_SYMTAB
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/smp_lock.h>
+#include <linux/unistd.h>
+#include <net/sock.h>
+#include <linux/uio.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/list.h>
+#include <asm/uaccess.h>
+#include <asm/segment.h>
+
+#include <linux/proc_fs.h>
+#include <linux/sysctl.h>
+
+# define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <linux/kp30.h>
+#include <asm/div64.h>
+
+/* handle returned by register_sysctl_table(); NULL while unregistered */
+static struct ctl_table_header *portals_table_header = NULL;
+/* string tunables defined elsewhere and exposed through sysctl below */
+extern char debug_file_path[1024];
+extern char debug_daemon_file_path[1024];
+extern char portals_upcall[1024];
+
+/* sysctl ids: PSDEV_PORTALS is the "portals" directory, the rest are its
+ * entries */
+#define PSDEV_PORTALS (0x100)
+#define PSDEV_DEBUG 1 /* control debugging */
+#define PSDEV_SUBSYSTEM_DEBUG 2 /* control debugging */
+#define PSDEV_PRINTK 3 /* force all errors to console */
+#define PSDEV_DEBUG_PATH 4 /* crashdump log location */
+#define PSDEV_DEBUG_DUMP_PATH 5 /* crashdump tracelog location */
+#define PSDEV_PORTALS_UPCALL 6 /* User mode upcall script */
+
+/* number of entries in portals_table, not counting the terminator */
+#define PORTALS_PRIMARY_CTLCNT 6
+/* Entries of the "portals" sysctl directory: three integer tunables and
+ * three path strings, all mode 0644, terminated by an empty entry. */
+static struct ctl_table portals_table[PORTALS_PRIMARY_CTLCNT + 1] = {
+ {PSDEV_DEBUG, "debug", &portal_debug, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {PSDEV_SUBSYSTEM_DEBUG, "subsystem_debug", &portal_subsystem_debug,
+ sizeof(int), 0644, NULL, &proc_dointvec},
+ {PSDEV_PRINTK, "printk", &portal_printk, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {PSDEV_DEBUG_PATH, "debug_path", debug_file_path,
+ sizeof(debug_file_path), 0644, NULL, &proc_dostring, &sysctl_string},
+ {PSDEV_DEBUG_DUMP_PATH, "debug_daemon_path", debug_daemon_file_path,
+ sizeof(debug_daemon_file_path), 0644, NULL, &proc_dostring,
+ &sysctl_string},
+ {PSDEV_PORTALS_UPCALL, "upcall", portals_upcall,
+ sizeof(portals_upcall), 0644, NULL, &proc_dostring,
+ &sysctl_string},
+ {0}
+};
+
+/* Root table passed to register_sysctl_table(): a single read-only (0555)
+ * "portals" directory whose children are portals_table. */
+static struct ctl_table top_table[2] = {
+ {PSDEV_PORTALS, "portals", NULL, 0, 0555, portals_table},
+ {0}
+};
+
+
+#ifdef PORTALS_PROFILING
+/*
+ * profiling stuff. we do this statically for now 'cause it's simple,
+ * but we could do some tricks with elf sections to have this array
+ * automatically built.
+ */
+/* def_prof(FOO) initialises slot PROF__FOO with the name "FOO"; the
+ * remaining fields start at zero. */
+#define def_prof(FOO) [PROF__##FOO] = {#FOO, 0, }
+
+/* One entry per profiling point; insert_proc() checks that the number of
+ * entries equals MAX_PROFS. */
+struct prof_ent prof_ents[] = {
+ def_prof(our_recvmsg),
+ def_prof(our_sendmsg),
+ def_prof(socknal_recv),
+ def_prof(lib_parse),
+ def_prof(conn_list_walk),
+ def_prof(memcpy),
+ def_prof(lib_finalize),
+ def_prof(pingcli_time),
+ def_prof(gmnal_send),
+ def_prof(gmnal_recv),
+};
+
+EXPORT_SYMBOL(prof_ents);
+
+/*
+ * this function is as crazy as the proc filling api
+ * requires.
+ *
+ * buffer: page allocated for us to scribble in. the
+ * data returned to the user will be taken from here.
+ * *start: address of the pointer that will tell the
+ * caller where in buffer the data the user wants is.
+ * ppos: offset in the entire /proc file that the user
+ * currently wants.
+ * wanted: the amount of data the user wants.
+ *
+ * while going, 'curpos' is the offset in the entire
+ * file where we currently are. We only actually
+ * start filling buffer when we get to a place in
+ * the file that the user cares about.
+ *
+ * we take care to only sprintf when the user cares because
+ * we're holding a lock while we do this.
+ *
+ * we're smart and know that we generate fixed size lines.
+ * we only start writing to the buffer when the user cares.
+ * This is unpredictable because we don't snapshot the
+ * list between calls that are filling in a file from
+ * the list. The list could change mid read and the
+ * output will look very weird indeed. oh well.
+ */
+
+/*
+ * read_proc handler producing the profiling table: one header line
+ * followed by one fixed-width line per prof_ents[] entry.
+ *
+ * buffer: page to format into
+ * start:  set to where in 'buffer' the data for this read begins
+ * ppos:   offset into the whole /proc file requested by the reader
+ * wanted: number of bytes requested
+ * eof:    set to 1 once every entry has been visited
+ * data:   unused
+ *
+ * Returns the number of bytes available at *start, at most 'wanted'.
+ *
+ * A read that starts inside the header now copies only the remaining
+ * header bytes; previously header_len bytes were copied starting at
+ * header + ppos, reading past the end of the header string.
+ */
+static int prof_read_proc(char *buffer, char **start, off_t ppos, int wanted,
+                          int *eof, void *data)
+{
+        int len = 0, i;
+        int curpos;
+        char *header = "Interval Cycles_per (Starts Finishes Total)\n";
+        int header_len = strlen(header);
+        char *format = "%-15s %.12Ld (%.12d %.12d %.12Ld)";
+        int line_len = (15 + 1 + 12 + 2 + 12 + 1 + 12 + 1 + 12 + 1);
+
+        *start = buffer;
+
+        if (ppos < header_len) {
+                /* only what is left of the header from ppos onwards */
+                int diff = MIN((int)(header_len - ppos), wanted);
+                memcpy(buffer, header + ppos, diff);
+                len += diff;
+                ppos += diff;
+        }
+
+        if (len >= wanted)
+                goto out;
+
+        curpos = header_len;
+
+        for ( i = 0; i < MAX_PROFS ; i++) {
+                int copied;
+                struct prof_ent *pe = &prof_ents[i];
+                long long cycles_per;
+                /*
+                 * find the part of the array that the buffer wants
+                 */
+                if (ppos >= (curpos + line_len)) {
+                        curpos += line_len;
+                        continue;
+                }
+                /* the clever caller split a line */
+                if (ppos > curpos) {
+                        *start = buffer + (ppos - curpos);
+                }
+
+                /* average cycles per completed interval; avoid dividing
+                 * by zero for entries that never finished */
+                if (pe->finishes == 0)
+                        cycles_per = 0;
+                else
+                {
+                        cycles_per = pe->total_cycles;
+                        do_div (cycles_per, pe->finishes);
+                }
+
+                copied = sprintf(buffer + len, format, pe->str, cycles_per,
+                                 pe->starts, pe->finishes, pe->total_cycles);
+
+                len += copied;
+
+                /* pad to line len, -1 for \n */
+                if ((copied < line_len-1)) {
+                        int diff = (line_len-1) - copied;
+                        memset(buffer + len, ' ', diff);
+                        len += diff;
+                        copied += diff;
+                }
+
+                buffer[len++]= '\n';
+
+                /* bail if we have enough */
+                if (((buffer + len) - *start) >= wanted)
+                        break;
+
+                curpos += line_len;
+        }
+
+        /* lameness */
+        if (i == MAX_PROFS)
+                *eof = 1;
+ out:
+
+        return MIN(((buffer + len) - *start), wanted);
+}
+
+/*
+ * all kids love /proc :/
+ */
+/* /proc directory (relative to the /proc root) that holds the profiling
+ * entry created by insert_proc() */
+static unsigned char basedir[]="net/portals";
+#endif /* PORTALS_PROFILING */
+
+/*
+ * Create the /proc profiling entry (profiling builds only) and register
+ * the portals sysctl table (CONFIG_SYSCTL only).
+ *
+ * Returns 0 on success, -1 if the profiling table is inconsistent or the
+ * "cycles" entry cannot be created.
+ *
+ * The path buffer is now initialised to an empty string before the first
+ * strcat(); it was previously appended to while still uninitialised.
+ */
+int insert_proc(void)
+{
+#if PORTALS_PROFILING
+        unsigned char dir[128];
+        struct proc_dir_entry *ent;
+
+        if (ARRAY_SIZE(prof_ents) != MAX_PROFS) {
+                CERROR("profiling enum and array are out of sync.\n");
+                return -1;
+        }
+
+        /*
+         * This is pretty lame.  assuming that failure just
+         * means that they already existed.
+         */
+        dir[0] = '\0';
+        strcat(dir, basedir);
+        create_proc_entry(dir, S_IFDIR, 0);
+
+        strcat(dir, "/cycles");
+        ent = create_proc_entry(dir, 0, 0);
+        if (!ent) {
+                CERROR("couldn't register %s?\n", dir);
+                return -1;
+        }
+
+        ent->data = NULL;
+        ent->read_proc = prof_read_proc;
+#endif /* PORTALS_PROFILING */
+
+#ifdef CONFIG_SYSCTL
+        /* register only once */
+        if (!portals_table_header)
+                portals_table_header = register_sysctl_table(top_table, 0);
+#endif
+
+        return 0;
+}
+
+/*
+ * Undo insert_proc(): remove the profiling "cycles" entry and its
+ * directory (profiling builds only) and unregister the sysctl table
+ * (CONFIG_SYSCTL only).
+ */
+void remove_proc(void)
+{
+#if PORTALS_PROFILING
+        unsigned char path[128];
+        int base_len;
+
+        path[0] = '\0';
+        strcat(path, basedir);
+        base_len = strlen(path);
+
+        /* first the file ... */
+        strcat(path, "/cycles");
+        remove_proc_entry(path, 0);
+
+        /* ... then the directory, by cutting the path back */
+        path[base_len] = '\0';
+        remove_proc_entry(path, 0);
+#endif /* PORTALS_PROFILING */
+
+#ifdef CONFIG_SYSCTL
+        if (portals_table_header != NULL) {
+                unregister_sysctl_table(portals_table_header);
+                portals_table_header = NULL;
+        }
+#endif
+}
--- /dev/null
+Makefile
+Makefile.in
+aclocal.m4
+config.log
+config.status
+config.cache
+configure
+portals.spec
--- /dev/null
+# Copyright (C) 2002 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+EXTRA_DIST = portals.spec
\ No newline at end of file
--- /dev/null
+%define kversion @RELEASE@
+%define linuxdir @LINUX@
+%define version HEAD
+
+Summary: Sandia Portals Message Passing - utilities
+Name: portals
+Version: %{version}
+Release: 0210101748uml
+Copyright: LGPL
+Group: Utilities/System
+BuildRoot: /var/tmp/portals-%{version}-root
+Source: http://sandiaportals.org/portals-%{version}.tar.gz
+
+%description
+Sandia Portals message passing package. Contains kernel modules, libraries and utilities.
+
+%package -n portals-modules
+Summary: Kernel modules and NAL's for portals
+Group: Development/Kernel
+
+%description -n portals-modules
+Portals kernel modules and NALs for Linux %{kversion}.
+
+%package -n portals-source
+Summary: Portals kernel source for rebuilding with other kernels
+Group: Development/Kernel
+
+%description -n portals-source
+Portals kernel source for rebuilding with other kernels
+
+%prep
+%setup -n portals-%{version}
+
+%build
+rm -rf $RPM_BUILD_ROOT
+
+# Create the pristine source directory.
+srcdir=$RPM_BUILD_ROOT/usr/src/portals-%{version}
+mkdir -p $srcdir
+find . -name CVS -prune -o -print | cpio -ap $srcdir
+
+# Set an explicit path to our Linux tree, if we can.
+conf_flag=
+linuxdir=%{linuxdir}
+test -d $linuxdir && conf_flag=--with-linux=$linuxdir
+./configure $conf_flag
+make
+
+%install
+make install prefix=$RPM_BUILD_ROOT
+
+%ifarch alpha
+# this hurts me
+ conf_flag=
+ linuxdir=%{linuxdir}
+ test -d $linuxdir && conf_flag=--with-linux=$linuxdir
+ make clean
+ ./configure --enable-rtscts-myrinet $conf_flag
+ make
+ cp linux/rtscts/rtscts.o $RPM_BUILD_ROOT/lib/modules/%{kversion}/kernel/net/portals/rtscts_myrinet.o
+ cp user/myrinet_utils/mcpload $RPM_BUILD_ROOT/usr/sbin/mcpload
+%endif
+
+
+%files
+%attr(-, root, root) %doc COPYING
+%attr(-, root, root) /usr/sbin/acceptor
+%attr(-, root, root) /usr/sbin/ptlctl
+%attr(-, root, root) /usr/sbin/debugctl
+%ifarch alpha
+%attr(-, root, root) /usr/sbin/mcpload
+%endif
+%attr(-, root, root) /lib/libmyrnal.a
+%attr(-, root, root) /lib/libptlapi.a
+%attr(-, root, root) /lib/libptlctl.a
+%attr(-, root, root) /lib/libprocbridge.a
+%attr(-, root, root) /lib/libptllib.a
+%attr(-, root, root) /lib/libtcpnal.a
+%attr(-, root, root) /lib/libtcpnalutil.a
+%attr(-, root, root) /usr/include/portals/*.h
+%attr(-, root, root) /usr/include/portals/base/*.h
+%attr(-, root, root) /usr/include/linux/*.h
+
+%files -n portals-modules
+%attr(-, root, root) %doc COPYING
+%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/portals.o
+%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/kptlrouter.o
+%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/kptrxtx.o
+%ifarch alpha
+%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/p3mod.o
+%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/rtscts.o
+%endif
+%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/*nal.o
+
+%files -n portals-source
+%attr(-, root, root) /usr/src/portals-%{version}
+
+%post
+# Create the character device node (major 10, minor 240) if it is missing.
+if [ ! -e /dev/portals ]; then
+ mknod /dev/portals c 10 240
+fi
+# NOTE(review): if depmod fails the scriptlet exits here with status 0, so
+# the modules.conf aliases below are not added -- confirm this is intended.
+depmod -ae || exit 0
+
+# Add the module aliases only when no matching line is already present.
+grep -q portals /etc/modules.conf || \
+ echo 'alias char-major-10-240 portals' >> /etc/modules.conf
+
+grep -q '/dev/portals' /etc/modules.conf || \
+ echo 'alias /dev/portals portals' >> /etc/modules.conf
+
+%postun
+depmod -ae || exit 0
+
+%clean
+#rm -rf $RPM_BUILD_ROOT
+
+# end of file
--- /dev/null
+# Copyright (C) 2002 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+
+# Build the static library libportals.a from the api-* and lib-* sources.
+# CPPFLAGS is set to empty here; header search paths come from INCLUDES.
+CPPFLAGS=
+INCLUDES=-I$(top_srcdir)/portals/include -I$(top_srcdir)/include
+lib_LIBRARIES= libportals.a
+libportals_a_SOURCES= api-eq.c api-init.c api-me.c api-errno.c api-md.c api-ni.c api-wrap.c lib-dispatch.c lib-init.c lib-me.c lib-msg.c lib-not-impl.c lib-eq.c lib-md.c lib-move.c lib-ni.c lib-pid.c
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+# Pull in the shared settings from ../Kernelenv.
+include ../Kernelenv
+
+# portals.o is built in (obj-y) and is composed of the lib-* and api-*
+# objects listed in portals-objs.
+obj-y += portals.o
+portals-objs := lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o lib-move.o lib-msg.o lib-ni.o lib-not-impl.o lib-pid.o api-eq.o api-errno.o api-init.o api-md.o api-me.o api-ni.o api-wrap.o
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * api/api-eq.c
+ * User-level event queue management routines
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * PtlMDUpdate is here so that it can access the per-eventq
+ * structures.
+ */
+
+#include <portals/api-support.h>
+
+/* Global event-queue setup hook, called from PtlInit().
+ * Currently a no-op that always returns PTL_OK. */
+int ptl_eq_init(void)
+{
+ /* Nothing to do anymore... */
+ return PTL_OK;
+}
+
+/* Global event-queue teardown hook, called from PtlFini().
+ * Currently a no-op. */
+void ptl_eq_fini(void)
+{
+ /* Nothing to do anymore... */
+}
+
+/* Per-interface event-queue setup hook, called from PtlNIInit() for a newly
+ * registered NAL.  The nal argument is unused; always returns PTL_OK. */
+int ptl_eq_ni_init(nal_t * nal)
+{
+ /* Nothing to do anymore... */
+ return PTL_OK;
+}
+
+/* Per-interface event-queue teardown hook, called from PtlNIFini() when the
+ * last reference to a NAL is dropped.  The nal argument is unused. */
+void ptl_eq_ni_fini(nal_t * nal)
+{
+ /* Nothing to do anymore... */
+}
+
+/*
+ * Non-blocking fetch of the next event from a user-level event queue.
+ *
+ * eventq  handle whose cookie refers to the user-side ptl_eq_t
+ * ev      receives a copy of the event
+ *
+ * Returns PTL_NOINIT if the library is not initialised, PTL_INV_EQ if the
+ * handle names no interface, PTL_EQ_EMPTY if no new event is queued,
+ * PTL_EQ_DROPPED if an event was copied out but earlier ones were
+ * overwritten, and PTL_OK otherwise.
+ */
+int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t * ev)
+{
+        nal_t *nal;
+        ptl_eq_t *eq;
+        ptl_event_t *slot;
+        unsigned long flags;
+        int slot_idx;
+        int status;
+        ENTRY;
+
+        if (!ptl_init)
+                RETURN(PTL_NOINIT);
+
+        nal = ptl_hndl2nal(&eventq);
+        if (!nal)
+                RETURN(PTL_INV_EQ);
+
+        eq = ptl_handle2usereq(&eventq);
+        nal->lock(nal, &flags);
+
+        /* size must be a power of 2 to handle a wrapped sequence # */
+        LASSERT (eq->size != 0 &&
+                 eq->size == LOWEST_BIT_SET (eq->size));
+
+        /* the slot the next expected sequence number maps to */
+        slot_idx = eq->sequence & (eq->size - 1);
+        slot = &eq->base[slot_idx];
+        CDEBUG(D_INFO, "new_event: %p, sequence: %lu, eq->size: %u\n",
+               slot, eq->sequence, eq->size);
+
+        /* slot still holds an older event: nothing new has arrived */
+        if (PTL_SEQ_GT (eq->sequence, slot->sequence)) {
+                nal->unlock(nal, &flags);
+                RETURN(PTL_EQ_EMPTY);
+        }
+
+        *ev = *slot;
+
+        /* The NAL cannot know the interface number, so fill it into the
+         * unlinked_me handle here when there is one to pass back. */
+        if (!PtlHandleEqual (ev->unlinked_me, PTL_HANDLE_NONE))
+                ev->unlinked_me.nal_idx = eventq.nal_idx;
+
+        /* ensure event is delivered correctly despite possible
+           races with lib_finalize */
+        if (eq->sequence == slot->sequence) {
+                status = PTL_OK;
+        } else {
+                CERROR("DROPPING EVENT: eq seq %lu ev seq %lu\n",
+                       eq->sequence, slot->sequence);
+                status = PTL_EQ_DROPPED;
+        }
+
+        /* resynchronise on the event just consumed */
+        eq->sequence = slot->sequence + 1;
+        nal->unlock(nal, &flags);
+        RETURN(status);
+}
+
+
+/*
+ * Blocking variant of PtlEQGet(): polls the queue until it yields something
+ * other than PTL_EQ_EMPTY, calling the NAL's yield hook (if it has one)
+ * between polls.  Returns whatever PtlEQGet() finally returned.
+ */
+int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out)
+{
+        int status;
+
+        for (;;) {
+                nal_t *nal;
+
+                /* PtlEQGet does the handle checking */
+                status = PtlEQGet(eventq_in, event_out);
+                if (status != PTL_EQ_EMPTY)
+                        break;
+
+                nal = ptl_hndl2nal(&eventq_in);
+                if (nal->yield)
+                        nal->yield(nal);
+        }
+
+        return status;
+}
+
+#ifndef __KERNEL__
+/* Jump target armed by PtlEQWait_timeout() via setjmp(). */
+static jmp_buf eq_jumpbuf;
+
+/* SIGALRM handler installed by PtlEQWait_timeout(): abandons the wait by
+ * jumping back to the setjmp() point with value -1.  The signal number is
+ * not used. */
+static void eq_timeout(int signal)
+{
+ longjmp(eq_jumpbuf, -1);
+}
+
+/*
+ * PtlEQWait() bounded by a SIGALRM-based timeout (user space only).
+ *
+ * eventq_in  event queue to wait on
+ * event_out  receives the event on success
+ * timeout    maximum wait, in seconds
+ *
+ * Returns the PtlEQWait() result, or PTL_EQ_EMPTY if the alarm fired first.
+ *
+ * Any alarm the caller already had pending is re-armed on exit with the
+ * time it has left.  If that alarm was due sooner than our timeout, the
+ * wait is cut short at that point; the alarm is consumed by our handler
+ * and is not re-armed.
+ */
+int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out,
+                      int timeout)
+{
+        /* static so the values survive the longjmp() out of eq_timeout() */
+        static void (*prev) (int);
+        static int left_over;
+        time_t time_at_start;
+        int waited;
+        int rc;
+
+        if (setjmp(eq_jumpbuf)) {
+                /* we got here from eq_timeout(): the alarm expired */
+                signal(SIGALRM, prev);
+                /* Only re-arm when the caller's alarm outlasts our timeout.
+                 * Otherwise left_over - timeout is <= 0, which alarm()
+                 * would take as a huge unsigned delay (or as "cancel"). */
+                if (left_over > timeout)
+                        alarm(left_over - timeout);
+                return PTL_EQ_EMPTY;
+        }
+
+        left_over = alarm(timeout);
+        prev = signal(SIGALRM, eq_timeout);
+        time_at_start = time(NULL);
+        /* alarm() returns 0 when no alarm was pending.  In that case do not
+         * call alarm(left_over), because alarm(0) would cancel the timeout
+         * just armed.  Shorten the wait only for a real, sooner alarm. */
+        if (left_over != 0 && left_over < timeout)
+                alarm(left_over);
+
+        rc = PtlEQWait(eventq_in, event_out);
+
+        signal(SIGALRM, prev);
+        if (left_over == 0) {
+                /* no previous alarm: just disarm ours */
+                alarm(0);
+        } else {
+                /* give the caller's alarm back, less the time spent here;
+                 * never pass 0, which would cancel it */
+                waited = (int)(time(NULL) - time_at_start);
+                alarm(left_over > waited ? left_over - waited : 1);
+        }
+
+        return rc;
+}
+
+#endif
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * api/api-errno.c
+ * Instantiate the string table of errors
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <portals/api-support.h>
+
+/* If you change these, you must update the number table in portals/errno.h */
+/* Printable names of the Portals error codes.  The surrounding comments
+ * tie the ordering to the number table in portals/errno.h; presumably an
+ * error code is used directly as the index -- confirm against that header. */
+const char *ptl_err_str[] = {
+ "PTL_OK",
+ "PTL_SEGV",
+
+ "PTL_NOSPACE",
+ "PTL_INUSE",
+ "PTL_VAL_FAILED",
+
+ "PTL_NAL_FAILED",
+ "PTL_NOINIT",
+ "PTL_INIT_DUP",
+ "PTL_INIT_INV",
+ "PTL_AC_INV_INDEX",
+
+ "PTL_INV_ASIZE",
+ "PTL_INV_HANDLE",
+ "PTL_INV_MD",
+ "PTL_INV_ME",
+ "PTL_INV_NI",
+/* If you change these, you must update the number table in portals/errno.h */
+ "PTL_ILL_MD",
+ "PTL_INV_PROC",
+ "PTL_INV_PSIZE",
+ "PTL_INV_PTINDEX",
+ "PTL_INV_REG",
+
+ "PTL_INV_SR_INDX",
+ "PTL_ML_TOOLONG",
+ "PTL_ADDR_UNKNOWN",
+ "PTL_INV_EQ",
+ "PTL_EQ_DROPPED",
+
+ "PTL_EQ_EMPTY",
+ "PTL_NOUPDATE",
+ "PTL_FAIL",
+ "PTL_NOT_IMPLEMENTED",
+ "PTL_NO_ACK",
+
+ "PTL_IOV_TOO_MANY",
+ "PTL_IOV_TOO_SMALL",
+
+ "PTL_EQ_INUSE",
+ "PTL_MD_INUSE"
+};
+/* If you change these, you must update the number table in portals/errno.h */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * api/api-init.c
+ * Initialization and global data for the p30 user side library
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * All handles have their interface number stored in the second 16 bit word
+ */
+
+#include <portals/api-support.h>
+
+/* Non-zero between PtlInit() and PtlFini(); the API entry points test it
+ * and return PTL_NOINIT when it is clear. */
+int ptl_init;
+/* Debug settings; presumably bit masks read by the CDEBUG/CERROR
+ * machinery -- confirm against the debug headers. */
+unsigned int portal_subsystem_debug = 0xfff7e3ff;
+unsigned int portal_debug = ~0;
+unsigned int portal_printk;
+unsigned int portal_stack;
+
+#ifdef __KERNEL__
+/* Kernel builds only; presumably a running total of memory allocated
+ * through the PORTAL_ALLOC macros -- TODO confirm. */
+atomic_t portal_kmemory = ATOMIC_INIT(0);
+#endif
+
+/* Set to 1 by PtlInit(); not cleared by PtlFini() in this file. */
+int __p30_initialized;
+/* Per-NAL state for the Myrinet and IP interfaces; not written anywhere
+ * in this file. */
+int __p30_myr_initialized;
+int __p30_ip_initialized;
+ptl_handle_ni_t __myr_ni_handle;
+ptl_handle_ni_t __ip_ni_handle;
+
+/* Timeouts for the two NALs; units are not stated here. */
+int __p30_myr_timeout = 10;
+int __p30_ip_timeout;
+
+/*
+ * One-time initialisation of the API side of the library.  Safe to call
+ * repeatedly: once initialised, later calls do nothing.  Always returns
+ * PTL_OK.
+ */
+int PtlInit(void)
+{
+        if (!ptl_init) {
+                /* PtlFini() tears these down in the opposite order */
+                ptl_ni_init();
+                ptl_me_init();
+                ptl_eq_init();
+
+                ptl_init = 1;
+                __p30_initialized = 1;
+        }
+
+        return PTL_OK;
+}
+
+
+/* Tear down the API side of the library and clear ptl_init.
+ * NOTE(review): runs the fini hooks even if PtlInit() was never called,
+ * and leaves __p30_initialized set -- confirm both are intended. */
+void PtlFini(void)
+{
+
+ /* Reverse order of initialization */
+ ptl_eq_fini();
+ ptl_me_fini();
+ ptl_ni_fini();
+ ptl_init = 0;
+}
--- /dev/null
+/*
+ * api-p30/md.c
+ *
+ * Memory descriptor functions that need address validation
+ * There are a few standing issues...
+ * - Addresses are invalidated by the library without telling us.
+ */
+#include <portals/api-support.h>
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * api/api-me.c
+ * Match Entry local operations.
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <portals/api-support.h>
+
+/* Global match-entry setup hook, called from PtlInit().
+ * Nothing to set up; always returns PTL_OK. */
+int ptl_me_init(void)
+{
+ return PTL_OK;
+}
+/* Global match-entry teardown hook, called from PtlFini(). */
+void ptl_me_fini(void)
+{ /* Nothing to do */
+}
+/* Per-interface match-entry setup hook, called from PtlNIInit().
+ * The nal argument is unused; always returns PTL_OK. */
+int ptl_me_ni_init(nal_t * nal)
+{
+ return PTL_OK;
+}
+
+/* Per-interface match-entry teardown hook, called from PtlNIFini().
+ * The nal argument is unused. */
+void ptl_me_ni_fini(nal_t * nal)
+{ /* Nothing to do... */
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * api/api-ni.c
+ * Network Interface code
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <portals/api-support.h>
+
+/* Capacity of the interface table. */
+#define MAX_NIS 8
+/* Registered NALs, indexed by the nal_idx stored in handles; a NULL entry
+ * is a free slot. */
+static nal_t *ptl_interfaces[MAX_NIS];
+/* Incremented by PtlNIInit() when it registers a NAL and decremented by
+ * PtlNIFini() when it removes one. */
+int ptl_num_interfaces = 0;
+
+/*
+ * Map a handle to the NAL registered under the handle's nal_idx.
+ * Returns NULL when the index is outside the table; the returned table
+ * entry may itself be NULL when the slot is unused.
+ */
+nal_t *ptl_hndl2nal(ptl_handle_any_t *handle)
+{
+        unsigned int slot = handle->nal_idx;
+
+        /* XXX we really rely on the caller NOT racing with interface
+         * setup/teardown.  That ensures her NI handle can't get
+         * invalidated out from under her (or worse, swapped for a
+         * completely different interface!) */
+
+        return (slot < MAX_NIS) ? ptl_interfaces[slot] : NULL;
+}
+
+/*
+ * Reset the interface table to "no NALs registered".  Called from
+ * PtlInit().  Always returns PTL_OK.
+ */
+int ptl_ni_init(void)
+{
+        int i;
+
+        for (i = 0; i < MAX_NIS; i++)
+                ptl_interfaces[i] = NULL;
+
+        /* Keep the count in step with the emptied table.  Without this a
+         * PtlFini()/PtlInit() cycle would start with a stale count. */
+        ptl_num_interfaces = 0;
+
+        return PTL_OK;
+}
+
+/*
+ * Shut down every NAL still registered and empty the interface table.
+ * Called from PtlFini().
+ */
+void ptl_ni_fini(void)
+{
+        int i;
+
+        for (i = 0; i < MAX_NIS; i++) {
+                nal_t *nal = ptl_interfaces[i];
+                if (!nal)
+                        continue;
+
+                if (nal->shutdown)
+                        nal->shutdown(nal, i);
+
+                /* drop the entry so ptl_hndl2nal() cannot hand out a NAL
+                 * that has already been shut down */
+                ptl_interfaces[i] = NULL;
+        }
+
+        ptl_num_interfaces = 0;
+}
+
+/* Serialisation of PtlNIInit()/PtlNIFini().  Kernel builds take a real
+ * mutex; all other builds compile the enter/exit helpers as no-ops. */
+#ifdef __KERNEL__
+DECLARE_MUTEX(ptl_ni_init_mutex);
+
+/* Acquire ptl_ni_init_mutex. */
+static void ptl_ni_init_mutex_enter (void)
+{
+ down (&ptl_ni_init_mutex);
+}
+
+/* Release ptl_ni_init_mutex. */
+static void ptl_ni_init_mutex_exit (void)
+{
+ up (&ptl_ni_init_mutex);
+}
+
+#else
+/* Non-kernel build: no locking is performed. */
+static void ptl_ni_init_mutex_enter (void)
+{
+}
+
+/* Non-kernel build: no locking is performed. */
+static void ptl_ni_init_mutex_exit (void)
+{
+}
+
+#endif
+
+/*
+ * Bring up a network interface and return a handle to it.
+ *
+ * interface      NAL start-up function; called with the table index the
+ *                new NAL would occupy plus the three size/pid arguments
+ * ptl_size       portal table size passed through to the NAL
+ * acl_size       access control table size passed through to the NAL
+ * requested_pid  pid passed through to the NAL
+ * handle         receives the interface index in nal_idx
+ *
+ * Returns PTL_NOINIT before PtlInit(), PTL_NAL_FAILED if the NAL could not
+ * be started, PTL_NOSPACE if the interface table is full, else PTL_OK.  If
+ * the NAL is already registered, its reference count is bumped and the
+ * existing index is returned.
+ */
+int PtlNIInit(ptl_interface_t interface, ptl_pt_index_t ptl_size,
+              ptl_ac_index_t acl_size, ptl_pid_t requested_pid,
+              ptl_handle_ni_t * handle)
+{
+        nal_t *nal;
+        int slot;
+        int i;
+
+        if (!ptl_init)
+                return PTL_NOINIT;
+
+        ptl_ni_init_mutex_enter ();
+
+        /* Find the first free slot.  PtlNIFini() clears a slot and
+         * decrements ptl_num_interfaces, so the table can have holes and
+         * the count is not necessarily the index of a free entry.  Using
+         * the count as an index could overwrite a live interface.
+         * slot == MAX_NIS means the table is full. */
+        for (slot = 0; slot < MAX_NIS; slot++)
+                if (ptl_interfaces[slot] == NULL)
+                        break;
+
+        nal = interface(slot, ptl_size, acl_size, requested_pid);
+
+        if (!nal) {
+                ptl_ni_init_mutex_exit ();
+                return PTL_NAL_FAILED;
+        }
+
+        /* search the whole table: live entries may sit above a hole */
+        for (i = 0; i < MAX_NIS; i++) {
+                if (ptl_interfaces[i] == nal) {
+                        nal->refct++;
+                        handle->nal_idx = i;
+                        fprintf(stderr, "Returning existing NAL (%d)\n", i);
+                        ptl_ni_init_mutex_exit ();
+                        return PTL_OK;
+                }
+        }
+        nal->refct = 1;
+
+        handle->nal_idx = slot;
+        if (slot >= MAX_NIS) {
+                /* no room: undo the NAL start-up */
+                if (nal->shutdown)
+                        nal->shutdown (nal, slot);
+                ptl_ni_init_mutex_exit ();
+                return PTL_NOSPACE;
+        }
+
+        ptl_interfaces[slot] = nal;
+        ptl_num_interfaces++;
+
+        ptl_eq_ni_init(nal);
+        ptl_me_ni_init(nal);
+
+        ptl_ni_init_mutex_exit ();
+        return PTL_OK;
+}
+
+
+/*
+ * Drop one reference to a network interface; the last reference shuts the
+ * NAL down and removes it from the interface table.
+ *
+ * Returns PTL_NOINIT before PtlInit(), PTL_INV_HANDLE if the handle names
+ * no interface, PTL_OK while references remain, otherwise the result of
+ * the NAL's shutdown hook (PTL_OK if it has none).
+ */
+int PtlNIFini(ptl_handle_ni_t ni)
+{
+        nal_t *nal;
+        int status;
+
+        if (!ptl_init)
+                return PTL_NOINIT;
+
+        ptl_ni_init_mutex_enter ();
+
+        nal = ptl_hndl2nal (&ni);
+        if (nal == NULL) {
+                status = PTL_INV_HANDLE;
+                goto out;
+        }
+
+        nal->refct--;
+        if (nal->refct > 0) {
+                /* still in use by someone else */
+                status = PTL_OK;
+                goto out;
+        }
+
+        ptl_me_ni_fini(nal);
+        ptl_eq_ni_fini(nal);
+
+        if (nal->shutdown)
+                status = nal->shutdown(nal, ni.nal_idx);
+        else
+                status = PTL_OK;
+
+        ptl_interfaces[ni.nal_idx] = NULL;
+        ptl_num_interfaces--;
+
+out:
+        ptl_ni_init_mutex_exit ();
+        return status;
+}
+
+/* Produce the interface handle for an arbitrary handle.  The input handle
+ * is copied unchanged into *ni_out (cookie included); no validation is
+ * done.  Always returns PTL_OK. */
+int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * ni_out)
+{
+ *ni_out = handle_in;
+
+ return PTL_OK;
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * api/api-wrap.c
+ * User-level wrappers that dispatch across the protection boundaries
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Assumes the handle encodes the network number in the second 16 bit word
+ */
+
+# define DEBUG_SUBSYSTEM S_PORTALS
+#include <portals/api-support.h>
+
+/*
+ * Hand an API call to the NAL that owns any_h.
+ *
+ * any_h    handle used only to locate the NAL
+ * cmd      dispatch index of the call (PTL_*)
+ * argbuf   argument block, argsize bytes
+ * retbuf   result block, retsize bytes, filled in by the NAL
+ *
+ * Returns PTL_NOINIT or PTL_INV_HANDLE if the call could not be forwarded,
+ * else PTL_OK.  The call's own status is left in the result block; the
+ * return value of the NAL's forward hook is not examined.
+ */
+static int do_forward(ptl_handle_any_t any_h, int cmd, void *argbuf,
+                      int argsize, void *retbuf, int retsize)
+{
+        nal_t *target;
+
+        if (!ptl_init) {
+                fprintf(stderr, "PtlGetId: Not initialized\n");
+                return PTL_NOINIT;
+        }
+
+        target = ptl_hndl2nal(&any_h);
+        if (target) {
+                target->forward(target, cmd, argbuf, argsize, retbuf, retsize);
+                return PTL_OK;
+        }
+
+        return PTL_INV_HANDLE;
+}
+
+/*
+ * Query the process id of an interface.  *id is written only when the call
+ * was forwarded and id is non-NULL.  Returns the forwarding error, or the
+ * status reported in the result block.
+ */
+int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id)
+{
+        PtlGetId_in request;
+        PtlGetId_out reply;
+        int status;
+
+        request.handle_in = ni_handle;
+
+        status = do_forward(ni_handle, PTL_GETID,
+                            &request, sizeof(request),
+                            &reply, sizeof(reply));
+        if (status == PTL_OK) {
+                if (id)
+                        *id = reply.id_out;
+                status = reply.rc;
+        }
+
+        return status;
+}
+
+/*
+ * Forward a PTL_FAILNID request carrying nid and threshold to the given
+ * interface.  Returns the forwarding error, or the status reported in the
+ * result block.
+ */
+int PtlFailNid (ptl_handle_ni_t interface, ptl_nid_t nid, unsigned int threshold)
+{
+        PtlFailNid_in request;
+        PtlFailNid_out reply;
+        int status;
+
+        request.interface = interface;
+        request.nid = nid;
+        request.threshold = threshold;
+
+        status = do_forward (interface, PTL_FAILNID,
+                             &request, sizeof(request),
+                             &reply, sizeof (reply));
+        if (status != PTL_OK)
+                return status;
+
+        return reply.rc;
+}
+
+/*
+ * Read one status register of an interface.  *status_out is written only
+ * when the call was forwarded and status_out is non-NULL.  Returns the
+ * forwarding error, or the status reported in the result block.
+ */
+int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in,
+                ptl_sr_value_t * status_out)
+{
+        PtlNIStatus_in request;
+        PtlNIStatus_out reply;
+        int status;
+
+        request.interface_in = interface_in;
+        request.register_in = register_in;
+
+        status = do_forward(interface_in, PTL_NISTATUS,
+                            &request, sizeof(request),
+                            &reply, sizeof(reply));
+        if (status == PTL_OK) {
+                if (status_out)
+                        *status_out = reply.status_out;
+                status = reply.rc;
+        }
+
+        return status;
+}
+
+/*
+ * Ask an interface for the distance to a process.  *distance_out is
+ * written only when the call was forwarded and distance_out is non-NULL.
+ * Returns the forwarding error, or the status reported in the result block.
+ */
+int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in,
+              unsigned long *distance_out)
+{
+        PtlNIDist_in request;
+        PtlNIDist_out reply;
+        int status;
+
+        request.interface_in = interface_in;
+        request.process_in = process_in;
+
+        status = do_forward(interface_in, PTL_NIDIST,
+                            &request, sizeof(request),
+                            &reply, sizeof(reply));
+        if (status == PTL_OK) {
+                if (distance_out)
+                        *distance_out = reply.distance_out;
+                status = reply.rc;
+        }
+
+        return status;
+}
+
+
+
+/*
+ * Forward a PTL_NIDEBUG request carrying mask_in to the interface.
+ * Returns the forwarding error if the call could not be forwarded,
+ * otherwise the rc field of the result block.
+ */
+unsigned int PtlNIDebug(ptl_handle_ni_t ni, unsigned int mask_in)
+{
+        PtlNIDebug_in request;
+        PtlNIDebug_out reply;
+        int status;
+
+        request.mask_in = mask_in;
+
+        status = do_forward(ni, PTL_NIDEBUG,
+                            &request, sizeof(request),
+                            &reply, sizeof(reply));
+        if (status == PTL_OK)
+                return reply.rc;
+
+        return status;
+}
+
+/*
+ * Create a match entry on a portal index of an interface.
+ *
+ * On a forwarded call, *handle_out (if non-NULL) receives the interface
+ * index of interface_in and the cookie from the result block.  This happens
+ * regardless of the status in the result block.  Returns the forwarding
+ * error, or the status reported in the result block.
+ */
+int PtlMEAttach(ptl_handle_ni_t interface_in, ptl_pt_index_t index_in,
+                ptl_process_id_t match_id_in, ptl_match_bits_t match_bits_in,
+                ptl_match_bits_t ignore_bits_in, ptl_unlink_t unlink_in,
+                ptl_ins_pos_t pos_in, ptl_handle_me_t * handle_out)
+{
+        PtlMEAttach_in request;
+        PtlMEAttach_out reply;
+        int status;
+
+        request.interface_in   = interface_in;
+        request.index_in       = index_in;
+        request.match_id_in    = match_id_in;
+        request.match_bits_in  = match_bits_in;
+        request.ignore_bits_in = ignore_bits_in;
+        request.unlink_in      = unlink_in;
+        request.position_in    = pos_in;
+
+        status = do_forward(interface_in, PTL_MEATTACH,
+                            &request, sizeof(request),
+                            &reply, sizeof(reply));
+        if (status != PTL_OK)
+                return status;
+
+        if (handle_out) {
+                handle_out->nal_idx = interface_in.nal_idx;
+                handle_out->cookie = reply.handle_out.cookie;
+        }
+
+        return reply.rc;
+}
+
+/*
+ * Create a match entry positioned relative to an existing one.
+ *
+ * A forwarding failure of PTL_INV_HANDLE is reported as PTL_INV_ME; other
+ * forwarding failures are returned as they are.  On a forwarded call,
+ * *handle_out (if non-NULL) receives the interface index of current_in and
+ * the cookie from the result block, and the result block's status is
+ * returned.
+ */
+int PtlMEInsert(ptl_handle_me_t current_in, ptl_process_id_t match_id_in,
+                ptl_match_bits_t match_bits_in, ptl_match_bits_t ignore_bits_in,
+                ptl_unlink_t unlink_in, ptl_ins_pos_t position_in,
+                ptl_handle_me_t * handle_out)
+{
+        PtlMEInsert_in request;
+        PtlMEInsert_out reply;
+        int status;
+
+        request.current_in     = current_in;
+        request.match_id_in    = match_id_in;
+        request.match_bits_in  = match_bits_in;
+        request.ignore_bits_in = ignore_bits_in;
+        request.unlink_in      = unlink_in;
+        request.position_in    = position_in;
+
+        status = do_forward(current_in, PTL_MEINSERT,
+                            &request, sizeof(request),
+                            &reply, sizeof(reply));
+        if (status == PTL_INV_HANDLE)
+                return PTL_INV_ME;
+        if (status != PTL_OK)
+                return status;
+
+        if (handle_out) {
+                handle_out->nal_idx = current_in.nal_idx;
+                handle_out->cookie = reply.handle_out.cookie;
+        }
+
+        return reply.rc;
+}
+
+/*
+ * Forward a PTL_MEUNLINK request for a match entry; unlink_in is always
+ * sent as PTL_RETAIN.  A forwarding failure of PTL_INV_HANDLE is reported
+ * as PTL_INV_ME; otherwise returns the forwarding error or the status from
+ * the result block.
+ */
+int PtlMEUnlink(ptl_handle_me_t current_in)
+{
+        PtlMEUnlink_in request;
+        PtlMEUnlink_out reply;
+        int status;
+
+        request.current_in = current_in;
+        request.unlink_in = PTL_RETAIN;
+
+        status = do_forward(current_in, PTL_MEUNLINK,
+                            &request, sizeof(request),
+                            &reply, sizeof(reply));
+        if (status == PTL_INV_HANDLE)
+                return PTL_INV_ME;
+        if (status != PTL_OK)
+                return status;
+
+        return reply.rc;
+}
+
+/*
+ * Forward a PTL_TBLDUMP request for index_in to the interface.  Returns
+ * the forwarding error, or the status reported in the result block.
+ */
+int PtlTblDump(ptl_handle_ni_t ni, int index_in)
+{
+        PtlTblDump_in request;
+        PtlTblDump_out reply;
+        int status;
+
+        request.index_in = index_in;
+
+        status = do_forward(ni, PTL_TBLDUMP,
+                            &request, sizeof(request),
+                            &reply, sizeof(reply));
+
+        return (status != PTL_OK) ? status : reply.rc;
+}
+
+/*
+ * Forward a PTL_MEDUMP request for a match entry.  A forwarding failure of
+ * PTL_INV_HANDLE is reported as PTL_INV_ME; otherwise returns the
+ * forwarding error or the status from the result block.
+ */
+int PtlMEDump(ptl_handle_me_t current_in)
+{
+        PtlMEDump_in request;
+        PtlMEDump_out reply;
+        int status;
+
+        request.current_in = current_in;
+
+        status = do_forward(current_in, PTL_MEDUMP,
+                            &request, sizeof(request),
+                            &reply, sizeof(reply));
+        if (status == PTL_INV_HANDLE)
+                return PTL_INV_ME;
+        if (status != PTL_OK)
+                return status;
+
+        return reply.rc;
+}
+
+/*
+ * Check the memory described by an MD with the NAL's validate hook.
+ *
+ * current_in  any handle on the target interface (used to find the NAL)
+ * md_in       descriptor to check; with PTL_MD_IOV set, md_in.start is
+ *             treated as an array of md_in.niov struct iovec entries,
+ *             otherwise as one buffer of md_in.length bytes
+ *
+ * Returns PTL_NOINIT before PtlInit(), PTL_INV_HANDLE if the handle names
+ * no interface, PTL_SEGV if the NAL rejects any region, else PTL_OK.
+ * A NAL without a validate hook accepts everything.
+ */
+static int validate_md(ptl_handle_any_t current_in, ptl_md_t md_in)
+{
+        nal_t *nal;
+        int rc;
+        int i;
+
+        if (!ptl_init) {
+                fprintf(stderr, "PtlMDAttach/Bind/Update: Not initialized\n");
+                return PTL_NOINIT;
+        }
+
+        nal = ptl_hndl2nal(&current_in);
+        if (!nal)
+                return PTL_INV_HANDLE;
+
+        if (nal->validate == NULL)      /* nal->validate is a NOOP */
+                return PTL_OK;
+
+        if ((md_in.options & PTL_MD_IOV) == 0) {        /* contiguous */
+                rc = nal->validate (nal, md_in.start, md_in.length);
+                if (rc)
+                        return (PTL_SEGV);
+        } else {
+                struct iovec *iov = (struct iovec *)md_in.start;
+
+                for (i = 0; i < md_in.niov; i++, iov++) {
+                        rc = nal->validate (nal, iov->iov_base, iov->iov_len);
+                        if (rc)
+                                return (PTL_SEGV);
+                }
+        }
+
+        /* callers compare the result against PTL_OK */
+        return PTL_OK;
+}
+
+/*
+ * Translate the user-level EQ handle stored in an MD into the handle held
+ * in the user EQ's cb_eq_handle (the one passed through do_forward()).
+ * PTL_EQ_NONE maps to itself.
+ */
+static ptl_handle_eq_t md2eq (ptl_md_t *md)
+{
+        if (!PtlHandleEqual (md->eventq, PTL_EQ_NONE))
+                return (ptl_handle2usereq (&md->eventq)->cb_eq_handle);
+
+        return (PTL_EQ_NONE);
+}
+
+
+/*
+ * Validate a memory descriptor and attach it to a match entry.
+ *
+ * Any failure of PTL_INV_HANDLE (from validation or forwarding) is
+ * reported as PTL_INV_ME; other failures are returned unchanged.  On a
+ * forwarded call, *handle_out (if non-NULL) receives the interface index
+ * of me_in and the cookie from the result block, and the result block's
+ * status is returned.
+ */
+int PtlMDAttach(ptl_handle_me_t me_in, ptl_md_t md_in,
+                ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out)
+{
+        PtlMDAttach_in request;
+        PtlMDAttach_out reply;
+        int status;
+
+        status = validate_md(me_in, md_in);
+        if (status != PTL_OK)
+                return (status == PTL_INV_HANDLE) ? PTL_INV_ME : status;
+
+        request.eq_in = md2eq(&md_in);
+        request.me_in = me_in;
+        request.md_in = md_in;
+        request.unlink_in = unlink_in;
+
+        status = do_forward(me_in, PTL_MDATTACH,
+                            &request, sizeof(request),
+                            &reply, sizeof(reply));
+        if (status != PTL_OK)
+                return (status == PTL_INV_HANDLE) ? PTL_INV_ME : status;
+
+        if (handle_out) {
+                handle_out->nal_idx = me_in.nal_idx;
+                handle_out->cookie = reply.handle_out.cookie;
+        }
+
+        return reply.rc;
+}
+
+
+
+/*
+ * Validate a memory descriptor and bind it to an interface without a match
+ * entry.  Validation and forwarding errors are returned unchanged.  On a
+ * forwarded call, *handle_out (if non-NULL) receives the interface index
+ * of ni_in and the cookie from the result block, and the result block's
+ * status is returned.
+ */
+int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in,
+              ptl_handle_md_t * handle_out)
+{
+        PtlMDBind_in request;
+        PtlMDBind_out reply;
+        int status;
+
+        status = validate_md(ni_in, md_in);
+        if (status == PTL_OK) {
+                request.eq_in = md2eq(&md_in);
+                request.ni_in = ni_in;
+                request.md_in = md_in;
+
+                status = do_forward(ni_in, PTL_MDBIND,
+                                    &request, sizeof(request),
+                                    &reply, sizeof(reply));
+        }
+        if (status != PTL_OK)
+                return status;
+
+        if (handle_out) {
+                handle_out->nal_idx = ni_in.nal_idx;
+                handle_out->cookie = reply.handle_out.cookie;
+        }
+
+        return reply.rc;
+}
+
+/*
+ * Read and/or replace a memory descriptor.
+ *
+ * md_in      descriptor to operate on
+ * old_inout  if non-NULL, sent with the request and overwritten with the
+ *            descriptor returned in the result block
+ * new_inout  if non-NULL, validated and sent as the replacement
+ * testq_in   PTL_EQ_NONE, or a user EQ whose current sequence number is
+ *            sent along with the request
+ *
+ * PTL_INV_HANDLE from validation or forwarding is reported as PTL_INV_MD;
+ * other failures are returned unchanged.  Otherwise returns the status
+ * from the result block.
+ */
+int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t *old_inout,
+                ptl_md_t *new_inout, ptl_handle_eq_t testq_in)
+{
+        PtlMDUpdate_internal_in request;
+        PtlMDUpdate_internal_out reply;
+        int status;
+
+        request.md_in = md_in;
+
+        request.old_inout_valid = 0;
+        if (old_inout) {
+                request.old_inout = *old_inout;
+                request.old_inout_valid = 1;
+        }
+
+        if (!new_inout) {
+                request.new_inout_valid = 0;
+        } else {
+                status = validate_md (md_in, *new_inout);
+                if (status == PTL_INV_HANDLE)
+                        return PTL_INV_MD;
+                if (status != PTL_OK)
+                        return status;
+                request.new_inout = *new_inout;
+                request.new_inout_valid = 1;
+        }
+
+        if (!PtlHandleEqual (testq_in, PTL_EQ_NONE)) {
+                ptl_eq_t *user_eq = ptl_handle2usereq (&testq_in);
+
+                request.testq_in = user_eq->cb_eq_handle;
+                request.sequence_in = user_eq->sequence;
+        } else {
+                request.testq_in = PTL_EQ_NONE;
+                request.sequence_in = -1;
+        }
+
+        status = do_forward(md_in, PTL_MDUPDATE,
+                            &request, sizeof(request),
+                            &reply, sizeof(reply));
+        if (status == PTL_INV_HANDLE)
+                return PTL_INV_MD;
+        if (status != PTL_OK)
+                return status;
+
+        if (old_inout)
+                *old_inout = reply.old_inout;
+
+        return reply.rc;
+}
+
+/*
+ * Forward a PTL_MDUNLINK request for a memory descriptor.  A forwarding
+ * failure of PTL_INV_HANDLE is reported as PTL_INV_MD; otherwise returns
+ * the forwarding error or the status from the result block.
+ */
+int PtlMDUnlink(ptl_handle_md_t md_in)
+{
+        PtlMDUnlink_in request;
+        PtlMDUnlink_out reply;
+        int status;
+
+        request.md_in = md_in;
+
+        status = do_forward(md_in, PTL_MDUNLINK,
+                            &request, sizeof(request),
+                            &reply, sizeof(reply));
+        if (status == PTL_INV_HANDLE)
+                return PTL_INV_MD;
+        if (status != PTL_OK)
+                return status;
+
+        return reply.rc;
+}
+
+/*
+ * Allocate an event queue on an interface.
+ *
+ * interface   interface the queue belongs to
+ * count       requested number of event slots; rounded up to a power of 2
+ * callback    handler passed through to the library with the request
+ * handle_out  receives the user-level EQ handle on success
+ *
+ * Returns PTL_NOINIT before PtlInit(), PTL_INV_HANDLE if the interface
+ * handle is bad, PTL_VAL_FAILED if count is 0 or overflows on round-up,
+ * PTL_NOSPACE on allocation failure, the NAL's validate error or the
+ * library's error if either rejects the queue, else PTL_OK.
+ */
+int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count,
+               int (*callback) (ptl_event_t * event),
+               ptl_handle_eq_t * handle_out)
+{
+        ptl_eq_t *eq = NULL;
+        ptl_event_t *ev = NULL;
+        PtlEQAlloc_in args;
+        PtlEQAlloc_out ret;
+        int rc;
+        ptl_size_t i;   /* same type as count: no signed/unsigned compare */
+        nal_t *nal;
+
+        if (!ptl_init)
+                return PTL_NOINIT;
+
+        nal = ptl_hndl2nal (&interface);
+        if (nal == NULL)
+                return PTL_INV_HANDLE;
+
+        if (count != LOWEST_BIT_SET(count)) {   /* not a power of 2 already */
+                do {                    /* knock off all but the top bit... */
+                        count &= ~LOWEST_BIT_SET (count);
+                } while (count != LOWEST_BIT_SET(count));
+
+                count <<= 1;                    /* ...and round up */
+        }
+
+        if (count == 0)         /* catch bad parameter / overflow on roundup */
+                return (PTL_VAL_FAILED);
+
+        PORTAL_ALLOC(ev, count * sizeof(ptl_event_t));
+        if (!ev)
+                return PTL_NOSPACE;
+
+        /* Allocate the user-side descriptor BEFORE telling the library
+         * about the event buffer.  If this allocation failed after a
+         * successful PTL_EQALLOC, ev would be freed while the library
+         * still held a reference to it. */
+        PORTAL_ALLOC(eq, sizeof(*eq));
+        if (!eq) {
+                rc = PTL_NOSPACE;
+                goto fail;
+        }
+
+        for (i = 0; i < count; i++)
+                ev[i].sequence = 0;
+
+        if (nal->validate != NULL) {
+                rc = nal->validate(nal, ev, count * sizeof(ptl_event_t));
+                if (rc != PTL_OK)
+                        goto fail;
+        }
+
+        args.ni_in = interface;
+        args.count_in = count;
+        args.base_in = ev;
+        args.len_in = count * sizeof(*ev);
+        args.callback_in = callback;
+
+        rc = do_forward(interface, PTL_EQALLOC, &args, sizeof(args), &ret,
+                        sizeof(ret));
+        if (rc != PTL_OK)
+                goto fail;
+        if (ret.rc)
+                GOTO(fail, rc = ret.rc);
+
+        eq->sequence = 1;
+        eq->size = count;
+        eq->base = ev;
+
+        /* EQ handles are a little weird.  PtlEQGet() just looks at the
+         * queued events in shared memory.  It doesn't want to do_forward()
+         * at all, so the cookie in the EQ handle we pass out of here is
+         * simply a pointer to the event queue we just set up.  We stash
+         * the handle returned by do_forward(), so we can pass it back via
+         * do_forward() when we need to. */
+
+        eq->cb_eq_handle.nal_idx = interface.nal_idx;
+        eq->cb_eq_handle.cookie = ret.handle_out.cookie;
+
+        handle_out->nal_idx = interface.nal_idx;
+        handle_out->cookie = (__u64)((unsigned long)eq);
+        return PTL_OK;
+
+fail:
+        /* nothing has been handed to the library on any path to here */
+        if (eq != NULL) {
+                PORTAL_FREE(eq, sizeof(*eq));
+        }
+        PORTAL_FREE(ev, count * sizeof(ptl_event_t));
+        return rc;
+}
+
+/*
+ * Release an event queue created by PtlEQAlloc().
+ *
+ * The handle's cookie is used directly as the user-side ptl_eq_t; it is
+ * not validated here.
+ *
+ * Returns the forwarding error if the request never reached the library
+ * (the user-side memory is still released in that case, as before), the
+ * library's status if it refused to free the queue (nothing is released,
+ * so the handle stays usable), else PTL_OK.
+ */
+int PtlEQFree(ptl_handle_eq_t eventq)
+{
+        PtlEQFree_in args;
+        PtlEQFree_out ret;
+        ptl_eq_t *eq;
+        int rc;
+
+        eq = ptl_handle2usereq (&eventq);
+        args.eventq_in = eq->cb_eq_handle;
+
+        rc = do_forward(eq->cb_eq_handle, PTL_EQFREE, &args,
+                        sizeof(args), &ret, sizeof(ret));
+
+        /* The library saw the request and said no.  It still references
+         * the event buffer, so freeing it here would let the library
+         * write into released memory. */
+        if (rc == PTL_OK && ret.rc != PTL_OK)
+                return ret.rc;
+
+        PORTAL_FREE(eq->base, eq->size * sizeof(ptl_event_t));
+        PORTAL_FREE(eq, sizeof(*eq));
+
+        return rc;
+}
+
+/*
+ * Forward a PTL_ACENTRY request (access-control index, match id and portal
+ * index) to the interface.  Returns the forwarding error, or the status
+ * reported in the result block.
+ */
+int PtlACEntry(ptl_handle_ni_t ni_in, ptl_ac_index_t index_in,
+               ptl_process_id_t match_id_in, ptl_pt_index_t portal_in)
+{
+        PtlACEntry_in request;
+        PtlACEntry_out reply;
+        int status;
+
+        /* marshal the arguments for the forwarding object */
+        request.ni_in = ni_in;
+        request.index_in = index_in;
+        request.match_id_in = match_id_in;
+        request.portal_in = portal_in;
+
+        status = do_forward(ni_in, PTL_ACENTRY,
+                            &request, sizeof(request),
+                            &reply, sizeof(reply));
+        if (status != PTL_OK)
+                return status;
+
+        return reply.rc;
+}
+
+/*
+ * Forward a PTL_PUT request for the memory described by md_in, addressed
+ * to target_in with the given portal, cookie, match bits, offset and
+ * header data.  Returns the forwarding error, or the status reported in
+ * the result block.
+ */
+int PtlPut(ptl_handle_md_t md_in, ptl_ack_req_t ack_req_in,
+           ptl_process_id_t target_in, ptl_pt_index_t portal_in,
+           ptl_ac_index_t cookie_in, ptl_match_bits_t match_bits_in,
+           ptl_size_t offset_in, ptl_hdr_data_t hdr_data_in)
+{
+        PtlPut_in request;
+        PtlPut_out reply;
+        int status;
+
+        /* marshal the arguments for the forwarding object */
+        request.md_in = md_in;
+        request.ack_req_in = ack_req_in;
+        request.target_in = target_in;
+        request.portal_in = portal_in;
+        request.cookie_in = cookie_in;
+        request.match_bits_in = match_bits_in;
+        request.offset_in = offset_in;
+        request.hdr_data_in = hdr_data_in;
+
+        status = do_forward(md_in, PTL_PUT,
+                            &request, sizeof(request),
+                            &reply, sizeof(reply));
+        if (status != PTL_OK)
+                return status;
+
+        return reply.rc;
+}
+
+/*
+ * Forward a PTL_GET request for md_in, addressed to target_in with the
+ * given portal, cookie, match bits and offset.  Returns the forwarding
+ * error, or the status reported in the result block.
+ */
+int PtlGet(ptl_handle_md_t md_in, ptl_process_id_t target_in,
+           ptl_pt_index_t portal_in, ptl_ac_index_t cookie_in,
+           ptl_match_bits_t match_bits_in, ptl_size_t offset_in)
+{
+        PtlGet_in request;
+        PtlGet_out reply;
+        int status;
+
+        /* marshal the arguments for the forwarding object */
+        request.md_in = md_in;
+        request.target_in = target_in;
+        request.portal_in = portal_in;
+        request.cookie_in = cookie_in;
+        request.match_bits_in = match_bits_in;
+        request.offset_in = offset_in;
+
+        status = do_forward(md_in, PTL_GET,
+                            &request, sizeof(request),
+                            &reply, sizeof(reply));
+        if (status != PTL_OK)
+                return status;
+
+        return reply.rc;
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lib/lib-dispatch.c
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_PORTALS
+#include <portals/lib-p30.h>
+#include <portals/lib-dispatch.h>
+
+/* One dispatch table entry: the library-side handler for an API call and
+ * the call's printable name. */
+typedef struct {
+ int (*fun) (nal_cb_t * nal, void *private, void *in, void *out);
+ char *name;
+} dispatch_table_t;
+
+/* Handlers indexed by API call number (the PTL_* constants passed to
+ * lib_dispatch()).  Indices with no initialiser are zero-filled, so their
+ * fun pointer is NULL.  The final unlabelled entry sits one past
+ * PTL_FAILNID and also has a NULL handler. */
+static dispatch_table_t dispatch_table[] = {
+ [PTL_GETID] {do_PtlGetId, "PtlGetId"},
+ [PTL_NISTATUS] {do_PtlNIStatus, "PtlNIStatus"},
+ [PTL_NIDIST] {do_PtlNIDist, "PtlNIDist"},
+ [PTL_NIDEBUG] {do_PtlNIDebug, "PtlNIDebug"},
+ [PTL_MEATTACH] {do_PtlMEAttach, "PtlMEAttach"},
+ [PTL_MEINSERT] {do_PtlMEInsert, "PtlMEInsert"},
+ [PTL_MEUNLINK] {do_PtlMEUnlink, "PtlMEUnlink"},
+ [PTL_TBLDUMP] {do_PtlTblDump, "PtlTblDump"},
+ [PTL_MEDUMP] {do_PtlMEDump, "PtlMEDump"},
+ [PTL_MDATTACH] {do_PtlMDAttach, "PtlMDAttach"},
+ [PTL_MDBIND] {do_PtlMDBind, "PtlMDBind"},
+ [PTL_MDUPDATE] {do_PtlMDUpdate_internal, "PtlMDUpdate_internal"},
+ [PTL_MDUNLINK] {do_PtlMDUnlink, "PtlMDUnlink"},
+ [PTL_EQALLOC] {do_PtlEQAlloc_internal, "PtlEQAlloc_internal"},
+ [PTL_EQFREE] {do_PtlEQFree_internal, "PtlEQFree_internal"},
+ [PTL_ACENTRY] {do_PtlACEntry, "PtlACEntry"},
+ [PTL_PUT] {do_PtlPut, "PtlPut"},
+ [PTL_GET] {do_PtlGet, "PtlGet"},
+ [PTL_FAILNID] {do_PtlFailNid, "PtlFailNid"},
+ /* */ {0, ""}
+};
+
+/*
+ * This really should be elsewhere, but lib-p30/dispatch.c is
+ * an automatically generated file.
+ */
+/*
+ * Run the library-side handler for an API call.
+ *
+ * nal        NAL the call arrived on
+ * private    opaque pointer passed through to the handler
+ * index      API call number; selects the dispatch_table entry
+ * arg_block  the call's argument block
+ * ret_block  the call's result block, filled in by the handler
+ *
+ * An index with no handler is logged and ignored; ret_block is left
+ * untouched in that case.
+ */
+void lib_dispatch(nal_cb_t * nal, void *private, int index, void *arg_block,
+                  void *ret_block)
+{
+        lib_ni_t *ni = &nal->ni;
+        const int table_len = (int)(sizeof(dispatch_table) /
+                                    sizeof(dispatch_table[0]));
+
+        /* Bound the index by the table's real length as well as by
+         * LIB_MAX_DISPATCH, so a mismatch between the two can never read
+         * past the end of dispatch_table. */
+        if (index < 0 || index > LIB_MAX_DISPATCH || index >= table_len ||
+            !dispatch_table[index].fun) {
+                CDEBUG(D_NET, LPU64": Invalid API call %d\n", ni->nid, index);
+                return;
+        }
+
+        CDEBUG(D_NET, LPU64": API call %s (%d)\n", ni->nid,
+               dispatch_table[index].name, index);
+
+        dispatch_table[index].fun(nal, private, arg_block, ret_block);
+}
+
+/*
+ * Printable name of an API call number.  Returns the name stored in
+ * dispatch_table (NULL for an unused index inside the table), or an empty
+ * string when index lies outside the table.
+ */
+char *dispatch_name(int index)
+{
+        const int table_len = (int)(sizeof(dispatch_table) /
+                                    sizeof(dispatch_table[0]));
+
+        /* never index past either end of the table */
+        if (index < 0 || index >= table_len)
+                return "";
+
+        return dispatch_table[index].name;
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lib/lib-eq.c
+ * Library level Event queue management routines
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_PORTALS
+#include <portals/lib-p30.h>
+#include <portals/arg-blocks.h>
+
+/*
+ * Library side of PtlEQAlloc: create an event queue over a caller-supplied
+ * event buffer.
+ *
+ * Incoming (PtlEQAlloc_in):
+ *      count_in        number of ptl_event_t slots; must equal its own
+ *                      LOWEST_BIT_SET() (the API layer should have
+ *                      rounded it up)
+ *      base_in         start of the event buffer
+ *      callback_in     stored as the queue's event callback
+ *
+ * Outgoing (PtlEQAlloc_out):
+ *      rc              PTL_OK, PTL_VAL_FAILED (bad count), PTL_NOSPACE
+ *                      (no EQ descriptor) or the error from nal->cb_map
+ *      handle_out      handle of the new queue, filled in on success
+ *
+ * The return value is the same status that is stored in rc.
+ */
+int do_PtlEQAlloc_internal(nal_cb_t * nal, void *private, void *v_args,
+                           void *v_ret)
+{
+        PtlEQAlloc_in  *in  = v_args;
+        PtlEQAlloc_out *out = v_ret;
+        lib_eq_t       *new_eq;
+        unsigned long   flags;
+
+        /* api should have rounded up */
+        if (LOWEST_BIT_SET (in->count_in) != in->count_in) {
+                out->rc = PTL_VAL_FAILED;
+                return out->rc;
+        }
+
+        new_eq = lib_eq_alloc (nal);
+        if (new_eq == NULL) {
+                out->rc = PTL_NOSPACE;
+                return (out->rc);
+        }
+
+        state_lock(nal, &flags);
+
+        /* Let the NAL map the event buffer if it wants to */
+        if (nal->cb_map != NULL) {
+                struct iovec ring = {
+                        .iov_len  = in->count_in * sizeof (ptl_event_t),
+                        .iov_base = in->base_in };
+
+                out->rc = nal->cb_map (nal, 1, &ring, &new_eq->eq_addrkey);
+                if (out->rc != PTL_OK) {
+                        lib_eq_free (nal, new_eq);
+                        state_unlock (nal, &flags);
+                        return (out->rc);
+                }
+        }
+
+        new_eq->base = in->base_in;
+        new_eq->size = in->count_in;
+        new_eq->sequence = 1;
+        new_eq->eq_refcount = 0;
+        new_eq->event_callback = in->callback_in;
+
+        /* Publish: give it a handle and put it on the active list */
+        lib_initialise_handle (nal, &new_eq->eq_lh);
+        list_add (&new_eq->eq_list, &nal->ni.ni_active_eqs);
+
+        state_unlock(nal, &flags);
+
+        ptl_eq2handle(&out->handle_out, new_eq);
+        out->rc = PTL_OK;
+        return (out->rc);
+}
+
+/*
+ * Library side of PtlEQFree: release an event queue that no MD refers to.
+ *
+ * Incoming (PtlEQFree_in):
+ *      eventq_in       handle of the queue to free
+ *
+ * Outgoing (PtlEQFree_out):
+ *      rc              PTL_OK, PTL_INV_EQ (handle does not resolve) or
+ *                      PTL_EQ_INUSE (eq_refcount is non-zero)
+ *
+ * The return value is the same status that is stored in rc.
+ */
+int do_PtlEQFree_internal(nal_cb_t * nal, void *private, void *v_args,
+                          void *v_ret)
+{
+        PtlEQFree_in  *args = v_args;
+        PtlEQFree_out *ret = v_ret;
+        lib_eq_t      *eq;
+        unsigned long  flags;   /* same type every other state_lock() user passes */
+
+        state_lock (nal, &flags);
+
+        eq = ptl_handle2eq(&args->eventq_in, nal);
+        if (eq == NULL) {
+                ret->rc = PTL_INV_EQ;
+        } else if (eq->eq_refcount != 0) {
+                ret->rc = PTL_EQ_INUSE;
+        } else {
+                /* Undo the mapping made when the queue was allocated */
+                if (nal->cb_unmap != NULL) {
+                        struct iovec iov = {
+                                .iov_base = eq->base,
+                                .iov_len = eq->size * sizeof (ptl_event_t) };
+
+                        nal->cb_unmap(nal, 1, &iov, &eq->eq_addrkey);
+                }
+
+                lib_invalidate_handle (nal, &eq->eq_lh);
+                list_del (&eq->eq_list);
+                lib_eq_free (nal, eq);
+                ret->rc = PTL_OK;
+        }
+
+        state_unlock (nal, &flags);
+
+        return (ret->rc);
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lib/lib-init.c
+ * Start up the internal library and clear all structures
+ * Called by the NAL when it initializes. Safe to call multiple times.
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+# define DEBUG_SUBSYSTEM S_PORTALS
+#include <portals/lib-p30.h>
+
+#ifdef __KERNEL__
+# include <linux/string.h> /* for memset() */
+# include <linux/kp30.h>
+# ifdef KERNEL_ADDR_CACHE
+# include <compute/OS/addrCache/cache.h>
+# endif
+#else
+# include <string.h>
+# include <sys/time.h>
+#endif
+
+#ifdef PTL_USE_SLAB_CACHE
+static int ptl_slab_users;
+
+kmem_cache_t *ptl_md_slab;
+kmem_cache_t *ptl_msg_slab;
+kmem_cache_t *ptl_me_slab;
+kmem_cache_t *ptl_eq_slab;
+
+atomic_t md_in_use_count;
+atomic_t msg_in_use_count;
+atomic_t me_in_use_count;
+atomic_t eq_in_use_count;
+
+/* NB zeroing in ctor and on freeing ensures items that
+ * kmem_cache_validate() OK, but haven't been initialised
+ * as an MD/ME/EQ can't have valid handles
+ *
+ * The three constructors below are the ctor callbacks passed to
+ * kmem_cache_create() for the MD, ME and EQ slabs.  Each one clears a
+ * whole object; the 'slab' and 'flags' arguments are not used.
+ */
+static void
+ptl_md_slab_ctor (void *obj, kmem_cache_t *slab, unsigned long flags)
+{
+ memset (obj, 0, sizeof (lib_md_t)); /* obj is one lib_md_t from portals_MD */
+}
+
+static void
+ptl_me_slab_ctor (void *obj, kmem_cache_t *slab, unsigned long flags)
+{
+ memset (obj, 0, sizeof (lib_me_t)); /* obj is one lib_me_t from portals_ME */
+}
+
+static void
+ptl_eq_slab_ctor (void *obj, kmem_cache_t *slab, unsigned long flags)
+{
+ memset (obj, 0, sizeof (lib_eq_t)); /* obj is one lib_eq_t from portals_EQ */
+}
+
+/*
+ * Create the slab caches that back MD, MSG, ME and EQ descriptors.
+ *
+ * One set of slabs serves ALL the nals: only the first user creates
+ * them, later users just bump ptl_slab_users and return 0.
+ *
+ * Returns PTL_OK (or 0 for a non-first user), or PTL_NOSPACE if a cache
+ * can't be created.
+ *
+ * NB on failure caller must still call kportal_descriptor_cleanup
+ */
+int
+kportal_descriptor_setup (nal_cb_t *nal)
+{
+        if (ptl_slab_users++)
+                return 0;
+
+        ptl_md_slab = kmem_cache_create("portals_MD",
+                                        sizeof(lib_md_t), 0,
+                                        SLAB_HWCACHE_ALIGN,
+                                        ptl_md_slab_ctor, NULL);
+        if (!ptl_md_slab) {
+                CERROR("couldn't allocate ptl_md_t slab\n");
+                RETURN (PTL_NOSPACE);
+        }
+
+        /* NB no ctor for msgs; they don't need handle verification */
+        ptl_msg_slab = kmem_cache_create("portals_MSG",
+                                         sizeof(lib_msg_t), 0,
+                                         SLAB_HWCACHE_ALIGN,
+                                         NULL, NULL);
+        if (!ptl_msg_slab) {
+                CERROR("couldn't allocate ptl_msg_t slab\n");
+                RETURN (PTL_NOSPACE);
+        }
+
+        ptl_me_slab = kmem_cache_create("portals_ME",
+                                        sizeof(lib_me_t), 0,
+                                        SLAB_HWCACHE_ALIGN,
+                                        ptl_me_slab_ctor, NULL);
+        if (!ptl_me_slab) {
+                CERROR("couldn't allocate ptl_me_t slab\n");
+                RETURN (PTL_NOSPACE);
+        }
+
+        ptl_eq_slab = kmem_cache_create("portals_EQ",
+                                        sizeof(lib_eq_t), 0,
+                                        SLAB_HWCACHE_ALIGN,
+                                        ptl_eq_slab_ctor, NULL);
+        if (!ptl_eq_slab) {
+                CERROR("couldn't allocate ptl_eq_t slab\n");
+                RETURN (PTL_NOSPACE);
+        }
+
+        RETURN(PTL_OK);
+}
+
+/*
+ * Drop one user of the shared descriptor slabs; the last user destroys
+ * them.  All in-use counters must be back at zero by then.
+ *
+ * Each cache pointer is reset to NULL once destroyed.  Otherwise a later
+ * kportal_descriptor_setup() that fails partway would leave stale
+ * pointers behind for the (mandatory) cleanup call to destroy again.
+ */
+void
+kportal_descriptor_cleanup (nal_cb_t *nal)
+{
+        if (--ptl_slab_users != 0)
+                return;
+
+        LASSERT (atomic_read (&md_in_use_count) == 0);
+        LASSERT (atomic_read (&me_in_use_count) == 0);
+        LASSERT (atomic_read (&eq_in_use_count) == 0);
+        LASSERT (atomic_read (&msg_in_use_count) == 0);
+
+        if (ptl_md_slab != NULL) {
+                kmem_cache_destroy(ptl_md_slab);
+                ptl_md_slab = NULL;
+        }
+        if (ptl_msg_slab != NULL) {
+                kmem_cache_destroy(ptl_msg_slab);
+                ptl_msg_slab = NULL;
+        }
+        if (ptl_me_slab != NULL) {
+                kmem_cache_destroy(ptl_me_slab);
+                ptl_me_slab = NULL;
+        }
+        if (ptl_eq_slab != NULL) {
+                kmem_cache_destroy(ptl_eq_slab);
+                ptl_eq_slab = NULL;
+        }
+}
+#else
+
+/*
+ * Carve one NAL allocation into 'n' zeroed objects and chain them all
+ * onto fl->fl_list.
+ *
+ *   size   payload size per object; the offset of fo_contents within
+ *          lib_freeobj_t is added to it to get the per-object stride
+ *
+ * Returns PTL_OK, or PTL_NOSPACE if nal->cb_malloc fails.
+ */
+int
+lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int n, int size)
+{
+        char *obj;
+        int   remaining;
+
+        LASSERT (n > 0);
+
+        size += offsetof (lib_freeobj_t, fo_contents);
+
+        obj = nal->cb_malloc (nal, n * size);
+        if (obj == NULL)
+                return (PTL_NOSPACE);
+
+        fl->fl_objsize = size;
+        fl->fl_nobjs = n;
+        fl->fl_objs = obj;
+        INIT_LIST_HEAD (&fl->fl_list);
+
+        remaining = n;
+        do {
+                /* the start of each object doubles as its list linkage */
+                memset (obj, 0, size);
+                list_add ((struct list_head *)obj, &fl->fl_list);
+                obj += size;
+                remaining--;
+        } while (remaining != 0);
+
+        return (PTL_OK);
+}
+
+/*
+ * Release the storage behind a freelist set up by lib_freelist_init().
+ *
+ * A freelist with fl_nobjs == 0 (never initialised, or already finalised)
+ * is left alone.  Otherwise every object must be back on the list; the
+ * backing allocation is returned through nal->cb_free and the whole
+ * descriptor is cleared so a repeated call is a no-op.
+ */
+void
+lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl)
+{
+        struct list_head *el;
+        int               count;
+
+        if (fl->fl_nobjs == 0)
+                return;
+
+        count = 0;
+        for (el = fl->fl_list.next; el != &fl->fl_list; el = el->next)
+                count++;
+
+        LASSERT (count == fl->fl_nobjs);
+
+        nal->cb_free (nal, fl->fl_objs, fl->fl_nobjs * fl->fl_objsize);
+
+        /* clear the structure itself: sizeof (fl) would only cover a
+         * pointer's worth of bytes and could leave fl_nobjs set */
+        memset (fl, 0, sizeof (*fl));
+}
+
+/*
+ * Set up the per-interface freelists for MEs, MSGs, MDs and EQs.
+ *
+ * All four freelist descriptors are cleared first, then initialised in
+ * turn; the first failure stops the sequence and its status is returned.
+ *
+ * NB on failure caller must still call kportal_descriptor_cleanup
+ */
+int
+kportal_descriptor_setup (nal_cb_t *nal)
+{
+        lib_ni_t *ni = &nal->ni;
+        int       rc;
+
+        memset (&ni->ni_free_mes, 0, sizeof (ni->ni_free_mes));
+        memset (&ni->ni_free_msgs, 0, sizeof (ni->ni_free_msgs));
+        memset (&ni->ni_free_mds, 0, sizeof (ni->ni_free_mds));
+        memset (&ni->ni_free_eqs, 0, sizeof (ni->ni_free_eqs));
+
+        rc = lib_freelist_init (nal, &ni->ni_free_mes,
+                                MAX_MES, sizeof (lib_me_t));
+
+        if (rc == PTL_OK)
+                rc = lib_freelist_init (nal, &ni->ni_free_msgs,
+                                        MAX_MSGS, sizeof (lib_msg_t));
+
+        if (rc == PTL_OK)
+                rc = lib_freelist_init (nal, &ni->ni_free_mds,
+                                        MAX_MDS, sizeof (lib_md_t));
+
+        if (rc == PTL_OK)
+                rc = lib_freelist_init (nal, &ni->ni_free_eqs,
+                                        MAX_EQS, sizeof (lib_eq_t));
+
+        return (rc);
+}
+
+/*
+ * Finalise the four per-interface descriptor freelists, in the same
+ * order kportal_descriptor_setup() created them.
+ */
+void
+kportal_descriptor_cleanup (nal_cb_t *nal)
+{
+        lib_ni_t *ni = &nal->ni;
+
+        lib_freelist_fini (nal, &ni->ni_free_mes);
+        lib_freelist_fini (nal, &ni->ni_free_msgs);
+        lib_freelist_fini (nal, &ni->ni_free_mds);
+        lib_freelist_fini (nal, &ni->ni_free_eqs);
+}
+
+#endif
+
+/*
+ * Build the interface cookie from the current time of day, expressed as
+ * tv_sec * 1000000 + tv_usec.
+ *
+ * NB the interface cookie in wire handles guards against delayed
+ * replies and ACKs appearing valid in a new instance of the same
+ * interface.  Initialisation time is probably easily good enough for
+ * that, even if the clock is coarser than the units used here.
+ */
+__u64
+lib_create_interface_cookie (nal_cb_t *nal)
+{
+        struct timeval now;
+#ifndef __KERNEL__
+        int            rc = gettimeofday (&now, NULL);
+
+        LASSERT (rc == 0);
+#else
+        do_gettimeofday(&now);
+#endif
+        return ((__u64)now.tv_sec * 1000000 + now.tv_usec);
+}
+
+/*
+ * Allocate the handle hash table for this interface and make every
+ * bucket an empty list; also restarts object cookies from 0.
+ *
+ * Returns PTL_OK, or PTL_NOSPACE if nal->cb_malloc fails (in which case
+ * ni_lh_hash_table is left NULL).
+ */
+int
+lib_setup_handle_hash (nal_cb_t *nal)
+{
+        lib_ni_t         *ni = &nal->ni;
+        struct list_head *table;
+        int               i;
+
+        /* Arbitrary choice of hash table size */
+#ifdef __KERNEL__
+        ni->ni_lh_hash_size = PAGE_SIZE / sizeof (struct list_head);
+#else
+        ni->ni_lh_hash_size = (MAX_MES + MAX_MDS + MAX_EQS)/4;
+#endif
+        table = (struct list_head *)nal->cb_malloc (nal, ni->ni_lh_hash_size
+                                                    * sizeof (struct list_head));
+        ni->ni_lh_hash_table = table;
+        if (table == NULL)
+                return (PTL_NOSPACE);
+
+        i = 0;
+        while (i < ni->ni_lh_hash_size) {
+                INIT_LIST_HEAD (&table[i]);
+                i++;
+        }
+
+        ni->ni_next_object_cookie = 0;
+
+        return (PTL_OK);
+}
+
+/*
+ * Free the handle hash table, if there is one.
+ *
+ * The table pointer is reset afterwards so that calling this again
+ * (lib_init()'s error path and lib_fini() both call it) can't hand the
+ * same memory to nal->cb_free twice.
+ */
+void
+lib_cleanup_handle_hash (nal_cb_t *nal)
+{
+        lib_ni_t *ni = &nal->ni;
+
+        if (ni->ni_lh_hash_table == NULL)
+                return;
+
+        nal->cb_free (nal, ni->ni_lh_hash_table,
+                      ni->ni_lh_hash_size * sizeof (struct list_head));
+        ni->ni_lh_hash_table = NULL;
+}
+
+/*
+ * Find the handle whose lh_cookie equals 'cookie'.
+ *
+ * The bucket is the low 'unsigned int' part of the cookie modulo the
+ * table size; only that bucket's chain is searched.
+ *
+ * Returns the matching lib_handle_t, or NULL if none is hashed.
+ * ALWAYS called with statelock held.
+ */
+lib_handle_t *
+lib_lookup_cookie (nal_cb_t *nal, __u64 cookie)
+{
+        lib_ni_t         *ni = &nal->ni;
+        unsigned int      bucket = ((unsigned int)cookie) % ni->ni_lh_hash_size;
+        struct list_head *chain = &ni->ni_lh_hash_table[bucket];
+        struct list_head *pos;
+        lib_handle_t     *lh;
+
+        list_for_each (pos, chain) {
+                lh = list_entry (pos, lib_handle_t, lh_hash_chain);
+                if (lh->lh_cookie == cookie)
+                        return (lh);
+        }
+
+        return (NULL);
+}
+
+/*
+ * Give 'lh' the interface's next object cookie and hash it so that
+ * lib_lookup_cookie() can find it.
+ *
+ * ALWAYS called with statelock held.
+ */
+void
+lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh)
+{
+        lib_ni_t     *ni = &nal->ni;
+        unsigned int  bucket;
+
+        lh->lh_cookie = ni->ni_next_object_cookie;
+        ni->ni_next_object_cookie++;
+
+        bucket = ((unsigned int)lh->lh_cookie) % ni->ni_lh_hash_size;
+        list_add (&lh->lh_hash_chain, ni->ni_lh_hash_table + bucket);
+}
+
+void
+lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh)
+{
+ list_del (&lh->lh_hash_chain); /* take lh off its hash chain, undoing the
+                                 * list_add() in lib_initialise_handle(), so
+                                 * lib_lookup_cookie() no longer finds it;
+                                 * 'nal' is not used */
+}
+
+/*
+ * Bring up the library state in nal->ni for this interface, or just take
+ * another reference if it is already up.
+ *
+ *   nid, pid   identity recorded in the interface
+ *   gsize      stored as ni->num_nodes
+ *   ptl_size   number of portal table entries to allocate
+ *   acl_size   not used here
+ *
+ * Returns PTL_OK, the error from descriptor or handle-hash setup, or
+ * PTL_NOSPACE if the portal table can't be allocated.  On failure the
+ * handle hash and descriptor pools are torn down again.
+ */
+int
+lib_init(nal_cb_t * nal, ptl_nid_t nid, ptl_pid_t pid, int gsize,
+         ptl_pt_index_t ptl_size, ptl_ac_index_t acl_size)
+{
+        int       rc = PTL_OK;
+        lib_ni_t *ni = &nal->ni;
+        int       i;
+        ENTRY;
+
+        /* NB serialised in PtlNIInit() */
+
+        if (ni->refcnt != 0) {                       /* already initialised */
+                ni->refcnt++;
+                goto out;
+        }
+
+        /*
+         * Allocate the portal table for this interface
+         * and all per-interface objects.
+         */
+        memset(&ni->counters, 0, sizeof(lib_counters_t));
+
+        /* The error path below calls lib_cleanup_handle_hash(), which
+         * frees whatever this points at.  Make sure it can't see a value
+         * left over from before this call if we fail before
+         * lib_setup_handle_hash() has run. */
+        ni->ni_lh_hash_table = NULL;
+
+        rc = kportal_descriptor_setup (nal);
+        if (rc != PTL_OK)
+                goto out;
+
+        INIT_LIST_HEAD (&ni->ni_active_msgs);
+        INIT_LIST_HEAD (&ni->ni_active_mds);
+        INIT_LIST_HEAD (&ni->ni_active_eqs);
+
+        INIT_LIST_HEAD (&ni->ni_test_peers);
+
+        ni->ni_interface_cookie = lib_create_interface_cookie (nal);
+        ni->ni_next_object_cookie = 0;
+        rc = lib_setup_handle_hash (nal);
+        if (rc != PTL_OK)
+                goto out;
+
+        ni->nid = nid;
+        ni->pid = pid;
+
+        ni->num_nodes = gsize;
+        ni->tbl.size = ptl_size;
+
+        ni->tbl.tbl = nal->cb_malloc(nal, sizeof(struct list_head) * ptl_size);
+        if (ni->tbl.tbl == NULL) {
+                rc = PTL_NOSPACE;
+                goto out;
+        }
+
+        for (i = 0; i < ptl_size; i++)
+                INIT_LIST_HEAD(&(ni->tbl.tbl[i]));
+
+        ni->debug = PTL_DEBUG_NONE;
+        ni->up = 1;
+        ni->refcnt++;
+
+ out:
+        if (rc != PTL_OK) {
+                lib_cleanup_handle_hash (nal);
+                kportal_descriptor_cleanup (nal);
+        }
+
+        RETURN (rc);
+}
+
+int
+lib_fini(nal_cb_t * nal)
+{
+ lib_ni_t *ni = &nal->ni;
+ int idx;
+
+ ni->refcnt--; /* drop one of the references counted by lib_init() */
+
+ if (ni->refcnt != 0) /* still referenced: leave everything in place */
+ goto out;
+
+ /* NB no state_lock() since this is the last reference. The NAL
+ * should have shut down already, so it should be safe to unlink
+ * and free all descriptors, even those that appear committed to a
+ * network op (eg MD with non-zero pending count)
+ *
+ * Each loop below empties one list, logging every descriptor that
+ * was still active, unlinking it and handing it back to its pool.
+ */
+
+ for (idx = 0; idx < ni->tbl.size; idx++) /* MEs, one portal at a time */
+ while (!list_empty (&ni->tbl.tbl[idx])) {
+ lib_me_t *me = list_entry (ni->tbl.tbl[idx].next,
+ lib_me_t, me_list);
+
+ CERROR ("Active me %p on exit\n", me);
+ list_del (&me->me_list);
+ lib_me_free (nal, me);
+ }
+
+ while (!list_empty (&ni->ni_active_mds)) {
+ lib_md_t *md = list_entry (ni->ni_active_mds.next,
+ lib_md_t, md_list);
+
+ CERROR ("Active md %p on exit\n", md);
+ list_del (&md->md_list);
+ lib_md_free (nal, md);
+ }
+
+ while (!list_empty (&ni->ni_active_eqs)) {
+ lib_eq_t *eq = list_entry (ni->ni_active_eqs.next,
+ lib_eq_t, eq_list);
+
+ CERROR ("Active eq %p on exit\n", eq);
+ list_del (&eq->eq_list);
+ lib_eq_free (nal, eq);
+ }
+
+ while (!list_empty (&ni->ni_active_msgs)) {
+ lib_msg_t *msg = list_entry (ni->ni_active_msgs.next,
+ lib_msg_t, msg_list);
+
+ CERROR ("Active msg %p on exit\n", msg);
+ list_del (&msg->msg_list);
+ lib_msg_free (nal, msg);
+ }
+
+ nal->cb_free(nal, ni->tbl.tbl, sizeof(struct list_head) * ni->tbl.size); /* the portal table itself */
+ ni->up = 0;
+
+ lib_cleanup_handle_hash (nal); /* same teardown pair as lib_init()'s error path */
+ kportal_descriptor_cleanup (nal);
+
+ out:
+ return (PTL_OK); /* PTL_OK whether or not this was the last reference */
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lib/lib-md.c
+ * Memory Descriptor management routines
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __KERNEL__
+# include <stdio.h>
+#else
+# define DEBUG_SUBSYSTEM S_PORTALS
+# include <linux/kp30.h>
+#endif
+
+#include <portals/lib-p30.h>
+#include <portals/arg-blocks.h>
+
+/*
+ * must be called with state lock held
+ *
+ * Unlink and free 'md'.  If the MD still has operations pending it is only
+ * flagged PTL_MD_FLAG_UNLINK and left in place.  Otherwise its memory is
+ * unmapped through the NAL, it is detached from its ME (which is itself
+ * unlinked when the ME's unlink policy is PTL_UNLINK), its EQ reference
+ * is dropped, and the descriptor is unhashed, delisted and freed.
+ */
+void lib_md_unlink(nal_cb_t * nal, lib_md_t * md)
+{
+ lib_me_t *me = md->me;
+
+ if (md->pending != 0) { /* busy: just record that an unlink is wanted */
+ CDEBUG(D_NET, "Queueing unlink of md %p\n", md);
+ md->md_flags |= PTL_MD_FLAG_UNLINK;
+ return;
+ }
+
+ CDEBUG(D_NET, "Unlinking md %p\n", md);
+
+ if ((md->options & PTL_MD_KIOV) != 0) { /* page fragments vs. iovec fragments */
+ if (nal->cb_unmap_pages != NULL)
+ nal->cb_unmap_pages (nal, md->md_niov, md->md_iov.kiov,
+ &md->md_addrkey);
+ } else if (nal->cb_unmap != NULL)
+ nal->cb_unmap (nal, md->md_niov, md->md_iov.iov,
+ &md->md_addrkey);
+
+ if (me) {
+ me->md = NULL; /* cleared first, so lib_me_unlink() won't call back into us */
+ if (me->unlink == PTL_UNLINK)
+ lib_me_unlink(nal, me);
+ }
+
+ if (md->eq != NULL)
+ {
+ md->eq->eq_refcount--; /* balances the increment in lib_md_build() */
+ LASSERT (md->eq->eq_refcount >= 0);
+ }
+
+ lib_invalidate_handle (nal, &md->md_lh);
+ list_del (&md->md_list);
+ lib_md_free(nal, md);
+}
+
+/*
+ * Fill in the library MD 'new' from the user's descriptor 'md' and make
+ * it active.  Must be called with state lock held.
+ *
+ *   private  handed to nal->cb_read() when fetching a fragment array
+ *   eqh      event queue handle, or PTL_EQ_NONE for no event queue
+ *   unlink   stored in new->unlink
+ *
+ * NB we are passed an allocated, but uninitialised and not yet active md.
+ * If we return success, caller may lib_md_unlink() it.
+ * Otherwise caller may only lib_md_free() it.
+ *
+ * Returns PTL_OK, or PTL_INV_EQ, PTL_IOV_TOO_MANY, PTL_INV_MD, PTL_SEGV,
+ * PTL_VAL_FAILED, PTL_IOV_TOO_SMALL or the NAL's mapping error.
+ */
+static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private,
+                        ptl_md_t *md, ptl_handle_eq_t *eqh, int unlink)
+{
+        const int     max_size_opts = PTL_MD_AUTO_UNLINK |
+                                      PTL_MD_MAX_SIZE;
+        lib_eq_t     *eq = NULL;
+        int           rc;
+        int           i;
+
+        if (!PtlHandleEqual (*eqh, PTL_EQ_NONE)) {
+                eq = ptl_handle2eq(eqh, nal);
+                if (eq == NULL)
+                        return PTL_INV_EQ;
+        }
+
+        if ((md->options & PTL_MD_IOV) != 0 &&  /* discontiguous MD */
+            md->niov > PTL_MD_MAX_IOV)          /* too many fragments */
+                return PTL_IOV_TOO_MANY;
+
+        if ((md->options & max_size_opts) != 0 && /* max size used */
+            (md->max_size < 0 || md->max_size > md->length)) /* illegal max_size */
+                return PTL_INV_MD;
+
+        new->me = NULL;
+        new->start = md->start;
+        new->length = md->length;
+        new->offset = 0;
+        new->max_size = md->max_size;
+        new->unlink = unlink;
+        new->options = md->options;
+        new->user_ptr = md->user_ptr;
+        new->eq = eq;
+        new->threshold = md->threshold;
+        new->pending = 0;
+        new->md_flags = 0;
+
+        if ((md->options & PTL_MD_IOV) != 0) {
+                int total_length = 0;
+
+                if ((md->options & PTL_MD_KIOV) != 0) /* Can't specify both */
+                        return PTL_INV_MD;
+
+                new->md_niov = md->niov;
+
+                if (nal->cb_read (nal, private, new->md_iov.iov, md->start,
+                                  md->niov * sizeof (new->md_iov.iov[0])))
+                        return PTL_SEGV;
+
+                for (i = 0; i < new->md_niov; i++) {
+                        /* We take the base address on trust */
+                        if (new->md_iov.iov[i].iov_len <= 0) /* invalid length */
+                                return PTL_VAL_FAILED;
+
+                        total_length += new->md_iov.iov[i].iov_len;
+                }
+
+                if (md->length > total_length)
+                        return PTL_IOV_TOO_SMALL;
+
+                if (nal->cb_map != NULL) {
+                        rc = nal->cb_map (nal, new->md_niov, new->md_iov.iov,
+                                          &new->md_addrkey);
+                        if (rc != PTL_OK)
+                                return (rc);
+                }
+        } else if ((md->options & PTL_MD_KIOV) != 0) {
+#ifndef __KERNEL__
+                return PTL_INV_MD;
+#else
+                int total_length = 0;
+
+                /* Trap attempt to use paged I/O if unsupported early. */
+                if (nal->cb_send_pages == NULL ||
+                    nal->cb_recv_pages == NULL)
+                        return PTL_INV_MD;
+
+                /* The fragment array is copied into new->md_iov below, so
+                 * bound the count just as the PTL_MD_IOV case is bounded
+                 * (presumably both arrays hold PTL_MD_MAX_IOV entries) */
+                if (md->niov > PTL_MD_MAX_IOV)
+                        return PTL_IOV_TOO_MANY;
+
+                new->md_niov = md->niov;
+
+                if (nal->cb_read (nal, private, new->md_iov.kiov, md->start,
+                                  md->niov * sizeof (new->md_iov.kiov[0])))
+                        return PTL_SEGV;
+
+                for (i = 0; i < new->md_niov; i++) {
+                        /* We take the page pointer on trust */
+                        if (new->md_iov.kiov[i].kiov_offset +
+                            new->md_iov.kiov[i].kiov_len > PAGE_SIZE )
+                                return PTL_VAL_FAILED; /* invalid length */
+
+                        total_length += new->md_iov.kiov[i].kiov_len;
+                }
+
+                if (md->length > total_length)
+                        return PTL_IOV_TOO_SMALL;
+
+                if (nal->cb_map_pages != NULL) {
+                        rc = nal->cb_map_pages (nal, new->md_niov, new->md_iov.kiov,
+                                                &new->md_addrkey);
+                        if (rc != PTL_OK)
+                                return (rc);
+                }
+#endif
+        } else {   /* contiguous */
+                new->md_niov = 1;
+                new->md_iov.iov[0].iov_base = md->start;
+                new->md_iov.iov[0].iov_len = md->length;
+
+                if (nal->cb_map != NULL) {
+                        rc = nal->cb_map (nal, new->md_niov, new->md_iov.iov,
+                                          &new->md_addrkey);
+                        if (rc != PTL_OK)
+                                return (rc);
+                }
+        }
+
+        /* Only count the EQ reference once nothing else can fail */
+        if (eq != NULL)
+                eq->eq_refcount++;
+
+        /* It's good; let handle2md succeed and add to active mds */
+        lib_initialise_handle (nal, &new->md_lh);
+        list_add (&new->md_list, &nal->ni.ni_active_mds);
+
+        return PTL_OK;
+}
+
+/*
+ * Copy the user-visible fields of library MD 'md' out into 'new'.
+ * Must be called with state lock held.
+ *
+ * NB the iov entries themselves are not copied out.  For a discontiguous
+ * MD the target only learns the original iov pointer (in start) and how
+ * many entries it had; a contiguous MD reports niov as 0.
+ */
+void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md, ptl_md_t * new)
+{
+        new->start     = md->start;
+        new->length    = md->length;
+        new->threshold = md->threshold;
+        new->max_size  = md->max_size;
+        new->options   = md->options;
+        new->user_ptr  = md->user_ptr;
+        ptl_eq2handle(&new->eventq, md->eq);
+
+        if ((md->options & (PTL_MD_IOV | PTL_MD_KIOV)) != 0)
+                new->niov = md->md_niov;
+        else
+                new->niov = 0;
+}
+
+/*
+ * Library side of PtlMDAttach: build a memory descriptor and attach it
+ * to an existing match entry.
+ *
+ * Incoming (PtlMDAttach_in):
+ *      me_in           handle of the match entry to attach to
+ *      md_in           user's description of the memory
+ *      eq_in           event queue handle for the MD
+ *      unlink_in       unlink setting stored in the MD
+ *
+ * Outgoing (PtlMDAttach_out):
+ *      rc              PTL_OK, PTL_NOSPACE, PTL_INV_ME, PTL_INUSE (the ME
+ *                      already has an MD) or the error from lib_md_build()
+ *      handle_out      handle of the new MD, filled in on success
+ */
+int do_PtlMDAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        PtlMDAttach_in  *in  = v_args;
+        PtlMDAttach_out *out = v_ret;
+        lib_md_t        *new_md;
+        lib_me_t        *me;
+        unsigned long    flags;
+
+        new_md = lib_md_alloc (nal);
+        if (new_md == NULL) {
+                out->rc = PTL_NOSPACE;
+                return (out->rc);
+        }
+
+        state_lock(nal, &flags);
+
+        me = ptl_handle2me(&in->me_in, nal);
+        if (me == NULL) {
+                out->rc = PTL_INV_ME;
+                goto failed;
+        }
+
+        if (me->md != NULL) {
+                out->rc = PTL_INUSE;
+                goto failed;
+        }
+
+        out->rc = lib_md_build(nal, new_md, private, &in->md_in,
+                               &in->eq_in, in->unlink_in);
+        if (out->rc != PTL_OK)
+                goto failed;
+
+        /* Link the ME and the new MD to each other */
+        me->md = new_md;
+        new_md->me = me;
+
+        ptl_md2handle(&out->handle_out, new_md);
+
+        state_unlock (nal, &flags);
+        return (PTL_OK);
+
+ failed:
+        /* the descriptor never became active, so it is simply freed */
+        lib_md_free (nal, new_md);
+
+        state_unlock (nal, &flags);
+        return (out->rc);
+}
+
+/*
+ * Library side of PtlMDBind: build a memory descriptor that is not
+ * attached to any match entry.  The MD is built with PTL_UNLINK.
+ *
+ * Incoming (PtlMDBind_in):
+ *      md_in           user's description of the memory
+ *      eq_in           event queue handle for the MD
+ *
+ * Outgoing (PtlMDBind_out):
+ *      rc              PTL_OK, PTL_NOSPACE or the error from lib_md_build()
+ *      handle_out      handle of the new MD, filled in on success
+ */
+int do_PtlMDBind(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        PtlMDBind_in  *in  = v_args;
+        PtlMDBind_out *out = v_ret;
+        lib_md_t      *new_md;
+        unsigned long  flags;
+
+        new_md = lib_md_alloc (nal);
+        if (new_md == NULL) {
+                out->rc = PTL_NOSPACE;
+                return (out->rc);
+        }
+
+        state_lock(nal, &flags);
+
+        out->rc = lib_md_build(nal, new_md, private,
+                               &in->md_in, &in->eq_in, PTL_UNLINK);
+        if (out->rc != PTL_OK) {
+                /* never became active, so it is simply freed */
+                lib_md_free (nal, new_md);
+
+                state_unlock(nal, &flags);
+                return (out->rc);
+        }
+
+        ptl_md2handle(&out->handle_out, new_md);
+
+        state_unlock(nal, &flags);
+        return (PTL_OK);
+}
+
+/*
+ * Library side of PtlMDUnlink: unlink an idle memory descriptor.
+ *
+ * Incoming (PtlMDUnlink_in):
+ *      md_in           handle of the MD to unlink
+ *
+ * Outgoing (PtlMDUnlink_out):
+ *      rc              PTL_OK, PTL_INV_MD (handle does not resolve) or
+ *                      PTL_MD_INUSE (operations still pending on the MD)
+ *      status_out      copy of the MD's fields taken just before the
+ *                      unlink, filled in on success
+ */
+int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        PtlMDUnlink_in  *in  = v_args;
+        PtlMDUnlink_out *out = v_ret;
+        lib_md_t        *victim;
+        unsigned long    flags;
+
+        state_lock(nal, &flags);
+
+        victim = ptl_handle2md(&in->md_in, nal);
+        if (victim == NULL) {
+                out->rc = PTL_INV_MD;
+                goto out;
+        }
+
+        if (victim->pending != 0) {           /* being filled/spilled */
+                out->rc = PTL_MD_INUSE;
+                goto out;
+        }
+
+        /* Callers attempting to unlink a busy MD which will get
+         * unlinked once the net op completes should see INUSE,
+         * before completion and INV_MD thereafter.  LASSERT we've
+         * got that right... */
+        LASSERT ((victim->md_flags & PTL_MD_FLAG_UNLINK) == 0);
+
+        lib_md_deconstruct(nal, victim, &out->status_out);
+        lib_md_unlink(nal, victim);
+        out->rc = PTL_OK;
+
+ out:
+        state_unlock(nal, &flags);
+
+        return (out->rc);
+}
+
+int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *v_args,
+ void *v_ret)
+{
+ /*
+ * Incoming:
+ * ptl_handle_md_t md_in
+ * ptl_md_t * old_inout
+ * ptl_md_t * new_inout
+ * ptl_handle_eq_t testq_in
+ * ptl_seq_t sequence_in
+ *
+ * Outgoing:
+ * ptl_md_t * old_inout
+ * ptl_md_t * new_inout
+ *
+ * Optionally reports the MD's current contents (old_inout_valid) and
+ * optionally rebuilds it from new_inout (new_inout_valid).  The
+ * rebuild is refused with PTL_NOUPDATE while the MD has operations
+ * pending, or when a test queue is given and its sequence number
+ * differs from sequence_in.  ret->rc is also PTL_INV_MD or
+ * PTL_INV_EQ for a handle that does not resolve, or whatever
+ * lib_md_build() returns.
+ *
+ * NOTE(review): lib_md_build() finishes by calling
+ * lib_initialise_handle() and list_add() on the md it is given.
+ * Here that md was just looked up by handle, so presumably it is
+ * already hashed and on ni_active_mds; confirm that re-adding it is
+ * safe.  A failed build also leaves md partly overwritten.
+ */
+ PtlMDUpdate_internal_in *args = v_args;
+ PtlMDUpdate_internal_out *ret = v_ret;
+ lib_md_t *md;
+ lib_eq_t *test_eq = NULL;
+ ptl_md_t *new = &args->new_inout;
+ unsigned long flags;
+
+ state_lock(nal, &flags);
+
+ md = ptl_handle2md(&args->md_in, nal);
+ if (md == NULL) {
+ ret->rc = PTL_INV_MD;
+ goto out;
+ }
+
+ if (args->old_inout_valid) /* caller wants the current contents back */
+ lib_md_deconstruct(nal, md, &ret->old_inout);
+
+ if (!args->new_inout_valid) { /* query only: nothing to change */
+ ret->rc = PTL_OK;
+ goto out;
+ }
+
+ if (!PtlHandleEqual (args->testq_in, PTL_EQ_NONE)) {
+ test_eq = ptl_handle2eq(&args->testq_in, nal);
+ if (test_eq == NULL) {
+ ret->rc = PTL_INV_EQ;
+ goto out;
+ }
+ }
+
+ if (md->pending != 0) { /* never rebuild an MD with operations in flight */
+ ret->rc = PTL_NOUPDATE;
+ goto out;
+ }
+
+ if (test_eq == NULL ||
+ test_eq->sequence == args->sequence_in) {
+ lib_me_t *me = md->me; /* saved: lib_md_build() resets md->me to NULL */
+
+#warning this does not track eq refcounts properly
+
+ ret->rc = lib_md_build(nal, md, private,
+ new, &new->eventq, md->unlink);
+
+ md->me = me; /* restore the ME link */
+ } else {
+ ret->rc = PTL_NOUPDATE;
+ }
+
+ out:
+ state_unlock(nal, &flags);
+ return (ret->rc);
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lib/lib-me.c
+ * Match Entry management routines
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __KERNEL__
+# include <stdio.h>
+#else
+# define DEBUG_SUBSYSTEM S_PORTALS
+# include <linux/kp30.h>
+#endif
+
+#include <portals/lib-p30.h>
+#include <portals/arg-blocks.h>
+
+static void lib_me_dump(nal_cb_t * nal, lib_me_t * me);
+
+/*
+ * Library side of PtlMEAttach: create a match entry on a portal's match
+ * list.
+ *
+ * Incoming (PtlMEAttach_in):
+ *      index_in        portal table index; must be within the table
+ *      match_id_in, match_bits_in, ignore_bits_in
+ *                      matching criteria copied into the ME
+ *      unlink_in       unlink setting copied into the ME
+ *      position_in     PTL_INS_AFTER appends to the portal's list,
+ *                      anything else puts the ME at its head
+ *
+ * Outgoing (PtlMEAttach_out):
+ *      rc              PTL_OK, PTL_INV_PTINDEX or PTL_NOSPACE
+ *      handle_out      handle of the new ME, filled in on success
+ */
+int do_PtlMEAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        PtlMEAttach_in  *args = v_args;
+        PtlMEAttach_out *ret = v_ret;
+        lib_ni_t        *ni = &nal->ni;
+        lib_ptl_t       *tbl = &ni->tbl;
+        unsigned long    flags;
+        lib_me_t        *me;
+
+        if (args->index_in < 0 || args->index_in >= tbl->size)
+                return ret->rc = PTL_INV_PTINDEX;
+
+        /* Should check for valid matchid (PTL_INV_PROC), but not yet */
+
+        me = lib_me_alloc (nal);
+        if (me == NULL)
+                return (ret->rc = PTL_NOSPACE);
+
+        state_lock(nal, &flags);
+
+        me->match_id = args->match_id_in;
+        me->match_bits = args->match_bits_in;
+        me->ignore_bits = args->ignore_bits_in;
+        me->unlink = args->unlink_in;
+        me->md = NULL;
+
+        lib_initialise_handle (nal, &me->me_lh);
+
+        if (args->position_in == PTL_INS_AFTER)
+                list_add_tail(&me->me_list, &(tbl->tbl[args->index_in]));
+        else
+                list_add(&me->me_list, &(tbl->tbl[args->index_in]));
+
+        ptl_me2handle(&ret->handle_out, me);
+
+        state_unlock(nal, &flags);
+
+        return ret->rc = PTL_OK;
+}
+
+/*
+ * Library side of PtlMEInsert: create a match entry next to an existing
+ * one.
+ *
+ * Incoming (PtlMEInsert_in):
+ *      current_in      handle of the existing ME to insert relative to
+ *      match_id_in, match_bits_in, ignore_bits_in
+ *                      matching criteria copied into the new ME
+ *      unlink_in       unlink setting copied into the new ME
+ *      position_in     PTL_INS_AFTER places the new ME directly after
+ *                      the current one, anything else directly before it
+ *
+ * Outgoing (PtlMEInsert_out):
+ *      rc              PTL_OK, PTL_NOSPACE or PTL_INV_ME
+ *      handle_out      handle of the new ME, filled in on success
+ */
+int do_PtlMEInsert(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        PtlMEInsert_in  *args = v_args;
+        PtlMEInsert_out *ret = v_ret;
+        unsigned long    flags;
+        lib_me_t        *me;
+        lib_me_t        *new;
+
+        new = lib_me_alloc (nal);
+        if (new == NULL)
+                return (ret->rc = PTL_NOSPACE);
+
+        /* Should check for valid matchid, but not yet */
+
+        state_lock(nal, &flags);
+
+        me = ptl_handle2me(&args->current_in, nal);
+        if (me == NULL) {
+                lib_me_free (nal, new);
+
+                state_unlock (nal, &flags);
+                return (ret->rc = PTL_INV_ME);
+        }
+
+        new->match_id = args->match_id_in;
+        new->match_bits = args->match_bits_in;
+        new->ignore_bits = args->ignore_bits_in;
+        new->unlink = args->unlink_in;
+        new->md = NULL;
+
+        lib_initialise_handle (nal, &new->me_lh);
+
+        /* The anchor here is the current ME, not a list head:
+         * list_add() links the new entry right after its anchor and
+         * list_add_tail() links it right before */
+        if (args->position_in == PTL_INS_AFTER)
+                list_add(&new->me_list, &me->me_list);
+        else
+                list_add_tail(&new->me_list, &me->me_list);
+
+        ptl_me2handle(&ret->handle_out, new);
+
+        state_unlock(nal, &flags);
+
+        return ret->rc = PTL_OK;
+}
+
+/*
+ * Library side of PtlMEUnlink: unlink the match entry named by
+ * current_in.
+ *
+ * Outgoing (PtlMEUnlink_out):
+ *      rc              PTL_OK, or PTL_INV_ME if the handle does not resolve
+ */
+int do_PtlMEUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        PtlMEUnlink_in  *in  = v_args;
+        PtlMEUnlink_out *out = v_ret;
+        lib_me_t        *victim;
+        unsigned long    flags;
+
+        state_lock(nal, &flags);
+
+        victim = ptl_handle2me(&in->current_in, nal);
+        if (victim != NULL) {
+                lib_me_unlink(nal, victim);
+                out->rc = PTL_OK;
+        } else {
+                out->rc = PTL_INV_ME;
+        }
+
+        state_unlock(nal, &flags);
+
+        return (out->rc);
+}
+
+/* call with state_lock please
+ *
+ * Take 'me' off its match list, unlink any MD attached to it, then
+ * unhash and free the ME itself.
+ */
+void lib_me_unlink(nal_cb_t *nal, lib_me_t *me)
+{
+ lib_ni_t *ni = &nal->ni;
+
+ if (ni->debug & PTL_DEBUG_UNLINK) {
+ ptl_handle_any_t handle;
+ ptl_me2handle(&handle, me); /* NOTE(review): handle is computed but never used */
+ }
+
+ list_del (&me->me_list);
+
+ if (me->md) {
+ me->md->me = NULL; /* cleared first, so lib_md_unlink() won't call back into us */
+ lib_md_unlink(nal, me->md);
+ }
+
+ lib_invalidate_handle (nal, &me->me_lh);
+ lib_me_free(nal, me);
+}
+
+/*
+ * Library side of PtlTblDump: print every match entry on one portal's
+ * match list through nal->cb_printf.
+ *
+ * Incoming (PtlTblDump_in):
+ *      index_in        portal table index; must be within the table
+ *
+ * Outgoing (PtlTblDump_out):
+ *      rc              PTL_OK or PTL_INV_PTINDEX
+ */
+int do_PtlTblDump(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        PtlTblDump_in    *in  = v_args;
+        PtlTblDump_out   *out = v_ret;
+        lib_ptl_t        *ptl = &nal->ni.tbl;
+        struct list_head *pos;
+        ptl_handle_any_t  scratch;
+        unsigned long     flags;
+
+        if (in->index_in < 0 || in->index_in >= ptl->size) {
+                out->rc = PTL_INV_PTINDEX;
+                return out->rc;
+        }
+
+        nal->cb_printf(nal, "Portal table index %d\n", in->index_in);
+
+        state_lock(nal, &flags);
+        list_for_each(pos, &(ptl->tbl[in->index_in])) {
+                lib_me_t *entry = list_entry(pos, lib_me_t, me_list);
+
+                ptl_me2handle(&scratch, entry);
+                lib_me_dump(nal, entry);
+        }
+        state_unlock(nal, &flags);
+
+        out->rc = PTL_OK;
+        return out->rc;
+}
+
+/*
+ * Library side of PtlMEDump: print the match entry named by current_in
+ * through nal->cb_printf.
+ *
+ * Outgoing (PtlMEDump_out):
+ *      rc              PTL_OK, or PTL_INV_ME if the handle does not resolve
+ */
+int do_PtlMEDump(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        PtlMEDump_in  *in  = v_args;
+        PtlMEDump_out *out = v_ret;
+        lib_me_t      *entry;
+        unsigned long  flags;
+
+        state_lock(nal, &flags);
+
+        entry = ptl_handle2me(&in->current_in, nal);
+        if (entry != NULL) {
+                lib_me_dump(nal, entry);
+                out->rc = PTL_OK;
+        } else {
+                out->rc = PTL_INV_ME;
+        }
+
+        state_unlock(nal, &flags);
+
+        return out->rc;
+}
+
+/*
+ * Print one match entry through nal->cb_printf: its address and handle
+ * cookie, its match/ignore bits, its MD pointer and its list neighbours.
+ *
+ * The match and ignore bits use the LPX64 format, as lib_find_me() does
+ * for match bits, rather than assuming they are 'long' sized.
+ */
+static void lib_me_dump(nal_cb_t * nal, lib_me_t * me)
+{
+        nal->cb_printf(nal, "Match Entry %p ("LPX64")\n", me,
+                       me->me_lh.lh_cookie);
+
+        nal->cb_printf(nal, "\tMatch/Ignore\t= "LPX64" / "LPX64"\n",
+                       me->match_bits, me->ignore_bits);
+
+        nal->cb_printf(nal, "\tMD\t= %p\n", me->md);
+
+        /* NB a neighbour may be the list head itself, in which case the
+         * pointer printed is not a real match entry */
+        nal->cb_printf(nal, "\tprev\t= %p\n",
+                       list_entry(me->me_list.prev, lib_me_t, me_list));
+        nal->cb_printf(nal, "\tnext\t= %p\n",
+                       list_entry(me->me_list.next, lib_me_t, me_list));
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lib/lib-move.c
+ * Data movement routines
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __KERNEL__
+# include <stdio.h>
+#else
+# define DEBUG_SUBSYSTEM S_PORTALS
+# include <linux/kp30.h>
+#endif
+#include <portals/p30.h>
+#include <portals/lib-p30.h>
+#include <portals/arg-blocks.h>
+
+/*
+ * Right now it does not check access control lists.
+ *
+ * We only support one MD per ME, which is how the Portals 3.1 spec is written.
+ * All previous complication is removed.
+ */
+
+/*
+ * Find the match entry on portal 'index' that should take an incoming
+ * request, and reserve room for it in that entry's MD.
+ *
+ *   op_mask              MD option bit the request needs (the drop
+ *                        message reports PTL_MD_OP_GET as "GET",
+ *                        anything else as "PUT")
+ *   src_nid, src_pid     sender, tested against each ME's match_id
+ *   rlength, roffset     requested length and sender-supplied offset
+ *   match_bits           tested against each ME's match/ignore bits
+ *   mlength_out          set to the length that will be moved
+ *   offset_out           set to the offset within the MD
+ *   unlink_out           set non-zero if the MD should now be
+ *                        auto-unlinked
+ *
+ * MEs are tried in list order.  The first one whose MD is attached,
+ * still active and matches is used; its md->offset is advanced past
+ * the reserved region.  Returns that ME, or NULL (after logging the
+ * drop) if the index is invalid, nothing matches, or the first match
+ * can't hold the request and doesn't allow truncation.
+ */
+static lib_me_t *
+lib_find_me(nal_cb_t *nal, int index, int op_mask, ptl_nid_t src_nid,
+            ptl_pid_t src_pid, ptl_size_t rlength, ptl_size_t roffset,
+            ptl_match_bits_t match_bits, ptl_size_t *mlength_out,
+            ptl_size_t *offset_out, int *unlink_out)
+{
+        lib_ni_t         *ni = &nal->ni;
+        struct list_head *match_list;
+        struct list_head *tmp;
+        lib_me_t         *me;
+        lib_md_t         *md;
+        ptl_size_t        mlength;
+        ptl_size_t        offset;
+
+        ENTRY;
+
+        CDEBUG (D_NET, "Request from "LPU64".%d of length %d into portal %d "
+                "MB="LPX64"\n", src_nid, src_pid, rlength, index, match_bits);
+
+        if (index < 0 || index >= ni->tbl.size) {
+                CERROR("Invalid portal %d not in [0-%d]\n",
+                       index, ni->tbl.size);
+                goto failed;
+        }
+
+        /* only form the list address once the index is known to be valid */
+        match_list = &ni->tbl.tbl[index];
+
+        list_for_each (tmp, match_list) {
+                me = list_entry(tmp, lib_me_t, me_list);
+                md = me->md;
+
+                 /* ME attached but MD not attached yet */
+                if (md == NULL)
+                        continue;
+
+                LASSERT (me == md->me);
+
+                /* MD deactivated */
+                if (md->threshold == 0)
+                        continue;
+
+                /* mismatched MD op */
+                if ((md->options & op_mask) == 0)
+                        continue;
+
+                /* mismatched ME nid/pid? */
+                if (me->match_id.nid != PTL_NID_ANY &&
+                    me->match_id.nid != src_nid)
+                        continue;
+
+                if (me->match_id.pid != PTL_PID_ANY &&
+                    me->match_id.pid != src_pid)
+                        continue;
+
+                /* mismatched ME matchbits? */
+                if (((me->match_bits ^ match_bits) & ~me->ignore_bits) != 0)
+                        continue;
+
+                /* Hurrah! This _is_ a match; check it out... */
+
+                /* locally managed MDs fill from their own running offset;
+                 * remotely managed ones use the sender's offset.
+                 * NOTE(review): roffset is not checked against md->length */
+                if ((md->options & PTL_MD_MANAGE_REMOTE) == 0)
+                        offset = md->offset;
+                else
+                        offset = roffset;
+
+                mlength = md->length - offset;
+                if ((md->options & PTL_MD_MAX_SIZE) != 0 &&
+                    mlength > md->max_size)
+                        mlength = md->max_size;
+
+                if (rlength <= mlength) {        /* fits in allowed space */
+                        mlength = rlength;
+                } else if ((md->options & PTL_MD_TRUNCATE) == 0) {
+                        /* this packet _really_ is too big */
+                        CERROR("Matching packet %d too big: %d left, "
+                               "%d allowed\n", rlength, md->length - offset,
+                               mlength);
+                        goto failed;
+                }
+
+                md->offset = offset + mlength;
+
+                *offset_out = offset;
+                *mlength_out = mlength;
+                *unlink_out = ((md->options & PTL_MD_AUTO_UNLINK) != 0 &&
+                               md->offset >= (md->length - md->max_size));
+                RETURN (me);
+        }
+
+ failed:
+        CERROR (LPU64": Dropping %s from "LPU64".%d portal %d match "LPX64
+                " offset %d length %d: no match\n",
+                ni->nid, (op_mask == PTL_MD_OP_GET) ? "GET" : "PUT",
+                src_nid, src_pid, index, match_bits, roffset, rlength);
+        RETURN(NULL);
+}
+
+/*
+ * Add or remove simulated-failure ("test peer") entries.
+ *
+ * A non-zero args->threshold adds a new entry for args->nid.  A zero
+ * threshold removes every entry that is already exhausted, plus either all
+ * entries (args->nid == PTL_NID_ANY) or the ones matching args->nid.
+ *
+ * The result code is stored in ret->rc and also returned: PTL_OK, or
+ * PTL_FAIL if a new entry could not be allocated.
+ */
+int do_PtlFailNid (nal_cb_t *nal, void *private, void *v_args, void *v_ret)
+{
+        PtlFailNid_in    *in = v_args;
+        PtlFailNid_out   *out = v_ret;
+        lib_test_peer_t  *peer;
+        unsigned long     flags;
+        struct list_head *pos;
+        struct list_head *nxt;
+        struct list_head  doomed;
+
+        if (in->threshold == 0) {
+                /* Removal: unhook the victims under the lock... */
+                INIT_LIST_HEAD (&doomed);
+
+                state_lock (nal, &flags);
+
+                list_for_each_safe (pos, nxt, &nal->ni.ni_test_peers) {
+                        peer = list_entry (pos, lib_test_peer_t, tp_list);
+
+                        /* keep live entries that weren't asked for */
+                        if (peer->tp_threshold != 0 &&
+                            in->nid != PTL_NID_ANY &&
+                            peer->tp_nid != in->nid)
+                                continue;
+
+                        list_del (&peer->tp_list);
+                        list_add (&peer->tp_list, &doomed);
+                }
+
+                state_unlock (nal, &flags);
+
+                /* ...and free them once it has been dropped */
+                while (!list_empty (&doomed)) {
+                        peer = list_entry (doomed.next,
+                                           lib_test_peer_t, tp_list);
+
+                        list_del (&peer->tp_list);
+                        nal->cb_free (nal, peer, sizeof (*peer));
+                }
+                return (out->rc = PTL_OK);
+        }
+
+        /* Addition: allocate outside the lock, link inside it */
+        peer = (lib_test_peer_t *)nal->cb_malloc (nal, sizeof (*peer));
+        if (peer == NULL)
+                return (out->rc = PTL_FAIL);
+
+        peer->tp_nid = in->nid;
+        peer->tp_threshold = in->threshold;
+
+        state_lock (nal, &flags);
+        list_add (&peer->tp_list, &nal->ni.ni_test_peers);
+        state_unlock (nal, &flags);
+
+        return (out->rc = PTL_OK);
+}
+
+/*
+ * Decide whether traffic to/from 'nid' should be failed artificially.
+ *
+ * Walks the test-peer list under the state lock and stops at the first live
+ * entry that names 'nid' (or PTL_NID_ANY).  A finite threshold on that entry
+ * is decremented.  Exhausted entries are only unlinked and freed when
+ * 'outgoing' is set; on incoming messages they are left in place.
+ *
+ * Returns 1 if the message should be failed, 0 otherwise.
+ */
+static int
+fail_peer (nal_cb_t *nal, ptl_nid_t nid, int outgoing)
+{
+        struct list_head  expired;
+        struct list_head *pos;
+        struct list_head *nxt;
+        lib_test_peer_t  *peer;
+        unsigned long     flags;
+        int               matched = 0;
+
+        INIT_LIST_HEAD (&expired);
+
+        state_lock (nal, &flags);
+
+        list_for_each_safe (pos, nxt, &nal->ni.ni_test_peers) {
+                peer = list_entry (pos, lib_test_peer_t, tp_list);
+
+                if (peer->tp_threshold != 0) {
+                        /* live entry: does it cover this peer? */
+                        if (peer->tp_nid != PTL_NID_ANY &&
+                            peer->tp_nid != nid)
+                                continue;
+
+                        matched = 1;
+
+                        if (peer->tp_threshold != PTL_MD_THRESH_INF) {
+                                peer->tp_threshold--;
+                                if (outgoing &&
+                                    peer->tp_threshold == 0) {
+                                        /* just expired; cull as below */
+                                        list_del (&peer->tp_list);
+                                        list_add (&peer->tp_list, &expired);
+                                }
+                        }
+                        break;
+                }
+
+                /* zombie entry: only cull on outgoing tests, since we may
+                 * be at interrupt priority on incoming messages. */
+                if (outgoing) {
+                        list_del (&peer->tp_list);
+                        list_add (&peer->tp_list, &expired);
+                }
+        }
+
+        state_unlock (nal, &flags);
+
+        /* free whatever was culled, now that the lock is released */
+        while (!list_empty (&expired)) {
+                peer = list_entry (expired.next, lib_test_peer_t, tp_list);
+                list_del (&peer->tp_list);
+
+                nal->cb_free (nal, peer, sizeof (*peer));
+        }
+
+        return (matched);
+}
+
+/*
+ * Return the total number of bytes described by the first 'niov' entries
+ * of 'iov' (0 when niov <= 0).
+ */
+ptl_size_t
+lib_iov_nob (int niov, struct iovec *iov)
+{
+        ptl_size_t total = 0;
+        int        i;
+
+        for (i = 0; i < niov; i++)
+                total += iov[i].iov_len;
+
+        return (total);
+}
+
+/*
+ * Gather 'len' bytes from the fragments of 'iov' into the flat buffer
+ * 'dest', consuming fragments from the first one onwards.  Asserts that
+ * the vector is not exhausted before 'len' bytes have been copied.
+ */
+void
+lib_copy_iov2buf (char *dest, int niov, struct iovec *iov, ptl_size_t len)
+{
+        ptl_size_t chunk;
+
+        for (; len > 0; iov++, niov--) {
+                LASSERT (niov > 0);
+
+                /* take the whole fragment, or just what is still wanted */
+                chunk = MIN (iov->iov_len, len);
+                memcpy (dest, iov->iov_base, chunk);
+
+                dest += chunk;
+                len -= chunk;
+        }
+}
+
+/*
+ * Scatter 'len' bytes from the flat buffer 'src' into the fragments of
+ * 'iov', filling fragments from the first one onwards.  Asserts that the
+ * vector is not exhausted before 'len' bytes have been copied.
+ */
+void
+lib_copy_buf2iov (int niov, struct iovec *iov, char *src, ptl_size_t len)
+{
+        ptl_size_t chunk;
+
+        for (; len > 0; iov++, niov--) {
+                LASSERT (niov > 0);
+
+                /* fill the whole fragment, or just what is still left */
+                chunk = MIN (iov->iov_len, len);
+                memcpy (iov->iov_base, src, chunk);
+
+                src += chunk;
+                len -= chunk;
+        }
+}
+
+/*
+ * Fill 'dst' with iovec entries describing exactly 'len' bytes of the MD's
+ * iovec, starting 'offset' bytes into it, and return how many entries were
+ * written (0 when len == 0).  The MD's own vector is only read.
+ */
+static int
+lib_extract_iov (struct iovec *dst, lib_md_t *md,
+                 ptl_size_t offset, ptl_size_t len)
+{
+        int           frags_left = md->md_niov;
+        struct iovec *frag = md->md_iov.iov;
+        ptl_size_t    avail;
+        int           nout;
+
+        LASSERT (len >= 0);
+        LASSERT (offset >= 0);
+        LASSERT (offset + len <= md->length);
+
+        if (len == 0)                           /* nothing to describe */
+                return (0);
+
+        /* step over the fragments that lie wholly before 'offset' */
+        LASSERT (frags_left > 0);
+        while (offset >= frag->iov_len) {
+                offset -= frag->iov_len;
+                frag++;
+                frags_left--;
+                LASSERT (frags_left > 0);
+        }
+
+        /* emit one output entry per source fragment; only the first one
+         * starts part-way in, hence 'offset = 0' on every later pass */
+        for (nout = 1; ; nout++, dst++, frag++, frags_left--, offset = 0) {
+                LASSERT (frags_left > 0);
+                LASSERT (nout <= PTL_MD_MAX_IOV);
+
+                avail = frag->iov_len - offset;
+                dst->iov_base = ((char *)frag->iov_base) + offset;
+
+                if (len > avail) {
+                        /* uses up this fragment and needs more */
+                        dst->iov_len = avail;
+                        len -= avail;
+                        continue;
+                }
+
+                dst->iov_len = len;
+                return (nout);
+        }
+}
+
+#ifndef __KERNEL__
+/* Stub for builds without __KERNEL__: page-based (kiov) fragments are not
+ * handled here, so any call trips LASSERT (0); 0 is returned if execution
+ * continues past the assertion. */
+ptl_size_t
+lib_kiov_nob (int niov, ptl_kiov_t *kiov)
+{
+ LASSERT (0);
+ return (0);
+}
+
+/* Stub for builds without __KERNEL__: copying out of page-based (kiov)
+ * fragments is not handled here, so any call trips LASSERT (0) and nothing
+ * is copied. */
+void
+lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, ptl_size_t len)
+{
+ LASSERT (0);
+}
+
+/* Stub for builds without __KERNEL__: copying into page-based (kiov)
+ * fragments is not handled here, so any call trips LASSERT (0) and nothing
+ * is copied.  The buffer parameter is the copy source, named 'src' to match
+ * the kernel implementation and lib_copy_buf2iov(). */
+void
+lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, char *src, ptl_size_t len)
+{
+        LASSERT (0);
+}
+
+/* Stub for builds without __KERNEL__: page-based (kiov) MDs are not handled
+ * here, so any call trips LASSERT (0).  It still returns a defined value
+ * (0 fragments), like the lib_kiov_nob() stub, because lib_recv() and
+ * lib_send() use the result; falling off the end of a non-void function
+ * would leave it undefined. */
+static int
+lib_extract_kiov (ptl_kiov_t *dst, lib_md_t *md,
+                  ptl_size_t offset, ptl_size_t len)
+{
+        LASSERT (0);
+        return (0);
+}
+
+#else
+
+/*
+ * Return the total number of bytes described by the first 'niov' entries
+ * of 'kiov' (0 when niov <= 0).
+ */
+ptl_size_t
+lib_kiov_nob (int niov, ptl_kiov_t *kiov)
+{
+        ptl_size_t total = 0;
+        int        i;
+
+        for (i = 0; i < niov; i++)
+                total += kiov[i].kiov_len;
+
+        return (total);
+}
+
+/*
+ * Gather 'len' bytes from the page fragments of 'kiov' into the flat buffer
+ * 'dest'.  Each page is mapped with kmap() for the duration of its copy, and
+ * the function asserts that it is not called in interrupt context.
+ */
+void
+lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, ptl_size_t len)
+{
+        ptl_size_t  chunk;
+        char       *page_va;
+
+        LASSERT (!in_interrupt ());
+
+        for (; len > 0; kiov++, niov--) {
+                LASSERT (niov > 0);
+                chunk = MIN (kiov->kiov_len, len);
+
+                page_va = (char *)kmap (kiov->kiov_page);
+                memcpy (dest, page_va + kiov->kiov_offset, chunk);
+                kunmap (kiov->kiov_page);
+
+                dest += chunk;
+                len -= chunk;
+        }
+}
+
+/*
+ * Scatter 'len' bytes from the flat buffer 'src' into the page fragments of
+ * 'kiov'.  Each page is mapped with kmap() for the duration of its copy, and
+ * the function asserts that it is not called in interrupt context.
+ */
+void
+lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, char *src, ptl_size_t len)
+{
+        ptl_size_t  chunk;
+        char       *page_va;
+
+        LASSERT (!in_interrupt ());
+
+        for (; len > 0; kiov++, niov--) {
+                LASSERT (niov > 0);
+                chunk = MIN (kiov->kiov_len, len);
+
+                page_va = (char *)kmap (kiov->kiov_page);
+                memcpy (page_va + kiov->kiov_offset, src, chunk);
+                kunmap (kiov->kiov_page);
+
+                src += chunk;
+                len -= chunk;
+        }
+}
+
+/*
+ * Fill 'dst' with page-fragment entries describing exactly 'len' bytes of
+ * the MD's kiov, starting 'offset' bytes into it, and return how many
+ * entries were written (0 when len == 0).  The MD's own vector is only
+ * read.  Every entry produced is asserted to end within PAGE_SIZE.
+ */
+static int
+lib_extract_kiov (ptl_kiov_t *dst, lib_md_t *md,
+                  ptl_size_t offset, ptl_size_t len)
+{
+        int         frags_left = md->md_niov;
+        ptl_kiov_t *frag = md->md_iov.kiov;
+        ptl_size_t  avail;
+        int         nout;
+
+        LASSERT (len >= 0);
+        LASSERT (offset >= 0);
+        LASSERT (offset + len <= md->length);
+
+        if (len == 0)                           /* nothing to describe */
+                return (0);
+
+        /* step over the fragments that lie wholly before 'offset' */
+        LASSERT (frags_left > 0);
+        while (offset >= frag->kiov_len) {
+                offset -= frag->kiov_len;
+                frag++;
+                frags_left--;
+                LASSERT (frags_left > 0);
+        }
+
+        /* emit one output entry per source fragment; only the first one
+         * starts part-way in, hence 'offset = 0' on every later pass */
+        for (nout = 1; ; nout++, dst++, frag++, frags_left--, offset = 0) {
+                LASSERT (frags_left > 0);
+                LASSERT (nout <= PTL_MD_MAX_IOV);
+
+                avail = frag->kiov_len - offset;
+                dst->kiov_page = frag->kiov_page;
+                dst->kiov_offset = frag->kiov_offset + offset;
+
+                /* the last entry takes what is left of 'len', earlier ones
+                 * take the rest of their fragment */
+                dst->kiov_len = (len <= avail) ? len : avail;
+                LASSERT (dst->kiov_offset + dst->kiov_len <= PAGE_SIZE);
+
+                if (len <= avail)
+                        return (nout);
+
+                len -= avail;
+        }
+}
+#endif
+
+/*
+ * Ask the NAL to receive 'mlen' bytes of an 'rlen' byte payload into 'md'
+ * starting at 'offset'.
+ *
+ * With mlen == 0 no fragments are passed and 'md' is not touched (callers
+ * in this file pass a NULL md in that case).  Otherwise the destination
+ * fragments are built in msg->msg_iov and handed to cb_recv, or to
+ * cb_recv_pages when the MD has PTL_MD_KIOV set.
+ */
+void
+lib_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
+          ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen)
+{
+        int nfrags;
+
+        if (mlen == 0) {
+                nal->cb_recv (nal, private, msg, 0, NULL, 0, rlen);
+                return;
+        }
+
+        if ((md->options & PTL_MD_KIOV) != 0) {
+                /* page-based MD */
+                nfrags = lib_extract_kiov (msg->msg_iov.kiov,
+                                           md, offset, mlen);
+                nal->cb_recv_pages (nal, private, msg,
+                                    nfrags, msg->msg_iov.kiov, mlen, rlen);
+                return;
+        }
+
+        /* virtual-address MD */
+        nfrags = lib_extract_iov (msg->msg_iov.iov, md, offset, mlen);
+        nal->cb_recv (nal, private, msg,
+                      nfrags, msg->msg_iov.iov, mlen, rlen);
+}
+
+/*
+ * Ask the NAL to send header 'hdr' plus 'len' bytes of 'md' starting at
+ * 'offset' to nid/pid.
+ *
+ * With len == 0 only the header is sent and 'md' is not touched.  Otherwise
+ * the source fragments are built in msg->msg_iov and handed to cb_send, or
+ * to cb_send_pages when the MD has PTL_MD_KIOV set.
+ *
+ * Returns whatever the NAL callback returns.
+ */
+int
+lib_send (nal_cb_t *nal, void *private, lib_msg_t *msg,
+          ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+          lib_md_t *md, ptl_size_t offset, ptl_size_t len)
+{
+        int nfrags;
+        int rc;
+
+        if (len == 0) {
+                /* header only */
+                rc = nal->cb_send (nal, private, msg,
+                                   hdr, type, nid, pid,
+                                   0, NULL, 0);
+        } else if ((md->options & PTL_MD_KIOV) != 0) {
+                /* page-based MD */
+                nfrags = lib_extract_kiov (msg->msg_iov.kiov,
+                                           md, offset, len);
+                rc = nal->cb_send_pages (nal, private, msg,
+                                         hdr, type, nid, pid,
+                                         nfrags, msg->msg_iov.kiov, len);
+        } else {
+                /* virtual-address MD */
+                nfrags = lib_extract_iov (msg->msg_iov.iov, md, offset, len);
+                rc = nal->cb_send (nal, private, msg,
+                                   hdr, type, nid, pid,
+                                   nfrags, msg->msg_iov.iov, len);
+        }
+
+        return (rc);
+}
+
+/*
+ * Allocate and zero a message descriptor bound to 'md'.
+ *
+ * Bumps md->pending, consumes one unit of the MD's threshold unless it is
+ * PTL_MD_THRESH_INF, updates the message allocation counters and puts the
+ * message on the NI's active list.  Returns NULL if allocation fails, in
+ * which case nothing has been changed.
+ *
+ * ALWAYS called holding the state_lock.
+ */
+static lib_msg_t *
+get_new_msg (nal_cb_t *nal, lib_md_t *md)
+{
+        lib_counters_t *stats = &nal->ni.counters;
+        lib_msg_t      *new_msg = lib_msg_alloc (nal);
+
+        if (new_msg == NULL)
+                return (NULL);
+
+        memset (new_msg, 0, sizeof (*new_msg));
+
+        new_msg->send_ack = 0;
+        new_msg->md = md;
+        new_msg->ev.arrival_time = get_cycles();
+
+        /* the message holds the MD busy and uses up a finite threshold */
+        md->pending++;
+        if (md->threshold != PTL_MD_THRESH_INF) {
+                LASSERT (md->threshold > 0);
+                md->threshold--;
+        }
+
+        list_add (&new_msg->msg_list, &nal->ni.ni_active_msgs);
+
+        /* track current and high-water message counts */
+        stats->msgs_alloc++;
+        if (stats->msgs_max < stats->msgs_alloc)
+                stats->msgs_max = stats->msgs_alloc;
+
+        return (new_msg);
+}
+
+
+/*
+ * Incoming messages have a ptl_msg_t object associated with them
+ * by the library. This object encapsulates the state of the
+ * message and allows the NAL to do non-blocking receives or sends
+ * of long messages.
+ *
+ */
+/*
+ * Handle an incoming PUT.
+ *
+ * Converts the PUT-specific header fields to host byte order, looks for a
+ * matching ME/MD with lib_find_me() under the state lock, allocates the
+ * message state, records the ACK and event details, and finally hands the
+ * payload to the NAL via lib_recv() after dropping the lock.
+ *
+ * Returns 0 when the PUT was accepted.  Returns -1 when it was dropped (no
+ * match, or no message could be allocated); the drop counters are bumped
+ * and lib_recv() is still called, with no message/MD and a zero match
+ * length.
+ */
+static int parse_put(nal_cb_t * nal, ptl_hdr_t * hdr, void *private)
+{
+ lib_ni_t *ni = &nal->ni;
+ ptl_size_t mlength = 0;
+ ptl_size_t offset = 0;
+ int unlink = 0;
+ lib_me_t *me;
+ lib_md_t *md;
+ lib_msg_t *msg;
+ unsigned long flags;
+
+ /* Convert put fields to host byte order */
+ hdr->msg.put.match_bits = NTOH__u64 (hdr->msg.put.match_bits);
+ hdr->msg.put.ptl_index = NTOH__u32 (hdr->msg.put.ptl_index);
+ hdr->msg.put.offset = NTOH__u32 (hdr->msg.put.offset);
+
+ state_lock(nal, &flags);
+
+ /* on success this has already advanced the MD's offset */
+ me = lib_find_me(nal, hdr->msg.put.ptl_index, PTL_MD_OP_PUT,
+ hdr->src_nid, hdr->src_pid,
+ PTL_HDR_LENGTH (hdr), hdr->msg.put.offset,
+ hdr->msg.put.match_bits,
+ &mlength, &offset, &unlink);
+ if (me == NULL)
+ goto drop;
+
+ md = me->md;
+ CDEBUG(D_NET, "Incoming put index %x from "LPU64"/%u of length %d/%d "
+ "into md "LPX64" [%d] + %d\n", hdr->msg.put.ptl_index,
+ hdr->src_nid, hdr->src_pid, mlength, PTL_HDR_LENGTH(hdr),
+ md->md_lh.lh_cookie, md->md_niov, offset);
+
+ msg = get_new_msg (nal, md);
+ if (msg == NULL) {
+ CERROR(LPU64": Dropping PUT from "LPU64": can't allocate msg\n",
+ ni->nid, hdr->src_nid);
+ goto drop;
+ }
+
+ /* An ACK is only sent if the sender supplied an ACK MD handle and
+ * this MD has not disabled ACKs; remember where to send it. */
+ if (!ptl_is_wire_handle_none(&hdr->msg.put.ack_wmd) &&
+ !(md->options & PTL_MD_ACK_DISABLE)) {
+ msg->send_ack = 1;
+ msg->ack_wmd = hdr->msg.put.ack_wmd;
+ msg->nid = hdr->src_nid;
+ msg->pid = hdr->src_pid;
+ msg->ev.match_bits = hdr->msg.put.match_bits;
+ }
+
+ /* Event details are only filled in when the MD has an event queue */
+ if (md->eq) {
+ msg->ev.type = PTL_EVENT_PUT;
+ msg->ev.initiator.nid = hdr->src_nid;
+ msg->ev.initiator.pid = hdr->src_pid;
+ msg->ev.portal = hdr->msg.put.ptl_index;
+ msg->ev.match_bits = hdr->msg.put.match_bits;
+ msg->ev.rlength = PTL_HDR_LENGTH(hdr);
+ msg->ev.mlength = mlength;
+ msg->ev.offset = offset;
+ msg->ev.hdr_data = hdr->msg.put.hdr_data;
+
+ /* NB if this match has exhausted the MD, we can't be sure
+ * that this event will be the last one associated with
+ * this MD in the event queue (another message already
+ * matching this ME/MD could end up being last). So we
+ * remember the ME handle anyway and check again when we're
+ * allocating our slot in the event queue.
+ */
+ ptl_me2handle (&msg->ev.unlinked_me, me);
+
+ lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
+ }
+
+ ni->counters.recv_count++;
+ ni->counters.recv_length += mlength;
+
+ /* only unlink after MD's pending count has been bumped
+ * in get_new_msg() otherwise lib_me_unlink() will nuke it */
+ if (unlink) {
+ md->md_flags |= PTL_MD_FLAG_AUTO_UNLINKED;
+ lib_me_unlink (nal, me);
+ }
+
+ state_unlock(nal, &flags);
+
+ /* receive mlength bytes of the payload into the MD, lock released */
+ lib_recv (nal, private, msg, md, offset, mlength, PTL_HDR_LENGTH (hdr));
+ return 0;
+
+ drop:
+ /* reached with the state lock held */
+ nal->ni.counters.drop_count++;
+ nal->ni.counters.drop_length += PTL_HDR_LENGTH(hdr);
+ state_unlock (nal, &flags);
+ lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr));
+ return -1;
+}
+
+/*
+ * Handle an incoming GET.
+ *
+ * Converts the GET-specific header fields to host byte order, looks for a
+ * matching ME/MD with lib_find_me() under the state lock, allocates the
+ * message state and event details, then builds a REPLY header and sends
+ * the matched part of the MD back with lib_send() after dropping the lock.
+ * The incoming message itself is then completed with a zero-length
+ * lib_recv().
+ *
+ * Returns the lib_send() result (0) on success.  Returns -1 when the GET
+ * was dropped (no match, no message could be allocated, or the REPLY could
+ * not be sent); the drop counters are bumped and lib_recv() is still
+ * called with no message/MD.
+ */
+static int parse_get(nal_cb_t * nal, ptl_hdr_t * hdr, void *private)
+{
+ lib_ni_t *ni = &nal->ni;
+ ptl_size_t mlength = 0;
+ ptl_size_t offset = 0;
+ int unlink = 0;
+ lib_me_t *me;
+ lib_md_t *md;
+ lib_msg_t *msg;
+ ptl_hdr_t reply;
+ unsigned long flags;
+ int rc;
+
+ /* Convert get fields to host byte order */
+ hdr->msg.get.match_bits = NTOH__u64 (hdr->msg.get.match_bits);
+ hdr->msg.get.ptl_index = NTOH__u32 (hdr->msg.get.ptl_index);
+ hdr->msg.get.sink_length = NTOH__u32 (hdr->msg.get.sink_length);
+ hdr->msg.get.src_offset = NTOH__u32 (hdr->msg.get.src_offset);
+
+ /* compatibility check until field is deleted */
+ if (hdr->msg.get.return_offset != 0)
+ CERROR("Unexpected non-zero get.return_offset %x from "
+ LPU64"\n", hdr->msg.get.return_offset, hdr->src_nid);
+
+ state_lock(nal, &flags);
+
+ /* the requested length is the initiator's sink_length, not the
+ * header's payload length */
+ me = lib_find_me(nal, hdr->msg.get.ptl_index, PTL_MD_OP_GET,
+ hdr->src_nid, hdr->src_pid,
+ hdr->msg.get.sink_length, hdr->msg.get.src_offset,
+ hdr->msg.get.match_bits,
+ &mlength, &offset, &unlink);
+ if (me == NULL)
+ goto drop;
+
+ md = me->md;
+ CDEBUG(D_NET, "Incoming get index %d from "LPU64".%u of length %d/%d "
+ "from md "LPX64" [%d] + %d\n", hdr->msg.get.ptl_index,
+ hdr->src_nid, hdr->src_pid, mlength, PTL_HDR_LENGTH(hdr),
+ md->md_lh.lh_cookie, md->md_niov, offset);
+
+ msg = get_new_msg (nal, md);
+ if (msg == NULL) {
+ CERROR(LPU64": Dropping GET from "LPU64": can't allocate msg\n",
+ ni->nid, hdr->src_nid);
+ goto drop;
+ }
+
+ /* Event details are only filled in when the MD has an event queue */
+ if (md->eq) {
+ msg->ev.type = PTL_EVENT_GET;
+ msg->ev.initiator.nid = hdr->src_nid;
+ msg->ev.initiator.pid = hdr->src_pid;
+ msg->ev.portal = hdr->msg.get.ptl_index;
+ msg->ev.match_bits = hdr->msg.get.match_bits;
+ msg->ev.rlength = PTL_HDR_LENGTH(hdr);
+ msg->ev.mlength = mlength;
+ msg->ev.offset = offset;
+ msg->ev.hdr_data = 0;
+
+ /* NB if this match has exhausted the MD, we can't be sure
+ * that this event will be the last one associated with
+ * this MD in the event queue (another message already
+ * matching this ME/MD could end up being last). So we
+ * remember the ME handle anyway and check again when we're
+ * allocating our slot in the event queue.
+ */
+ ptl_me2handle (&msg->ev.unlinked_me, me);
+
+ lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
+ }
+
+ /* a GET makes us the data sender, so it counts as a send */
+ ni->counters.send_count++;
+ ni->counters.send_length += mlength;
+
+ /* only unlink after MD's refcount has been bumped
+ * in get_new_msg() otherwise lib_me_unlink() will nuke it */
+ if (unlink) {
+ md->md_flags |= PTL_MD_FLAG_AUTO_UNLINKED;
+ lib_me_unlink (nal, me);
+ }
+
+ state_unlock(nal, &flags);
+
+ /* Build the REPLY header in network byte order; it is addressed to
+ * the MD handle the initiator supplied in the GET. */
+ memset (&reply, 0, sizeof (reply));
+ reply.type = HTON__u32 (PTL_MSG_REPLY);
+ reply.dest_nid = HTON__u64 (hdr->src_nid);
+ reply.src_nid = HTON__u64 (ni->nid);
+ reply.dest_pid = HTON__u32 (hdr->src_pid);
+ reply.src_pid = HTON__u32 (ni->pid);
+ PTL_HDR_LENGTH(&reply) = HTON__u32 (mlength);
+
+ reply.msg.reply.dst_wmd = hdr->msg.get.return_wmd;
+
+ rc = lib_send (nal, private, msg, &reply, PTL_MSG_REPLY,
+ hdr->src_nid, hdr->src_pid, md, offset, mlength);
+ if (rc != 0) {
+ CERROR(LPU64": Dropping GET from "LPU64": send REPLY failed\n",
+ ni->nid, hdr->src_nid);
+ /* NOTE(review): 'msg' from get_new_msg() is not released on
+ * this path, and the send counters bumped above are not undone
+ * -- confirm whether the NAL finalizes the message when
+ * cb_send fails. */
+ /* the drop path expects the state lock to be held */
+ state_lock (nal, &flags);
+ goto drop;
+ }
+
+ /* Complete the incoming message */
+ lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr));
+ return (rc);
+ drop:
+ /* reached with the state lock held */
+ ni->counters.drop_count++;
+ ni->counters.drop_length += hdr->msg.get.sink_length;
+ state_unlock(nal, &flags);
+ lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr));
+ return -1;
+}
+
+/*
+ * Handle an incoming REPLY (the data coming back for an earlier GET).
+ *
+ * Looks up the destination MD from the wire handle in the header under the
+ * state lock, checks that the reply fits (or may be truncated), allocates
+ * the message state and event details, and hands the payload to the NAL
+ * via lib_recv() after dropping the lock.
+ *
+ * Returns 0 when the REPLY was accepted.  Returns -1 when it was dropped
+ * (unknown or inactive MD, reply too long for an MD without
+ * PTL_MD_TRUNCATE, or no message could be allocated); the drop counters are
+ * bumped and lib_recv() is still called with no message/MD.
+ */
+static int parse_reply(nal_cb_t * nal, ptl_hdr_t * hdr, void *private)
+{
+ lib_ni_t *ni = &nal->ni;
+ lib_md_t *md;
+ int rlength;
+ int length;
+ lib_msg_t *msg;
+ unsigned long flags;
+
+ /* compatibility check until field is deleted */
+ if (hdr->msg.reply.dst_offset != 0)
+ CERROR("Unexpected non-zero reply.dst_offset %x from "LPU64"\n",
+ hdr->msg.reply.dst_offset, hdr->src_nid);
+
+ state_lock(nal, &flags);
+
+ /* NB handles only looked up by creator (no flips) */
+ md = ptl_wire_handle2md(&hdr->msg.reply.dst_wmd, nal);
+ if (md == NULL || md->threshold == 0) {
+ CERROR (LPU64": Dropping REPLY from "LPU64" for %s MD "LPX64"."LPX64"\n",
+ ni->nid, hdr->src_nid,
+ md == NULL ? "invalid" : "inactive",
+ hdr->msg.reply.dst_wmd.wh_interface_cookie,
+ hdr->msg.reply.dst_wmd.wh_object_cookie);
+ goto drop;
+ }
+
+ /* replies always land at the start of the MD */
+ LASSERT (md->offset == 0);
+
+ /* rlength is what was sent; length is what will actually be stored */
+ /* NOTE(review): both are plain int while the MD length's type is not
+ * visible here -- confirm the comparison below behaves as intended
+ * for very large wire lengths. */
+ length = rlength = PTL_HDR_LENGTH(hdr);
+
+ if (length > md->length) {
+ if ((md->options & PTL_MD_TRUNCATE) == 0) {
+ CERROR (LPU64": Dropping REPLY from "LPU64
+ " length %d for MD "LPX64" would overflow (%d)\n",
+ ni->nid, hdr->src_nid, length,
+ hdr->msg.reply.dst_wmd.wh_object_cookie,
+ md->length);
+ goto drop;
+ }
+ /* truncation allowed: keep only what fits */
+ length = md->length;
+ }
+
+ CDEBUG(D_NET, "Reply from "LPU64" of length %d/%d into md "LPX64"\n",
+ hdr->src_nid, length, rlength,
+ hdr->msg.reply.dst_wmd.wh_object_cookie);
+
+ msg = get_new_msg (nal, md);
+ if (msg == NULL) {
+ CERROR(LPU64": Dropping REPLY from "LPU64": can't "
+ "allocate msg\n", ni->nid, hdr->src_nid);
+ goto drop;
+ }
+
+ /* Event details are only filled in when the MD has an event queue */
+ if (md->eq) {
+ msg->ev.type = PTL_EVENT_REPLY;
+ msg->ev.initiator.nid = hdr->src_nid;
+ msg->ev.initiator.pid = hdr->src_pid;
+ msg->ev.rlength = rlength;
+ msg->ev.mlength = length;
+ msg->ev.offset = 0;
+
+ lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
+ }
+
+ ni->counters.recv_count++;
+ ni->counters.recv_length += length;
+
+ state_unlock(nal, &flags);
+
+ /* receive 'length' bytes into the start of the MD, lock released */
+ lib_recv (nal, private, msg, md, 0, length, rlength);
+ return 0;
+
+ drop:
+ /* reached with the state lock held */
+ nal->ni.counters.drop_count++;
+ nal->ni.counters.drop_length += PTL_HDR_LENGTH(hdr);
+ state_unlock (nal, &flags);
+ lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr));
+ return -1;
+}
+
+/*
+ * Handle an incoming ACK (acknowledgement of an earlier PUT).
+ *
+ * Converts the ACK-specific header fields to host byte order, looks up the
+ * destination MD from the wire handle under the state lock, allocates the
+ * message state and event details, and completes the incoming message with
+ * a zero-length lib_recv() after dropping the lock.
+ *
+ * Returns 0 when the ACK was accepted.  Returns -1 when it was dropped
+ * (unknown or inactive MD, or no message could be allocated); drop_count
+ * is bumped and lib_recv() is still called with no message/MD.
+ */
+static int parse_ack(nal_cb_t * nal, ptl_hdr_t * hdr, void *private)
+{
+ lib_ni_t *ni = &nal->ni;
+ lib_md_t *md;
+ lib_msg_t *msg = NULL;
+ unsigned long flags;
+
+ /* Convert ack fields to host byte order */
+ hdr->msg.ack.match_bits = NTOH__u64 (hdr->msg.ack.match_bits);
+ hdr->msg.ack.mlength = NTOH__u32 (hdr->msg.ack.mlength);
+
+ state_lock(nal, &flags);
+
+ /* NB handles only looked up by creator (no flips) */
+ md = ptl_wire_handle2md(&hdr->msg.ack.dst_wmd, nal);
+ if (md == NULL || md->threshold == 0) {
+ CERROR(LPU64": Dropping ACK from "LPU64" to %s MD "
+ LPX64"."LPX64"\n", ni->nid, hdr->src_nid,
+ (md == NULL) ? "invalid" : "inactive",
+ hdr->msg.ack.dst_wmd.wh_interface_cookie,
+ hdr->msg.ack.dst_wmd.wh_object_cookie);
+ goto drop;
+ }
+
+ CDEBUG(D_NET, LPU64": ACK from "LPU64" into md "LPX64"\n",
+ ni->nid, hdr->src_nid,
+ hdr->msg.ack.dst_wmd.wh_object_cookie);
+
+ msg = get_new_msg (nal, md);
+ if (msg == NULL) {
+ CERROR(LPU64": Dropping ACK from "LPU64": can't allocate msg\n",
+ ni->nid, hdr->src_nid);
+ goto drop;
+ }
+
+ /* Event details are only filled in when the MD has an event queue */
+ if (md->eq) {
+ msg->ev.type = PTL_EVENT_ACK;
+ msg->ev.initiator.nid = hdr->src_nid;
+ msg->ev.initiator.pid = hdr->src_pid;
+ msg->ev.mlength = hdr->msg.ack.mlength;
+ msg->ev.match_bits = hdr->msg.ack.match_bits;
+
+ lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
+ }
+
+ ni->counters.recv_count++;
+ state_unlock(nal, &flags);
+ /* no data is stored for an ACK: no MD and a zero match length */
+ lib_recv (nal, private, msg, NULL, 0, 0, PTL_HDR_LENGTH (hdr));
+ return 0;
+
+ drop:
+ /* reached with the state lock held */
+ nal->ni.counters.drop_count++;
+ state_unlock (nal, &flags);
+ lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr));
+ return -1;
+}
+
+/*
+ * Map hdr->type (already in host byte order) to a printable message type
+ * name; anything unrecognised yields "<UNKNOWN>".
+ */
+static char *
+hdr_type_string (ptl_hdr_t *hdr)
+{
+        char *name = "<UNKNOWN>";
+
+        switch (hdr->type) {
+        case PTL_MSG_ACK:
+                name = "ACK";
+                break;
+        case PTL_MSG_PUT:
+                name = "PUT";
+                break;
+        case PTL_MSG_GET:
+                name = "GET";
+                break;
+        case PTL_MSG_REPLY:
+                name = "REPLY";
+                break;
+        case PTL_MSG_HELLO:
+                name = "HELLO";
+                break;
+        default:
+                break;
+        }
+
+        return (name);
+}
+
+/*
+ * Dump a Portals header through the NAL's cb_printf callback: the common
+ * source/destination fields first, then the fields specific to the message
+ * type.  Unknown types only get the common part.
+ *
+ * The nids are printed with LPU64 and the pids with %u, as elsewhere in
+ * this file; the pids are the 32-bit fields converted with NTOH__u32 in
+ * lib_parse(), so a 64-bit conversion would misread the argument list.
+ */
+void print_hdr(nal_cb_t * nal, ptl_hdr_t * hdr)
+{
+        char *type_str = hdr_type_string (hdr);
+
+        nal->cb_printf(nal, "P3 Header at %p of type %s\n", hdr, type_str);
+        nal->cb_printf(nal, " From nid/pid "LPU64"/%u", hdr->src_nid,
+                       hdr->src_pid);
+        nal->cb_printf(nal, " To nid/pid "LPU64"/%u\n", hdr->dest_nid,
+                       hdr->dest_pid);
+
+        switch (hdr->type) {
+        default:
+                break;
+
+        case PTL_MSG_PUT:
+                nal->cb_printf(nal,
+                               " Ptl index %d, ack md "LPX64"."LPX64", "
+                               "match bits "LPX64"\n",
+                               hdr->msg.put.ptl_index,
+                               hdr->msg.put.ack_wmd.wh_interface_cookie,
+                               hdr->msg.put.ack_wmd.wh_object_cookie,
+                               hdr->msg.put.match_bits);
+                nal->cb_printf(nal,
+                               " Length %d, offset %d, hdr data "LPX64"\n",
+                               PTL_HDR_LENGTH(hdr), hdr->msg.put.offset,
+                               hdr->msg.put.hdr_data);
+                break;
+
+        case PTL_MSG_GET:
+                nal->cb_printf(nal,
+                               " Ptl index %d, return md "LPX64"."LPX64", "
+                               "match bits "LPX64"\n", hdr->msg.get.ptl_index,
+                               hdr->msg.get.return_wmd.wh_interface_cookie,
+                               hdr->msg.get.return_wmd.wh_object_cookie,
+                               hdr->msg.get.match_bits);
+                nal->cb_printf(nal,
+                               " Length %d, src offset %d\n",
+                               hdr->msg.get.sink_length,
+                               hdr->msg.get.src_offset);
+                break;
+
+        case PTL_MSG_ACK:
+                nal->cb_printf(nal, " dst md "LPX64"."LPX64", "
+                               "manipulated length %d\n",
+                               hdr->msg.ack.dst_wmd.wh_interface_cookie,
+                               hdr->msg.ack.dst_wmd.wh_object_cookie,
+                               hdr->msg.ack.mlength);
+                break;
+
+        case PTL_MSG_REPLY:
+                nal->cb_printf(nal, " dst md "LPX64"."LPX64", "
+                               "length %d\n",
+                               hdr->msg.reply.dst_wmd.wh_interface_cookie,
+                               hdr->msg.reply.dst_wmd.wh_object_cookie,
+                               PTL_HDR_LENGTH(hdr));
+                break;
+        }
+
+}                               /* end of print_hdr() */
+
+
+/*
+ * Entry point for a header that has just arrived from the network.
+ *
+ * Converts the common header fields to host byte order in place, rejects
+ * HELLO messages, messages addressed to another nid and messages from
+ * peers selected for simulated failure, then dispatches on the message
+ * type to parse_ack/parse_put/parse_get/parse_reply.
+ *
+ * Returns the handler's result, or -1 if the message was dropped here.
+ * Every drop path completes the incoming message with a zero-length
+ * lib_recv(), so the NAL sees the same sequence of calls whichever check
+ * rejected the message.
+ */
+int lib_parse(nal_cb_t * nal, ptl_hdr_t * hdr, void *private)
+{
+        unsigned long  flags;
+
+        /* NB static check; optimizer will elide this if it's right */
+        LASSERT (offsetof (ptl_hdr_t, msg.ack.length) ==
+                 offsetof (ptl_hdr_t, msg.put.length));
+        LASSERT (offsetof (ptl_hdr_t, msg.ack.length) ==
+                 offsetof (ptl_hdr_t, msg.get.length));
+        LASSERT (offsetof (ptl_hdr_t, msg.ack.length) ==
+                 offsetof (ptl_hdr_t, msg.reply.length));
+
+        /* convert common fields to host byte order */
+        hdr->dest_nid = NTOH__u64 (hdr->dest_nid);
+        hdr->src_nid = NTOH__u64 (hdr->src_nid);
+        hdr->dest_pid = NTOH__u32 (hdr->dest_pid);
+        hdr->src_pid = NTOH__u32 (hdr->src_pid);
+        hdr->type = NTOH__u32 (hdr->type);
+        PTL_HDR_LENGTH(hdr) = NTOH__u32 (PTL_HDR_LENGTH(hdr));
+#if 0
+        nal->cb_printf(nal, "%d: lib_parse: nal=%p hdr=%p type=%d\n",
+                       nal->ni.nid, nal, hdr, hdr->type);
+        print_hdr(nal, hdr);
+#endif
+        if (hdr->type == PTL_MSG_HELLO) {
+                /* dest_nid is really ptl_magicversion_t */
+                ptl_magicversion_t *mv = (ptl_magicversion_t *)&hdr->dest_nid;
+
+                CERROR (LPU64": Dropping unexpected HELLO message: "
+                        "magic %d, version %d.%d from "LPD64"\n",
+                        nal->ni.nid, mv->magic,
+                        mv->version_major, mv->version_minor,
+                        hdr->src_nid);
+                lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr));
+                return (-1);
+        }
+
+        if (hdr->dest_nid != nal->ni.nid) {
+                CERROR(LPU64": Dropping %s message from "LPU64" to "LPU64
+                       " (not me)\n", nal->ni.nid, hdr_type_string (hdr),
+                       hdr->src_nid, hdr->dest_nid);
+
+                state_lock (nal, &flags);
+                nal->ni.counters.drop_count++;
+                nal->ni.counters.drop_length += PTL_HDR_LENGTH(hdr);
+                state_unlock (nal, &flags);
+
+                lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr));
+                return (-1);
+        }
+
+        if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */
+            fail_peer (nal, hdr->src_nid, 0))      /* shall we now? */
+        {
+                CERROR(LPU64": Dropping incoming %s from "LPU64
+                       ": simulated failure\n",
+                       nal->ni.nid, hdr_type_string (hdr),
+                       hdr->src_nid);
+
+                /* A simulated drop must still complete the incoming
+                 * message, exactly like the real drop paths do. */
+                lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr));
+                return (-1);
+        }
+
+        switch (hdr->type) {
+        case PTL_MSG_ACK:
+                return (parse_ack(nal, hdr, private));
+        case PTL_MSG_PUT:
+                return (parse_put(nal, hdr, private));
+        case PTL_MSG_GET:
+                return (parse_get(nal, hdr, private));
+        case PTL_MSG_REPLY:
+                return (parse_reply(nal, hdr, private));
+        default:
+                CERROR(LPU64": Dropping <unknown> message from "LPU64
+                       ": Bad type=0x%x\n", nal->ni.nid, hdr->src_nid,
+                       hdr->type);
+
+                lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr));
+                return (-1);
+        }
+}
+
+
+/*
+ * Library side of PtlPut(): send the whole of an MD to a remote portal.
+ *
+ * The result is stored in ret->rc and also returned:
+ *   PTL_INV_PROC  the target was selected for simulated failure,
+ *   PTL_INV_MD    the MD handle is invalid or the MD is inactive,
+ *   PTL_NOSPACE   no message descriptor could be allocated,
+ *   PTL_OK        the PUT was handed to the NAL.
+ *
+ * The send counters are only updated once the message descriptor has been
+ * allocated, so a PUT that fails with PTL_NOSPACE is not counted as sent.
+ */
+int do_PtlPut(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        /*
+         * Incoming:
+         *      ptl_handle_md_t md_in
+         *      ptl_ack_req_t ack_req_in
+         *      ptl_process_id_t target_in
+         *      ptl_pt_index_t portal_in
+         *      ptl_ac_index_t cookie_in
+         *      ptl_match_bits_t match_bits_in
+         *      ptl_size_t offset_in
+         *
+         * Outgoing:
+         */
+
+        PtlPut_in        *args = v_args;
+        PtlPut_out       *ret = v_ret;
+        ptl_hdr_t         hdr;
+
+        lib_ni_t         *ni = &nal->ni;
+        lib_md_t         *md;
+        lib_msg_t        *msg = NULL;
+        ptl_process_id_t *id = &args->target_in;
+        unsigned long     flags;
+
+        if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */
+            fail_peer (nal, id->nid, 1))           /* shall we now? */
+        {
+                CERROR(LPU64": Dropping PUT to "LPU64": simulated failure\n",
+                       nal->ni.nid, id->nid);
+                return (ret->rc = PTL_INV_PROC);
+        }
+
+        ret->rc = PTL_OK;
+        state_lock(nal, &flags);
+        md = ptl_handle2md(&args->md_in, nal);
+        if (md == NULL || !md->threshold) {
+                state_unlock(nal, &flags);
+                return ret->rc = PTL_INV_MD;
+        }
+
+        CDEBUG(D_NET, "PtlPut -> %Lu: %lu\n", (unsigned long long)id->nid,
+               (unsigned long)id->pid);
+
+        /* Build the PUT header in network byte order */
+        memset (&hdr, 0, sizeof (hdr));
+        hdr.type     = HTON__u32 (PTL_MSG_PUT);
+        hdr.dest_nid = HTON__u64 (id->nid);
+        hdr.src_nid  = HTON__u64 (ni->nid);
+        hdr.dest_pid = HTON__u32 (id->pid);
+        hdr.src_pid  = HTON__u32 (ni->pid);
+        PTL_HDR_LENGTH(&hdr) = HTON__u32 (md->length);
+
+        /* NB handles only looked up by creator (no flips) */
+        if (args->ack_req_in == PTL_ACK_REQ) {
+                hdr.msg.put.ack_wmd.wh_interface_cookie = ni->ni_interface_cookie;
+                hdr.msg.put.ack_wmd.wh_object_cookie = md->md_lh.lh_cookie;
+        } else {
+                hdr.msg.put.ack_wmd = PTL_WIRE_HANDLE_NONE;
+        }
+
+        hdr.msg.put.match_bits = HTON__u64 (args->match_bits_in);
+        hdr.msg.put.ptl_index = HTON__u32 (args->portal_in);
+        hdr.msg.put.offset = HTON__u32 (args->offset_in);
+        hdr.msg.put.hdr_data = args->hdr_data_in;
+
+        msg = get_new_msg (nal, md);
+        if (msg == NULL) {
+                CERROR("BAD: could not allocate msg!\n");
+                state_unlock(nal, &flags);
+                return ret->rc = PTL_NOSPACE;
+        }
+
+        /* Only count the send now that it is really going to happen */
+        ni->counters.send_count++;
+        ni->counters.send_length += md->length;
+
+        /*
+         * If this memory descriptor has an event queue associated with
+         * it we need to allocate a message state object and record the
+         * information about this operation that will be recorded into
+         * event queue once the message has been completed.
+         *
+         * NB. We're now committed to the PUT, since we just marked the MD
+         * busy.  Callers who observe this (by getting PTL_MD_INUSE from
+         * PtlMDUnlink()) expect a completion event to tell them when the
+         * MD becomes idle.
+         */
+        if (md->eq) {
+                msg->ev.type = PTL_EVENT_SENT;
+                msg->ev.initiator.nid = ni->nid;
+                msg->ev.initiator.pid = ni->pid;
+                msg->ev.portal = args->portal_in;
+                msg->ev.match_bits = args->match_bits_in;
+                msg->ev.rlength = md->length;
+                msg->ev.mlength = md->length;
+                msg->ev.offset = args->offset_in;
+                msg->ev.hdr_data = args->hdr_data_in;
+
+                lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
+        }
+
+        state_unlock(nal, &flags);
+
+        /* NOTE(review): the lib_send() result is not checked here, so
+         * PTL_OK is returned even if the NAL refuses the message --
+         * confirm that the NAL reports such failures some other way. */
+        lib_send (nal, private, msg, &hdr, PTL_MSG_PUT,
+                  id->nid, id->pid, md, 0, md->length);
+
+        return ret->rc = PTL_OK;
+}
+
+
+/*
+ * Library side of PtlGet(): ask a remote portal to send data back into a
+ * local MD.  Only a header goes out; the data arrives later as a REPLY
+ * addressed to the MD handle placed in the header.
+ *
+ * The result is stored in ret->rc and also returned:
+ *   PTL_INV_PROC  the target was selected for simulated failure,
+ *   PTL_INV_MD    the MD handle is invalid or the MD is inactive,
+ *   PTL_NOSPACE   no message descriptor could be allocated,
+ *   PTL_OK        the GET was handed to the NAL.
+ *
+ * The send counter is only updated once the message descriptor has been
+ * allocated, so a GET that fails with PTL_NOSPACE is not counted as sent.
+ */
+int do_PtlGet(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        /*
+         * Incoming:
+         *      ptl_handle_md_t md_in
+         *      ptl_process_id_t target_in
+         *      ptl_pt_index_t portal_in
+         *      ptl_ac_index_t cookie_in
+         *      ptl_match_bits_t match_bits_in
+         *      ptl_size_t offset_in
+         *
+         * Outgoing:
+         */
+
+        PtlGet_in        *args = v_args;
+        PtlGet_out       *ret = v_ret;
+        ptl_hdr_t         hdr;
+        lib_msg_t        *msg = NULL;
+        lib_ni_t         *ni = &nal->ni;
+        ptl_process_id_t *id = &args->target_in;
+        lib_md_t         *md;
+        unsigned long     flags;
+
+        if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */
+            fail_peer (nal, id->nid, 1))           /* shall we now? */
+        {
+                CERROR(LPU64": Dropping GET to "LPU64": simulated failure\n",
+                       nal->ni.nid, id->nid);
+                return (ret->rc = PTL_INV_PROC);
+        }
+
+        state_lock(nal, &flags);
+        md = ptl_handle2md(&args->md_in, nal);
+        if (md == NULL || !md->threshold) {
+                state_unlock(nal, &flags);
+                return ret->rc = PTL_INV_MD;
+        }
+
+        /* the reply will be received at the start of the MD */
+        LASSERT (md->offset == 0);
+
+        CDEBUG(D_NET, "PtlGet -> %Lu: %lu\n", (unsigned long long)id->nid,
+               (unsigned long)id->pid);
+
+        /* Build the GET header in network byte order; it has no payload */
+        memset (&hdr, 0, sizeof (hdr));
+        hdr.type     = HTON__u32 (PTL_MSG_GET);
+        hdr.dest_nid = HTON__u64 (id->nid);
+        hdr.src_nid  = HTON__u64 (ni->nid);
+        hdr.dest_pid = HTON__u32 (id->pid);
+        hdr.src_pid  = HTON__u32 (ni->pid);
+        PTL_HDR_LENGTH(&hdr) = 0;
+
+        /* NB handles only looked up by creator (no flips) */
+        hdr.msg.get.return_wmd.wh_interface_cookie = ni->ni_interface_cookie;
+        hdr.msg.get.return_wmd.wh_object_cookie = md->md_lh.lh_cookie;
+
+        hdr.msg.get.match_bits = HTON__u64 (args->match_bits_in);
+        hdr.msg.get.ptl_index = HTON__u32 (args->portal_in);
+        hdr.msg.get.src_offset = HTON__u32 (args->offset_in);
+        hdr.msg.get.sink_length = HTON__u32 (md->length);
+
+        msg = get_new_msg (nal, md);
+        if (msg == NULL) {
+                CERROR("do_PtlGet: BAD - could not allocate cookie!\n");
+                state_unlock(nal, &flags);
+                return ret->rc = PTL_NOSPACE;
+        }
+
+        /* Only count the send now that it is really going to happen */
+        ni->counters.send_count++;
+
+        /*
+         * If this memory descriptor has an event queue associated with
+         * it we must allocate a message state object that will record
+         * the information to be filled in once the message has been
+         * completed.  More information is in the do_PtlPut() comments.
+         *
+         * NB. We're now committed to the GET, since we just marked the MD
+         * busy.  Callers who observe this (by getting PTL_MD_INUSE from
+         * PtlMDUnlink()) expect a completion event to tell them when the
+         * MD becomes idle.
+         */
+        if (md->eq) {
+                msg->ev.type = PTL_EVENT_SENT;
+                msg->ev.initiator.nid = ni->nid;
+                msg->ev.initiator.pid = ni->pid;
+                msg->ev.portal = args->portal_in;
+                msg->ev.match_bits = args->match_bits_in;
+                msg->ev.rlength = md->length;
+                msg->ev.mlength = md->length;
+                msg->ev.offset = args->offset_in;
+                msg->ev.hdr_data = 0;
+
+                lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
+        }
+
+        state_unlock(nal, &flags);
+
+        /* NOTE(review): the lib_send() result is not checked here, so
+         * PTL_OK is returned even if the NAL refuses the message --
+         * confirm that the NAL reports such failures some other way. */
+        lib_send (nal, private, msg, &hdr, PTL_MSG_GET,
+                  id->nid, id->pid, NULL, 0, 0);
+
+        return ret->rc = PTL_OK;
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lib/lib-msg.c
+ * Message decoding, parsing and finalizing routines
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __KERNEL__
+# include <stdio.h>
+#else
+# define DEBUG_SUBSYSTEM S_PORTALS
+# include <linux/kp30.h>
+#endif
+
+#include <portals/lib-p30.h>
+
+/*
+ * Finish with a message: send the ACK it asked for (if any), post its
+ * event to the MD's event queue (if the MD has one), drop this message's
+ * reference on the MD and free the message descriptor.
+ *
+ * Returns -1 if the interface is marked down and 0 if msg is NULL;
+ * otherwise returns the last value stored in rc.
+ * NOTE(review): when the MD has an event queue, the rc from lib_send() is
+ * overwritten by the cb_write() results, so an ACK send failure is only
+ * reported for MDs without an EQ -- confirm this is intended.
+ */
+int lib_finalize(nal_cb_t * nal, void *private, lib_msg_t *msg)
+{
+ lib_md_t *md;
+ lib_eq_t *eq;
+ int rc;
+ unsigned long flags;
+
+ /* ni went down while processing this message */
+ if (nal->ni.up == 0) {
+ return -1;
+ }
+
+ if (msg == NULL)
+ return 0;
+
+ rc = 0;
+ if (msg->send_ack) {
+ ptl_hdr_t ack;
+
+ LASSERT (!ptl_is_wire_handle_none (&msg->ack_wmd));
+
+ /* Build a zero-length ACK header addressed back to the peer
+ * recorded in the message */
+ memset (&ack, 0, sizeof (ack));
+ ack.type = HTON__u32 (PTL_MSG_ACK);
+ ack.dest_nid = HTON__u64 (msg->nid);
+ ack.src_nid = HTON__u64 (nal->ni.nid);
+ ack.dest_pid = HTON__u32 (msg->pid);
+ ack.src_pid = HTON__u32 (nal->ni.pid);
+ PTL_HDR_LENGTH(&ack) = 0;
+
+ ack.msg.ack.dst_wmd = msg->ack_wmd;
+ ack.msg.ack.match_bits = msg->ev.match_bits;
+ ack.msg.ack.mlength = HTON__u32 (msg->ev.mlength);
+
+ rc = lib_send (nal, private, NULL, &ack, PTL_MSG_ACK,
+ msg->nid, msg->pid, NULL, 0, 0);
+ }
+
+ /* NB md and eq are sampled (and md->pending asserted) before the
+ * state lock is taken below */
+ md = msg->md;
+ LASSERT (md->pending > 0); /* I've not dropped my ref yet */
+ eq = md->eq;
+
+ state_lock(nal, &flags);
+
+ if (eq != NULL) {
+ ptl_event_t *ev = &msg->ev;
+ ptl_event_t *eq_slot;
+
+ /* I have to hold the lock while I bump the sequence number
+ * and copy the event into the queue. If not, and I was
+ * interrupted after bumping the sequence number, other
+ * events could fill the queue, including the slot I just
+ * allocated to this event. On resuming, I would overwrite
+ * a more 'recent' event with old event state, and
+ * processes taking events off the queue would not detect
+ * overflow correctly.
+ */
+
+ ev->sequence = eq->sequence++;/* Allocate the next queue slot */
+
+ /* size must be a power of 2 to handle a wrapped sequence # */
+ LASSERT (eq->size != 0 &&
+ eq->size == LOWEST_BIT_SET (eq->size));
+ eq_slot = eq->base + (ev->sequence & (eq->size - 1));
+
+ /* Invalidate unlinked_me unless this is the last
+ * event for an auto-unlinked MD. Note that if md was
+ * auto-unlinked, md->pending can only decrease
+ */
+ if ((md->md_flags & PTL_MD_FLAG_AUTO_UNLINKED) == 0 || /* not auto-unlinked */
+ md->pending != 1) /* not last ref */
+ ev->unlinked_me = PTL_HANDLE_NONE;
+
+ /* Copy the event into the allocated slot, ensuring all the
+ * rest of the event's contents have been copied _before_
+ * the sequence number gets updated. A process 'getting'
+ * an event waits on the next queue slot's sequence to be
+ * 'new'. When it is, _all_ other event fields had better
+ * be consistent. I assert 'sequence' is the last member,
+ * so I only need a 2 stage copy.
+ */
+ LASSERT(sizeof (ptl_event_t) ==
+ offsetof(ptl_event_t, sequence) + sizeof(ev->sequence));
+
+ /* Stage 1: everything up to (not including) 'sequence' */
+ rc = nal->cb_write (nal, private, (user_ptr)eq_slot, ev,
+ offsetof (ptl_event_t, sequence));
+ LASSERT (rc == 0);
+
+#ifdef __KERNEL__
+ barrier();
+#endif
+ /* Updating the sequence number is what makes the event 'new' */
+
+ /* cb_write is not necessarily atomic, so this could
+ cause a race with PtlEQGet */
+ /* Stage 2: the sequence number itself */
+ rc = nal->cb_write(nal, private, (user_ptr)&eq_slot->sequence,
+ (void *)&ev->sequence,sizeof (ev->sequence));
+ LASSERT (rc == 0);
+
+#ifdef __KERNEL__
+ barrier();
+#endif
+
+ /* I must also ensure that (a) callbacks are made in the
+ * same order as the events land in the queue, and (b) the
+ * callback occurs before the event can be removed from the
+ * queue, so I can't drop the lock during the callback. */
+ /* A NAL-level callback, when present, takes precedence over
+ * the EQ's own event_callback */
+ if (nal->cb_callback != NULL)
+ nal->cb_callback(nal, private, eq, ev);
+ else if (eq->event_callback != NULL)
+ (void)((eq->event_callback) (ev));
+ }
+
+ /* An auto-unlinked MD must also carry the UNLINK flag */
+ LASSERT ((md->md_flags & PTL_MD_FLAG_AUTO_UNLINKED) == 0 ||
+ (md->md_flags & PTL_MD_FLAG_UNLINK) != 0);
+
+ /* Drop this message's reference on the MD */
+ md->pending--;
+ if (md->pending == 0 && /* no more outstanding operations on this md */
+ (md->threshold == 0 || /* done its business */
+ (md->md_flags & PTL_MD_FLAG_UNLINK) != 0)) /* marked for death */
+ lib_md_unlink(nal, md);
+
+ /* Take the message off its list, fix up the accounting and free it,
+ * all still under the state lock */
+ list_del (&msg->msg_list);
+ nal->ni.counters.msgs_alloc--;
+ lib_msg_free(nal, msg);
+
+ state_unlock(nal, &flags);
+
+ return rc;
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lib/lib-ni.c
+ * Network status registers and distance functions.
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Copyright (c) 2001-2002 Sandia National Laboratories
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_PORTALS
+#include <portals/lib-p30.h>
+#include <portals/arg-blocks.h>
+
+/* Distance reported when the NAL cannot supply one: the largest value an
+ * unsigned long can hold.  Written as ~0UL so the constant also fits where
+ * unsigned long is 32 bits wide (the old 64-bit literal did not). */
+#define MAX_DIST (~0UL)
+
+int do_PtlNIDebug(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        /*
+         * Swap the interface's debug mask: the previous mask is handed
+         * back in the reply's rc field and the caller's mask_in replaces
+         * it.  The dispatch status returned is always 0.
+         */
+        PtlNIDebug_in  *in  = v_args;
+        PtlNIDebug_out *out = v_ret;
+
+        out->rc = nal->ni.debug;
+        nal->ni.debug = in->mask_in;
+
+        return 0;
+}
+
+int do_PtlNIStatus(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        /*
+         * Read one of the interface's status registers.
+         *
+         * Incoming:
+         *      ptl_handle_ni_t interface_in
+         *      ptl_sr_index_t register_in
+         *
+         * Outgoing:
+         *      ptl_sr_value_t   * status_out
+         *
+         * Replies PTL_SEGV when no argument block was supplied and
+         * PTL_INV_SR_INDX (with status_out == 0) for an unknown register.
+         */
+        PtlNIStatus_in  *in       = v_args;
+        PtlNIStatus_out *out      = v_ret;
+        lib_counters_t  *counters = &nal->ni.counters;
+
+        if (in == NULL)
+                return out->rc = PTL_SEGV;
+
+        out->rc = PTL_OK;
+        out->status_out = 0;
+
+        /* Map the requested register onto the matching counter */
+        switch (in->register_in) {
+        case PTL_SR_DROP_COUNT:
+                out->status_out = counters->drop_count;
+                break;
+        case PTL_SR_DROP_LENGTH:
+                out->status_out = counters->drop_length;
+                break;
+        case PTL_SR_RECV_COUNT:
+                out->status_out = counters->recv_count;
+                break;
+        case PTL_SR_RECV_LENGTH:
+                out->status_out = counters->recv_length;
+                break;
+        case PTL_SR_SEND_COUNT:
+                out->status_out = counters->send_count;
+                break;
+        case PTL_SR_SEND_LENGTH:
+                out->status_out = counters->send_length;
+                break;
+        case PTL_SR_MSGS_MAX:
+                out->status_out = counters->msgs_max;
+                break;
+        default:
+                out->rc = PTL_INV_SR_INDX;
+                break;
+        }
+
+        return out->rc;
+}
+
+
+int do_PtlNIDist(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        /*
+         * Ask the NAL (via cb_dist) for the distance to a peer.
+         *
+         * Incoming:
+         *      ptl_handle_ni_t interface_in
+         *      ptl_process_id_t process_in
+         *
+         * Outgoing:
+         *      unsigned long   * distance_out
+         *
+         * On failure distance_out is set to MAX_DIST and the status is
+         * PTL_INV_PROC; on success the status is PTL_OK.  The status is
+         * both returned and stored in ret->rc.
+         */
+        PtlNIDist_in  *args = v_args;
+        PtlNIDist_out *ret = v_ret;
+        unsigned long  dist;
+
+        if (nal->cb_dist(nal, args->process_in.nid, &dist) != 0) {
+                ret->distance_out = (unsigned long) MAX_DIST;
+                /* The reply block must carry the failure too: previously
+                 * ret->rc was left unset on this path. */
+                return ret->rc = PTL_INV_PROC;
+        }
+
+        ret->distance_out = dist;
+
+        return ret->rc = PTL_OK;
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lib/lib-not-impl.c
+ *
+ * boiler plate functions that can be used to write the
+ * library side routines
+ */
+
+# define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <portals/lib-p30.h>
+#include <portals/arg-blocks.h>
+
+
+int do_PtlACEntry(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        /*
+         * Access-control entries are not implemented.
+         *
+         * Incoming:
+         *      ptl_handle_ni_t ni_in
+         *      ptl_ac_index_t index_in
+         *      ptl_process_id_t match_id_in
+         *      ptl_pt_index_t portal_in
+         *
+         * Outgoing:
+         *      (nothing besides the status)
+         *
+         * Replies PTL_SEGV when no argument block was supplied, otherwise
+         * PTL_NOT_IMPLEMENTED.
+         */
+        PtlACEntry_in  *in  = v_args;
+        PtlACEntry_out *out = v_ret;
+
+        out->rc = (in == NULL) ? PTL_SEGV : PTL_NOT_IMPLEMENTED;
+
+        return out->rc;
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lib/lib-pid.c
+ * Process identification routines
+ */
+
+/* This should be removed. The NAL should have the PID information */
+#define DEBUG_SUBSYSTEM S_PORTALS
+
+#if defined (__KERNEL__)
+# include <linux/kernel.h>
+extern int getpid(void);
+#else
+# include <stdio.h>
+# include <unistd.h>
+#endif
+#include <portals/lib-p30.h>
+#include <portals/arg-blocks.h>
+
+int do_PtlGetId(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+{
+        /*
+         * Report this interface's own process id (nid, pid).
+         *
+         * Incoming:
+         *      ptl_handle_ni_t handle_in
+         *
+         * Outgoing:
+         *      ptl_process_id_t        * id_out
+         *      ptl_id_t                * gsize_out  (not filled in here)
+         */
+        PtlGetId_out *out = v_ret;
+
+        out->id_out.nid = nal->ni.nid;
+        out->id_out.pid = nal->ni.pid;
+        out->rc = PTL_OK;
+
+        return out->rc;
+}
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+include ../Rules.linux
+
+MODULE = kptlrouter
+modulenet_DATA = kptlrouter.o
+EXTRA_PROGRAMS = kptlrouter
+
+
+#CFLAGS:= @KCFLAGS@
+#CPPFLAGS:=@KCPPFLAGS@
+DEFS =
+kptlrouter_SOURCES = router.c proc.c router.h
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+include ../Kernelenv
+
+obj-y += kptlrouter.o
+kptlrouter-objs := router.o proc.o
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Portals
+ * http://sourceforge.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include "router.h"
+
+#define KPR_PROC_ROUTER "sys/portals/router"
+
+/*
+ * read_proc handler for the router's /proc entry.
+ *
+ * Emits one line, "<bytes> <packets> <errors> <queue depth>\n", into
+ * 'page'.  The whole line is produced on the read at offset 0; any read at
+ * a non-zero offset returns 0.  *eof is always set.
+ *
+ * Returns the number of characters written to 'page'.
+ */
+int
+kpr_proc_read (char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+        unsigned long long bytes = kpr_fwd_bytes;
+        unsigned long      packets = kpr_fwd_packets;
+        unsigned long      errors = kpr_fwd_errors;
+        unsigned int       qdepth = atomic_read (&kpr_queue_depth);
+        int                len;
+
+        *eof = 1;
+        if (off != 0)
+                return (0);
+
+        /* All four values are unsigned, so print them with unsigned
+         * conversions; the signed ones would show large counters as
+         * negative numbers. */
+        len = sprintf (page, "%Lu %lu %lu %u\n", bytes, packets, errors, qdepth);
+
+        *start = page;
+        return (len);
+}
+
+/*
+ * write_proc handler for the router's /proc entry.  The data written is
+ * never examined: any write simply resets the forwarding statistics.
+ * Reports the full 'count' as consumed.
+ */
+int
+kpr_proc_write (struct file *file, const char *ubuffer, unsigned long count, void *data)
+{
+        kpr_fwd_errors  = 0;
+        kpr_fwd_packets = 0;
+        kpr_fwd_bytes   = 0;
+
+        return (count);
+}
+
+/*
+ * Create the router's /proc entry (KPR_PROC_ROUTER) and hook up its read
+ * and write handlers.  Failure to create the entry is logged and otherwise
+ * ignored.
+ */
+void
+kpr_proc_init(void)
+{
+        struct proc_dir_entry *pde;
+
+        pde = create_proc_entry (KPR_PROC_ROUTER, S_IFREG | S_IRUGO | S_IWUSR, NULL);
+        if (pde != NULL) {
+                pde->data = NULL;
+                pde->read_proc = kpr_proc_read;
+                pde->write_proc = kpr_proc_write;
+                return;
+        }
+
+        CERROR("couldn't create proc entry %s\n", KPR_PROC_ROUTER);
+}
+
+/* Remove the /proc entry created by kpr_proc_init(). */
+void
+kpr_proc_fini(void)
+{
+        remove_proc_entry(KPR_PROC_ROUTER, NULL);
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Portals
+ * http://sourceforge.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include "router.h"
+
+struct list_head kpr_routes;
+struct list_head kpr_nals;
+
+unsigned long long kpr_fwd_bytes;
+unsigned long kpr_fwd_packets;
+unsigned long kpr_fwd_errors;
+atomic_t kpr_queue_depth;
+
+/* Mostly the tables are read-only (thread and interrupt context)
+ *
+ * Once in a blue moon we register/deregister NALs and add/remove routing
+ * entries (thread context only)... */
+rwlock_t kpr_rwlock;
+
+/* Table of the router's NAL-facing entry points (register, lookup,
+ * forward start/done, shutdown, deregister).  It is published with
+ * PORTAL_SYMBOL_REGISTER() in kpr_initialise() and exported below. */
+kpr_router_interface_t kpr_router_interface = {
+ kprri_register: kpr_register_nal,
+ kprri_lookup: kpr_lookup_target,
+ kprri_fwd_start: kpr_forward_packet,
+ kprri_fwd_done: kpr_complete_packet,
+ kprri_shutdown: kpr_shutdown_nal,
+ kprri_deregister: kpr_deregister_nal,
+};
+
+/* Table of the route-table management entry points (add, delete,
+ * enumerate).  Published with PORTAL_SYMBOL_REGISTER() in
+ * kpr_initialise() and exported below. */
+kpr_control_interface_t kpr_control_interface = {
+ kprci_add_route: kpr_add_route,
+ kprci_del_route: kpr_del_route,
+ kprci_get_route: kpr_get_route,
+};
+
+/*
+ * Register a NAL with the router.
+ *
+ * A private copy of *nalif is kept in a new kpr_nal_entry_t, which is
+ * added to kpr_nals; the entry is handed back through *argp and is the
+ * 'arg' the NAL passes to the other router entry points.
+ *
+ * Returns 0 on success, -ENOMEM if the entry cannot be allocated, or
+ * -EEXIST if a NAL with the same kprni_nalid is already registered.
+ * Must not be called from interrupt context.
+ */
+int
+kpr_register_nal (kpr_nal_interface_t *nalif, void **argp)
+{
+        unsigned long     flags;        /* irqsave state must be unsigned long */
+        struct list_head *e;
+        kpr_nal_entry_t  *ne;
+
+        CDEBUG (D_OTHER, "Registering NAL %d\n", nalif->kprni_nalid);
+
+        PORTAL_ALLOC (ne, sizeof (*ne));
+        if (ne == NULL)
+                return (-ENOMEM);
+
+        memset (ne, 0, sizeof (*ne));
+        memcpy ((void *)&ne->kpne_interface, (void *)nalif, sizeof (*nalif));
+
+        LASSERT (!in_interrupt());
+        write_lock_irqsave (&kpr_rwlock, flags);
+
+        /* Refuse a second registration of the same NAL id */
+        for (e = kpr_nals.next; e != &kpr_nals; e = e->next)
+        {
+                kpr_nal_entry_t *ne2 = list_entry (e, kpr_nal_entry_t, kpne_list);
+
+                if (ne2->kpne_interface.kprni_nalid == ne->kpne_interface.kprni_nalid)
+                {
+                        write_unlock_irqrestore (&kpr_rwlock, flags);
+
+                        CERROR ("Attempt to register same NAL %d twice\n", ne->kpne_interface.kprni_nalid);
+
+                        PORTAL_FREE (ne, sizeof (*ne));
+                        return (-EEXIST);
+                }
+        }
+
+        list_add (&ne->kpne_list, &kpr_nals);
+
+        write_unlock_irqrestore (&kpr_rwlock, flags);
+
+        *argp = ne;
+        PORTAL_MODULE_USE;
+        return (0);
+}
+
+/*
+ * Begin shutting a NAL down: mark its entry so that no new lookups or
+ * forwards are accepted for it, then sleep (polling once a second) until
+ * every forward still referencing the entry has completed.
+ *
+ * 'arg' is the entry returned by kpr_register_nal().  Must not be called
+ * from interrupt context, and must be called at most once per entry.
+ */
+void
+kpr_shutdown_nal (void *arg)
+{
+        unsigned long    flags;         /* irqsave state must be unsigned long */
+        kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg;
+
+        CDEBUG (D_OTHER, "Shutting down NAL %d\n", ne->kpne_interface.kprni_nalid);
+
+        LASSERT (!ne->kpne_shutdown);
+        LASSERT (!in_interrupt());
+
+        write_lock_irqsave (&kpr_rwlock, flags); /* locking a bit spurious... */
+        ne->kpne_shutdown = 1;
+        write_unlock_irqrestore (&kpr_rwlock, flags); /* except it's a memory barrier */
+
+        while (atomic_read (&ne->kpne_refcount) != 0)
+        {
+                CDEBUG (D_NET, "Waiting for refcount on NAL %d to reach zero (%d)\n",
+                        ne->kpne_interface.kprni_nalid, atomic_read (&ne->kpne_refcount));
+
+                set_current_state (TASK_UNINTERRUPTIBLE);
+                schedule_timeout (HZ);
+        }
+}
+
+/*
+ * Remove a NAL's entry from the router and free it.
+ *
+ * 'arg' is the entry returned by kpr_register_nal().  The caller must
+ * already have called kpr_shutdown_nal() on it, so the entry is marked
+ * shut down and has no outstanding references.  Must not be called from
+ * interrupt context.
+ */
+void
+kpr_deregister_nal (void *arg)
+{
+        unsigned long    flags;         /* irqsave state must be unsigned long */
+        kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg;
+
+        CDEBUG (D_OTHER, "Deregister NAL %d\n", ne->kpne_interface.kprni_nalid);
+
+        LASSERT (ne->kpne_shutdown);            /* caller must have issued shutdown already */
+        LASSERT (atomic_read (&ne->kpne_refcount) == 0); /* can't be busy */
+        LASSERT (!in_interrupt());
+
+        write_lock_irqsave (&kpr_rwlock, flags);
+
+        list_del (&ne->kpne_list);
+
+        write_unlock_irqrestore (&kpr_rwlock, flags);
+
+        PORTAL_FREE (ne, sizeof (*ne));
+        PORTAL_MODULE_UNUSE;
+}
+
+
+/*
+ * Find the gateway through which the calling NAL can reach target_nid.
+ *
+ * The first route whose [lo, hi] range contains target_nid decides the
+ * outcome:
+ *      0             - its gateway is on the caller's NAL; *gateway_nidp
+ *                      is set to the gateway's nid
+ *      -EHOSTUNREACH - its gateway is on a different NAL
+ * -ENOENT is returned when no route covers target_nid or when the caller
+ * is shutting down.
+ */
+int
+kpr_lookup_target (void *arg, ptl_nid_t target_nid, ptl_nid_t *gateway_nidp)
+{
+        kpr_nal_entry_t  *caller = (kpr_nal_entry_t *)arg;
+        struct list_head *pos;
+        int               result = -ENOENT;
+
+        CDEBUG (D_OTHER, "lookup "LPX64" from NAL %d\n", target_nid, caller->kpne_interface.kprni_nalid);
+
+        if (caller->kpne_shutdown)              /* caller is shutting down */
+                return (-ENOENT);
+
+        read_lock (&kpr_rwlock);
+
+        for (pos = kpr_routes.next; pos != &kpr_routes; pos = pos->next) {
+                kpr_route_entry_t *route = list_entry (pos, kpr_route_entry_t, kpre_list);
+
+                if (target_nid >= route->kpre_lo_nid &&
+                    target_nid <= route->kpre_hi_nid) {
+                        /* First covering route wins, usable or not */
+                        if (route->kpre_gateway_nalid == caller->kpne_interface.kprni_nalid) {
+                                *gateway_nidp = route->kpre_gateway_nid;
+                                result = 0;
+                        } else {
+                                result = -EHOSTUNREACH; /* different NAL */
+                        }
+                        break;
+                }
+        }
+
+        read_unlock (&kpr_rwlock);
+
+        CDEBUG (D_OTHER, "lookup "LPX64" from NAL %d: %d ("LPX64")\n",
+                target_nid, caller->kpne_interface.kprni_nalid, result,
+                (result == 0) ? *gateway_nidp : (ptl_nid_t)0);
+        return (result);
+}
+
+/*
+ * Forward a packet received by one NAL out through another.
+ *
+ * 'arg' is the source NAL's entry (from kpr_register_nal()); 'fwd'
+ * describes the packet.  The first route covering the target nid is used,
+ * provided its gateway is on a different, registered NAL that is not
+ * shutting down; in that case the destination NAL's kprni_fwd is called
+ * and both NAL entries stay referenced until kpr_complete_packet().
+ *
+ * If the packet cannot be forwarded, fwd's callback is invoked with
+ * -EHOSTUNREACH and the references taken here are dropped again.
+ */
+void
+kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd)
+{
+        kpr_nal_entry_t  *src_ne = (kpr_nal_entry_t *)arg;
+        ptl_nid_t         target_nid = fwd->kprfd_target_nid;
+        int               nob = fwd->kprfd_nob;
+        struct list_head *e;
+
+        CDEBUG (D_OTHER, "forward [%p] "LPX64" from NAL %d\n", fwd,
+                target_nid, src_ne->kpne_interface.kprni_nalid);
+
+        LASSERT (nob >= sizeof (ptl_hdr_t)); /* at least got a packet header */
+        LASSERT (nob == lib_iov_nob (fwd->kprfd_niov, fwd->kprfd_iov));
+
+        /* Take the source NAL's reference BEFORE the shutdown check: the
+         * failure path at 'out' always drops it, so jumping there without
+         * holding it would underflow the refcount. */
+        atomic_inc (&kpr_queue_depth);
+        atomic_inc (&src_ne->kpne_refcount); /* source nal is busy until fwd completes */
+
+        kpr_fwd_packets++;                   /* (loose) stats accounting */
+        kpr_fwd_bytes += nob;
+
+        if (src_ne->kpne_shutdown)           /* caller is shutting down */
+                goto out;
+
+        fwd->kprfd_router_arg = src_ne;      /* stash caller's nal entry */
+
+        read_lock (&kpr_rwlock);
+
+        /* Search routes for one that has a gateway to target_nid NOT on the caller's network */
+
+        for (e = kpr_routes.next; e != &kpr_routes; e = e->next)
+        {
+                kpr_route_entry_t *re = list_entry (e, kpr_route_entry_t, kpre_list);
+
+                if (re->kpre_lo_nid > target_nid || /* no match */
+                    re->kpre_hi_nid < target_nid)
+                        continue;
+
+                CDEBUG (D_OTHER, "forward [%p] "LPX64" from NAL %d: match "LPX64" on NAL %d\n", fwd,
+                        target_nid, src_ne->kpne_interface.kprni_nalid,
+                        re->kpre_gateway_nid, re->kpre_gateway_nalid);
+
+                if (re->kpre_gateway_nalid == src_ne->kpne_interface.kprni_nalid)
+                        break;                  /* don't route to same NAL */
+
+                /* Search for gateway's NAL's entry.  Reusing 'e' is safe
+                 * only because the outer loop is always left (break or
+                 * return) once this inner search finishes. */
+
+                for (e = kpr_nals.next; e != &kpr_nals; e = e->next)
+                {
+                        kpr_nal_entry_t *dst_ne = list_entry (e, kpr_nal_entry_t, kpne_list);
+
+                        if (re->kpre_gateway_nalid != dst_ne->kpne_interface.kprni_nalid) /* no match */
+                                continue;
+
+                        if (dst_ne->kpne_shutdown) /* don't route if NAL is shutting down */
+                                break;
+
+                        fwd->kprfd_gateway_nid = re->kpre_gateway_nid;
+                        atomic_inc (&dst_ne->kpne_refcount); /* dest nal is busy until fwd completes */
+
+                        read_unlock (&kpr_rwlock);
+
+                        CDEBUG (D_OTHER, "forward [%p] "LPX64" from NAL %d: "LPX64" on NAL %d\n", fwd,
+                                target_nid, src_ne->kpne_interface.kprni_nalid,
+                                fwd->kprfd_gateway_nid, dst_ne->kpne_interface.kprni_nalid);
+
+                        dst_ne->kpne_interface.kprni_fwd (dst_ne->kpne_interface.kprni_arg, fwd);
+                        return;
+                }
+                break;
+        }
+
+        read_unlock (&kpr_rwlock);
+ out:
+        kpr_fwd_errors++;
+
+        CDEBUG (D_OTHER, "Failed to forward [%p] "LPX64" from NAL %d\n", fwd,
+                target_nid, src_ne->kpne_interface.kprni_nalid);
+
+        /* Can't find anywhere to forward to */
+        (fwd->kprfd_callback)(fwd->kprfd_callback_arg, -EHOSTUNREACH);
+
+        atomic_dec (&kpr_queue_depth);
+        atomic_dec (&src_ne->kpne_refcount);
+}
+
+/*
+ * Called when a forwarded packet has been dealt with by the destination
+ * NAL.  'arg' is the destination NAL's entry; the source NAL's entry is
+ * recovered from the descriptor, where kpr_forward_packet() stashed it.
+ *
+ * The destination reference is dropped first, then the originator's
+ * callback is run with 'error', and finally the queue depth and the
+ * source reference are released.  Neither entry may be touched after its
+ * reference has been dropped.
+ */
+void
+kpr_complete_packet (void *arg, kpr_fwd_desc_t *fwd, int error)
+{
+        kpr_nal_entry_t *to   = (kpr_nal_entry_t *)arg;
+        kpr_nal_entry_t *from = (kpr_nal_entry_t *)fwd->kprfd_router_arg;
+
+        CDEBUG (D_OTHER, "complete(1) [%p] from NAL %d to NAL %d: %d\n", fwd,
+                from->kpne_interface.kprni_nalid, to->kpne_interface.kprni_nalid, error);
+
+        /* CAVEAT EMPTOR 'to' can disappear as soon as this ref is gone!!! */
+        atomic_dec (&to->kpne_refcount);
+
+        (fwd->kprfd_callback)(fwd->kprfd_callback_arg, error);
+
+        CDEBUG (D_OTHER, "complete(2) [%p] from NAL %d: %d\n", fwd,
+                from->kpne_interface.kprni_nalid, error);
+
+        atomic_dec (&kpr_queue_depth);
+
+        /* CAVEAT EMPTOR 'from' can disappear as soon as this ref is gone!!! */
+        atomic_dec (&from->kpne_refcount);
+}
+
+/*
+ * Add a route: nids in [lo_nid, hi_nid] are reached through gateway_nid
+ * on the NAL identified by gateway_nalid.
+ *
+ * Returns 0 on success, -ENOMEM if the entry cannot be allocated, or
+ * -EINVAL if the range overlaps a route already in the table.
+ * lo_nid must not exceed hi_nid.  Must not be called from interrupt
+ * context.
+ */
+int
+kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid,
+               ptl_nid_t hi_nid)
+{
+        unsigned long      flags;       /* irqsave state must be unsigned long */
+        struct list_head  *e;
+        kpr_route_entry_t *re;
+
+        CDEBUG(D_OTHER, "Add route: %d "LPX64" : "LPX64" - "LPX64"\n",
+               gateway_nalid, gateway_nid, lo_nid, hi_nid);
+
+        LASSERT(lo_nid <= hi_nid);
+
+        PORTAL_ALLOC (re, sizeof (*re));
+        if (re == NULL)
+                return (-ENOMEM);
+
+        re->kpre_gateway_nalid = gateway_nalid;
+        re->kpre_gateway_nid = gateway_nid;
+        re->kpre_lo_nid = lo_nid;
+        re->kpre_hi_nid = hi_nid;
+
+        LASSERT(!in_interrupt());
+        write_lock_irqsave (&kpr_rwlock, flags);
+
+        /* Reject the new range if it intersects any existing one */
+        for (e = kpr_routes.next; e != &kpr_routes; e = e->next) {
+                kpr_route_entry_t *re2 = list_entry(e, kpr_route_entry_t,
+                                                    kpre_list);
+
+                if (re->kpre_lo_nid > re2->kpre_hi_nid ||
+                    re->kpre_hi_nid < re2->kpre_lo_nid)
+                        continue;
+
+                CERROR ("Attempt to add duplicate routes ["LPX64" - "LPX64"]"
+                        " to ["LPX64" - "LPX64"]\n",
+                        re->kpre_lo_nid, re->kpre_hi_nid,
+                        re2->kpre_lo_nid, re2->kpre_hi_nid);
+
+                write_unlock_irqrestore (&kpr_rwlock, flags);
+
+                PORTAL_FREE (re, sizeof (*re));
+                return (-EINVAL);
+        }
+
+        list_add (&re->kpre_list, &kpr_routes);
+
+        write_unlock_irqrestore (&kpr_rwlock, flags);
+        return (0);
+}
+
+/*
+ * Delete the first route whose [lo, hi] range contains 'nid'.
+ *
+ * Returns 0 if a route was removed and freed, -ENOENT if no route covers
+ * 'nid'.  Must not be called from interrupt context.
+ */
+int
+kpr_del_route (ptl_nid_t nid)
+{
+        unsigned long     flags;        /* irqsave state must be unsigned long */
+        struct list_head *e;
+
+        CDEBUG(D_OTHER, "Del route "LPX64"\n", nid);
+
+        LASSERT(!in_interrupt());
+        write_lock_irqsave(&kpr_rwlock, flags);
+
+        for (e = kpr_routes.next; e != &kpr_routes; e = e->next) {
+                kpr_route_entry_t *re = list_entry(e, kpr_route_entry_t,
+                                                   kpre_list);
+
+                if (re->kpre_lo_nid > nid || re->kpre_hi_nid < nid)
+                        continue;
+
+                /* Unlink under the lock; free once it has been dropped */
+                list_del (&re->kpre_list);
+                write_unlock_irqrestore(&kpr_rwlock, flags);
+
+                PORTAL_FREE(re, sizeof (*re));
+                return (0);
+        }
+
+        write_unlock_irqrestore(&kpr_rwlock, flags);
+        return (-ENOENT);
+}
+
+/*
+ * Report the idx'th route (counting from 0 in list order) through the
+ * four output pointers.
+ *
+ * Returns 0 if such a route exists, -ENOENT otherwise (in which case the
+ * outputs are left untouched).
+ */
+int
+kpr_get_route(int idx, int *gateway_nalid, ptl_nid_t *gateway_nid,
+              ptl_nid_t *lo_nid, ptl_nid_t *hi_nid)
+{
+        struct list_head *pos;
+        int               status = -ENOENT;
+
+        read_lock(&kpr_rwlock);
+
+        /* Walk the table, counting idx down to the wanted entry */
+        for (pos = kpr_routes.next; pos != &kpr_routes; pos = pos->next, idx--) {
+                kpr_route_entry_t *route;
+
+                if (idx != 0)
+                        continue;
+
+                route = list_entry(pos, kpr_route_entry_t, kpre_list);
+
+                *gateway_nalid = route->kpre_gateway_nalid;
+                *gateway_nid = route->kpre_gateway_nid;
+                *lo_nid = route->kpre_lo_nid;
+                *hi_nid = route->kpre_hi_nid;
+
+                status = 0;
+                break;
+        }
+
+        read_unlock(&kpr_rwlock);
+        return (status);
+}
+
+/*
+ * Module exit: every NAL must already have deregistered.  Frees whatever
+ * routes are still in the table, removes the /proc entry and withdraws
+ * the two interface symbols.
+ */
+static void __exit
+kpr_finalise (void)
+{
+        LASSERT (list_empty (&kpr_nals));
+
+        /* Pop routes off the head of the list until none remain */
+        for (;;) {
+                kpr_route_entry_t *re;
+
+                if (list_empty (&kpr_routes))
+                        break;
+
+                re = list_entry(kpr_routes.next, kpr_route_entry_t, kpre_list);
+
+                list_del(&re->kpre_list);
+                PORTAL_FREE(re, sizeof (*re));
+        }
+
+        kpr_proc_fini();
+
+        PORTAL_SYMBOL_UNREGISTER(kpr_router_interface);
+        PORTAL_SYMBOL_UNREGISTER(kpr_control_interface);
+
+        CDEBUG(D_MALLOC, "kpr_finalise: kmem back to %d\n",
+               atomic_read(&portal_kmemory));
+}
+
+/*
+ * Module init: set up the (empty) NAL and route tables and the lock that
+ * guards them, create the /proc entry and publish the two interface
+ * symbols.  Always returns 0.
+ */
+static int __init
+kpr_initialise (void)
+{
+        CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n",
+               atomic_read(&portal_kmemory));
+
+        /* Tables and their lock first, before anything can reach them */
+        INIT_LIST_HEAD(&kpr_nals);
+        INIT_LIST_HEAD(&kpr_routes);
+        rwlock_init(&kpr_rwlock);
+
+        kpr_proc_init();
+
+        PORTAL_SYMBOL_REGISTER(kpr_router_interface);
+        PORTAL_SYMBOL_REGISTER(kpr_control_interface);
+
+        return (0);
+}
+
+MODULE_AUTHOR("Eric Barton");
+MODULE_DESCRIPTION("Kernel Portals Router v0.01");
+MODULE_LICENSE("GPL");
+
+module_init (kpr_initialise);
+module_exit (kpr_finalise);
+
+EXPORT_SYMBOL (kpr_control_interface);
+EXPORT_SYMBOL (kpr_router_interface);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Portals
+ * http://sourceforge.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef _KPTLROUTER_H
+#define _KPTLROUTER_H
+#define EXPORT_SYMTAB
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+
+#define DEBUG_SUBSYSTEM S_PTLROUTER
+
+#include <linux/kp30.h>
+#include <portals/p30.h>
+#include <portals/lib-p30.h>
+
+/* Router-side record of one registered NAL; allocated by
+ * kpr_register_nal() and freed by kpr_deregister_nal(). */
+typedef struct
+{
+ struct list_head kpne_list; /* links the entry into kpr_nals */
+ kpr_nal_interface_t kpne_interface; /* copy of the interface the NAL registered with */
+ atomic_t kpne_refcount; /* held by forwards in flight; shutdown waits for 0 */
+ int kpne_shutdown; /* set by kpr_shutdown_nal(); no routing via this NAL once set */
+} kpr_nal_entry_t;
+
+/* One routing table entry: target nids in [kpre_lo_nid, kpre_hi_nid]
+ * (inclusive) are reached via gateway kpre_gateway_nid on the NAL whose
+ * id is kpre_gateway_nalid. */
+typedef struct
+{
+ struct list_head kpre_list; /* links the entry into kpr_routes */
+ int kpre_gateway_nalid; /* id of the NAL the gateway is on */
+ ptl_nid_t kpre_gateway_nid; /* nid of the gateway */
+ ptl_nid_t kpre_lo_nid; /* lowest target nid covered */
+ ptl_nid_t kpre_hi_nid; /* highest target nid covered */
+} kpr_route_entry_t;
+
+extern int kpr_register_nal (kpr_nal_interface_t *nalif, void **argp);
+extern int kpr_lookup_target (void *arg, ptl_nid_t target_nid, ptl_nid_t *gateway_nidp);
+extern void kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd);
+extern void kpr_complete_packet (void *arg, kpr_fwd_desc_t *fwd, int error);
+extern void kpr_shutdown_nal (void *arg);
+extern void kpr_deregister_nal (void *arg);
+
+extern void kpr_proc_init (void);
+extern void kpr_proc_fini (void);
+
+extern int kpr_add_route (int gateway_nal, ptl_nid_t gateway_nid,
+ ptl_nid_t lo_nid, ptl_nid_t hi_nid);
+extern int kpr_del_route (ptl_nid_t nid);
+extern int kpr_get_route (int idx, int *gateway_nal, ptl_nid_t *gateway_nid,
+ ptl_nid_t *lo_nid, ptl_nid_t *hi_nid);
+
+extern unsigned long long kpr_fwd_bytes;
+extern unsigned long kpr_fwd_packets;
+extern unsigned long kpr_fwd_errors;
+extern atomic_t kpr_queue_depth;
+
+#endif /* _KPTLROUTER_H */
--- /dev/null
+Makefile
+Makefile.in
+.deps
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+include ../Rules.linux
+
+LDFLAGS = -m "`$(LD) --help | awk '/supported emulations/ {print $$4}'`" -r
+LINK = $(LD) $(LDFLAGS) -o $@
+DEFS =
+LIBS =
+MODULE = $(basename)
+EXTRA_DIST = startserver.sh startclient.sh stopserver.sh stopclient.sh
+
+noinst_PROGRAMS = pingsrv.o pingcli.o spingsrv.o spingcli.o
+
+pingsrv_o_SOURCES = ping_srv.c ping.h
+
+pingcli_o_SOURCES = ping_cli.c ping.h
+
+spingsrv_o_SOURCES = sping_srv.c ping.h
+
+spingcli_o_SOURCES = sping_cli.c ping.h
--- /dev/null
+#ifndef _KPING_INCLUDED
+#define _KPING_INCLUDED
+
+#include <portals/p30.h>
+
+
+#define PTL_PING_IN_SIZE 256 // n packets per buffer
+#define PTL_PING_IN_BUFFERS 2 // n fallback buffers
+
+#define PTL_PING_CLIENT 4
+#define PTL_PING_SERVER 5
+
+#define PING_HEADER_MAGIC 0xDEADBEEF
+#define PING_BULK_MAGIC 0xCAFEBABE
+
+#define PING_HEAD_BITS 0x00000001
+#define PING_BULK_BITS 0x00000002
+#define PING_IGNORE_BITS 0xFFFFFFFC
+
+#define PTL_PING_ACK 0x01
+#define PTL_PING_VERBOSE 0x02
+#define PTL_PING_VERIFY 0x04
+#define PTL_PING_PREALLOC 0x08
+
+
+/* Index of the buffer following `index`, wrapping back to 0 after the last
+ * of the PTL_PING_IN_BUFFERS buffers.  The argument is parenthesized so
+ * that expressions may be passed safely; NB it is evaluated more than
+ * once, so it must not have side effects. */
+#define NEXT_PRIMARY_BUFFER(index) \
+        ((((index) + 1) >= PTL_PING_IN_BUFFERS) ? 0 : ((index) + 1))
+
+#define PDEBUG(str, err) \
+ CERROR ("%s: error=%s (%d)\n", str, ptl_err_str[err], err)
+
+
+/* Ping data to be passed via the ioctl to kernel space */
+
+#if __KERNEL__
+
+
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+#include <linux/workqueue.h>
+#else
+#include <linux/tqueue.h>
+#endif
+/* State kept by the kernel ping server.
+ * NOTE(review): the code that uses this structure is not visible here, so
+ * the field notes below are read off the names and types only -- confirm
+ * against the ping server source. */
+struct pingsrv_data {
+
+ ptl_handle_ni_t ni; /* network interface handle */
+ ptl_handle_me_t me; /* match entry handle */
+ ptl_handle_eq_t eq; /* event queue handle */
+ void *in_buf; /* presumably the receive buffer -- TODO confirm */
+ ptl_process_id_t my_id;
+ ptl_process_id_t id_local;
+ ptl_md_t mdin; /* presumably incoming / outgoing memory descriptors ... */
+ ptl_md_t mdout;
+ ptl_handle_md_t mdin_h; /* ... and their handles -- TODO confirm */
+ ptl_handle_md_t mdout_h;
+ ptl_event_t evnt;
+ struct task_struct *tsk;
+}; /* struct pingsrv_data */
+
+/* State kept by the kernel ping client (see ping_cli.c). */
+struct pingcli_data {
+
+ struct portal_ioctl_data *args; /* ioctl arguments, stored by pingcli_start() */
+ ptl_handle_me_t me; /* match entry attached on PTL_PING_CLIENT */
+ ptl_handle_eq_t eq; /* event queue; its callback is pingcli_callback() */
+ char *inbuf; /* receive buffer allocated in pingcli_start() */
+ char *outbuf; /* send buffer allocated in pingcli_start() */
+ ptl_process_id_t myid; /* filled in by PtlGetId() */
+ ptl_process_id_t id_local; /* local match id (any nid / any pid) */
+ ptl_process_id_t id_remote; /* peer: nid from the ioctl args, pid 0 */
+ ptl_md_t md_in_head; /* MD describing inbuf */
+ ptl_md_t md_out_head; /* presumably the MD describing outbuf -- TODO confirm */
+ ptl_handle_md_t md_in_head_h; /* handles unlinked by pingcli_shutdown() */
+ ptl_handle_md_t md_out_head_h;
+ ptl_event_t ev;
+ struct task_struct *tsk; /* task woken by pingcli_callback() */
+}; /* struct pingcli_data */
+
+
+#endif /* __KERNEL__ */
+
+#endif /* _KPING_INCLUDED */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
+ * Author: Brian Behlendorf <behlendorf1@llnl.gov>
+ * Kedar Sovani (kedar@calsoftinc.com)
+ * Amey Inamdar (amey@calsoftinc.com)
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#define DEBUG_SUBSYSTEM S_PINGER
+
+#include <linux/kp30.h>
+#include <portals/p30.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include "ping.h"
+/* int portal_debug = D_PING_CLI; */
+
+
+#define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval))
+
+#define MAX_TIME 100000
+
+/* This should be enclosed in a structure */
+
+static struct pingcli_data *client = NULL;
+
+static int count = 0;
+
+/*
+ * Release the ping client's resources.
+ *
+ * 'err' selects how far down the cleanup ladder to start: 1 does
+ * everything, larger values skip the earlier steps, and each case falls
+ * through into the next.  Any other value only logs the final message.
+ *
+ * NOTE(review): client->inbuf and client->outbuf are not freed here --
+ * confirm they are released elsewhere.
+ * NOTE(review): cases 1-3 dereference 'client' before the NULL check made
+ * in case 4.
+ */
+static void
+pingcli_shutdown(int err)
+{
+ int rc;
+
+ /* Yes, we are intentionally allowing us to fall through each
+ * case in to the next. This allows us to pass an error
+ * code to just clean up the right stuff.
+ */
+ switch (err) {
+ case 1:
+ /* Unlink any memory descriptors we may have used */
+ if ((rc = PtlMDUnlink (client->md_out_head_h)))
+ PDEBUG ("PtlMDUnlink", rc);
+ /* fall through */
+ case 2:
+ if ((rc = PtlMDUnlink (client->md_in_head_h)))
+ PDEBUG ("PtlMDUnlink", rc);
+
+ /* Free the event queue */
+ if ((rc = PtlEQFree (client->eq)))
+ PDEBUG ("PtlEQFree", rc);
+
+ if ((rc = PtlMEUnlink (client->me)))
+ PDEBUG ("PtlMEUnlink", rc);
+ /* fall through */
+ case 3:
+ /* Drop the NAL reference taken with kportal_get_ni() */
+ kportal_put_ni (client->args->ioc_nal);
+
+ /* fall through */
+ case 4:
+ /* Free our buffers */
+
+ if (client != NULL)
+ PORTAL_FREE (client,
+ sizeof(struct pingcli_data));
+ }
+
+
+ CDEBUG (D_OTHER, "ping client released resources\n");
+} /* pingcli_shutdown() */
+
+/*
+ * Event queue callback for the ping client.
+ *
+ * Reads two ints from the event's buffer at ev->offset: a magic number
+ * and, sizeof(unsigned) bytes further on, a sequence number.  A reply
+ * with the wrong magic is reported and ignored.  Otherwise the client
+ * task is woken if the sequence number equals 'count' (or 'count' is 0);
+ * any other sequence number is reported as a late reply.
+ *
+ * Always returns 1.
+ */
+static int pingcli_callback(ptl_event_t *ev)
+{
+        int reply_magic = *(int *)(ev->mem_desc.start + ev->offset);
+        int reply_seq = *(int *)(ev->mem_desc.start + ev->offset + sizeof(unsigned));
+
+        if (reply_magic == 0xcafebabe) {
+                if (count == 0 || reply_seq == count)
+                        wake_up_process (client->tsk);
+                else
+                        printk ("Received response after timeout for %d\n",reply_seq);
+        } else {
+                printk ("Unexpected response \n");
+        }
+
+        return 1;
+}
+
+
+/*
+ * Run one ping session against the server described by 'args'.
+ *
+ * Allocates the in/out buffers, attaches to the NAL given by
+ * args->ioc_nal, then sends args->ioc_count pings of
+ * STDSIZE + args->ioc_size bytes to args->ioc_nid.  After each send the
+ * task sleeps for up to 20 * args->ioc_timeout ticks and prints either the
+ * round-trip time or a timeout notice.
+ *
+ * The global 'client' must already have been allocated by the caller; it
+ * is freed (via pingcli_shutdown()) before this function returns, on every
+ * path.  The return value is always NULL.
+ */
+static struct pingcli_data *
+pingcli_start(struct portal_ioctl_data *args)
+{
+        ptl_handle_ni_t *nip;
+        unsigned         ping_head_magic = PING_HEADER_MAGIC;
+        size_t           out_size = STDSIZE + args->ioc_size;
+        size_t           in_size = (args->ioc_size + STDSIZE) * args->ioc_count;
+        void            *out_mem;
+        void            *in_mem;
+        int              level;         /* pingcli_shutdown() entry point */
+        int              rc;
+        struct timeval   tv1, tv2;
+
+        client->tsk = current;
+        client->args = args;
+        CDEBUG (D_OTHER, "pingcli_setup args: nid "LPX64", \
+ nal %d, size %u, count: %u, timeout: %u\n",
+                args->ioc_nid, args->ioc_nal, args->ioc_size,
+                args->ioc_count, args->ioc_timeout);
+
+        PORTAL_ALLOC (client->outbuf, out_size);
+        if (client->outbuf == NULL) {
+                CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n",
+                        out_size);
+                level = 4;
+                goto out;
+        }
+
+        PORTAL_ALLOC (client->inbuf, in_size);
+        if (client->inbuf == NULL) {
+                CERROR ("Unable to allocate in_buf ("LPSZ" bytes)\n",
+                        in_size);
+                level = 4;
+                goto out;
+        }
+
+        /* Acquire and initialize the proper nal for portals. */
+        nip = kportal_get_ni (args->ioc_nal);
+        if (nip == NULL) {
+                CERROR ("NAL %d not loaded\n", args->ioc_nal);
+                level = 4;
+                goto out;
+        }
+
+        /* Based on the initialization acquire our unique portal ID.
+         * Only the NI reference is held at this point, hence level 3. */
+        rc = PtlGetId (*nip, &client->myid);
+        if (rc) {
+                CERROR ("PtlGetId error %d\n", rc);
+                level = 3;
+                goto out;
+        }
+
+        /* Setup the local match entries */
+        client->id_local.nid = PTL_NID_ANY;
+        client->id_local.pid = PTL_PID_ANY;
+
+        /* Setup the remote match entries */
+        client->id_remote.nid = args->ioc_nid;
+        client->id_remote.pid = 0;
+
+        rc = PtlMEAttach (*nip, PTL_PING_CLIENT,
+                          client->id_local, 0, ~0, PTL_RETAIN,
+                          PTL_INS_AFTER, &client->me);
+        if (rc) {
+                CERROR ("PtlMEAttach error %d\n", rc);
+                level = 3;
+                goto out;
+        }
+
+        /* Allocate the event queue for this network interface */
+        rc = PtlEQAlloc (*nip, 64, pingcli_callback, &client->eq);
+        if (rc) {
+                CERROR ("PtlEQAlloc error %d\n", rc);
+                level = 2;
+                goto out;
+        }
+
+        /* The callback reads 'count' as soon as the MD is attached */
+        count = 0;
+
+        client->md_in_head.start     = client->inbuf;
+        client->md_in_head.length    = in_size;
+        client->md_in_head.threshold = PTL_MD_THRESH_INF;
+        client->md_in_head.options   = PTL_MD_OP_PUT;
+        client->md_in_head.user_ptr  = NULL;
+        client->md_in_head.eventq    = client->eq;
+        memset (client->inbuf, 0, in_size);
+
+        /* Attach the incoming buffer */
+        rc = PtlMDAttach (client->me, client->md_in_head,
+                          PTL_UNLINK, &client->md_in_head_h);
+        if (rc) {
+                CERROR ("PtlMDAttach error %d\n", rc);
+                level = 2;
+                goto out;
+        }
+
+        /* Setup the outgoing ping header */
+        client->md_out_head.start     = client->outbuf;
+        client->md_out_head.length    = out_size;
+        client->md_out_head.threshold = args->ioc_count;
+        client->md_out_head.options   = PTL_MD_OP_PUT;
+        client->md_out_head.user_ptr  = NULL;
+        client->md_out_head.eventq    = PTL_EQ_NONE;
+
+        memcpy (client->outbuf, &ping_head_magic, sizeof(ping_head_magic));
+
+        /* Bind the outgoing ping header */
+        rc = PtlMDBind (*nip, client->md_out_head, &client->md_out_head_h);
+        if (rc) {
+                CERROR ("PtlMDBind error %d\n", rc);
+                level = 2;
+                goto out;
+        }
+
+        while ((args->ioc_count - count) != 0) {
+                /* Sequence number, then send timestamp, follow the magic */
+                memcpy (client->outbuf + sizeof(unsigned),
+                        &(count), sizeof(unsigned));
+                do_gettimeofday (&tv1);
+                memcpy (client->outbuf + sizeof(unsigned) + sizeof(unsigned),
+                        &tv1, sizeof(struct timeval));
+
+                /* Put the ping packet */
+                rc = PtlPut (client->md_out_head_h, PTL_NOACK_REQ,
+                             client->id_remote, PTL_PING_SERVER, 0, 0, 0, 0);
+                if (rc) {
+                        PDEBUG ("PtlPut (header)", rc);
+                        level = 1;
+                        goto out;
+                }
+                printk ("sent msg no %d", count);
+
+                /* NOTE(review): a reply that arrives before the state
+                 * change below is not noticed until the timeout expires;
+                 * confirm whether PtlPut may sleep before reordering. */
+                set_current_state (TASK_INTERRUPTIBLE);
+                rc = schedule_timeout (20 * args->ioc_timeout);
+                if (rc == 0) {
+                        printk (" :: timeout .....\n");
+                } else {
+                        do_gettimeofday (&tv2);
+                        printk(" :: Reply in %u usec\n",
+                               (unsigned)((tv2.tv_sec - tv1.tv_sec)
+                                * 1000000 + (tv2.tv_usec - tv1.tv_usec)));
+                }
+                count++;
+        }
+
+        /* Success! */
+        level = 2;
+
+out:
+        /* pingcli_shutdown() frees 'client', so remember the buffers first;
+         * they are released only after the MDs that reference them have
+         * been unlinked. */
+        out_mem = client->outbuf;
+        in_mem = client->inbuf;
+
+        pingcli_shutdown (level);
+
+        if (out_mem != NULL)
+                PORTAL_FREE (out_mem, out_size);
+        if (in_mem != NULL)
+                PORTAL_FREE (in_mem, in_size);
+
+        return NULL;
+} /* pingcli_setup() */
+
+
+
+/*
+ * Entry point called by the portals ioctl handler for ping requests.
+ * Allocates and zeroes the global client state, then hands 'args' to
+ * pingcli_start().  Returns 0 in every case, including allocation failure.
+ */
+static int kping_client(struct portal_ioctl_data *args)
+{
+        PORTAL_ALLOC (client, sizeof(struct pingcli_data));
+        if (client != NULL) {
+                memset (client, 0, sizeof(struct pingcli_data));
+                pingcli_start (args);
+                return 0;
+        }
+
+        CERROR ("Unable to allocate client structure\n");
+        return (0);
+} /* kping_client() */
+
+
+/* Module entry point: publishes kping_client through
+ * PORTAL_SYMBOL_REGISTER (presumably so the portals ioctl path can look it
+ * up - confirm against the macro's definition).  Always returns 0. */
+static int __init pingcli_init(void)
+{
+ PORTAL_SYMBOL_REGISTER(kping_client);
+ return 0;
+} /* pingcli_init() */
+
+
+/* Module exit point: withdraws the kping_client registration made in
+ * pingcli_init(). */
+static void __exit pingcli_cleanup(void)
+{
+ PORTAL_SYMBOL_UNREGISTER (kping_client);
+} /* pingcli_cleanup() */
+
+
+MODULE_AUTHOR("Brian Behlendorf (LLNL)");
+MODULE_DESCRIPTION("A simple kernel space ping client for portals testing");
+MODULE_LICENSE("GPL");
+
+module_init(pingcli_init);
+module_exit(pingcli_cleanup);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+EXPORT_SYMBOL (kping_client);
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
+ * Author: Brian Behlendorf <behlendorf1@llnl.gov>
+ * Amey Inamdar <amey@calsoftinc.com>
+ * Kedar Sovani <kedar@calsoftinc.com>
+ *
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_PINGER
+
+#include <linux/kp30.h>
+#include <portals/p30.h>
+#include "ping.h"
+
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#include <linux/workqueue.h>
+#else
+#include <linux/tqueue.h>
+#endif
+#include <linux/wait.h>
+#include <linux/smp_lock.h>
+
+#include <asm/unistd.h>
+#include <asm/semaphore.h>
+
+/* Size of the fixed ping header: two ints followed by a struct timeval. */
+#define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval))
+/* Size of the server's single receive/reply buffer (16 MiB). */
+#define MAXSIZE (16*1024*1024)
+
+/* Set from PING_HEADER_MAGIC / PING_BULK_MAGIC in pingsrv_init(). */
+static unsigned ping_head_magic;
+static unsigned ping_bulk_magic;
+static int nal = 0; // Your NAL,
+static unsigned long packets_valid = 0; // Valid packets
+/* 1 while the server thread should run; pingsrv_cleanup() clears it and the
+ * thread sets it back to 1 once it has shut down. */
+static int running = 1;
+/* Count of received pings not yet answered by pingsrv_thread(). */
+atomic_t pkt;
+
+static struct pingsrv_data *server=NULL; // Our ping server
+
+/*
+ * Release the ping server's resources and return NULL (so callers can
+ * write "return pingsrv_shutdown(n);").
+ *
+ * 'err' selects the entry point into a deliberate fall-through chain:
+ *   1 - unlink the incoming MD, then everything below
+ *   2 - free the EQ and unlink the ME, then below
+ *   3 - drop the NI reference, then below
+ *   4, 5 - free the receive buffer and the server structure
+ * Any other value releases nothing.  Portals failures are only logged.
+ */
+static void *pingsrv_shutdown(int err)
+{
+        int rc;
+
+        switch (err) {
+        case 1:
+                /* Unlink any memory descriptors we may have used */
+                rc = PtlMDUnlink (server->mdin_h);
+                if (rc)
+                        PDEBUG ("PtlMDUnlink (out head buffer)", rc);
+                /* fall through */
+        case 2:
+                /* Free the event queue */
+                rc = PtlEQFree (server->eq);
+                if (rc)
+                        PDEBUG ("PtlEQFree", rc);
+
+                /* Unlink the client portal from the ME list */
+                rc = PtlMEUnlink (server->me);
+                if (rc)
+                        PDEBUG ("PtlMEUnlink", rc);
+                /* fall through */
+        case 3:
+                kportal_put_ni (nal);
+                /* fall through */
+        case 4:
+        case 5:
+                /* Test the structure before looking inside it */
+                if (server == NULL)
+                        break;
+
+                if (server->in_buf != NULL)
+                        PORTAL_FREE (server->in_buf, MAXSIZE);
+
+                PORTAL_FREE (server, sizeof (struct pingsrv_data));
+                break;
+        default:
+                break;
+        }
+
+        CDEBUG (D_OTHER, "ping sever resources released\n");
+        return NULL;
+} /* pingsrv_shutdown() */
+
+
+/*
+ * Server thread: sleeps until pingsrv_packet() reports a received ping,
+ * then overwrites the first word of the receive buffer with the reply
+ * magic, re-arms the receive MD and sends the buffer back to the
+ * initiator on PTL_PING_CLIENT.
+ *
+ * Runs until pingsrv_cleanup() clears 'running'; it then releases all
+ * server resources and sets 'running' back to 1 as the "thread has
+ * exited" acknowledgement.  Returns 0 on that path, 1 if PtlMDBind fails.
+ */
+int pingsrv_thread(void *arg)
+{
+        int rc;
+        unsigned magic;
+        /* Exactly one 32-bit word: a wider type would also overwrite the
+         * sequence number that follows the magic in the packet. */
+        unsigned reply_magic = 0xcafebabe;
+
+        kportal_daemonize ("pingsrv");
+        server->tsk = current;
+
+        for (;;) {
+                /* Change state before testing the wake-up conditions so a
+                 * wake_up_process() in between is not lost. */
+                set_current_state (TASK_INTERRUPTIBLE);
+                if (!running)
+                        break;
+                if (atomic_read (&pkt) == 0) {
+                        schedule_timeout (MAX_SCHEDULE_TIMEOUT);
+                        continue;
+                }
+                set_current_state (TASK_RUNNING);
+
+                /* Read as unsigned so the comparison also holds where
+                 * long is wider than int. */
+                magic = *((unsigned *)(server->evnt.mem_desc.start
+                                       + server->evnt.offset));
+                if (magic != 0xdeadbeef)
+                        printk("Unexpected Packet to the server\n");
+
+                memcpy (server->in_buf, &reply_magic, sizeof(reply_magic));
+
+                server->mdout.length    = server->evnt.rlength;
+                server->mdout.start     = server->in_buf;
+                server->mdout.threshold = 1;
+                server->mdout.options   = PTL_MD_OP_PUT;
+                server->mdout.user_ptr  = NULL;
+                server->mdout.eventq    = PTL_EQ_NONE;
+
+                /* Bind the outgoing buffer */
+                rc = PtlMDBind (server->ni, server->mdout, &server->mdout_h);
+                if (rc) {
+                        PDEBUG ("PtlMDBind", rc);
+                        /* NOTE(review): 'server' is freed here while
+                         * pingsrv_cleanup() still expects a live thread -
+                         * confirm how module unload should behave. */
+                        pingsrv_shutdown (1);
+                        return 1;
+                }
+
+                /* Re-arm the receive buffer for the next ping */
+                server->mdin.start     = server->in_buf;
+                server->mdin.length    = MAXSIZE;
+                server->mdin.threshold = 1;
+                server->mdin.options   = PTL_MD_OP_PUT;
+                server->mdin.user_ptr  = NULL;
+                server->mdin.eventq    = server->eq;
+
+                rc = PtlMDAttach (server->me, server->mdin,
+                                  PTL_UNLINK, &server->mdin_h);
+                if (rc)
+                        PDEBUG ("PtlMDAttach (bulk)", rc);
+
+                rc = PtlPut (server->mdout_h, PTL_NOACK_REQ,
+                             server->evnt.initiator, PTL_PING_CLIENT,
+                             0, 0, 0, 0);
+                if (rc)
+                        PDEBUG ("PtlPut", rc);
+
+                atomic_dec (&pkt);
+        }
+        set_current_state (TASK_RUNNING);
+
+        pingsrv_shutdown (1);
+        running = 1;
+        return 0;
+}
+
+/*
+ * Record one received ping and wake the server thread.  Always returns 1.
+ *
+ * The receive MD is attached before the thread has stored itself in
+ * server->tsk, so a very early ping may find no task yet.  The count is
+ * still bumped; the thread tests it before it first sleeps.
+ * (Assumes PORTAL_ALLOC zero-fills 'server' - TODO confirm.)
+ */
+static int pingsrv_packet(ptl_event_t *ev)
+{
+        atomic_inc (&pkt);
+        if (server->tsk != NULL)
+                wake_up_process (server->tsk);
+        return 1;
+} /* pingsrv_packet() */
+
+/*
+ * Event-queue callback for the server's receive buffer.
+ *
+ * Saves a copy of the event in server->evnt for pingsrv_thread(), logs the
+ * sender and the first three words of the message, counts the packet and
+ * hands over to pingsrv_packet().  Returns 0 for a NULL event, otherwise
+ * whatever pingsrv_packet() returns.
+ */
+static int pingsrv_callback(ptl_event_t *ev)
+{
+        int *words;
+
+        if (ev == NULL) {
+                CERROR ("null in callback, ev=%p\n", ev);
+                return 0;
+        }
+
+        server->evnt = *ev;
+
+        /* words[0] = magic, words[1] = sequence, words[2] = next word */
+        words = (int *)(ev->mem_desc.start + ev->offset);
+        printk ("received ping from nid "LPX64" "
+                "(off=%u rlen=%u mlen=%u head=%x seq=%d size=%d)\n",
+                ev->initiator.nid, ev->offset, ev->rlength, ev->mlength,
+                words[0], words[1], words[2]);
+
+        packets_valid++;
+
+        return pingsrv_packet(ev);
+} /* pingsrv_callback() */
+
+
+/*
+ * Attach the ping server to the NAL selected by the 'nal' module
+ * parameter: take an NI reference, create the match entry on
+ * PTL_PING_SERVER, allocate the event queue and the MAXSIZE receive
+ * buffer, and attach the receive MD.
+ *
+ * Returns 'server' on success.  On any failure everything acquired so far
+ * (including 'server' itself) is released and NULL is returned.
+ */
+static struct pingsrv_data *pingsrv_setup(void)
+{
+        ptl_handle_ni_t *nip;
+        int rc;
+
+        /* Acquire and initialize the proper nal for portals. */
+        nip = kportal_get_ni (nal);
+        if (nip == NULL) {
+                CDEBUG (D_OTHER, "NAL %d not loaded\n", nal);
+                return pingsrv_shutdown (4);
+        }
+
+        server->ni = *nip;
+
+        /* Based on the initialization acquire our unique portal ID.
+         * Only the NI reference is held so far, hence level 3. */
+        rc = PtlGetId (server->ni, &server->my_id);
+        if (rc) {
+                PDEBUG ("PtlGetId", rc);
+                return pingsrv_shutdown (3);
+        }
+
+        server->id_local.nid = PTL_NID_ANY;
+        server->id_local.pid = PTL_PID_ANY;
+
+        /* Attach a match entries for header packets */
+        rc = PtlMEAttach (server->ni, PTL_PING_SERVER,
+                          server->id_local, 0, ~0,
+                          PTL_RETAIN, PTL_INS_AFTER, &server->me);
+        if (rc) {
+                PDEBUG ("PtlMEAttach", rc);
+                return pingsrv_shutdown (3);
+        }
+
+        rc = PtlEQAlloc (server->ni, 1024, pingsrv_callback, &server->eq);
+        if (rc) {
+                PDEBUG ("PtlEQAlloc (callback)", rc);
+                return pingsrv_shutdown (2);
+        }
+
+        PORTAL_ALLOC (server->in_buf, MAXSIZE);
+        if (server->in_buf == NULL) {
+                CDEBUG (D_OTHER,"Allocation error\n");
+                return pingsrv_shutdown (2);
+        }
+
+        /* Setup the incoming buffer */
+        server->mdin.start     = server->in_buf;
+        server->mdin.length    = MAXSIZE;
+        server->mdin.threshold = 1;
+        server->mdin.options   = PTL_MD_OP_PUT;
+        server->mdin.user_ptr  = NULL;
+        server->mdin.eventq    = server->eq;
+        memset (server->in_buf, 0, STDSIZE);
+
+        /* Without the receive MD the server can never see a ping, so a
+         * failure here is a setup failure, not a success. */
+        rc = PtlMDAttach (server->me, server->mdin,
+                          PTL_UNLINK, &server->mdin_h);
+        if (rc) {
+                PDEBUG ("PtlMDAttach (bulk)", rc);
+                return pingsrv_shutdown (2);
+        }
+
+        CDEBUG (D_OTHER, "ping server resources allocated\n");
+
+        /* Success! */
+        return server;
+} /* pingsrv_setup() */
+
+/*
+ * Set the server up and start its worker thread.
+ *
+ * Returns 0 on success, -ENOMEM if pingsrv_setup() fails, or the negative
+ * value returned by kernel_thread() if the thread cannot be created (in
+ * which case the resources acquired by pingsrv_setup() are released).
+ */
+static int pingsrv_start(void)
+{
+        int rc;
+
+        /* Setup our server */
+        if (!pingsrv_setup()) {
+                CDEBUG (D_OTHER, "pingsrv_setup() failed, server stopped\n");
+                return -ENOMEM;
+        }
+
+        rc = kernel_thread (pingsrv_thread, NULL, 0);
+        if (rc < 0) {
+                CERROR ("Unable to start pingsrv thread: %d\n", rc);
+                pingsrv_shutdown (1);
+                return rc;
+        }
+
+        return 0;
+} /* pingsrv_start() */
+
+
+
+/*
+ * Module entry point: records the magic numbers, allocates the server
+ * structure and starts the server.  Returns -ENOMEM if the structure
+ * cannot be allocated, otherwise the result of pingsrv_start().
+ */
+static int __init pingsrv_init(void)
+{
+        ping_head_magic = PING_HEADER_MAGIC;
+        ping_bulk_magic = PING_BULK_MAGIC;
+
+        PORTAL_ALLOC (server, sizeof(struct pingsrv_data));
+        if (server == NULL) {
+                /* pingsrv_setup() dereferences 'server' unconditionally */
+                CERROR ("Unable to allocate server structure\n");
+                return -ENOMEM;
+        }
+
+        return pingsrv_start ();
+} /* pingsrv_init() */
+
+
+/*
+ * Module exit point: removes the "net/pingsrv" proc entry, asks the server
+ * thread to stop by clearing 'running', wakes it, and then polls once per
+ * second until the thread has set 'running' back to 1.
+ */
+static void __exit pingsrv_cleanup(void)
+{
+        remove_proc_entry ("net/pingsrv", NULL);
+
+        running = 0;
+        wake_up_process (server->tsk);
+
+        for (;;) {
+                if (running == 1)
+                        break;
+                set_current_state (TASK_UNINTERRUPTIBLE);
+                schedule_timeout (HZ);
+        }
+} /* pingsrv_cleanup() */
+
+
+MODULE_PARM(nal, "i");
+MODULE_PARM_DESC(nal, "Use the specified NAL "
+ "(6-kscimacnal, 4-toenal, 2-ksocknal, 1-kqswnal)");
+
+MODULE_AUTHOR("Brian Behlendorf (LLNL)");
+MODULE_DESCRIPTION("A kernel space ping server for portals testing");
+MODULE_LICENSE("GPL");
+
+module_init(pingsrv_init);
+module_exit(pingsrv_cleanup);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
+ * Author: Brian Behlendorf <behlendorf1@llnl.gov>
+ * Kedar Sovani (kedar@calsoftinc.com)
+ * Amey Inamdar (amey@calsoftinc.com)
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+/* This is a stripped-down version of pinger. It follows a single
+ * request-response protocol. It does not do bulk data pinging, and it
+ * does not send multiple packets in a single ioctl.
+ */
+
+
+#define DEBUG_SUBSYSTEM S_PINGER
+
+#include <linux/kp30.h>
+#include <portals/p30.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include "ping.h"
+/* int portal_debug = D_PING_CLI; */
+
+
+/* Size of both the outgoing and the incoming ping buffer. */
+#define STDSIZE (sizeof(int) + sizeof(int) + 4) /* The data is 4 bytes
+ assumed */
+
+/* This should be enclosed in a structure */
+
+/* Per-request client state: allocated in kping_client() and released by
+ * pingcli_shutdown(). */
+static struct pingcli_data *client = NULL;
+
+/* Only ever assigned 0 (in pingcli_start()) in this source file. */
+static int count = 0;
+
+/*
+ * Release the resources held by the simple ping client.
+ *
+ * 'err' selects the entry point into a deliberate fall-through chain:
+ *   1 - unlink the outgoing MD, then everything below
+ *   2 - free the EQ and unlink the ME, then below
+ *   3 - drop the NI reference taken with kportal_get_ni(), then below
+ *   4 - free both buffers and the client structure
+ * Any other value releases nothing.  Portals failures are only logged.
+ */
+static void
+pingcli_shutdown(int err)
+{
+        int rc;
+
+        switch (err) {
+        case 1:
+                /* Unlink any memory descriptors we may have used */
+                rc = PtlMDUnlink (client->md_out_head_h);
+                if (rc)
+                        PDEBUG ("PtlMDUnlink", rc);
+                /* fall through */
+        case 2:
+                /* Free the event queue */
+                rc = PtlEQFree (client->eq);
+                if (rc)
+                        PDEBUG ("PtlEQFree", rc);
+
+                rc = PtlMEUnlink (client->me);
+                if (rc)
+                        PDEBUG ("PtlMEUnlink", rc);
+                /* fall through */
+        case 3:
+                kportal_put_ni (client->args->ioc_nal);
+                /* fall through */
+        case 4:
+                /* Test the structure before looking inside it */
+                if (client == NULL)
+                        break;
+
+                /* Free our buffers */
+                if (client->outbuf != NULL)
+                        PORTAL_FREE (client->outbuf, STDSIZE);
+
+                if (client->inbuf != NULL)
+                        PORTAL_FREE (client->inbuf, STDSIZE);
+
+                PORTAL_FREE (client, sizeof(struct pingcli_data));
+                /* never leave the global pointing at freed memory */
+                client = NULL;
+                break;
+        default:
+                break;
+        }
+
+        CDEBUG (D_OTHER, "ping client released resources\n");
+} /* pingcli_shutdown() */
+
+/* Event-queue callback for the incoming buffer: wakes the task stored in
+ * client->tsk, which pingcli_start() sets to the sleeping caller.  The
+ * event itself is not inspected.  Always returns 1. */
+static int pingcli_callback(ptl_event_t *ev)
+{
+ wake_up_process (client->tsk);
+ return 1;
+}
+
+
+/*
+ * Send a single STDSIZE-byte ping to args->ioc_nid over the NAL given by
+ * args->ioc_nal and wait up to 20 * args->ioc_timeout ticks for the reply,
+ * printing the outcome.
+ *
+ * The global 'client' must already have been allocated by the caller; it
+ * is freed (via pingcli_shutdown()) before this function returns, on every
+ * path.  The return value is always NULL.
+ */
+static struct pingcli_data *
+pingcli_start(struct portal_ioctl_data *args)
+{
+        const ptl_handle_ni_t *nip;
+        unsigned ping_head_magic = PING_HEADER_MAGIC;
+        int level;              /* pingcli_shutdown() entry point */
+        int rc;
+
+        client->tsk = current;
+        client->args = args;
+
+        CDEBUG (D_OTHER, "pingcli_setup args: nid "LPX64", \
+ nal %d, size %u, count: %u, timeout: %u\n",
+                args->ioc_nid, args->ioc_nal, args->ioc_size,
+                args->ioc_count, args->ioc_timeout);
+
+        PORTAL_ALLOC (client->outbuf, STDSIZE);
+        if (client->outbuf == NULL) {
+                CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE);
+                level = 4;
+                goto out;
+        }
+
+        PORTAL_ALLOC (client->inbuf, STDSIZE);
+        if (client->inbuf == NULL) {
+                CERROR ("Unable to allocate in_buf ("LPSZ" bytes)\n", STDSIZE);
+                level = 4;
+                goto out;
+        }
+
+        /* Acquire and initialize the proper nal for portals. */
+        nip = kportal_get_ni (args->ioc_nal);
+        if (nip == NULL) {
+                CERROR ("NAL %d not loaded.\n", args->ioc_nal);
+                level = 4;
+                goto out;
+        }
+
+        /* Based on the initialization acquire our unique portal ID.
+         * Only the NI reference is held at this point, hence level 3. */
+        rc = PtlGetId (*nip, &client->myid);
+        if (rc) {
+                CERROR ("PtlGetId error %d\n", rc);
+                level = 3;
+                goto out;
+        }
+
+        /* Setup the local match entries */
+        client->id_local.nid = PTL_NID_ANY;
+        client->id_local.pid = PTL_PID_ANY;
+
+        /* Setup the remote match entries */
+        client->id_remote.nid = args->ioc_nid;
+        client->id_remote.pid = 0;
+
+        rc = PtlMEAttach (*nip, PTL_PING_CLIENT,
+                          client->id_local, 0, ~0, PTL_RETAIN,
+                          PTL_INS_AFTER, &client->me);
+        if (rc) {
+                CERROR ("PtlMEAttach error %d\n", rc);
+                level = 3;
+                goto out;
+        }
+
+        /* Allocate the event queue for this network interface */
+        rc = PtlEQAlloc (*nip, 64, pingcli_callback, &client->eq);
+        if (rc) {
+                CERROR ("PtlEQAlloc error %d\n", rc);
+                level = 2;
+                goto out;
+        }
+
+        client->md_in_head.start     = client->inbuf;
+        client->md_in_head.length    = STDSIZE;
+        client->md_in_head.threshold = 1;
+        client->md_in_head.options   = PTL_MD_OP_PUT;
+        client->md_in_head.user_ptr  = NULL;
+        client->md_in_head.eventq    = client->eq;
+        memset (client->inbuf, 0, STDSIZE);
+
+        /* Attach the incoming buffer */
+        rc = PtlMDAttach (client->me, client->md_in_head,
+                          PTL_UNLINK, &client->md_in_head_h);
+        if (rc) {
+                CERROR ("PtlMDAttach error %d\n", rc);
+                /* the outgoing MD does not exist yet */
+                level = 2;
+                goto out;
+        }
+
+        /* Setup the outgoing ping header */
+        client->md_out_head.start     = client->outbuf;
+        client->md_out_head.length    = STDSIZE;
+        client->md_out_head.threshold = 1;
+        client->md_out_head.options   = PTL_MD_OP_PUT;
+        client->md_out_head.user_ptr  = NULL;
+        client->md_out_head.eventq    = PTL_EQ_NONE;
+
+        memcpy (client->outbuf, &ping_head_magic, sizeof(ping_head_magic));
+
+        /* Bind the outgoing ping header */
+        rc = PtlMDBind (*nip, client->md_out_head, &client->md_out_head_h);
+        if (rc) {
+                CERROR ("PtlMDBind error %d\n", rc);
+                level = 2;
+                goto out;
+        }
+
+        /* Put the ping packet */
+        rc = PtlPut (client->md_out_head_h, PTL_NOACK_REQ,
+                     client->id_remote, PTL_PING_SERVER, 0, 0, 0, 0);
+        if (rc) {
+                PDEBUG ("PtlPut (header)", rc);
+                level = 1;
+                goto out;
+        }
+
+        count = 0;
+        /* NOTE(review): a reply that arrives before the state change below
+         * is not noticed until the timeout expires; confirm whether PtlPut
+         * may sleep before reordering. */
+        set_current_state (TASK_INTERRUPTIBLE);
+        rc = schedule_timeout (20 * args->ioc_timeout);
+        if (rc == 0)
+                printk (" Time out on the server\n");
+        else
+                printk("Received respose from the server \n");
+
+        /* Timeout and success release the same resources */
+        level = 2;
+
+out:
+        pingcli_shutdown (level);
+        return NULL;
+} /* pingcli_setup() */
+
+
+
+/*
+ * Entry point called by the portals ioctl handler for ping requests.
+ * Allocates and zeroes the global client state, then hands 'args' to
+ * pingcli_start().  Returns 0 in every case, including allocation failure.
+ */
+static int kping_client(struct portal_ioctl_data *args)
+{
+        PORTAL_ALLOC (client, sizeof(struct pingcli_data));
+        /* Check the allocation before touching the memory */
+        if (client == NULL) {
+                CERROR ("Unable to allocate client structure\n");
+                return (0);
+        }
+        memset (client, 0, sizeof(struct pingcli_data));
+
+        pingcli_start (args);
+
+        return 0;
+} /* kping_client() */
+
+
+/* Module entry point: publishes kping_client through
+ * PORTAL_SYMBOL_REGISTER (presumably so the portals ioctl path can look it
+ * up - confirm against the macro's definition).  Always returns 0. */
+static int __init pingcli_init(void)
+{
+ PORTAL_SYMBOL_REGISTER(kping_client);
+ return 0;
+} /* pingcli_init() */
+
+
+/* Module exit point: withdraws the kping_client registration made in
+ * pingcli_init(). */
+static void __exit pingcli_cleanup(void)
+{
+ PORTAL_SYMBOL_UNREGISTER (kping_client);
+} /* pingcli_cleanup() */
+
+
+MODULE_AUTHOR("Brian Behlendorf (LLNL)");
+MODULE_DESCRIPTION("A simple kernel space ping client for portals testing");
+MODULE_LICENSE("GPL");
+
+module_init(pingcli_init);
+module_exit(pingcli_cleanup);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+EXPORT_SYMBOL (kping_client);
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
+ * Author: Brian Behlendorf <behlendorf1@llnl.gov>
+ * Amey Inamdar <amey@calsoftinc.com>
+ * Kedar Sovani <kedar@calsoftinc.com>
+ *
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* This is a stripped-down version of pinger. It follows a single
+ * request-response protocol. It does not do bulk data pinging, and it
+ * does not send multiple packets in a single ioctl.
+ */
+
+#define DEBUG_SUBSYSTEM S_PINGER
+
+#include <linux/kp30.h>
+#include <portals/p30.h>
+#include "ping.h"
+
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#include <linux/workqueue.h>
+#else
+#include <linux/tqueue.h>
+#endif
+#include <linux/wait.h>
+#include <linux/smp_lock.h>
+
+#include <asm/unistd.h>
+#include <asm/semaphore.h>
+
+/* Size of the server's single receive/reply buffer. */
+#define STDSIZE (sizeof(int) + sizeof(int) + 4)
+
+static int nal = 0; // Your NAL,
+static unsigned long packets_valid = 0; // Valid packets
+/* 1 while the server thread should run; pingsrv_cleanup() clears it and the
+ * thread sets it back to 1 once it has shut down. */
+static int running = 1;
+/* Count of received pings not yet answered by pingsrv_thread(). */
+atomic_t pkt;
+
+static struct pingsrv_data *server=NULL; // Our ping server
+
+/* Empty version guard: nothing is conditionally compiled here. */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#endif
+
+/*
+ * Release the simple ping server's resources and return NULL (so callers
+ * can write "return pingsrv_shutdown(n);").
+ *
+ * 'err' selects the entry point into a deliberate fall-through chain:
+ *   1 - unlink the incoming MD, then everything below
+ *   2 - free the EQ and unlink the ME, then below
+ *   3 - drop the NI reference, then below
+ *   4 - free the receive buffer and the server structure
+ * Any other value releases nothing.  Portals failures are only logged.
+ */
+static void *pingsrv_shutdown(int err)
+{
+        int rc;
+
+        switch (err) {
+        case 1:
+                /* Unlink any memory descriptors we may have used */
+                rc = PtlMDUnlink (server->mdin_h);
+                if (rc)
+                        PDEBUG ("PtlMDUnlink (out head buffer)", rc);
+                /* fall through */
+        case 2:
+                /* Free the event queue */
+                rc = PtlEQFree (server->eq);
+                if (rc)
+                        PDEBUG ("PtlEQFree", rc);
+
+                /* Unlink the client portal from the ME list */
+                rc = PtlMEUnlink (server->me);
+                if (rc)
+                        PDEBUG ("PtlMEUnlink", rc);
+                /* fall through */
+        case 3:
+                kportal_put_ni (nal);
+                /* fall through */
+        case 4:
+                /* Test the structure before looking inside it */
+                if (server == NULL)
+                        break;
+
+                if (server->in_buf != NULL)
+                        PORTAL_FREE (server->in_buf, STDSIZE);
+
+                PORTAL_FREE (server, sizeof (struct pingsrv_data));
+                break;
+        default:
+                break;
+        }
+
+        CDEBUG (D_OTHER, "ping sever resources released\n");
+        return NULL;
+} /* pingsrv_shutdown() */
+
+
+/*
+ * Server thread: sleeps until pingsrv_packet() reports a received ping,
+ * then re-arms the receive MD and sends the STDSIZE-byte receive buffer
+ * back to the initiator on PTL_PING_CLIENT.
+ *
+ * Runs until pingsrv_cleanup() clears 'running'; it then releases all
+ * server resources and sets 'running' back to 1 as the "thread has
+ * exited" acknowledgement.  Returns 0 on that path, 1 if PtlMDBind fails.
+ */
+int pingsrv_thread(void *arg)
+{
+        int rc;
+
+        kportal_daemonize ("pingsrv");
+        server->tsk = current;
+
+        for (;;) {
+                /* Change state before testing the wake-up conditions so a
+                 * wake_up_process() in between is not lost. */
+                set_current_state (TASK_INTERRUPTIBLE);
+                if (!running)
+                        break;
+                if (atomic_read (&pkt) == 0) {
+                        schedule_timeout (MAX_SCHEDULE_TIMEOUT);
+                        continue;
+                }
+                set_current_state (TASK_RUNNING);
+
+                server->mdout.start     = server->in_buf;
+                server->mdout.length    = STDSIZE;
+                server->mdout.threshold = 1;
+                server->mdout.options   = PTL_MD_OP_PUT;
+                server->mdout.user_ptr  = NULL;
+                server->mdout.eventq    = PTL_EQ_NONE;
+
+                /* Bind the outgoing buffer */
+                rc = PtlMDBind (server->ni, server->mdout, &server->mdout_h);
+                if (rc) {
+                        PDEBUG ("PtlMDBind", rc);
+                        /* NOTE(review): 'server' is freed here while
+                         * pingsrv_cleanup() still expects a live thread -
+                         * confirm how module unload should behave. */
+                        pingsrv_shutdown (1);
+                        return 1;
+                }
+
+                /* Re-arm the receive buffer for the next ping */
+                server->mdin.start     = server->in_buf;
+                server->mdin.length    = STDSIZE;
+                server->mdin.threshold = 1;
+                server->mdin.options   = PTL_MD_OP_PUT;
+                server->mdin.user_ptr  = NULL;
+                server->mdin.eventq    = server->eq;
+
+                rc = PtlMDAttach (server->me, server->mdin,
+                                  PTL_UNLINK, &server->mdin_h);
+                if (rc)
+                        PDEBUG ("PtlMDAttach (bulk)", rc);
+
+                rc = PtlPut (server->mdout_h, PTL_NOACK_REQ,
+                             server->evnt.initiator, PTL_PING_CLIENT,
+                             0, 0, 0, 0);
+                if (rc)
+                        PDEBUG ("PtlPut", rc);
+
+                atomic_dec (&pkt);
+        }
+        set_current_state (TASK_RUNNING);
+
+        pingsrv_shutdown (1);
+        running = 1;
+        return 0;
+}
+
+/*
+ * Record one received ping and wake the server thread.  Always returns 1.
+ *
+ * The receive MD is attached before the thread has stored itself in
+ * server->tsk, so a very early ping may find no task yet.  The count is
+ * still bumped; the thread tests it before it first sleeps.
+ * (Assumes PORTAL_ALLOC zero-fills 'server' - TODO confirm.)
+ */
+static int pingsrv_packet(ptl_event_t *ev)
+{
+        atomic_inc (&pkt);
+        if (server->tsk != NULL)
+                wake_up_process (server->tsk);
+        return 1;
+} /* pingsrv_packet() */
+
+/*
+ * Event-queue callback for the server's receive buffer.
+ *
+ * Saves a copy of the event in server->evnt for pingsrv_thread(), logs the
+ * sender and the first word of the message, counts the packet and hands
+ * over to pingsrv_packet().  Returns 0 for a NULL event, otherwise
+ * whatever pingsrv_packet() returns.
+ */
+static int pingsrv_callback(ptl_event_t *ev)
+{
+        int head;
+
+        if (ev == NULL) {
+                CERROR ("null in callback, ev=%p\n", ev);
+                return 0;
+        }
+
+        server->evnt = *ev;
+
+        head = *((int *)(ev->mem_desc.start + ev->offset));
+        printk ("received ping from nid "LPX64" "
+                "(off=%u rlen=%u mlen=%u head=%x)\n",
+                ev->initiator.nid, ev->offset, ev->rlength, ev->mlength,
+                head);
+
+        packets_valid++;
+
+        return pingsrv_packet(ev);
+} /* pingsrv_callback() */
+
+
+/*
+ * Attach the simple ping server to the NAL selected by the 'nal' module
+ * parameter: take an NI reference, create the match entry on
+ * PTL_PING_SERVER, allocate the event queue and the STDSIZE receive
+ * buffer, and attach the receive MD.
+ *
+ * Returns 'server' on success.  On any failure everything acquired so far
+ * (including 'server' itself) is released and NULL is returned.
+ */
+static struct pingsrv_data *pingsrv_setup(void)
+{
+        ptl_handle_ni_t *nip;
+        int rc;
+
+        /* Acquire and initialize the proper nal for portals. */
+        nip = kportal_get_ni (nal);
+        if (nip == NULL) {
+                CDEBUG (D_OTHER, "Nal %d not loaded.\n", nal);
+                return pingsrv_shutdown (4);
+        }
+
+        server->ni = *nip;
+
+        /* Based on the initialization acquire our unique portal ID.
+         * Only the NI reference is held so far, hence level 3. */
+        rc = PtlGetId (server->ni, &server->my_id);
+        if (rc) {
+                PDEBUG ("PtlGetId", rc);
+                return pingsrv_shutdown (3);
+        }
+
+        server->id_local.nid = PTL_NID_ANY;
+        server->id_local.pid = PTL_PID_ANY;
+
+        /* Attach a match entries for header packets */
+        rc = PtlMEAttach (server->ni, PTL_PING_SERVER,
+                          server->id_local, 0, ~0,
+                          PTL_RETAIN, PTL_INS_AFTER, &server->me);
+        if (rc) {
+                PDEBUG ("PtlMEAttach", rc);
+                return pingsrv_shutdown (3);
+        }
+
+        rc = PtlEQAlloc (server->ni, 64, pingsrv_callback, &server->eq);
+        if (rc) {
+                PDEBUG ("PtlEQAlloc (callback)", rc);
+                return pingsrv_shutdown (2);
+        }
+
+        PORTAL_ALLOC (server->in_buf, STDSIZE);
+        if (server->in_buf == NULL) {
+                CDEBUG (D_OTHER,"Allocation error\n");
+                return pingsrv_shutdown (2);
+        }
+
+        /* Setup the incoming buffer */
+        server->mdin.start     = server->in_buf;
+        server->mdin.length    = STDSIZE;
+        server->mdin.threshold = 1;
+        server->mdin.options   = PTL_MD_OP_PUT;
+        server->mdin.user_ptr  = NULL;
+        server->mdin.eventq    = server->eq;
+        memset (server->in_buf, 0, STDSIZE);
+
+        /* Without the receive MD the server can never see a ping, so a
+         * failure here is a setup failure, not a success. */
+        rc = PtlMDAttach (server->me, server->mdin,
+                          PTL_UNLINK, &server->mdin_h);
+        if (rc) {
+                PDEBUG ("PtlMDAttach (bulk)", rc);
+                return pingsrv_shutdown (2);
+        }
+
+        CDEBUG (D_OTHER, "ping server resources allocated\n");
+
+        /* Success! */
+        return server;
+} /* pingsrv_setup() */
+
+/*
+ * Set the server up and start its worker thread.
+ *
+ * Returns 0 on success, -ENOMEM if pingsrv_setup() fails, or the negative
+ * value returned by kernel_thread() if the thread cannot be created (in
+ * which case the resources acquired by pingsrv_setup() are released).
+ */
+static int pingsrv_start(void)
+{
+        int rc;
+
+        /* Setup our server */
+        if (!pingsrv_setup()) {
+                CDEBUG (D_OTHER, "pingsrv_setup() failed, server stopped\n");
+                return -ENOMEM;
+        }
+
+        rc = kernel_thread (pingsrv_thread, NULL, 0);
+        if (rc < 0) {
+                CERROR ("Unable to start pingsrv thread: %d\n", rc);
+                pingsrv_shutdown (1);
+                return rc;
+        }
+
+        return 0;
+} /* pingsrv_start() */
+
+
+
+/*
+ * Module entry point: allocates the server structure and starts the
+ * server.  Returns -ENOMEM if the structure cannot be allocated, otherwise
+ * the result of pingsrv_start().
+ */
+static int __init pingsrv_init(void)
+{
+        PORTAL_ALLOC (server, sizeof(struct pingsrv_data));
+        if (server == NULL) {
+                /* pingsrv_setup() dereferences 'server' unconditionally */
+                CERROR ("Unable to allocate server structure\n");
+                return -ENOMEM;
+        }
+
+        return pingsrv_start ();
+} /* pingsrv_init() */
+
+
+/*
+ * Module exit point: removes the "net/pingsrv" proc entry, asks the server
+ * thread to stop by clearing 'running', wakes it, and then polls once per
+ * second until the thread has set 'running' back to 1.
+ */
+static void __exit pingsrv_cleanup(void)
+{
+        remove_proc_entry ("net/pingsrv", NULL);
+
+        running = 0;
+        wake_up_process (server->tsk);
+
+        for (;;) {
+                if (running == 1)
+                        break;
+                set_current_state (TASK_UNINTERRUPTIBLE);
+                schedule_timeout (HZ);
+        }
+} /* pingsrv_cleanup() */
+
+
+MODULE_PARM(nal, "i");
+MODULE_PARM_DESC(nal, "Use the specified NAL "
+ "(6-kscimacnal, 4-toenal, 2-ksocknal, 1-kqswnal)");
+
+MODULE_AUTHOR("Brian Behlendorf (LLNL)");
+MODULE_DESCRIPTION("A kernel space ping server for portals testing");
+MODULE_LICENSE("GPL");
+
+module_init(pingsrv_init);
+module_exit(pingsrv_cleanup);
--- /dev/null
+#!/bin/sh
+
+SIMPLE=${SIMPLE:-0}
+
+if [ $SIMPLE -eq 0 ]; then
+ PING=pingcli.o
+else
+ PING=spingcli.o
+fi
+
+case "$1" in
+ toe)
+ /sbin/insmod ../oslib/portals.o
+ /sbin/insmod ../toenal/ktoenal.o
+ /sbin/insmod ./$PING
+ echo ktoenal > /tmp/nal
+ ;;
+
+ tcp)
+ /sbin/insmod ../oslib/portals.o
+ /sbin/insmod ../socknal/ksocknal.o
+ /sbin/insmod ./$PING
+ echo ksocknal > /tmp/nal
+ ;;
+
+ elan)
+ /sbin/insmod ../oslib/portals.o
+ /sbin/insmod ../qswnal/kqswnal.o
+ /sbin/insmod ./$PING
+ echo kqswnal > /tmp/nal
+ ;;
+
+ *)
+ echo "Usage : ${0} < tcp | toe | elan >"
+ exit 1;
+esac
+exit 0;
--- /dev/null
+#!/bin/sh
+
+SIMPLE=${SIMPLE:-0}
+
+if [ $SIMPLE -eq 0 ]; then
+ PING=pingsrv.o
+else
+ PING=spingsrv.o
+fi
+
+case "$1" in
+ toe)
+ /sbin/insmod ../oslib/portals.o
+ /sbin/insmod ../toenal/ktoenal.o
+ /sbin/insmod ./$PING nal=4
+ echo ktoenal > /tmp/nal
+ ;;
+
+ tcp)
+ /sbin/insmod ../oslib/portals.o
+ /sbin/insmod ../socknal/ksocknal.o
+ /sbin/insmod ./$PING nal=2
+ echo ksocknal > /tmp/nal
+ ;;
+
+ elan)
+ /sbin/insmod ../oslib/portals.o
+ /sbin/insmod ../qswnal/kqswnal.o
+ /sbin/insmod ./$PING nal=4
+ echo kqswnal > /tmp/nal
+ ;;
+
+ *)
+ echo "Usage : ${0} < tcp | toe | elan >"
+ exit 1;
+esac
+../utils/acceptor 9999&
+exit 0;
--- /dev/null
+#!/bin/sh
+
+SIMPLE=${SIMPLE:-1}
+
+if [ $SIMPLE -eq 0 ]; then
+ PING=spingcli
+else
+ PING=pingcli
+fi
+
+rmmod $PING
+NAL=`cat /tmp/nal`;
+rmmod $NAL
+rmmod portals
--- /dev/null
+#!/bin/sh
+
+SIMPLE=${SIMPLE:-1}
+
+if [ $SIMPLE -eq 0 ]; then
+ PING=spingsrv
+else
+ PING=pingsrv
+fi
+
+rmmod $PING
+NAL=`cat /tmp/nal`;
+rmmod $NAL
+killall -9 acceptor
+rm -f /var/run/acceptor-9999.pid
+rmmod portals
--- /dev/null
+# Automake rules for the static library libtcpnal.a.
+# CPPFLAGS is deliberately set to empty here.
+CPPFLAGS=
+# Header search path: the portals and top-level include directories plus
+# this source directory.
+INCLUDES=-I$(top_srcdir)/portals/include -I$(top_srcdir)/include -I$(srcdir)
+lib_LIBRARIES = libtcpnal.a
+# Headers installed alongside the library.
+pkginclude_HEADERS = pqtimer.h dispatch.h table.h timer.h connection.h
+# The headers are listed again here along with the .c files.
+libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h dispatch.h table.h timer.h address.c procapi.c proclib.c connection.c tcpnal.c connection.h
--- /dev/null
+This library implements two NAL interfaces, both running over IP.
+The first, tcpnal, creates TCP connections between participating
+processes in order to transport the portals requests. The second,
+ernal, provides a simple transport protocol which runs over
+UDP datagrams.
+
+The interface functions return both of these values in host byte order
+for convenience and readability. However, this means that addresses
+exchanged in messages between hosts of different byte orders will not
+function properly.
+
+Both NALs use the same support functions in order to schedule events
+and communicate with the generic portals implementation.
+
+ -------------------------
+ | api |
+ |_______________________|
+ | lib |
+ |_______________________|
+ | ernal | |tcpnal |
+ |--------| |----------|
+ | udpsock| |connection|
+ |-----------------------|
+ | timer/select |
+ -------------------------
+
+
+ These NALs use the framework from fdnal: a pipe between the api
+and library sides. The pipe is wrapped up in the select on the library
+side, and blocks on the api side. Performance could be greatly
+improved by collapsing this artificial barrier, by using shared
+memory queues, or by wiring the api layer directly to the library.
+
+
+nid is defined as the low order 24-bits of the IP address of the
+physical node left shifted by 8 plus a virtual node number of 0
+through 255 (really only 239). The virtual node number of a tcpnal
+application should be specified using the environment variable
+PTL_VIRTNODE. pid is now a completely arbitrary number in the
+range of 0 to 255. The IP interface used can be overridden by
+specifying the appropriate hostid by setting the PTL_HOSTID
+environment variable. The value can be either dotted decimal
+(n.n.n.n) or hex starting with "0x".
+TCPNAL:
+ As the NAL needs to try to send to a particular nid/pid pair, it
+ will open up connections on demand. Because the port associated with
+ the connecting socket is different from the bound port, two
+ connections will normally be established between a pair of peers, with
+ data flowing from the anonymous connect (active) port to the advertised
+ or well-known bound (passive) port of each peer.
+
+ Should the connection fail to open, an error is reported to the
+ library component, which causes the api request to fail.
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* address.c:
+ * this file provides functions to acquire the IP address of the node
+ * and translate it into a NID/PID pair which supports a static
+ * mapping of virtual nodes into the port range of an IP socket.
+*/
+
+#include <stdlib.h>
+#include <netdb.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <portals/p30.h>
+#include <bridge.h>
+#include <ipmap.h>
+
+
+/* Function: get_node_id
+ * Returns: a 32 bit id for this node: its IPv4 address in host byte
+ *          order, or 0 if no address could be determined
+ *
+ * If the PTL_HOSTID environment variable is set, it is parsed either
+ * as a dotted-decimal address (n.n.n.n) or, when its second character
+ * is 'x', as a number by strtoll() (e.g. "0x0a000001").
+ *
+ * Otherwise get_node_id() determines the host name and uses the
+ * resolver to find out its ip address.  This is fairly fragile and
+ * inflexible, but explicitly asking about interfaces and their
+ * addresses is very complicated and nonportable.
+ */
+static unsigned int get_node_id(void)
+{
+        char *host_envp = getenv("PTL_HOSTID");
+
+        if (!host_envp) {
+                char buffer[255];
+                struct hostent *he;
+                const unsigned char *raw;
+
+                if (gethostname(buffer, sizeof(buffer)) != 0)
+                        return(0);
+                /* a truncated host name is not guaranteed to be terminated */
+                buffer[sizeof(buffer) - 1] = '\0';
+
+                he = gethostbyname(buffer);
+                if (!he || he->h_length < 4 || !he->h_addr_list[0])
+                        return(0);
+
+                /* The resolver returns the address in network (big-endian)
+                 * byte order.  Assembling it byte by byte yields the host
+                 * order value directly and avoids reading a char buffer
+                 * through an unsigned int pointer. */
+                raw = (const unsigned char *)he->h_addr_list[0];
+                return(((unsigned int)raw[0] << 24) |
+                       ((unsigned int)raw[1] << 16) |
+                       ((unsigned int)raw[2] << 8) |
+                        (unsigned int)raw[3]);
+        }
+
+        /* Check [0] first so an empty string is never indexed past its
+         * terminator. */
+        if (host_envp[0] != '\0' && host_envp[1] == 'x') {
+                long long hostid = strtoll(host_envp, 0, 0);
+                return((unsigned int) hostid);
+        } else {
+                unsigned int a, b, c, d;
+
+                /* reject malformed input instead of combining
+                 * uninitialised or out-of-range octets */
+                if (sscanf(host_envp, "%u.%u.%u.%u", &a, &b, &c, &d) != 4)
+                        return(0);
+                if (a > 255 || b > 255 || c > 255 || d > 255)
+                        return(0);
+                return((a << 24) | (b << 16) | (c << 8) | d);
+        }
+}
+
+
+/* Function: set_address
+ * Arguments: t: a procnal structure to populate with the request
+ *            pidrequest: the portals pid requested by the caller
+ *
+ * set_address performs the bit manipulations to set the nid, pid, and
+ * (in the non-DIRECT_IP_MODE build) iptop8 fields of the procnal
+ * structures.
+ *
+ * In DIRECT_IP_MODE the nid is the node id returned by get_node_id()
+ * and the pid is the requested pid, with PTL_PID_ANY mapped to 0.
+ *
+ * Otherwise the nid is built from the low bits of the node id and the
+ * virtual node number taken from the PTL_VIRTNODE environment
+ * variable.  On invalid input a message is printed to stderr and the
+ * function returns early; since it returns void the caller gets no
+ * error indication, and fields not yet written are left untouched.
+ *
+ * TODO: fix pidrequest to try to do dynamic binding if PTL_ID_ANY
+ */
+
+#ifdef DIRECT_IP_MODE
+void set_address(bridge t,ptl_pid_t pidrequest)
+{
+        int port;
+
+        if (pidrequest == (unsigned short)PTL_PID_ANY)
+                port = 0;
+        else
+                port = pidrequest;
+
+        t->nal_cb->ni.nid = get_node_id();
+        t->nal_cb->ni.pid = port;
+}
+#else
+
+void set_address(bridge t,ptl_pid_t pidrequest)
+{
+        int virtnode = 0, port;
+        unsigned int in_addr;   /* unsigned so the right shift below is well defined */
+        ptl_pid_t pid;
+        const char *virtnode_env = getenv("PTL_VIRTNODE");
+
+        /* get and remember my node id*/
+        if (virtnode_env) {
+                int maxvnode = PNAL_VNODE_MASK - (PNAL_BASE_PORT
+                                                  >> PNAL_VNODE_SHIFT);
+                virtnode = atoi(virtnode_env);
+                if (virtnode > maxvnode) {
+                        fprintf(stderr, "PTL_VIRTNODE of %d is too large - max %d\n",
+                                virtnode, maxvnode);
+                        return;
+                }
+                /* a negative virtual node would corrupt the nid below */
+                if (virtnode < 0) {
+                        fprintf(stderr, "PTL_VIRTNODE of %d is negative\n",
+                                virtnode);
+                        return;
+                }
+        }
+
+        in_addr = get_node_id();
+
+        t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */
+        t->nal_cb->ni.nid = ((in_addr & PNAL_HOSTID_MASK)
+                             << PNAL_VNODE_SHIFT)
+                            + virtnode;
+
+        pid = pidrequest;
+        /* TODO: Support of pid PTL_ID_ANY with virtual nodes needs more work. */
+#ifdef notyet
+        if (pid==(unsigned short)PTL_PID_ANY) port = 0;
+#endif
+        if (pid == (unsigned short)PTL_PID_ANY) {
+                fprintf(stderr, "portal pid PTL_ID_ANY is not currently supported\n");
+                return;
+        }
+        if (pid > PNAL_PID_MASK) {
+                fprintf(stderr, "portal pid of %d is too large - max %d\n",
+                        pid, PNAL_PID_MASK);
+                return;
+        }
+
+        /* The port is computed but not stored anywhere by this function. */
+        port = ((virtnode << PNAL_VNODE_SHIFT) + pid) + PNAL_BASE_PORT;
+        (void)port;
+        t->nal_cb->ni.pid = pid;
+}
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+#include <portals/lib-p30.h>
+
+/* State handed between the api side and the library side of a NAL.
+ * Note that the typedef name `bridge` is a POINTER to struct bridge. */
+typedef struct bridge {
+ int alive;
+ /* library-side NAL structure; set_address() writes its ni.nid/ni.pid */
+ nal_cb_t *nal_cb;
+ void *lower;
+ /* opaque per-transport state; the procbridge code stores its
+ * struct procbridge pointer here */
+ void *local;
+ void (*shutdown)(struct bridge *);
+ /* this doesn't really belong here */
+ /* top 8 bits of the local IP address, used by PNAL_IP() to rebuild
+ * a full address from a nid */
+ unsigned char iptop8;
+} *bridge;
+
+
+nal_t *bridge_init(ptl_interface_t nal,
+ ptl_pid_t pid_request,
+ ptl_ni_limits_t *desired,
+ ptl_ni_limits_t *actual,
+ int *rc);
+
+typedef int (*nal_initialize)(bridge);
+extern nal_initialize nal_table[PTL_IFACE_MAX];
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* connection.c:
+ This file provides a simple stateful connection manager which
+ builds tcp connections on demand and leaves them open for
+ future use. It also provides the machinery to allow peers
+ to connect to it
+*/
+
+#include <stdlib.h>
+#include <pqtimer.h>
+#include <dispatch.h>
+#include <table.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+#include <syscall.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <connection.h>
+#include <errno.h>
+
+
+/* global variable: acceptor port */
+unsigned short tcpnal_acceptor_port = 988;
+
+
+/* Function: compare_connection
+ * Arguments: arg1: a connection stored in the hash table
+ *            arg2: the lookup key, an array of two unsigned ints
+ *                  holding { ip, port }
+ * Returns: 1 if the connection is the one requested, 0 otherwise
+ *
+ * compare_connection() is the equality callback handed to the hash
+ * table; it resolves collisions between entries sharing a hash.
+ */
+static int compare_connection(void *arg1, void *arg2)
+{
+        connection candidate = arg1;
+        unsigned int *key = arg2;
+
+        if (candidate->ip != key[0])
+                return(0);
+        return(candidate->port == key[1]);
+}
+
+
+/* Function: connection_key
+ * Arguments: id: the lookup key, an array of two unsigned ints
+ *                holding { ip, port }
+ * Returns: a not-particularly-well-distributed hash of the key
+ *          (the two words xor-ed together)
+ */
+static unsigned int connection_key(unsigned int *id)
+{
+        unsigned int ip = id[0];
+        unsigned int port = id[1];
+
+        return(ip ^ port);
+}
+
+
+/* Function: remove_connection
+ * Arguments: arg: the connection to remove (passed as void * so the
+ *                 function can also serve as a hash table destructor)
+ *
+ * Unlinks the connection from its manager's table, closes its socket
+ * and frees it.  The pointer must not be used afterwards.
+ *
+ * NOTE(review): the io handler registered for this fd in
+ * allocate_connection() is not removed here.
+ */
+void remove_connection(void *arg)
+{
+        connection victim = arg;
+        unsigned int key[2];
+
+        key[0] = victim->ip;
+        key[1] = victim->port;
+
+        hash_table_remove(victim->m->connections, key);
+        close(victim->fd);
+        free(victim);
+}
+
+
+/* Function: read_connection:
+ * Arguments: c: the connection to read from
+ *            dest: the buffer to read into
+ *            len: the number of bytes to read
+ * Returns: success as 1, or failure as 0
+ *
+ * read_connection() reads data from the connection, continuing
+ * to read partial results until the request is satisfied or
+ * it errors.  A read interrupted by a signal (EINTR) is retried.
+ * On end-of-file or any other error the connection is removed with
+ * remove_connection(), which frees it: after a 0 return the caller
+ * must not touch c again.
+ * TODO: this read should be covered by signal protection.
+ */
+int read_connection(connection c,
+                    unsigned char *dest,
+                    int len)
+{
+        int offset=0,rc;
+
+        if (len){
+                do {
+                        rc=syscall(SYS_read, c->fd, dest+offset, len-offset);
+                        if (rc<=0){
+                                /* errno is only meaningful when the call
+                                 * failed; a return of 0 is end-of-file and
+                                 * must not be retried on a stale EINTR */
+                                if ((rc<0) && (errno==EINTR)) {
+                                        rc=0;
+                                } else {
+                                        remove_connection(c);
+                                        return(0);
+                                }
+                        }
+                        offset+=rc;
+                } while (offset<len);
+        }
+        return(1);
+}
+
+/* io handler callback for a connection's socket: forwards the
+ * connection to the handler installed in its manager and returns
+ * that handler's result. */
+static int connection_input(connection c)
+{
+        manager owner = c->m;
+
+        return(owner->handler(owner->handler_arg, c));
+}
+
+
+/* Function: allocate_connection
+ * Arguments: m: the connection manager the allocation occurs in
+ *            ip: peer IP address, used as the first word of the key
+ *            port: peer port, used as the second word of the key
+ *            fd: open file descriptor for the socket
+ * Returns: an allocated connection structure, or 0 (NULL) if memory
+ *          could not be allocated
+ *
+ * just encompasses the action common to active and passive
+ * connections of allocation and placement in the global table.
+ * On success the connection owns fd.  On failure fd is closed here,
+ * because neither caller has anything else it could do with it.
+ */
+static connection allocate_connection(manager m,
+                                      unsigned int ip,
+                                      unsigned short port,
+                                      int fd)
+{
+        connection c=malloc(sizeof(struct connection));
+        unsigned int id[2];
+
+        if (!c) {
+                close(fd);
+                return(0);
+        }
+        c->m=m;
+        c->fd=fd;
+        c->ip=ip;
+        c->port=port;
+        id[0]=ip;
+        id[1]=port;
+        register_io_handler(fd,READ_HANDLER,connection_input,c);
+        hash_table_insert(m->connections,c,id);
+        return(c);
+}
+
+
+/* Function: new_connection
+ * Arguments: z: opaque argument holding the connection manager
+ * Returns: 1 in order to reregister for new connection requests
+ *
+ * called when the bound service socket receives a new connection
+ * request.  The peer is accepted and installed as a new connection
+ * keyed by its IP address and port 0 (the peer's source port is
+ * deliberately ignored).  If accept() fails the error is reported
+ * and nothing is installed.
+ */
+static int new_connection(void *z)
+{
+        manager m=z;
+        struct sockaddr_in s;
+        socklen_t len=sizeof(s);
+        int fd=accept(m->bound,(struct sockaddr *)&s,&len);
+
+        if (fd < 0) {
+                perror("tcpnal accept");
+                return(1);
+        }
+
+        /* cfs specific hack */
+        //unsigned short pid=s.sin_port;
+        /* s_addr is in network order; connections are keyed by the
+         * host order address */
+        allocate_connection(m,ntohl(s.sin_addr.s_addr),0/*pid*/,fd);
+        return(1);
+}
+
+
+/* Function: force_tcp_connection
+ * Arguments: m: the connection manager
+ *            ip: IP address of the peer, in host byte order
+ *            port: ignored; it is overwritten with tcpnal_acceptor_port
+ * Returns: an allocated connection structure, either
+ *          a pre-existing one, or a new connection; 0 (NULL) if the
+ *          connection could not be established
+ *
+ * NB: failure to create the socket terminates the process.
+ */
+connection force_tcp_connection(manager m,
+                                unsigned int ip,
+                                unsigned short port)
+{
+        connection c;
+        struct sockaddr_in addr;
+        unsigned int id[2];
+        int fd;
+
+        port = tcpnal_acceptor_port;
+
+        id[0]=ip;
+        id[1]=port;
+
+        c=hash_table_find(m->connections,id);
+        if (c)
+                return(c);
+
+        memset(&addr, 0, sizeof(addr));
+        addr.sin_family = AF_INET;
+        addr.sin_addr.s_addr = htonl(ip);
+        addr.sin_port = htons(port);
+
+        if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
+                perror("tcpnal socket failed");
+                exit(-1);
+        }
+        if (connect(fd,
+                    (struct sockaddr *)&addr,
+                    sizeof(struct sockaddr_in)))
+        {
+                perror("tcpnal connect");
+                /* do not leak the descriptor of the failed attempt */
+                close(fd);
+                return(0);
+        }
+        return(allocate_connection(m,ip,port,fd));
+}
+
+
+/* Function: bind_socket
+ * Arguments: m: the connection manager to bind a service socket for
+ *            port: the port to attempt to bind to
+ * Returns: 1 on success, or 0 on error
+ *
+ * bind_socket() attempts to allocate and bind a socket to the requested
+ * port, or dynamically assign one from the kernel should the port be
+ * zero. Sets the bound, bound_handler and port elements of m.  On
+ * failure the socket is closed again and no handler is registered.
+ *
+ * NOTE(review): port is stored into sin_port without htons() and
+ * m->port receives sin_port unconverted, so both appear to be in
+ * network byte order - confirm against the callers.
+ *
+ * TODO: The port should be an explicitly sized type.
+ */
+static int bind_socket(manager m,unsigned short port)
+{
+        struct sockaddr_in addr;
+        socklen_t alen=sizeof(struct sockaddr_in);
+
+        if ((m->bound = socket(AF_INET, SOCK_STREAM, 0)) < 0)
+                return(0);
+
+        memset(&addr, 0, sizeof(addr));
+        addr.sin_family = AF_INET;
+        addr.sin_addr.s_addr = 0;
+        addr.sin_port = port;
+
+        if (bind(m->bound,(struct sockaddr *)&addr,alen)<0){
+                perror ("tcpnal bind");
+                close(m->bound);
+                return(0);
+        }
+
+        /* learn the port actually assigned; on failure addr keeps the
+         * requested value */
+        if (getsockname(m->bound,(struct sockaddr *)&addr, &alen)<0)
+                perror ("tcpnal getsockname");
+
+        /* start listening before the handler can be dispatched */
+        if (listen(m->bound,5)<0){
+                perror ("tcpnal listen");
+                close(m->bound);
+                return(0);
+        }
+
+        m->bound_handler=register_io_handler(m->bound,READ_HANDLER,
+                                             new_connection,m);
+        m->port=addr.sin_port;
+        return(1);
+}
+
+
+/* Function: shutdown_connections
+ * Arguments: m: the manager structure
+ *
+ * close all connections and reclaim resources.  The manager itself is
+ * freed, so m must not be used after this call.
+ */
+void shutdown_connections(manager m)
+{
+ /* stop accepting: close the service socket and drop its handler */
+ close(m->bound);
+ remove_io_handler(m->bound_handler);
+ /* remove_connection() is applied as the per-entry destructor */
+ hash_destroy_table(m->connections,remove_connection);
+ free(m);
+}
+
+
+/* Function: init_connections
+ * Arguments: pid: the port to attempt to bind the service socket to
+ *            input: handler invoked with (a, connection) when a
+ *                   connection has input
+ *            a: opaque argument passed through to input
+ * Returns: a newly allocated manager structure, or
+ *          zero if memory could not be allocated or the fixed port
+ *          could not be bound
+ */
+manager init_connections(unsigned short pid,
+                         int (*input)(),
+                         void *a)
+{
+        manager m=(manager)malloc(sizeof(struct manager));
+
+        if (!m)
+                return(0);
+
+        m->connections=hash_create_table(compare_connection,connection_key);
+        m->handler=input;
+        m->handler_arg=a;
+        if (bind_socket(m,pid)) return(m);
+
+        /* nothing has been inserted yet; release the table (same call
+         * as in shutdown_connections) so it does not leak */
+        hash_destroy_table(m->connections,remove_connection);
+        free(m);
+        return(0);
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+#include <table.h>
+
+/* Per-NAL connection manager.  The typedef name `manager` is a POINTER
+ * to struct manager. */
+typedef struct manager {
+ /* hash table of open connections, keyed by { ip, port } */
+ table connections;
+ /* listening (service) socket descriptor */
+ int bound;
+ /* io handler registered for the listening socket */
+ io_handler bound_handler;
+ /* callback invoked as handler(handler_arg, connection) on input */
+ int (*handler)(void *, void *);
+ void *handler_arg;
+ /* port of the listening socket, copied from sin_port unconverted */
+ unsigned short port;
+} *manager;
+
+
+/* One open TCP connection to a peer.  The typedef name `connection`
+ * is a POINTER to struct connection. */
+typedef struct connection {
+ /* peer address and port; together they form the hash table key */
+ unsigned int ip;
+ unsigned short port;
+ /* socket descriptor, closed by remove_connection() */
+ int fd;
+ /* owning manager */
+ manager m;
+} *connection;
+
+connection force_tcp_connection(manager m,
+ unsigned int ip,
+ unsigned int short);
+manager init_connections(unsigned short,
+ int (*f)(void *,connection),
+ void *);
+void remove_connection(void *arg);
+void shutdown_connections(manager m);
+int read_connection(connection c,
+ unsigned char *dest,
+ int len);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Phil Schwan <phil@clusterfs.com>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <sys/time.h>
+
+/* user-space stand-in printed as the cpu number in debug messages */
+int smp_processor_id = 1;
+char debug_file_path[1024] = "/tmp/lustre-log";
+/* printed by the dumplog stubs; never assigned in this file */
+char debug_file_name[1024];
+/* despite the name this is a stdio stream; portals_debug_init() sets
+ * it to stdout */
+FILE *debug_file_fd;
+
+/* User-space stub: only prints the name of the debug file to stdout.
+ * arg is ignored.  Always returns 0. */
+int portals_do_debug_dumplog(void *arg)
+{
+ printf("Look in %s\n", debug_file_name);
+ return 0;
+}
+
+
+/* User-space stub: does nothing. */
+void portals_debug_print(void)
+{
+ return;
+}
+
+
+/* User-space stub: only prints the name of the debug file to stdout. */
+void portals_debug_dumplog(void)
+{
+ printf("Look in %s\n", debug_file_name);
+ return;
+}
+
+
+/* Directs debug output to stdout.  bufsize is ignored.  Must be called
+ * before any function that writes to debug_file_fd.  Always returns 0. */
+int portals_debug_init(unsigned long bufsize)
+{
+ debug_file_fd = stdout;
+ return 0;
+}
+
+/* User-space stub: nothing to release (output goes to stdout).
+ * Always returns 0. */
+int portals_debug_cleanup(void)
+{
+ return 0; //close(portals_debug_fd);
+}
+
+/* User-space stub: there is no buffer to clear.  Always returns 0. */
+int portals_debug_clear_buffer(void)
+{
+ return 0;
+}
+
+/* Writes a "DEBUG MARKER: <text>" line, framed above and below by a
+ * row of asterisks, to the debug stream.  Always returns 0. */
+int portals_debug_mark_buffer(char *text)
+{
+        static const char banner[] =
+                "*******************************************************************************\n";
+
+        fputs(banner, debug_file_fd);
+        fprintf(debug_file_fd, "DEBUG MARKER: %s\n", text);
+        fputs(banner, debug_file_fd);
+
+        return 0;
+}
+
+/* User-space stub: copies nothing; buf and len are ignored.
+ * Always returns 0. */
+int portals_debug_copy_to_user(char *buf, unsigned long len)
+{
+ return 0;
+}
+
+/* Writes one debug message to debug_file_fd.
+ *
+ * Arguments: subsys: subsystem bits; the top byte is printed
+ *            mask: debug mask, printed in hex
+ *            file, fn, line: source location of the caller
+ *            format, ...: printf-style message and its arguments
+ *
+ * The message is prefixed with "subsys:mask:cpu:sec.usec " and
+ * "(file:line:fn() 0+N): ", where N is derived from the address of a
+ * local variable as a rough stack-usage indicator.
+ * portals_debug_init() must have been called so that debug_file_fd is
+ * a valid stream.
+ */
+/* FIXME: I'm not very smart; someone smarter should make this better. */
+void
+portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
+                   const char *format, ...)
+{
+        va_list ap;
+        unsigned long flags;    /* never read; only its address is used */
+        struct timeval tv;
+
+        gettimeofday(&tv, NULL);
+
+        /* casts make the arguments match the conversion specifiers */
+        fprintf(debug_file_fd,
+                "%02x:%06x:%d:%lu.%06lu ",
+                (unsigned int)(subsys >> 24), (unsigned int)mask,
+                smp_processor_id,
+                (unsigned long)tv.tv_sec, (unsigned long)tv.tv_usec);
+
+        fprintf(debug_file_fd,
+                "(%s:%d:%s() %d+%ld): ",
+                file, line, fn, 0,
+                (long)(8192 - ((unsigned long)&flags & 8191UL)));
+
+        /* a va_list must be formatted with vfprintf(); passing it to
+         * fprintf() is undefined behaviour */
+        va_start (ap, format);
+        vfprintf(debug_file_fd, format, ap);
+        va_end (ap);
+}
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ * Copyright (c) 2002 Eric Hoffman
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+/* this file is only called dispatch.h to prevent it
+ from colliding with /usr/include/sys/select.h */
+
+/* NB: the typedef name `io_handler` is a POINTER to struct io_handler */
+typedef struct io_handler *io_handler;
+
+/* One registered file descriptor callback. */
+struct io_handler{
+ /* presumably the address of the pointer that refers to this entry
+ * (for O(1) unlinking) - TODO confirm against the list code */
+ io_handler *last;
+ io_handler next;
+ int fd;
+ /* bitmask of READ_HANDLER / WRITE_HANDLER / EXCEPTION_HANDLER */
+ int type;
+ /* callback and the argument it is invoked with */
+ int (*function)(void *);
+ void *argument;
+ int disabled;
+};
+
+
+#define READ_HANDLER 1
+#define WRITE_HANDLER 2
+#define EXCEPTION_HANDLER 4
+#define ALL_HANDLER (READ_HANDLER | WRITE_HANDLER | EXCEPTION_HANDLER)
+
+io_handler register_io_handler(int fd,
+ int type,
+ int (*function)(void *),
+ void *arg);
+
+void remove_io_handler (io_handler i);
+void init_unix_timer(void);
+void select_timer_block(when until);
+when now(void);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+/* Mapping between (IP address, TCP port) and portals (nid, pid).
+ *
+ * DIRECT_IP_MODE is defined unconditionally here, so the first set of
+ * macros is the one in effect: the nid is the address and the pid is
+ * the port, unchanged. */
+#define DIRECT_IP_MODE
+#ifdef DIRECT_IP_MODE
+#define PNAL_NID(in_addr, port) (in_addr)
+#define PNAL_PID(pid) (pid)
+#define PNAL_IP(in_addr, port) (in_addr)
+#define PNAL_PORT(nid, pid) (pid)
+#else
+
+/* Virtual-node mapping: the low 24 bits of the address are shifted left
+ * by 8 and combined with a virtual node number to form the nid; the
+ * port encodes (virtual node << 8 | pid) + PNAL_BASE_PORT. */
+#define PNAL_BASE_PORT 4096
+#define PNAL_HOSTID_SHIFT 24
+#define PNAL_HOSTID_MASK ((1 << PNAL_HOSTID_SHIFT) - 1)
+#define PNAL_VNODE_SHIFT 8
+#define PNAL_VNODE_MASK ((1 << PNAL_VNODE_SHIFT) - 1)
+#define PNAL_PID_SHIFT 8
+#define PNAL_PID_MASK ((1 << PNAL_PID_SHIFT) - 1)
+
+/* in_addr and port are taken in network byte order */
+#define PNAL_NID(in_addr, port) (((ntohl(in_addr) & PNAL_HOSTID_MASK) \
+ << PNAL_VNODE_SHIFT) \
+ | (((ntohs(port)-PNAL_BASE_PORT) >>\
+ PNAL_PID_SHIFT)))
+#define PNAL_PID(port) ((ntohs(port) - PNAL_BASE_PORT) & PNAL_PID_MASK)
+
+/* t is a bridge; its iptop8 field supplies the top 8 address bits that
+ * the nid does not carry.  The results are in network byte order. */
+#define PNAL_IP(nid,t) (htonl((((unsigned)(nid))\
+ >> PNAL_VNODE_SHIFT)\
+ | (t->iptop8 << PNAL_HOSTID_SHIFT)))
+#define PNAL_PORT(nid, pid) (htons(((((nid) & PNAL_VNODE_MASK) \
+ << PNAL_VNODE_SHIFT) \
+ | ((pid) & PNAL_PID_MASK)) \
+ + PNAL_BASE_PORT))
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ * Copyright (c) 2002 Eric Hoffman
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* timer.c:
+ * this file implements a simple priority-queue based timer system. when
+ * combined with a file which implements now() and block(), it can
+ * be used to provide coarse-grained time-based callbacks.
+ */
+
+#include <pqtimer.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* One pending timer; entries live in the `timers` heap ordered by w. */
+struct timer {
+ /* callback and its argument, invoked when the timer fires */
+ void (*function)(void *);
+ void *arg;
+ /* absolute expiry time: now() + interval at registration */
+ when w;
+ /* NOTE(review): assigned from a `when` (unsigned long long) in
+ * register_timer(), so large intervals are truncated here */
+ int interval;
+ /* set by remove_timer(); a disabled timer is discarded unfired */
+ int disable;
+};
+
+/* NB: the typedef name `thunk` is a POINTER to struct thunk */
+typedef struct thunk *thunk;
+/* Singly linked list node holding a background callback that
+ * timer_loop() invokes on every iteration. */
+struct thunk {
+ void (*f)(void *);
+ void *a;
+ thunk next;
+};
+
+extern when now(void);
+
+/* head of the registered thunk list */
+static thunk thunks;
+/* read by register_timer(); never assigned in this file */
+static int internal;
+/* blocking function installed by initialize_timer() */
+static void (*block_function)(when);
+/* number of entries currently in the heap */
+static int number_of_timers;
+/* allocated capacity of `timers`, in entries */
+static int size_of_pqueue;
+/* binary min-heap of timers, 1-based: slot 0 is unused and the
+ * earliest timer is timers[1] */
+static timer *timers;
+
+
+/* Restores the min-heap property by sifting the entry at index
+ * `where` down until neither child expires earlier than it does.
+ * The heap is 1-based: the children of i are 2i and 2i+1. */
+static void heal(int where)
+{
+        for (;;) {
+                int left = where << 1;
+                int right = left + 1;
+                int smallest = where;
+                timer swap;
+
+                if ((left <= number_of_timers) &&
+                    (timers[left]->w < timers[smallest]->w))
+                        smallest = left;
+                if ((right <= number_of_timers) &&
+                    (timers[right]->w < timers[smallest]->w))
+                        smallest = right;
+
+                if (smallest == where)
+                        return;
+
+                swap = timers[where];
+                timers[where] = timers[smallest];
+                timers[smallest] = swap;
+                where = smallest;
+        }
+}
+
+/* Sifts the entry at index i up towards the root while it expires
+ * earlier than its parent (index i/2). */
+static void add_pqueue(int i)
+{
+        while (i > 1) {
+                int parent = i >> 1;
+                timer swap;
+
+                if (!(timers[i]->w < timers[parent]->w))
+                        break;
+
+                swap = timers[i];
+                timers[i] = timers[parent];
+                timers[parent] = swap;
+                i = parent;
+        }
+}
+
+/* Inserts t into the heap, growing the backing array by 10 slots when
+ * it is full.  The array holds `timer` pointers, so it is sized in
+ * units of sizeof(timer), and realloc() releases the old storage.
+ * Since this function cannot report failure, running out of memory
+ * aborts the process rather than dereferencing a null pointer. */
+static void add_timer(timer t)
+{
+        if (size_of_pqueue<(number_of_timers+2)){
+                int newsize=size_of_pqueue+10;
+                timer *grown=realloc(timers,sizeof(timer)*newsize);
+
+                if (!grown)
+                        abort();
+                timers=grown;
+                size_of_pqueue=newsize;
+        }
+        timers[++number_of_timers]=t;
+        add_pqueue(number_of_timers);
+}
+
+/* Function: register_timer
+ * Arguments: interval: the time interval from the current time when
+ *                      the timer function should be called
+ *            function: the function to call when the time has expired
+ *            argument: the argument to call it with.
+ * Returns: a pointer to a timer structure, or 0 (NULL) if it could
+ *          not be allocated
+ *
+ * When this is the only timer in the queue and `internal` is not set,
+ * the block function is invoked with the new expiry time.
+ */
+timer register_timer(when interval,
+                     void (*function)(void *),
+                     void *argument)
+{
+        timer t=(timer)malloc(sizeof(struct timer));
+
+        if (!t)
+                return(0);
+
+        t->arg=argument;
+        t->function=function;
+        t->interval=interval;
+        t->disable=0;
+        t->w=now()+interval;
+        add_timer(t);
+        if (!internal && (number_of_timers==1))
+                block_function(t->w);
+        return(t);
+}
+
+/* Function: remove_timer
+ * Arguments: t: the timer to cancel, as returned by register_timer
+ * Returns: nothing
+ *
+ * remove_timer removes a timer from the system, ensuring
+ * that it will never be called. It does not actually
+ * free the timer due to reentrancy issues: the entry stays in the
+ * queue marked disabled, and timer_fire() frees it when it expires.
+ */
+
+void remove_timer(timer t)
+{
+ t->disable=1;
+}
+
+
+
+/* Pops the earliest timer off the heap, invokes its callback unless
+ * it was disabled by remove_timer(), and frees it.  The heap is
+ * repaired before the callback runs, so the callback may register
+ * new timers.  Does nothing when no timer is queued. */
+void timer_fire()
+{
+        timer current;
+
+        /* with an empty queue timers[1] is stale and the count would
+         * go negative */
+        if (number_of_timers < 1)
+                return;
+
+        current=timers[1];
+        timers[1]=timers[number_of_timers--];
+        heal(1);
+        if (!current->disable) {
+                (*current->function)(current->arg);
+        }
+        free(current);
+}
+
+/* Fires every timer whose expiry time is not later than the time
+ * sampled on entry, then returns the expiry time of the earliest
+ * remaining timer, or 0 when the queue is empty. */
+when next_timer(void)
+{
+        when here = now();
+
+        for (;;) {
+                if (!number_of_timers)
+                        return(0);
+                if (timers[1]->w > here)
+                        return(timers[1]->w);
+                timer_fire();
+        }
+}
+
+/* Function: timer_loop
+ * Arguments: none
+ * Returns: never
+ *
+ * timer_loop() is the blocking dispatch function for the timer.
+ * Each iteration runs all registered thunks, then either fires the
+ * earliest timer if it is due, blocks until it is due, or - when no
+ * timer is queued - runs the thunks a second time and blocks with an
+ * argument of 0.  Blocking is done through the function registered
+ * with initialize_timer.
+ */
+void timer_loop()
+{
+        for (;;) {
+                when here = now();
+                thunk z;
+
+                for (z = thunks; z != 0; z = z->next)
+                        z->f(z->a);
+
+                if (!number_of_timers) {
+                        for (z = thunks; z != 0; z = z->next)
+                                z->f(z->a);
+                        (*block_function)(0);
+                        continue;
+                }
+
+                if (timers[1]->w > here)
+                        (*block_function)(timers[1]->w);
+                else
+                        timer_fire();
+        }
+}
+
+
+/* Function: register_thunk
+ * Arguments: f: the function to call
+ *            a: the single argument to call it with
+ *
+ * Thunk functions get called at irregular intervals, they
+ * should not assume when, or take a particularly long
+ * amount of time. Thunks are for background cleanup tasks.
+ * The new thunk is pushed on the front of the list.  If memory
+ * cannot be allocated the thunk is silently not registered.
+ */
+void register_thunk(void (*f)(void *),void *a)
+{
+        thunk t=(void *)malloc(sizeof(struct thunk));
+
+        if (!t)
+                return;
+        t->f=f;
+        t->a=a;
+        t->next=thunks;
+        thunks=t;
+}
+
+/* Function: initialize_timer
+ * Arguments: block: the function to call to block until the given time
+ *
+ * initialize_timer() must be called before any other timer function,
+ * including timer_loop.  It installs the block function, empties the
+ * thunk list and allocates an empty queue with room for 10 slots.
+ *
+ * NOTE(review): the result of malloc is not checked.
+ */
+void initialize_timer(void (*block)(when))
+{
+        thunks = 0;
+        number_of_timers = 0;
+        block_function = block;
+
+        size_of_pqueue = 10;
+        timers = (timer *)malloc(sizeof(timer) * size_of_pqueue);
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ * Copyright (c) 2002 Eric Hoffman
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+/* time value used throughout the timer code; see HZ below */
+typedef unsigned long long when;
+/* current time; implemented outside pqtimer.c */
+when now(void);
+/* NB: the typedef name `timer` is a POINTER to struct timer */
+typedef struct timer *timer;
+timer register_timer(when interval,
+ void (*function)(void *),
+ void *argument);
+timer register_timer_wait(void);
+void remove_timer(timer);
+void timer_loop(void);
+void initialize_timer(void (*block)(when));
+void timer_fire(void);
+
+
+/* 2^32; presumably the number of `when` ticks per second, i.e. `when`
+ * is 32.32 fixed-point seconds - TODO confirm against now() */
+#define HZ 0x100000000ull
+
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* api.c:
+ * This file provides the 'api' side for the process-based nals.
+ * it is responsible for creating the 'library' side thread,
+ * and passing wrapped portals transactions to it.
+ *
+ * Along with initialization, shutdown, and transport to the library
+ * side, this file contains some stubs to satisfy the nal definition.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <syscall.h>
+#include <procbridge.h>
+#include <pqtimer.h>
+#include <dispatch.h>
+#include <errno.h>
+
+
+/* Function: forward
+ * Arguments: nal_t *nal: pointer to my top-side nal structure
+ *            id: the command to pass to the lower layer
+ *            args, args_len:pointer to and length of the request
+ *            ret, ret_len: pointer to and size of the result
+ * Returns: a portals status code: PTL_OK on success, PTL_SEGV if the
+ *          request could not be written or the reply not read in full
+ *
+ * forwards a packaged api call from the 'api' side to the 'library'
+ * side, and collects the result.  The request is written to the
+ * to_lib pipe as id, args_len, ret_len and then the argument block;
+ * the reply is read from the from_lib pipe.  A failed write also
+ * calls lib_fini() on the library-side NAL.
+ */
+#define forward_failure(operand,fd,buffer,length)\
+       if(syscall(SYS_##operand,fd,buffer,length)!=length){\
+          lib_fini(b->nal_cb);\
+          return(PTL_SEGV);\
+       }
+static int procbridge_forward(nal_t *n, int id, void *args, ptl_size_t args_len,
+                              void *ret, ptl_size_t ret_len)
+{
+        bridge b=(bridge)n->nal_data;
+        procbridge p=(procbridge)b->local;
+        int lib=p->to_lib[1];
+        int k;
+
+        forward_failure(write,lib, &id, sizeof(id));
+        forward_failure(write,lib,&args_len, sizeof(args_len));
+        forward_failure(write,lib,&ret_len, sizeof(ret_len));
+        forward_failure(write,lib,args, args_len);
+
+        /* Retry only a read that was interrupted by a signal.  errno
+         * is compared, not modified, and only consulted when the call
+         * actually failed. */
+        do {
+                k=syscall(SYS_read, p->from_lib[0], ret, ret_len);
+        } while ((k<0) && (errno==EINTR));
+
+        if(k!=ret_len){
+                perror("nal: read return block");
+                return PTL_SEGV;
+        }
+        return (PTL_OK);
+}
+#undef forward_failure
+
+
+/* Function: shutdown
+ * Arguments: nal: a pointer to my top side nal structure
+ *            ni: my network interface index (unused)
+ * Returns: 0
+ *
+ * cleanup nal state, reclaim the lower side thread and
+ * its state using PTL_FINI codepoint: the code is written to the
+ * library side, one int is read back as acknowledgement, both pipes
+ * are closed and the procbridge structure is freed.
+ */
+static int procbridge_shutdown(nal_t *n, int ni)
+{
+        bridge b = (bridge)n->nal_data;
+        procbridge p = (procbridge)b->local;
+        int code = PTL_FINI;
+        int fds[4];
+        int i;
+
+        syscall(SYS_write, p->to_lib[1], &code, sizeof(code));
+        syscall(SYS_read, p->from_lib[0], &code, sizeof(code));
+
+        fds[0] = p->to_lib[0];
+        fds[1] = p->to_lib[1];
+        fds[2] = p->from_lib[0];
+        fds[3] = p->from_lib[1];
+        for (i = 0; i < 4; i++)
+                syscall(SYS_close, fds[i]);
+
+        free(p);
+        return(0);
+}
+
+
+/* Function: validate
+ * useless stub: ignores its arguments and always returns 0
+ */
+static int procbridge_validate(nal_t *nal, void *base, ptl_size_t extent)
+{
+ return(0);
+}
+
+
+/* Function: yield
+ * Arguments: n: pointer to my top side nal structure
+ *
+ * this function was originally intended to allow the
+ * lower half thread to be scheduled to allow progress. we
+ * overload it to explicitly block until signalled by the
+ * lower half.
+ *
+ * NOTE(review): the wait is not guarded by a predicate loop, so a
+ * spurious wakeup returns early and a signal sent before the wait
+ * begins is missed.
+ */
+static void procbridge_yield(nal_t *n)
+{
+ bridge b=(bridge)n->nal_data;
+ procbridge p=(procbridge)b->local;
+
+ pthread_mutex_lock(&p->mutex);
+ pthread_cond_wait(&p->cond,&p->mutex);
+ pthread_mutex_unlock(&p->mutex);
+}
+
+
+/* lock/unlock entries of the nal vector: intentionally empty */
+static void procbridge_lock(nal_t * nal, unsigned long *flags){}
+static void procbridge_unlock(nal_t * nal, unsigned long *flags){}
+/* api_nal
+ * the interface vector to allow the generic code to access
+ * this nal. this is separate from the library side nal_cb.
+ * nal_data is filled in with the bridge when the interface is
+ * initialized.
+ * TODO: should be dynamically allocated
+ */
+static nal_t api_nal = {
+        .ni       = {0},
+        .nal_data = NULL,
+        .forward  = procbridge_forward,
+        .shutdown = procbridge_shutdown,
+        .validate = procbridge_validate,
+        .yield    = procbridge_yield,
+        .lock     = procbridge_lock,
+        .unlock   = procbridge_unlock
+};
+
+/* Function: bridge_init
+ *
+ * Arguments: pid: requested process id (port offset)
+ * PTL_ID_ANY not supported.
+ * desired: limits passed from the application
+ * and effectively ignored
+ * actual: limits actually allocated and returned
+ *
+ * Returns: a pointer to my statically allocated top side NAL
+ * structure
+ *
+ * initializes the tcp nal. we define unix_failure as an
+ * error wrapper to cut down clutter.
+ *
+ * unix_failure(operand, fd, buffer, length, text) issues the raw
+ * read/write syscall named by operand and, unless exactly length
+ * bytes were transferred, calls perror(text) and makes the ENCLOSING
+ * function return NULL.  It is used by every function up to the
+ * matching #undef.
+ */
+#define unix_failure(operand,fd,buffer,length,text)\
+ if(syscall(SYS_##operand,fd,buffer,length)!=length){\
+ perror(text);\
+ return(NULL);\
+ }
+/* Compiled out (#if 0): an earlier initializer that takes the generic
+ * (nal, pid, desired, actual, rc) argument list.
+ * NOTE(review): if this is ever re-enabled, the read of rc below uses
+ * sizeof(rc), the size of a pointer, rather than sizeof(*rc). */
+#if 0
+static nal_t *bridge_init(ptl_interface_t nal,
+ ptl_pid_t pid_request,
+ ptl_ni_limits_t *desired,
+ ptl_ni_limits_t *actual,
+ int *rc)
+{
+ procbridge p;
+ bridge b;
+ static int initialized=0;
+ ptl_ni_limits_t limits = {-1,-1,-1,-1,-1};
+
+ if(initialized) return (&api_nal);
+
+ init_unix_timer();
+
+ b=(bridge)malloc(sizeof(struct bridge));
+ p=(procbridge)malloc(sizeof(struct procbridge));
+ api_nal.nal_data=b;
+ b->local=p;
+
+ if(pipe(p->to_lib) || pipe(p->from_lib)) {
+ perror("nal_init: pipe");
+ return(NULL);
+ }
+
+ if (desired) limits = *desired;
+ unix_failure(write,p->to_lib[1], &pid_request, sizeof(pid_request),
+ "nal_init: write");
+ unix_failure(write,p->to_lib[1], &limits, sizeof(ptl_ni_limits_t),
+ "nal_init: write");
+ unix_failure(write,p->to_lib[1], &nal, sizeof(ptl_interface_t),
+ "nal_init: write");
+
+ if(pthread_create(&p->t, NULL, nal_thread, b)) {
+ perror("nal_init: pthread_create");
+ return(NULL);
+ }
+
+ unix_failure(read,p->from_lib[0], actual, sizeof(ptl_ni_limits_t),
+ "tcp_init: read");
+ unix_failure(read,p->from_lib[0], rc, sizeof(rc),
+ "nal_init: read");
+
+ if(*rc) return(NULL);
+
+ initialized = 1;
+ pthread_mutex_init(&p->mutex,0);
+ pthread_cond_init(&p->cond, 0);
+
+ return (&api_nal);
+}
+#endif
+
+ptl_nid_t tcpnal_mynid;
+
+/* Function: procbridge_interface
+ * Arguments: num_interface: unused
+ *            ptl_size: portal table size limit, 0 to keep the default
+ *            acl_size: access control table size limit, 0 to keep
+ *                      the default
+ *            requested_pid: the portals pid to request
+ * Returns: a pointer to the statically allocated top side NAL
+ *          structure, or NULL on failure
+ *
+ * On first use this creates the bridge and procbridge state and the
+ * two pipes, writes the requested pid, the limits and the NAL type to
+ * the library side, starts the library thread (nal_thread) and reads
+ * back its status code.  Later calls return the same structure.
+ *
+ * The mutex and condition variable are initialized BEFORE the library
+ * thread is started, so the thread can never use them uninitialized.
+ */
+nal_t *procbridge_interface(int num_interface,
+                            ptl_pt_index_t ptl_size,
+                            ptl_ac_index_t acl_size,
+                            ptl_pid_t requested_pid)
+{
+        procbridge p;
+        bridge b;
+        static int initialized=0;
+        ptl_ni_limits_t limits = {-1,-1,-1,-1,-1};
+        int rc, nal_type = PTL_IFACE_TCP;/* PTL_IFACE_DEFAULT FIXME hack */
+
+        if(initialized) return (&api_nal);
+
+        init_unix_timer();
+
+        b=(bridge)malloc(sizeof(struct bridge));
+        p=(procbridge)malloc(sizeof(struct procbridge));
+        if (!b || !p) {
+                perror("nal_init: malloc");
+                free(b);
+                free(p);
+                return(NULL);
+        }
+        api_nal.nal_data=b;
+        b->local=p;
+
+        if (pipe(p->to_lib)) {
+                perror("nal_init: pipe");
+                goto fail_free;
+        }
+        if (pipe(p->from_lib)) {
+                perror("nal_init: pipe");
+                close(p->to_lib[0]);
+                close(p->to_lib[1]);
+                goto fail_free;
+        }
+
+        if (ptl_size)
+                limits.max_ptable_index = ptl_size;
+        if (acl_size)
+                limits.max_atable_index = acl_size;
+
+        unix_failure(write,p->to_lib[1], &requested_pid, sizeof(requested_pid),
+                     "nal_init: write");
+        unix_failure(write,p->to_lib[1], &limits, sizeof(ptl_ni_limits_t),
+                     "nal_init: write");
+        unix_failure(write,p->to_lib[1], &nal_type, sizeof(nal_type),
+                     "nal_init: write");
+
+        /* must exist before the library thread can signal them */
+        pthread_mutex_init(&p->mutex,0);
+        pthread_cond_init(&p->cond, 0);
+
+        if(pthread_create(&p->t, NULL, nal_thread, b)) {
+                perror("nal_init: pthread_create");
+                return(NULL);
+        }
+
+        unix_failure(read,p->from_lib[0], &rc, sizeof(rc),
+                     "nal_init: read");
+
+        if(rc) return(NULL);
+
+        b->nal_cb->ni.nid = tcpnal_mynid;
+        initialized = 1;
+
+        return (&api_nal);
+
+fail_free:
+        /* nothing else references b or p yet */
+        api_nal.nal_data=NULL;
+        free(p);
+        free(b);
+        return(NULL);
+}
+#undef unix_failure
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+#ifndef _PROCBRIDGE_H_
+#define _PROCBRIDGE_H_
+
+#include <pthread.h>
+#include <bridge.h>
+#include <ipmap.h>
+
+
+/* api-side state of a process bridge, stored in bridge->local.  The
+ * typedef name `procbridge` is a POINTER to struct procbridge. */
+typedef struct procbridge {
+ /* the library-side thread, running nal_thread() */
+ pthread_t t;
+ /* procbridge_yield() blocks on cond while holding mutex */
+ pthread_cond_t cond;
+ pthread_mutex_t mutex;
+ /* pipe carrying requests to the library side; the api side writes
+ * to to_lib[1] */
+ int to_lib[2];
+ /* pipe carrying replies back; the api side reads from_lib[0] */
+ int from_lib[2];
+} *procbridge;
+
+/* entry point of the library thread; its argument is the bridge */
+extern void *nal_thread(void *);
+
+
+/* request indices placed just past the generic dispatch range;
+ * the library side treats PTL_FINI as a shutdown request */
+#define PTL_INIT (LIB_MAX_DISPATCH+1)
+#define PTL_FINI (LIB_MAX_DISPATCH+2)
+
+/* ceilings the library thread applies to the requested table sizes */
+#define MAX_ACLS 1
+#define MAX_PTLS 128
+
+/* called by the library thread with the requested pid before the nal
+ * is initialised; defined outside this header */
+extern void set_address(bridge t,ptl_pid_t pidrequest);
+/* api-side constructor: returns the nal, or NULL on failure */
+extern nal_t *procbridge_interface(int num_interface,
+ ptl_pt_index_t ptl_size,
+ ptl_ac_index_t acl_size,
+ ptl_pid_t requested_pid);
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* lib.c:
+ * This file provides the 'library' side for the process-based nals.
+ * it is responsible for communication with the 'api' side and
+ * providing service to the generic portals 'library'
+ * implementation. 'library' might be better termed 'communication'
+ * or 'kernel'.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <syscall.h>
+#include <procbridge.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netdb.h>
+#include <errno.h>
+#include <timer.h>
+//#include <util/pqtimer.h>
+#include <dispatch.h>
+
+/* the following functions are stubs to satisfy the nal definition
+ without doing anything particularly useful */
+
+/* cb_write callback: the destination is directly addressable here, so
+ * the "write to user memory" is a plain memcpy.  Always returns 0. */
+static int nal_write(nal_cb_t *nal,
+                     void *private,
+                     user_ptr dst_addr,
+                     void *src_addr,
+                     ptl_size_t len)
+{
+        (void)nal;
+        (void)private;
+
+        memcpy(dst_addr, src_addr, len);
+        return(0);
+}
+
+/* cb_read callback: mirror image of nal_write, copying len bytes from
+ * the user pointer into dst_addr.  Always returns 0. */
+static int nal_read(nal_cb_t * nal,
+                    void *private,
+                    void *dst_addr,
+                    user_ptr src_addr,
+                    size_t len)
+{
+        (void)nal;
+        (void)private;
+
+        memcpy(dst_addr, src_addr, len);
+        return(0);
+}
+
+/* cb_malloc callback: hands the request straight to malloc(), so the
+ * result is NULL when the allocation fails. */
+static void *nal_malloc(nal_cb_t *nal,
+                        ptl_size_t len)
+{
+        (void)nal;
+
+        return(malloc(len));
+}
+
+/* cb_free callback: releases a buffer obtained from nal_malloc(); the
+ * length argument is not needed by free() and is ignored. */
+static void nal_free(nal_cb_t *nal,
+                     void *buf,
+                     ptl_size_t len)
+{
+        (void)nal;
+        (void)len;
+
+        free(buf);
+}
+
+/* cb_printf callback: printf-style output of the library's messages on
+ * standard output. */
+static void nal_printf(nal_cb_t *nal,
+                       const char *fmt,
+                       ...)
+{
+        va_list args;
+
+        (void)nal;
+
+        va_start(args, fmt);
+        vprintf(fmt, args);
+        va_end(args);
+}
+
+
+/* cb_cli callback: intentionally does nothing in this nal. */
+static void nal_cli(nal_cb_t *nal,
+                    unsigned long *flags)
+{
+        (void)nal;
+        (void)flags;
+}
+
+
+/* cb_sti callback: counterpart of nal_cli, equally a no-op. */
+static void nal_sti(nal_cb_t *nal,
+                    unsigned long *flags)
+{
+        (void)nal;
+        (void)flags;
+}
+
+
+/* cb_dist callback: reports success without computing anything; note
+ * that *dist is left untouched. */
+static int nal_dist(nal_cb_t *nal,
+                    ptl_nid_t nid,
+                    unsigned long *dist)
+{
+        (void)nal;
+        (void)nid;
+        (void)dist;
+
+        return(0);
+}
+
+
+
+/* Function: data_from_api
+ * Arguments: t: the nal state for this interface
+ * Returns: whether to continue reading from the pipe
+ *
+ * data_from_api() reads data from the api side in response
+ * to a select.
+ *
+ * We define data_failure() for syntactic convenience
+ * of unix error reporting.
+ */
+
+#define data_failure(operand,fd,buffer,length)\
+ if(syscall(SYS_##operand,fd,buffer,length)!=length){\
+ lib_fini(b->nal_cb);\
+ return(0);\
+ }
+static int data_from_api(void *arg)
+{
+ bridge b = arg;
+ procbridge p=(procbridge)b->local;
+ /* where are these two sizes derived from ??*/
+ char arg_block[ 256 ];
+ char ret_block[ 128 ];
+ ptl_size_t arg_len,ret_len;
+ int fd=p->to_lib[0];
+ int index;
+
+ data_failure(read,fd, &index, sizeof(index));
+
+ if (index==PTL_FINI) {
+ lib_fini(b->nal_cb);
+ if (b->shutdown) (*b->shutdown)(b);
+ syscall(SYS_write, p->from_lib[1],&b->alive,sizeof(b->alive));
+
+ /* a heavy-handed but convenient way of shutting down
+ the lower side thread */
+ pthread_exit(0);
+ }
+
+ data_failure(read,fd, &arg_len, sizeof(arg_len));
+ data_failure(read,fd, &ret_len, sizeof(ret_len));
+ data_failure(read,fd, arg_block, arg_len);
+
+ lib_dispatch(b->nal_cb, NULL, index, arg_block, ret_block);
+
+ data_failure(write,p->from_lib[1],ret_block, ret_len);
+ return(1);
+}
+#undef data_failure
+
+
+
+/* Thunk registered with the timer loop: wakes every thread waiting on
+ * the procbridge condition variable.  z is the bridge. */
+static void wakeup_topside(void *z)
+{
+        procbridge shared=((bridge)z)->local;
+
+        pthread_mutex_lock(&shared->mutex);
+        pthread_cond_broadcast(&shared->cond);
+        pthread_mutex_unlock(&shared->mutex);
+}
+
+
+/* Function: nal_thread
+ * Arguments: z: an opaque reference to a nal control structure
+ * allocated and partially populated by the api level code
+ * Returns: nothing, and only on error or explicit shutdown
+ *
+ * This function is the entry point of the pthread initiated on
+ * the api side of the interface. This thread is used to handle
+ * asynchronous delivery to the application.
+ *
+ * We define a limit macro to place a ceiling on limits
+ * for syntactic convenience
+ */
+#define LIMIT(x,y,max)\
+ if ((unsigned int)x > max) y = max;
+
+/* initialiser of the TCP nal, defined outside this file */
+extern int tcpnal_init(bridge);
+
+/* Initialisers indexed by the nal type received from the api side; a
+ * zero entry means nal_thread calls no type-specific initialiser. */
+nal_initialize nal_table[PTL_IFACE_MAX]={0,tcpnal_init,0};
+
+/* Body of the library thread.  Builds the nal control block, reads the
+ * start-up parameters queued by the api side (pid, limits, nal type),
+ * initialises the selected nal and the generic library, reports the
+ * status through the from_lib pipe and, on success, runs the timer
+ * loop.  Always returns 0.
+ */
+void *nal_thread(void *z)
+{
+        bridge b=z;
+        procbridge p=b->local;
+        int rc;
+        ptl_pid_t pid_request;
+        int nal_type;
+        ptl_ni_limits_t desired;
+        ptl_ni_limits_t actual;
+
+        b->nal_cb=(nal_cb_t *)malloc(sizeof(nal_cb_t));
+        if (b->nal_cb == NULL) {
+                rc = PTL_NAL_FAILED;
+                goto report;
+        }
+        b->nal_cb->nal_data=b;
+        b->nal_cb->cb_read=nal_read;
+        b->nal_cb->cb_write=nal_write;
+        b->nal_cb->cb_malloc=nal_malloc;
+        b->nal_cb->cb_free=nal_free;
+        b->nal_cb->cb_map=NULL;
+        b->nal_cb->cb_unmap=NULL;
+        b->nal_cb->cb_printf=nal_printf;
+        b->nal_cb->cb_cli=nal_cli;
+        b->nal_cb->cb_sti=nal_sti;
+        b->nal_cb->cb_dist=nal_dist;
+
+
+        register_io_handler(p->to_lib[0],READ_HANDLER,data_from_api,(void *)b);
+
+        /* a short read or an error leaves the parameters undefined, so
+           anything other than a complete transfer aborts start-up */
+        if (syscall(SYS_read, p->to_lib[0], &pid_request,
+                    sizeof(pid_request)) != (long)sizeof(pid_request) ||
+            syscall(SYS_read, p->to_lib[0], &desired,
+                    sizeof(ptl_ni_limits_t)) != (long)sizeof(ptl_ni_limits_t) ||
+            syscall(SYS_read, p->to_lib[0], &nal_type,
+                    sizeof(nal_type)) != (long)sizeof(nal_type)) {
+                perror("procbridge read from api");
+                rc = PTL_NAL_FAILED;
+                goto report;
+        }
+
+        /* nal_type indexes nal_table below */
+        if (nal_type < 0 || nal_type >= PTL_IFACE_MAX) {
+                rc = PTL_NAL_FAILED;
+                goto report;
+        }
+
+        actual = desired;
+        LIMIT(desired.max_match_entries,actual.max_match_entries,MAX_MES);
+        LIMIT(desired.max_mem_descriptors,actual.max_mem_descriptors,MAX_MDS);
+        LIMIT(desired.max_event_queues,actual.max_event_queues,MAX_EQS);
+        LIMIT(desired.max_atable_index,actual.max_atable_index,MAX_ACLS);
+        LIMIT(desired.max_ptable_index,actual.max_ptable_index,MAX_PTLS);
+
+        set_address(b,pid_request);
+
+        rc = 0;
+        if (nal_table[nal_type]) rc=(*nal_table[nal_type])(b);
+
+        /* initialize the generic 'library' level code, but only on top
+           of a nal that came up */
+        if (!rc)
+                rc = lib_init(b->nal_cb,
+                              b->nal_cb->ni.nid,
+                              b->nal_cb->ni.pid,
+                              10,
+                              actual.max_ptable_index,
+                              actual.max_atable_index);
+
+        /*
+         * Whatever the initialization returned is passed back to the
+         * user level code for further interpretation.  We just exit if
+         * it is non-zero since something went wrong.  The limits
+         * actually granted are not reported back.
+         */
+report:
+        syscall(SYS_write, p->from_lib[1], &rc, sizeof(rc));
+
+        if(!rc) {
+                /* the thunk function is called each time the timer loop
+                   performs an operation and returns to blocking mode. we
+                   overload this function to inform the api side that
+                   it may be interested in looking at the event queue */
+                register_thunk(wakeup_topside,b);
+                timer_loop();
+        }
+        return(0);
+}
+#undef LIMIT
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ * Copyright (c) 2002 Eric Hoffman
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* select.c:
+ * Provides a general mechanism for registering and dispatching
+ * io events through the select system call.
+ */
+
+#ifdef sun
+#include <sys/filio.h>
+#else
+#include <sys/ioctl.h>
+#endif
+
+#include <sys/time.h>
+#include <sys/types.h>
+#include <stdlib.h>
+#include <pqtimer.h>
+#include <dispatch.h>
+
+
+static struct timeval beginning_of_epoch;
+static io_handler io_handlers;
+
+/* Function: now
+ *
+ * Return: the current time in canonical units: a 64 bit number
+ * where the most significant 32 bits contain the number
+ * of seconds, and the least significant a count of (1/(2^32))ths
+ * of a second.
+ */
+when now()
+{
+        struct timeval tv;
+        unsigned long long seconds;
+        unsigned long long fraction;
+
+        gettimeofday(&tv,0);
+        /* whole seconds occupy the upper half of the result */
+        seconds=((unsigned long long)tv.tv_sec)<<32;
+        /* microseconds rescaled to 1/2^32ths of a second */
+        fraction=(((unsigned long long)tv.tv_usec)<<32)/1000000;
+        return(seconds|fraction);
+}
+
+
+/* Function: register_io_handler
+ * Arguments: fd: the file descriptor of interest
+ * type: a mask of READ_HANDLER, WRITE_HANDLER, EXCEPTION_HANDLER
+ * function: a function to call when io is available on fd
+ * arg: an opaque correlator to return to the handler
+ * Returns: a pointer to the io_handler structure
+ */
+io_handler register_io_handler(int fd,
+                               int type,
+                               int (*function)(void *),
+                               void *arg)
+{
+        /* zeroed so that a handler which is not linked in (negative fd)
+           still has defined fields */
+        io_handler i=(io_handler)calloc(1,sizeof(struct io_handler));
+
+        if (!i) return(0);
+
+        if ((i->fd=fd)>=0){
+                i->type=type;
+                i->function=function;
+                i->argument=arg;
+                i->disabled=0;
+                /* push on the front of the list; `last' points at the
+                   pointer that refers to this handler */
+                i->last=&io_handlers;
+                if ((i->next=io_handlers)) i->next->last=&i->next;
+                io_handlers=i;
+        }
+        return(i);
+}
+
+/* Function: remove_io_handler
+ * Arguments: i: a pointer to the handler to stop servicing
+ *
+ * remove_io_handler() doesn't actually free the handler, due
+ * to reentrancy problems. it just marks the handler for
+ * later cleanup by the blocking function.
+ */
+void remove_io_handler (io_handler i)
+{
+        /* only flagged here; select_timer_block() unlinks and frees
+           flagged handlers before its next select() */
+        i->disabled = 1;
+}
+
+/* Adds n's descriptor to each of the three consecutive sets (read,
+ * write, exception) that its type mask selects. */
+static void set_flag(io_handler n,fd_set *fds)
+{
+        fd_set *readers=fds;
+        fd_set *writers=fds+1;
+        fd_set *exceptions=fds+2;
+
+        if (n->type & READ_HANDLER)
+                FD_SET(n->fd,readers);
+        if (n->type & WRITE_HANDLER)
+                FD_SET(n->fd,writers);
+        if (n->type & EXCEPTION_HANDLER)
+                FD_SET(n->fd,exceptions);
+}
+
+
+/* Function: select_timer_block
+ * Arguments: until: an absolute time when the select should return
+ *
+ * This function dispatches the various file descriptors' handler
+ * functions, if the kernel indicates there is io available.
+ */
+void select_timer_block(when until)
+{
+        fd_set fds[3];
+        struct timeval timeout;
+        struct timeval *timeout_pointer;
+        int result;
+        io_handler j;
+        io_handler *k;
+
+        /* TODO: loop until the entire interval is expired*/
+        if (until){
+                when current=now();
+                /* a deadline already in the past must poll, not wrap
+                   around into an enormous unsigned interval */
+                when interval=(until>current)?(until-current):0;
+
+                timeout.tv_sec=(interval>>32);
+                /* the low 32 bits count 1/2^32ths of a second:
+                   usec = fraction * 10^6 / 2^32 */
+                timeout.tv_usec=((interval&0xffffffffull)*1000000)>>32;
+                timeout_pointer=&timeout;
+        } else timeout_pointer=0;
+
+        FD_ZERO(fds);
+        FD_ZERO(fds+1);
+        FD_ZERO(fds+2);
+        for (k=&io_handlers;*k;){
+                if ((*k)->disabled){
+                        /* unlink and free, then look at the successor
+                           again: it may be disabled as well */
+                        j=*k;
+                        *k=j->next;
+                        /* keep the successor's back-link, as set up by
+                           register_io_handler(), pointing at the slot
+                           that now refers to it */
+                        if (*k) (*k)->last=k;
+                        free(j);
+                        continue;
+                }
+                set_flag(*k,fds);
+                k=&(*k)->next;
+        }
+        result=select(FD_SETSIZE,fds,fds+1,fds+2,timeout_pointer);
+
+        if (result > 0)
+                for (j=io_handlers;j;j=j->next){
+                        if (!(j->disabled) &&
+                            ((FD_ISSET(j->fd,fds) && (j->type & READ_HANDLER)) ||
+                             (FD_ISSET(j->fd,fds+1) && (j->type & WRITE_HANDLER)) ||
+                             (FD_ISSET(j->fd,fds+2) && (j->type & EXCEPTION_HANDLER)))){
+                                /* a handler returning 0 asks to be
+                                   dropped */
+                                if (!(*j->function)(j->argument))
+                                        j->disabled=1;
+                        }
+                }
+}
+
+/* Function: init_unix_timer()
+ * is called to initialize the library
+ */
+void init_unix_timer()
+{
+        /* record the start-up time and begin with no handlers */
+        gettimeofday(&beginning_of_epoch, 0);
+        io_handlers = 0;
+
+        /* select_timer_block becomes the timer code's blocking call */
+        initialize_timer(select_timer_block);
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ * Copyright (c) 2002 Eric Hoffman
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <table.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+/* table.c:
+ * a very simple hash table implementation with parameterizable
+ * comparison and key generation functions. it does resize
+ * in order to accommodate more entries, but never shrinks
+ * the table
+ */
+
+/* Walks the chain of the bucket selected by k.  On a match *success is
+ * set to 1 and the link holding the matching entry is returned;
+ * otherwise *success is 0 and the head of the bucket is returned, which
+ * is where a new entry gets pushed.  A null compare_function argument
+ * disables matching altogether. */
+static table_entry *table_lookup (table t,void *comparator,
+                                  unsigned int k,
+                                  int (*compare_function)(void *, void *),
+                                  int *success)
+{
+        table_entry *bucket=&(t->entries[k%t->size]);
+        table_entry *link=bucket;
+
+        while (*link){
+                if (compare_function && (*link)->key==k &&
+                    (*t->compare_function)((*link)->value,comparator)){
+                        *success=1;
+                        return(link);
+                }
+                link=&((*link)->next);
+        }
+        *success=0;
+        return(bucket);
+}
+
+
+/* Rehashes every entry of t into a freshly allocated array of `size'
+ * buckets.  If the new array cannot be allocated (or size is not
+ * positive) the table is left exactly as it was. */
+static void resize_table(table t, int size)
+{
+        int old_size=t->size;
+        table_entry *old_entries=t->entries;
+        table_entry *new_entries;
+        int i;
+        table_entry j,n;
+        table_entry *position;
+        int success;
+
+        if (size <= 0) return;
+
+        /* allocate before touching the table so a failure is harmless */
+        new_entries=(table_entry *)calloc(size,sizeof(table_entry));
+        if (!new_entries) return;
+
+        t->size=size;
+        t->entries=new_entries;
+
+        for (i=0;i<old_size;i++)
+                for (j=old_entries[i];j;j=n){
+                        n=j->next;
+                        /* a null compare function makes the lookup
+                           return the head of the new bucket */
+                        position=table_lookup(t,0,j->key,0,&success);
+                        j->next= *position;
+                        *position=j;
+                }
+        free(old_entries);
+}
+
+
+/* Function: key_from_int
+ * Arguments: int i: value to compute the key of
+ * Returns: the key
+ */
+unsigned int key_from_int(int i)
+{
+        /* the integer is its own key */
+        unsigned int key = i;
+
+        return(key);
+}
+
+
+/* Function: key_from_string
+ * Arguments: char *s: the null terminated string
+ * to compute the key of
+ * Returns: the key
+ */
+unsigned int key_from_string(char *s)
+{
+        unsigned int hash=0;
+        unsigned char *cursor;
+        int position=0;
+
+        /* a null string hashes to 1 */
+        if (!s) return(1);
+
+        /* each byte is mixed in twice: scaled by 57 and scaled by its
+           position in the string */
+        for (cursor=(unsigned char *)s;*cursor;cursor++){
+                hash^=(*cursor*57)^*cursor*position;
+                position++;
+        }
+        return(hash);
+}
+
+
+/* Function: hash_create_table
+ * Arguments: compare_function: a function to compare
+ * a table instance with a correlator
+ * key_function: a function to generate a 32 bit
+ * hash key from a correlator
+ * Returns: a pointer to the new table
+ */
+table hash_create_table (int (*compare_function)(void *, void *),
+                         unsigned int (*key_function)(unsigned int *))
+{
+        table new=(table)calloc(1,sizeof(struct table));
+
+        /* returns null when either allocation fails */
+        if (!new) return(0);
+
+        new->compare_function=compare_function;
+        new->key_function=key_function;
+        new->number_of_entries=0;
+        /* initial bucket count; hash_table_insert() grows it on demand */
+        new->size=4;
+        new->entries=(table_entry *)calloc(new->size,sizeof(table_entry));
+        if (!new->entries){
+                free(new);
+                return(0);
+        }
+        return(new);
+}
+
+
+/* Function: hash_table_find
+ * Arguments: t: a table to look in
+ * comparator: a value to access the table entry
+ * Returns: the element references to by comparator, or null
+ */
+void *hash_table_find (table t, void *comparator)
+{
+        int found;
+        unsigned int k=(*t->key_function)(comparator);
+        table_entry *slot=table_lookup(t,comparator,k,
+                                       t->compare_function,&found);
+
+        /* slot only refers to a matching entry when found is set */
+        return(found ? (*slot)->value : 0);
+}
+
+
+/* Function: hash_table_insert
+ * Arguments: t: a table to insert the object
+ * value: the object to put in the table
+ * comparator: the value by which the object
+ * will be addressed
+ * Returns: nothing
+ */
+void hash_table_insert (table t, void *value, void *comparator)
+{
+        unsigned int k=(*t->key_function)(comparator);
+        int found;
+        table_entry *slot=table_lookup(t,comparator,k,
+                                       t->compare_function,&found);
+        table_entry node;
+
+        if (!found) {
+                /* no entry yet: push a new one on the front of the
+                   bucket.  NOTE(review): the allocation result is not
+                   checked. */
+                node = (table_entry)malloc(sizeof(struct table_entry));
+                memset(node, 0, sizeof(struct table_entry));
+                node->next= *slot;
+                *slot=node;
+                t->number_of_entries++;
+        } else {
+                /* existing entry: its value is simply replaced */
+                node = *slot;
+        }
+        node->value=value;
+        node->key=k;
+
+        /* double the bucket count once entries outnumber buckets */
+        if (t->number_of_entries > t->size) resize_table(t,t->size*2);
+}
+
+/* Function: hash_table_remove
+ * Arguments: t: the table to remove the object from
+ * comparator: the index value of the object to remove
+ * Returns:
+ */
+void hash_table_remove (table t, void *comparator)
+{
+        int found;
+        unsigned int k=(*t->key_function)(comparator);
+        table_entry *slot=table_lookup(t,comparator,k,
+                                       t->compare_function,&found);
+        table_entry victim;
+
+        if (!found)
+                return;
+
+        /* unlink the entry; the stored value itself is not freed */
+        victim=*slot;
+        *slot=victim->next;
+        free(victim);
+        t->number_of_entries--;
+}
+
+/* Function: hash_iterate_table_entries
+ * Arguments: t: the table to iterate over
+ * handler: a function to call with each element
+ * of the table, along with arg
+ * arg: the opaque object to pass to handler
+ * Returns: nothing
+ */
+void hash_iterate_table_entries(table t,
+                                void (*handler)(void *,void *),
+                                void *arg)
+{
+        int i;
+
+        for (i=0;i<t->size;i++){
+                table_entry *link=t->entries+i;
+
+                while (*link){
+                        /* the successor link is taken before the
+                           handler runs */
+                        table_entry *following=&((*link)->next);
+
+                        (*handler)(arg,(*link)->value);
+                        link=following;
+                }
+        }
+}
+
+/* Function: hash_filter_table_entries
+ * Arguments: t: the table to iterate over
+ * handler: a function to call with each element
+ * of the table, along with arg
+ * arg: the opaque object to pass to handler
+ * Returns: nothing
+ * Notes: operations on the table inside handler are not safe
+ *
+ * hash_filter_table_entries() calls the handler function for each
+ * item in the table, passing it arg and the item. The handler function
+ * returns 1 if the item is to be retained in the table, and 0
+ * if it is to be removed.
+ */
+void hash_filter_table_entries(table t, int (*handler)(void *, void *), void *arg)
+{
+        int i;
+
+        for (i=0;i<t->size;i++){
+                table_entry *link=t->entries+i;
+
+                while (*link){
+                        table_entry victim;
+
+                        if ((*handler)(arg,(*link)->value)){
+                                /* retained: step to the next link */
+                                link=&((*link)->next);
+                                continue;
+                        }
+                        /* rejected: unlink and stay on the same link,
+                           which now refers to the successor */
+                        victim=*link;
+                        *link=victim->next;
+                        free(victim);
+                        t->number_of_entries--;
+                }
+        }
+}
+
+/* Function: destroy_table
+ * Arguments: t: the table to free
+ * thunk: a function to call with each element,
+ * most likely free()
+ * Returns: nothing
+ */
+void hash_destroy_table(table t,void (*thunk)(void *))
+{
+        int i;
+
+        for (i=0;i<t->size;i++){
+                table_entry node=t->entries[i];
+
+                while (node){
+                        table_entry following=node->next;
+
+                        /* the optional thunk disposes of the value */
+                        if (thunk) (*thunk)(node->value);
+                        free(node);
+                        node=following;
+                }
+        }
+        free(t->entries);
+        free(t);
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ * Copyright (c) 2002 Eric Hoffman
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+#ifndef E_TABLE
+#define E_TABLE
+
+/* One element of a bucket chain. */
+typedef struct table_entry {
+ /* full hash key as returned by the table's key_function */
+ unsigned int key;
+ /* the stored object; the table code frees the entry, never the value */
+ void *value;
+ /* next entry in the same bucket, or null */
+ struct table_entry *next;
+} *table_entry;
+
+
+/* A chained hash table. */
+typedef struct table {
+ /* number of buckets in entries */
+ unsigned int size;
+ /* number of entries currently stored */
+ int number_of_entries;
+ /* array of `size' bucket heads */
+ table_entry *entries;
+ /* called as (stored value, comparator); non-zero means a match */
+ int (*compare_function)(void *, void *);
+ /* produces the hash key for a comparator */
+ unsigned int (*key_function)(unsigned int *);
+} *table;
+
+/* table.c */
+unsigned int key_from_int(int i);
+unsigned int key_from_string(char *s);
+table hash_create_table(int (*compare_function)(void *, void *), unsigned int (*key_function)(unsigned int *));
+void *hash_table_find(table t, void *comparator);
+void hash_table_insert(table t, void *value, void *comparator);
+void hash_table_remove(table t, void *comparator);
+void hash_iterate_table_entries(table t, void (*handler)(void *, void *), void *arg);
+void hash_filter_table_entries(table t, int (*handler)(void *, void *), void *arg);
+void hash_destroy_table(table t, void (*thunk)(void *));
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* tcpnal.c:
+ This file implements the TCP-based nal by providing glue
+ between the connection service and the generic NAL implementation */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <syscall.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <pqtimer.h>
+#include <dispatch.h>
+#include <bridge.h>
+#include <ipmap.h>
+#include <connection.h>
+
+/* Function: tcpnal_send
+ * Arguments: n: pointer to my nal control block
+ * private: unused
+ * cookie: passed back to the portals library
+ * hdr: pointer to the portals header
+ * type: message type (not used yet, see the FIXME below)
+ * nid: destination node
+ * pid: destination process
+ * niov: number of entries in iov (at most one)
+ * iov: describes the body of the message
+ * len: length of the body
+ * Returns: zero on success
+ *
+ * sends a packet to the peer, after ensuring that a connection exists
+ */
+#warning FIXME: "param 'type' is newly added, make use of it!!"
+int tcpnal_send(nal_cb_t *n,
+                void *private,
+                lib_msg_t *cookie,
+                ptl_hdr_t *hdr,
+                int type,
+                ptl_nid_t nid,
+                ptl_pid_t pid,
+                unsigned int niov,
+                struct iovec *iov,
+                size_t len)
+{
+        bridge b=(bridge)n->nal_data;
+        connection c;
+        struct iovec tiov[2];
+        int count;
+
+        c=force_tcp_connection((manager)b->lower,
+                               PNAL_IP(nid,b),
+                               PNAL_PORT(nid,pid));
+        if (!c)
+                return(1);
+
+        LASSERT (niov <= 1);
+
+        /* header first, optionally followed by the single body
+           fragment, gathered into one writev() */
+        tiov[0].iov_base = hdr;
+        tiov[0].iov_len = sizeof(ptl_hdr_t);
+        count = 1;
+
+        if (len) {
+                tiov[1].iov_base = iov[0].iov_base;
+                tiov[1].iov_len = len;
+                count = 2;
+        }
+
+        /* TODO: this result should be checked. furthermore, provision
+           must be made for the SIGPIPE which is delivered when
+           writing on a tcp socket which has closed underneath
+           the application. there is a linux flag in the sendmsg
+           call which turns off the signalling behaviour, but it is
+           nonstandard */
+        syscall(SYS_writev, c->fd, tiov, count);
+
+        lib_finalize(n, private, cookie);
+
+        return(0);
+}
+
+
+/* Function: tcpnal_recv
+ * Arguments: nal_cb_t *n: pointer to my nal control block
+ * void *private: connection pointer passed through
+ * lib_parse()
+ * lib_msg_t *cookie: passed back to portals library
+ * unsigned int niov: number of entries in iov (at most one)
+ * struct iovec *iov: describes the destination buffer
+ * ptl_size_t mlen: length of the body to deliver
+ * ptl_size_t rlen: length of data in the network
+ * Returns: rlen
+ *
+ * blocking read of the requested data. must drain out the
+ * difference of manipulated and requested lengths from the network
+ */
+int tcpnal_recv(nal_cb_t *n,
+                void *private,
+                lib_msg_t *cookie,
+                unsigned int niov,
+                struct iovec *iov,
+                ptl_size_t mlen,
+                ptl_size_t rlen)
+
+{
+        if (mlen) {
+                LASSERT (niov <= 1);
+                read_connection(private,iov[0].iov_base,mlen);
+                lib_finalize(n, private, cookie);
+        }
+
+        /* Discard whatever the peer sent beyond what was asked for.
+           The excess is read in bounded pieces through a stack buffer,
+           so a large rlen taken off the wire cannot turn into a failed
+           (and previously unchecked) allocation, and mlen > rlen no
+           longer wraps around. */
+        if (rlen > mlen){
+                char trash[4096];
+                ptl_size_t excess=rlen-mlen;
+
+                while (excess > 0){
+                        ptl_size_t chunk=excess;
+
+                        if (chunk > sizeof(trash))
+                                chunk=sizeof(trash);
+                        /* from_connection() treats a zero result of
+                           read_connection() as failure; presumably the
+                           same holds here -- TODO confirm */
+                        if (!read_connection(private,trash,chunk))
+                                break;
+                        excess-=chunk;
+                }
+        }
+
+        return(rlen);
+}
+
+
+/* Function: from_connection:
+ * Arguments: c: the connection to read from
+ * Returns: whether or not to continue reading from this connection,
+ * expressed as a 1 to continue, and a 0 to not
+ *
+ * from_connection() is called from the select loop when i/o is
+ * available. It attempts to read the portals header and
+ * pass it to the generic library for processing.
+ */
+static int from_connection(void *a,connection c)
+{
+        bridge b=a;
+        ptl_hdr_t hdr;
+
+        /* without a header there is nothing to parse: stop reading */
+        if (!read_connection(c, (unsigned char *)&hdr, sizeof(hdr)))
+                return(0);
+
+        lib_parse(b->nal_cb, &hdr, c);
+        return(1);
+}
+
+
+/* Shutdown hook installed by tcpnal_init(): closes down the
+ * connections owned by this bridge's connection manager. */
+static void tcpnal_shutdown(bridge b)
+{
+        shutdown_connections(b->lower);
+        return;
+}
+
+/* Function: tcpnal_init
+ * (the initialiser for the PTL_IFACE_TCP interface)
+ * Arguments: b: the bridge whose nal control block is to be set up;
+ * the port to bind to is derived from its nid and pid
+ * Returns: PTL_OK on success, or PTL_NAL_FAILED on failure
+ */
+int tcpnal_init(bridge b)
+{
+        nal_cb_t *cb=b->nal_cb;
+        manager m;
+
+        /* plug the TCP transport into the control block */
+        cb->cb_send=tcpnal_send;
+        cb->cb_recv=tcpnal_recv;
+        b->shutdown=tcpnal_shutdown;
+
+        m=init_connections(PNAL_PORT(cb->ni.nid,cb->ni.pid),
+                           from_connection,b);
+        if (!m){
+                /* TODO: this needs to shut down the
+                   newly created junk */
+                return(PTL_NAL_FAILED);
+        }
+
+        /* XXX cfs hack */
+        cb->ni.pid=0;
+        b->lower=m;
+        return(PTL_OK);
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ * Copyright (c) 2002 Eric Hoffman
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+/* TODO: make this an explicit type when they become available */
+/* A point in time or an interval, as a 64 bit unsigned count. */
+typedef unsigned long long when;
+
+/* A registered timer.  The code using these fields is not part of this
+ * header; the descriptions below are inferred from the names --
+ * TODO confirm. */
+typedef struct timer {
+ /* callback and its opaque argument */
+ void (*function)(void *);
+ void *arg;
+ /* presumably the time at which the timer is due */
+ when w;
+ int interval;
+ /* presumably set when the timer has been removed */
+ int disable;
+} *timer;
+
+timer register_timer(when, void (*f)(void *), void *a);
+void remove_timer(timer t);
+void timer_loop(void);
+/* NOTE(review): declared here without parameters; check that every
+ * caller and the definition agree with this prototype */
+void initialize_timer(void);
+void register_thunk(void (*f)(void *),void *a);
+
+
+/* 2^32; presumably one second expressed in `when' units -- TODO confirm */
+#define HZ 0x100000000ull
+
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cray Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ */
+
+/* Shorthand names for unsigned integers of the width given in the name.
+ * NOTE(review): `unsigned long' is 64 bits wide on LP64 platforms, so
+ * uint32 only has the width its name promises where long is 32 bits --
+ * confirm the intended targets before relying on it. */
+typedef unsigned short uint16;
+typedef unsigned long uint32;
+typedef unsigned long long uint64;
+typedef unsigned char uint8;
--- /dev/null
+Makefile
+Makefile.in
+acceptor
+debugctl
+ptlctl
+.deps
+routerstat
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+
+COMPILE = gcc -Wall -g -I$(srcdir)/../include
+LINK = gcc -o $@
+
+sbin_PROGRAMS = acceptor ptlctl debugctl routerstat
+lib_LIBRARIES = libptlctl.a
+
+acceptor_SOURCES = acceptor.c # -lefence
+
+libptlctl_a_SOURCES = portals.c debug.c l_ioctl.c parser.c parser.h
+
+ptlctl_SOURCES = ptlctl.c
+ptlctl_LDADD = -L. -lptlctl -lncurses # -lefence
+ptlctl_DEPENDENCIES = libptlctl.a
+
+debugctl_SOURCES = debugctl.c
+debugctl_LDADD = -L. -lptlctl -lncurses # -lefence
+debugctl_DEPENDENCIES = libptlctl.a
+
+routerstat_SOURCES = routerstat.c
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/tcp.h>
+#include <netdb.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <asm/byteorder.h>
+#include <syslog.h>
+
+#include <errno.h>
+#include <limits.h>
+
+#include <portals/api-support.h>
+#include <portals/list.h>
+#include <portals/lib-types.h>
+
+/* should get this from autoconf somehow */
+#ifndef PIDFILE_DIR
+#define PIDFILE_DIR "/var/run"
+#endif
+
+#define PROGNAME "acceptor"
+
+/* Writes the current process id to PIDFILE_DIR/<name>-<port>.pid; a
+ * failure to open the file is reported through syslog. */
+void create_pidfile(char *name, int port)
+{
+        char pidfile[1024];
+        FILE *fp;
+
+        snprintf(pidfile, sizeof(pidfile), "%s/%s-%d.pid",
+                 PIDFILE_DIR, name, port);
+
+        fp = fopen(pidfile, "w");
+        if (fp == NULL) {
+                syslog(LOG_ERR, "%s: %s\n", pidfile,
+                       strerror(errno));
+                return;
+        }
+
+        fprintf(fp, "%d\n", getpid());
+        fclose(fp);
+}
+
+/* Returns 1, after complaining on stderr, when the pid file for this
+ * name and port already exists; 0 otherwise. */
+int pidfile_exists(char *name, int port)
+{
+        char pidfile[1024];
+
+        snprintf(pidfile, sizeof(pidfile), "%s/%s-%d.pid",
+                 PIDFILE_DIR, name, port);
+
+        if (access(pidfile, F_OK) != 0)
+                return (0);
+
+        fprintf(stderr, "%s: exists, acceptor already running.\n",
+                pidfile);
+        return (1);
+}
+
+/* Parses a decimal number with an optional k/K, m/M or g/G suffix
+ * (multiplying by 2^10, 2^20 or 2^30) into *sizep.
+ * Returns 0 on success, or -1 when no number could be read or when the
+ * scaled value does not fit in an int; *sizep is then left untouched. */
+int
+parse_size (int *sizep, char *str)
+{
+        int size;
+        char mod[32];
+        int shift = 0;
+        long long scaled;
+
+        switch (sscanf (str, "%d%1[gGmMkK]", &size, mod))
+        {
+        default:
+                return (-1);
+
+        case 1:
+                break;
+
+        case 2:
+                switch (*mod)
+                {
+                case 'g':
+                case 'G':
+                        shift = 30;
+                        break;
+
+                case 'm':
+                case 'M':
+                        shift = 20;
+                        break;
+
+                case 'k':
+                case 'K':
+                        shift = 10;
+                        break;
+
+                default:
+                        break;
+                }
+                break;
+        }
+
+        /* scale in a wider type: shifting the int directly overflows
+           for large values (and is undefined for negative ones) */
+        scaled = (long long)size * (1LL << shift);
+        if (scaled > INT_MAX || scaled < INT_MIN)
+                return (-1);
+
+        *sizep = (int)scaled;
+        return (0);
+}
+
+/* Logs, through syslog, the peer's host name (or dotted address when
+ * it cannot be resolved), its NID, the socket's send and receive buffer
+ * sizes and whether nagle is disabled.  net_ip is in network order. */
+void
+show_connection (int fd, __u32 net_ip, ptl_nid_t nid)
+{
+        struct hostent *h = gethostbyaddr ((char *)&net_ip, sizeof net_ip, AF_INET);
+        __u32 host_ip = ntohl (net_ip);
+        int rxmem = 0;
+        int txmem = 0;
+        int nonagle = 0;
+        socklen_t len;          /* the type getsockopt() expects */
+        char host[1024];
+
+        len = sizeof (txmem);
+        if (getsockopt (fd, SOL_SOCKET, SO_SNDBUF, &txmem, &len) != 0)
+                perror ("Cannot get write buffer size");
+
+        len = sizeof (rxmem);
+        if (getsockopt (fd, SOL_SOCKET, SO_RCVBUF, &rxmem, &len) != 0)
+                perror ("Cannot get read buffer size");
+
+        len = sizeof (nonagle);
+        if (getsockopt (fd, IPPROTO_TCP, TCP_NODELAY, &nonagle, &len) != 0)
+                perror ("Cannot get nagle");
+
+        if (h == NULL)
+                snprintf (host, sizeof(host), "%d.%d.%d.%d", (host_ip >> 24) & 0xff,
+                          (host_ip >> 16) & 0xff, (host_ip >> 8) & 0xff, host_ip & 0xff);
+        else
+                snprintf (host, sizeof(host), "%s", h->h_name);
+
+        syslog (LOG_INFO, "Accepted host: %s NID: "LPX64" snd: %d rcv %d nagle: %s\n",
+                host, nid, txmem, rxmem, nonagle ? "disabled" : "enabled");
+}
+
+/* Writes exactly nob bytes from buffer to cfd, retrying after EINTR
+ * and after partial writes.
+ * Returns 0 on success or the negative result of the failing write();
+ * aborts if write() reports that nothing was written. */
+int
+sock_write (int cfd, void *buffer, int nob)
+{
+        while (nob > 0)
+        {
+                int rc = write (cfd, buffer, nob);
+
+                if (rc < 0)
+                {
+                        if (errno == EINTR)
+                                continue;
+
+                        return (rc);
+                }
+
+                if (rc == 0)
+                {
+                        fprintf (stderr, "Unexpected zero sock_write\n");
+                        abort();
+                }
+
+                /* step past the bytes just written (not by the number
+                   of bytes still outstanding) */
+                buffer = (char *)buffer + rc;
+                nob -= rc;
+        }
+
+        return (0);
+}
+
+/* Reads exactly nob bytes from cfd into buffer, retrying after EINTR
+ * and after partial reads.
+ * Returns 0 on success, the negative result of the failing read(), or
+ * -1 with errno set to ECONNABORTED on end of file. */
+int
+sock_read (int cfd, void *buffer, int nob)
+{
+        while (nob > 0)
+        {
+                int rc = read (cfd, buffer, nob);
+
+                if (rc < 0)
+                {
+                        if (errno == EINTR)
+                                continue;
+
+                        return (rc);
+                }
+
+                if (rc == 0) /* EOF */
+                {
+                        errno = ECONNABORTED;
+                        return (-1);
+                }
+
+                /* step past the bytes just read (not by the number of
+                   bytes still outstanding) */
+                buffer = (char *)buffer + rc;
+                nob -= rc;
+        }
+
+        return (0);
+}
+
+/* Performs the HELLO handshake on cfd: sends a HELLO header carrying
+ * my_nid and the protocol magic/version, then reads and validates the
+ * peer's HELLO.
+ * Returns 0 with *peer_nid set on success, -1 on any i/o error, bad
+ * magic, incompatible version or unexpected header. */
+int
+exchange_nids (int cfd, ptl_nid_t my_nid, ptl_nid_t *peer_nid)
+{
+        int rc;
+        ptl_hdr_t hdr;
+        ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid;
+
+        LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid));
+
+        memset (&hdr, 0, sizeof (hdr));
+
+        hmv->magic = __cpu_to_le32 (PORTALS_PROTO_MAGIC);
+        hmv->version_major = __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR);
+        hmv->version_minor = __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR);
+
+        hdr.src_nid = __cpu_to_le64 (my_nid);
+        hdr.type = __cpu_to_le32 (PTL_MSG_HELLO);
+
+        /* Assume there's sufficient socket buffering for a portals HELLO header */
+        rc = sock_write (cfd, &hdr, sizeof (hdr));
+        if (rc != 0) {
+                perror ("Can't send initial HELLO");
+                return (-1);
+        }
+
+        /* First few bytes down the wire are the portals protocol magic and
+         * version, no matter what protocol version we're running. */
+
+        rc = sock_read (cfd, hmv, sizeof (*hmv));
+        if (rc != 0) {
+                perror ("Can't read from peer");
+                return (-1);
+        }
+
+        /* everything read from the wire is little-endian */
+        if (__le32_to_cpu (hmv->magic) != PORTALS_PROTO_MAGIC) {
+                fprintf (stderr, "Bad magic %#08x (%#08x expected)\n",
+                         __le32_to_cpu (hmv->magic), PORTALS_PROTO_MAGIC);
+                return (-1);
+        }
+
+        if (__le16_to_cpu (hmv->version_major) != PORTALS_PROTO_VERSION_MAJOR ||
+            __le16_to_cpu (hmv->version_minor) != PORTALS_PROTO_VERSION_MINOR) {
+                fprintf (stderr, "Incompatible protocol version %d.%d (%d.%d expected)\n",
+                         __le16_to_cpu (hmv->version_major),
+                         __le16_to_cpu (hmv->version_minor),
+                         PORTALS_PROTO_VERSION_MAJOR,
+                         PORTALS_PROTO_VERSION_MINOR);
+                /* the rest of the stream cannot be interpreted */
+                return (-1);
+        }
+
+        /* version 0 sends magic/version as the dest_nid of a 'hello' header,
+         * so read the rest of it in now... */
+        LASSERT (PORTALS_PROTO_VERSION_MAJOR == 0);
+        rc = sock_read (cfd, hmv + 1, sizeof (hdr) - sizeof (*hmv));
+        if (rc != 0) {
+                perror ("Can't read rest of HELLO hdr");
+                return (-1);
+        }
+
+        /* ...and check we got what we expected */
+        if (__le32_to_cpu (hdr.type) != PTL_MSG_HELLO ||
+            __le32_to_cpu (PTL_HDR_LENGTH (&hdr)) != 0) {
+                fprintf (stderr, "Expecting a HELLO hdr with 0 payload,"
+                         " but got type %d with %d payload\n",
+                         __le32_to_cpu (hdr.type),
+                         __le32_to_cpu (PTL_HDR_LENGTH (&hdr)));
+                return (-1);
+        }
+
+        *peer_nid = __le64_to_cpu (hdr.src_nid);
+        return (0);
+}
+
+/*
+ * Print the command-line synopsis for program 'myname' to stderr and exit
+ * with status 1.  The synopsis lists every option accepted by main()'s
+ * getopt string ("N:r:s:nlxi").
+ */
+void
+usage (char *myname)
+{
+        fprintf (stderr, "Usage: %s [-r recv_mem] [-s send_mem] [-n] [-l] [-x] [-i] [-N nal_id] port\n", myname);
+        exit (1);
+}
+
+/*
+ * Acceptor daemon entry point.
+ *
+ * Parses the options, opens a listening TCP socket on the given port, opens
+ * /dev/portals, daemonizes, and then loops forever accepting connections.
+ * Each accepted socket is handed to the kernel NAL with the
+ * NAL_CMD_REGISTER_PEER_FD ioctl and then closed in this process.
+ *
+ * Options:
+ *   -r size   set SO_RCVBUF on the listening socket
+ *   -s size   set SO_SNDBUF on the listening socket
+ *   -n        do not set TCP_NODELAY
+ *   -l        passed as the 'noclose' argument of daemon()
+ *   -x        learn the peer NID via exchange_nids() instead of using the
+ *             peer's IP address
+ *   -i        passed to the kernel as ioc_flags of the register ioctl
+ *   -N nal    NAL id to register with (default SOCKNAL)
+ */
+int main(int argc, char **argv)
+{
+        int o, fd, rc, port, pfd;
+        struct sockaddr_in srvaddr;
+        int c;
+        int rxmem = 0;
+        int txmem = 0;
+        int noclose = 0;
+        int nonagle = 1;
+        int nal = SOCKNAL;
+        int xchg_nids = 0;
+        int bind_irq = 0;
+
+        while ((c = getopt (argc, argv, "N:r:s:nlxi")) != -1)
+                switch (c)
+                {
+                case 'r':
+                        if (parse_size (&rxmem, optarg) != 0 || rxmem < 0)
+                                usage (argv[0]);
+                        break;
+
+                case 's':
+                        if (parse_size (&txmem, optarg) != 0 || txmem < 0)
+                                usage (argv[0]);
+                        break;
+
+                case 'n':
+                        nonagle = 0;
+                        break;
+
+                case 'l':
+                        noclose = 1;
+                        break;
+
+                case 'x':
+                        xchg_nids = 1;
+                        break;
+
+                case 'i':
+                        bind_irq = 1;
+                        break;
+
+                case 'N':
+                        if (parse_size(&nal, optarg) != 0 ||
+                            nal < 0 || nal > NAL_MAX_NR)
+                                usage(argv[0]);
+                        break;
+
+                default:
+                        usage (argv[0]);
+                        break;
+                }
+
+        if (optind >= argc)
+                usage (argv[0]);
+
+        port = atol(argv[optind++]);
+
+        if (pidfile_exists(PROGNAME, port))
+                exit(1);
+
+        memset(&srvaddr, 0, sizeof(srvaddr));
+        srvaddr.sin_family = AF_INET;
+        srvaddr.sin_port = htons(port);
+        srvaddr.sin_addr.s_addr = INADDR_ANY;
+
+        fd = socket(PF_INET, SOCK_STREAM, 0);
+        if (fd < 0) {
+                perror("opening socket");
+                exit(1);
+        }
+
+        o = 1;
+        if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &o, sizeof(o))) {
+                perror("Cannot set REUSEADDR socket opt");
+                exit(1);
+        }
+
+        if (nonagle)
+        {
+                o = 1;
+                rc = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &o, sizeof (o));
+                if (rc != 0)
+                {
+                        perror ("Cannot disable nagle");
+                        exit (1);
+                }
+        }
+
+        if (txmem != 0)
+        {
+                rc = setsockopt (fd, SOL_SOCKET, SO_SNDBUF, &txmem, sizeof (txmem));
+                if (rc != 0)
+                {
+                        perror ("Cannot set write buffer size");
+                        exit (1);
+                }
+        }
+
+        if (rxmem != 0)
+        {
+                rc = setsockopt (fd, SOL_SOCKET, SO_RCVBUF, &rxmem, sizeof (rxmem));
+                if (rc != 0)
+                {
+                        perror ("Cannot set read buffer size");
+                        exit (1);
+                }
+        }
+
+        rc = bind(fd, (struct sockaddr *)&srvaddr, sizeof(srvaddr));
+        if ( rc == -1 ) {
+                perror("bind: ");
+                exit(1);
+        }
+
+        if (listen(fd, 127)) {
+                perror("listen: ");
+                exit(1);
+        }
+        fprintf(stderr, "listening on port %d\n", port);
+
+        pfd = open("/dev/portals", O_RDWR);
+        if ( pfd < 0 ) {
+                perror("opening portals device");
+                exit(1);
+        }
+
+        rc = daemon(1, noclose);
+        if (rc < 0) {
+                perror("daemon(): ");
+                exit(1);
+        }
+
+        openlog(PROGNAME, LOG_PID, LOG_DAEMON);
+        syslog(LOG_INFO, "started, listening on port %d\n", port);
+        create_pidfile(PROGNAME, port);
+
+        while (1) {
+                struct sockaddr_in clntaddr;
+                /* accept() takes a socklen_t *, not an int * */
+                socklen_t len = sizeof(clntaddr);
+                int cfd;
+                struct portal_ioctl_data data;
+                ptl_nid_t peer_nid;
+
+                cfd = accept(fd, (struct sockaddr *)&clntaddr, &len);
+                if ( cfd < 0 ) {
+                        /* a signal is not a reason to stop serving */
+                        if (errno == EINTR)
+                                continue;
+
+                        /* anything else is fatal: report it with a failing
+                         * exit status */
+                        perror("accept");
+                        exit(1);
+                }
+
+                if (!xchg_nids)
+                        peer_nid = ntohl (clntaddr.sin_addr.s_addr); /* HOST byte order */
+                else
+                {
+                        PORTAL_IOC_INIT (data);
+                        data.ioc_nal = nal;
+                        rc = ioctl (pfd, IOC_PORTAL_GET_NID, &data);
+                        if (rc < 0)
+                        {
+                                perror ("Can't get my NID");
+                                close (cfd);
+                                continue;
+                        }
+
+                        rc = exchange_nids (cfd, data.ioc_nid, &peer_nid);
+                        if (rc != 0)
+                        {
+                                close (cfd);
+                                continue;
+                        }
+                }
+
+                show_connection (cfd, clntaddr.sin_addr.s_addr, peer_nid);
+
+                PORTAL_IOC_INIT(data);
+                data.ioc_fd = cfd;
+                data.ioc_nal = nal;
+                data.ioc_nal_cmd = NAL_CMD_REGISTER_PEER_FD;
+                data.ioc_nid = peer_nid;
+                data.ioc_flags = bind_irq;
+
+                if (ioctl(pfd, IOC_PORTAL_NAL_CMD, &data) < 0) {
+                        perror("ioctl failed");
+
+                } else {
+                        printf("client registered\n");
+                }
+                /* this process's descriptor is closed whether or not the
+                 * registration succeeded */
+                rc = close(cfd);
+                if (rc)
+                        perror ("close failed");
+        }
+
+        /* not reached: the accept loop above never terminates */
+        closelog();
+        exit(0);
+
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Some day I'll split all of this functionality into a cfs_debug module
+ * of its own. That day is not today.
+ *
+ */
+
+#include <stdio.h>
+#include <netdb.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <time.h>
+#include <syscall.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#define BUG() /* workaround for module.h includes */
+#include <linux/version.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+#include <linux/module.h>
+#endif
+
+#include <portals/api-support.h>
+#include <portals/ptlctl.h>
+#include "parser.h"
+
+/* Scratch buffer handed to portal_ioctl_pack() by the commands below;
+ * 'buf' points at it and 'max' is the capacity passed alongside. */
+static char rawbuf[8192];
+static char *buf = rawbuf;
+static int max = 8192;
+//static int g_pfd = -1;
+/* Per-subsystem display flag, indexed like portal_debug_subsystems[];
+ * non-zero means lines from that subsystem are kept by dump_buffer(). */
+static int subsystem_array[1 << 8];
+/* Bitmask of enabled debug types; bit i corresponds to
+ * portal_debug_masks[i].  All types start enabled. */
+static int debug_mask = ~0;
+
+/* Subsystem names accepted by the filter/show commands; NULL-terminated. */
+static const char *portal_debug_subsystems[] =
+ {"undefined", "mdc", "mds", "osc", "ost", "class", "obdfs", "llite",
+ "rpc", "ext2obd", "portals", "socknal", "qswnal", "pinger", "filter",
+ "obdtrace", "echo", "ldlm", "lov", "gmnal", "router", "ptldb", NULL};
+/* Debug type names accepted by the filter/show commands; NULL-terminated. */
+static const char *portal_debug_masks[] =
+ {"trace", "inode", "super", "ext2", "malloc", "cache", "info", "ioctl",
+ "blocks", "net", "warning", "buffs", "other", "dentry", "portals",
+ "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace", NULL};
+
+/* Maps a debug_daemon sub-command name to the value sent to the kernel. */
+struct debug_daemon_cmd {
+ char *cmd;
+ unsigned int cmdv;
+};
+
+/* Sub-commands understood by jt_dbg_debug_daemon(); ends with a {0, 0}
+ * sentinel. */
+static const struct debug_daemon_cmd portal_debug_daemon_cmd[] = {
+ {"start", DEBUG_DAEMON_START},
+ {"stop", DEBUG_DAEMON_STOP},
+ {"pause", DEBUG_DAEMON_PAUSE},
+ {"continue", DEBUG_DAEMON_CONTINUE},
+ {0, 0}
+};
+
+/*
+ * Enable (enable != 0) or disable the subsystem and/or debug type called
+ * 'name' (case-insensitive).  The special names "all_subs" and "all_types"
+ * select every subsystem or every debug type respectively.  A line is
+ * printed for each entry changed.
+ *
+ * Returns 1 if 'name' matched anything, 0 otherwise.
+ */
+static int do_debug_mask(char *name, int enable)
+{
+        const char *action = enable ? "Enabling" : "Disabling";
+        int every_sub = (strcasecmp(name, "all_subs") == 0);
+        int every_type = (strcasecmp(name, "all_types") == 0);
+        int matched = 0;
+        int idx;
+
+        /* subsystems: one flag per table entry */
+        for (idx = 0; portal_debug_subsystems[idx] != NULL; idx++) {
+                if (!every_sub &&
+                    strcasecmp(name, portal_debug_subsystems[idx]) != 0)
+                        continue;
+
+                printf("%s output from subsystem \"%s\"\n", action,
+                       portal_debug_subsystems[idx]);
+                subsystem_array[idx] = enable;
+                matched = 1;
+        }
+
+        /* debug types: one bit per table entry */
+        for (idx = 0; portal_debug_masks[idx] != NULL; idx++) {
+                if (!every_type &&
+                    strcasecmp(name, portal_debug_masks[idx]) != 0)
+                        continue;
+
+                printf("%s output of type \"%s\"\n", action,
+                       portal_debug_masks[idx]);
+                if (enable)
+                        debug_mask |= (1 << idx);
+                else
+                        debug_mask &= ~(1 << idx);
+                matched = 1;
+        }
+
+        return matched;
+}
+
+/*
+ * Initialise the debug filter state: every subsystem starts enabled.
+ * 'argc' and 'argv' are unused.  Always returns 0.
+ */
+int dbg_initialize(int argc, char **argv)
+{
+        size_t i;
+
+        /* Store a real 1 in every slot.  memset() fills bytes, which would
+         * leave each int holding 0x01010101 instead of the value that
+         * do_debug_mask() writes when a subsystem is enabled. */
+        for (i = 0; i < sizeof(subsystem_array) / sizeof(subsystem_array[0]); i++)
+                subsystem_array[i] = 1;
+        return 0;
+}
+
+/*
+ * "filter" command: disable output for each subsystem or debug type named
+ * in argv[1..].  Unknown names are reported on stderr.  Always returns 0.
+ */
+int jt_dbg_filter(int argc, char **argv)
+{
+        char **arg;
+
+        if (argc < 2) {
+                fprintf(stderr, "usage: %s <subsystem ID or debug mask>\n",
+                        argv[0]);
+                return 0;
+        }
+
+        for (arg = &argv[1]; arg < &argv[argc]; arg++) {
+                if (do_debug_mask(*arg, 0))
+                        continue;
+                fprintf(stderr, "Unknown subsystem or debug type: %s\n",
+                        *arg);
+        }
+        return 0;
+}
+
+/*
+ * "show" command: enable output for each subsystem or debug type named in
+ * argv[1..].  Unknown names are reported on stderr.  Always returns 0.
+ */
+int jt_dbg_show(int argc, char **argv)
+{
+        char **arg;
+
+        if (argc < 2) {
+                fprintf(stderr, "usage: %s <subsystem ID or debug mask>\n",
+                        argv[0]);
+                return 0;
+        }
+
+        for (arg = &argv[1]; arg < &argv[argc]; arg++) {
+                if (do_debug_mask(*arg, 1))
+                        continue;
+                fprintf(stderr, "Unknown subsystem or debug type: %s\n",
+                        *arg);
+        }
+
+        return 0;
+}
+
+/*
+ * Write 'value', formatted as a decimal string (including its terminating
+ * NUL), to the file 'procpath'.
+ *
+ * Returns 0 on success, or the negative result of open()/write() after
+ * printing a diagnostic to stderr.
+ */
+static int applymask(char* procpath, int value)
+{
+        char text[64];
+        int len = snprintf(text, sizeof(text), "%d", value);
+        int fd;
+        int rc;
+
+        fd = open(procpath, O_WRONLY);
+        if (fd == -1) {
+                fprintf(stderr, "Unable to open %s: %s\n",
+                        procpath, strerror(errno));
+                return fd;
+        }
+
+        rc = write(fd, text, len + 1);
+        if (rc < 0) {
+                /* report first so close() cannot disturb errno */
+                fprintf(stderr, "Write to %s failed: %s\n",
+                        procpath, strerror(errno));
+                close(fd);      /* don't leak the descriptor on failure */
+                return rc;
+        }
+
+        close(fd);
+        return 0;
+}
+
+extern char *dump_filename;
+extern int dump(int dev_id, int opc, void *buf);
+
+/*
+ * Push the subsystem mask and the debug-type mask out.  When an ioctl dump
+ * file is active (dump_filename set) the masks are recorded in it as a
+ * PTL_IOC_DEBUG_MASK entry; otherwise they are written to the two
+ * /proc/sys/portals control files.  A summary line is printed either way.
+ */
+static void applymask_all(unsigned int subs_mask, unsigned int debug_mask)
+{
+        if (dump_filename) {
+                struct portals_debug_ioctl_data data;
+
+                data.hdr.ioc_len = sizeof(data);
+                data.hdr.ioc_version = 0;
+                data.subs = subs_mask;
+                data.debug = debug_mask;
+
+                dump(OBD_DEV_ID, PTL_IOC_DEBUG_MASK, &data);
+        } else {
+                applymask("/proc/sys/portals/subsystem_debug", subs_mask);
+                applymask("/proc/sys/portals/debug", debug_mask);
+        }
+
+        printf("Applied subsystem_debug=%d, debug=%d to /proc/sys/portals\n",
+               subs_mask, debug_mask);
+}
+
+/*
+ * "list" command.  argv[1] selects the action (case-insensitive):
+ *   subs        print the known subsystem names
+ *   types       print the known debug type names
+ *   applymasks  build the subsystem mask from the enabled subsystems and
+ *               apply it together with the current debug mask
+ * Any other word is silently ignored.  Always returns 0.
+ */
+int jt_dbg_list(int argc, char **argv)
+{
+        const char *what;
+        int n;
+
+        if (argc != 2) {
+                fprintf(stderr, "usage: %s <subs || types>\n", argv[0]);
+                return 0;
+        }
+        what = argv[1];
+
+        if (strcasecmp(what, "subs") == 0) {
+                printf("Subsystems: all_subs");
+                for (n = 0; portal_debug_subsystems[n] != NULL; n++)
+                        printf(", %s", portal_debug_subsystems[n]);
+                printf("\n");
+                return 0;
+        }
+
+        if (strcasecmp(what, "types") == 0) {
+                printf("Types: all_types");
+                for (n = 0; portal_debug_masks[n] != NULL; n++)
+                        printf(", %s", portal_debug_masks[n]);
+                printf("\n");
+                return 0;
+        }
+
+        if (strcasecmp(what, "applymasks") == 0) {
+                unsigned int subsystem_mask = 0;
+
+                /* one bit per enabled subsystem, in table order */
+                for (n = 0; portal_debug_subsystems[n] != NULL; n++) {
+                        if (subsystem_array[n])
+                                subsystem_mask |= (1 << n);
+                }
+                applymask_all(subsystem_mask, debug_mask);
+        }
+        return 0;
+}
+
+/* Filter the newline-separated debug log held in buf[0..size) and write the
+ * lines that pass the current subsystem/type filters to 'fd', then print a
+ * kept/dropped summary on stdout.
+ *
+ * Each line is parsed as two hexadecimal numbers (subsystem, then debug
+ * type) followed by the rest of the line.  If 'raw' is true the whole line
+ * is written; otherwise the two leading numbers are stripped.
+ *
+ * The buffer is modified temporarily (newline replaced by NUL while a line
+ * is printed) and restored afterwards, so it must be writable.  Parsing
+ * stops at the first chunk that contains no newline. */
+static void dump_buffer(FILE *fd, char *buf, int size, int raw)
+{
+ char *p, *z;
+ unsigned long subsystem, debug, dropped = 0, kept = 0;
+ int max_sub, max_type;
+
+ /* count the entries of the two name tables */
+ for (max_sub = 0; portal_debug_subsystems[max_sub] != NULL; max_sub++)
+ ;
+ /* NOTE(review): max_type is computed but never used below */
+ for (max_type = 0; portal_debug_masks[max_type] != NULL; max_type++)
+ ;
+
+ while (size) {
+ /* p = end of the current line */
+ p = memchr(buf, '\n', size);
+ if (!p)
+ break;
+ /* NOTE(review): both strtoul() calls run before the line is
+ * NUL-terminated, so on a malformed line they may scan past 'p' */
+ subsystem = strtoul(buf, &z, 16);
+ /* skip the single separator character after the first number */
+ debug = strtoul(z + 1, &z, 16);
+
+ /* step over the separator after the second number */
+ z++;
+ /* for some reason %*s isn't working. */
+ *p = '\0';
+ /* keep the line if its subsystem is known and enabled, and its type
+ * is either zero or has a bit in common with debug_mask */
+ if (subsystem < max_sub &&
+ subsystem_array[subsystem] &&
+ (!debug || (debug_mask & debug))) {
+ if (raw)
+ fprintf(fd, "%s\n", buf);
+ else
+ fprintf(fd, "%s\n", z);
+ //printf("%s\n", buf);
+ kept++;
+ } else {
+ //fprintf(stderr, "dropping line (%lx:%lx): %s\n", subsystem, debug, buf);
+ dropped++;
+ }
+ /* restore the newline and advance to the next line */
+ *p = '\n';
+ p++;
+ size -= (p - buf);
+ buf = p;
+ }
+
+ printf("Debug log: %lu lines, %lu kept, %lu dropped.\n",
+ dropped + kept, kept, dropped);
+}
+
+/*
+ * "debug_kernel [file] [raw]" command: fetch the kernel debug buffer with
+ * IOC_PORTAL_GET_DEBUG and print the filtered contents to 'file' (stdout
+ * when omitted).  'raw' is parsed with atoi() and passed to dump_buffer();
+ * it defaults to 1.
+ *
+ * Returns 0 on success (including an empty debug buffer) and on a usage
+ * error; returns -1 if the output file cannot be opened, the buffer cannot
+ * be allocated, or the ioctl cannot be packed or fails.
+ */
+int jt_dbg_debug_kernel(int argc, char **argv)
+{
+        int rc = -1, raw = 1;
+        FILE *fd = stdout;
+        const int databuf_size = (6 << 20);
+        struct portal_ioctl_data data, *newdata;
+        char *databuf = NULL;
+
+        if (argc > 3) {
+                fprintf(stderr, "usage: %s [file] [raw]\n", argv[0]);
+                return 0;
+        }
+
+        if (argc > 1) {
+                fd = fopen(argv[1], "w");
+                if (fd == NULL) {
+                        fprintf(stderr, "fopen(%s) failed: %s\n", argv[1],
+                                strerror(errno));
+                        return -1;
+                }
+        }
+        if (argc > 2)
+                raw = atoi(argv[2]);
+
+        databuf = malloc(databuf_size);
+        if (!databuf) {
+                fprintf(stderr, "No memory for buffer.\n");
+                goto out;
+        }
+
+        memset(&data, 0, sizeof(data));
+        data.ioc_plen1 = databuf_size;
+        data.ioc_pbuf1 = databuf;
+
+        if (portal_ioctl_pack(&data, &buf, max) != 0) {
+                fprintf(stderr, "portal_ioctl_pack failed.\n");
+                goto out;
+        }
+
+        if (l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_DEBUG, buf)) {
+                fprintf(stderr, "IOC_PORTAL_GET_DEBUG failed: %s\n",
+                        strerror(errno));
+                goto out;
+        }
+
+        /* the reply is read back from the packed ioctl buffer */
+        newdata = (struct portal_ioctl_data *)buf;
+        if (newdata->ioc_size > 0)
+                dump_buffer(fd, databuf, newdata->ioc_size, raw);
+        else
+                fprintf(stderr, "No data in the debug buffer.\n");
+
+        rc = 0;
+ out:
+        /* every failure above arrives here with rc still -1, so callers
+         * see the error instead of a false success */
+        free(databuf);
+        if (fd != stdout)
+                fclose(fd);
+        return rc;
+}
+
+/*
+ * "debug_daemon" command: look up the sub-command in argv[1] (start, stop,
+ * pause, continue) and send it to the kernel with IOC_PORTAL_SET_DAEMON.
+ * "start" also takes the output file name in argv[2] and an optional size
+ * in argv[3], which is passed through in ioc_misc (the usage text calls it
+ * "#MB").
+ *
+ * Returns 0 on success or on a usage error, -1 if the file is not usable,
+ * the size cannot be parsed or the ioctl cannot be packed, and the ioctl's
+ * own result if that fails.
+ */
+int jt_dbg_debug_daemon(int argc, char **argv)
+{
+ int i, rc;
+ unsigned int cmd = 0;
+ FILE *fd = stdout;
+ struct portal_ioctl_data data;
+
+ if (argc <= 1) {
+ fprintf(stderr, "usage: %s [start file <#MB>|stop|pause|"
+ "continue]\n", argv[0]);
+ return 0;
+ }
+ /* translate the sub-command name into its numeric value */
+ for (i = 0; portal_debug_daemon_cmd[i].cmd != NULL; i++) {
+ if (strcasecmp(argv[1], portal_debug_daemon_cmd[i].cmd) == 0) {
+ cmd = portal_debug_daemon_cmd[i].cmdv;
+ break;
+ }
+ }
+ /* the loop ran to the sentinel: unknown sub-command */
+ if (portal_debug_daemon_cmd[i].cmd == NULL) {
+ fprintf(stderr, "usage: %s [start file <#MB>|stop|pause|"
+ "continue]\n", argv[0]);
+ return 0;
+ }
+ memset(&data, 0, sizeof(data));
+ if (cmd == DEBUG_DAEMON_START) {
+ if (argc < 3) {
+ fprintf(stderr, "usage: %s [start file <#MB>|stop|"
+ "pause|continue]\n", argv[0]);
+ return 0;
+ }
+ /* File does not exist yet: check that it can be created, then
+ * remove the probe file again. */
+ if (access(argv[2], F_OK) != 0) {
+ fd = fopen(argv[2], "w");
+ if (fd != NULL) {
+ fclose(fd);
+ remove(argv[2]);
+ goto ok;
+ }
+ }
+ /* File exists (or could not be created): it must be writable. */
+ if (access(argv[2], W_OK) == 0)
+ goto ok;
+ fprintf(stderr, "fopen(%s) failed: %s\n", argv[2],
+ strerror(errno));
+ return -1;
+ok:
+ /* the file name travels to the kernel as inline buffer 1 */
+ data.ioc_inllen1 = strlen(argv[2]) + 1;
+ data.ioc_inlbuf1 = argv[2];
+ data.ioc_misc = 0;
+ if (argc == 4) {
+ unsigned long size;
+ errno = 0;
+ size = strtoul(argv[3], NULL, 0);
+ if (errno) {
+ fprintf(stderr, "file size(%s): error %s\n",
+ argv[3], strerror(errno));
+ return -1;
+ }
+ data.ioc_misc = size;
+ }
+ }
+ /* the sub-command value is carried in ioc_count */
+ data.ioc_count = cmd;
+ if (portal_ioctl_pack(&data, &buf, max) != 0) {
+ fprintf(stderr, "portal_ioctl_pack failed.\n");
+ return -1;
+ }
+ rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_SET_DAEMON, buf);
+ if (rc < 0) {
+ fprintf(stderr, "IOC_PORTAL_SET_DEMON failed: %s\n",
+ strerror(errno));
+ return rc;
+ }
+ return 0;
+}
+
+/*
+ * "debug_file <input> [output] [raw]" command: map the debug log stored in
+ * file 'input' and print its filtered contents to 'output' (stdout when
+ * omitted).  'raw' is parsed with atoi() and passed to dump_buffer(); it
+ * defaults to 1.
+ *
+ * Returns 0 on success or on a usage error, -1 if a file cannot be opened,
+ * stat'ed or mapped.
+ */
+int jt_dbg_debug_file(int argc, char **argv)
+{
+        int rc, fd = -1, raw = 1;
+        int result = -1;
+        int mapped = 0;
+        FILE *output = stdout;
+        char *databuf = NULL;
+        struct stat statbuf;
+
+        if (argc > 4 || argc < 2) {
+                fprintf(stderr, "usage: %s <input> [output] [raw]\n", argv[0]);
+                return 0;
+        }
+
+        fd = open(argv[1], O_RDONLY);
+        if (fd < 0) {
+                fprintf(stderr, "fopen(%s) failed: %s\n", argv[1],
+                        strerror(errno));
+                return -1;
+        }
+        /* NOTE(review): the raw fstat/fstat64 syscall fills a 'struct stat';
+         * confirm the layouts agree on every supported platform. */
+#warning FIXME: cleanup fstat issue here
+#ifndef SYS_fstat64
+#define __SYS_fstat__ SYS_fstat
+#else
+#define __SYS_fstat__ SYS_fstat64
+#endif
+        rc = syscall(__SYS_fstat__, fd, &statbuf);
+        if (rc < 0) {
+                fprintf(stderr, "fstat failed: %s\n", strerror(errno));
+                goto out;
+        }
+
+        if (argc >= 3) {
+                output = fopen(argv[2], "w");
+                if (output == NULL) {
+                        fprintf(stderr, "fopen(%s) failed: %s\n", argv[2],
+                                strerror(errno));
+                        goto out;
+                }
+        }
+
+        if (argc == 4)
+                raw = atoi(argv[3]);
+
+        /* An empty file has nothing to map (mmap rejects a zero length);
+         * dump_buffer() does not touch the buffer when the size is 0. */
+        if (statbuf.st_size > 0) {
+                /* writable private mapping: dump_buffer() edits lines in
+                 * place while printing them */
+                databuf = mmap(NULL, statbuf.st_size, PROT_READ | PROT_WRITE,
+                               MAP_PRIVATE, fd, 0);
+                /* mmap reports failure with MAP_FAILED, not NULL */
+                if (databuf == MAP_FAILED) {
+                        fprintf(stderr, "mmap failed: %s\n", strerror(errno));
+                        goto out;
+                }
+                mapped = 1;
+        }
+
+        dump_buffer(output, databuf, statbuf.st_size, raw);
+        result = 0;
+
+ out:
+        if (mapped)
+                munmap(databuf, statbuf.st_size);
+        if (output != stdout)
+                fclose(output);
+        /* descriptor 0 is a valid descriptor too */
+        if (fd >= 0)
+                close(fd);
+        return result;
+}
+
+/*
+ * "clear" command: ask the kernel to empty its debug buffer via
+ * IOC_PORTAL_CLEAR_DEBUG.  Takes no arguments.
+ *
+ * Returns 0 on success or on a usage error, -1 if the ioctl cannot be
+ * packed or fails.
+ */
+int jt_dbg_clear_debug_buf(int argc, char **argv)
+{
+        struct portal_ioctl_data data;
+        int status;
+
+        if (argc != 1) {
+                fprintf(stderr, "usage: %s\n", argv[0]);
+                return 0;
+        }
+
+        memset(&data, 0, sizeof(data));
+        status = portal_ioctl_pack(&data, &buf, max);
+        if (status != 0) {
+                fprintf(stderr, "portal_ioctl_pack failed.\n");
+                return -1;
+        }
+
+        status = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_CLEAR_DEBUG, buf);
+        if (status == 0)
+                return 0;
+
+        fprintf(stderr, "IOC_PORTAL_CLEAR_DEBUG failed: %s\n",
+                strerror(errno));
+        return -1;
+}
+
+/*
+ * "mark [marker text]" command: insert a marker into the kernel debug
+ * buffer via IOC_PORTAL_MARK_DEBUG.  Without an argument the current time,
+ * as formatted by ctime() minus its trailing newline, is used as the text.
+ *
+ * Returns 0 on success or on a usage error, -1 if the ioctl cannot be
+ * packed or fails.
+ */
+int jt_dbg_mark_debug_buf(int argc, char **argv)
+{
+        time_t now = time(NULL);
+        struct portal_ioctl_data data;
+        char *text;
+        int status;
+
+        if (argc > 2) {
+                fprintf(stderr, "usage: %s [marker text]\n", argv[0]);
+                return 0;
+        }
+
+        text = (argc == 2) ? argv[1] : ctime(&now);
+        if (argc != 2)
+                text[strlen(text) - 1] = '\0'; /* stupid \n */
+
+        /* the marker text travels as inline buffer 1, NUL included */
+        memset(&data, 0, sizeof(data));
+        data.ioc_inllen1 = strlen(text) + 1;
+        data.ioc_inlbuf1 = text;
+        status = portal_ioctl_pack(&data, &buf, max);
+        if (status != 0) {
+                fprintf(stderr, "portal_ioctl_pack failed.\n");
+                return -1;
+        }
+
+        status = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_MARK_DEBUG, buf);
+        if (status == 0)
+                return 0;
+
+        fprintf(stderr, "IOC_PORTAL_MARK_DEBUG failed: %s\n",
+                strerror(errno));
+        return -1;
+}
+
+
+/*
+ * "modules [path] [kernel]" command: for every module in the built-in table
+ * that query_module() reports, print a gdb "add-symbol-file" line giving
+ * the module's object file under 'path' (default "..") and an address
+ * computed from the reported module address.
+ *
+ * Only implemented for kernels older than 2.5; on newer kernels a notice is
+ * printed instead.  Always returns 0.
+ */
+int jt_dbg_modules(int argc, char **argv)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ /* module name -> directory (relative to 'path') holding its .o file;
+ * the table ends with a {NULL, NULL} sentinel */
+ struct mod_paths {
+ char *name, *path;
+ } *mp, mod_paths[] = {
+ {"portals", "portals/linux/oslib"},
+ {"ksocknal", "portals/linux/socknal"},
+ {"obdclass", "lustre/obdclass"},
+ {"ptlrpc", "lustre/ptlrpc"},
+ {"obdext2", "lustre/obdext2"},
+ {"ost", "lustre/ost"},
+ {"osc", "lustre/osc"},
+ {"mds", "lustre/mds"},
+ {"mdc", "lustre/mdc"},
+ {"llite", "lustre/llite"},
+ {"obdecho", "lustre/obdecho"},
+ {"ldlm", "lustre/ldlm"},
+ {"obdfilter", "lustre/obdfilter"},
+ {"extN", "lustre/extN"},
+ {"lov", "lustre/lov"},
+ {"fsfilt_ext3", "lustre/obdclass"},
+ {"fsfilt_extN", "lustre/obdclass"},
+ {"mds_ext2", "lustre/mds"},
+ {"mds_ext3", "lustre/mds"},
+ {"mds_extN", "lustre/mds"},
+ {"ptlbd", "lustre/ptlbd"},
+ {NULL, NULL}
+ };
+ char *path = "..";
+ /* NOTE(review): 'kernel' is assigned from argv[2] but never used below */
+ char *kernel = "linux";
+
+ if (argc >= 2)
+ path = argv[1];
+ if (argc == 3)
+ kernel = argv[2];
+ if (argc > 3) {
+ printf("%s [path] [kernel]\n", argv[0]);
+ return 0;
+ }
+
+ for (mp = mod_paths; mp->name != NULL; mp++) {
+ struct module_info info;
+ int rc;
+ size_t crap;
+ /* local prototype for query_module() */
+ int query_module(const char *name, int which, void *buf,
+ size_t bufsize, size_t *ret);
+
+ rc = query_module(mp->name, QM_INFO, &info, sizeof(info),
+ &crap);
+ if (rc < 0) {
+ /* ENOENT is skipped silently; other errors are reported */
+ if (errno != ENOENT)
+ printf("query_module(%s) failed: %s\n",
+ mp->name, strerror(errno));
+ } else {
+ /* the address printed is the module address plus
+ * sizeof(struct module) */
+ printf("add-symbol-file %s/%s/%s.o 0x%0lx\n", path,
+ mp->path, mp->name,
+ info.addr + sizeof(struct module));
+ }
+ }
+
+ return 0;
+#else
+ printf("jt_dbg_module is not yet implemented for Linux 2.5\n");
+ return 0;
+#endif /* linux 2.5 */
+}
+
+/*
+ * "panic" command: issue IOC_PORTAL_PANIC to the portals device.  Takes no
+ * arguments.
+ *
+ * Returns 0 on success or on a usage error, -1 if the ioctl cannot be
+ * packed or fails.
+ */
+int jt_dbg_panic(int argc, char **argv)
+{
+        struct portal_ioctl_data data;
+        int status;
+
+        if (argc != 1) {
+                fprintf(stderr, "usage: %s\n", argv[0]);
+                return 0;
+        }
+
+        memset(&data, 0, sizeof(data));
+        status = portal_ioctl_pack(&data, &buf, max);
+        if (status != 0) {
+                fprintf(stderr, "portal_ioctl_pack failed.\n");
+                return -1;
+        }
+
+        status = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_PANIC, buf);
+        if (status == 0)
+                return 0;
+
+        fprintf(stderr, "IOC_PORTAL_PANIC failed: %s\n",
+                strerror(errno));
+        return -1;
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Some day I'll split all of this functionality into a cfs_debug module
+ * of its own. That day is not today.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <portals/api-support.h>
+#include <portals/ptlctl.h>
+#include "parser.h"
+
+
+/* Command table for debugctl: each entry gives the command name, its
+ * handler, a third field left 0 here, and the help text.  The table ends
+ * with an all-zero sentinel entry. */
+command_t list[] = {
+ {"debug_kernel", jt_dbg_debug_kernel, 0, "usage: debug_kernel [file] [raw], get debug buffer and print it [to a file]"},
+ {"debug_daemon", jt_dbg_debug_daemon, 0, "usage: debug_daemon [start file [#MB]|stop|pause|continue], control debug daemon to dump debug buffer to a file"},
+ {"debug_file", jt_dbg_debug_file, 0, "usage: debug_file <input> [output] [raw], read debug buffer from input and print it [to output]"},
+ {"clear", jt_dbg_clear_debug_buf, 0, "clear kernel debug buffer"},
+ {"mark", jt_dbg_mark_debug_buf, 0, "insert a marker into the kernel debug buffer (args: [marker text])"},
+ {"filter", jt_dbg_filter, 0, "filter certain messages (args: subsystem/debug ID)\n"},
+ {"show", jt_dbg_show, 0, "enable certain messages (args: subsystem/debug ID)\n"},
+ {"list", jt_dbg_list, 0, "list subsystem and debug types (args: subs or types)\n"},
+ {"modules", jt_dbg_modules, 0, "provide gdb-friendly module info (arg: <path>)"},
+ {"panic", jt_dbg_panic, 0, "cause the kernel to panic"},
+ {"dump", jt_ioc_dump, 0, "usage: dump file, save ioctl buffer to file"},
+ {"help", Parser_help, 0, "help"},
+ {"exit", Parser_quit, 0, "quit"},
+ {"quit", Parser_quit, 0, "quit"},
+ { 0, 0, 0, NULL }
+};
+
+/*
+ * debugctl entry point.  With arguments, runs the single command given on
+ * the command line and returns its result; without arguments, enters the
+ * interactive command loop and returns 0.  Exits with status 2 if
+ * dbg_initialize() fails.
+ */
+int main(int argc, char **argv)
+{
+        int rc = 0;
+
+        if (dbg_initialize(argc, argv) < 0)
+                exit(2);
+
+        register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH);
+
+        Parser_init("debugctl > ", list);
+        if (argc > 1)
+                rc = Parser_execarg(argc - 1, &argv[1], list);
+        else
+                Parser_commands();
+
+        /* release the device on both the one-shot and interactive paths */
+        unregister_ioc_dev(PORTALS_DEV_ID);
+        return rc;
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include <portals/api-support.h>
+#include <portals/ptlctl.h>
+
+/* One registered ioctl target: the device path and its descriptor.
+ * dev_fd is set to -1 by register_ioc_dev(); open_ioc_dev() opens the
+ * device on first use and stores the descriptor here. */
+struct ioc_dev {
+ const char * dev_name;
+ int dev_fd;
+};
+
+/* Registered devices, indexed by device id. */
+static struct ioc_dev ioc_dev_list[10];
+
+/* Header written in front of every ioctl buffer saved by dump(). */
+struct dump_hdr {
+ int magic;
+ int dev_id;
+ int opc;
+};
+
+/* When non-NULL, l_ioctl() appends ioctls to this file instead of issuing
+ * them; set by set_ioctl_dump(). */
+char * dump_filename;
+
+/*
+ * Return an open descriptor for the device registered under 'dev_id',
+ * opening it read/write on first use and caching the descriptor.
+ *
+ * Returns -EINVAL if 'dev_id' is out of range or not registered, or the
+ * negative result of open() if the device cannot be opened.
+ */
+static int
+open_ioc_dev(int dev_id)
+{
+        const char * dev_name;
+
+        /* bound by the number of slots; sizeof(ioc_dev_list) alone is the
+         * array size in bytes and would let ids index past the end */
+        if (dev_id < 0 ||
+            (size_t)dev_id >= sizeof(ioc_dev_list) / sizeof(ioc_dev_list[0]))
+                return -EINVAL;
+
+        dev_name = ioc_dev_list[dev_id].dev_name;
+        if (dev_name == NULL) {
+                fprintf(stderr, "unknown device id: %d\n", dev_id);
+                return -EINVAL;
+        }
+
+        if (ioc_dev_list[dev_id].dev_fd < 0) {
+                int fd = open(dev_name, O_RDWR);
+
+                if (fd < 0) {
+                        fprintf(stderr, "opening %s failed: %s\n"
+                                "hint: the kernel modules may not be loaded\n",
+                                dev_name, strerror(errno));
+                        return fd;
+                }
+                ioc_dev_list[dev_id].dev_fd = fd;
+        }
+
+        return ioc_dev_list[dev_id].dev_fd;
+}
+
+
+/*
+ * Issue ioctl 'opc' with argument 'buf' on the device registered under
+ * 'dev_id'.  Returns the negative result of open_ioc_dev() if the device
+ * is unavailable, otherwise whatever ioctl() returns.
+ */
+static int
+do_ioctl(int dev_id, int opc, void *buf)
+{
+        int fd = open_ioc_dev(dev_id);
+
+        return (fd < 0) ? fd : ioctl(fd, opc, buf);
+}
+
+/*
+ * Open the current dump file for appending.  Returns the stream, or NULL
+ * if no dump file name is set (a message is printed) or fopen() fails.
+ */
+static FILE *
+get_dump_file()
+{
+        if (dump_filename)
+                return fopen(dump_filename, "a");
+
+        fprintf(stderr, "no dump filename\n");
+        return NULL;
+}
+
+/*
+ * Append one ioctl to the dump file: a dump_hdr (magic, device id, opcode)
+ * followed by the ioctl buffer, whose length is taken from the ioc_len
+ * field of its portal_ioctl_hdr.
+ *
+ * The dump file should start with a description of which devices are
+ * used, but for now it is assumed that whatever application reads the
+ * file will know what to do.
+ *
+ * Returns 0 on success, -EINVAL if the file cannot be opened or written.
+ */
+int
+dump(int dev_id, int opc, void *buf)
+{
+        struct portal_ioctl_hdr * ioc_hdr = (struct portal_ioctl_hdr *) buf;
+        struct dump_hdr dump_hdr;
+        size_t written;
+        FILE *fp;
+
+        printf("dumping opc %x to %s\n", opc, dump_filename);
+
+        fp = get_dump_file();
+        if (fp == NULL) {
+                fprintf(stderr, "%s: %s\n", dump_filename,
+                        strerror(errno));
+                return -EINVAL;
+        }
+
+        dump_hdr.magic = 0xdeadbeef;
+        dump_hdr.dev_id = dev_id;
+        dump_hdr.opc = opc;
+
+        /* header first, then the payload only if the header went out */
+        written = fwrite(&dump_hdr, sizeof(dump_hdr), 1, fp);
+        if (written == 1)
+                written = fwrite(buf, ioc_hdr->ioc_len, 1, fp);
+        fclose(fp);
+
+        if (written != 1) {
+                fprintf(stderr, "%s: %s\n", dump_filename,
+                        strerror(errno));
+                return -EINVAL;
+        }
+
+        return 0;
+}
+
+/*
+ * Register device path 'dev_name' under 'dev_id' so ioctls can be sent to
+ * it.  Any device previously registered under the same id is unregistered
+ * first.  The device itself is not opened here.
+ *
+ * Returns 'dev_id' on success, -EINVAL if 'dev_id' is out of range.
+ */
+int
+register_ioc_dev(int dev_id, const char * dev_name)
+{
+
+        /* bound by the number of slots, not the array size in bytes */
+        if (dev_id < 0 ||
+            (size_t)dev_id >= sizeof(ioc_dev_list) / sizeof(ioc_dev_list[0]))
+                return -EINVAL;
+
+        unregister_ioc_dev(dev_id);
+
+        ioc_dev_list[dev_id].dev_name = dev_name;
+        ioc_dev_list[dev_id].dev_fd = -1;
+
+        return dev_id;
+}
+
+/*
+ * Forget the device registered under 'dev_id', closing its descriptor if
+ * it had been opened.  Out-of-range ids are ignored.
+ */
+void
+unregister_ioc_dev(int dev_id)
+{
+
+        /* bound by the number of slots, not the array size in bytes */
+        if (dev_id < 0 ||
+            (size_t)dev_id >= sizeof(ioc_dev_list) / sizeof(ioc_dev_list[0]))
+                return;
+        /* dev_name guards against closing fd 0 of a never-registered,
+         * zero-initialised slot */
+        if (ioc_dev_list[dev_id].dev_name != NULL &&
+            ioc_dev_list[dev_id].dev_fd >= 0)
+                close(ioc_dev_list[dev_id].dev_fd);
+
+        ioc_dev_list[dev_id].dev_name = NULL;
+        ioc_dev_list[dev_id].dev_fd = -1;
+}
+
+/*
+ * Select 'file' as the ioctl dump file.  Once set, l_ioctl() appends every
+ * ioctl buffer to that file instead of issuing it.  The name is copied;
+ * any previously set name is released.
+ *
+ * Returns 0 on success, -EINVAL if 'file' is NULL, -ENOMEM if the name
+ * cannot be copied.  On failure the previous setting is left unchanged.
+ */
+int
+set_ioctl_dump(char * file)
+{
+        char *copy;
+
+        if (file == NULL)
+                return -EINVAL;
+
+        /* copy before freeing, so a failed allocation (or 'file' aliasing
+         * the current name) cannot leave dump_filename dangling or NULL */
+        copy = strdup(file);
+        if (copy == NULL)
+                return -ENOMEM;
+
+        free(dump_filename);
+        dump_filename = copy;
+        return 0;
+}
+
+/*
+ * Send ioctl 'opc' with argument 'buf' to device 'dev_id' -- or, when a
+ * dump file is active, record it in that file instead of issuing it.
+ * Returns the result of whichever path was taken.
+ */
+int
+l_ioctl(int dev_id, int opc, void *buf)
+{
+        if (!dump_filename)
+                return do_ioctl(dev_id, opc, buf);
+
+        return dump(dev_id, opc, buf);
+}
+
+/* Read an ioctl dump file, and call the ioc_func for each ioctl buffer
+ * in the file.  For example:
+ *
+ * parse_dump("lctl.dump", l_ioctl);
+ *
+ * Note: if using l_ioctl, then you also need to register_ioc_dev() for
+ * each device used in the dump.
+ *
+ * Each record is a struct dump_hdr followed by an ioctl buffer whose
+ * length is the ioc_len field of its portal_ioctl_hdr.  Every record is
+ * copied to a scratch buffer before ioc_func is called on it.
+ *
+ * Returns 0 when the whole file was replayed, -1 if a record is truncated,
+ * runs past the end of the file or does not fit the scratch buffer.  The
+ * process exits with status 1 if the file cannot be opened, stat'ed or
+ * mapped, if it is empty, or if ioc_func reports a failure.
+ */
+int
+parse_dump(char * dump_file, int (*ioc_func)(int dev_id, int opc, void *))
+{
+        int fd;
+        int result = 0;
+        struct stat st;
+        char *start, *buf, *end;
+
+        fd = syscall(SYS_open, dump_file, O_RDONLY);
+        if (fd < 0) {
+                fprintf(stderr, "opening %s failed: %s\n", dump_file,
+                        strerror(errno));
+                exit(1);
+        }
+
+#warning FIXME: cleanup fstat issue here
+#ifndef SYS_fstat64
+#define __SYS_fstat__ SYS_fstat
+#else
+#define __SYS_fstat__ SYS_fstat64
+#endif
+        if (syscall(__SYS_fstat__, fd, &st)) {
+                perror("stat fails");
+                exit(1);
+        }
+
+        if (st.st_size < 1) {
+                fprintf(stderr, "KML is empty\n");
+                exit(1);
+        }
+
+        start = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE , fd, 0);
+        /* mmap reports failure with MAP_FAILED, not NULL */
+        if (start == MAP_FAILED) {
+                perror("mmap fails");
+                exit(1);
+        }
+        /* the mapping stays valid after the descriptor is closed */
+        close(fd);
+
+        buf = start;
+        end = start + st.st_size;
+        while (buf < end) {
+                struct dump_hdr *dump_hdr = (struct dump_hdr *) buf;
+                struct portal_ioctl_hdr * data;
+                char tmp[8096];
+                size_t avail = (size_t)(end - buf);
+                size_t len;
+                int rc;
+
+                /* both headers must lie inside the file before ioc_len
+                 * can be trusted */
+                if (avail < sizeof(*dump_hdr) + sizeof(*data)) {
+                        fprintf(stderr, "dump file truncated, %lu trailing bytes\n",
+                                (unsigned long)avail);
+                        result = -1;
+                        break;
+                }
+
+                data = (struct portal_ioctl_hdr *) (buf + sizeof(*dump_hdr));
+                len = (size_t)data->ioc_len;
+
+                /* the payload starts after the dump header, so that header
+                 * counts against the space left */
+                if (len > avail - sizeof(*dump_hdr)) {
+                        fprintf(stderr, "dump file overflow, %p + %d > %p\n", buf,
+                                data->ioc_len, end);
+                        result = -1;
+                        break;
+                }
+
+                /* never copy more than the scratch buffer can hold */
+                if (len > sizeof(tmp)) {
+                        fprintf(stderr, "dump record too large, %d > %lu\n",
+                                data->ioc_len, (unsigned long)sizeof(tmp));
+                        result = -1;
+                        break;
+                }
+
+                /* ioc_func gets a private, writable copy of the record */
+                memcpy(tmp, data, len);
+
+                rc = ioc_func(dump_hdr->dev_id, dump_hdr->opc, tmp);
+                if (rc) {
+                        printf("failed: %d\n", rc);
+                        exit(1);
+                }
+
+                buf += len + sizeof(*dump_hdr);
+        }
+
+        munmap(start, st.st_size);
+        return result;
+}
+
+/*
+ * "dump <file>" command: make 'file' the ioctl dump file, so subsequent
+ * ioctls are saved to it instead of being issued.
+ *
+ * Exactly one argument is required; argv[1] must not be used when it is
+ * absent.  Always returns 0.
+ */
+int
+jt_ioc_dump(int argc, char **argv)
+{
+        if (argc != 2) {
+                fprintf(stderr, "usage: %s <file>\n", argv[0]);
+                return 0;
+        }
+        printf("setting dumpfile to: %s\n", argv[1]);
+
+        set_ioctl_dump(argv[1]);
+        return 0;
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.sf.net/projects/lustre/
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <sys/param.h>
+#include <assert.h>
+
+#include <config.h>
+#ifdef HAVE_LIBREADLINE
+#define READLINE_LIBRARY
+#include <readline/readline.h>
+#endif
+//extern char **completion_matches __P((char *, rl_compentry_func_t *));
+extern void using_history(void);
+extern void stifle_history(int);
+extern void add_history(char *);
+
+#include "parser.h"
+
+static command_t * top_level; /* Top level of commands, initialized by
+ * InitParser */
+static char * parser_prompt = NULL;/* Parser prompt, set by InitParser */
+static int done; /* Set to 1 if user types exit or quit */
+
+
+/* static functions */
+static char *skipwhitespace(char *s);
+static char *skiptowhitespace(char *s);
+static command_t *find_cmd(char *name, command_t cmds[], char **next);
+static int process(char *s, char **next, command_t *lookup, command_t **result,
+ char **prev);
+static void print_commands(char *str, command_t *table);
+
+/*
+ * Return a pointer to the first non-whitespace character of 's', or to its
+ * terminating NUL if the string is all whitespace.
+ */
+static char * skipwhitespace(char * s)
+{
+        /* The terminating NUL is not whitespace, so it ends the scan without
+         * a separate length bound.  The cast keeps isspace() defined for
+         * characters with the high bit set. */
+        while (isspace((unsigned char)*s))
+                s++;
+        return(s);
+}
+
+
+/*
+ * Return a pointer to the first whitespace character of 's', or to its
+ * terminating NUL if there is none.
+ */
+static char * skiptowhitespace(char * s)
+{
+        /* the cast keeps isspace() defined for characters with the high
+         * bit set */
+        while (*s != '\0' && !isspace((unsigned char)*s))
+                s++;
+        return(s);
+}
+
+/*
+ * Split 'line' in place into tokens separated by spaces or tabs, storing
+ * a pointer to each token in argv[].  At most 'maxargs' pointers are
+ * stored; further tokens are ignored.
+ *
+ * Returns the number of tokens stored.
+ */
+static int line2args(char *line, char **argv, int maxargs)
+{
+        char *arg;
+        int i = 0;
+
+        /* stop at maxargs entries: storing one more would write past an
+         * argv[] array that has exactly maxargs slots */
+        for (arg = strtok(line, " \t");
+             arg != NULL && i < maxargs;
+             arg = strtok(NULL, " \t"))
+                argv[i++] = arg;
+
+        return i;
+}
+
+/* Look up 'name' in the table 'cmds' by exact, case-sensitive match.
+ * Returns the matching entry, or NULL if there is none. */
+static command_t *Parser_findargcmd(char *name, command_t cmds[])
+{
+        command_t *entry = cmds;
+
+        while (entry->pc_name) {
+                if (strcmp(name, entry->pc_name) == 0)
+                        return entry;
+                entry++;
+        }
+        return NULL;
+}
+
+/* Run the command named by argv[0] from table 'cmds', passing it argc and
+ * argv, and return its result.  If no such command exists, print the list
+ * of available commands and return -1. */
+int Parser_execarg(int argc, char **argv, command_t cmds[])
+{
+        command_t *entry = Parser_findargcmd(argv[0], cmds);
+
+        if (entry != NULL)
+                return (entry->pc_func)(argc, argv);
+
+        printf("Try interactive use without arguments or use one of:\n");
+        for (entry = cmds; entry->pc_name; entry++)
+                printf("\"%s\" ", entry->pc_name);
+        printf("\nas argument.\n");
+
+        return -1;
+}
+
+/* Return the first entry of 'cmds' whose name begins (case-insensitively)
+   with the first token of 'name', or NULL if there is none, the token is
+   empty, or either argument is NULL.  On a match *next is set to the start
+   of the following token; when nothing matches it is left pointing just
+   past the first token.  The string itself is not modified. */
+static command_t * find_cmd(char * name, command_t cmds[], char ** next)
+{
+        command_t *entry;
+        int toklen;
+
+        if (!cmds || !name )
+                return NULL;
+
+        /* isolate the first token: [name, *next) */
+        name = skipwhitespace(name);
+        *next = skiptowhitespace(name);
+        toklen = *next - name;
+        if (toklen == 0)
+                return NULL;
+
+        for (entry = cmds; entry->pc_name; entry++) {
+                if (strncasecmp(name, entry->pc_name, toklen) != 0)
+                        continue;
+
+                *next = skipwhitespace(*next);
+                return(entry);
+        }
+        return NULL;
+}
+
+/* Recursively process a command line string s and find the command
+   corresponding to it in the table 'lookup'.
+
+   Returns CMD_NONE if no command matches, CMD_AMBIG if the token is a
+   prefix of more than one command, CMD_COMPLETE if it selects a command
+   with a handler, and CMD_INCOMPLETE if it selects a command group but no
+   sub-command follows.  *result receives the entry found (NULL for
+   CMD_NONE), *next the text following the matched token, and *prev the
+   string that was examined last. */
+static int process(char *s, char ** next, command_t *lookup,
+                   command_t **result, char **prev)
+{
+        *result = find_cmd(s, lookup, next);
+        *prev = s;
+
+        /* non existent */
+        if ( ! *result )
+                return CMD_NONE;
+
+        /* found entry: is it ambiguous, i.e. not the exact command name and
+           more than one command in the list matches.  Note that find_cmd
+           points to the first ambiguous entry */
+        if ( strncasecmp(s, (*result)->pc_name, strlen((*result)->pc_name)) &&
+             find_cmd(s, (*result) + 1, next))
+                return CMD_AMBIG;
+
+        /* found a unique command: component or full? */
+        if ( (*result)->pc_func )
+                return CMD_COMPLETE;
+
+        /* A command group: is there anything left to select a sub-command?
+           The failed ambiguity probe above may have left *next on the
+           whitespace after the token, so skip it first.  The test must look
+           at the character, not the pointer, or it is never true. */
+        *next = skipwhitespace(*next);
+        if ( **next == '\0' )
+                return CMD_INCOMPLETE;
+
+        return process(*next, next, (*result)->pc_sub_cmd, result, prev);
+}
+
+#ifdef HAVE_LIBREADLINE
+static command_t * match_tbl; /* Command completion against this table */
+/* Completion generator: each call returns a malloc'ed copy of the next
+ * name in match_tbl that starts (case-insensitively) with text, or NULL
+ * when there are no more.  state == 0 marks the first call for a word
+ * and restarts the scan. */
+static char * command_generator(const char * text, int state)
+{
+        static int index, len;
+        char *name;
+
+        /* Do we have a match table? */
+        if (match_tbl == NULL)
+                return NULL;
+
+        if (state == 0) {
+                index = 0;
+                len = (int)strlen(text);
+        }
+
+        /* Resume the scan where the previous call stopped */
+        for (;;) {
+                name = match_tbl[index].pc_name;
+                if (name == NULL)
+                        break;
+                index++;
+
+                if (!strncasecmp(name, text, len))
+                        return strdup(name);
+        }
+
+        /* No more matches */
+        return NULL;
+}
+
+/* Completion hook installed in rl_attempted_completion_function.  Walks
+ * the tokens already typed (rl_line_buffer) down the command tables to
+ * choose the table to complete against, then lets command_generator
+ * produce the candidates. */
+static char **command_completion(char * text, int start, int end)
+{
+        command_t *table;
+        char *pos;
+
+        match_tbl = top_level;
+        table = find_cmd(rl_line_buffer, match_tbl, &pos);
+        while (table != NULL) {
+                /* descend only once the token is followed by a space */
+                if (pos[-1] == ' ')
+                        match_tbl = table->pc_sub_cmd;
+                table = find_cmd(pos, match_tbl, &pos);
+        }
+
+        return completion_matches(text, command_generator);
+}
+#endif
+
+/* Resolve line against the top-level command table and act on the
+ * outcome: run the handler for a complete command (printing its help
+ * text if it returns CMD_HELP), otherwise report on stderr why the line
+ * could not be run.  Returns the handler's result, or 0 when no handler
+ * was run. */
+int execute_line(char * line)
+{
+        command_t *cmd, *ambig;
+        char *argv[MAXARGS];
+        char *prev, *next, *tmp;
+        int status;
+        int i;
+        int rc = 0;
+
+        status = process(line, &next, top_level, &cmd, &prev);
+
+        if (status == CMD_AMBIG) {
+                fprintf(stderr, "Ambiguous command \'%s\'\nOptions: ", line);
+                /* list every entry from the first match on that also matches */
+                for (ambig = find_cmd(prev, cmd, &tmp);
+                     ambig != NULL;
+                     ambig = find_cmd(prev, cmd, &tmp)) {
+                        fprintf(stderr, "%s ", ambig->pc_name);
+                        cmd = ambig + 1;
+                }
+                fprintf(stderr, "\n");
+        } else if (status == CMD_NONE) {
+                fprintf(stderr, "No such command, type help\n");
+        } else if (status == CMD_INCOMPLETE) {
+                fprintf(stderr,
+                        "'%s' incomplete command.  Use '%s x' where x is one of:\n",
+                        line, line);
+                fprintf(stderr, "\t");
+                for (i = 0; cmd->pc_sub_cmd[i].pc_name; i++)
+                        fprintf(stderr, "%s ", cmd->pc_sub_cmd[i].pc_name);
+                fprintf(stderr, "\n");
+        } else if (status == CMD_COMPLETE) {
+                i = line2args(line, argv, MAXARGS);
+                rc = (cmd->pc_func)(i, argv);
+
+                if (rc == CMD_HELP)
+                        fprintf(stderr, "%s\n", cmd->pc_help);
+        }
+
+        return rc;
+}
+
+/* Do-nothing callback; always returns 0. */
+int
+noop_fn ()
+{
+        return 0;
+}
+
+/* Prepare line input.  Returns non-zero when stdin is a terminal.  With
+ * readline available this also enables a bounded history, installs the
+ * completion hooks and, for non-interactive input, replaces readline's
+ * terminal prep/deprep functions with no-ops. */
+int init_input()
+{
+        int interactive;
+
+        interactive = isatty (fileno (stdin));
+
+#ifdef HAVE_LIBREADLINE
+        using_history();
+        stifle_history(HISTORY);
+
+        if (interactive == 0)
+        {
+                rl_prep_term_function = (rl_vintfunc_t *)noop_fn;
+                rl_deprep_term_function = (rl_voidfunc_t *)noop_fn;
+        }
+
+        rl_attempted_completion_function = (CPPFunction *)command_completion;
+        rl_completion_entry_function = (void *)command_generator;
+#endif
+        return interactive;
+}
+
+#ifndef HAVE_LIBREADLINE
+#define add_history(s)
+/* Minimal stand-in for GNU readline: print prompt (if any), read one line
+ * from stdin and return it as a malloc'ed string without the trailing
+ * newline.  Returns NULL on EOF or read error.  The caller frees the
+ * result.  Lines longer than the 2048-byte buffer are returned in pieces. */
+char * readline(char * prompt)
+{
+        char line[2048];
+        int n = 0;
+
+        if (prompt) {
+                printf ("%s", prompt);
+                /* the prompt has no newline: push it out before blocking */
+                fflush (stdout);
+        }
+        if (fgets(line, sizeof(line), stdin) == NULL)
+                return (NULL);
+        n = strlen(line);
+        if (n && line[n-1] == '\n')
+                line[n-1] = '\0';
+        return strdup(line);
+}
+#endif
+
+/* Main command loop: read lines until EOF or until a command sets
+ * `done`, executing every non-blank line.  The prompt is shown only when
+ * stdin is a terminal.  Returns the result of the last executed line
+ * (0 if none was executed). */
+int Parser_commands(void)
+{
+        int interactive = init_input();
+        int rc = 0;
+        char *line;
+        char *cmd;
+
+        while (!done) {
+                line = readline(interactive ? parser_prompt : NULL);
+                if (line == NULL)
+                        break;
+
+                cmd = skipwhitespace(line);
+                if (*cmd != '\0') {
+                        add_history(cmd);
+                        rc = execute_line(cmd);
+                }
+
+                free(line);
+        }
+        return rc;
+}
+
+
+/* Reset the parser: clear the `done` flag, install cmds as the top-level
+ * command table and replace the prompt with a private copy of prompt. */
+void Parser_init(char * prompt, command_t * cmds)
+{
+        done = 0;
+        top_level = cmds;
+
+        /* free(NULL) is a no-op, so no guard is needed */
+        free(parser_prompt);
+        parser_prompt = strdup(prompt);
+}
+
+/* Shut the parser down: release the prompt copy and set `done` so the
+ * command loop stops.  The arguments are not used. */
+void Parser_exit(int argc, char *argv[])
+{
+        free(parser_prompt);
+        parser_prompt = NULL;
+        done = 1;
+}
+
+/* Convert s to an integer stored in *val.  A leading "0x" selects
+ * hexadecimal, any other leading '0' selects octal, everything else is
+ * decimal.  Returns the sscanf() result: 1 when a number was read. */
+int Parser_int(char *s, int *val)
+{
+        if (s[0] != '0')
+                return sscanf(s, "%d", val);
+
+        if (s[1] != 'x')
+                return sscanf(s, "%o", val);
+
+        /* skip the "0x" prefix */
+        return sscanf(s + 2, "%x", val);
+}
+
+
+/* Quick help: print the name of every runnable command, including
+ * nested sub-commands, on stdout.  The arguments are not used. */
+void Parser_qhelp(int argc, char *argv[])
+{
+        printf("Available commands are:\n");
+        print_commands(NULL, top_level);
+        printf("For more help type: help command-name\n");
+}
+
+/* 'help' command.  With no argument prints the quick command list.
+ * Otherwise the arguments are joined (without separators) into one
+ * lookup string, resolved against the command tables, and the help text
+ * or a diagnostic is printed on stderr.
+ * Returns 0, or -1 if the joined arguments do not fit the lookup buffer. */
+int Parser_help(int argc, char **argv)
+{
+        char line[1024];
+        char *next, *prev, *tmp;
+        command_t *result, *ambig;
+        size_t used = 0;
+        int i;
+
+        if ( argc == 1 ) {
+                Parser_qhelp(argc, argv);
+                return 0;
+        }
+
+        /* Bounded concatenation: never write past the end of line[] */
+        line[0] = '\0';
+        for ( i = 1 ; i < argc ; i++ ) {
+                size_t arglen = strlen(argv[i]);
+
+                if (arglen >= sizeof(line) - used) {
+                        fprintf(stderr, "help: command name too long\n");
+                        return -1;
+                }
+                memcpy(line + used, argv[i], arglen + 1);
+                used += arglen;
+        }
+
+        switch ( process(line, &next, top_level, &result, &prev) ) {
+        case CMD_COMPLETE:
+                fprintf(stderr, "%s: %s\n",line, result->pc_help);
+                break;
+        case CMD_NONE:
+                fprintf(stderr, "%s: Unknown command.\n", line);
+                break;
+        case CMD_INCOMPLETE:
+                fprintf(stderr,
+                        "'%s' incomplete command.  Use '%s x' where x is one of:\n",
+                        line, line);
+                fprintf(stderr, "\t");
+                for (i = 0; result->pc_sub_cmd[i].pc_name; i++) {
+                        fprintf(stderr, "%s ", result->pc_sub_cmd[i].pc_name);
+                }
+                fprintf(stderr, "\n");
+                break;
+        case CMD_AMBIG:
+                fprintf(stderr, "Ambiguous command \'%s\'\nOptions: ", line);
+                while( (ambig = find_cmd(prev, result, &tmp)) ) {
+                        fprintf(stderr, "%s ", ambig->pc_name);
+                        result = ambig + 1;
+                }
+                fprintf(stderr, "\n");
+                break;
+        }
+        return 0;
+}
+
+
+/* Print the detailed help for cmd, as if "help cmd" had been typed. */
+void Parser_printhelp(char *cmd)
+{
+        char *help_argv[2];
+
+        help_argv[0] = "help";
+        help_argv[1] = cmd;
+        Parser_help(2, help_argv);
+}
+
+/*************************************************************************
+ * COMMANDS *
+ *************************************************************************/
+
+
+/* Print every runnable command (entry with a handler) in table, one per
+ * line on stdout, recursing into sub-command tables.  str is the prefix
+ * of parent command names, or NULL at the top level.  A prefix that
+ * would exceed the local buffer is truncated rather than overflowing. */
+static void print_commands(char * str, command_t * table) {
+        command_t * cmds;
+        char buf[80];
+
+        for (cmds = table; cmds->pc_name; cmds++) {
+                if (cmds->pc_func) {
+                        if (str) printf("\t%s %s\n", str, cmds->pc_name);
+                        else printf("\t%s\n", cmds->pc_name);
+                }
+                if (cmds->pc_sub_cmd) {
+                        if (str) {
+                                /* bounded: nested names can exceed 80 bytes */
+                                snprintf(buf, sizeof(buf), "%s %s", str,
+                                         cmds->pc_name);
+                                print_commands(buf, cmds->pc_sub_cmd);
+                        } else {
+                                print_commands(cmds->pc_name, cmds->pc_sub_cmd);
+                        }
+                }
+        }
+}
+
+/* Prompt for a string, showing deft as the default.
+ * The answer (or deft, when the user enters nothing or input hits EOF)
+ * is copied into res, truncated to fit len bytes and always
+ * NUL-terminated when len > 0.
+ * Returns res, or NULL if no line could be read (res then holds deft). */
+char *Parser_getstr(const char *prompt, const char *deft, char *res,
+                    size_t len)
+{
+        char *line;
+        const char *src;
+        size_t size = strlen(prompt) + strlen(deft) + 8;
+        char *theprompt = malloc(size);
+
+        assert(theprompt);
+        snprintf(theprompt, size, "%s [%s]: ", prompt, deft);
+
+        line = readline(theprompt);
+        free(theprompt);
+
+        src = (line == NULL || *line == '\0') ? deft : line;
+        if (len > 0) {
+                /* strncpy alone leaves res unterminated when src fills it */
+                strncpy(res, src, len - 1);
+                res[len - 1] = '\0';
+        }
+
+        if (line == NULL)
+                return NULL;
+
+        free(line);
+        return res;
+}
+
+/* Prompt for an integer in [min, max], re-prompting until a valid value
+ * is entered.  An empty line selects deft.  If input ends (EOF) deft is
+ * returned as well, instead of re-prompting forever.  base is
+ * interpreted as by strtol(3). */
+int Parser_getint(const char *prompt, long min, long max, long deft, int base)
+{
+        int rc;
+        long result;
+        char *line;
+        /* room for " [<dec>, (0x<hex>)]: " with a 64-bit long, plus NUL */
+        size_t size = strlen(prompt) + 64;
+        char *theprompt = malloc(size);
+
+        assert(theprompt);
+        snprintf(theprompt, size, "%s [%ld, (0x%lx)]: ", prompt, deft, deft);
+
+        fflush(stdout);
+
+        do {
+                line = readline(theprompt);
+                if ( !line ) {
+                        /* EOF: nothing more will ever arrive */
+                        result = deft;
+                        break;
+                }
+                if ( *line == '\0' ) {
+                        free(line);
+                        result = deft;
+                        break;
+                }
+                rc = Parser_arg2int(line, &result, base);
+                free(line);
+                if ( rc != 0 ) {
+                        fprintf(stdout, "Invalid string.\n");
+                        fflush(stdout);
+                } else if ( result > max || result < min ) {
+                        fprintf(stdout, "Error: response must lie between %ld and %ld.\n",
+                                min, max);
+                        fflush(stdout);
+                } else {
+                        break;
+                }
+        } while ( 1 ) ;
+
+        free(theprompt);
+        return result;
+}
+
+/* Prompt for a yes/no answer until one is given.  An answer starting
+ * with y/Y yields 1, with n/N yields 0; an empty line or end of input
+ * yields deft.  deft must be 0 or 1. */
+int Parser_getbool(const char *prompt, int deft)
+{
+        int result = 0;
+        char *line;
+        int size = strlen(prompt) + 8;
+        char *theprompt = malloc(size);
+
+        assert(theprompt);
+        fflush(stdout);
+
+        if ( deft != 0 && deft != 1 ) {
+                fprintf(stderr, "Error: Parser_getbool given bad default (%d).\n",
+                        deft);
+                assert ( 0 );
+        }
+        sprintf(theprompt, "%s [%s]: ", prompt, deft ? "Y" : "N");
+
+        for (;;) {
+                line = readline(theprompt);
+                if ( line == NULL || *line == '\0' ) {
+                        result = deft;
+                        break;
+                }
+                if ( *line == 'y' || *line == 'Y' ) {
+                        result = 1;
+                        break;
+                }
+                if ( *line == 'n' || *line == 'N' ) {
+                        result = 0;
+                        break;
+                }
+                free(line);
+                fprintf(stdout, "Invalid string. Must start with yY or nN\n");
+                fflush(stdout);
+        }
+
+        /* line is the accepted answer, or NULL after EOF */
+        free(line);
+        free(theprompt);
+        return result;
+}
+
+/* Parse an integer out of inp; if inp is NULL or not a valid number,
+ * prompt for one in [min, max] with default deft instead.  base is
+ * interpreted as by strtol(3). */
+long Parser_intarg(const char *inp, const char *prompt, int deft,
+                   int min, int max, int base)
+{
+        long result;
+
+        if ( inp != NULL && Parser_arg2int(inp, &result, base) == 0 )
+                return result;
+
+        /* Parser_getint() takes (prompt, min, max, deft, base) */
+        return Parser_getint(prompt, min, max, deft, base);
+}
+
+/* Return inp when it is a non-empty string; otherwise prompt for a
+ * string (default deft) into answer, which holds len bytes, and return
+ * what Parser_getstr() returns. */
+char *Parser_strarg(char *inp, const char *prompt, const char *deft,
+                    char *answer, int len)
+{
+        if ( inp != NULL && *inp != '\0' )
+                return inp;
+
+        return Parser_getstr(prompt, deft, answer, len);
+}
+
+/* Convert the whole of inp to a long stored in *result.  base is
+ * interpreted as by strtol(3) and must be 0 or in 2..36.
+ * Returns 0 on success; 1 if base is invalid, inp is empty, or inp
+ * contains characters that are not part of the number. */
+int Parser_arg2int(const char *inp, long *result, int base)
+{
+        char *end;
+
+        if ( base != 0 && (base < 2 || base > 36) )
+                return 1;
+
+        *result = strtol(inp, &end, base);
+
+        return ( *inp == '\0' || *end != '\0' ) ? 1 : 0;
+}
+
+/* Convert a human readable size string to an int; "1k" -> 1024.
+ * Accepts a decimal number optionally followed by k/K (x 2^10),
+ * m/M (x 2^20) or g/G (x 2^30).
+ * Returns 0 and stores the size in *sizep, or -1 if str does not start
+ * with a number or the scaled value does not fit in an int. */
+int Parser_size (int *sizep, char *str) {
+        const long long int_max = (long long)(~0u >> 1);
+        long long scaled;
+        int size;
+        int shift = 0;
+        char mod[32];
+
+        switch (sscanf (str, "%d%1[gGmMkK]", &size, mod)) {
+        default:
+                return (-1);
+
+        case 1:
+                break;
+
+        case 2:
+                switch (*mod) {
+                case 'g':
+                case 'G':
+                        shift = 30;
+                        break;
+
+                case 'm':
+                case 'M':
+                        shift = 20;
+                        break;
+
+                case 'k':
+                case 'K':
+                        shift = 10;
+                        break;
+
+                default:
+                        break;
+                }
+                break;
+        }
+
+        /* scale in a wider type: shifting the int itself can overflow */
+        scaled = (long long)size * (1LL << shift);
+        if (scaled > int_max || scaled < -int_max - 1)
+                return (-1);
+
+        *sizep = (int)scaled;
+        return (0);
+}
+
+/* Convert a boolean word to an int, ignoring case: no/n/off/disable
+ * give 0, yes/y/on/enable give 1.  Returns 0 on success; returns -1 and
+ * leaves *b untouched for any other word. */
+int Parser_bool (int *b, char *str) {
+        static const char *const no_words[] = { "no", "n", "off", "disable" };
+        static const char *const yes_words[] = { "yes", "y", "on", "enable" };
+        unsigned int i;
+
+        for (i = 0; i < sizeof (no_words) / sizeof (no_words[0]); i++) {
+                if (strcasecmp (str, no_words[i]) == 0) {
+                        *b = 0;
+                        return (0);
+                }
+        }
+
+        for (i = 0; i < sizeof (yes_words) / sizeof (yes_words[0]); i++) {
+                if (strcasecmp (str, yes_words[i]) == 0) {
+                        *b = 1;
+                        return (0);
+                }
+        }
+
+        return (-1);
+}
+
+/* 'quit' command: set `done` so the command loop stops.  The arguments
+ * are not used.  Always returns 0. */
+int Parser_quit(int argc, char **argv)
+{
+        (void)argc;
+        (void)argv;
+
+        done = 1;
+        return 0;
+}
--- /dev/null
+#ifndef _PARSER_H_
+#define _PARSER_H_
+
+#define HISTORY 100 /* Don't let history grow unbounded */
+#define MAXARGS 100
+
+#define CMD_COMPLETE 0 /* lookup result: the line resolved to an entry that has a handler */
+#define CMD_INCOMPLETE 1 /* lookup result: the entry has no handler and a sub-command is still missing */
+#define CMD_NONE 2 /* lookup result: no entry matches */
+#define CMD_AMBIG 3 /* lookup result: the abbreviation matches more than one entry */
+#define CMD_HELP 4 /* handler return value that makes execute_line() print the entry's pc_help */
+
+typedef struct parser_cmd { /* one entry of a command table; a table ends at the entry whose pc_name is NULL */
+ char *pc_name; /* command name matched against user input */
+ int (* pc_func)(int, char **); /* handler, called with (argc, argv); NULL when the entry only groups sub-commands */
+ struct parser_cmd * pc_sub_cmd; /* table of sub-commands, or NULL */
+ char *pc_help; /* help text printed by the help command or when the handler returns CMD_HELP */
+} command_t;
+
+typedef struct argcmd { /* NOTE(review): not referenced in the visible parser code; presumably a flat (no sub-command) command entry -- confirm */
+ char *ac_name; /* command name */
+ int (*ac_func)(int, char **); /* handler taking (argc, argv) */
+ char *ac_help; /* help text */
+} argcmd_t;
+
+typedef struct network { /* NOTE(review): not referenced in the visible code; field meanings below are inferred from the names -- confirm */
+ char *type; /* presumably the network type name */
+ char *server; /* presumably the server host */
+ int port; /* presumably the server port */
+} network_t;
+
+int Parser_quit(int argc, char **argv);
+void Parser_init(char *, command_t *); /* Set prompt and load command list */
+int Parser_commands(void); /* Start the command parser */
+void Parser_qhelp(int, char **); /* Quick help routine */
+int Parser_help(int, char **); /* Detailed help routine */
+void Parser_printhelp(char *); /* Detailed help routine */
+void Parser_exit(int, char **); /* Shuts down command parser */
+int Parser_execarg(int argc, char **argv, command_t cmds[]);
+int execute_line(char * line);
+
+/* Converts a string to an integer */
+int Parser_int(char *, int *);
+
+/* Prompts for a string, with default values and a maximum length */
+char *Parser_getstr(const char *prompt, const char *deft, char *res,
+ size_t len);
+
+/* Prompts for an integer, with minimum, maximum and default values and base */
+int Parser_getint(const char *prompt, long min, long max, long deft,
+ int base);
+
+/* Prompts for a yes/no, with default */
+int Parser_getbool(const char *prompt, int deft);
+
+/* Extracts an integer from a string, or prompts if it cannot get one */
+long Parser_intarg(const char *inp, const char *prompt, int deft,
+ int min, int max, int base);
+
+/* Extracts a word from the input, or prompts if it cannot get one */
+char *Parser_strarg(char *inp, const char *prompt, const char *deft,
+ char *answer, int len);
+
+/* Extracts an integer from a string with a base */
+int Parser_arg2int(const char *inp, long *result, int base);
+
+/* Convert human readable size string to an int; "1k" -> 1024 */
+int Parser_size(int *sizep, char *str);
+
+/* Convert a string boolean to an int; "enable" -> 1 */
+int Parser_bool(int *b, char *str);
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/tcp.h>
+#include <netdb.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <time.h>
+#include <asm/byteorder.h>
+
+#include <portals/api-support.h>
+#include <portals/ptlctl.h>
+#include <portals/list.h>
+#include <portals/lib-types.h>
+#include "parser.h"
+
+unsigned int portal_debug;
+unsigned int portal_printk;
+unsigned int portal_stack;
+
+
+static ptl_nid_t g_nid = 0; /* NID of the peer set by the last jt_ptl_connect() */
+static unsigned int g_nal = 0; /* NAL selected by jt_ptl_network(); 0 means none selected yet */
+static unsigned short g_port = 0; /* port parsed by the last jt_ptl_connect() */
+
+static int g_socket_txmem = 0; /* SO_SNDBUF requested on connect; 0 leaves the system default */
+static int g_socket_rxmem = 0; /* SO_RCVBUF requested on connect; 0 leaves the system default */
+static int g_socket_nonagle = 1; /* non-zero: set TCP_NODELAY on new connections */
+
+typedef struct /* one row of a name <-> number lookup table; a table ends at the row whose name is NULL */
+{
+ char *name; /* symbolic name */
+ int num; /* number the name stands for */
+} name2num_t;
+
+static name2num_t nalnames[] = { /* user-visible network names and the NAL number each selects */
+ {"tcp", SOCKNAL},
+ {"toe", TOENAL},
+ {"elan", QSWNAL},
+ {"gm", GMNAL},
+ {"scimac", SCIMACNAL},
+ {NULL, -1} /* end-of-table marker */
+};
+
+/* Return the row of table whose name equals str (case-sensitive), or
+ * NULL if there is none. */
+static name2num_t *
+name2num_lookup_name (name2num_t *table, char *str)
+{
+        name2num_t *row;
+
+        for (row = table; row->name != NULL; row++) {
+                if (strcmp (str, row->name) == 0)
+                        return (row);
+        }
+        return (NULL);
+}
+
+/* Return the first row of table whose number equals num, or NULL if
+ * there is none. */
+static name2num_t *
+name2num_lookup_num (name2num_t *table, int num)
+{
+        name2num_t *row;
+
+        for (row = table; row->name != NULL; row++) {
+                if (row->num == num)
+                        return (row);
+        }
+        return (NULL);
+}
+
+/* Map a network name from nalnames to its NAL number; 0 if unknown. */
+int
+ptl_name2nal (char *str)
+{
+        name2num_t *row = name2num_lookup_name (nalnames, str);
+
+        if (row == NULL)
+                return (0);
+        return (row->num);
+}
+
+/* Map a NAL number to its name in nalnames; "???" if unknown. */
+static char *
+nal2name (int nal)
+{
+        name2num_t *row = name2num_lookup_num (nalnames, nal);
+
+        if (row == NULL)
+                return ("???");
+        return (row->name);
+}
+
+/* Guess the NAL a NID belongs to: SOCKNAL if any of bits 16..31 of the
+ * low 32 bits are set, otherwise QSWNAL.  This is a heuristic. */
+static int
+nid2nal (ptl_nid_t nid)
+{
+        __u32 low32 = (__u32)nid;
+
+        /* BIG pragmatic assumption */
+        if ((low32 & 0xffff0000) != 0)
+                return (SOCKNAL);
+        return (QSWNAL);
+}
+
+/* Parse str into a NID stored in *nidp.  Tried in order:
+ *   1. dotted quad "a.b.c.d" (each part 0..255) -> address in host order
+ *   2. a name starting with a letter, resolved with gethostbyname()
+ *   3. a number in any base understood by "%i"
+ *   4. a bare hexadecimal number
+ * Returns 0 on success, -1 if none of the forms applies. */
+int
+ptl_parse_nid (ptl_nid_t *nidp, char *str)
+{
+        struct hostent *he;
+        int a;
+        int b;
+        int c;
+        int d;
+
+        if (sscanf (str, "%d.%d.%d.%d", &a, &b, &c, &d) == 4 &&
+            (a & ~0xff) == 0 && (b & ~0xff) == 0 &&
+            (c & ~0xff) == 0 && (d & ~0xff) == 0)
+        {
+                /* shift as unsigned: a << 24 overflows int once a >= 128 */
+                __u32 addr = ((__u32)a << 24) | ((__u32)b << 16) |
+                             ((__u32)c << 8) | (__u32)d;
+
+                *nidp = (ptl_nid_t)addr;
+                return (0);
+        }
+
+        if ((('a' <= str[0] && str[0] <= 'z') ||
+             ('A' <= str[0] && str[0] <= 'Z')) &&
+            (he = gethostbyname (str)) != NULL)
+        {
+                __u32 addr;
+
+                /* copy instead of casting: h_addr is a char buffer */
+                memcpy (&addr, he->h_addr, sizeof (addr));
+
+                *nidp = (ptl_nid_t)ntohl(addr); /* HOST byte order */
+                return (0);
+        }
+
+        if (sscanf (str, "%i", &a) == 1)
+        {
+                *nidp = (ptl_nid_t)a;
+                return (0);
+        }
+
+        if (sscanf (str, "%x", &a) == 1)
+        {
+                *nidp = (ptl_nid_t) a;
+                return (0);
+        }
+
+        return (-1);
+}
+
+/* Format nid into buffer according to the NAL guessed by nid2nal() and
+ * return buffer.  For SOCKNAL the host name from a reverse lookup is
+ * used, falling back to dotted-quad notation.  The caller must supply a
+ * buffer large enough for the result; no length is checked here. */
+char *
+ptl_nid2str (char *buffer, ptl_nid_t nid)
+{
+        struct hostent *he;
+        __u32 addr;
+
+        switch (nid2nal(nid))
+        {
+        case QSWNAL:
+                sprintf (buffer, LPD64, nid);
+                break;
+
+        case SCIMACNAL:
+                sprintf (buffer, LPX64, nid);
+                break;
+
+        case SOCKNAL:
+                addr = htonl((__u32)nid); /* back to NETWORK byte order */
+                he = gethostbyaddr ((const char *)&addr, sizeof (addr), AF_INET);
+                if (he != NULL) {
+                        strcpy (buffer, he->h_name);
+                        break;
+                }
+
+                addr = (__u32)nid;
+                sprintf (buffer, "%d.%d.%d.%d",
+                         (addr>>24)&0xff, (addr>>16)&0xff, (addr>>8)&0xff, addr&0xff);
+                break;
+
+        default:
+                sprintf (buffer, "nid2nal broken");
+                break;
+        }
+
+        return (buffer);
+}
+
+/* Write exactly nob bytes from buffer to cfd, retrying after EINTR and
+ * after short writes.  Returns 0 on success or the negative write()
+ * result on error (errno set).  Aborts if write() returns 0. */
+int
+sock_write (int cfd, void *buffer, int nob)
+{
+        while (nob > 0)
+        {
+                int rc = write (cfd, buffer, nob);
+
+                if (rc < 0)
+                {
+                        if (errno == EINTR)
+                                continue;
+
+                        return (rc);
+                }
+
+                if (rc == 0)
+                {
+                        fprintf (stderr, "Unexpected zero sock_write\n");
+                        abort();
+                }
+
+                /* advance by what was written, not by what is left */
+                nob -= rc;
+                buffer = (char *)buffer + rc;
+        }
+
+        return (0);
+}
+
+/* Read exactly nob bytes from cfd into buffer, retrying after EINTR and
+ * after short reads.  Returns 0 on success, the negative read() result
+ * on error, or -1 with errno = ECONNABORTED if the peer closes the
+ * connection before nob bytes arrive. */
+int
+sock_read (int cfd, void *buffer, int nob)
+{
+        while (nob > 0)
+        {
+                int rc = read (cfd, buffer, nob);
+
+                if (rc < 0)
+                {
+                        if (errno == EINTR)
+                                continue;
+
+                        return (rc);
+                }
+
+                if (rc == 0) /* EOF */
+                {
+                        errno = ECONNABORTED;
+                        return (-1);
+                }
+
+                /* advance by what was read, not by what is left */
+                nob -= rc;
+                buffer = (char *)buffer + rc;
+        }
+
+        return (0);
+}
+
+/* Register the portals ioctl device (PORTALS_DEV_ID at PORTALS_DEV_PATH).
+ * The arguments are not used.  Always returns 0. */
+int ptl_initialize(int argc, char **argv)
+{
+        register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH);
+
+        return 0;
+}
+
+
+/* 'network' command: select the NAL named by argv[1] for the commands
+ * that follow.  With a missing or unknown name, print the usage and the
+ * list of known names on stderr.  Always returns 0. */
+int jt_ptl_network(int argc, char **argv)
+{
+        name2num_t *entry;
+        int nal = 0;
+
+        if (argc == 2)
+                nal = ptl_name2nal (argv[1]);
+
+        if (nal != 0) {
+                g_nal = nal;
+                return (0);
+        }
+
+        fprintf(stderr, "usage: %s \n", argv[0]);
+        for (entry = nalnames; entry->name != NULL; entry++)
+                fprintf (stderr, "%s%s", entry == nalnames ? "<" : "|", entry->name);
+        fprintf(stderr, ">\n");
+
+        return (0);
+}
+
+/* Exchange portals HELLO headers with the peer on socket cfd: send one
+ * carrying my_nid, then read the peer's, checking its magic, protocol
+ * version, type and (zero) payload length.
+ * On success stores the peer's NID in *peer_nid and returns 0; returns
+ * -1 after reporting the problem on stderr otherwise. */
+int
+exchange_nids (int cfd, ptl_nid_t my_nid, ptl_nid_t *peer_nid)
+{
+        int rc;
+        ptl_hdr_t hdr;
+        ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid;
+
+        LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid));
+
+        memset (&hdr, 0, sizeof (hdr));
+
+        hmv->magic = __cpu_to_le32 (PORTALS_PROTO_MAGIC);
+        hmv->version_major = __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR);
+        hmv->version_minor = __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR);
+
+        hdr.src_nid = __cpu_to_le64 (my_nid);
+        hdr.type = __cpu_to_le32 (PTL_MSG_HELLO);
+
+        /* Assume there's sufficient socket buffering for a portals HELLO header */
+        rc = sock_write (cfd, &hdr, sizeof (hdr));
+        if (rc != 0) {
+                perror ("Can't send initial HELLO");
+                return (-1);
+        }
+
+        /* First few bytes down the wire are the portals protocol magic and
+         * version, no matter what protocol version we're running. */
+
+        rc = sock_read (cfd, hmv, sizeof (*hmv));
+        if (rc != 0) {
+                perror ("Can't read from peer");
+                return (-1);
+        }
+
+        if (__cpu_to_le32 (hmv->magic) != PORTALS_PROTO_MAGIC) {
+                fprintf (stderr, "Bad magic %#08x (%#08x expected)\n",
+                         __cpu_to_le32 (hmv->magic), PORTALS_PROTO_MAGIC);
+                return (-1);
+        }
+
+        if (__cpu_to_le16 (hmv->version_major) != PORTALS_PROTO_VERSION_MAJOR ||
+            __cpu_to_le16 (hmv->version_minor) != PORTALS_PROTO_VERSION_MINOR) {
+                fprintf (stderr, "Incompatible protocol version %d.%d (%d.%d expected)\n",
+                         __cpu_to_le16 (hmv->version_major),
+                         __cpu_to_le16 (hmv->version_minor),
+                         PORTALS_PROTO_VERSION_MAJOR,
+                         PORTALS_PROTO_VERSION_MINOR);
+                /* an incompatible peer is a failure, like a bad magic */
+                return (-1);
+        }
+
+        /* version 0 sends magic/version as the dest_nid of a 'hello' header,
+         * so read the rest of it in now... */
+        LASSERT (PORTALS_PROTO_VERSION_MAJOR == 0);
+        rc = sock_read (cfd, hmv + 1, sizeof (hdr) - sizeof (*hmv));
+        if (rc != 0) {
+                perror ("Can't read rest of HELLO hdr");
+                return (-1);
+        }
+
+        /* ...and check we got what we expected */
+        if (__cpu_to_le32 (hdr.type) != PTL_MSG_HELLO ||
+            __cpu_to_le32 (PTL_HDR_LENGTH (&hdr)) != 0) {
+                fprintf (stderr, "Expecting a HELLO hdr with 0 payload,"
+                         " but got type %d with %d payload\n",
+                         __cpu_to_le32 (hdr.type),
+                         __cpu_to_le32 (PTL_HDR_LENGTH (&hdr)));
+                return (-1);
+        }
+
+        *peer_nid = __le64_to_cpu (hdr.src_nid);
+        return (0);
+}
+
+/* 'connect' command.
+ * For the socket NALs (tcp/toe): argv is <hostname> <port> [flags], where
+ * the flags string may contain 'i' (passed to the kernel as ioc_flags)
+ * and 'x' (exchange NIDs with the peer instead of deriving the peer NID
+ * from its IP address).  A TCP connection is opened using the settings
+ * from the rxmem/txmem/nagle commands and handed to the kernel, after
+ * which the local descriptor is closed.
+ * For the other NALs argv[1] is just recorded as the peer NID.
+ * Returns 0 on success or after printing the usage, -1 on error.  The
+ * socket is closed on every error path once it has been created. */
+int jt_ptl_connect(int argc, char **argv)
+{
+        if (argc < 2) {
+        usage:
+                fprintf(stderr, "usage: %s <hostname port [xi]> or <elan ID>\n",
+                        argv[0]);
+                return 0;
+        }
+        if (g_nal == 0) {
+                fprintf(stderr, "Error: you must run the 'network' command "
+                        "first.\n");
+                return -1;
+        }
+        if (g_nal == SOCKNAL || g_nal == TOENAL) {
+                ptl_nid_t peer_nid;
+                struct hostent *he;
+                struct portal_ioctl_data data;
+                struct sockaddr_in srvaddr;
+                char *flag;
+                int fd, rc;
+                int nonagle = 0;
+                int rxmem = 0;
+                int txmem = 0;
+                int bind_irq = 0;
+                int xchange_nids = 0;
+                int o;
+                socklen_t olen;
+
+                if (argc < 3) {
+                        goto usage;
+                }
+
+                he = gethostbyname(argv[1]);
+                if (!he) {
+                        fprintf(stderr, "gethostbyname error: %s\n",
+                                strerror(errno));
+                        return -1;
+                }
+
+                g_port = atol(argv[2]);
+
+                if (argc > 3)
+                        for (flag = argv[3]; *flag != 0; flag++)
+                                switch (*flag)
+                                {
+                                case 'i':
+                                        bind_irq = 1;
+                                        break;
+
+                                case 'x':
+                                        xchange_nids = 1;
+                                        break;
+
+                                default:
+                                        fprintf (stderr, "unrecognised flag '%c'\n",
+                                                 *flag);
+                                        return (-1);
+                                }
+
+                memset(&srvaddr, 0, sizeof(srvaddr));
+                srvaddr.sin_family = AF_INET;
+                srvaddr.sin_port = htons(g_port);
+                srvaddr.sin_addr.s_addr = *(__u32 *)he->h_addr;
+
+                fd = socket(PF_INET, SOCK_STREAM, 0);
+                if ( fd < 0 ) {
+                        fprintf(stderr, "socket() failed: %s\n",
+                                strerror(errno));
+                        return -1;
+                }
+
+                if (g_socket_nonagle)
+                {
+                        o = 1;
+                        if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &o, sizeof (o)) != 0)
+                        {
+                                fprintf(stderr, "cannot disable nagle: %s\n", strerror(errno));
+                                close (fd);
+                                return (-1);
+                        }
+                }
+
+                if (g_socket_rxmem != 0)
+                {
+                        o = g_socket_rxmem;
+                        if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &o, sizeof (o)) != 0)
+                        {
+                                fprintf(stderr, "cannot set receive buffer size: %s\n", strerror(errno));
+                                close (fd);
+                                return (-1);
+                        }
+                }
+
+                if (g_socket_txmem != 0)
+                {
+                        o = g_socket_txmem;
+                        if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &o, sizeof (o)) != 0)
+                        {
+                                fprintf(stderr, "cannot set send buffer size: %s\n", strerror(errno));
+                                close (fd);
+                                return (-1);
+                        }
+                }
+
+                rc = connect(fd, (struct sockaddr *)&srvaddr, sizeof(srvaddr));
+                if ( rc == -1 ) {
+                        fprintf(stderr, "connect() failed: %s\n",
+                                strerror(errno));
+                        close (fd);
+                        return -1;
+                }
+
+                /* report the settings the kernel actually applied */
+                olen = sizeof (txmem);
+                if (getsockopt (fd, SOL_SOCKET, SO_SNDBUF, &txmem, &olen) != 0)
+                        fprintf (stderr, "Can't get send buffer size: %s\n", strerror (errno));
+                olen = sizeof (rxmem);
+                if (getsockopt (fd, SOL_SOCKET, SO_RCVBUF, &rxmem, &olen) != 0)
+                        fprintf (stderr, "Can't get receive buffer size: %s\n", strerror (errno));
+                olen = sizeof (nonagle);
+                if (getsockopt (fd, IPPROTO_TCP, TCP_NODELAY, &nonagle, &olen) != 0)
+                        fprintf (stderr, "Can't get nagle: %s\n", strerror (errno));
+
+                if (xchange_nids) {
+
+                        PORTAL_IOC_INIT (data);
+                        data.ioc_nal = g_nal;
+                        rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_NID, &data);
+                        if (rc != 0)
+                        {
+                                fprintf (stderr, "failed to get my nid: %s\n",
+                                         strerror (errno));
+                                close (fd);
+                                return (-1);
+                        }
+
+                        rc = exchange_nids (fd, data.ioc_nid, &peer_nid);
+                        if (rc != 0)
+                        {
+                                close (fd);
+                                return (-1);
+                        }
+                }
+                else
+                        peer_nid = ntohl (srvaddr.sin_addr.s_addr); /* HOST byte order */
+
+                printf("Connected host: %s NID "LPX64" snd: %d rcv: %d nagle: %s\n", argv[1],
+                       peer_nid, txmem, rxmem, nonagle ? "Disabled" : "Enabled");
+
+                PORTAL_IOC_INIT(data);
+                data.ioc_fd = fd;
+                data.ioc_nal = g_nal;
+                data.ioc_nal_cmd = NAL_CMD_REGISTER_PEER_FD;
+                data.ioc_nid = peer_nid;
+                data.ioc_flags = bind_irq;
+
+                rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data);
+                if (rc) {
+                        fprintf(stderr, "failed to register fd with portals: "
+                                "%s\n", strerror(errno));
+                        close (fd);
+                        return -1;
+                }
+
+                g_nid = peer_nid;
+                printf("Connection to "LPX64" registered with socknal\n", g_nid);
+
+                rc = close(fd);
+                if (rc) {
+                        fprintf(stderr, "close failed: %d\n", rc);
+                }
+        } else if (g_nal == QSWNAL) {
+                g_nid = atoi(argv[1]);
+        } else if (g_nal == GMNAL) {
+                g_nid = atoi(argv[1]);
+        } else if (g_nal == SCIMACNAL) {
+                unsigned int tmpnid;
+                if(sscanf(argv[1], "%x", &tmpnid) == 1) {
+                        g_nid=tmpnid;
+                }
+                else {
+                        fprintf(stderr, "nid %s invalid for SCI nal\n", argv[1]);
+                }
+
+
+        } else {
+                fprintf(stderr, "This should never happen.  Also it is very "
+                        "bad.\n");
+        }
+
+        return 0;
+}
+
+/* 'disconnect' command.  For the socket NALs, ask the kernel to close
+ * the connection to the host argv[1], or every connection when no host
+ * is given.  The other NALs only print a notice.
+ * Returns 0 on success or after printing usage or a notice, -1 on error. */
+int jt_ptl_disconnect(int argc, char **argv)
+{
+        struct portal_ioctl_data data;
+        struct hostent *he;
+        int rc;
+
+        if (argc > 2) {
+                fprintf(stderr, "usage: %s [hostname]\n", argv[0]);
+                return 0;
+        }
+        if (g_nal == 0) {
+                fprintf(stderr, "Error: you must run the 'network' command first.\n");
+                return -1;
+        }
+
+        if (g_nal == SOCKNAL || g_nal == TOENAL) {
+                PORTAL_IOC_INIT(data);
+                if (argc != 2) {
+                        printf("Disconnecting ALL connections.\n");
+                        /* leave ioc_nid zeroed == disconnect all */
+                } else {
+                        he = gethostbyname(argv[1]);
+                        if (he == NULL) {
+                                fprintf(stderr, "gethostbyname error: %s\n",
+                                        strerror(errno));
+                                return -1;
+                        }
+
+                        data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */
+                }
+                data.ioc_nal = g_nal;
+                data.ioc_nal_cmd = NAL_CMD_CLOSE_CONNECTION;
+                rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data);
+                if (rc != 0) {
+                        fprintf(stderr, "failed to remove connection: %s\n",
+                                strerror(errno));
+                        return -1;
+                }
+                return 0;
+        }
+
+        if (g_nal == QSWNAL) {
+                printf("'disconnect' doesn't make any sense for elan.\n");
+                return 0;
+        }
+        if (g_nal == GMNAL) {
+                printf("'disconnect' doesn't make any sense for GM.\n");
+                return 0;
+        }
+        if (g_nal == SCIMACNAL) {
+                printf("'disconnect' doesn't make any sense for SCI.\n");
+                return 0;
+        }
+
+        fprintf(stderr, "This should never happen.  Also it is very bad.\n");
+        return -1;
+}
+
+/* 'push' command.  For the socket NALs, issue NAL_CMD_PUSH_CONNECTION
+ * for the connection to the host argv[1], or for every connection when
+ * no host is given.  The other NALs only print a notice.
+ * Returns 0 on success or after printing usage or a notice, -1 on error. */
+int jt_ptl_push_connection (int argc, char **argv)
+{
+        struct portal_ioctl_data data;
+        struct hostent *he;
+        int rc;
+
+        if (argc > 2) {
+                fprintf(stderr, "usage: %s [hostname]\n", argv[0]);
+                return 0;
+        }
+        if (g_nal == 0) {
+                fprintf(stderr, "Error: you must run the 'network' command first.\n");
+                return -1;
+        }
+
+        if (g_nal == SOCKNAL || g_nal == TOENAL) {
+                PORTAL_IOC_INIT(data);
+                if (argc != 2) {
+                        printf("Pushing ALL connections.\n");
+                        /* leave ioc_nid zeroed == push all */
+                } else {
+                        he = gethostbyname(argv[1]);
+                        if (he == NULL) {
+                                fprintf(stderr, "gethostbyname error: %s\n",
+                                        strerror(errno));
+                                return -1;
+                        }
+
+                        data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */
+                }
+                data.ioc_nal = g_nal;
+                data.ioc_nal_cmd = NAL_CMD_PUSH_CONNECTION;
+                rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data);
+                if (rc != 0) {
+                        fprintf(stderr, "failed to push connection: %s\n",
+                                strerror(errno));
+                        return -1;
+                }
+                return 0;
+        }
+
+        if (g_nal == QSWNAL) {
+                printf("'push' doesn't make any sense for elan.\n");
+                return 0;
+        }
+        if (g_nal == GMNAL) {
+                printf("'push' doesn't make any sense for GM.\n");
+                return 0;
+        }
+        if (g_nal == SCIMACNAL) {
+                printf("'push' doesn't make any sense for SCI.\n");
+                return 0;
+        }
+
+        fprintf(stderr, "This should never happen.  Also it is very bad.\n");
+        return -1;
+}
+
+/* 'ping' command: argv is <nid> [count] [size] [timeout].  Defaults are
+ * count 1, size 4 and timeout 1 second; count must lie in 0..20000.
+ * Starts the kernel pinger through IOC_PORTAL_PING.
+ * Returns 0 on success or after printing the usage, -1 on error. */
+int jt_ptl_ping(int argc, char **argv)
+{
+        struct portal_ioctl_data data;
+        ptl_nid_t nid;
+        long count = 1;
+        long size = 4;
+        long timeout = 1;
+        int rc;
+
+        if (argc < 2) {
+                fprintf(stderr, "usage: %s nid [count] [size] [timeout (secs)]\n", argv[0]);
+                return 0;
+        }
+
+        if (g_nal == 0) {
+                fprintf(stderr, "Error: you must run the 'network' command first.\n");
+                return -1;
+        }
+
+        if (ptl_parse_nid (&nid, argv[1]) != 0) {
+                fprintf (stderr, "Can't parse nid \"%s\"\n", argv[1]);
+                return (-1);
+        }
+
+        if (argc > 2) {
+                count = atol(argv[2]);
+                if (count < 0 || count > 20000) {
+                        fprintf(stderr, "are you insane?  %ld is a crazy count.\n", count);
+                        return -1;
+                }
+        }
+        if (argc > 3)
+                size = atol(argv[3]);
+        if (argc > 4)
+                timeout = atol (argv[4]);
+
+        PORTAL_IOC_INIT (data);
+        data.ioc_nal = g_nal;
+        data.ioc_nid = nid;
+        data.ioc_count = count;
+        data.ioc_size = size;
+        data.ioc_timeout = timeout;
+
+        rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_PING, &data);
+        if (rc != 0) {
+                fprintf(stderr, "failed to start pinger: %s\n",
+                        strerror(errno));
+                return -1;
+        }
+        return 0;
+}
+
+/* 'mynid' command (socket NALs only): register this node's NID with the
+ * kernel.  The NID is the IPv4 address, in host byte order, of argv[1]
+ * or of the local host name when no argument is given.
+ * Returns -1 when the command cannot be attempted and 0 otherwise; a
+ * failing ioctl is reported on stderr but still returns 0. */
+int jt_ptl_mynid(int argc, char **argv)
+{
+        struct portal_ioctl_data data;
+        struct hostent *h;
+        ptl_nid_t mynid;
+        char buf[1024];
+        char *hostname;
+        int rc;
+
+        if (argc > 2) {
+                fprintf(stderr, "usage: %s [hostname]\n", argv[0]);
+                fprintf(stderr, "hostname defaults to the hostname of the machine.\n");
+                return 0;
+        }
+
+        if (g_nal == 0) {
+                fprintf(stderr, "Error: you must run the 'network' command first.\n");
+                return -1;
+        }
+
+        if (g_nal == QSWNAL) {
+                fprintf(stderr, "'mynid' doesn't make any sense for elan.\n");
+                return -1;
+        }
+        if (g_nal == GMNAL) {
+                fprintf(stderr, "'mynid' doesn't make any sense for GM.\n");
+                return -1;
+        }
+        if (g_nal == SCIMACNAL) {
+                fprintf(stderr, "'mynid' doesn't make any sense for SCI.\n");
+                return -1;
+        }
+
+        if (g_nal != SOCKNAL && g_nal != TOENAL) {
+                fprintf(stderr, "This should never happen.  Also it is very bad.\n");
+                return -1;
+        }
+
+        hostname = argv[1];
+        if (argc == 1) {
+                if (gethostname(buf, sizeof(buf)) != 0) {
+                        fprintf(stderr, "gethostname failed: %s\n",
+                                strerror(errno));
+                        return -1;
+                }
+                hostname = buf;
+        }
+
+        h = gethostbyname(hostname);
+        if (h == NULL) {
+                fprintf(stderr, "cannot get address for host '%s': %d\n",
+                        hostname, h_errno);
+                return -1;
+        }
+        mynid = (ptl_nid_t)ntohl (*(__u32 *)h->h_addr); /* HOST byte order */
+
+        PORTAL_IOC_INIT(data);
+        data.ioc_nid = mynid;
+        data.ioc_nal = g_nal;
+        data.ioc_nal_cmd = NAL_CMD_REGISTER_MYNID;
+
+        rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data);
+        if (rc >= 0)
+                printf("registered my nid "LPX64" (%s)\n", mynid, hostname);
+        else
+                fprintf(stderr, "IOC_PORTAL_REGISTER_MYNID failed: %s\n",
+                        strerror(errno));
+        return 0;
+}
+
+/* 'fail' command: argv is <nid>|"_all_" [count].  Passes the NID and a
+ * count to the kernel through IOC_PORTAL_FAIL_NID.  Without a count
+ * PTL_MD_THRESH_INF is used; a count of 0 is reported as "Unfailing".
+ * Returns -1 on a parse error or when no network is selected and 0
+ * otherwise; a failing ioctl is reported on stderr but still returns 0. */
+int
+jt_ptl_fail_nid (int argc, char **argv)
+{
+        struct portal_ioctl_data data;
+        ptl_nid_t nid;
+        unsigned int threshold;
+        int rc;
+
+        if (argc < 2 || argc > 3) {
+                fprintf (stderr, "usage: %s nid|\"_all_\" [count (0 == mend)]\n", argv[0]);
+                return (0);
+        }
+
+        if (g_nal == 0) {
+                fprintf(stderr, "Error: you must run the 'network' command first.\n");
+                return (-1);
+        }
+
+        if (strcmp (argv[1], "_all_") == 0) {
+                nid = PTL_NID_ANY;
+        } else if (ptl_parse_nid (&nid, argv[1]) != 0) {
+                fprintf (stderr, "Can't parse nid \"%s\"\n", argv[1]);
+                return (-1);
+        }
+
+        if (argc < 3) {
+                threshold = PTL_MD_THRESH_INF;
+        } else if (sscanf (argv[2], "%i", &threshold) != 1) {
+                fprintf (stderr, "Can't parse count \"%s\"\n", argv[2]);
+                return (-1);
+        }
+
+        PORTAL_IOC_INIT (data);
+        data.ioc_nal = g_nal;
+        data.ioc_nid = nid;
+        data.ioc_count = threshold;
+
+        rc = l_ioctl (PORTALS_DEV_ID, IOC_PORTAL_FAIL_NID, &data);
+        if (rc >= 0)
+                printf ("%s %s\n", threshold == 0 ? "Unfailing" : "Failing", argv[1]);
+        else
+                fprintf (stderr, "IOC_PORTAL_FAIL_NID failed: %s\n",
+                         strerror (errno));
+
+        return (0);
+}
+
+/* 'rxmem' command: with an argument, set the receive buffer size to
+ * request on future connections (size string as accepted by
+ * Parser_size(), must not be negative); then print the current setting.
+ * Always returns 0. */
+int
+jt_ptl_rxmem (int argc, char **argv)
+{
+        int size;
+
+        if (argc > 1) {
+                if (Parser_size (&size, argv[1]) != 0 || size < 0) {
+                        fprintf (stderr, "Can't parse size %s\n", argv[1]);
+                        return (0);
+                }
+                g_socket_rxmem = size;
+        }
+
+        printf ("Socket rmem = %d\n", g_socket_rxmem);
+        return (0);
+}
+
+/*
+ * "send_mem" command: with an argument, parse it via Parser_size() and store
+ * it in g_socket_txmem; in either case print the current value.
+ *
+ * Always returns 0, including when the size cannot be parsed or is negative
+ * (in which case g_socket_txmem is left unchanged and nothing is printed to
+ * stdout).
+ */
+int
+jt_ptl_txmem (int argc, char **argv)
+{
+        int bytes;
+
+        if (argc > 1) {
+                int parsed_ok = (Parser_size (&bytes, argv[1]) == 0) &&
+                                (bytes >= 0);
+
+                if (!parsed_ok) {
+                        fprintf (stderr, "Can't parse size %s\n", argv[1]);
+                        return (0);
+                }
+
+                g_socket_txmem = bytes;
+        }
+
+        printf ("Socket txmem = %d\n", g_socket_txmem);
+        return (0);
+}
+
+/*
+ * "nagle" command: with an argument, parse it via Parser_bool() and set
+ * g_socket_nonagle to its logical inverse; in either case print whether
+ * Nagle is currently enabled or disabled.
+ *
+ * Always returns 0, including when the boolean cannot be parsed (the flag
+ * is then left unchanged and nothing is printed to stdout).
+ */
+int
+jt_ptl_nagle (int argc, char **argv)
+{
+        int         want_nagle;
+        const char *state;
+
+        if (argc > 1) {
+                if (Parser_bool (&want_nagle, argv[1]) != 0) {
+                        fprintf (stderr, "Can't parse boolean %s\n", argv[1]);
+                        return (0);
+                }
+                /* the global records "no nagle", hence the inversion */
+                g_socket_nonagle = !want_nagle;
+        }
+
+        if (g_socket_nonagle)
+                state = "disabled";
+        else
+                state = "enabled";
+
+        printf ("Nagle %s\n", state);
+        return (0);
+}
+
+/*
+ * "add_route" command: issue IOC_PORTAL_ADD_ROUTE for a gateway and a target
+ * NID range.
+ *
+ * argv[1] - gateway NID; its NAL is derived with nid2nal()
+ * argv[2] - first target NID
+ * argv[3] - optional second target NID; when omitted the range is the
+ *           single NID given in argv[2]
+ *
+ * The two target NIDs may be given in either order: the lower one is passed
+ * in ioc_nid2 and the higher one in ioc_nid3.
+ *
+ * Returns 0 on success or after printing the usage message, and -1 when a
+ * NID cannot be parsed or the ioctl fails.
+ */
+int
+jt_ptl_add_route (int argc, char **argv)
+{
+        struct portal_ioctl_data data;
+        ptl_nid_t                nid1;
+        ptl_nid_t                nid2;
+        ptl_nid_t                gateway_nid;
+        int                      gateway_nal;
+        int                      rc;
+
+        if (argc < 3) {
+                fprintf (stderr, "usage: %s gateway target [target]\n", argv[0]);
+                return (0);
+        }
+
+        if (ptl_parse_nid (&gateway_nid, argv[1]) != 0) {
+                fprintf (stderr, "Can't parse gateway NID \"%s\"\n", argv[1]);
+                return (-1);
+        }
+
+        gateway_nal = nid2nal (gateway_nid);
+
+        if (ptl_parse_nid (&nid1, argv[2]) != 0) {
+                fprintf (stderr, "Can't parse first target NID \"%s\"\n", argv[2]);
+                return (-1);
+        }
+
+        if (argc < 4) {
+                nid2 = nid1;
+        } else if (ptl_parse_nid (&nid2, argv[3]) != 0) {
+                /* report the argument that was actually parsed (argv[3]);
+                 * argv[4] is NULL when exactly four arguments are given */
+                fprintf (stderr, "Can't parse second target NID \"%s\"\n", argv[3]);
+                return (-1);
+        }
+
+        PORTAL_IOC_INIT(data);
+        data.ioc_nid = gateway_nid;
+        data.ioc_nal = gateway_nal;
+        data.ioc_nid2 = MIN (nid1, nid2);
+        data.ioc_nid3 = MAX (nid1, nid2);
+
+        rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_ADD_ROUTE, &data);
+        if (rc != 0) {
+                fprintf (stderr, "IOC_PORTAL_ADD_ROUTE failed: %s\n", strerror (errno));
+                return (-1);
+        }
+
+        return (0);
+}
+
+/*
+ * "del_route" command: parse the target NID in argv[1] and issue
+ * IOC_PORTAL_DEL_ROUTE for it.
+ *
+ * Returns 0 on success or after printing the usage message, and -1 when the
+ * NID cannot be parsed or the ioctl fails.
+ */
+int
+jt_ptl_del_route (int argc, char **argv)
+{
+        struct portal_ioctl_data data;
+        ptl_nid_t                target;
+
+        if (argc < 2) {
+                fprintf (stderr, "usage: %s targetNID\n", argv[0]);
+                return (0);
+        }
+
+        if (ptl_parse_nid (&target, argv[1]) != 0) {
+                fprintf (stderr, "Can't parse target NID \"%s\"\n", argv[1]);
+                return (-1);
+        }
+
+        PORTAL_IOC_INIT(data);
+        data.ioc_nid = target;
+
+        if (l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_DEL_ROUTE, &data) == 0)
+                return (0);
+
+        fprintf (stderr, "IOC_PORTAL_DEL_ROUTE ("LPX64") failed: %s\n", target, strerror (errno));
+        return (-1);
+}
+
+/*
+ * "print_routes" command: query routes one at a time with
+ * IOC_PORTAL_GET_ROUTE, passing an increasing index in ioc_count, and print
+ * one line per route (gateway NAL name, gateway NID, and the two target
+ * NIDs).  The walk stops at the first index for which the ioctl returns
+ * non-zero.
+ *
+ * argc/argv are not used.  Always returns 0.
+ */
+int
+jt_ptl_print_routes (int argc, char **argv)
+{
+        char                     nidstr[3][128];
+        struct portal_ioctl_data data;
+        int                      entry = 0;
+        int                      gw_nal;
+        ptl_nid_t                gw_nid;
+        ptl_nid_t                lo_nid;
+        ptl_nid_t                hi_nid;
+
+        while (1) {
+                PORTAL_IOC_INIT(data);
+                data.ioc_count = entry;
+
+                if (l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_ROUTE, &data) != 0)
+                        break;
+
+                gw_nal = data.ioc_nal;
+                gw_nid = data.ioc_nid;
+                lo_nid = data.ioc_nid2;
+                hi_nid = data.ioc_nid3;
+
+                /* each NID gets its own scratch buffer so all three strings
+                 * are valid at the same time inside the printf call */
+                printf ("%8s %18s : %s - %s\n",
+                        nal2name (gw_nal),
+                        ptl_nid2str (nidstr[0], gw_nid),
+                        ptl_nid2str (nidstr[1], lo_nid),
+                        ptl_nid2str (nidstr[2], hi_nid));
+
+                entry++;
+        }
+
+        return (0);
+}
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Portals, http://www.sf.net/projects/lustre/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <portals/api-support.h>
+#include <portals/ptlctl.h>
+
+#include "parser.h"
+
+
+/*
+ * Command table for the ptlctl shell: each entry gives the command name, the
+ * function that implements it, and its help text.  The table ends with an
+ * all-zero entry.
+ */
+command_t list[] = {
+        {"network", jt_ptl_network, 0,"setup the NAL (args: nal name)"},
+        {"connect", jt_ptl_connect, 0, "connect to a remote nid (args: <hostname port> | <id> for tcp/elan respectively)"},
+        {"disconnect", jt_ptl_disconnect, 0, "disconnect from a remote nid (args: [hostname])"},
+        {"push", jt_ptl_push_connection, 0, "flush connection to a remote nid (args: [hostname])"},
+        {"ping", jt_ptl_ping, 0, "do a ping test (args: nid [count] [size] [timeout])"},
+        {"mynid", jt_ptl_mynid, 0, "inform the socknal of the local NID (args: [hostname])"},
+        {"add_route", jt_ptl_add_route, 0, "add an entry to the routing table (args: gatewayNID targetNID [targetNID])"},
+        {"del_route", jt_ptl_del_route, 0, "delete an entry from the routing table (args: targetNID)"},
+        {"print_routes", jt_ptl_print_routes, 0, "print the routing table (args: none)"},
+        {"recv_mem", jt_ptl_rxmem, 0, "Set socket receive buffer size (args: [size])"},
+        {"send_mem", jt_ptl_txmem, 0, "Set socket send buffer size (args: [size])"},
+        {"nagle", jt_ptl_nagle, 0, "Enable/Disable Nagle (args: [on/off])"},
+        {"dump", jt_ioc_dump, 0, "usage: dump file, save ioctl buffer to file"},
+        {"fail", jt_ptl_fail_nid, 0, "usage: fail nid|_all_ [count]"},
+        {"help", Parser_help, 0, "help"},
+        {"exit", Parser_quit, 0, "quit"},
+        {"quit", Parser_quit, 0, "quit"},
+        { 0, 0, 0, NULL }
+};
+
+/*
+ * ptlctl entry point.  After ptl_initialize() succeeds, either execute the
+ * single command given on the command line (and return its result) or, with
+ * no arguments, run the interactive "ptlctl > " command loop and return 0.
+ * Exits with status 1 if ptl_initialize() fails.
+ */
+int main(int argc, char **argv)
+{
+        if (ptl_initialize(argc, argv) < 0)
+                exit(1);
+
+        Parser_init("ptlctl > ", list);
+
+        if (argc <= 1) {
+                /* no command on the command line: interactive mode */
+                Parser_commands();
+                return 0;
+        }
+
+        /* skip argv[0] so the command name becomes the first argument */
+        return Parser_execarg(argc - 1, argv + 1, list);
+}
--- /dev/null
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/time.h>
+
+/*
+ * Return the current gettimeofday() time as seconds, with the microsecond
+ * part as the fraction.
+ */
+double
+timenow ()
+{
+        const double   usec_per_sec = 1000000.0;
+        struct timeval stamp;
+
+        gettimeofday (&stamp, NULL);
+
+        return (stamp.tv_sec + stamp.tv_usec / usec_per_sec);
+}
+
+/*
+ * Read one sample from the stat file open on 'fd', print it, then write a
+ * newline back to the file.
+ *
+ * The file is expected to hold "bytes packets errors [depth]".  On the first
+ * call only the raw counters are printed; on later calls per-second rates
+ * are also printed, computed over the time since the previous call finished.
+ *
+ * Exits the process with status 1 if the file cannot be read or parsed.
+ * A failure of the trailing write is reported on stderr but is not fatal.
+ */
+void
+do_stat (int fd)
+{
+        static char   buffer[1024];
+        static double last = 0.0;
+        double        now;
+        double        t;
+        long long     bytes;
+        long          packets;
+        long          errors;
+        long          depth;
+        int           n;
+
+        lseek (fd, 0, SEEK_SET);
+        now = timenow();
+
+        /* read at most sizeof(buffer) - 1 bytes so the terminating NUL
+         * below always lands inside the buffer */
+        n = read (fd, buffer, sizeof (buffer) - 1);
+        if (n < 0) {
+                fprintf (stderr, "Can't read statfile\n");
+                exit (1);
+        }
+        buffer[n] = 0;
+
+        n = sscanf (buffer, "%lld %ld %ld %ld", &bytes, &packets, &errors, &depth);
+
+        /* the fourth field (depth) is optional */
+        if (n < 3) {
+                fprintf (stderr, "Can't parse statfile\n");
+                exit (1);
+        }
+
+        if (last == 0.0) {
+                printf ("%lld bytes, %ld packets (sz %lld) %ld errors",
+                        bytes, packets, (long long)((packets == 0) ? 0LL : bytes/packets), errors);
+        } else {
+                t = now - last;
+
+                printf ("%9lld (%7.2fMb/s), %7ld packets (sz %5lld, %5ld/s) %ld errors (%ld/s)",
+                        bytes, ((double)bytes)/((1<<20) * t),
+                        packets, (long long)((packets == 0) ? 0LL : bytes/packets), (long)(packets/t),
+                        errors, (long)(errors/t));
+        }
+
+        if (n == 4)
+                printf (" (%ld)\n", depth);
+        else
+                printf ("\n");
+
+        fflush (stdout);
+
+        /* NOTE(review): writing to the stat file presumably resets the
+         * counters for the next interval - confirm against the /proc handler */
+        lseek (fd, 0, SEEK_SET);
+        if (write (fd, "\n", 1) < 0)
+                fprintf (stderr, "Can't write statfile: %s\n", strerror (errno));
+        last = timenow();
+}
+
+/*
+ * routerstat entry point.  Opens /proc/sys/portals/router and prints one
+ * sample.  If argv[1] converts (via atoi) to a non-zero interval, keeps
+ * printing a sample every 'interval' seconds forever; otherwise returns 0
+ * after the first sample.  Returns 1 if the file cannot be opened.
+ */
+int main (int argc, char **argv)
+{
+        int interval = (argc > 1) ? atoi (argv[1]) : 0;
+        int fd = open ("/proc/sys/portals/router", O_RDWR);
+
+        if (fd < 0) {
+                fprintf (stderr, "Can't open stat: %s\n", strerror (errno));
+                return (1);
+        }
+
+        do_stat (fd);
+
+        /* 'interval' never changes, so this either never runs or never ends */
+        while (interval != 0) {
+                sleep (interval);
+                do_stat (fd);
+        }
+
+        return (0);
+}