From: cvs2svn Date: Sun, 24 Oct 2004 00:24:41 +0000 (+0000) Subject: This commit was manufactured by cvs2svn to create branch 'b1_4'. X-Git-Tag: v1_8_0_110~486^5~140 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=ac73e9a25d3d43965fc7e8e80dd55730f1ec7d76;p=fs%2Flustre-release.git This commit was manufactured by cvs2svn to create branch 'b1_4'. --- ac73e9a25d3d43965fc7e8e80dd55730f1ec7d76 diff --cc lnet/.cvsignore index f30d862,f30d862..0000000 deleted file mode 100644,100644 --- a/lnet/.cvsignore +++ /dev/null @@@ -1,11 -1,11 +1,0 @@@ --Kernelenv --Makefile --autoMakefile --autoMakefile.in --aclocal.m4 --autom4te.cache --config.log --config.status --configure --.*.cmd --.depend diff --cc lnet/AUTHORS index e69de29,e69de29..0000000 deleted file mode 100644,100644 --- a/lnet/AUTHORS +++ /dev/null diff --cc lnet/ChangeLog index e69de29,e69de29..0000000 deleted file mode 100644,100644 --- a/lnet/ChangeLog +++ /dev/null diff --cc lnet/Kernelenv.in index 7a48c58,7a48c58..0000000 deleted file mode 100644,100644 --- a/lnet/Kernelenv.in +++ /dev/null @@@ -1,6 -1,6 +1,0 @@@ --EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/portals/include --# portals/utils/debug.c wants from userspace. sigh. 
--HOSTCFLAGS := -I@LINUX@/include $(EXTRA_CFLAGS) --LIBREADLINE := @LIBREADLINE@ --# 2.5's makefiles aren't nice to cross dir libraries in host programs --PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o diff --cc lnet/Kernelenv.mk index 7c66dfa,7c66dfa..0000000 deleted file mode 100644,100644 --- a/lnet/Kernelenv.mk +++ /dev/null @@@ -1,4 -1,4 +1,0 @@@ --EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/portals/include --HOSTCFLAGS := $(EXTRA_CFLAGS) --# the kernel doesn't want us to build archives for host binaries :/ --PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o diff --cc lnet/Makefile.in index 71d0dc8,71d0dc8..0000000 deleted file mode 100644,100644 --- a/lnet/Makefile.in +++ /dev/null @@@ -1,9 -1,9 +1,0 @@@ --subdir-m += libcfs -- --cray-subdirs += portals --cray-subdirs += knals --cray-subdirs += router --cray-subdirs += tests --@CRAY_PORTALS_FALSE@subdir-m += $(cray-subdirs) -- --@INCLUDE_RULES@ diff --cc lnet/Makefile.mk index 73a19df,73a19df..0000000 deleted file mode 100644,100644 --- a/lnet/Makefile.mk +++ /dev/null @@@ -1,12 -1,12 +1,0 @@@ --include $(src)/Kernelenv -- --# The ordering of these determines the order that each subsystem's --# module_init() functions are called in. if these are changed make sure --# they reflect the dependencies between each subsystem's _init functions. 
--obj-y += libcfs/ --obj-y += portals/ --obj-y += router/ --obj-y += knals/ --obj-y += tests/ -- --obj-m += utils/ diff --cc lnet/NEWS index e69de29,e69de29..0000000 deleted file mode 100644,100644 --- a/lnet/NEWS +++ /dev/null diff --cc lnet/README index e69de29,e69de29..0000000 deleted file mode 100644,100644 --- a/lnet/README +++ /dev/null diff --cc lnet/archdep.m4 index 27704bd,d2bd1a1..0000000 deleted file mode 100644,100644 --- a/lnet/archdep.m4 +++ /dev/null @@@ -1,667 -1,681 +1,0 @@@ --# -------- we can't build modules unless srcdir = builddir --if test x$enable_modules != xno ; then -- AC_CHECK_FILE([autoMakefile.am],[], -- [AC_MSG_ERROR([At this time, Lustre does not support building kernel modules with srcdir != buildir.])]) --fi -- --# -------- in kernel compilation? (2.5 only) ------------- --AC_MSG_CHECKING([if inkernel build support is requested]) --AC_ARG_ENABLE([inkernel], -- AC_HELP_STRING([--enable-inkernel], -- [set up 2.5 kernel makefiles]), -- [],[enable_inkernel=no]) --AC_MSG_RESULT([$enable_inkernel]) --AM_CONDITIONAL(INKERNEL, test x$enable_inkernel = xyes) -- --# -------- are we building against an external portals? ------- --AC_MSG_CHECKING([if Cray portals should be used]) --AC_ARG_WITH([cray-portals], -- AC_HELP_STRING([--with-cray-portals=path], -- [path to cray portals]), -- [ - CRAY_PORTALS_INCLUDE="-I$with_cray_portals" - AC_DEFINE(CRAY_PORTALS, 1, [Building with Cray Portals]) - if test "$with_cray_portals" != no; then - if test -r $with_cray_portals/include/portals/api.h ; then - CRAY_PORTALS_PATH=$with_cray_portals - CRAY_PORTALS_INCLUDE="-I$with_cray_portals/include" - AC_DEFINE(CRAY_PORTALS, 1, [Building with Cray Portals]) - else - AC_MSG_ERROR([--with-cray-portals specified badly]) - fi - fi -- ],[with_cray_portals=no]) -AC_SUBST(CRAY_PORTALS_PATH) --AC_MSG_RESULT([$with_cray_portals]) - --AM_CONDITIONAL(CRAY_PORTALS, test x$with_cray_portals != xno) - -# -------- enable tests and utils? 
------- --if test x$enable_tests = xno ; then -- AC_MSG_NOTICE([disabling tests]) -- enable_tests=no --fi --if test x$enable_utils = xno ; then -- AC_MSG_NOTICE([disabling utilities]) -- enable_utils=no --fi -- --if test x$enable_modules != xno ; then -- # -------- set linuxdir ------------ -- AC_MSG_CHECKING([for Linux sources]) -- AC_ARG_WITH([linux], -- AC_HELP_STRING([--with-linux=path], -- [set path to Linux source (default=/usr/src/linux)]), -- [LINUX=$with_linux], -- [LINUX=/usr/src/linux]) -- AC_MSG_RESULT([$LINUX]) -- AC_SUBST(LINUX) -- if test x$enable_inkernel = xyes ; then -- echo ln -s `pwd` $LINUX/fs/lustre -- rm $LINUX/fs/lustre -- ln -s `pwd` $LINUX/fs/lustre -- fi -- -- # -------- check for .confg -------- -- AC_ARG_WITH([linux-config], -- [AC_HELP_STRING([--with-linux-config=path], -- [set path to Linux .conf (default=\$LINUX/.config)])], -- [LINUX_CONFIG=$with_linux_config], -- [LINUX_CONFIG=$LINUX/.config]) -- AC_SUBST(LINUX_CONFIG) -- -- AC_CHECK_FILE([/boot/kernel.h], -- [KERNEL_SOURCE_HEADER='/boot/kernel.h'], -- [AC_CHECK_FILE([/var/adm/running-kernel.h]), -- [KERNEL_SOURCE_HEADER='/var/adm/running-kernel.h']]) -- -- AC_ARG_WITH([kernel-source-header], -- AC_HELP_STRING([--with-kernel-source-header=path], -- [Use a different kernel version header. 
Consult README.kernel-source for details.]), -- [KERNEL_SOURCE_HEADER=$with_kernel_source_header]) -- -- # -------------------- -- ARCH_UM= -- UML_CFLAGS= -- -- AC_MSG_CHECKING([if you are running user mode linux for $host_cpu]) -- if test -e $LINUX/include/asm-um ; then -- if test X`ls -id $LINUX/include/asm/ | awk '{print $1}'` = X`ls -id $LINUX/include/asm-um | awk '{print $1}'` ; then -- ARCH_UM='ARCH=um' -- # see notes in Rules.in -- UML_CFLAGS='-O0' -- AC_MSG_RESULT(yes) -- else -- AC_MSG_RESULT([no (asm doesn't point at asm-um)]) -- fi -- else -- AC_MSG_RESULT([no (asm-um missing)]) -- fi -- -- AC_SUBST(ARCH_UM) -- AC_SUBST(UML_CFLAGS) -- -- # --------- Linux 25 ------------------ -- AC_CHECK_FILE([$LINUX/include/linux/namei.h], -- [ -- linux25="yes" -- KMODEXT=".ko" -- enable_ldiskfs="yes" -- BACKINGFS="ldiskfs" -- ],[ -- KMODEXT=".o" -- linux25="no" -- ]) -- AC_MSG_CHECKING([if you are using Linux 2.6]) -- AC_MSG_RESULT([$linux25]) -- -- AC_SUBST(LINUX25) -- AC_SUBST(KMODEXT) -- -- AC_PATH_PROG(PATCH, patch, [no]) -- AC_PATH_PROG(QUILT, quilt, [no]) -- -- if test x$enable_ldiskfs$PATCH$QUILT = xyesnono ; then -- AC_MSG_ERROR([Quilt or patch are needed to build the ldiskfs module (for Linux 2.6)]) -- fi --fi --AM_CONDITIONAL(LINUX25, test x$linux25 = xyes) --AM_CONDITIONAL(USE_QUILT, test x$QUILT != xno) -- --# ------- Makeflags ------------------ -- --CPPFLAGS="$CRAY_PORTALS_INCLUDE $CRAY_PORTALS_COMMANDLINE -I\$(top_srcdir)/include -I\$(top_srcdir)/portals/include" -- --# liblustre are all the same --LLCPPFLAGS="-D__arch_lib__ -D_LARGEFILE64_SOURCE=1" --AC_SUBST(LLCPPFLAGS) -- --LLCFLAGS="-g -Wall -fPIC" --AC_SUBST(LLCFLAGS) -- --# everyone builds against portals and lustre -- --if test x$enable_ldiskfs = xyes ; then -- AC_DEFINE(CONFIG_LDISKFS_FS_MODULE, 1, [build ldiskfs as a module]) -- AC_DEFINE(CONFIG_LDISKFS_FS_XATTR, 1, [enable extended attributes for ldiskfs]) -- AC_DEFINE(CONFIG_LDISKFS_FS_POSIX_ACL, 1, [enable posix acls]) -- 
AC_DEFINE(CONFIG_LDISKFS_FS_SECURITY, 1, [enable fs security]) --fi -- --EXTRA_KCFLAGS="-g $CRAY_PORTALS_INCLUDE $CRAY_PORTALS_COMMANDLINE -I$PWD/portals/include -I$PWD/include" -- --# these are like AC_TRY_COMPILE, but try to build modules against the --# kernel, inside the kernel-tests directory -- --AC_DEFUN([LUSTRE_MODULE_CONFTEST], --[cat >conftest.c <<_ACEOF --$1 --_ACEOF --]) -- --AC_DEFUN([LUSTRE_MODULE_COMPILE_IFELSE], --[m4_ifvaln([$1], [LUSTRE_MODULE_CONFTEST([$1])])dnl --rm -f kernel-tests/conftest.o kernel-tests/conftest.mod.c kernel-tests/conftest.ko --AS_IF([AC_TRY_COMMAND(cp conftest.c kernel-tests && make [$2] -f $PWD/kernel-tests/Makefile LUSTRE_LINUX_CONFIG=$LINUX_CONFIG -o tmp_include_depends -o scripts -o include/config/MARKER -C $LINUX EXTRA_CFLAGS="-Werror-implicit-function-declaration $EXTRA_KCFLAGS" $ARCH_UM SUBDIRS=$PWD/kernel-tests) >/dev/null && AC_TRY_COMMAND([$3])], -- [$4], -- [_AC_MSG_LOG_CONFTEST --m4_ifvaln([$5],[$5])dnl])dnl --rm -f kernel-tests/conftest.o kernel-tests/conftest.mod.c kernel-tests/conftest.mod.o kernel-tests/conftest.ko m4_ifval([$1], [kernel-tests/conftest.c conftest.c])[]dnl --]) -- --AC_DEFUN([LUSTRE_MODULE_TRY_COMPILE], --[LUSTRE_MODULE_COMPILE_IFELSE( -- [AC_LANG_PROGRAM([[$1]], [[$2]])], -- [modules], -- [test -s kernel-tests/conftest.o], -- [$3], [$4])]) -- --AC_DEFUN([LUSTRE_MODULE_TRY_MAKE], --[LUSTRE_MODULE_COMPILE_IFELSE([AC_LANG_PROGRAM([[$1]], [[$2]])], [$3], [$4], [$5], [$6])]) -- --# ------------ include paths ------------------ -- --if test x$enable_modules != xno ; then -- # ------------ .config exists ---------------- -- AC_CHECK_FILE([$LINUX_CONFIG],[], -- [AC_MSG_ERROR([Kernel config could not be found. If you are building from a kernel-source rpm consult README.kernel-source])]) -- -- # ----------- make dep run? 
------------------ -- AC_CHECK_FILES([$LINUX/include/linux/autoconf.h -- $LINUX/include/linux/version.h -- $LINUX/include/linux/config.h],[], -- [AC_MSG_ERROR([Run make config in $LINUX.])]) -- -- # ------------ rhconfig.h includes runtime-generated bits -- -- # red hat kernel-source checks -- -- # we know this exists after the check above. if the user -- # tarred up the tree and ran make dep etc. in it, then -- # version.h gets overwritten with a standard linux one. -- -- if grep rhconfig $LINUX/include/linux/version.h >/dev/null ; then -- # This is a clean kernel-source tree, we need to -- # enable extensive workarounds to get this to build -- # modules -- AC_CHECK_FILE([$KERNEL_SOURCE_HEADER], -- [if test $KERNEL_SOURCE_HEADER = '/boot/kernel.h' ; then -- AC_MSG_WARN([Using /boot/kernel.h from RUNNING kernel.]) -- AC_MSG_WARN([If this is not what you want, use --with-kernel-source-header.]) -- AC_MSG_WARN([Consult README.kernel-source for details.]) -- fi], -- [AC_MSG_ERROR([$KERNEL_SOURCE_HEADER not found. 
Consult README.kernel-source for details.])]) -- EXTRA_KCFLAGS="-include $KERNEL_SOURCE_HEADER $EXTRA_KCFLAGS" -- fi -- -- # --- check that we can build modules at all -- AC_MSG_CHECKING([that modules can be built]) -- LUSTRE_MODULE_TRY_COMPILE([],[], -- [ -- AC_MSG_RESULT([yes]) -- ],[ -- AC_MSG_RESULT([no]) -- AC_MSG_WARN([Consult config.log for details.]) -- AC_MSG_WARN([If you are trying to build with a kernel-source rpm, consult README.kernel-source]) -- AC_MSG_ERROR([Kernel modules could not be built.]) -- ]) -- -- # ------------ LINUXRELEASE and moduledir ------------------ -- MODULE_TARGET="SUBDIRS" -- if test $linux25 = 'yes' ; then -- # ------------ external module support --------------------- -- makerule="$PWD/kernel-tests" -- AC_MSG_CHECKING([for external module build support]) -- rm -f kernel-tests/conftest.i -- LUSTRE_MODULE_TRY_MAKE([],[], -- [$makerule LUSTRE_KERNEL_TEST=conftest.i], -- [test -s kernel-tests/conftest.i], -- [ -- AC_MSG_RESULT([no]) -- ],[ -- AC_MSG_RESULT([yes]) -- makerule="_module_$makerule" -- MODULE_TARGET="M" -- ]) -- else -- makerule="_dir_$PWD/kernel-tests" -- fi -- AC_SUBST(MODULE_TARGET) -- LINUXRELEASE= -- rm -f kernel-tests/conftest.i -- AC_MSG_CHECKING([for Linux release]) -- LUSTRE_MODULE_TRY_MAKE( -- [#include ], -- [char *LINUXRELEASE; -- LINUXRELEASE=UTS_RELEASE;], -- [$makerule LUSTRE_KERNEL_TEST=conftest.i], -- [test -s kernel-tests/conftest.i], -- [ -- # LINUXRELEASE="UTS_RELEASE" -- eval $(grep "LINUXRELEASE=" kernel-tests/conftest.i) -- ],[ -- AC_MSG_RESULT([unknown]) -- AC_MSG_ERROR([Could not preprocess test program. 
Consult config.log for details.]) -- ]) -- rm -f kernel-tests/conftest.i -- if test x$LINUXRELEASE = x ; then -- AC_MSG_RESULT([unknown]) -- AC_MSG_ERROR([Could not determine Linux release version from linux/version.h.]) -- fi -- AC_MSG_RESULT([$LINUXRELEASE]) -- AC_SUBST(LINUXRELEASE) -- -- moduledir='/lib/modules/'$LINUXRELEASE/kernel -- modulefsdir='$(moduledir)/fs/$(PACKAGE)' -- modulenetdir='$(moduledir)/net/$(PACKAGE)' -- -- AC_SUBST(moduledir) -- AC_SUBST(modulefsdir) -- AC_SUBST(modulenetdir) -- -- # ------------ RELEASE -------------------------------- -- AC_MSG_CHECKING([for Lustre release]) -- RELEASE="`echo ${LINUXRELEASE} | tr '-' '_'`_`date +%Y%m%d%H%M`" -- AC_MSG_RESULT($RELEASE) -- AC_SUBST(RELEASE) -- -- # ---------- Portals flags -------------------- -- -- AC_MSG_CHECKING([for zero-copy TCP support]) -- AC_ARG_ENABLE([zerocopy], -- AC_HELP_STRING([--disable-zerocopy], -- [disable socknal zerocopy]), -- [],[enable_zerocopy='yes']) -- if test x$enable_zerocopy = xno ; then -- AC_MSG_RESULT([no (by request)]) -- else -- ZCCD="`grep -c zccd $LINUX/include/linux/skbuff.h`" -- if test "$ZCCD" != 0 ; then -- AC_DEFINE(SOCKNAL_ZC, 1, [use zero-copy TCP]) -- AC_MSG_RESULT(yes) -- else -- AC_MSG_RESULT([no (no kernel support)]) -- fi -- fi -- -- AC_ARG_ENABLE([affinity], -- AC_HELP_STRING([--disable-affinity], -- [disable process/irq affinity]), -- [],[enable_affinity='yes']) -- -- AC_MSG_CHECKING([for CPU affinity support]) -- if test x$enable_affinity = xno ; then -- AC_MSG_RESULT([no (by request)]) -- else -- LUSTRE_MODULE_TRY_COMPILE( -- [ -- #include -- ],[ -- struct task_struct t; -- #ifdef CPU_ARRAY_SIZE -- cpumask_t m; -- #else -- unsigned long m; -- #endif -- set_cpus_allowed(&t, m); -- ],[ -- AC_DEFINE(CPU_AFFINITY, 1, [kernel has cpu affinity support]) -- AC_MSG_RESULT([yes]) -- ],[ -- AC_MSG_RESULT([no (no kernel support)]) -- ]) -- fi -- -- ##################################### -- -- AC_MSG_CHECKING([if quadrics kernel headers are present]) -- 
if test -d $LINUX/drivers/net/qsnet ; then -- AC_MSG_RESULT([yes]) -- QSWNAL="qswnal" -- AC_MSG_CHECKING([for multirail EKC]) -- if test -f $LINUX/include/elan/epcomms.h; then -- AC_MSG_RESULT([supported]) -- QSWCPPFLAGS="-DMULTIRAIL_EKC=1" -- else -- AC_MSG_RESULT([not supported]) - QSWCPPFLAGS="-I$LINUX/drivers/net/qsnet/include" - if test -d $LINUX/drivers/net/qsnet/include; then - QSWCPPFLAGS="-I$LINUX/drivers/net/qsnet/include" - else - QSWCPPFLAGS="-I$LINUX/include/linux" - fi -- fi -- else -- AC_MSG_RESULT([no]) -- QSWNAL="" -- QSWCPPFLAGS="" -- fi -- AC_SUBST(QSWCPPFLAGS) -- AC_SUBST(QSWNAL) -- -- AC_MSG_CHECKING([if gm support was requested]) -- AC_ARG_WITH([gm], -- AC_HELP_STRING([--with-gm=path], -- [build gmnal against path]), -- [ -- case $with_gm in -- yes) -- AC_MSG_RESULT([yes]) -- GMCPPFLAGS="-I/usr/local/gm/include" -- GMNAL="gmnal" -- ;; -- no) -- AC_MSG_RESULT([no]) -- GMCPPFLAGS="" -- GMNAL="" -- ;; -- *) -- AC_MSG_RESULT([yes]) -- GMCPPFLAGS="-I$with_gm/include -I$with_gm/drivers -I$with_gm/drivers/linux/gm" -- GMNAL="gmnal" -- ;; -- esac -- ],[ -- AC_MSG_RESULT([no]) -- GMCPPFLAGS="" -- GMNAL="" -- ]) -- AC_SUBST(GMCPPFLAGS) -- AC_SUBST(GMNAL) - - #fixme: where are the default IB includes? 
- default_ib_include_dir=/usr/local/ib/include - an_ib_include_file=vapi.h -- - AC_MSG_CHECKING([if ib nal support was requested]) - AC_ARG_WITH([ib], - AC_HELP_STRING([--with-ib=yes/no/path], - [Path to IB includes]), - #### OpenIB - AC_MSG_CHECKING([if OpenIB kernel headers are present]) - OPENIBCPPFLAGS="-I$LINUX/drivers/infiniband/include -DIN_TREE_BUILD" - EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="$EXTRA_KCFLAGS $OPENIBCPPFLAGS" - LUSTRE_MODULE_TRY_COMPILE( -- [ - case $with_ib in - yes) - AC_MSG_RESULT([yes]) - IBCPPFLAGS="-I/usr/local/ib/include" - IBNAL="ibnal" - ;; - no) - AC_MSG_RESULT([no]) - IBCPPFLAGS="" - IBNAL="" - ;; - *) - AC_MSG_RESULT([yes]) - IBCPPFLAGS="-I$with_ib" - IBNAL="" - ;; - esac - #include - ],[ - struct ib_device_properties props; - return 0; - ],[ - AC_MSG_RESULT([yes]) - OPENIBNAL="openibnal" -- ],[ -- AC_MSG_RESULT([no]) - IBFLAGS="" - IBNAL="" - OPENIBNAL="" - OPENIBCPPFLAGS="" -- ]) - AC_SUBST(IBNAL) - AC_SUBST(IBCPPFLAGS) - EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save" - AC_SUBST(OPENIBCPPFLAGS) - AC_SUBST(OPENIBNAL) -- -- # ---------- Red Hat 2.4.18 has iobuf->dovary -------------- -- # But other kernels don't -- -- AC_MSG_CHECKING([if struct kiobuf has a dovary field]) -- LUSTRE_MODULE_TRY_COMPILE( -- [ -- #include -- ],[ -- struct kiobuf iobuf; -- iobuf.dovary = 1; -- ],[ -- AC_MSG_RESULT([yes]) -- AC_DEFINE(HAVE_KIOBUF_DOVARY, 1, [struct kiobuf has a dovary field]) -- ],[ -- AC_MSG_RESULT([no]) - ]) - ]) -- -- # ----------- 2.6.4 no longer has page->list --------------- -- AC_MSG_CHECKING([if struct page has a list field]) -- LUSTRE_MODULE_TRY_COMPILE( -- [ -- #include -- ],[ -- struct page page; -- &page.list; -- ],[ -- AC_MSG_RESULT([yes]) -- AC_DEFINE(HAVE_PAGE_LIST, 1, [struct page has a list field]) -- ],[ -- AC_MSG_RESULT([no]) -- ]) -- -- # ---------- Red Hat 2.4.20 backports some 2.5 bits -------- -- # This needs to run after we've defined the KCPPFLAGS -- -- AC_MSG_CHECKING([if task_struct has a sighand field]) -- 
LUSTRE_MODULE_TRY_COMPILE( -- [ -- #include -- ],[ -- struct task_struct p; -- p.sighand = NULL; -- ],[ -- AC_DEFINE(CONFIG_RH_2_4_20, 1, [this kernel contains Red Hat 2.4.20 patches]) -- AC_MSG_RESULT([yes]) -- ],[ -- AC_MSG_RESULT([no]) -- ]) -- -- # ---------- 2.4.20 introduced cond_resched -------------- -- -- AC_MSG_CHECKING([if kernel offers cond_resched]) -- LUSTRE_MODULE_TRY_COMPILE( -- [ -- #include -- ],[ -- cond_resched(); -- ],[ -- AC_MSG_RESULT([yes]) -- AC_DEFINE(HAVE_COND_RESCHED, 1, [cond_resched found]) -- ],[ -- AC_MSG_RESULT([no]) -- ]) - - # --------- zap_page_range(vma) -------------------------------- - AC_MSG_CHECKING([if zap_pag_range with vma parameter]) - ZAP_PAGE_RANGE_VMA="`grep -c 'zap_page_range.*struct vm_area_struct' $LINUX/include/linux/mm.h`" - if test "$ZAP_PAGE_RANGE_VMA" != 0 ; then - AC_DEFINE(ZAP_PAGE_RANGE_VMA, 1, [zap_page_range with vma parameter]) - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - fi -- -- # ---------- Red Hat 2.4.21 backports some more 2.5 bits -------- -- -- AC_MSG_CHECKING([if kernel defines PDE]) -- HAVE_PDE="`grep -c 'proc_dir_entry..PDE' $LINUX/include/linux/proc_fs.h`" -- if test "$HAVE_PDE" != 0 ; then -- AC_DEFINE(HAVE_PDE, 1, [the kernel defines PDE]) -- AC_MSG_RESULT([yes]) -- else -- AC_MSG_RESULT([no]) -- fi -- -- AC_MSG_CHECKING([if kernel passes struct file to direct_IO]) -- HAVE_DIO_FILE="`grep -c 'direct_IO.*struct file' $LINUX/include/linux/fs.h`" -- if test "$HAVE_DIO_FILE" != 0 ; then -- AC_DEFINE(HAVE_DIO_FILE, 1, [the kernel passes struct file to direct_IO]) -- AC_MSG_RESULT(yes) -- else -- AC_MSG_RESULT(no) -- fi -- -- AC_MSG_CHECKING([if kernel defines cpu_online()]) -- LUSTRE_MODULE_TRY_COMPILE( -- [ -- #include -- ],[ -- cpu_online(0); -- ],[ -- AC_MSG_RESULT([yes]) -- AC_DEFINE(HAVE_CPU_ONLINE, 1, [cpu_online found]) -- ],[ -- AC_MSG_RESULT([no]) -- ]) - -- AC_MSG_CHECKING([if kernel defines cpumask_t]) -- LUSTRE_MODULE_TRY_COMPILE( -- [ -- #include -- ],[ -- return sizeof 
(cpumask_t); -- ],[ -- AC_MSG_RESULT([yes]) -- AC_DEFINE(HAVE_CPUMASK_T, 1, [cpumask_t found]) -- ],[ -- AC_MSG_RESULT([no]) -- ]) -- -- # ---------- RHEL kernels define page_count in mm_inline.h -- AC_MSG_CHECKING([if kernel has mm_inline.h header]) -- LUSTRE_MODULE_TRY_COMPILE( -- [ -- #include -- ],[ -- #ifndef page_count -- #error mm_inline.h does not define page_count -- #endif -- ],[ -- AC_MSG_RESULT([yes]) -- AC_DEFINE(HAVE_MM_INLINE, 1, [mm_inline found]) -- ],[ -- AC_MSG_RESULT([no]) -- ]) -- -- # ---------- inode->i_alloc_sem -------------- -- AC_MSG_CHECKING([if struct inode has i_alloc_sem]) -- LUSTRE_MODULE_TRY_COMPILE( -- [ -- #include -- #include -- ],[ -- #if defined(CONFIG_X86_64) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,24)) -- #error "x86_64 down_read_trylock broken before 2.4.24" -- #endif -- struct inode i; -- return (char *)&i.i_alloc_sem - (char *)&i; -- ],[ -- AC_MSG_RESULT([yes]) -- AC_DEFINE(HAVE_I_ALLOC_SEM, 1, [struct inode has i_alloc_sem]) -- ],[ -- AC_MSG_RESULT([no]) -- ]) - -- -- # ---------- modules? ------------------------ -- AC_MSG_CHECKING([for module support]) -- LUSTRE_MODULE_TRY_COMPILE( -- [ -- #include -- ],[ -- #ifndef CONFIG_MODULES -- #error CONFIG_MODULES not #defined -- #endif -- ],[ -- AC_MSG_RESULT([yes]) -- ],[ -- AC_MSG_RESULT([no]) -- AC_MSG_ERROR([module support is required to build Lustre kernel modules.]) -- ]) -- -- # ---------- modversions? 
-------------------- -- AC_MSG_CHECKING([for MODVERSIONS]) -- LUSTRE_MODULE_TRY_COMPILE( -- [ -- #include -- ],[ -- #ifndef CONFIG_MODVERSIONS -- #error CONFIG_MODVERSIONS not #defined -- #endif -- ],[ -- AC_MSG_RESULT([yes]) -- ],[ -- AC_MSG_RESULT([no]) -- ]) -- -- # ------------ preempt ----------------------- -- AC_MSG_CHECKING([if preempt is enabled]) -- LUSTRE_MODULE_TRY_COMPILE( -- [ -- #include -- ],[ -- #ifndef CONFIG_PREEMPT -- #error CONFIG_PREEMPT is not #defined -- #endif -- ],[ -- AC_MSG_RESULT([yes]) -- AC_MSG_ERROR([Lustre does not support kernels with preempt enabled.]) -- ],[ -- AC_MSG_RESULT([no]) -- ]) -- -- case $BACKINGFS in -- ext3) -- # --- Check that ext3 and ext3 xattr are enabled in the kernel -- AC_MSG_CHECKING([that ext3 is enabled in the kernel]) -- LUSTRE_MODULE_TRY_COMPILE( -- [ -- #include -- ],[ -- #ifndef CONFIG_EXT3_FS -- #ifndef CONFIG_EXT3_FS_MODULE -- #error CONFIG_EXT3_FS not #defined -- #endif -- #endif -- ],[ -- AC_MSG_RESULT([yes]) -- ],[ -- AC_MSG_RESULT([no]) -- AC_MSG_ERROR([Lustre requires that ext3 is enabled in the kernel (CONFIG_EXT3_FS)]) -- ]) -- -- AC_MSG_CHECKING([that extended attributes for ext3 are enabled in the kernel]) -- LUSTRE_MODULE_TRY_COMPILE( -- [ -- #include -- ],[ -- #ifndef CONFIG_EXT3_FS_XATTR -- #error CONFIG_EXT3_FS_XATTR not #defined -- #endif -- ],[ -- AC_MSG_RESULT([yes]) -- ],[ -- AC_MSG_RESULT([no]) -- AC_MSG_WARN([Lustre requires that extended attributes for ext3 are enabled in the kernel (CONFIG_EXT3_FS_XATTR.)]) -- AC_MSG_WARN([This build may fail.]) -- ]) -- ;; -- ldiskfs) -- AC_MSG_CHECKING([if fshooks are present]) -- LUSTRE_MODULE_TRY_COMPILE( -- [ -- #include -- ],[],[ -- AC_MSG_RESULT([yes]) -- LDISKFS_SERIES="2.6-suse.series" -- ],[ -- AC_MSG_RESULT([no]) -- LDISKFS_SERIES="2.6-vanilla.series" -- ]) -- AC_SUBST(LDISKFS_SERIES) -- # --- check which ldiskfs series we should use -- ;; -- esac # $BACKINGFS --fi -- - AM_CONDITIONAL(BUILD_IBNAL, test x$IBNAL = "xibnal") - 
AM_CONDITIONAL(BUILD_GMNAL, test x$GMNAL = "xgmnal") --AM_CONDITIONAL(BUILD_QSWNAL, test x$QSWNAL = "xqswnal") -AM_CONDITIONAL(BUILD_GMNAL, test x$GMNAL = "xgmnal") -AM_CONDITIONAL(BUILD_OPENIBNAL, test x$OPENIBNAL = "xopenibnal") -- --CPPFLAGS="-include \$(top_builddir)/include/config.h $CPPFLAGS" --EXTRA_KCFLAGS="-include $PWD/include/config.h $EXTRA_KCFLAGS" --AC_SUBST(EXTRA_KCFLAGS) -- --#echo "KCPPFLAGS: $KCPPFLAGS" --#echo "KCFLAGS: $KCFLAGS" --#echo "LLCPPFLAGS: $LLCPPFLAGS" --#echo "LLCFLAGS: $LLCFLAGS" --#echo "MOD_LINK: $MOD_LINK" --#echo "CFLAGS: $CFLAGS" --#echo "CPPFLAGS: $CPPFLAGS" diff --cc lnet/autoMakefile.am index 485ff04,485ff04..0000000 deleted file mode 100644,100644 --- a/lnet/autoMakefile.am +++ /dev/null @@@ -1,8 -1,8 +1,0 @@@ --# Copyright (C) 2001 Cluster File Systems, Inc. --# --# This code is issued under the GNU General Public License. --# See the file COPYING in this distribution -- --EXTRA_DIST = archdep.m4 build.m4 -- --SUBDIRS = portals libcfs knals unals router tests doc utils include diff --cc lnet/autogen.sh index 9deed73,9deed73..0000000 deleted file mode 100644,100644 --- a/lnet/autogen.sh +++ /dev/null @@@ -1,5 -1,5 +1,0 @@@ --#!/bin/sh -- --aclocal && --automake --add-missing && --${AUTOCONF:-autoconf} diff --cc lnet/build.m4 index e8a540a,861bb4a..0000000 deleted file mode 100644,100644 --- a/lnet/build.m4 +++ /dev/null @@@ -1,116 -1,118 +1,0 @@@ --# ---------- other tests and settings --------- -- --AC_CHECK_TYPE([spinlock_t], -- [AC_DEFINE(HAVE_SPINLOCK_T, 1, [spinlock_t is defined])], -- [], -- [#include ]) -- --# --------- unsigned long long sane? ------- -- --AC_CHECK_SIZEOF(unsigned long long, 0) --echo "---> size SIZEOF $SIZEOF_unsigned_long_long" --echo "---> size SIZEOF $ac_cv_sizeof_unsigned_long_long" --if test $ac_cv_sizeof_unsigned_long_long != 8 ; then -- AC_MSG_ERROR([** we assume that sizeof(long long) == 8. 
Tell phil@clusterfs.com]) --fi -- --# directories for binaries --ac_default_prefix=/usr -- --# mount.lustre --rootsbindir='/sbin' --AC_SUBST(rootsbindir) -sysconfdir='/etc' -AC_SUBST(sysconfdir) --# Directories for documentation and demos. --docdir='${datadir}/doc/$(PACKAGE)' --AC_SUBST(docdir) --demodir='$(docdir)/demo' --AC_SUBST(demodir) - pkgexampledir='${pkglibdir}/examples' -pkgexampledir='${pkgdatadir}/examples' --AC_SUBST(pkgexampledir) --pymoddir='${pkglibdir}/python/Lustre' --AC_SUBST(pymoddir) -- --# ---------- BAD gcc? ------------ --AC_PROG_RANLIB --AC_PROG_CC --AC_MSG_CHECKING([for buggy compiler]) --CC_VERSION=`$CC -v 2>&1 | grep "^gcc version"` --bad_cc() { -- AC_MSG_RESULT([buggy compiler found!]) -- echo -- echo " '$CC_VERSION'" -- echo " has been known to generate bad code, " -- echo " please get an updated compiler." -- AC_MSG_ERROR([sorry]) --} --TMP_VERSION=`echo $CC_VERSION | cut -c 1-16` --if test "$TMP_VERSION" = "gcc version 2.95"; then -- bad_cc --fi --case "$CC_VERSION" in -- # ost_pack_niobuf putting 64bit NTOH temporaries on the stack -- # without "sub $0xc,%esp" to protect the stack from being -- # stomped on by interrupts (bug 606) -- "gcc version 2.96 20000731 (Red Hat Linux 7.1 2.96-98)") -- bad_cc -- ;; -- # mandrake's similar sub 0xc compiler bug -- # http://marc.theaimsgroup.com/?l=linux-kernel&m=104748366226348&w=2 -- "gcc version 2.96 20000731 (Mandrake Linux 8.1 2.96-0.62mdk)") -- bad_cc -- ;; -- *) -- AC_MSG_RESULT([no known problems]) -- ;; --esac --# end ------ BAD gcc? 
------------ -- --# -------- Check for required packages -------------- -- --# this doesn't seem to work on older autoconf --# AC_CHECK_LIB(readline, readline,,) --AC_MSG_CHECKING([for readline support]) --AC_ARG_ENABLE(readline, -- AC_HELP_STRING([--disable-readline], -- [do not use readline library]), -- [],[enable_readline='yes']) --AC_MSG_RESULT([$enable_readline]) --if test x$enable_readline = xyes ; then -- LIBREADLINE="-lreadline -lncurses" -- AC_DEFINE(HAVE_LIBREADLINE, 1, [readline library is available]) --else -- LIBREADLINE="" --fi --AC_SUBST(LIBREADLINE) -- --AC_MSG_CHECKING([if efence debugging support is requested]) --AC_ARG_ENABLE(efence, -- AC_HELP_STRING([--enable-efence], -- [use efence library]), -- [],[enable_efence='no']) --AC_MSG_RESULT([$enable_efence]) --if test "$enable_efence" = "yes" ; then -- LIBEFENCE="-lefence" -- AC_DEFINE(HAVE_LIBEFENCE, 1, [libefence support is requested]) --else -- LIBEFENCE="" --fi --AC_SUBST(LIBEFENCE) -- --# -------- enable acceptor libwrap (TCP wrappers) support? ------- --AC_MSG_CHECKING([if libwrap support is requested]) --AC_ARG_ENABLE([libwrap], -- AC_HELP_STRING([--enable-libwrap], [use TCP wrappers]), -- [case "${enableval}" in -- yes) enable_libwrap=yes ;; -- no) enable_libwrap=no ;; -- *) AC_MSG_ERROR(bad value ${enableval} for --enable-libwrap) ;; -- esac],[enable_libwrap=no]) --AC_MSG_RESULT([$enable_libwrap]) --if test x$enable_libwrap = xyes ; then -- LIBWRAP="-lwrap" -- AC_DEFINE(HAVE_LIBWRAP, 1, [libwrap support is requested]) --else -- LIBWRAP="" --fi --AC_SUBST(LIBWRAP) diff --cc lnet/doc/Data-structures index b5532b1,b5532b1..0000000 deleted file mode 100644,100644 --- a/lnet/doc/Data-structures +++ /dev/null @@@ -1,65 -1,65 +1,0 @@@ --In this document I will try to draw the data structures and how they --interrelate in the Portals 3 reference implementation. It is probably --best shown with a drawing, so there may be an additional xfig or --Postscript figure. 
-- -- --MEMORY POOLS: -------------- -- --First, a digression on memory allocation in the library. As mentioned --in the NAL Writer's Guide, the library does not link against any --standard C libraries and as such is unable to dynamically allocate --memory on its own. It requires that the NAL implement a method --for allocation that is appropriate for the protection domain in --which the library lives. This is only called when a network --interface is initialized to allocate the Portals object pools. -- --These pools are preallocated blocks of objects that the library --can rapidly make active and manage with a minimum of overhead. --It also cuts down on overhead for setting up structures --since the NAL->malloc() callback does not need to be called --for each object. -- --The objects are maintained on a per-object type singly linked free --list and contain a pointer to the next free object. This pointer --is NULL if the object is not on the free list and is non-zero --if it is on the list. The special sentinel value of 0xDEADBEEF --is used to mark the end of the free list since NULL could --indicate that the last object in the list is not free. -- --When one of the lib_*_alloc() functions is called, the library --returns the head of the free list and advances the head pointer --to the next item on the list. The special case of 0xDEADBEEF is --checked and a NULL pointer is returned if there are no more --objects of this type available. The lib_*_free() functions --are even simpler -- check to ensure that the object is not already --free, set its next pointer to the current head and then set --the head to be this newly freed object. -- --Since C does not have templates, I did the next best thing and wrote --the memory pool allocation code as a macro that expands based on the --type of the argument. The mk_alloc(T) macro expands to --write the _lib_T_alloc() and lib_T_free() functions. --It requires that the object have a pointer of the type T named --"next_free". 
There are also functions that map _lib_T_alloc() --to lib_T_alloc() so that the library can add some extra --functionality to the T constructor. -- -- -- --LINKED LISTS: -------------- -- --Many of the active Portals objects are stored in doubly linked lists --when they are active. These are always implemented with the pointer --to the next object and a pointer to the next pointer of the --previous object. This avoids the "dummy head" object or --special cases for inserting at the beginning or end of the list. --The pointer manipulations are a little hairy at times, but --I hope that they are understandable. -- --The actual linked list code is implemented as macros in , --although the object has to know about -- -- diff --cc lnet/doc/Makefile.am index b7f6252,b7f6252..0000000 deleted file mode 100644,100644 --- a/lnet/doc/Makefile.am +++ /dev/null @@@ -1,51 -1,51 +1,0 @@@ --# Copyright (C) 2001 Cluster File Systems, Inc. --# --# This code is issued under the GNU General Public License. --# See the file COPYING in this distribution -- --LYX2PDF = lyx --export pdf --LYX2TXT = lyx --export text --LYX2HTML = lyx --export html --SUFFIXES = .lin .lyx .pdf .sgml .html .txt .fig .eps -- --if DOC -- DOCS = portals3.pdf --else -- DOCS = --endif -- --IMAGES = file.eps flow_new.eps get.eps mpi.eps portals.eps put.eps --LYXFILES= portals3.lyx -- --MAINTAINERCLEANFILES = $(IMAGES) $(DOCS) $(GENERATED) --GENERATED = --EXTRA_DIST = $(DOCS) $(IMAGES) $(LYXFILES) -- --all: $(DOCS) -- --# update date and version in document --date := $(shell date +%x) --tag := $(shell echo '$$Name: $$' | sed -e 's/^\$$Na''me: *\$$$$/HEAD/; s/^\$$Na''me: \(.*\) \$$$$/\1/') --addversion = sed -e 's|@T''AG@|$(tag)|g; s|@VER''SION@|$(VERSION)|g; s|@DA''TE@|$(date)|g' -- --# Regenerate when the $(VERSION) or $Name: $ changes. 
--.INTERMEDIATE: $(GENERATED) --$(GENERATED) : %.lyx: %.lin Makefile -- $(addversion) $< > $@ -- --.lyx.pdf: -- @$(LYX2PDF) $< || printf "\n*** Warning: not creating PDF docs; install lyx to rectify this\n" -- --.lyx.txt: -- @$(LYX2TXT) $< || printf "\n*** Warning: not creating text docs; install lyx to rectify this\n" --.lyx.html: -- @$(LYX2HTML) $< || printf "\n*** Warning: not creating HTML docs; install lyx to rectify this\n" --.fig.eps: -- -fig2dev -L eps $< > $@ -- --portals3.pdf portals3.txt portals3.html: $(IMAGES) portals3.lyx -- --syncweb: portals3.pdf --# cp lustre.pdf /usr/src/www/content/lustre/docs/lustre.pdf --# ( cd /usr/src/www ; make lustre ; make synclustre ) -- diff --cc lnet/doc/Message-life-cycle index e8cc7e2,e8cc7e2..0000000 deleted file mode 100644,100644 --- a/lnet/doc/Message-life-cycle +++ /dev/null @@@ -1,118 -1,118 +1,0 @@@ --This documents the life cycle of a message as it arrives and is handled by --a basic async, packetized NAL. There are four types of messages that have --slightly different life cycles, so they are addressed independently. -- -- --Put request ------------- -- --1. NAL notices that there is an incoming message header on the network --and reads a ptl_hdr_t in from the wire. -- --2. It may store additional NAL specific data that provides context --for this event in a void* that it will interpret in some fashion --later. -- --3. The NAL calls lib_parse() with a pointer to the header and its --private data structure. -- --4. The library decodes the header and may build a message state --object that describes the event to be written and the ACK to be --sent, if any. It then calls nal->recv() with the private data --that the NAL passed in, a pointer to the message state object --and a translated user address. -- -- The NAL will have been given a chance to pretranslate -- all user addresses when the buffers are created. This -- process is described in the NAL-HOWTO. -- --5.
The NAL should restore whatever context it required from the --private data pointer, begin receiving the bytes and possibly store --some extra state of its own. It should return at this point. -- -- -- --Get request ------------- -- --1. As with a Put, the NAL notices the incoming message header and --passes it to lib_parse(). -- --2. The library decodes the header and calls nal->recv() with a --zero byte length, offset and destination to instruct it to clean --up the wire after reading the header. The private data will --be passed in as well, allowing the NAL to retrieve any state --or context that it requires. -- --3. The library may build a message state object to possibly --write an event log or invalidate a memory region. -- --4. The library will build a ptl_msg_t header that specifies the --Portals protocol information for delivery at the remote end. -- --5. The library calls nal->send() with the pre-built header, --the optional message state object, the four part address --component, a translated user pointer + offset, and some --other things. -- --6. The NAL is to put the header on the wire or copy it at --this point (since it is off the stack). It should store some --amount of state about its current position in the message and --the destination address. -- --7. And then return to the library. -- -- --Reply request --------------- -- --1. Starting at "The library decodes the header..." -- --2. The library decodes the header and calls nal->recv() --to bring in the rest of the message. Flow continues in --exactly the same fashion as with all other receives. -- -- --Ack request ------------- -- --1. The library decodes the header, builds the appropriate data --structures for the event in a message state object and calls nal->recv() --with a zero byte length, etc. -- -- --Packet arrival ---------------- -- --1.
The NAL should notice the arrival of a packet, retrieve whatever --state it needs from the message ID or other NAL specific header data --and place the data bytes directly into the user address that was --given to nal->recv(). -- -- How this happens is outside the scope of the Portals library -- and solely determined by the NAL... -- --2. If this is the last packet in a message, the NAL should retrieve --the lib_msg_t *cookie that it was given in the call to nal->recv() --and pass it to lib_finalize(). lib_finalize() may call nal->send() --to send an ACK, nal->write() to record an entry in the event log, --nal->invalidate() to unregister a region of memory or do nothing at all. -- --3. It should then clean up any remaining NAL specific state about --the message and go back into the main loop. -- -- --Outgoing packets ------------------ -- --1. When the NAL has pending output, it should put the packets on --the wire wrapped with whatever implementation specified wrappers. -- --2. Once it has output all the packets of a message it should --call lib_finalize() with the message state object that was --handed to nal->send(). This will allow the library to clean --up its state regarding the message and write any pending event --entries. -- -- -- diff --cc lnet/doc/NAL-HOWTO index ea38aed,ea38aed..0000000 deleted file mode 100644,100644 --- a/lnet/doc/NAL-HOWTO +++ /dev/null @@@ -1,293 -1,293 +1,0 @@@ --This document is a first attempt at describing how to write a NAL --for the Portals 3 library. It also defines the library architecture --and the abstraction of protection domains.
-- -- --First, an overview of the architecture: -- -- Application -- ------|----+-------- -- | -- API === NAL (User space) -- | -----------+---|----- -- | -- LIB === NAL (Library space) -- | -----------+---|----- -- -- Physical wire (NIC space) -- -- --Application -- API --API-side NAL -------------- --LIB-side NAL -- LIB --LIB-side NAL -- wire -- --Communication is through the indicated paths via well defined --interfaces. The API and LIB portions are written to be portable --across platforms and do not depend on the network interface. -- --Communication between the application and the API code is --defined in the Portals 3 API specification. This is the --user-visible portion of the interface and should be the most --stable. -- -- -- --API-side NAL: -------------- -- --The user space NAL needs to implement only a few functions --that are stored in a nal_t data structure and called by the --API-side library: -- -- int forward( nal_t *nal, -- int index, -- void *args, -- size_t arg_len, -- void *ret, -- size_t ret_len -- ); -- --Most of the data structures in the portals library are held in --the LIB section of the code, so it is necessary to forward API --calls across the protection domain to the library. This is --handled by the NAL's forward method. Once the argument and return --blocks are on the remote side the NAL should call lib_dispatch() --to invoke the appropriate API function. -- -- int validate( nal_t *nal, -- void *base, -- size_t extent, -- void **trans_base, -- void **trans_data -- ); -- --The validate method provides a means for the NAL to prevalidate --and possibly pretranslate user addresses into a form suitable --for fast use by the network card or kernel module. The trans_base --pointer will be used by the library every time it needs to --refer to the block of memory. The trans_data result is a --cookie that will be handed to the NAL along with the trans_base.
-- --The library never performs calculations on the trans_base value; --it only computes offsets that are then handed to the NAL. -- -- -- int shutdown( nal_t *nal, int interface ); -- --Brings down the network interface. The remote NAL side should --call lib_fini() to bring down the library side of the network. -- -- void yield( nal_t *nal ); -- --This allows the user application to gracefully give up the processor --while busy waiting. Performance critical applications may not --want to take the time to call this function, so it should be an --option to the PtlEQWait call. Right now it is not implemented as such. -- --Lastly, the NAL must implement a function named PTL_IFACE_*, where --* is the name of the NAL such as PTL_IFACE_IP or PTL_IFACE_MYR. --This initialization function is to set up communication with the --library-side NAL, which should call lib_init() to bring up the --network interface. -- -- -- --LIB-side NAL: -------------- -- --On the library-side, the NAL has much more responsibility. It --is responsible for calling lib_dispatch() on behalf of the user, --it is also responsible for bringing packets off the wire and --pushing bits out. As on the user side, the methods are stored --in a nal_cb_t structure that is defined on a per network --interface basis. -- --The calls to lib_dispatch() need to be examined. The prototype: -- -- void lib_dispatch( -- nal_cb_t *nal, -- void *private, -- int index, -- void *arg_block, -- void *ret_block -- ); -- --has two complications. The private field is a NAL-specific --value that will be passed to any callbacks produced as a result --of this API call. Kernel module implementations may use this --for task structures, or perhaps network card data. It is ignored --by the library. -- --Secondly, the arg_block and ret_block must be in the same protection --domain as the library. The NAL's two halves must communicate the --sizes and perform the copies. 
After the call, the buffer pointed --to by ret_block will be filled in and should be copied back to --the user space. How this is to be done is NAL specific. -- -- int lib_parse( -- nal_cb_t *nal, -- ptl_hdr_t *hdr, -- void *private -- ); -- --This is the only other entry point into the library from the NAL. --When the NAL detects an incoming message on the wire it should read --sizeof(ptl_hdr_t) bytes and pass a pointer to the header to --lib_parse(). It may set private to be anything that it needs to --tie the incoming message to callbacks that are made as a result --of this event. -- --The method calls are: -- -- int (*send)( -- nal_cb_t *nal, -- void *private, -- lib_msg_t *cookie, -- ptl_hdr_t *hdr, -- int nid, -- int pid, -- int gid, -- int rid, -- user_ptr trans_base, -- user_ptr trans_data, -- size_t offset, -- size_t len -- ); -- --This is a tricky function -- it must support async output --of messages as well as properly synchronized event log writing. --The private field is the same that was passed into lib_dispatch() --or lib_parse() and may be used to tie this call to the event --that initiated the entry to the library. -- --The cookie is a pointer to a library private value that must --be passed to lib_finalize() once the message has been completely --sent. It should not be examined by the NAL for any meaning. -- --The four ID fields are passed in, although some implementations --may not use all of them. -- --The single base pointer has been replaced with the translated --address that the API NAL generated in the api_nal->validate() --call. The trans_data is unchanged and the offset is in bytes. -- -- -- int (*recv)( -- nal_cb_t *nal, -- void *private, -- lib_msg_t *cookie, -- user_ptr trans_base, -- user_ptr trans_data, -- size_t offset, -- size_t mlen, -- size_t rlen -- ); -- --This callback will only be called in response to lib_parse(). --The cookie, trans_base and trans_data are as discussed in send().
--The NAL should read mlen bytes from the wire, deposit them into --trans_base + offset and then discard (rlen - mlen) bytes. --Once the entire message has been received the NAL should call --lib_finalize() with the lib_msg_t *cookie. -- --The special arguments of base=NULL, data=NULL, offset=0, mlen=0, rlen=0 --are used to indicate that the NAL should clean up the wire. This could --be implemented as a blocking call, although having it return as quickly --as possible is desirable. -- -- int (*write)( -- nal_cb_t *nal, -- void *private, -- user_ptr trans_addr, -- user_ptr trans_data, -- size_t offset, -- -- void *src_addr, -- size_t len -- ); -- --This is essentially a cross-protection domain memcpy(). The user address --has been pretranslated by the api_nal->validate() call. -- -- void *(*malloc)( -- nal_cb_t *nal, -- size_t len -- ); -- -- void (*free)( -- nal_cb_t *nal, -- void *buf -- ); -- --Since the NAL may be in a non-standard hosted environment it can --not call malloc(). This allows the library side NAL to implement --the system specific malloc(). In the current reference implementation --the library only calls nal->malloc() when the network interface is --initialized and then calls free when it is brought down. The library --maintains its own pool of objects for allocation so only one call to --malloc is made per object type. -- -- void (*invalidate)( -- nal_cb_t *nal, -- user_ptr trans_base, -- user_ptr trans_data, -- size_t extent -- ); -- --User addresses are validated/translated at the user-level API NAL --method, which is likely to push them to this level. Meanwhile, --the library NAL will be notified when the library no longer --needs the buffer. Overlapped buffers are not detected by the --library, so the NAL should ref count each page involved. -- --Unfortunately we have a few bugs when the invalidate method is --called. It is still in progress... -- -- void (*printf)( -- nal_cb_t *nal, -- const char *fmt, -- ...
-- ); -- --As with malloc(), the library does not have any way to do printf --or printk. It is not necessary for the NAL to implement this --call, although omitting it will make debugging difficult. -- -- void (*cli)( -- nal_cb_t *nal, -- unsigned long *flags -- ); -- -- void (*sti)( -- nal_cb_t *nal, -- unsigned long *flags -- ); -- --These are used by the library to mark critical sections. -- -- int (*gidrid2nidpid)( -- nal_cb_t *nal, -- ptl_id_t gid, -- ptl_id_t rid, -- ptl_id_t *nid, -- ptl_id_t *pid -- ); -- -- -- int (*nidpid2gidrid)( -- nal_cb_t *nal, -- ptl_id_t nid, -- ptl_id_t pid, -- ptl_id_t *gid, -- ptl_id_t *rid -- ); -- --Rolf added these. I haven't looked at how they have to work yet. diff --cc lnet/doc/file.fig index 914c294,914c294..0000000 deleted file mode 100644,100644 --- a/lnet/doc/file.fig +++ /dev/null @@@ -1,111 -1,111 +1,0 @@@ --#FIG 3.2 --Landscape --Center --Inches --Letter --100.00 --Single ---2 --1200 2 --6 1200 750 1650 1050 --2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5 -- 1650 1050 1650 750 1200 750 1200 1050 1650 1050 --4 1 0 100 0 0 10 0.0000 0 105 240 1425 952 FS0\001 ---6 --6 1200 2325 1650 2625 --2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5 -- 1650 2625 1650 2325 1200 2325 1200 2625 1650 2625 --4 1 0 100 0 0 10 0.0000 0 105 240 1425 2527 FS3\001 ---6 --6 1200 1800 1650 2100 --2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5 -- 1650 2100 1650 1800 1200 1800 1200 2100 1650 2100 --4 1 0 100 0 0 10 0.0000 0 105 240 1425 2002 FS2\001 ---6 --6 1200 1275 1650 1575 --2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5 -- 1650 1575 1650 1275 1200 1275 1200 1575 1650 1575 --4 1 0 100 0 0 10 0.0000 0 105 240 1425 1477 FS1\001 ---6 --6 450 750 900 1200 --5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 750.000 450 1050 675 1125 900 1050 --1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 825 225 75 450 900 900 750 --2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 -- 450 825 450 1050 --2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 -- 900 1050 900 825 ---6 --6 450 2325 900 2775 --5 1 0 1 0 7 100 0 20 0.000 0
1 0 0 675.000 2325.000 450 2625 675 2700 900 2625 --1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 2400 225 75 450 2475 900 2325 --2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 -- 450 2400 450 2625 --2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 -- 900 2625 900 2400 ---6 --6 450 1800 900 2250 --5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 1800.000 450 2100 675 2175 900 2100 --1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 1875 225 75 450 1950 900 1800 --2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 -- 450 1875 450 2100 --2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 -- 900 2100 900 1875 ---6 --6 450 1275 900 1725 --5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 1275.000 450 1575 675 1650 900 1575 --1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 1350 225 75 450 1425 900 1275 --2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 -- 450 1350 450 1575 --2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 -- 900 1575 900 1350 ---6 --6 2250 750 3450 2625 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 -- 2550 1200 3150 1200 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 -- 2550 1500 3150 1500 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 -- 2550 1800 3150 1800 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 -- 2550 2100 3150 2100 --2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 -- 2550 975 3150 975 3150 2625 2550 2625 2550 975 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 -- 2550 2400 3150 2400 --4 1 0 100 0 0 10 0.0000 0 135 1185 2850 900 Application Buffer\001 ---6 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 -- 0 0 1.00 60.00 120.00 -- 0 0 1.00 60.00 120.00 -- 1650 2400 2550 1350 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 -- 0 0 1.00 60.00 120.00 -- 0 0 1.00 60.00 120.00 -- 1650 1875 2550 1050 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 -- 0 0 1.00 60.00 120.00 -- 0 0 1.00 60.00 120.00 -- 1650 1425 2550 1950 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 -- 0 0 1.00 60.00 120.00 -- 0 0 1.00 60.00 120.00 -- 1650 900 2550 1650 --2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 -- 900 900 1200 900 --2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 -- 900 1425 1200 1425 --2 1 0 1 0 7 100 0 20 0.000 0 0 -1 
0 0 2 -- 900 1950 1200 1950 --2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 -- 900 2475 1200 2475 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 -- 0 0 1.00 60.00 120.00 -- 0 0 1.00 60.00 120.00 -- 1650 2025 2550 2250 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 -- 0 0 1.00 60.00 120.00 -- 0 0 1.00 60.00 120.00 -- 1650 2550 2550 2475 --2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5 -- 1875 2850 1875 600 225 600 225 2850 1875 2850 --4 1 0 100 0 0 10 0.0000 0 105 1215 1050 525 Parallel File Server\001 diff --cc lnet/doc/flow_new.fig index d828dea,d828dea..0000000 deleted file mode 100644,100644 --- a/lnet/doc/flow_new.fig +++ /dev/null @@@ -1,213 -1,213 +1,0 @@@ --#FIG 3.2 --Landscape --Center --Inches --Letter --100.00 --Single ---2 --1200 2 --6 525 2175 1575 2925 --6 675 2287 1425 2812 --4 1 0 50 0 0 10 0.0000 4 105 255 1050 2437 MD\001 --4 1 0 50 0 0 10 0.0000 4 105 645 1050 2587 Exists and\001 --4 1 0 50 0 0 10 0.0000 4 135 555 1050 2737 Accepts?\001 ---6 --2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5 -- 1575 2550 1050 2175 525 2550 1050 2925 1575 2550 ---6 --6 3450 1275 4350 1725 --6 3600 1312 4200 1687 --4 1 0 100 0 0 10 0.0000 0 135 525 3900 1612 Message\001 --4 1 0 100 0 0 10 0.0000 0 105 465 3900 1462 Discard\001 ---6 --2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 -- 3450 1275 4350 1275 4350 1725 3450 1725 3450 1275 ---6 --6 4650 1275 5550 1725 --6 4725 1312 5475 1687 --4 1 0 100 0 0 10 0.0000 0 135 735 5100 1612 Drop Count\001 --4 1 0 100 0 0 10 0.0000 0 105 630 5100 1462 Increment\001 ---6 --2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 -- 4650 1275 5550 1275 5550 1725 4650 1725 4650 1275 ---6 --6 1350 525 2250 975 --6 1350 562 2250 937 --4 1 0 100 0 0 10 0.0000 0 135 795 1800 862 Match Entry\001 --4 1 0 100 0 0 10 0.0000 0 105 585 1800 712 Get Next\001 ---6 --2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 -- 1350 525 2250 525 2250 975 1350 975 1350 525 ---6 --6 525 1125 1575 1875 --2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5 -- 1575 1500 1050 1125 525 1500 1050 1875 1575 1500 --4 1 0 100 0 0 10 0.0000 0 
105 465 1049 1552 Match?\001 ---6 --6 2340 1237 2940 1687 --6 2340 1237 2940 1687 --4 1 0 100 0 0 10 0.0000 0 105 345 2640 1387 More\001 --4 1 0 100 0 0 10 0.0000 0 105 405 2640 1537 Match\001 --4 1 0 100 0 0 10 0.0000 0 105 510 2640 1687 Entries?\001 ---6 ---6 --6 525 3225 1575 3975 --6 675 3375 1425 3750 --4 1 0 50 0 0 10 0.0000 4 105 255 1050 3525 MD\001 --4 1 0 50 0 0 10 0.0000 4 105 615 1050 3720 has room?\001 ---6 --2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5 -- 525 3600 1050 3225 1575 3600 1050 3975 525 3600 ---6 --6 3300 3375 4350 3825 --6 3300 3412 4350 3787 --4 1 0 50 0 0 10 0.0000 4 105 735 3825 3562 Unlink MD\001 --4 1 0 50 0 0 10 0.0000 4 135 945 3825 3712 & Match Entry\001 ---6 --2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 -- 3300 3375 4350 3375 4350 3825 3300 3825 3300 3375 ---6 --6 1950 3225 3000 3975 --6 2250 3450 2700 3750 --4 1 0 50 0 0 10 0.0000 4 105 450 2475 3600 Unlink\001 --4 1 0 50 0 0 10 0.0000 4 105 315 2475 3750 full?\001 ---6 --2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5 -- 3000 3600 2475 3225 1950 3600 2475 3975 3000 3600 ---6 --6 3150 4500 4200 4950 --6 3150 4537 4200 4912 --4 1 0 50 0 0 10 0.0000 4 105 735 3675 4687 Unlink MD\001 --4 1 0 50 0 0 10 0.0000 4 135 945 3675 4837 & Match Entry\001 ---6 --2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 -- 3150 4500 4200 4500 4200 4950 3150 4950 3150 4500 ---6 --6 600 4500 1500 4950 --6 675 4537 1425 4912 --4 1 0 50 0 0 10 0.0000 4 135 615 1050 4837 Operation\001 --4 1 0 50 0 0 10 0.0000 4 105 525 1050 4687 Perform\001 ---6 --2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 -- 600 4500 1500 4500 1500 4950 600 4950 600 4500 ---6 --6 4650 4350 5700 5100 --6 4950 4537 5400 4912 --6 4950 4537 5400 4912 --4 1 0 50 0 0 10 0.0000 4 135 435 5175 4837 Queue?\001 --4 1 0 50 0 0 10 0.0000 4 105 360 5175 4687 Event\001 ---6 ---6 --2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5 -- 5700 4725 5175 4350 4650 4725 5175 5100 5700 4725 ---6 --6 6000 4500 6900 4950 --6 6225 4575 6675 4875 --4 1 0 50 0 0 10 0.0000 4 105 360 6450 4875 Event\001 --4 1 
0 50 0 0 10 0.0000 4 105 435 6450 4725 Record\001 ---6 --2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 -- 6000 4500 6900 4500 6900 4950 6000 4950 6000 4500 ---6 --6 1800 4350 2850 5100 --6 2100 4575 2550 4875 --4 1 0 50 0 0 10 0.0000 4 105 450 2325 4725 Unlink\001 --4 1 0 50 0 0 10 0.0000 4 105 450 2325 4875 thresh?\001 ---6 --2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5 -- 2850 4725 2325 4350 1800 4725 2325 5100 2850 4725 ---6 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 1050 1875 1050 2175 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 1575 1500 2100 1500 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 1050 450 1050 1125 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 1350 750 1050 750 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 1050 2925 1050 3225 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 3150 1500 3450 1500 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 4350 1500 4650 1500 --2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5 -- 2100 1500 2625 1125 3150 1500 2625 1875 2100 1500 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 1575 3600 1950 3600 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 1050 3975 1050 4500 --2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 3000 3600 3300 3600 --2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 1500 4725 1800 4725 --2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 5700 4725 6000 4725 --2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 2850 4725 3150 4725 --2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 4200 4725 4650 4725 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 6900 4725 7950 4725 --3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5 -- 0 0 1.00 60.00 120.00 -- 1575 2550 1650 2550 1800 2550 1800 2400 1800 1500 -- 0.000 1.000 1.000 1.000 
0.000 --3 0 0 1 0 7 100 0 -1 0.000 0 0 1 5 -- 0 0 1.00 60.00 120.00 -- 2250 750 2475 750 2625 750 2625 900 2625 1125 -- 0.000 1.000 1.000 1.000 0.000 --3 0 0 1 0 7 100 0 -1 0.000 0 0 1 5 -- 0 0 1.00 60.00 120.00 -- 7500 4725 7500 1650 7500 1500 7350 1500 5550 1500 -- 0.000 1.000 1.000 1.000 0.000 --3 0 0 1 0 7 50 0 -1 0.000 0 1 0 5 -- 0 0 1.00 60.00 120.00 -- 2475 3225 2475 2400 2475 2250 2325 2250 1800 2250 -- 0.000 1.000 1.000 1.000 0.000 --3 0 0 1 0 7 50 0 -1 0.000 0 1 0 5 -- 0 0 1.00 60.00 120.00 -- 3825 3375 3825 2175 3825 2025 3675 2025 1800 2025 -- 0.000 1.000 1.000 1.000 0.000 --3 0 0 1 0 7 50 0 -1 0.000 0 1 0 8 -- 0 0 1.00 60.00 120.00 -- 2325 4350 2325 4275 2325 4125 2475 4125 4275 4125 4425 4125 -- 4425 4275 4425 4725 -- 0.000 1.000 1.000 1.000 1.000 1.000 1.000 0.000 --3 0 0 1 0 7 50 0 -1 0.000 0 1 0 8 -- 0 0 1.00 60.00 120.00 -- 5175 4350 5175 4275 5175 4125 5325 4125 7125 4125 7275 4125 -- 7275 4275 7275 4725 -- 0.000 1.000 1.000 1.000 1.000 1.000 1.000 0.000 --4 1 0 100 0 0 10 0.0000 0 75 150 1575 1425 no\001 --4 1 0 100 0 0 10 0.0000 0 135 360 825 525 Entry\001 --4 1 0 100 0 0 10 0.0000 0 75 150 1575 2475 no\001 --4 1 0 100 0 0 10 0.0000 0 105 195 1200 1950 yes\001 --4 1 0 100 0 0 10 0.0000 0 105 195 1200 3000 yes\001 --4 1 0 100 0 0 10 0.0000 0 105 195 2775 1050 yes\001 --4 1 0 100 0 0 10 0.0000 0 75 150 3225 1425 no\001 --4 1 0 100 0 0 10 0.0000 0 75 150 1650 3525 no\001 --4 1 0 100 0 0 10 0.0000 0 105 195 1200 4050 yes\001 --4 1 0 100 0 0 10 0.0000 0 105 195 3150 3525 yes\001 --4 1 0 100 0 0 10 0.0000 0 75 150 2625 3150 no\001 --4 1 0 100 0 0 10 0.0000 0 105 195 3000 4650 yes\001 --4 1 0 100 0 0 10 0.0000 0 105 195 5850 4650 yes\001 --4 1 0 100 0 0 10 0.0000 0 75 150 2475 4275 no\001 --4 1 0 100 0 0 10 0.0000 0 75 150 5325 4275 no\001 --4 1 0 50 0 0 10 0.0000 4 105 285 7800 4650 Exit\001 diff --cc lnet/doc/get.fig index 28db949,28db949..0000000 deleted file mode 100644,100644 --- a/lnet/doc/get.fig +++ /dev/null @@@ -1,33 -1,33 +1,0 @@@ --#FIG 
3.2 --Landscape --Center --Inches --Letter --100.00 --Single ---2 --1200 2 --6 2775 900 3525 1200 --4 0 0 100 0 0 10 0.0000 0 105 720 2775 1200 Translation\001 --4 0 0 100 0 0 10 0.0000 0 105 405 2850 1050 Portal\001 ---6 --6 1350 1725 2175 2025 --4 0 0 100 0 0 10 0.0000 0 105 825 1350 2025 Transmission\001 --4 0 0 100 0 0 10 0.0000 0 105 285 1620 1875 Data\001 ---6 --2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 900 525 2700 750 --2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 2700 825 2700 1275 --2 1 0 1 0 7 100 0 -1 3.000 0 0 7 1 0 2 -- 0 0 1.00 60.00 120.00 -- 2700 1350 900 1950 --2 2 0 1 0 7 100 0 -1 4.000 0 0 7 0 0 5 -- 2400 300 3600 300 3600 2250 2400 2250 2400 300 --2 2 0 1 0 7 100 0 -1 4.000 0 0 7 0 0 5 -- 0 300 1200 300 1200 2250 0 2250 0 300 --4 1 0 100 0 0 10 0.0000 4 135 495 1800 825 Request\001 --4 1 0 100 0 0 10 0.0000 0 105 540 600 525 Initiator\001 --4 1 0 100 0 0 10 0.0000 0 135 405 3000 525 Target\001 diff --cc lnet/doc/ieee.bst index 4df7c50,4df7c50..0000000 deleted file mode 100644,100644 --- a/lnet/doc/ieee.bst +++ /dev/null @@@ -1,1112 -1,1112 +1,0 @@@ --% --------------------------------------------------------------- --% --% by Paolo.Ienne@di.epfl.ch --% --% --------------------------------------------------------------- --% --% no guarantee is given that the format corresponds perfectly to --% IEEE 8.5" x 11" Proceedings, but most features should be ok. --% --% --------------------------------------------------------------- --% --% `ieee' from BibTeX standard bibliography style `abbrv' --% version 0.99a for BibTeX versions 0.99a or later, LaTeX version 2.09. --% Copyright (C) 1985, all rights reserved. --% Copying of this file is authorized only if either --% (1) you make absolutely no changes to your copy, including name, or --% (2) if you do make changes, you name it something other than --% btxbst.doc, plain.bst, unsrt.bst, alpha.bst, and abbrv.bst. 
--% This restriction helps ensure that all standard styles are identical. --% The file btxbst.doc has the documentation for this style. -- --ENTRY -- { address -- author -- booktitle -- chapter -- edition -- editor -- howpublished -- institution -- journal -- key -- month -- note -- number -- organization -- pages -- publisher -- school -- series -- title -- type -- volume -- year -- } -- {} -- { label } -- --INTEGERS { output.state before.all mid.sentence after.sentence after.block } -- --FUNCTION {init.state.consts} --{ #0 'before.all := -- #1 'mid.sentence := -- #2 'after.sentence := -- #3 'after.block := --} -- --STRINGS { s t } -- --FUNCTION {output.nonnull} --{ 's := -- output.state mid.sentence = -- { ", " * write$ } -- { output.state after.block = -- { add.period$ write$ -- newline$ -- "\newblock " write$ -- } -- { output.state before.all = -- 'write$ -- { add.period$ " " * write$ } -- if$ -- } -- if$ -- mid.sentence 'output.state := -- } -- if$ -- s --} -- --FUNCTION {output} --{ duplicate$ empty$ -- 'pop$ -- 'output.nonnull -- if$ --} -- --FUNCTION {output.check} --{ 't := -- duplicate$ empty$ -- { pop$ "empty " t * " in " * cite$ * warning$ } -- 'output.nonnull -- if$ --} -- --FUNCTION {output.bibitem} --{ newline$ -- "\bibitem{" write$ -- cite$ write$ -- "}" write$ -- newline$ -- "" -- before.all 'output.state := --} -- --FUNCTION {fin.entry} --{ add.period$ -- write$ -- newline$ --} -- --FUNCTION {new.block} --{ output.state before.all = -- 'skip$ -- { after.block 'output.state := } -- if$ --} -- --FUNCTION {new.sentence} --{ output.state after.block = -- 'skip$ -- { output.state before.all = -- 'skip$ -- { after.sentence 'output.state := } -- if$ -- } -- if$ --} -- --FUNCTION {not} --{ { #0 } -- { #1 } -- if$ --} -- --FUNCTION {and} --{ 'skip$ -- { pop$ #0 } -- if$ --} -- --FUNCTION {or} --{ { pop$ #1 } -- 'skip$ -- if$ --} -- --FUNCTION {new.block.checka} --{ empty$ -- 'skip$ -- 'new.block -- if$ --} -- --FUNCTION {new.block.checkb} --{ empty$ -- 
swap$ empty$ -- and -- 'skip$ -- 'new.block -- if$ --} -- --FUNCTION {new.sentence.checka} --{ empty$ -- 'skip$ -- 'new.sentence -- if$ --} -- --FUNCTION {new.sentence.checkb} --{ empty$ -- swap$ empty$ -- and -- 'skip$ -- 'new.sentence -- if$ --} -- --FUNCTION {field.or.null} --{ duplicate$ empty$ -- { pop$ "" } -- 'skip$ -- if$ --} -- --FUNCTION {emphasize} --{ duplicate$ empty$ -- { pop$ "" } -- { "{\em " swap$ * "}" * } -- if$ --} -- --INTEGERS { nameptr namesleft numnames } -- --FUNCTION {format.names} --{ 's := -- #1 'nameptr := -- s num.names$ 'numnames := -- numnames 'namesleft := -- { namesleft #0 > } -- { s nameptr "{f.~}{vv~}{ll}{, jj}" format.name$ 't := -- nameptr #1 > -- { namesleft #1 > -- { ", " * t * } -- { numnames #2 > -- { "," * } -- 'skip$ -- if$ -- t "others" = -- { " et~al." * } -- { " and " * t * } -- if$ -- } -- if$ -- } -- 't -- if$ -- nameptr #1 + 'nameptr := -- namesleft #1 - 'namesleft := -- } -- while$ --} -- --FUNCTION {format.authors} --{ author empty$ -- { "" } -- { author format.names } -- if$ --} -- --FUNCTION {format.editors} --{ editor empty$ -- { "" } -- { editor format.names -- editor num.names$ #1 > -- { ", editors" * } -- { ", editor" * } -- if$ -- } -- if$ --} -- --FUNCTION {format.title} --{ title empty$ -- { "" } -- { title "t" change.case$ } -- if$ --} -- --FUNCTION {n.dashify} --{ 't := -- "" -- { t empty$ not } -- { t #1 #1 substring$ "-" = -- { t #1 #2 substring$ "--" = not -- { "--" * -- t #2 global.max$ substring$ 't := -- } -- { { t #1 #1 substring$ "-" = } -- { "-" * -- t #2 global.max$ substring$ 't := -- } -- while$ -- } -- if$ -- } -- { t #1 #1 substring$ * -- t #2 global.max$ substring$ 't := -- } -- if$ -- } -- while$ --} -- --FUNCTION {format.date} --{ year empty$ -- { month empty$ -- { "" } -- { "there's a month but no year in " cite$ * warning$ -- month -- } -- if$ -- } -- { month empty$ -- 'year -- { month " " * year * } -- if$ -- } -- if$ --} -- --FUNCTION {format.btitle} --{ title emphasize --} -- 
--FUNCTION {tie.or.space.connect} --{ duplicate$ text.length$ #3 < -- { "~" } -- { " " } -- if$ -- swap$ * * --} -- --FUNCTION {either.or.check} --{ empty$ -- 'pop$ -- { "can't use both " swap$ * " fields in " * cite$ * warning$ } -- if$ --} -- --FUNCTION {format.bvolume} --{ volume empty$ -- { "" } -- { "volume" volume tie.or.space.connect -- series empty$ -- 'skip$ -- { " of " * series emphasize * } -- if$ -- "volume and number" number either.or.check -- } -- if$ --} -- --FUNCTION {format.number.series} --{ volume empty$ -- { number empty$ -- { series field.or.null } -- { output.state mid.sentence = -- { "number" } -- { "Number" } -- if$ -- number tie.or.space.connect -- series empty$ -- { "there's a number but no series in " cite$ * warning$ } -- { " in " * series * } -- if$ -- } -- if$ -- } -- { "" } -- if$ --} -- --FUNCTION {format.edition} --{ edition empty$ -- { "" } -- { output.state mid.sentence = -- { edition "l" change.case$ " edition" * } -- { edition "t" change.case$ " edition" * } -- if$ -- } -- if$ --} -- --INTEGERS { multiresult } -- --FUNCTION {multi.page.check} --{ 't := -- #0 'multiresult := -- { multiresult not -- t empty$ not -- and -- } -- { t #1 #1 substring$ -- duplicate$ "-" = -- swap$ duplicate$ "," = -- swap$ "+" = -- or or -- { #1 'multiresult := } -- { t #2 global.max$ substring$ 't := } -- if$ -- } -- while$ -- multiresult --} -- --FUNCTION {format.pages} --{ pages empty$ -- { "" } -- { pages multi.page.check -- { "pages" pages n.dashify tie.or.space.connect } -- { "page" pages tie.or.space.connect } -- if$ -- } -- if$ --} -- --FUNCTION {format.vol.num.pages} --{ volume field.or.null -- number empty$ -- 'skip$ -- { "(" number * ")" * * -- volume empty$ -- { "there's a number but no volume in " cite$ * warning$ } -- 'skip$ -- if$ -- } -- if$ -- pages empty$ -- 'skip$ -- { duplicate$ empty$ -- { pop$ format.pages } -- { ":" * pages n.dashify * } -- if$ -- } -- if$ --} -- --FUNCTION {format.chapter.pages} --{ chapter empty$ -- 
'format.pages -- { type empty$ -- { "chapter" } -- { type "l" change.case$ } -- if$ -- chapter tie.or.space.connect -- pages empty$ -- 'skip$ -- { ", " * format.pages * } -- if$ -- } -- if$ --} -- --FUNCTION {format.in.ed.booktitle} --{ booktitle empty$ -- { "" } -- { editor empty$ -- { "In " booktitle emphasize * } -- { "In " format.editors * ", " * booktitle emphasize * } -- if$ -- } -- if$ --} -- --FUNCTION {empty.misc.check} --{ author empty$ title empty$ howpublished empty$ -- month empty$ year empty$ note empty$ -- and and and and and -- key empty$ not and -- { "all relevant fields are empty in " cite$ * warning$ } -- 'skip$ -- if$ --} -- --FUNCTION {format.thesis.type} --{ type empty$ -- 'skip$ -- { pop$ -- type "t" change.case$ -- } -- if$ --} -- --FUNCTION {format.tr.number} --{ type empty$ -- { "Technical Report" } -- 'type -- if$ -- number empty$ -- { "t" change.case$ } -- { number tie.or.space.connect } -- if$ --} -- --FUNCTION {format.article.crossref} --{ key empty$ -- { journal empty$ -- { "need key or journal for " cite$ * " to crossref " * crossref * -- warning$ -- "" -- } -- { "In {\em " journal * "\/}" * } -- if$ -- } -- { "In " key * } -- if$ -- " \cite{" * crossref * "}" * --} -- --FUNCTION {format.crossref.editor} --{ editor #1 "{vv~}{ll}" format.name$ -- editor num.names$ duplicate$ -- #2 > -- { pop$ " et~al." * } -- { #2 < -- 'skip$ -- { editor #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" = -- { " et~al." 
* } -- { " and " * editor #2 "{vv~}{ll}" format.name$ * } -- if$ -- } -- if$ -- } -- if$ --} -- --FUNCTION {format.book.crossref} --{ volume empty$ -- { "empty volume in " cite$ * "'s crossref of " * crossref * warning$ -- "In " -- } -- { "Volume" volume tie.or.space.connect -- " of " * -- } -- if$ -- editor empty$ -- editor field.or.null author field.or.null = -- or -- { key empty$ -- { series empty$ -- { "need editor, key, or series for " cite$ * " to crossref " * -- crossref * warning$ -- "" * -- } -- { "{\em " * series * "\/}" * } -- if$ -- } -- { key * } -- if$ -- } -- { format.crossref.editor * } -- if$ -- " \cite{" * crossref * "}" * --} -- --FUNCTION {format.incoll.inproc.crossref} --{ editor empty$ -- editor field.or.null author field.or.null = -- or -- { key empty$ -- { booktitle empty$ -- { "need editor, key, or booktitle for " cite$ * " to crossref " * -- crossref * warning$ -- "" -- } -- { "In {\em " booktitle * "\/}" * } -- if$ -- } -- { "In " key * } -- if$ -- } -- { "In " format.crossref.editor * } -- if$ -- " \cite{" * crossref * "}" * --} -- --FUNCTION {article} --{ output.bibitem -- format.authors "author" output.check -- new.block -- format.title "title" output.check -- new.block -- crossref missing$ -- { journal emphasize "journal" output.check -- format.vol.num.pages output -- format.date "year" output.check -- } -- { format.article.crossref output.nonnull -- format.pages output -- } -- if$ -- new.block -- note output -- fin.entry --} -- --FUNCTION {book} --{ output.bibitem -- author empty$ -- { format.editors "author and editor" output.check } -- { format.authors output.nonnull -- crossref missing$ -- { "author and editor" editor either.or.check } -- 'skip$ -- if$ -- } -- if$ -- new.block -- format.btitle "title" output.check -- crossref missing$ -- { format.bvolume output -- new.block -- format.number.series output -- new.sentence -- publisher "publisher" output.check -- address output -- } -- { new.block -- format.book.crossref 
output.nonnull -- } -- if$ -- format.edition output -- format.date "year" output.check -- new.block -- note output -- fin.entry --} -- --FUNCTION {booklet} --{ output.bibitem -- format.authors output -- new.block -- format.title "title" output.check -- howpublished address new.block.checkb -- howpublished output -- address output -- format.date output -- new.block -- note output -- fin.entry --} -- --FUNCTION {inbook} --{ output.bibitem -- author empty$ -- { format.editors "author and editor" output.check } -- { format.authors output.nonnull -- crossref missing$ -- { "author and editor" editor either.or.check } -- 'skip$ -- if$ -- } -- if$ -- new.block -- format.btitle "title" output.check -- crossref missing$ -- { format.bvolume output -- format.chapter.pages "chapter and pages" output.check -- new.block -- format.number.series output -- new.sentence -- publisher "publisher" output.check -- address output -- } -- { format.chapter.pages "chapter and pages" output.check -- new.block -- format.book.crossref output.nonnull -- } -- if$ -- format.edition output -- format.date "year" output.check -- new.block -- note output -- fin.entry --} -- --FUNCTION {incollection} --{ output.bibitem -- format.authors "author" output.check -- new.block -- format.title "title" output.check -- new.block -- crossref missing$ -- { format.in.ed.booktitle "booktitle" output.check -- format.bvolume output -- format.number.series output -- format.chapter.pages output -- new.sentence -- publisher "publisher" output.check -- address output -- format.edition output -- format.date "year" output.check -- } -- { format.incoll.inproc.crossref output.nonnull -- format.chapter.pages output -- } -- if$ -- new.block -- note output -- fin.entry --} -- --FUNCTION {inproceedings} --{ output.bibitem -- format.authors "author" output.check -- new.block -- format.title "title" output.check -- new.block -- crossref missing$ -- { format.in.ed.booktitle "booktitle" output.check -- format.bvolume output -- 
format.number.series output -- format.pages output -- address empty$ -- { organization publisher new.sentence.checkb -- organization output -- publisher output -- format.date "year" output.check -- } -- { address output.nonnull -- format.date "year" output.check -- new.sentence -- organization output -- publisher output -- } -- if$ -- } -- { format.incoll.inproc.crossref output.nonnull -- format.pages output -- } -- if$ -- new.block -- note output -- fin.entry --} -- --FUNCTION {conference} { inproceedings } -- --FUNCTION {manual} --{ output.bibitem -- author empty$ -- { organization empty$ -- 'skip$ -- { organization output.nonnull -- address output -- } -- if$ -- } -- { format.authors output.nonnull } -- if$ -- new.block -- format.btitle "title" output.check -- author empty$ -- { organization empty$ -- { address new.block.checka -- address output -- } -- 'skip$ -- if$ -- } -- { organization address new.block.checkb -- organization output -- address output -- } -- if$ -- format.edition output -- format.date output -- new.block -- note output -- fin.entry --} -- --FUNCTION {mastersthesis} --{ output.bibitem -- format.authors "author" output.check -- new.block -- format.title "title" output.check -- new.block -- "Master's thesis" format.thesis.type output.nonnull -- school "school" output.check -- address output -- format.date "year" output.check -- new.block -- note output -- fin.entry --} -- --FUNCTION {misc} --{ output.bibitem -- format.authors output -- title howpublished new.block.checkb -- format.title output -- howpublished new.block.checka -- howpublished output -- format.date output -- new.block -- note output -- fin.entry -- empty.misc.check --} -- --FUNCTION {phdthesis} --{ output.bibitem -- format.authors "author" output.check -- new.block -- format.btitle "title" output.check -- new.block -- "PhD thesis" format.thesis.type output.nonnull -- school "school" output.check -- address output -- format.date "year" output.check -- new.block -- note output -- 
fin.entry --} -- --FUNCTION {proceedings} --{ output.bibitem -- editor empty$ -- { organization output } -- { format.editors output.nonnull } -- if$ -- new.block -- format.btitle "title" output.check -- format.bvolume output -- format.number.series output -- address empty$ -- { editor empty$ -- { publisher new.sentence.checka } -- { organization publisher new.sentence.checkb -- organization output -- } -- if$ -- publisher output -- format.date "year" output.check -- } -- { address output.nonnull -- format.date "year" output.check -- new.sentence -- editor empty$ -- 'skip$ -- { organization output } -- if$ -- publisher output -- } -- if$ -- new.block -- note output -- fin.entry --} -- --FUNCTION {techreport} --{ output.bibitem -- format.authors "author" output.check -- new.block -- format.title "title" output.check -- new.block -- format.tr.number output.nonnull -- institution "institution" output.check -- address output -- format.date "year" output.check -- new.block -- note output -- fin.entry --} -- --FUNCTION {unpublished} --{ output.bibitem -- format.authors "author" output.check -- new.block -- format.title "title" output.check -- new.block -- note "note" output.check -- format.date output -- fin.entry --} -- --FUNCTION {default.type} { misc } -- --MACRO {jan} {"Jan."} -- --MACRO {feb} {"Feb."} -- --MACRO {mar} {"Mar."} -- --MACRO {apr} {"Apr."} -- --MACRO {may} {"May"} -- --MACRO {jun} {"June"} -- --MACRO {jul} {"July"} -- --MACRO {aug} {"Aug."} -- --MACRO {sep} {"Sept."} -- --MACRO {oct} {"Oct."} -- --MACRO {nov} {"Nov."} -- --MACRO {dec} {"Dec."} -- --MACRO {acmcs} {"ACM Comput. Surv."} -- --MACRO {acta} {"Acta Inf."} -- --MACRO {cacm} {"Commun. ACM"} -- --MACRO {ibmjrd} {"IBM J. Res. Dev."} -- --MACRO {ibmsj} {"IBM Syst.~J."} -- --MACRO {ieeese} {"IEEE Trans. Softw. Eng."} -- --MACRO {ieeetc} {"IEEE Trans. Comput."} -- --MACRO {ieeetcad} -- {"IEEE Trans. Comput.-Aided Design Integrated Circuits"} -- --MACRO {ipl} {"Inf. Process. 
Lett."} -- --MACRO {jacm} {"J.~ACM"} -- --MACRO {jcss} {"J.~Comput. Syst. Sci."} -- --MACRO {scp} {"Sci. Comput. Programming"} -- --MACRO {sicomp} {"SIAM J. Comput."} -- --MACRO {tocs} {"ACM Trans. Comput. Syst."} -- --MACRO {tods} {"ACM Trans. Database Syst."} -- --MACRO {tog} {"ACM Trans. Gr."} -- --MACRO {toms} {"ACM Trans. Math. Softw."} -- --MACRO {toois} {"ACM Trans. Office Inf. Syst."} -- --MACRO {toplas} {"ACM Trans. Prog. Lang. Syst."} -- --MACRO {tcs} {"Theoretical Comput. Sci."} -- --READ -- --FUNCTION {sortify} --{ purify$ -- "l" change.case$ --} -- --INTEGERS { len } -- --FUNCTION {chop.word} --{ 's := -- 'len := -- s #1 len substring$ = -- { s len #1 + global.max$ substring$ } -- 's -- if$ --} -- --FUNCTION {sort.format.names} --{ 's := -- #1 'nameptr := -- "" -- s num.names$ 'numnames := -- numnames 'namesleft := -- { namesleft #0 > } -- { nameptr #1 > -- { " " * } -- 'skip$ -- if$ -- s nameptr "{vv{ } }{ll{ }}{ f{ }}{ jj{ }}" format.name$ 't := -- nameptr numnames = t "others" = and -- { "et al" * } -- { t sortify * } -- if$ -- nameptr #1 + 'nameptr := -- namesleft #1 - 'namesleft := -- } -- while$ --} -- --FUNCTION {sort.format.title} --{ 't := -- "A " #2 -- "An " #3 -- "The " #4 t chop.word -- chop.word -- chop.word -- sortify -- #1 global.max$ substring$ --} -- --FUNCTION {author.sort} --{ author empty$ -- { key empty$ -- { "to sort, need author or key in " cite$ * warning$ -- "" -- } -- { key sortify } -- if$ -- } -- { author sort.format.names } -- if$ --} -- --FUNCTION {author.editor.sort} --{ author empty$ -- { editor empty$ -- { key empty$ -- { "to sort, need author, editor, or key in " cite$ * warning$ -- "" -- } -- { key sortify } -- if$ -- } -- { editor sort.format.names } -- if$ -- } -- { author sort.format.names } -- if$ --} -- --FUNCTION {author.organization.sort} --{ author empty$ -- { organization empty$ -- { key empty$ -- { "to sort, need author, organization, or key in " cite$ * warning$ -- "" -- } -- { key sortify } -- if$ -- } -- 
{ "The " #4 organization chop.word sortify } -- if$ -- } -- { author sort.format.names } -- if$ --} -- --FUNCTION {editor.organization.sort} --{ editor empty$ -- { organization empty$ -- { key empty$ -- { "to sort, need editor, organization, or key in " cite$ * warning$ -- "" -- } -- { key sortify } -- if$ -- } -- { "The " #4 organization chop.word sortify } -- if$ -- } -- { editor sort.format.names } -- if$ --} -- --FUNCTION {presort} --{ type$ "book" = -- type$ "inbook" = -- or -- 'author.editor.sort -- { type$ "proceedings" = -- 'editor.organization.sort -- { type$ "manual" = -- 'author.organization.sort -- 'author.sort -- if$ -- } -- if$ -- } -- if$ -- " " -- * -- year field.or.null sortify -- * -- " " -- * -- title field.or.null -- sort.format.title -- * -- #1 entry.max$ substring$ -- 'sort.key$ := --} -- --ITERATE {presort} -- --SORT -- --STRINGS { longest.label } -- --INTEGERS { number.label longest.label.width } -- --FUNCTION {initialize.longest.label} --{ "" 'longest.label := -- #1 'number.label := -- #0 'longest.label.width := --} -- --FUNCTION {longest.label.pass} --{ number.label int.to.str$ 'label := -- number.label #1 + 'number.label := -- label width$ longest.label.width > -- { label 'longest.label := -- label width$ 'longest.label.width := -- } -- 'skip$ -- if$ --} -- --EXECUTE {initialize.longest.label} -- --ITERATE {longest.label.pass} -- --FUNCTION {begin.bib} --{ preamble$ empty$ -- 'skip$ -- { preamble$ write$ newline$ } -- if$ -- "\begin{thebibliography}{" longest.label * -- "}\setlength{\itemsep}{-1ex}\small" * write$ newline$ --} -- --EXECUTE {begin.bib} -- --EXECUTE {init.state.consts} -- --ITERATE {call.type$} -- --FUNCTION {end.bib} --{ newline$ -- "\end{thebibliography}" write$ newline$ --} -- --EXECUTE {end.bib} -- --% end of file ieee.bst --% --------------------------------------------------------------- diff --cc lnet/doc/mpi.fig index e1a91b5,e1a91b5..0000000 deleted file mode 100644,100644 --- a/lnet/doc/mpi.fig +++ /dev/null @@@ 
-1,117 -1,117 +1,0 @@@ --#FIG 3.2 --Landscape --Center --Inches --Letter --100.00 --Single ---2 --1200 2 --6 150 1650 900 2025 --4 1 0 100 0 0 10 0.0000 0 135 735 525 1800 Unexpected\001 --4 1 0 100 0 0 10 0.0000 0 135 585 525 1995 Messages\001 ---6 --6 150 150 900 525 --4 1 0 100 0 0 10 0.0000 0 135 615 525 300 Preposted\001 --4 1 0 100 0 0 10 0.0000 0 105 525 525 495 Receives\001 ---6 --6 2550 4125 3150 4725 --4 1 0 100 0 0 10 0.0000 0 135 600 2850 4275 Length=0\001 --4 1 0 100 0 0 10 0.0000 0 105 540 2850 4470 Truncate\001 --4 1 0 100 0 0 10 0.0000 0 105 480 2850 4665 No Ack\001 ---6 --6 1050 1575 1950 1875 --2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 -- 1050 1575 1950 1575 1950 1875 1050 1875 1050 1575 --4 1 0 100 0 0 10 0.0000 0 105 780 1500 1725 Match Short\001 ---6 --6 5400 1575 6300 2175 --2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 -- 5400 1575 6300 1575 6300 2175 5400 2175 5400 1575 --4 1 0 100 0 0 10 0.0000 0 105 405 5850 1875 Buffer\001 ---6 --6 5400 2400 6300 3000 --2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 -- 5400 2400 6300 2400 6300 3000 5400 3000 5400 2400 --4 1 0 100 0 0 10 0.0000 0 105 405 5850 2700 Buffer\001 ---6 --6 1050 2400 1950 2700 --2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 -- 1050 2400 1950 2400 1950 2700 1050 2700 1050 2400 --4 1 0 100 0 0 10 0.0000 0 105 780 1500 2550 Match Short\001 ---6 --6 1050 825 1950 1125 --2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 -- 1050 825 1950 825 1950 1125 1050 1125 1050 825 --4 1 0 100 0 0 10 0.0000 0 105 765 1500 975 Match None\001 ---6 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 1500 1125 1500 1575 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 3225 2025 4050 3375 --2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2 -- 150 675 6600 675 --2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2 -- 150 1350 6600 1350 --2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 -- 2400 4125 3300 4125 3300 4725 2400 4725 2400 4125 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 3225 4500 4050 3675 
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 3225 1725 5400 1725 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 3225 2550 5400 2550 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 3225 2850 4050 3450 --2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 1500 1800 1500 2400 --2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 -- 2400 825 3300 825 3300 1275 2400 1275 2400 825 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 1500 2625 1500 4125 --2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 -- 1050 4125 1950 4125 1950 4425 1050 4425 1050 4125 --2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 1500 300 1500 825 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 1875 975 2400 975 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 1875 1725 2400 1725 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 1875 2550 2400 2550 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 1875 4275 2400 4275 --2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 -- 2400 1575 3300 1575 3300 2175 2400 2175 2400 1575 --2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 -- 2400 2400 3300 2400 3300 3000 2400 3000 2400 2400 --2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 -- 4050 3300 5250 3300 5250 3750 4050 3750 4050 3300 --4 1 0 100 0 0 10 0.0000 0 105 885 1500 150 Match Entries\001 --4 1 0 100 0 0 10 0.0000 0 135 1290 2850 150 Memory Descriptors\001 --4 1 0 100 0 0 10 0.0000 0 135 1065 5850 150 Memory Regions\001 --4 1 0 100 0 0 10 0.0000 0 135 825 4500 150 Event Queues\001 --4 1 0 100 0 0 10 0.0000 0 105 585 525 1050 RcvMark\001 --4 1 0 100 0 0 10 0.0000 0 105 330 2850 1102 None\001 --4 1 0 100 0 0 10 0.0000 0 135 705 1500 4275 Match Any\001 --4 1 0 50 0 0 10 0.0000 0 150 810 2850 1725 max_offset=\001 --4 1 0 50 0 0 10 0.0000 0 150 840 2850 1875 n - short_len\001 --4 1 0 50 0 0 10 0.0000 0 150 810 2850 2550 max_offset=\001 --4 1 0 50 0 0 
10 0.0000 0 150 840 2850 2700 n - short_len\001 --4 1 0 50 0 0 10 0.0000 0 105 405 2850 2100 unlink\001 --4 1 0 50 0 0 10 0.0000 0 105 405 2850 2925 unlink\001 --4 1 0 100 0 0 10 0.0000 0 135 930 4650 3675 Message Queue\001 --4 1 0 100 0 0 10 0.0000 0 135 735 4650 3525 Unexpected\001 diff --cc lnet/doc/portals.fig index 9b1271b,9b1271b..0000000 deleted file mode 100644,100644 --- a/lnet/doc/portals.fig +++ /dev/null @@@ -1,68 -1,68 +1,0 @@@ --#FIG 3.2 --Landscape --Center --Inches --Letter --100.00 --Single ---2 --1200 2 --2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 -- 1350 900 1650 900 1650 1200 1350 1200 1350 900 --2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 -- 1800 1350 2100 1350 2100 1650 1800 1650 1800 1350 --2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 -- 2250 1800 2550 1800 2550 2100 2250 2100 2250 1800 --2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2 -- 4200 375 4200 2100 --2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 -- 525 600 1125 600 1125 2100 525 2100 525 600 --2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 -- 4425 1275 4875 1275 4875 1950 4425 1950 4425 1275 --2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 -- 2550 1200 3150 1200 3150 1500 2550 1500 2550 1200 --2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 3000 1425 4425 1425 --2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 -- 3600 825 3750 825 3750 1125 3600 1125 3600 825 --2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 2025 1425 2550 1425 --2 2 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5 -- 4425 750 4875 750 4875 1125 4425 1125 4425 750 --2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 3675 975 4425 975 --3 0 0 1 0 7 100 0 -1 0.000 0 1 0 2 -- 0 0 1.00 60.00 120.00 -- 825 1050 1350 1050 -- 0.000 0.000 --3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5 -- 0 0 1.00 60.00 120.00 -- 1500 1125 1500 1350 1500 1500 1650 1500 1800 1500 -- 0.000 1.000 1.000 1.000 0.000 --3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5 -- 0 0 1.00 60.00 120.00 -- 1950 1575 1950 1800 1950 1950 2100 1950 2250 1950 -- 0.000 1.000 1.000 1.000 0.000 --3 
0 0 1 0 7 100 0 -1 0.000 0 0 0 2 -- 525 975 1125 975 -- 0.000 0.000 --3 0 0 1 0 7 100 0 -1 0.000 0 0 0 2 -- 525 1125 1125 1125 -- 0.000 0.000 --3 0 0 1 0 7 100 0 -1 0.000 0 1 0 7 -- 0 0 1.00 60.00 120.00 -- 3000 1275 3150 1275 3300 1275 3300 1125 3300 975 3450 975 -- 3600 975 -- 0.000 1.000 1.000 1.000 1.000 1.000 0.000 --4 0 0 100 0 0 10 0.0000 0 105 690 1275 750 Match List\001 --4 1 0 100 0 0 10 0.0000 0 105 780 825 525 Portal Table\001 --4 2 0 100 0 0 10 0.0000 0 135 825 4050 2025 Library Space\001 --4 0 0 100 0 0 10 0.0000 0 135 1110 4350 2175 Application Space\001 --4 1 0 100 0 0 10 0.0000 0 135 660 2850 1050 Descriptor\001 --4 1 0 100 0 0 10 0.0000 0 135 540 2850 825 Memory\001 --4 1 0 100 0 0 10 0.0000 0 135 765 3750 675 Event Queue\001 --4 1 0 100 0 0 10 0.0000 0 135 495 4650 675 Regions\001 --4 1 0 100 0 0 10 0.0000 0 135 540 4650 525 Memory\001 diff --cc lnet/doc/portals3.bib index 323b99f,323b99f..0000000 deleted file mode 100644,100644 --- a/lnet/doc/portals3.bib +++ /dev/null @@@ -1,124 -1,124 +1,0 @@@ --@Article{ Cplant, -- title = { {M}assively {P}arallel {C}omputing with -- {C}ommodity {C}omponents }, -- author = { Ron Brightwell and David S. Greenberg and Arthur -- B. 
Maccabe and Rolf Riesen }, -- journal = { Parallel Computing }, -- volume = { 26 }, -- month = { February }, -- pages = { 243-266 }, -- year = { 2000 } --} -- --@Manual{ Portals, -- organization = { Sandia National Laboratories }, -- title = { {P}uma {P}ortals }, -- note = { http://www.cs.sandia.gov/puma/portals }, -- year = { 1997 } --} -- --@Techreport{ VIA, -- title = { {V}irtual {I}nterface {A}rchitecture -- {S}pecification {V}ersion 1.0 }, -- author = { {Compaq, Microsoft, and Intel} }, -- institution = { Compaq, Microsoft, and Intel }, -- month = { December }, -- year = { 1997 } --} -- --@Techreport{ ST, -- title = { {I}nformation {T}echnology - {S}cheduled -- {T}ransfer {P}rotocol - {W}orking {D}raft 2.0 }, -- author = { {Task Group of Technical Committee T11} }, -- institution = { Accredited Standards Committee NCITS }, -- month = { July }, -- year = { 1998 } --} -- --@Manual{ TFLOPS, -- organization = { Sandia National Laboratories }, -- title = { ASCI Red }, -- note = { http://www.sandia.gov/ASCI/TFLOP }, -- year = { 1996 } --} -- --@Techreport{ GM, -- title = { The {GM} {M}essage {P}assing {S}ystem }, -- author = { {Myricom, Inc.} }, -- institution = { {Myricom, Inc.} }, -- year = { 1997 }, --} -- --@Article{ MPIstandard, -- title = { {MPI}: {A} {M}essage-{P}assing {I}nterface standard }, -- author = { {Message Passing Interface Forum} }, -- journal = { The International Journal of Supercomputer Applications -- and High Performance Computing }, -- volume = { 8 }, -- year = { 1994 } --} -- --@Inproceedings{ PumaOS, -- author = "Lance Shuler and Chu Jong and Rolf Riesen and -- David van Dresser and Arthur B. Maccabe and -- Lee Ann Fisk and T. Mack Stallcup", -- booktitle = "Proceeding of the 1995 Intel Supercomputer -- User's Group Conference", -- title = "The {P}uma Operating System for Massively Parallel Computers", -- organization = "Intel Supercomputer User's Group", -- year = 1995 --} -- --@InProceedings{ SUNMOS, --author = "Arthur B. 
Maccabe and Kevin S. McCurley and Rolf Riesen and -- Stephen R. Wheat", --title = "{SUNMOS} for the {Intel} {Paragon}: A Brief User's Guide", --booktitle = "Proceedings of the {Intel} Supercomputer Users' Group. 1994 -- Annual North America Users' Conference.", --year = 1994, --pages = "245--251", --month = "June", --location = "ftp.cs.sandia.gov /pub/sunmos/papers/ISUG94-1.ps" --} -- --@InProceedings { PumaMPI, -- title = { Design and Implementation of {MPI} on {P}uma Portals }, -- author = { Ron Brightwell and Lance Shuler }, -- booktitle = { Proceedings of the Second MPI Developer's Conference }, -- pages = { 18-25 }, -- month = { July }, -- year = { 1996 } --} -- --@Inproceedings{ FM2, -- author = { Mario Lauria and Scott Pakin and Andrew Chien }, -- title = { {E}fficient {L}ayering for {H}igh {S}peed -- {C}ommunication: {F}ast {M}essages 2.x }, -- Booktitle = { Proceedings of the IEEE International Symposium -- on High Performance Distributed Computing }, -- year = { 1998 } --} -- --@Manual { CraySHMEM, -- title = "SHMEM Technical Note for C, SG-2516 2.3", -- organization = "Cray Research, Inc.", -- month = "October", -- year = 1994 --} -- --@Manual { MPI2, -- title = "{MPI}-2: {E}xtensions to the {M}essage-{P}assing {I}nterface", -- organization = "Message Passing Interface Forum", -- note = "http://www.mpi-forum.org/docs/mpi-20-html/mpi2-report.html", -- month = "July", -- year = 1997 --} -- --@InProceedings { PMMPI, -- title = { {The Design and Implementation of Zero Copy MPI Using -- Commodity Hardware with a High Performance Network} }, -- author = { Francis O'Carroll and Hiroshi Tezuka and Atsushi Hori -- and Yutaka Ishikawa }, -- booktitle = { Proceedings of the ICS }, -- year = { 1998 } --} diff --cc lnet/doc/portals3.lyx index 8429280,8429280..0000000 deleted file mode 100644,100644 --- a/lnet/doc/portals3.lyx +++ /dev/null @@@ -1,15944 -1,15944 +1,0 @@@ --#LyX 1.2 created this file. 
For more info see http://www.lyx.org/ --\lyxformat 220 --\textclass report --\begin_preamble --\usepackage{fullpage} --\renewenvironment{comment}% --{\begin{quote}\textbf{Discussion}: \slshape}% --{\end{quote}} --\pagestyle{myheadings} --\end_preamble --\language american --\inputencoding auto --\fontscheme pslatex --\graphics default --\paperfontsize 10 --\spacing single --\papersize letterpaper --\paperpackage a4 --\use_geometry 0 --\use_amsmath 0 --\use_natbib 0 --\use_numerical_citations 0 --\paperorientation portrait --\secnumdepth 2 --\tocdepth 2 --\paragraph_separation indent --\defskip medskip --\quotes_language english --\quotes_times 2 --\papercolumns 1 --\papersides 2 --\paperpagestyle headings -- --\layout Title -- --The Portals 3.2 Message Passing Interface --\newline -- Revision 1.1 --\layout Author -- --Ron Brightwell --\begin_inset Foot --collapsed true -- --\layout Standard -- --R. -- Brightwell and R. -- Riesen are with the Scalable Computing Systems Department, Sandia National -- Laboratories, P.O. -- Box 5800, Albuquerque, NM\SpecialChar ~ --\SpecialChar ~ --87111-1110, bright@cs.sandia.gov, rolf@cs.sandia.gov. --\end_inset -- --, Arthur B. -- Maccabe --\begin_inset Foot --collapsed true -- --\layout Standard -- --A. -- B. -- Maccabe is with the Computer Science Department, University of New Mexico, -- Albuquerque, NM\SpecialChar ~ --\SpecialChar ~ --87131-1386, maccabe@cs.unm.edu. --\end_inset -- --, Rolf Riesen and Trammell Hudson --\layout Abstract -- --This report presents a specification for the Portals 3.2 message passing -- interface. -- Portals 3.2 is intended to allow scalable, high-performance network communicatio --n between nodes of a parallel computing system. -- Specifically, it is designed to support a parallel computing platform composed -- of clusters of commodity workstations connected by a commodity system area -- network fabric. 
-- In addition, Portals 3.2 is well suited to massively parallel processing -- and embedded systems. -- Portals 3.2 represents an adaption of the data movement layer developed -- for massively parallel processing platforms, such as the 4500-node Intel -- TeraFLOPS machine. -- --\layout Standard -- -- --\begin_inset ERT --status Collapsed -- --\layout Standard -- --\backslash --clearpage --\backslash --pagenumbering{roman} --\backslash --setcounter{page}{3} --\end_inset -- -- --\layout Standard -- -- --\begin_inset LatexCommand \tableofcontents{} -- --\end_inset -- -- --\layout Standard -- -- --\begin_inset ERT --status Collapsed -- --\layout Standard -- --\backslash --cleardoublepage --\end_inset -- -- --\layout Standard -- -- --\begin_inset FloatList figure -- --\end_inset -- -- --\layout Standard -- -- --\begin_inset ERT --status Collapsed -- --\layout Standard -- --\backslash --cleardoublepage --\end_inset -- -- --\layout Standard -- -- --\begin_inset FloatList table -- --\end_inset -- -- --\layout Standard -- -- --\begin_inset ERT --status Collapsed -- --\layout Standard -- --\backslash --cleardoublepage --\end_inset -- -- --\layout Chapter* -- --Summary of Changes for Revision 1.1 --\layout Enumerate -- --Updated version number to 3.2 throughout the document --\layout Enumerate -- --Section --\begin_inset LatexCommand \ref{sub:PtlGetId} -- --\end_inset -- --: added --\family typewriter --PTL_SEGV --\family default -- to error list for --\shape italic --PtlGetId --\shape default --. --\layout Enumerate -- --Section --\begin_inset LatexCommand \ref{sec:meattach} -- --\end_inset -- --: added --\family typewriter --PTL_ML_TOOLONG --\family default -- to error list for --\shape italic --PtlMEAttach --\shape default --. --\layout Enumerate -- --Section --\begin_inset LatexCommand \ref{sec:meunlink} -- --\end_inset -- --: removed text referring to a list of associated memory descriptors. 
--\layout Enumerate -- --Section --\begin_inset LatexCommand \ref{sec:mdfree} -- --\end_inset -- --: added text to describe unlinking a free-floating memory descriptor. --\layout Enumerate -- --Table --\begin_inset LatexCommand \ref{tab:types} -- --\end_inset -- --: added entry for --\family typewriter --ptl_seq_t --\family default --. --\layout Enumerate -- --Section --\begin_inset LatexCommand \ref{sec:md-type} -- --\end_inset -- --: --\begin_deeper --\layout Enumerate -- --added definition of --\family typewriter --max_offset --\family default --. --\layout Enumerate -- --added text to clarify --\family typewriter --PTL_MD_MANAGE_REMOTE --\family default --. --\end_deeper --\layout Enumerate -- --Section --\begin_inset LatexCommand \ref{sec:mdattach} -- --\end_inset -- --: modified text for --\family typewriter --unlink_op --\family default --. --\layout Enumerate -- --Section --\begin_inset LatexCommand \ref{sec:niinit} -- --\end_inset -- --: added text to clarify multiple calls to --\shape italic --PtlNIInit --\shape default --. --\layout Enumerate -- --Section --\begin_inset LatexCommand \ref{sec:mdattach} -- --\end_inset -- --: added text to clarify --\family typewriter --unlink_nofit --\family default --. --\layout Enumerate -- --Section --\begin_inset LatexCommand \ref{sec:receiving} -- --\end_inset -- --: removed text indicating that an MD will reject a message if the associated -- EQ is full. --\layout Enumerate -- --Section --\begin_inset LatexCommand \ref{sec:mdfree} -- --\end_inset -- --: added --\family typewriter --PTL_MD_INUSE --\family default -- error code and text to indicate that only MDs with no pending operations -- can be unlinked. --\layout Enumerate -- --Table --\begin_inset LatexCommand \ref{tab:retcodes} -- --\end_inset -- --: added --\family typewriter --PTL_MD_INUSE --\family default -- return code. 
--\layout Enumerate -- --Section --\begin_inset LatexCommand \ref{sec:event-type} -- --\end_inset -- --: added user id field, MD handle field, and NI specific failure field to -- the --\family typewriter --ptl_event_t --\family default -- structure. --\layout Enumerate -- --Table --\begin_inset LatexCommand \ref{tab:types} -- --\end_inset -- --: added --\family typewriter --ptl_ni_fail_t --\family default --. --\layout Enumerate -- --Section --\begin_inset LatexCommand \ref{sec:event-type} -- --\end_inset -- --: added --\family typewriter --PTL_EVENT_UNLINK --\family default -- event type. --\layout Enumerate -- --Table --\begin_inset LatexCommand \ref{tab:func} -- --\end_inset -- --: removed --\shape slanted --PtlTransId --\shape default --. --\layout Enumerate -- --Section --\begin_inset LatexCommand \ref{sec:meattach} -- --\end_inset -- --, Section --\begin_inset LatexCommand \ref{sec:meinsert} -- --\end_inset -- --, Section --\begin_inset LatexCommand \ref{sec:put} -- --\end_inset -- --: listed allowable constants with relevant fields. --\layout Enumerate -- --Table --\begin_inset LatexCommand \ref{tab:func} -- --\end_inset -- --: added --\shape italic --PtlMEAttachAny --\shape default -- function. --\layout Enumerate -- --Table --\begin_inset LatexCommand \ref{tab:retcodes} -- --\end_inset -- --: added --\family typewriter --PTL_PT_FULL --\family default -- return code for --\shape italic --PtlMEAttachAny --\shape default --. --\layout Enumerate -- --Table --\begin_inset LatexCommand \ref{tab:oconsts} -- --\end_inset -- --: updated to reflect new event types. --\layout Enumerate -- --Section --\begin_inset LatexCommand \ref{sec:id-type} -- --\end_inset -- --: added --\family typewriter --ptl_nid_t --\family default --, --\family typewriter --ptl_pid_t --\family default --, and --\family typewriter --ptl_uid_t --\family default --. 
--\layout Chapter* -- --Summary of Changes for Version 3.1 --\layout Section* -- --Thread Issues --\layout Standard -- --The most significant change to the interface from version 3.0 to 3.1 involves -- the clarification of how the interface interacts with multi-threaded applicatio --ns. -- We adopted a generic thread model in which processes define an address -- space and threads share the address space. -- Consideration of the API in the light of threads led to several clarifications -- throughout the document: --\layout Enumerate -- --Glossary: --\begin_deeper --\layout Enumerate -- --added a definition for --\emph on --thread --\emph default --, --\layout Enumerate -- --reworded the definition for --\emph on --process --\emph default --. -- --\end_deeper --\layout Enumerate -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:apiover} -- --\end_inset -- --: added section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:threads} -- --\end_inset -- -- to describe the multi-threading model used by the Portals API. -- --\layout Enumerate -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:ptlinit} -- --\end_inset -- --: --\emph on --PtlInit --\emph default -- must be called at least once and may be called any number of times. -- --\layout Enumerate -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:ptlfini} -- --\end_inset -- --: --\emph on --PtlFini --\emph default -- should be called once as the process is terminating and not as each thread -- terminates. -- --\layout Enumerate -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:pid} -- --\end_inset -- --: Portals does not define thread ids. -- --\layout Enumerate -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:ni} -- --\end_inset -- --: network interfaces are associated with processes, not threads. 
-- --\layout Enumerate -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:niinit} -- --\end_inset -- --: --\emph on --PtlNIInit --\emph default -- must be called at least once and may be called any number of times. -- --\layout Enumerate -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:eqget} -- --\end_inset -- --: --\emph on --PtlEQGet --\emph default -- returns --\family typewriter --PTL_EQ_EMPTY --\family default -- if a thread is blocked on --\emph on --PtlEQWait --\emph default --. -- --\layout Enumerate -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:eqwait} -- --\end_inset -- --: waiting threads are awakened in FIFO order. -- --\layout Standard -- --Two functions, --\emph on --PtlNIBarrier --\emph default -- and --\emph on --PtlEQCount --\emph default -- were removed from the API. -- --\emph on --PtlNIBarrier --\emph default -- was defined to block the calling process until all of the processes in -- the application group had invoked --\emph on --PtlNIBarrier --\emph default --. -- We now consider this functionality, along with the concept of groups (see -- the discussion under --\begin_inset Quotes eld --\end_inset -- --other changes --\begin_inset Quotes erd --\end_inset -- --), to be part of the runtime system, not part of the Portals API. -- --\emph on --PtlEQCount --\emph default -- was defined to return the number of events in an event queue. -- Because external operations may lead to new events being added and other -- threads may remove events, the value returned by --\emph on --PtlEQCount --\emph default -- would have to be a hint about the number of events in the event queue. --\layout Section* -- --Handling small, unexpected messages --\layout Standard -- --Another set of changes relates to handling small unexpected messages in -- MPI. -- In designing version 3.0, we assumed that each unexpected message would -- be placed in a unique memory descriptor. 
-- To avoid the need to process a long list of memory descriptors, we moved -- the memory descriptors out of the match list and hung them off of a single -- match list entry. -- In this way, large unexpected messages would only encounter a single --\begin_inset Quotes eld --\end_inset -- --short message --\begin_inset Quotes erd --\end_inset -- -- match list entry before encountering the --\begin_inset Quotes eld --\end_inset -- --long message --\begin_inset Quotes erd --\end_inset -- -- match list entry. -- Experience with this strategy identified resource management problems with -- this approach. -- In particular, a long sequence of very short (or zero length) messages -- could quickly exhaust the memory descriptors constructed for handling unexpecte --d messages. -- Our new strategy involves the use of several very large memory descriptors -- for small unexpected messages. -- Consecutive unexpected messages will be written into the first of these -- memory descriptors until the memory descriptor fills up. -- When the first of the --\begin_inset Quotes eld --\end_inset -- --small memory --\begin_inset Quotes erd --\end_inset -- -- descriptors fills up, it will be unlinked and subsequent short messages -- will be written into the next --\begin_inset Quotes eld --\end_inset -- --short message --\begin_inset Quotes erd --\end_inset -- -- memory descriptor. -- In this case, a --\begin_inset Quotes eld --\end_inset -- --short message --\begin_inset Quotes erd --\end_inset -- -- memory descriptor will be declared full when it does not have sufficient -- space for the largest small unexpected message. --\layout Standard -- --This led to two significant changes. -- First, each match list entry now has a single memory descriptor rather -- than a list of memory descriptors. -- Second, in addition to exceeding the operation threshold, a memory descriptor -- can be unlinked when the local offset exceeds a specified value. 
-- These changes have led to several changes in this document: --\layout Enumerate -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{subsec:paddress} -- --\end_inset -- --: --\begin_deeper --\layout Enumerate -- --removed references to the memory descriptor list, --\layout Enumerate -- --changed the portals address translation description to indicate that unlinking -- a memory descriptor implies unlinking the associated match list entry--match -- list entries can no longer be unlinked independently from the memory descriptor. -- --\end_deeper --\layout Enumerate -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:meattach} -- --\end_inset -- --: --\begin_deeper --\layout Enumerate -- --removed unlink from argument list, --\layout Enumerate -- --removed description of --\family typewriter --ptl_unlink --\family default -- type, --\layout Enumerate -- --changed wording of the error condition when the Portal table index already -- has an associated match list. -- --\end_deeper --\layout Enumerate -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:meinsert} -- --\end_inset -- --: removed unlink from argument list. -- --\layout Enumerate -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:md-type} -- --\end_inset -- --: added --\family typewriter --max_offset --\family default --. -- --\layout Enumerate -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:mdattach} -- --\end_inset -- --: --\begin_deeper --\layout Enumerate -- --added description of --\family typewriter --ptl_unlink --\family default -- type, --\layout Enumerate -- --removed reference to memory descriptor lists, --\layout Enumerate -- --changed wording of the error condition when match list entry already has -- an associated memory descriptor, --\layout Enumerate -- --changed the description of the --\family typewriter --unlink --\family default -- argument. 
-- --\end_deeper --\layout Enumerate -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:md} -- --\end_inset -- --: removed --\family typewriter --PtlMDInsert --\family default -- operation. -- --\layout Enumerate -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:mdbind} -- --\end_inset -- --: removed references to memory descriptor list. -- --\layout Enumerate -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:mdfree} -- --\end_inset -- --: removed reference to memory descriptor list. -- --\layout Enumerate -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:summary} -- --\end_inset -- --: removed references to PtlMDInsert. -- --\layout Enumerate -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:semantics} -- --\end_inset -- --: removed reference to memory descriptor list. -- --\layout Enumerate -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:exmpi} -- --\end_inset -- --: revised the MPI example to reflect the changes to the interface. -- --\layout Standard -- --Several changes have been made to improve the general documentation of the -- interface. -- --\layout Enumerate -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:handle-type} -- --\end_inset -- --: documented the special value --\family typewriter --PTL_EQ_NONE --\family default --. -- --\layout Enumerate -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:id-type} -- --\end_inset -- --: documented the special value --\family typewriter --PTL_ID_ANY --\family default --. -- --\layout Enumerate -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:mdbind} -- --\end_inset -- --: documented the return value --\family typewriter --PTL_INV_EQ --\layout Enumerate -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:mdupdate} -- --\end_inset -- --: clarified the description of the --\emph on --PtlMDUpdate --\emph default -- function. 
-- --\layout Enumerate -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:implvals} -- --\end_inset -- --: introduced a new section to document the implementation defined values. -- --\layout Enumerate -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:summary} -- --\end_inset -- --: modified Table\SpecialChar ~ -- --\begin_inset LatexCommand \ref{tab:oconsts} -- --\end_inset -- -- to indicate where each constant is introduced and where it is used. -- --\layout Section* -- --Other changes --\layout Subsection* -- --Implementation defined limits (Section --\begin_inset LatexCommand \ref{sec:niinit} -- --\end_inset -- --) --\layout Standard -- --The earlier version provided implementation defined limits for the maximum -- number of match entries, the maximum number of memory descriptors, etc. -- Rather than spanning the entire implementation, these limits are now associated -- with individual network interfaces. --\layout Subsection* -- --Added User Ids (Section --\begin_inset LatexCommand \ref{sec:uid} -- --\end_inset -- --) --\layout Standard -- --Group Ids had been used to simplify access control entries. -- In particular, a process could allow access for all of the processes in -- a group. -- User Ids have been introduced to regain this functionality. -- We use user ids to fill this role. --\layout Subsection* -- --Removed Group Ids and Rank Ids (Section --\begin_inset LatexCommand \ref{sec:pid} -- --\end_inset -- --) --\layout Standard -- --The earlier version of Portals had two forms for addressing processes: <node id, process id> and <group id, rank id>. -- A process group was defined as the collection of processes created during -- application launch. -- Each process in the group was given a unique rank id in the range 0 to -- --\begin_inset Formula $n-1$ --\end_inset -- -- where --\begin_inset Formula $n$ --\end_inset -- -- was the number of processes in the group. -- We removed groups because they are better handled in the runtime system. 
--\layout Subsection* -- --Match lists (Section --\begin_inset LatexCommand \ref{sec:meattach} -- --\end_inset -- --) --\layout Standard -- --It is no longer illegal to have an existing match entry when calling PtlMEAttach. -- A position argument was added to the list of arguments supplied to --\emph on --PtlMEAttach --\emph default -- to specify whether the new match entry is prepended or appended to the -- existing list. -- If there is no existing match list, the position argument is ignored. --\layout Subsection* -- --Unlinking Memory Descriptors (Section --\begin_inset LatexCommand \ref{sec:md} -- --\end_inset -- --) --\layout Standard -- --Previously, a memory descriptor could be unlinked if the offset exceeded -- a threshold upon the completion of an operation. -- In this version, the unlinking is delayed until there is a matching operation -- which requires more memory than is currently available in the descriptor. -- In addition to changes in section, this led to a revision of Figure\SpecialChar ~ -- --\begin_inset LatexCommand \ref{fig:flow} -- --\end_inset -- --. --\layout Subsection* -- --Split Phase Operations and Events (Section --\begin_inset LatexCommand \ref{sec:eq} -- --\end_inset -- --) --\layout Standard -- --Previously, there were five types of events: --\family typewriter --PTL_EVENT_PUT --\family default --, --\family typewriter --PTL_EVENT_GET --\family default --, --\family typewriter --PTL_EVENT_REPLY --\family default --, --\family typewriter --PTL_EVENT_SENT --\family default --, and --\family typewriter --PTL_EVENT_ACK. -- --\family default --The first four of these reflected the completion of potentially long operations. -- We have introduced new event types to reflect the fact that long operations -- have a distinct starting point and a distinct completion point. -- Moreover, the completion may be successful or unsuccessful. 
--\layout Standard -- --In addition to providing a mechanism for reporting failure to higher levels -- of software, this split provides an opportunity for improved ordering -- semantics. -- Previously, if one process initiated two operations (e.g., two put operations) -- on a remote process, these operations were guaranteed to complete in the -- same order that they were initiated. -- Now, we only guarantee that the initiation events are delivered in the -- same order. -- In particular, the operations do not need to complete in the order that -- they were initiated. --\layout Subsection* -- --Well known process ids (Section --\begin_inset LatexCommand \ref{sec:niinit} -- --\end_inset -- --) --\layout Standard -- --To support the notion of --\begin_inset Quotes eld --\end_inset -- --well known process ids, --\begin_inset Quotes erd --\end_inset -- -- we added a process id argument to the arguments for PtlNIInit. --\layout Chapter* -- --Glossary --\layout Description -- --API Application Programming Interface. -- A definition of the functions and semantics provided by a library of functions. -- --\layout Description -- --Initiator A --\emph on --process --\emph default -- that initiates a message operation. -- --\layout Description -- --Message An application-defined unit of data that is exchanged between --\emph on --processes --\emph default --. -- --\layout Description -- --Message\SpecialChar ~ --Operation Either a put operation, which writes data, or a get operation, -- which reads data. -- --\layout Description -- --Network A network provides point-to-point communication between --\emph on --nodes --\emph default --. -- Internally, a network may provide multiple routes between endpoints (to -- improve fault tolerance or to improve performance characteristics); however, -- multiple paths will not be exposed outside of the network. -- --\layout Description -- --Node A node is an endpoint in a --\emph on --network --\emph default --. 
-- Nodes provide processing capabilities and memory. -- A node may provide multiple processors (an SMP node) or it may act as a -- --\emph on --gateway --\emph default -- between networks. -- --\layout Description -- --Process A context of execution. -- A process defines a virtual memory (VM) context. -- This context is not shared with other processes. -- Several threads may share the VM context defined by a process. -- --\layout Description -- --Target A --\emph on --process --\emph default -- that is acted upon by a message operation. -- --\layout Description -- --Thread A context of execution that shares a VM context with other threads. -- --\layout Standard -- -- --\begin_inset ERT --status Collapsed -- --\layout Standard -- --\backslash --cleardoublepage --\layout Standard -- --\backslash --setcounter{page}{1} --\backslash --pagenumbering{arabic} --\end_inset -- -- --\layout Chapter -- --Introduction --\begin_inset LatexCommand \label{sec:intro} -- --\end_inset -- -- --\layout Section -- --Overview --\layout Standard -- --This document describes an application programming interface for message -- passing between nodes in a system area network. -- The goal of this interface is to improve the scalability and performance -- of network communication by defining the functions and semantics of message -- passing required for scaling a parallel computing system to ten thousand -- nodes. -- This goal is achieved by providing an interface that will allow a quality -- implementation to take advantage of the inherently scalable design of Portals. --\layout Standard -- --This document is divided into several sections: --\layout Description -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:intro} -- --\end_inset -- -----Introduction This section describes the purpose and scope of the Portals -- API. 
-- --\layout Description -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:apiover} -- --\end_inset -- -----An\SpecialChar ~ --Overview\SpecialChar ~ --of\SpecialChar ~ --the\SpecialChar ~ --Portals\SpecialChar ~ --3.1\SpecialChar ~ --API This section gives a brief overview of the -- Portals API. -- The goal is to introduce the key concepts and terminology used in the descripti --on of the API. -- --\layout Description -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:api} -- --\end_inset -- -----The\SpecialChar ~ --Portals\SpecialChar ~ --3.2\SpecialChar ~ --API This section describes the functions and semantics of -- the Portals application programming interface. -- --\layout Description -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:semantics} -- --\end_inset -- ----The\SpecialChar ~ --Semantics\SpecialChar ~ --of\SpecialChar ~ --Message\SpecialChar ~ --Transmission This section describes the semantics -- of message transmission. -- In particular, the information transmitted in each type of message and -- the processing of incoming messages. -- --\layout Description -- --Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:examples} -- --\end_inset -- -----Examples This section presents several examples intended to illustrate -- the use of the Portals API. -- --\layout Section -- --Purpose --\layout Standard -- --Existing message passing technologies available for commodity cluster networking -- hardware do not meet the scalability goals required by the Cplant\SpecialChar ~ -- --\begin_inset LatexCommand \cite{Cplant} -- --\end_inset -- -- project at Sandia National Laboratories. -- The goal of the Cplant project is to construct a commodity cluster that -- can scale to the order of ten thousand nodes. -- This number greatly exceeds the capacity for which existing message passing -- technologies have been designed and implemented. 
--\layout Standard -- --In addition to the scalability requirements of the network, these technologies -- must also be able to support a scalable implementation of the Message Passing -- Interface (MPI)\SpecialChar ~ -- --\begin_inset LatexCommand \cite{MPIstandard} -- --\end_inset -- -- standard, which has become the --\shape italic --de facto --\shape default -- standard for parallel scientific computing. -- While MPI does not impose any scalability limitations, existing message -- passing technologies do not provide the functionality needed to allow implement --ations of MPI to meet the scalability requirements of Cplant. --\layout Standard -- --The following are properties of a network architecture that do not impose -- any inherent scalability limitations: --\layout Itemize -- --Connectionless - Many connection-oriented architectures, such as VIA\SpecialChar ~ -- --\begin_inset LatexCommand \cite{VIA} -- --\end_inset -- -- and TCP/IP sockets, have limitations on the number of peer connections -- that can be established. -- --\layout Itemize -- --Network independence - Many communication systems depend on the host processor -- to perform operations in order for messages in the network to be consumed. -- Message consumption from the network should not be dependent on host processor -- activity, such as the operating system scheduler or user-level thread scheduler. -- --\layout Itemize -- --User-level flow control - Many communication systems manage flow control -- internally to avoid depleting resources, which can significantly impact -- performance as the number of communicating processes increases. -- --\layout Itemize -- --OS Bypass - High performance network communication should not involve memory -- copies into or out of a kernel-managed protocol stack. 
-- --\layout Standard -- --The following are properties of a network architecture that do not impose -- scalability limitations for an implementation of MPI: --\layout Itemize -- --Receiver-managed - Sender-managed message passing implementations require -- a persistent block of memory to be available for every process, requiring -- memory resources to increase with job size and requiring user-level flow -- control mechanisms to manage these resources. -- --\layout Itemize -- --User-level Bypass - While OS Bypass is necessary for high-performance, it -- alone is not sufficient to support the Progress Rule of MPI asynchronous -- operations. -- --\layout Itemize -- --Unexpected messages - Few communication systems have support for receiving -- messages for which there is no prior notification. -- Support for these types of messages is necessary to avoid flow control -- and protocol overhead. -- --\layout Section -- --Background --\layout Standard -- --Portals was originally designed for and implemented on the nCube machine -- as part of the SUNMOS (Sandia/UNM OS)\SpecialChar ~ -- --\begin_inset LatexCommand \cite{SUNMOS} -- --\end_inset -- -- and Puma\SpecialChar ~ -- --\begin_inset LatexCommand \cite{PumaOS} -- --\end_inset -- -- lightweight kernel development projects. -- Portals went through two design phases, the latter of which is used on -- the 4500-node Intel TeraFLOPS machine\SpecialChar ~ -- --\begin_inset LatexCommand \cite{TFLOPS} -- --\end_inset -- --. -- Portals have been very successful in meeting the needs of such a large -- machine, not only as a layer for a high-performance MPI implementation\SpecialChar ~ -- --\begin_inset LatexCommand \cite{PumaMPI} -- --\end_inset -- --, but also for implementing the scalable run-time environment and parallel -- I/O capabilities of the machine. --\layout Standard -- --The second generation Portals implementation was designed to take full advantage -- of the hardware architecture of large MPP machines. 
-- However, efforts to implement this same design on commodity cluster technology -- identified several limitations, due to the differences in network hardware -- as well as to shortcomings in the design of Portals. --\layout Section -- --Scalability --\layout Standard -- --The primary goal in the design of Portals is scalability. -- Portals are designed specifically for an implementation capable of supporting -- a parallel job running on tens of thousands of nodes. -- Performance is critical only in terms of scalability. -- That is, the level of message passing performance is characterized by how -- far it allows an application to scale and not by how it performs in micro-bench --marks (e.g., a two node bandwidth or latency test). --\layout Standard -- --The Portals API is designed to allow for scalability, not to guarantee it. -- Portals cannot overcome the shortcomings of a poorly designed application -- program. -- Applications that have inherent scalability limitations, either through -- design or implementation, will not be transformed by Portals into scalable -- applications. -- Scalability must be addressed at all levels. -- Portals do not inhibit scalability, but do not guarantee it either. --\layout Standard -- --To support scalability, the Portals interface maintains a minimal amount -- of state. -- Portals provide reliable, ordered delivery of messages between pairs of -- processes. -- They are connectionless: a process is not required to explicitly establish -- a point-to-point connection with another process in order to communicate. -- Moreover, all buffers used in the transmission of messages are maintained -- in user space. -- The target process determines how to respond to incoming messages, and -- messages for which there are no buffers are discarded. --\layout Section -- --Communication Model --\layout Standard -- --Portals combine the characteristics of both one-sided and two-sided communication. 
-- They define a --\begin_inset Quotes eld --\end_inset -- --matching put --\begin_inset Quotes erd --\end_inset -- -- operation and a --\begin_inset Quotes eld --\end_inset -- --matching get --\begin_inset Quotes erd --\end_inset -- -- operation. -- The destination of a put (or send) is not an explicit address; instead, -- each message contains a set of match bits that allow the receiver to determine -- where incoming messages should be placed. -- This flexibility allows Portals to support both traditional one-sided operation --s and two-sided send/receive operations. --\layout Standard -- --Portals allows the target to determine whether incoming messages are acceptable. -- A target process can choose to accept message operations from any specific -- process or can choose to ignore message operations from any specific process. --\layout Section -- --Zero Copy, OS Bypass and Application Bypass --\layout Standard -- --In traditional system architectures, network packets arrive at the network -- interface card (NIC), are passed through one or more protocol layers in -- the operating system, and eventually copied into the address space of the -- application. -- As network bandwidth began to approach memory copy rates, reduction of -- memory copies became a critical concern. -- This concern led to the development of zero-copy message passing protocols -- in which message copies are eliminated or pipelined to avoid the loss of -- bandwidth. --\layout Standard -- --A typical zero-copy protocol has the NIC generate an interrupt for the CPU -- when a message arrives from the network. -- The interrupt handler then controls the transfer of the incoming message -- into the address space of the appropriate application. -- The interrupt latency, the time from the initiation of an interrupt until -- the interrupt handler is running, is fairly significant. 
-- To avoid this cost, some modern NICs have processors that can be programmed -- to implement part of a message passing protocol. -- Given a properly designed protocol, it is possible to program the NIC to -- control the transfer of incoming messages, without needing to interrupt -- the CPU. -- Because this strategy does not need to involve the OS on every message -- transfer, it is frequently called --\begin_inset Quotes eld --\end_inset -- --OS Bypass. --\begin_inset Quotes erd --\end_inset -- -- ST\SpecialChar ~ -- --\begin_inset LatexCommand \cite{ST} -- --\end_inset -- --, VIA\SpecialChar ~ -- --\begin_inset LatexCommand \cite{VIA} -- --\end_inset -- --, FM\SpecialChar ~ -- --\begin_inset LatexCommand \cite{FM2} -- --\end_inset -- --, GM\SpecialChar ~ -- --\begin_inset LatexCommand \cite{GM} -- --\end_inset -- --, and Portals are examples of OS Bypass protocols. --\layout Standard -- --Many protocols that support OS Bypass still require that the application -- actively participate in the protocol to ensure progress. -- As an example, the long message protocol of PM requires that the application -- receive and reply to a request to put or get a long message. -- This complicates the runtime environment, requiring a thread to process -- incoming requests, and significantly increases the latency required to -- initiate a long message protocol. -- The Portals message passing protocol does not require activity on the part -- of the application to ensure progress. -- We use the term --\begin_inset Quotes eld --\end_inset -- --Application Bypass --\begin_inset Quotes erd --\end_inset -- -- to refer to this aspect of the Portals protocol. --\layout Section -- --Faults --\layout Standard -- --Given the number of components that we are dealing with and the fact that -- we are interested in supporting applications that run for very long times, -- failures are inevitable. 
-- The Portals API recognizes that the underlying transport may not be able -- to successfully complete an operation once it has been initiated. -- This is reflected in the fact that the Portals API reports three types -- of events: events indicating the initiation of an operation, events indicating -- the successful completion of an operation, and events indicating the unsuccessf --ul completion of an operation. -- Every initiation event is eventually followed by a successful completion -- event or an unsuccessful completion event. --\layout Standard -- --Between the time an operation is started and the time that the operation -- completes (successfully or unsuccessfully), any memory associated with -- the operation should be considered volatile. -- That is, the memory may be changed in unpredictable ways while the operation -- is progressing. -- Once the operation completes, the memory associated with the operation -- will not be subject to further modification (from this operation). -- Notice that unsuccessful operations may alter memory in an essentially -- unpredictable fashion. --\layout Chapter -- --An Overview of the Portals API --\begin_inset LatexCommand \label{sec:apiover} -- --\end_inset -- -- --\layout Standard -- --In this section, we give a conceptual overview of the Portals API. -- The goal is to provide a context for understanding the detailed description -- of the API presented in the next section. --\layout Section -- --Data Movement --\begin_inset LatexCommand \label{sec:dmsemantics} -- --\end_inset -- -- --\layout Standard -- --A Portal represents an opening in the address space of a process. -- Other processes can use a Portal to read (get) or write (put) the memory -- associated with the portal. -- Every data movement operation involves two processes, the --\series bold --initiator --\series default -- and the --\series bold --target --\series default --. -- The initiator is the process that initiates the data movement operation. 
-- The target is the process that responds to the operation by either accepting -- the data for a put operation, or replying with the data for a get operation. --\layout Standard -- --In this discussion, activities attributed to a process may refer to activities -- that are actually performed by the process or --\emph on --on behalf of the process --\emph default --. -- The inclusiveness of our terminology is important in the context of --\emph on --application bypass --\emph default --. -- In particular, when we note that the target sends a reply in the case of -- a get operation, it is possible that reply will be generated by another -- component in the system, bypassing the application. --\layout Standard -- --Figures\SpecialChar ~ -- --\begin_inset LatexCommand \ref{fig:put} -- --\end_inset -- -- and --\begin_inset LatexCommand \ref{fig:get} -- --\end_inset -- -- present graphical interpretations of the Portal data movement operations: -- put and get. -- In the case of a put operation, the initiator sends a put request message -- containing the data to the target. -- The target translates the Portal addressing information in the request -- using its local Portal structures. -- When the request has been processed, the target optionally sends an acknowledge --ment message. --\layout Standard -- -- --\begin_inset Float figure --placement htbp --wide false --collapsed false -- --\layout Standard --\align center -- --\begin_inset Graphics FormatVersion 1 -- filename put.eps -- display color -- size_type 0 -- rotateOrigin center -- lyxsize_type 1 -- lyxwidth 218pt -- lyxheight 119pt --\end_inset -- -- --\layout Caption -- --Portal Put (Send) --\begin_inset LatexCommand \label{fig:put} -- --\end_inset -- -- --\end_inset -- -- --\layout Standard -- --In the case of a get operation, the initiator sends a get request to the -- target. -- As with the put operation, the target translates the Portal addressing -- information in the request using its local Portal structures. 
-- Once it has translated the Portal addressing information, the target sends -- a reply that includes the requested data. --\layout Standard -- -- --\begin_inset Float figure --placement htbp --wide false --collapsed false -- --\layout Standard --\align center -- --\begin_inset Graphics FormatVersion 1 -- filename get.eps -- display color -- size_type 0 -- rotateOrigin center -- lyxsize_type 1 -- lyxwidth 218pt -- lyxheight 119pt --\end_inset -- -- --\layout Caption -- --Portal Get --\begin_inset LatexCommand \label{fig:get} -- --\end_inset -- -- --\end_inset -- -- --\layout Standard -- --We should note that Portal address translations are only performed on nodes -- that respond to operations initiated by other nodes. -- Acknowledgements and replies to get operations bypass the portals address -- translation structures. --\layout Section -- --Portal Addressing --\begin_inset LatexCommand \label{subsec:paddress} -- --\end_inset -- -- --\layout Standard -- --One-sided data movement models (e.g., shmem\SpecialChar ~ -- --\begin_inset LatexCommand \cite{CraySHMEM} -- --\end_inset -- --, ST\SpecialChar ~ -- --\begin_inset LatexCommand \cite{ST} -- --\end_inset -- --, MPI-2\SpecialChar ~ -- --\begin_inset LatexCommand \cite{MPI2} -- --\end_inset -- --) typically use a triple to address memory on a remote node. -- This triple consists of a process id, memory buffer id, and offset. -- The process id identifies the target process, the memory buffer id specifies -- the region of memory to be used for the operation, and the offset specifies -- an offset within the memory buffer. --\layout Standard -- --In addition to the standard address components (process id, memory buffer -- id, and offset), a Portal address includes a set of match bits. -- This addressing model is appropriate for supporting one-sided operations -- as well as traditional two-sided message passing operations. 
-- Specifically, the Portals API provides the flexibility needed for an efficient -- implementation of MPI-1, which defines two-sided operations with one-sided -- completion semantics. --\layout Standard -- --Figure\SpecialChar ~ -- --\begin_inset LatexCommand \ref{fig:portals} -- --\end_inset -- -- presents a graphical representation of the structures used by a target -- in the interpretation of a Portal address. -- The process id is used to route the message to the appropriate node and -- is not reflected in this diagram. -- The memory buffer id, called the --\series bold --portal id --\series default --, is used as an index into the Portal table. -- Each element of the Portal table identifies a match list. -- Each element of the match list specifies two bit patterns: a set of --\begin_inset Quotes eld --\end_inset -- --don't care --\begin_inset Quotes erd --\end_inset -- -- bits, and a set of --\begin_inset Quotes eld --\end_inset -- --must match --\begin_inset Quotes erd --\end_inset -- -- bits. -- In addition to the two sets of match bits, each match list element has -- at most one memory descriptor. -- Each memory descriptor identifies a memory region and an optional event -- queue. -- The memory region specifies the memory to be used in the operation and -- the event queue is used to record information about these operations. 
--\layout Standard -- -- --\begin_inset Float figure --placement htbp --wide false --collapsed false -- --\layout Standard --\align center -- --\begin_inset Graphics FormatVersion 1 -- filename portals.eps -- display color -- size_type 0 -- rotateOrigin center -- lyxsize_type 1 -- lyxwidth 305pt -- lyxheight 106pt --\end_inset -- -- --\layout Caption -- --Portal Addressing Structures --\begin_inset LatexCommand \label{fig:portals} -- --\end_inset -- -- --\end_inset -- -- --\layout Standard -- --Figure\SpecialChar ~ -- --\begin_inset LatexCommand \ref{fig:flow} -- --\end_inset -- -- illustrates the steps involved in translating a Portal address, starting -- from the first element in a match list. -- If the match criteria specified in the match list entry are met and the -- memory descriptor list accepts the operation --\begin_inset Foot --collapsed true -- --\layout Standard -- --Memory descriptors can reject operations because a threshold has been exceeded -- or because the memory region does not have sufficient space, see Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:md} -- --\end_inset -- -- --\end_inset -- --, the operation (put or get) is performed using the memory region specified -- in the memory descriptor. -- If the memory descriptor specifies that it is to be unlinked when a threshold -- has been exceeded, the match list entry is removed from the match list -- and the resources associated with the memory descriptor and match list -- entry are reclaimed. -- Finally, if there is an event queue specified in the memory descriptor, -- the operation is logged in the event queue. 
--\layout Standard -- -- --\begin_inset Float figure --placement htbp --wide false --collapsed false -- --\layout Standard --\align center -- --\begin_inset Graphics FormatVersion 1 -- filename flow_new.eps -- display color -- size_type 0 -- rotateOrigin center -- lyxsize_type 1 -- lyxwidth 447pt -- lyxheight 282pt --\end_inset -- -- --\layout Caption -- --Portals Address Translation --\begin_inset LatexCommand \label{fig:flow} -- --\end_inset -- -- --\end_inset -- -- --\layout Standard -- --If the match criteria specified in the match list entry are not met, or -- there is no memory descriptor associated with the match list entry, or -- the memory descriptor associated with the match list entry rejects the -- operation, the address translation continues with the next match list entry. -- If the end of the match list has been reached, the address translation -- is aborted and the incoming request is discarded. --\layout Section -- --Access Control --\layout Standard -- --A process can control access to its portals using an access control list. -- Each entry in the access control list specifies a process id and a Portal -- table index. -- The access control list is actually an array of entries. -- Each incoming request includes an index into the access control list (i.e., -- a --\begin_inset Quotes eld --\end_inset -- --cookie --\begin_inset Quotes erd --\end_inset -- -- or hint). -- If the id of the process issuing the request doesn't match the id specified -- in the access control list entry or the Portal table index specified in -- the request doesn't match the Portal table index specified in the access -- control list entry, the request is rejected. -- Process identifiers and Portal table indexes may include wild card values -- to increase the flexibility of this mechanism. -- --\layout Standard -- --Two aspects of this design merit further discussion.
-- First, the model assumes that the information in a message header, the -- sender's id in particular, is trustworthy. -- In most contexts, we assume that the entity that constructs the header -- is trustworthy; however, using cryptographic techniques, we could easily -- devise a protocol that would ensure the authenticity of the sender. --\layout Standard -- --Second, because the access check is performed by the receiver, it is possible -- that a malicious process will generate thousands of messages that will -- be denied by the receiver. -- This could saturate the network and/or the receiver, resulting in a --\emph on --denial of service --\emph default -- attack. -- Moving the check to the sender using capabilities, would remove the potential -- for this form of attack. -- However, the solution introduces the complexities of capability management -- (exchange of capabilities, revocation, protections, etc). --\layout Section -- --Multi-threaded Applications --\begin_inset LatexCommand \label{sec:threads} -- --\end_inset -- -- --\layout Standard -- --The Portals API supports a generic view of multi-threaded applications. -- From the perspective of the Portals API, an application program is defined -- by a set of processes. -- Each process defines a unique address space. -- The Portals API defines access to this address space from other processes -- (using portals addressing and the data movement operations). -- A process may have one or more --\emph on --threads --\emph default -- executing in its address space. -- --\layout Standard -- --With the exception of --\emph on --PtlEQWait --\emph default -- every function in the Portals API is non-blocking and atomic with respect -- to both other threads and external operations that result from data movement -- operations. -- While individual operations are atomic, sequences of these operations may -- be interleaved between different threads and with external operations. 
-- The Portals API does not provide any mechanisms to control this interleaving. -- It is expected that these mechanisms will be provided by the API used to -- create threads. --\layout Chapter -- --The Portals API --\begin_inset LatexCommand \label{sec:api} -- --\end_inset -- -- --\layout Section -- --Naming Conventions --\begin_inset LatexCommand \label{sec:conv} -- --\end_inset -- -- --\layout Standard -- --The Portals API defines two types of entities: functions and types. -- Function names always start with --\emph on --Ptl --\emph default -- and use mixed upper and lower case. -- When used in the body of this report, function names appear in italic face, -- e.g., --\emph on --PtlInit --\emph default --. -- The functions associated with an object type will have names that start -- with --\emph on --Ptl --\emph default --, followed by the two letter object type code shown in Table\SpecialChar ~ -- --\begin_inset LatexCommand \ref{tab:objcodes} -- --\end_inset -- --. -- As an example, the function --\emph on --PtlEQAlloc --\emph default -- allocates resources for an event queue.
--\layout Standard -- -- --\begin_inset Float table --placement htbp --wide false --collapsed false -- --\layout Caption -- --Object Type Codes --\begin_inset LatexCommand \label{tab:objcodes} -- --\end_inset -- -- --\begin_inset ERT --status Collapsed -- --\layout Standard -- --\backslash --medskip --\newline -- --\end_inset -- -- --\layout Standard --\align center -- --\size small -- --\begin_inset Tabular -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\emph on --xx --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- Name --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- Section --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --EQ --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- Event Queue --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:eq} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- MD --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- Memory Descriptor --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:md} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- ME --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- Match list Entry --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:me} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- NI --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- Network Interface --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:ni} -- --\end_inset -- -- --\end_inset -- -- -- -- --\end_inset -- -- --\end_inset -- -- --\layout Standard -- --Type names use lower case with underscores to separate words. 
-- Each type name starts with --\family typewriter --ptl --\family default --_ and ends with --\family typewriter --_t --\family default --. -- When used in the body of this report, type names appear in a fixed font, -- e.g., --\family typewriter --ptl_match_bits_t --\family default --. --\layout Standard -- --Names for constants use upper case with underscores to separate words. -- Each constant name starts with --\family typewriter --PTL_ --\family default --. -- When used in the body of this report, constant names appear in a fixed font, -- e.g., --\family typewriter --PTL_OK --\family default --. --\layout Section -- --Base Types --\layout Standard -- --The Portals API defines a variety of base types. -- These types represent a simple renaming of the base types provided by the -- C programming language. -- In most cases these new type names have been introduced to improve type -- safety and to avoid issues arising from differences in representation sizes -- (e.g., 16-bit or 32-bit integers). --\layout Subsection -- --Sizes --\begin_inset LatexCommand \label{sec:size-t} -- --\end_inset -- -- --\layout Standard -- --The type --\family typewriter --ptl_size_t --\family default -- is an unsigned 64-bit integral type used for representing sizes. --\layout Subsection -- --Handles --\begin_inset LatexCommand \label{sec:handle-type} -- --\end_inset -- -- --\layout Standard -- --Objects maintained by the API are accessed through handles. -- Handle types have names of the form --\family typewriter --ptl_handle_ --\emph on --xx --\emph default --_t --\family default --, where --\emph on --xx --\emph default -- is one of the two letter object type codes shown in Table\SpecialChar ~ -- --\begin_inset LatexCommand \ref{tab:objcodes} -- --\end_inset -- --. -- For example, the type --\family typewriter --ptl_handle_ni_t --\family default -- is used for network interface handles. --\layout Standard -- --Each type of object is given a unique handle type to enhance type checking.
-- The type, --\family typewriter --ptl_handle_any_t --\family default --, can be used when a generic handle is needed. -- Every handle value can be converted into a value of type --\family typewriter --ptl_handle_any_t --\family default -- without loss of information. --\layout Standard -- --Handles are not simple values. -- Every portals object is associated with a specific network interface and -- an identifier for this interface (along with an object identifier) is part -- of the handle for the object. --\layout Standard -- --The special value --\family typewriter --PTL_EQ_NONE --\family default --, of type --\family typewriter --ptl_handle_eq_t --\family default --, is used to indicate the absence of an event queue. -- See sections --\begin_inset LatexCommand \ref{sec:mdfree} -- --\end_inset -- -- and\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:mdupdate} -- --\end_inset -- -- for uses of this value. --\layout Subsection -- --Indexes --\begin_inset LatexCommand \label{sec:index-type} -- --\end_inset -- -- --\layout Standard -- --The types --\family typewriter --ptl_pt_index_t --\family default -- and --\family typewriter --ptl_ac_index_t --\family default -- are integral types used for representing Portal table indexes and access -- control tables indexes, respectively. -- See section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:niinit} -- --\end_inset -- -- for limits on values of these types. --\layout Subsection -- --Match Bits --\begin_inset LatexCommand \label{sec:mb-type} -- --\end_inset -- -- --\layout Standard -- --The type --\family typewriter --ptl_match_bits_t --\family default -- is capable of holding unsigned 64-bit integer values. --\layout Subsection -- --Network Interfaces --\begin_inset LatexCommand \label{sec:ni-type} -- --\end_inset -- -- --\layout Standard -- --The type --\family typewriter --ptl_interface_t --\family default -- is an integral type used for identifying different network interfaces. 
-- Users will need to consult the local documentation to determine appropriate -- values for the interfaces available. -- The special value --\family typewriter --PTL_IFACE_DEFAULT --\family default -- identifies the default interface. --\layout Subsection -- --Identifiers --\begin_inset LatexCommand \label{sec:id-type} -- --\end_inset -- -- --\layout Standard -- --The type --\family typewriter --ptl_nid_t --\family default -- is an integral type used for representing node ids --\family typewriter --, ptl_pid_t --\family default -- is an integral type for representing process ids, and --\family typewriter --ptl_uid_t --\family default -- is an integral type for representing user ids. --\layout Standard -- --The special value --\family typewriter --PTL_PID_ANY --\family default -- matches any process identifier, PTL_NID_ANY matches any node identifier, -- and --\family typewriter --PTL_UID_ANY --\family default -- matches any user identifier. -- See sections --\begin_inset LatexCommand \ref{sec:meattach} -- --\end_inset -- -- and\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:acentry} -- --\end_inset -- -- for uses of these values. --\layout Subsection -- --Status Registers --\begin_inset LatexCommand \label{sec:stat-type} -- --\end_inset -- -- --\layout Standard -- --Each network interface maintains an array of status registers that can be -- accessed using the --\family typewriter --PtlNIStatus --\family default -- function (see Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:nistatus} -- --\end_inset -- --). -- The type --\family typewriter --ptl_sr_index_t --\family default -- defines the types of indexes that can be used to access the status registers. -- The only index defined for all implementations is --\family typewriter --PTL_SR_DROP_COUNT --\family default -- which identifies the status register that counts the dropped requests for -- the interface. -- Other indexes (and registers) may be defined by the implementation.
--\layout Standard -- --The type --\family typewriter --ptl_sr_value_t --\family default -- defines the types of values held in status registers. -- This is a signed integer type. -- The size is implementation dependent, but must be at least 32 bits. --\layout Section -- --Initialization and Cleanup --\begin_inset LatexCommand \label{sec:init} -- --\end_inset -- -- --\layout Standard -- --The Portals API includes a function, --\emph on --PtlInit --\emph default --, to initialize the library and a function, --\emph on --PtlFini --\emph default --, to cleanup after the application is done using the library. --\layout Subsection -- --PtlInit --\begin_inset LatexCommand \label{sec:ptlinit} -- --\end_inset -- -- --\layout LyX-Code -- --int PtlInit( int *max_interfaces ); --\layout Standard --\noindent --The --\emph on --PtlInit --\emph default -- function initializes the Portals library. -- PtlInit must be called at least once by a process before any thread makes -- a Portals function call, but may be safely called more than once. --\layout Subsubsection -- --Return Codes --\layout Description -- --PTL_OK Indicates success. -- --\layout Description -- --PTL_FAIL Indicates an error during initialization. -- --\layout Description -- --PTL_SEGV Indicates that --\family typewriter --max_interfaces --\family default -- is not a legal address. -- --\layout Subsubsection -- --Arguments --\layout Standard -- -- --\begin_inset Tabular -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --max_interfaces --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --output --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --On successful return, this location will hold the maximum number of interfaces -- that can be initialized. 
--\end_inset -- -- -- -- --\end_inset -- -- --\layout Subsection -- --PtlFini --\begin_inset LatexCommand \label{sec:ptlfini} -- --\end_inset -- -- --\layout LyX-Code -- --void PtlFini( void ); --\layout Standard --\noindent --The --\emph on --PtlFini --\emph default -- function cleans up after the Portals library is no longer needed by a process. -- After this function is called, calls to any of the functions defined by -- the Portal API or use of the structures set up by the Portals API will -- result in undefined behavior. -- This function should be called once and only once during termination by -- a process. -- Typically, this function will be called in the exit sequence of a process. -- Individual threads should not call PtlFini when they terminate. --\layout Section -- --Network Interfaces --\begin_inset LatexCommand \label{sec:ni} -- --\end_inset -- -- --\layout Standard -- --The Portals API supports the use of multiple network interfaces. -- However, each interface is treated as an independent entity. -- Combining interfaces (e.g., --\begin_inset Quotes eld --\end_inset -- --bonding --\begin_inset Quotes erd --\end_inset -- -- to create a higher bandwidth connection) must be implemented by the application -- or embedded in the underlying network. -- Interfaces are treated as independent entities to make it easier to cache -- information on individual network interface cards. --\layout Standard -- --Once initialized, each interface provides a Portal table, an access control -- table, and a collection of status registers. -- See Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:me} -- --\end_inset -- -- for a discussion of updating Portal table entries using the --\emph on --PtlMEAttach --\emph default -- function. -- See Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:ac} -- --\end_inset -- -- for a discussion of the initialization and updating of entries in the access -- control table. 
-- See Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:nistatus} -- --\end_inset -- -- for a discussion of the --\emph on --PtlNIStatus --\emph default -- function which can be used to determine the value of a status register. --\layout Standard -- --Every other type of Portal object (e.g., memory descriptor, event queue, or -- match list entry) is associated with a specific network interface. -- The association to a network interface is established when the object is -- created and is encoded in the handle for the object. --\layout Standard -- --Each network interface is initialized and shutdown independently. -- The initialization routine, --\emph on --PtlNIInit --\emph default --, returns a handle for an interface object which is used in all subsequent -- Portal operations. -- The --\emph on --PtlNIFini --\emph default -- function is used to shutdown an interface and release any resources that -- are associated with the interface. -- Network interface handles are associated with processes, not threads. -- All threads in a process share all of the network interface handles. --\layout Standard -- --The Portals API also defines the --\emph on --PtlNIStatus --\emph default -- function to query the status registers for a network interface, the --\emph on --PtlNIDist --\emph default -- function to determine the --\begin_inset Quotes eld --\end_inset -- --distance --\begin_inset Quotes erd --\end_inset -- -- to another process, and the --\emph on --PtlNIHandle --\emph default -- function to determine the network interface that an object is associated -- with. 
--\layout Subsection -- --PtlNIInit --\begin_inset LatexCommand \label{sec:niinit} -- --\end_inset -- -- --\layout LyX-Code -- --typedef struct { --\newline -- int max_match_entries; --\newline -- int max_mem_descriptors; --\newline -- int max_event_queues; --\newline -- ptl_ac_index_t max_atable_index; --\newline -- ptl_pt_index_t max_ptable_index; --\newline --} ptl_ni_limits_t; --\newline -- --\newline --int PtlNIInit( ptl_interface_t interface, --\newline -- ptl_pid_t pid, --\newline -- ptl_ni_limits_t* desired, --\newline -- ptl_ni_limits_t* actual, --\newline -- ptl_handle_ni_t* handle ); --\layout Standard -- --Values of type --\family typewriter --ptl_ni_limits_t --\family default -- include the following members: --\layout Description -- --max_match_entries Maximum number of match entries that can be allocated -- at any one time. --\layout Description -- --max_mem_descriptors Maximum number of memory descriptors that can be allocated -- at any one time. --\layout Description -- --max_event_queues Maximum number of event queues that can be allocated at -- any one time. --\layout Description -- --max_atable_index Largest access control table index for this interface, -- valid indexes range from zero to --\family typewriter --max_atable_index --\family default --, inclusive. --\layout Description -- --max_ptable_index Largest Portal table index for this interface, valid indexes -- range from zero to --\family typewriter --max_ptable_index --\family default --, inclusive. --\layout Standard --\noindent --The --\emph on --PtlNIInit --\emph default -- function is used to initialize the Portals API for a network interface. -- This function must be called at least once by each process before any other -- operations that apply to the interface by any process or thread.
-- For subsequent calls to --\shape italic --PtlNIInit --\shape default -- from within the same process (either by different threads or the same thread), -- the desired limits will be ignored and the call will return the existing -- NI handle. --\layout Subsubsection -- --Return Codes --\layout Description -- --PTL_OK Indicates success. -- --\layout Description -- --PTL_NOINIT Indicates that the Portals API has not been successfully initialized. -- --\layout Description -- --PTL_INIT_DUP Indicates a duplicate initialization of --\family typewriter --interface --\family default --. -- --\layout Description -- --PTL_INIT_INV Indicates that --\family typewriter --interface --\family default -- is not a valid network interface. -- --\layout Description -- --PTL_NOSPACE Indicates that there is insufficient memory to initialize the -- interface. -- --\layout Description -- --PTL_INV_PROC Indicates that --\family typewriter --pid --\family default -- is not a valid process id. --\layout Description -- --PTL_SEGV Indicates that --\family typewriter --actual --\family default --or --\family typewriter -- handle --\family default -- is not a legal address. -- --\layout Subsubsection -- --Arguments --\layout Standard -- -- --\begin_inset Tabular -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --interface --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --Identifies the network interface to be initialized. -- (See section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:ni-type} -- --\end_inset -- -- for a discussion of values used to identify network interfaces.) 
--\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --pid --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --Identifies the desired process id (for well known process ids). -- The value --\family typewriter --PTL_PID_ANY --\family default -- may be used to have the process id assigned by the underlying library. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --desired --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --If non-NULL, points to a structure that holds the desired limits. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --actual --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --output --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --On successful return, the location pointed to by actual will hold the actual -- limits. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --handle --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --output --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --On successful return, this location will hold a handle for the interface. --\end_inset -- -- -- -- --\end_inset -- -- --\layout Comment -- --The use of desired is implementation dependent. -- In particular, an implementation may choose to ignore this argument. 
--\layout Subsection -- --PtlNIFini --\begin_inset LatexCommand \label{sec:nifini} -- --\end_inset -- -- --\layout LyX-Code -- --int PtlNIFini( ptl_handle_ni_t interface ); --\layout Standard --\noindent --The --\emph on --PtlNIFini --\emph default -- function is used to release the resources allocated for a network interface. -- Once the --\emph on --PtlNIFini --\emph default -- operation has been started, the results of pending API operations (e.g., -- operations initiated by another thread) for this interface are undefined. -- Similarly, the effects of incoming operations (puts and gets) or return -- values (acknowledgements and replies) for this interface are undefined. --\layout Subsubsection -- --Return Codes --\layout Description -- --PTL_OK Indicates success. -- --\layout Description -- --PTL_NOINIT Indicates that the Portals API has not been successfully initialized. -- --\layout Description -- --PTL_INV_NI Indicates that --\family typewriter --interface --\family default -- is not a valid network interface handle. -- --\layout Subsubsection -- --Arguments --\layout Standard -- -- --\begin_inset Tabular -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- --interface --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --A handle for the interface to shutdown. --\end_inset -- -- -- -- --\end_inset -- -- --\layout Subsection -- --PtlNIStatus --\begin_inset LatexCommand \label{sec:nistatus} -- --\end_inset -- -- --\layout LyX-Code -- --int PtlNIStatus( ptl_handle_ni_t interface, --\newline -- ptl_sr_index_t status_register, --\newline -- ptl_sr_value_t* status ); --\layout Standard --\noindent --The --\emph on --PtlNIStatus --\emph default -- function returns the value of a status register for the specified interface. 
-- (See section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:stat-type} -- --\end_inset -- -- for more information on status register indexes and status register values.) --\layout Subsubsection -- --Return Codes --\layout Description -- --PTL_OK Indicates success. -- --\layout Description -- --PTL_NOINIT Indicates that the Portals API has not been successfully initialized. -- --\layout Description -- --PTL_INV_NI Indicates that --\family typewriter --interface --\family default -- is not a valid network interface handle. -- --\layout Description -- --PTL_INV_SR_INDX Indicates that --\family typewriter --status_register --\family default -- is not a valid status register. -- --\layout Description -- --PTL_SEGV Indicates that --\family typewriter --status --\family default -- is not a legal address. -- --\layout Subsubsection -- --Arguments --\layout Standard -- -- --\begin_inset Tabular -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --interface --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --A handle for the interface to use. -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --status_register --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --An index for the status register to read. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --status --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --output --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --On successful return, this location will hold the current value of the status -- register. 
--\end_inset -- -- -- -- --\end_inset -- -- --\layout Comment -- --The only status register that must be defined is a drop count register ( --\family typewriter --PTL_SR_DROP_COUNT --\family default --). -- Implementations may define additional status registers. -- Identifiers for the indexes associated with these registers should start -- with the prefix --\family typewriter --PTL_SR_ --\family default --. --\layout Subsection -- --PtlNIDist --\layout LyX-Code -- --int PtlNIDist( ptl_handle_ni_t interface, --\newline -- ptl_process_id_t process, --\newline -- unsigned long* distance ); --\layout Standard --\noindent --The --\emph on --PtlNIDist --\emph default -- function returns the distance to another process using the specified interface. -- Distances are only defined relative to an interface. -- Distance comparisons between different interfaces on the same process may -- be meaningless. --\layout Subsubsection -- --Return Codes --\layout Description -- --PTL_OK Indicates success. -- --\layout Description -- --PTL_NOINIT Indicates that the Portals API has not been successfully initialized. -- --\layout Description -- --PTL_INV_NI Indicates that --\family typewriter --interface --\family default -- is not a valid network interface handle. -- --\layout Description -- --PTL_INV_PROC Indicates that --\family typewriter --process --\family default -- is not a valid process identifier. -- --\layout Description -- --PTL_SEGV Indicates that --\family typewriter --distance --\family default -- is not a legal address. -- --\layout Subsubsection -- --Arguments --\layout Standard -- -- --\begin_inset Tabular -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --interface --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --A handle for the interface to use. 
-- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --process --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --An identifier for the process whose distance is being requested. -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --distance --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --output --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --On successful return, this location will hold the distance to the remote -- process. --\end_inset -- -- -- -- --\end_inset -- -- --\layout Comment -- --This function should return a static measure of distance. -- Examples include minimum latency, the inverse of available bandwidth, or -- the number of switches between the two endpoints. --\layout Subsection -- --PtlNIHandle --\layout LyX-Code -- --int PtlNIHandle( ptl_handle_any_t handle, --\newline -- ptl_handle_ni_t* interface ); --\layout Standard --\noindent --The --\emph on --PtlNIHandle --\emph default -- function returns a handle for the network interface with which the object -- identified by --\family typewriter --handle --\family default -- is associated. -- If the object identified by --\family typewriter --handle --\family default -- is a network interface, this function returns the same value it is passed. --\layout Subsubsection -- --Return Codes --\layout Description -- --PTL_OK Indicates success. -- --\layout Description -- --PTL_NOINIT Indicates that the Portals API has not been successfully initialized. -- --\layout Description -- --PTL_INV_HANDLE Indicates that --\family typewriter --handle --\family default -- is not a valid handle. -- --\layout Description -- --PTL_SEGV Indicates that --\family typewriter --interface --\family default -- is not a legal address. 
-- --\layout Subsubsection -- --Arguments --\layout Standard -- -- --\begin_inset Tabular -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --handle --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --A handle for the object. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --interface --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --output --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --On successful return, this location will hold a handle for the network interface -- associated with --\family typewriter --handle --\family default --. --\end_inset -- -- -- -- --\end_inset -- -- --\layout Comment -- --Every handle should encode the network interface and the object id relative -- to this handle. -- Both are presumably encoded using integer values. --\layout Section -- --User Identification --\begin_inset LatexCommand \label{sec:uid} -- --\end_inset -- -- --\layout Standard -- --Every process runs on behalf of a user. -- --\layout Subsection -- --PtlGetUid --\layout LyX-Code -- --int PtlGetUid( ptl_handle_ni_t ni_handle, --\newline -- ptl_uid_t* uid ); --\layout Subsubsection -- --Return Codes --\layout Description -- --PTL_OK Indicates success. -- --\layout Description -- --PTL_INV_NI Indicates that --\family typewriter --ni_handle --\family default -- is not a valid network interface handle. -- --\layout Description -- --PTL_NOINIT Indicates that the Portals API has not been successfully initialized. -- --\layout Description -- --PTL_SEGV Indicates that --\family typewriter --uid --\family default -- is not a legal address. 
-- --\layout Subsubsection -- --Arguments --\layout Standard -- -- --\begin_inset Tabular -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --handle --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --A network interface handle. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --id --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --output --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --On successful return, this location will hold the user id for the calling -- process. --\end_inset -- -- -- -- --\end_inset -- -- --\layout Comment -- --Note that user identifiers are dependent on the network interface(s). -- In particular, if a node has multiple interfaces, a process may have multiple -- user identifiers. --\layout Section -- --Process Identification --\begin_inset LatexCommand \label{sec:pid} -- --\end_inset -- -- --\layout Standard -- --Processes that use the Portals API, can be identified using a node id and -- process id. -- Every node accessible through a network interface has a unique node identifier -- and every process running on a node has a unique process identifier. -- As such, any process in the computing system can be identified by its node -- id and process id. -- --\layout Standard -- --The Portals API defines a type, --\family typewriter --ptl_process_id_t --\family default -- for representing process ids and a function, --\emph on --PtlGetId --\emph default --, which can be used to obtain the id of the current process. --\layout Comment -- --The portals API does not include thread identifiers. -- Messages are delivered to processes (address spaces) not threads (contexts -- of execution). 
--\layout Subsection -- --The Process Id Type --\begin_inset LatexCommand \label{sec:pid-type} -- --\end_inset -- -- --\layout LyX-Code -- --typedef struct { --\newline -- ptl_nid_t nid; /* node id */ --\newline -- ptl_pid_t pid; /* process id */ --\newline --} ptl_process_id_t; --\layout Standard --\noindent --The --\family typewriter --ptl_process_id_t --\family default -- type uses two identifiers to represent a process id: a node id and a process -- id. -- --\layout Subsection -- --PtlGetId --\begin_inset LatexCommand \label{sub:PtlGetId} -- --\end_inset -- -- --\layout LyX-Code -- --int PtlGetId( ptl_handle_ni_t ni_handle, --\newline -- ptl_process_id_t* id ); --\layout Subsubsection -- --Return Codes --\layout Description -- --PTL_OK Indicates success. -- --\layout Description -- --PTL_INV_NI Indicates that --\family typewriter --ni_handle --\family default -- is not a valid network interface handle. -- --\layout Description -- --PTL_NOINIT Indicates that the Portals API has not been successfully initialized. -- --\layout Description -- --PTL_SEGV Indicates that --\family typewriter --id --\family default -- is not a legal address. -- --\layout Subsubsection -- --Arguments --\layout Standard -- -- --\begin_inset Tabular -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --handle --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --A network interface handle. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --id --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --output --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --On successful return, this location will hold the id for the calling process. 
--\end_inset -- -- -- -- --\end_inset -- -- --\layout Comment -- --Note that process identifiers are dependent on the network interface(s). -- In particular, if a node has multiple interfaces, it may have multiple -- node identifiers. --\layout Section -- --Match List Entries and Match Lists --\begin_inset LatexCommand \label{sec:me} -- --\end_inset -- -- --\layout Standard -- --A match list is a chain of match list entries. -- Each match list entry includes a memory descriptor and a set of match criteria. -- The match criteria can be used to reject incoming requests based on process -- id or the match bits provided in the request. -- A match list is created using the --\emph on --PtlMEAttach --\emph default -- or --\shape italic --PtlMEAttachAny --\shape default -- functions, which create a match list consisting of a single match list -- entry, attaches the match list to the specified Portal index, and returns -- a handle for the match list entry. -- Match entries can be dynamically inserted and removed from a match list -- using the --\emph on --PtlMEInsert --\emph default -- and --\emph on --PtlMEUnlink --\emph default -- functions. --\layout Subsection -- --PtlMEAttach --\begin_inset LatexCommand \label{sec:meattach} -- --\end_inset -- -- --\layout LyX-Code -- --typedef enum { PTL_RETAIN, PTL_UNLINK } ptl_unlink_t; --\newline -- --\layout LyX-Code -- --typedef enum { PTL_INS_BEFORE, PTL_INS_AFTER } ptl_ins_pos_t; --\newline -- --\layout LyX-Code -- --int PtlMEAttach( ptl_handle_ni_t interface, --\newline -- ptl_pt_index_t index, --\newline -- ptl_process_id_t matchid, --\newline -- ptl_match_bits_t match_bits, --\newline -- ptl_match_bits_t ignorebits, --\newline -- ptl_unlink_t unlink, --\newline -- ptl_ins_pos_t position, --\newline -- ptl_handle_me_t* handle ); --\layout Standard --\noindent --Values of the type --\family typewriter --ptl_ins_pos_t --\family default -- are used to control where a new item is inserted. 
-- The value --\family typewriter --PTL_INS_BEFORE --\family default -- is used to insert the new item before the current item or before the head -- of the list. -- The value --\family typewriter --PTL_INS_AFTER --\family default -- is used to insert the new item after the current item or after the last -- item in the list. -- --\layout Standard -- --The --\emph on --PtlMEAttach --\emph default -- function creates a match list consisting of a single entry and attaches -- this list to the Portal table for --\family typewriter --interface --\family default --. --\layout Subsubsection -- --Return Codes --\layout Description -- --PTL_OK Indicates success. -- --\layout Description -- --PTL_INV_NI Indicates that --\family typewriter --interface --\family default -- is not a valid network interface handle. -- --\layout Description -- --PTL_NOINIT Indicates that the Portals API has not been successfully initialized. -- --\layout Description -- --PTL_INV_PTINDEX Indicates that --\family typewriter --index --\family default -- is not a valid Portal table index. -- --\layout Description -- --PTL_INV_PROC Indicates that --\family typewriter --matchid --\family default -- is not a valid process identifier. -- --\layout Description -- --PTL_NOSPACE Indicates that there is insufficient memory to allocate the -- match list entry. -- --\layout Description -- --PTL_ML_TOOLONG Indicates that the resulting match list is too long. -- The maximum length for a match list is defined by the interface. -- --\layout Subsubsection -- --Arguments --\layout Standard -- -- --\begin_inset Tabular -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard --\noindent -- --\family typewriter --interface --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --A handle for the interface to use. 
-- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard --\noindent -- --\family typewriter --index --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --The Portal table index where the match list should be attached. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard --\noindent -- --\family typewriter --matchid --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --Specifies the match criteria for the process id of the requestor. -- The constants --\family typewriter --PTL_PID_ANY --\family default -- and --\family typewriter --PTL_NID_ANY --\family default -- can be used to wildcard either of the ids in the --\family typewriter --ptl_process_id_t --\family default -- structure. -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard --\noindent -- --\family typewriter --match_bits, ignorebits --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --Specify the match criteria to apply to the match bits in the incoming request. -- The --\family typewriter --ignorebits --\family default -- are used to mask out insignificant bits in the incoming match bits. -- The resulting bits are then compared to the match list entry's match -- bits to determine if the incoming request meets the match criteria. 
--\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard --\noindent -- --\family typewriter --unlink --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --Indicates the match list entry should be unlinked when the last memory descripto --r associated with this match list entry is unlinked. -- (Note, the check for unlinking a match entry only occurs when a memory -- descriptor is unlinked.) --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard --\noindent -- --\family typewriter --position --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --Indicates whether the new match entry should be prepended or appended to -- the existing match list. -- If there is no existing list, this argument is ignored and the new match -- entry becomes the only entry in the list. -- Allowed constants: --\family typewriter --PTL_INS_BEFORE --\family default --, --\family typewriter --PTL_INS_AFTER --\family default --. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard --\noindent -- --\family typewriter --handle --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --output --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --On successful return, this location will hold a handle for the newly created -- match list entry. 
--\end_inset -- -- -- -- --\end_inset -- -- --\layout Subsection -- --PtlMEAttachAny --\begin_inset LatexCommand \label{sec:attachany} -- --\end_inset -- -- --\layout LyX-Code -- --int PtlMEAttachAny( ptl_handle_ni_t interface, --\newline -- ptl_pt_index_t *index, --\newline -- ptl_process_id_t matchid, --\newline -- ptl_match_bits_t match_bits, --\newline -- ptl_match_bits_t ignorebits, --\newline -- ptl_unlink_t unlink, --\newline -- ptl_handle_me_t* handle ); --\layout Standard -- --The --\emph on --PtlMEAttachAny --\emph default -- function creates a match list consisting of a single entry and attaches -- this list to an unused Portal table entry for --\family typewriter --interface --\family default --. --\layout Subsubsection -- --Return Codes --\layout Description -- --PTL_OK Indicates success. -- --\layout Description -- --PTL_INV_NI Indicates that --\family typewriter --interface --\family default -- is not a valid network interface handle. -- --\layout Description -- --PTL_NOINIT Indicates that the Portals API has not been successfully initialized. -- --\layout Description -- --PTL_INV_PROC Indicates that --\family typewriter --matchid --\family default -- is not a valid process identifier. -- --\layout Description -- --PTL_NOSPACE Indicates that there is insufficient memory to allocate the -- match list entry. -- --\layout Description -- --PTL_PT_FULL Indicates that there are no free entries in the Portal table. --\layout Subsubsection -- --Arguments --\layout Standard -- -- --\begin_inset Tabular -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard --\noindent -- --\family typewriter --interface --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --A handle for the interface to use. 
-- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard --\noindent -- --\family typewriter --index --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --output --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --On successful return, this location will hold the Portal index where the -- match list has been attached. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard --\noindent -- --\family typewriter --matchid, match_bits, ignorebits, unlink --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --See the discussion for --\shape italic --PtlMEAttach --\shape default --. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard --\noindent -- --\family typewriter --handle --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --output --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --On successful return, this location will hold a handle for the newly created -- match list entry. --\end_inset -- -- -- -- --\end_inset -- -- --\layout Subsection -- --PtlMEInsert --\begin_inset LatexCommand \label{sec:meinsert} -- --\end_inset -- -- --\layout LyX-Code -- --int PtlMEInsert( ptl_handle_me_t current, --\newline -- ptl_process_id_t matchid, --\newline -- ptl_match_bits_t match_bits, --\newline -- ptl_match_bits_t ignorebits, --\newline -- ptl_ins_pos_t position, --\newline -- ptl_handle_me_t* handle ); --\layout Standard -- --The --\emph on --PtlMEInsert --\emph default -- function creates a new match list entry and inserts this entry into the -- match list containing --\family typewriter --current --\family default --. --\layout Subsubsection -- --Return Codes --\layout Description -- --PTL_OK Indicates success. -- --\layout Description -- --PTL_NOINIT Indicates that the Portals API has not been successfully initialized. 
-- --\layout Description -- --PTL_INV_PROC Indicates that --\family typewriter --matchid --\family default -- is not a valid process identifier. -- --\layout Description -- --PTL_INV_ME Indicates that --\family typewriter --current --\family default -- is not a valid match entry handle. -- --\layout Description -- --PTL_ML_TOOLONG Indicates that the resulting match list is too long. -- The maximum length for a match list is defined by the interface. -- --\layout Description -- --PTL_NOSPACE Indicates that there is insufficient memory to allocate the -- match entry. -- --\layout Subsubsection -- --Arguments --\layout Standard -- -- --\begin_inset Tabular -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard --\noindent -- --\family typewriter --current --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --A handle for a match entry. -- The new match entry will be inserted immediately before or immediately -- after this match entry. 
--\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard --\noindent -- --\family typewriter --matchid --\family default --, --\family typewriter --match_bits --\family default --, --\family typewriter --ignorebits --\family default --, --\family typewriter --unlink --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --See the discussion for --\emph on --PtlMEAttach --\emph default -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard --\noindent -- --\family typewriter --position --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --Indicates whether the new match entry should be inserted before or after -- the --\family typewriter --current --\family default -- entry. -- Allowed constants: --\family typewriter --PTL_INS_BEFORE --\family default --, --\family typewriter --PTL_INS_AFTER --\family default --. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard --\noindent -- --\family typewriter --handle --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --output --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --See the discussion for --\emph on --PtlMEAttach --\emph default --. --\end_inset -- -- -- -- --\end_inset -- -- --\layout Subsection -- --PtlMEUnlink --\begin_inset LatexCommand \label{sec:meunlink} -- --\end_inset -- -- --\layout LyX-Code -- --int PtlMEUnlink( ptl_handle_me_t entry ); --\layout Standard --\noindent --The --\emph on --PtlMEUnlink --\emph default -- function can be used to unlink a match entry from a match list. -- This operation also releases any resources associated with the match entry -- (including the associated memory descriptor). 
-- It is an error to use the match entry handle after calling --\emph on --PtlMEUnlink --\emph default --. --\layout Subsubsection -- --Return Codes --\layout Description -- --PTL_OK Indicates success. -- --\layout Description -- --PTL_NOINIT Indicates that the Portals API has not been successfully initialized. -- --\layout Description -- --PTL_INV_ME Indicates that --\family typewriter --entry --\family default -- is not a valid match entry handle. -- --\layout Subsubsection -- --Arguments --\layout Standard -- -- --\begin_inset Tabular -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --entry --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --A handle for the match entry to be unlinked. --\end_inset -- -- -- -- --\end_inset -- -- --\layout Section -- --Memory Descriptors --\begin_inset LatexCommand \label{sec:md} -- --\end_inset -- -- --\layout Standard -- --A memory descriptor contains information about a region of an application -- process' memory and an event queue where information about the operations -- performed on the memory descriptor are recorded. -- The Portals API provides two operations to create memory descriptors: --\emph on --PtlMDAttach --\emph default --, and --\emph on --PtlMDBind --\emph default --; an operation to update a memory descriptor, --\emph on --PtlMDUpdate --\emph default --; and an operation to unlink and release the resources associated with a -- memory descriptor, --\emph on --PtlMDUnlink --\emph default --. 
--\layout Subsection -- --The Memory Descriptor Type --\begin_inset LatexCommand \label{sec:md-type} -- --\end_inset -- -- --\layout LyX-Code -- --typedef struct { --\newline -- void* start; --\newline -- ptl_size_t length; --\newline -- int threshold; --\newline -- unsigned int max_offset; --\newline -- unsigned int options; --\newline -- void* user_ptr; --\newline -- ptl_handle_eq_t eventq; --\newline --} ptl_md_t; --\layout Standard --\noindent --The --\family typewriter --ptl_md_t --\family default -- type defines the application view of a memory descriptor. -- Values of this type are used to initialize and update the memory descriptors. --\layout Subsubsection -- --Members --\layout Description -- --start,\SpecialChar ~ --length Specify the memory region associated with the memory descriptor. -- The --\family typewriter --start --\family default -- member specifies the starting address for the memory region and the --\family typewriter --length --\family default -- member specifies the length of the region. -- The --\family typewriter --start member --\family default -- can be NULL provided that the --\family typewriter --length --\family default -- member is zero. -- (Zero length buffers are useful to record events.) There are no alignment -- restrictions on the starting address or the length of the region; although, -- unaligned messages may be slower (i.e., lower bandwidth and/or longer latency) -- on some implementations. -- --\layout Description -- --threshold Specifies the maximum number of operations that can be performed -- on the memory descriptor. -- An operation is any action that could possibly generate an event (see Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:ek-type} -- --\end_inset -- -- for the different types of events). -- In the usual case, the threshold value is decremented for each operation -- on the memory descriptor. 
-- When the threshold value is zero, the memory descriptor is --\emph on --inactive --\emph default --, and does not respond to operations. -- A memory descriptor can have an initial threshold value of zero to allow -- for manipulation of an inactive memory descriptor by the local process. -- A threshold value of --\family typewriter --PTL_MD_THRESH_INF --\family default -- indicates that there is no bound on the number of operations that may be -- applied to a memory descriptor. -- Note that local operations (e.g., --\emph on --PtlMDUpdate --\emph default --) are not applied to the threshold count. -- --\layout Description -- --max_offset Specifies the maximum local offset of a memory descriptor. -- When the local offset of a memory descriptor exceeds this maximum, the -- memory descriptor becomes --\shape italic --inactive --\shape default -- and does not respond to further operations. --\layout Description -- --options Specifies the behavior of the memory descriptor. -- There are five options that can be selected: enable put operations (yes -- or no), enable get operations (yes or no), offset management (local or -- remote), message truncation (yes or no), and acknowledgement (yes or no). -- Values for this argument can be constructed using a bitwise or of the following -- values: --\begin_deeper --\begin_deeper --\layout Description -- --PTL_MD_OP_PUT Specifies that the memory descriptor will respond to --\emph on --put --\emph default -- operations. -- By default, memory descriptors reject --\emph on --put --\emph default -- operations. -- --\layout Description -- --PTL_MD_OP_GET Specifies that the memory descriptor will respond to --\emph on --get --\emph default -- operations. -- By default, memory descriptors reject --\emph on --get --\emph default -- operations. -- --\layout Description -- --PTL_MD_MANAGE_REMOTE Specifies that the offset used in accessing the memory -- region is provided by the incoming request. 
-- By default, the offset is maintained locally. -- When the offset is maintained locally, the offset is incremented by the -- length of the request so that the next operation (put and/or get) will -- access the next part of the memory region. --\layout Description -- --PTL_MD_TRUNCATE Specifies that the length provided in the incoming request -- can be reduced to match the memory available in the region. -- (The memory available in a memory region is determined by subtracting the -- offset from the length of the memory region.) By default, if the length -- in the incoming operation is greater than the amount of memory available, -- the operation is rejected. -- --\layout Description -- --PTL_MD_ACK_DISABLE Specifies that an acknowledgement should --\emph on --not --\emph default -- be sent for incoming --\emph on --put --\emph default -- operations, even if requested. -- By default, acknowledgements are sent for --\emph on --put --\emph default -- operations that request an acknowledgement. -- Acknowledgements are never sent for --\emph on --get --\emph default -- operations. -- The value sent in the reply serves as an implicit acknowledgement. -- --\end_deeper --\layout Standard -- -- --\series bold --Note --\series default --: It is not considered an error to have a memory descriptor that does not -- respond to either --\emph on --put --\emph default -- or --\emph on --get --\emph default -- operations: Every memory descriptor responds to --\emph on --reply --\emph default -- operations. -- Nor is it considered an error to have a memory descriptor that responds -- to both --\emph on --put --\emph default -- and --\emph on --get --\emph default -- operations. -- --\end_deeper --\layout Description -- --user_ptr A user-specified value that is associated with the memory descriptor. -- The value does not need to be a pointer, but must fit in the space used -- by a pointer. 
-- This value (along with other values) is recorded in events associated with -- operations on this memory descriptor. --\begin_inset Foot --collapsed true -- --\layout Standard -- --Tying the memory descriptor to a user-defined value can be useful when multiple -- memory descriptors share the same event queue or when the memory descriptor -- needs to be associated with a data structure maintained by the application. -- For example, an MPI implementation can set the --\family typewriter --user_ptr --\family default -- argument to the value of an MPI Request. -- This direct association allows for processing of memory descriptors by -- the MPI implementation without a table lookup or a search for the appropriate -- MPI Request. --\end_inset -- -- --\layout Description -- --eventq A handle for the event queue used to log the operations performed -- on the memory region. -- If this argument is --\family typewriter --PTL_EQ_NONE --\family default --, operations performed on this memory descriptor are not logged. -- --\layout Subsection -- --PtlMDAttach --\begin_inset LatexCommand \label{sec:mdattach} -- --\end_inset -- -- --\layout LyX-Code -- --int PtlMDAttach( ptl_handle_me_t match, --\newline -- ptl_md_t mem_desc, --\newline -- ptl_unlink_t unlink_op, --\newline -- ptl_unlink_t unlink_nofit, --\newline -- ptl_handle_md_t* handle ); --\layout Standard --\noindent --Values of the type --\family typewriter --ptl_unlink_t --\family default -- are used to control whether an item is unlinked from a list. -- The value --\family typewriter --PTL_UNLINK --\family default -- enables unlinking. -- The value --\family typewriter --PTL_RETAIN --\family default -- disables unlinking. --\layout Standard -- --The --\emph on --PtlMDAttach --\emph default -- operation is used to create a memory descriptor and attach it to a match -- list entry. -- An error code is returned if this match list entry already has an associated -- memory descriptor. 
--\layout Subsubsection -- --Return Codes --\layout Description -- --PTL_OK Indicates success. -- --\layout Description -- --PTL_NOINIT Indicates that the Portals API has not been successfully initialized. -- --\layout Description -- --PTL_INUSE Indicates that --\family typewriter --match --\family default -- already has a memory descriptor attached. -- --\layout Description -- --PTL_INV_ME Indicates that --\family typewriter --match --\family default -- is not a valid match entry handle. -- --\layout Description -- --PTL_ILL_MD Indicates that --\family typewriter --mem_desc --\family default -- is not a legal memory descriptor. -- This may happen because the memory region defined in --\family typewriter --mem_desc --\family default -- is invalid or because the network interface associated with the --\family typewriter --eventq --\family default -- in --\family typewriter --mem_desc --\family default -- is not the same as the network interface associated with --\family typewriter --match --\family default --. -- --\layout Description -- --PTL_NOSPACE Indicates that there is insufficient memory to allocate the -- memory descriptor. -- --\layout Subsubsection -- --Arguments --\layout Standard -- -- --\begin_inset Tabular -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --match --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --A handle for the match entry that the memory descriptor will be associated -- with. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --mem_desc --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --Provides initial values for the application visible parts of a memory descriptor. 
-- Other than its use for initialization, there is no linkage between this -- structure and the memory descriptor maintained by the API. -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --unlink_op --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --A flag to indicate whether the memory descriptor is unlinked when it becomes -- inactive, either because the operation threshold drops to zero or because -- the maximum offset has been exceeded. -- (Note, the check for unlinking a memory descriptor only occurs after -- the completion of a successful operation. -- If the threshold is set to zero during initialization or using --\emph on --PtlMDUpdate --\emph default --, the memory descriptor is --\series bold --not --\series default -- unlinked.) --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --unlink_nofit --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --A flag to indicate whether the memory descriptor is unlinked when the space -- remaining in the memory descriptor is not sufficient for a matching operation. -- If an incoming message arrives at a memory descriptor that does -- not have sufficient space and the --\series bold --PTL_MD_TRUNCATE --\series default -- option is not specified, the memory descriptor will be unlinked. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --handle --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --output --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --On successful return, this location will hold a handle for the newly created -- memory descriptor.
-- The --\family typewriter --handle --\family default -- argument can be NULL, in which case the handle will not be returned. --\end_inset -- -- -- -- --\end_inset -- -- --\layout Subsection -- --PtlMDBind --\begin_inset LatexCommand \label{sec:mdbind} -- --\end_inset -- -- --\layout LyX-Code -- --int PtlMDBind( ptl_handle_ni_t interface, --\newline -- ptl_md_t mem_desc, --\newline -- ptl_handle_md_t* handle ); --\layout Standard --\noindent --The --\emph on --PtlMDBind --\emph default -- operation is used to create a --\begin_inset Quotes eld --\end_inset -- --free floating --\begin_inset Quotes erd --\end_inset -- -- memory descriptor, i.e., a memory descriptor that is not associated with -- a match list entry. --\layout Subsubsection -- --Return Codes --\layout Description -- --PTL_OK Indicates success. -- --\layout Description -- --PTL_NOINIT Indicates that the Portals API has not been successfully initialized. -- --\layout Description -- --PTL_INV_NI Indicates that --\family typewriter --interface --\family default -- is not a valid network interface handle. -- --\layout Description -- --PTL_ILL_MD Indicates that --\family typewriter --mem_desc --\family default -- is not a legal memory descriptor. -- This may happen because the memory region defined in --\family typewriter --mem_desc --\family default -- is invalid or because the network interface associated with the --\family typewriter --eventq --\family default -- in --\family typewriter --mem_desc --\family default -- is not the same as the network interface, --\family typewriter --interface --\family default --. -- --\layout Description -- --PTL_INV_EQ Indicates that the event queue associated with --\family typewriter --mem_desc --\family default -- is not valid. -- --\layout Description -- --PTL_NOSPACE Indicates that there is insufficient memory to allocate the -- memory descriptor.
-- --\layout Description -- --PTL_SEGV Indicates that --\family typewriter --handle --\family default -- is not a legal address. -- --\layout Subsubsection -- --Arguments --\layout Standard -- -- --\begin_inset Tabular -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --interface --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --A handle for the network interface with which the memory descriptor will -- be associated. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --mem_desc --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --Provides initial values for the application visible parts of a memory descriptor. -- Other than its use for initialization, there is no linkage between this -- structure and the memory descriptor maintained by the API. -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --handle --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --output --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --On successful return, this location will hold a handle for the newly created -- memory descriptor. -- The --\family typewriter --handle --\family default -- argument must be a valid address and cannot be NULL. 
--\end_inset -- -- -- -- --\end_inset -- -- --\layout Subsection -- --PtlMDUnlink --\begin_inset LatexCommand \label{sec:mdfree} -- --\end_inset -- -- --\layout LyX-Code -- --int PtlMDUnlink( ptl_handle_md_t mem_desc ); --\layout Standard --\noindent --The --\emph on --PtlMDUnlink --\emph default -- function unlinks the memory descriptor from any match list entry it may -- be linked to and releases the resources associated with a memory descriptor. -- (This function does not free the memory region associated with the memory -- descriptor.) This function also releases the resources associated with a -- floating memory descriptor. -- Only memory descriptors with no pending operations may be unlinked. --\layout Subsubsection -- --Return Codes --\layout Description -- --PTL_OK Indicates success. -- --\layout Description -- --PTL_NOINIT Indicates that the Portals API has not been successfully initialized. -- --\layout Description -- --PTL_INV_MD Indicates that --\family typewriter --mem_desc --\family default -- is not a valid memory descriptor handle. --\layout Description -- --PTL_MD_INUSE Indicates that --\family typewriter --mem_desc --\family default -- has pending operations and cannot be unlinked. --\layout Subsubsection -- --Arguments --\layout Standard -- -- --\begin_inset Tabular -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --mem_desc --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --A handle for the memory descriptor to be released. 
--\end_inset -- -- -- -- --\end_inset -- -- --\layout Subsection -- --PtlMDUpdate --\begin_inset LatexCommand \label{sec:mdupdate} -- --\end_inset -- -- --\layout LyX-Code -- --int PtlMDUpdate( ptl_handle_md_t mem_desc, --\newline -- ptl_md_t* old_md, --\newline -- ptl_md_t* new_md, --\newline -- ptl_handle_eq_t testq ); --\layout Standard --\noindent --The --\emph on --PtlMDUpdate --\emph default -- function provides a conditional, atomic update operation for memory descriptors. -- The memory descriptor identified by --\family typewriter --mem_desc --\family default -- is only updated if the event queue identified by --\family typewriter --testq --\family default -- is empty. -- The intent is to only enable updates to the memory descriptor when no new -- messages have arrived since the last time the queue was checked. -- See section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:exmpi} -- --\end_inset -- -- for an example of how this function can be used. --\layout Standard -- --If --\family typewriter --new_md --\family default -- is not NULL, the memory descriptor identified by mem_desc will be updated -- to reflect the values in the structure pointed to by --\family typewriter --new_md --\family default -- if --\family typewriter --testq --\family default -- has the value --\family typewriter --PTL_EQ_NONE --\family default -- or if the event queue identified by --\family typewriter --testq --\family default -- is empty. -- If --\family typewriter --old_md --\family default -- is not NULL, the current value of the memory descriptor identified by --\family typewriter --mem_desc --\family default -- is recorded in the location identified by --\family typewriter --old_md --\family default --. --\layout Subsubsection -- --Return Codes --\layout Description -- --PTL_OK Indicates success. -- --\layout Description -- --PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-- --\layout Description -- --PTL_NOUPDATE Indicates that the update was not performed because --\family typewriter --testq --\family default -- was not empty. -- --\layout Description -- --PTL_INV_MD Indicates that --\family typewriter --mem_desc --\family default -- is not a valid memory descriptor handle. -- --\layout Description -- --PTL_ILL_MD Indicates that the value pointed to by --\family typewriter --new_md --\family default -- is not a legal memory descriptor (e.g., the memory region specified by the -- memory descriptor may be invalid). -- --\layout Description -- --PTL_INV_EQ Indicates that --\family typewriter --testq --\family default -- is not a valid event queue handle. -- --\layout Description -- --PTL_SEGV Indicates that --\family typewriter --new_md --\family default -- or --\family typewriter --old_md --\family default -- is not a legal address. -- --\layout Subsubsection -- --Arguments --\layout Standard -- -- --\begin_inset Tabular -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --mem_desc --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --A handle for the memory descriptor to update. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --old_md --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --output --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --If --\family typewriter --old_md --\family default -- is not the value --\family typewriter --NULL --\family default --, the current value of the memory descriptor will be stored in the location -- identified by --\family typewriter --old_md --\family default --.
--\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --new_md --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --If --\family typewriter --new_md --\family default -- is not the value --\family typewriter --NULL --\family default --, this argument provides the new values for the memory descriptor, if the -- update is performed. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --testq --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --A handle for an event queue used to predicate the update. -- If --\family typewriter --testq --\family default -- is equal to --\family typewriter --PTL_EQ_NONE --\family default --, the update is performed unconditionally. -- Otherwise, the update is performed if and only if --\family typewriter --testq --\family default -- is empty. -- If the update is not performed, the function returns the value --\family typewriter --PTL_NOUPDATE --\family default --. -- (Note, the --\family typewriter --testq --\family default -- argument does not need to be the same as the event queue associated with -- the memory descriptor.) --\end_inset -- -- -- -- --\end_inset -- -- --\layout Standard -- --The conditional update can be used to ensure that the memory descriptor -- has not changed between the time it was examined and the time it is updated. -- In particular, it is needed to support an MPI implementation where the -- activity of searching an unexpected message queue and posting a receive -- must be atomic. --\layout Section -- --Events and Event Queues --\begin_inset LatexCommand \label{sec:eq} -- --\end_inset -- -- --\layout Standard -- --Event queues are used to log operations performed on memory descriptors. 
-- They can also be used to hold acknowledgements for completed --\emph on --put --\emph default -- operations and to note when the data specified in a --\emph on --put --\emph default -- operation has been sent (i.e., when it is safe to reuse the buffer that holds -- this data). -- Multiple memory descriptors can share a single event queue. --\layout Standard -- --In addition to the --\family typewriter --ptl_handle_eq_t --\family default -- type, the Portals API defines two types associated with events: The --\family typewriter -- --\newline --ptl_event_kind_t --\family default -- type defines the kinds of events that can be stored in an event queue. -- The --\family typewriter --ptl_event_t --\family default -- type defines a structure that holds the information associated with an -- event. --\layout Standard -- --The Portals API also provides four functions for dealing with event queues: -- The --\emph on --PtlEQAlloc --\emph default -- function is used to allocate the API resources needed for an event queue, -- the --\emph on --PtlEQFree --\emph default -- function is used to release these resources, the --\emph on --PtlEQGet --\emph default -- function can be used to get the next event from an event queue, and the -- --\emph on --PtlEQWait --\emph default -- function can be used to block a process (or thread) until an event queue -- has at least one event. 
--\layout Subsection -- --Kinds of Events --\begin_inset LatexCommand \label{sec:ek-type} -- --\end_inset -- -- --\layout LyX-Code -- --typedef enum { --\newline -- PTL_EVENT_GET_START, PTL_EVENT_GET_END, PTL_EVENT_GET_FAIL, --\newline -- PTL_EVENT_PUT_START, PTL_EVENT_PUT_END, PTL_EVENT_PUT_FAIL, --\newline -- PTL_EVENT_REPLY_START, PTL_EVENT_REPLY_END, PTL_EVENT_REPLY_FAIL, --\newline -- PTL_EVENT_SEND_START, PTL_EVENT_SEND_END, PTL_EVENT_SEND_FAIL, --\newline -- PTL_EVENT_ACK, --\newline -- PTL_EVENT_UNLINK --\newline --} ptl_event_kind_t; --\layout Standard --\noindent --The Portals API defines fourteen types of events that can be logged in an -- event queue: --\layout Description -- --PTL_EVENT_GET_START A remote --\emph on --get --\emph default -- operation has been started on the memory descriptor. -- The memory region associated with this descriptor should not be altered -- until the corresponding END or FAIL event is logged. --\layout Description -- --PTL_EVENT_GET_END A previously initiated --\emph on --get --\emph default -- operation completed successfully. -- This event is logged after the reply has been sent by the local node. -- As such, the process could free the memory descriptor once it sees this -- event. -- --\layout Description -- --PTL_EVENT_GET_FAIL A previously initiated --\emph on --get --\emph default -- operation completed unsuccessfully. -- This event is logged after the reply has been sent by the local node. -- As such, the process could free the memory descriptor once it sees this -- event. -- --\layout Description -- --PTL_EVENT_PUT_START A remote --\emph on --put --\emph default -- operation has been started on the memory descriptor. -- The memory region associated with this descriptor should be considered -- volatile until the corresponding END or FAIL event is logged. --\layout Description -- --PTL_EVENT_PUT_END A previously initiated --\emph on --put --\emph default -- operation completed successfully.
-- The underlying layers will not alter the memory (on behalf of this operation) -- once this event has been logged. -- --\layout Description -- --PTL_EVENT_PUT_FAIL A previously initiated --\emph on --put --\emph default -- operation completed unsuccessfully. -- The underlying layers will not alter the memory (on behalf of this operation) -- once this event has been logged. -- --\layout Description -- --PTL_EVENT_REPLY_START A --\emph on --reply --\emph default -- operation has been started on the memory descriptor. -- --\layout Description -- --PTL_EVENT_REPLY_END A previously initiated --\emph on --reply --\emph default -- operation has completed successfully. -- This event is logged after the data (if any) from the reply has been written -- into the memory descriptor. -- --\layout Description -- --PTL_EVENT_REPLY_FAIL A previously initiated --\emph on --reply --\emph default -- operation has completed unsuccessfully. -- This event is logged after the data (if any) from the reply has been written -- into the memory descriptor. -- --\layout Description -- --PTL_EVENT_ACK An --\emph on --acknowledgement --\emph default -- was received. -- This event is logged when the acknowledgement is received. --\layout Description -- --PTL_EVENT_SEND_START An outgoing --\emph on --send --\emph default -- operation has been started. -- The memory region associated with this descriptor should not be altered -- until the corresponding END or FAIL event is logged. --\layout Description -- --PTL_EVENT_SEND_END A previously initiated --\emph on --send --\emph default -- operation has completed successfully. -- This event is logged after the entire buffer has been sent and it is safe -- for the application to reuse the buffer. -- --\layout Description -- --PTL_EVENT_SEND_FAIL A previously initiated --\emph on --send --\emph default -- operation has completed unsuccessfully. -- The process can safely manipulate the memory or free the memory descriptor -- once it sees this event.
--\layout Description -- --PTL_EVENT_UNLINK A memory descriptor associated with this event queue has -- been automatically unlinked. -- This event is not generated when a memory descriptor is explicitly unlinked -- by calling --\shape italic --PtlMDUnlink --\shape default --. -- This event does not decrement the threshold count. --\layout Subsection -- --Event Ordering --\layout Standard -- --The Portals API guarantees that when a process initiates two operations -- on a remote process, the operations will be initiated on the remote process -- in the same order that they were initiated on the original process. -- As an example, if process A initiates two --\emph on --put --\emph default -- operations, --\emph on --x --\emph default -- and --\emph on --y --\emph default --, on process B, the Portals API guarantees that process A will receive the -- --\family typewriter --PTL_EVENT_SEND_START --\family default -- events for --\emph on --x --\emph default -- and --\emph on --y --\emph default -- in the same order that process B receives the --\family typewriter --PTL_EVENT_PUT_START --\family default -- events for --\emph on --x --\emph default -- and --\emph on --y --\emph default --. -- Notice that the API does not guarantee that the start events will be delivered -- in the same order that process A initiated the --\emph on --x --\emph default -- and --\emph on --y --\emph default -- operations. -- If process A needs to ensure the ordering of these operations, it should -- include code to wait for the initiation of --\emph on --x --\emph default -- before it initiates --\emph on --y --\emph default --. --\layout Subsection -- --Failure Notification --\layout Standard -- --Operations may fail to complete successfully; however, unless the node itself -- fails, every operation that is started will eventually complete.
-- While an operation is in progress, the memory associated with the operation -- should not be viewed (in the case of a put or a reply) or altered (in the -- case of a send or get). -- Operation completion, whether successful or unsuccessful, is final. -- That is, when an operation completes, the memory associated with the operation -- will no longer be read or altered by the operation. -- A network interface can use the --\family typewriter --ptl_ni_fail_t --\family default -- to define more specific information regarding the failure of the operation -- and record this information in the --\family typewriter --ni_fail_type --\family default -- field of the event. --\layout Subsection -- --The Event Type --\begin_inset LatexCommand \label{sec:event-type} -- --\end_inset -- -- --\layout LyX-Code -- --typedef struct { --\newline -- ptl_event_kind_t type; --\newline -- ptl_process_id_t initiator; --\newline -- ptl_uid_t uid; --\layout LyX-Code -- -- ptl_pt_index_t portal; --\newline -- ptl_match_bits_t match_bits; --\newline -- ptl_size_t rlength; --\newline -- ptl_size_t mlength; --\newline -- ptl_size_t offset; --\newline -- ptl_handle_md_t md_handle; --\newline -- ptl_md_t mem_desc; --\newline -- ptl_hdr_data_t hdr_data; --\newline -- ptl_seq_t link; --\newline -- ptl_ni_fail_t ni_fail_type; --\newline -- volatile ptl_seq_t sequence; --\newline --} ptl_event_t; --\layout Standard --\noindent --An event structure includes the following members: --\layout Description -- --type Indicates the type of the event. -- --\layout Description -- --initiator The id of the initiator. -- --\layout Description -- --portal The Portal table index specified in the request. -- --\layout Description -- --match_bits A copy of the match bits specified in the request. -- See section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:me} -- --\end_inset -- -- for more information on match bits. -- --\layout Description -- --rlength The length (in bytes) specified in the request. 
-- --\layout Description -- --mlength The length (in bytes) of the data that was manipulated by the operation. -- For truncated operations, the manipulated length will be the number of -- bytes specified by the memory descriptor (possibly with an offset) operation. -- For all other operations, the manipulated length will be the length of -- the requested operation. -- --\layout Description -- --offset Is the displacement (in bytes) into the memory region that the operation -- used. -- The offset can be determined by the operation (see Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:datamovement} -- --\end_inset -- --) for a remote managed memory descriptor, or by the local memory descriptor -- (see Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:md} -- --\end_inset -- --). -- --\layout Description -- --md_handle Is the handle to the memory descriptor associated with the event. --\layout Description -- --mem_desc Is the state of the memory descriptor immediately after the event -- has been processed. -- --\layout Description -- --hdr_data 64 bits of out-of-band user data (see Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:put} -- --\end_inset -- --). -- --\layout Description -- --link The --\emph on --link --\emph default -- member is used to link --\family typewriter --START --\family default -- events with the --\family typewriter --END --\family default -- or --\family typewriter --FAIL --\family default -- event that signifies completion of the operation. -- The --\emph on --link --\emph default -- member will be the same for the two events associated with an operation. -- The link member is also used to link an --\family typewriter --UNLINK --\family default -- event with the event that caused the memory descriptor to be unlinked. --\layout Description -- --sequence The sequence number for this event. -- Sequence numbers are unique to each event. 
--\layout Comment -- --The --\emph on --sequence --\emph default -- member is the last member and is volatile to support SMP implementations. -- When an event structure is filled in, the --\emph on --sequence --\emph default -- member should be written after all other members have been updated. -- Moreover, a memory barrier should be inserted between the updating of other -- members and the updating of the --\emph on --sequence --\emph default -- member. --\layout Subsection -- --PtlEQAlloc --\begin_inset LatexCommand \label{sec:eqalloc} -- --\end_inset -- -- --\layout LyX-Code -- --int PtlEQAlloc( ptl_handle_ni_t interface, --\newline -- ptl_size_t count, --\newline -- ptl_handle_eq_t* handle ); --\layout Standard --\noindent --The --\emph on --PtlEQAlloc --\emph default -- function is used to build an event queue. -- --\layout Subsubsection -- --Return Codes --\layout Description -- --PTL_OK Indicates success. -- --\layout Description -- --PTL_NOINIT Indicates that the Portals API has not been successfully initialized. -- --\layout Description -- --PTL_INV_NI Indicates that --\family typewriter --interface --\family default -- is not a valid network interface handle. -- --\layout Description -- --PTL_NOSPACE Indicates that there is insufficient memory to allocate the -- event queue. -- --\layout Description -- --PTL_SEGV Indicates that --\family typewriter --handle --\family default -- is not a legal address. -- --\layout Subsubsection -- --Arguments --\layout Standard -- -- --\begin_inset Tabular -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --interface --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --A handle for the interface with which the event queue will be associated. 
--\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --count --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --The number of events that can be stored in the event queue. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --handle --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --output --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --On successful return, this location will hold a handle for the newly created -- event queue. --\end_inset -- -- -- -- --\end_inset -- -- --\layout Subsection -- --PtlEQFree --\begin_inset LatexCommand \label{sec:eqfree} -- --\end_inset -- -- --\layout LyX-Code -- --int PtlEQFree( ptl_handle_eq_t eventq ); --\layout Standard --\noindent --The --\emph on --PtlEQFree --\emph default -- function releases the resources associated with an event queue. -- It is up to the user to insure that no memory descriptors are associated -- with the event queue once it is freed. -- --\layout Subsubsection -- --Return Codes --\layout Description -- --PTL_OK Indicates success. -- --\layout Description -- --PTL_NOINIT Indicates that the Portals API has not been successfully initialized. -- --\layout Description -- --PTL_INV_EQ Indicates that --\family typewriter --eventq --\family default -- is not a valid event queue handle. -- --\layout Subsubsection -- --Arguments --\layout Standard -- -- --\begin_inset Tabular -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --eventq --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --A handle for the event queue to be released. 
--\end_inset -- -- -- -- --\end_inset -- -- --\layout Subsection -- --PtlEQGet --\begin_inset LatexCommand \label{sec:eqget} -- --\end_inset -- -- --\layout LyX-Code -- --int PtlEQGet( ptl_handle_eq_t eventq, --\newline -- ptl_event_t* event ); --\layout Standard --\noindent --The --\emph on --PtlEQGet --\emph default -- function is a nonblocking function that can be used to get the next event -- in an event queue. -- The event is removed from the queue. --\layout Subsubsection -- --Return Codes --\layout Description -- --PTL_OK Indicates success. -- --\layout Description -- --PTL_EQ_DROPPED Indicates success (i.e., an event is returned) and that at -- least one event between this event and the last event obtained (using --\emph on --PtlEQGet --\emph default -- or --\emph on --PtlEQWait --\emph default --) from this event queue has been dropped due to limited space in the event -- queue. -- --\layout Description -- --PTL_NOINIT Indicates that the Portals API has not been successfully initialized. -- --\layout Description -- --PTL_EQ_EMPTY Indicates that --\family typewriter --eventq --\family default -- is empty or another thread is waiting on --\emph on --PtlEQWait --\emph default --. -- --\layout Description -- --PTL_INV_EQ Indicates that --\family typewriter --eventq --\family default -- is not a valid event queue handle. -- --\layout Description -- --PTL_SEGV Indicates that --\family typewriter --event --\family default -- is not a legal address. -- --\layout Subsubsection -- --Arguments --\layout Standard -- -- --\begin_inset Tabular -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --eventq --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --A handle for the event queue.
--\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --event --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --output --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --On successful return, this location will hold the values associated with -- the next event in the event queue. --\end_inset -- -- -- -- --\end_inset -- -- --\layout Subsection -- --PtlEQWait --\begin_inset LatexCommand \label{sec:eqwait} -- --\end_inset -- -- --\layout LyX-Code -- --int PtlEQWait( ptl_handle_eq_t eventq, --\newline -- ptl_event_t* event ); --\layout Standard --\noindent --The --\emph on --PtlEQWait --\emph default -- function can be used to block the calling process (thread) until there -- is an event in an event queue. -- This function also returns the next event in the event queue and removes -- this event from the queue. -- This is the only blocking operation in the Portals 3.2 API. -- In the event that multiple threads are waiting on the same event queue, -- PtlEQWait is guaranteed to wake exactly one thread, but the order in which -- they are awakened is not specified. --\layout Subsubsection -- --Return Codes --\layout Description -- --PTL_OK Indicates success. -- --\layout Description -- --PTL_EQ_DROPPED Indicates success (i.e., an event is returned) and that at -- least one event between this event and the last event obtained (using --\emph on --PtlEQGet --\emph default -- or --\emph on --PtlEQWait --\emph default --) from this event queue has been dropped due to limited space in the event -- queue. -- --\layout Description -- --PTL_NOINIT Indicates that the Portals API has not been successfully initialized. -- --\layout Description -- --PTL_INV_EQ Indicates that --\family typewriter --eventq --\family default -- is not a valid event queue handle. -- --\layout Description -- --PTL_SEGV Indicates that --\family typewriter --event --\family default -- is not a legal address.
-- queue handle. -- --\layout Subsubsection -- --Arguments --\layout Standard --\noindent -- --\begin_inset Tabular -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --eventq --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --A handle for the event queue to wait on. -- The calling process (thread) will be blocked until --\family typewriter --eventq --\family default -- is not empty. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --event --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --output --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --On successful return, this location will hold the values associated with -- the next event in the event queue. --\end_inset -- -- -- -- --\end_inset -- -- --\layout Section -- --The Access Control Table --\begin_inset LatexCommand \label{sec:ac} -- --\end_inset -- -- --\layout Standard -- --Processes can use the access control table to control which processes are -- allowed to perform operations on Portal table entries. -- Each communication interface has a Portal table and an access control table. -- The access control table for the default interface contains an entry at -- index zero that allows all processes with the same user id to communicate. -- Entries in the access control table can be manipulated using the --\emph on --PtlACEntry --\emph default -- function. 
--\layout Subsection -- --PtlACEntry --\begin_inset LatexCommand \label{sec:acentry} -- --\end_inset -- -- --\layout LyX-Code -- --int PtlACEntry( ptl_handle_ni_t interface, --\newline -- ptl_ac_index_t index, --\newline -- ptl_process_id_t matchid, --\newline -- ptl_uid_t user_id, --\newline -- ptl_pt_index_t portal ); --\layout Standard --\noindent --The --\emph on --PtlACEntry --\emph default -- function can be used to update an entry in the access control table for -- an interface. --\layout Subsubsection -- --Return Codes --\layout Description -- --PTL_OK Indicates success. -- --\layout Description -- --PTL_NOINIT Indicates that the Portals API has not been successfully initialized. -- --\layout Description -- --PTL_INV_NI Indicates that --\family typewriter --interface --\family default -- is not a valid network interface handle. -- --\layout Description -- --PTL_AC_INV_INDEX Indicates that --\family typewriter --index --\family default -- is not a valid access control table index. -- --\layout Description -- --PTL_INV_PROC Indicates that --\family typewriter --matchid --\family default -- is not a valid process identifier. -- --\layout Description -- --PTL_PT_INV_INDEX Indicates that --\family typewriter --portal --\family default -- is not a valid Portal table index. -- --\layout Subsubsection -- --Arguments --\layout Standard -- -- --\begin_inset Tabular -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --interface --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --Identifies the interface to use. 
--\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --index --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --The index of the entry in the access control table to update. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --matchid --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --Identifies the process(es) that are allowed to perform operations. -- The constants --\family typewriter --PTL_PID_ANY --\family default -- and --\family typewriter --PTL_NID_ANY --\family default -- can be used to wildcard either of the ids in the --\family typewriter --ptl_process_id_t --\family default -- structure. -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --user_id --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --Identifies the user that is allowed to perform operations. -- The value --\family typewriter --PTL_UID_ANY --\family default -- can be used to wildcard the user. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --portal --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --Identifies the Portal index(es) that can be used. -- The value --\family typewriter --PTL_PT_INDEX_ANY --\family default -- can be used to wildcard the Portal index. 
--\end_inset -- -- -- -- --\end_inset -- -- --\layout Section -- --Data Movement Operations --\begin_inset LatexCommand \label{sec:datamovement} -- --\end_inset -- -- --\layout Standard -- --The Portals API provides two data movement operations: --\emph on --PtlPut --\emph default -- and --\emph on --PtlGet --\emph default --. --\layout Subsection -- --PtlPut --\begin_inset LatexCommand \label{sec:put} -- --\end_inset -- -- --\layout LyX-Code -- --typedef enum { PTL_ACK_REQ, PTL_NOACK_REQ } ptl_ack_req_t; --\newline -- --\newline --int PtlPut( ptl_handle_md_t mem_desc, --\newline -- ptl_ack_req_t ack_req, --\newline -- ptl_process_id_t target, --\newline -- ptl_pt_index_t portal, --\newline -- ptl_ac_index_t cookie, --\newline -- ptl_match_bits_t match_bits, --\newline -- ptl_size_t offset, --\newline -- ptl_hdr_data_t hdr_data ); --\layout Standard --\noindent --Values of the type --\family typewriter --ptl_ack_req_t --\family default -- are used to control whether an acknowledgement should be sent when the -- operation completes (i.e., when the data has been written to a memory descriptor -- of the --\family typewriter --target --\family default -- process). -- The value --\family typewriter --PTL_ACK_REQ --\family default -- requests an acknowledgement, the value --\family typewriter --PTL_NOACK_REQ --\family default -- requests that no acknowledgement should be generated. --\layout Standard -- --The --\emph on --PtlPut --\emph default -- function initiates an asynchronous put operation. 
-- There are several events associated with a put operation: initiation of -- the send on the local node ( --\family typewriter --PTL_EVENT_SEND_START --\family default --), completion of the send on the local node ( --\family typewriter --PTL_EVENT_SEND_END --\family default -- or --\family typewriter --PTL_EVENT_SEND_FAIL --\family default --), and, when the send completes successfully, the receipt of an acknowledgement -- ( --\family typewriter --PTL_EVENT_ACK --\family default --) indicating that the operation was accepted by the target. -- These events will be logged in the event queue associated with the memory -- descriptor ( --\family typewriter --mem_desc --\family default --) used in the put operation. -- Using a memory descriptor that does not have an associated event queue -- results in these events being discarded. -- In this case, the application must have another mechanism (e.g., a higher -- level protocol) for determining when it is safe to modify the memory region -- associated with the memory descriptor. --\layout Subsubsection -- --Return Codes --\layout Description -- --PTL_OK Indicates success. -- --\layout Description -- --PTL_NOINIT Indicates that the Portals API has not been successfully initialized. -- --\layout Description -- --PTL_INV_MD Indicates that --\family typewriter --mem_desc --\family default -- is not a valid memory descriptor. -- --\layout Description -- --PTL_INV_PROC Indicates that --\family typewriter --target --\family default -- is not a valid process id. -- --\layout Subsubsection -- --Arguments --\layout Standard -- -- --\begin_inset Tabular -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --mem_desc --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --A handle for the memory descriptor that describes the memory to be sent. 
-- If the memory descriptor has an event queue associated with it, it will -- be used to record events when the message has been sent (PTL_EVENT_SEND_START, -- PTL_EVENT_SEND_END). -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ack_req --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --Controls whether an acknowledgement event is requested. -- Acknowledgements are only sent when they are requested by the initiating -- process --\series bold --and --\series default -- the memory descriptor has an event queue --\series bold --and --\series default -- the target memory descriptor enables them. -- Allowed constants: --\family typewriter --PTL_ACK_REQ --\family default --, --\family typewriter --PTL_NOACK_REQ --\family default --. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --target --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --A process id for the target process. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --portal --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --The index in the remote Portal table. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --cookie --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --The index into the access control table of the target process. 
--\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --match_bits --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --The match bits to use for message selection at the target process. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --offset --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --The offset into the target memory descriptor (only used when the target -- memory descriptor has the --\family typewriter --PTL_MD_MANAGE_REMOTE --\family default -- option set). --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --hdr_data --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --64 bits of user data that can be included in message header. -- This data is written to an event queue entry at the target if an event -- queue is present on the matching memory descriptor. --\end_inset -- -- -- -- --\end_inset -- -- --\layout Subsection -- --PtlGet --\begin_inset LatexCommand \label{sec:get} -- --\end_inset -- -- --\layout LyX-Code -- --int PtlGet( ptl_handle_md_t mem_desc, --\newline -- ptl_process_id_t target, --\newline -- ptl_pt_index_t portal, --\newline -- ptl_ac_index_t cookie, --\newline -- ptl_match_bits_t match_bits, --\newline -- ptl_size_t offset ); --\layout Standard --\noindent --The --\emph on --PtlGet --\emph default -- function initiates a remote read operation. 
-- There are two event pairs associated with a get operation: when the data -- is sent from the remote node, a --\family typewriter --PTL_EVENT_GET{START|END} --\family default -- event pair is registered on the remote node; and when the data is returned -- from the remote node a --\family typewriter --PTL_EVENT_REPLY{START|END} --\family default -- event pair is registered on the local node. --\layout Subsubsection -- --Return Codes --\layout Description -- --PTL_OK Indicates success. -- --\layout Description -- --PTL_NOINIT Indicates that the Portals API has not been successfully initialized. -- --\layout Description -- --PTL_INV_MD Indicates that --\family typewriter --mem_desc --\family default -- is not a valid memory descriptor. -- --\layout Description -- --PTL_INV_PROC Indicates that --\family typewriter --target --\family default -- is not a valid process id. -- --\layout Subsubsection -- --Arguments --\layout Standard -- -- --\begin_inset Tabular -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --mem_desc --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --A handle for the memory descriptor that describes the memory into which -- the requested data will be received. -- The memory descriptor can have an event queue associated with it to record -- events, such as when the message receive has started ( --\family typewriter --PTL_EVENT_REPLY --\family default --_ --\family typewriter --START --\family default --). --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --target --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --A process id for the target process. 
--\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --portal --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --The index in the remote Portal table. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --cookie --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --The index into the access control table of the target process. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --match_bits --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --The match bits to use for message selection at the target process. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --offset --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --input --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --The offset into the target memory descriptor (only used when the target -- memory descriptor has the --\family typewriter --PTL_MD_MANAGE_REMOTE --\family default -- option set). --\end_inset -- -- -- -- --\end_inset -- -- --\layout Section -- --Summary --\layout Standard -- -- --\begin_inset LatexCommand \label{sec:summary} -- --\end_inset -- -- We conclude this section by summarizing the names introduced by the Portals -- 3.2 API. -- We start by summarizing the names of the types introduced by the API. -- This is followed by a summary of the functions introduced by the API. -- Next comes a summary of the function return codes. -- Finally, we conclude with a summary of the other constant values introduced -- by the API. 
--\layout Standard -- --Table\SpecialChar ~ -- --\begin_inset LatexCommand \ref{tab:types} -- --\end_inset -- -- presents a summary of the types defined by the Portals API. -- The first column in this table gives the type name, the second column gives -- a brief description of the type, the third column identifies the section -- where the type is defined, and the fourth column lists the functions that -- have arguments of this type. --\layout Standard -- -- --\begin_inset Float table --placement htbp --wide false --collapsed false -- --\layout Caption -- --Types Defined by the Portals 3.2 API --\begin_inset LatexCommand \label{tab:types} -- --\end_inset -- -- --\layout Standard -- -- --\begin_inset ERT --status Collapsed -- --\layout Standard -- --\backslash --medskip --\end_inset -- -- --\layout Standard --\noindent -- --\size small -- --\begin_inset Tabular -- -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold -- Name --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold -- Meaning --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold -- Sect --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold -- Functions --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_ac_index_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --indexes for an access control table --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:index-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlACEntry, PtlPut, PtlGet --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_ack_req_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --acknowledgement request types --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- 
--\begin_inset LatexCommand \ref{sec:put} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlPut --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_event_kind_t --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --kinds of events --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:ek-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlGet --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_event_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --information about events --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:event-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlEQGet --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_seq_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --event sequence number --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:event-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --PtlEQGet, PtlEQWait --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_handle_any_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --handles for any object --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:handle-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlNIHandle --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_handle_eq_t --\end_inset 
-- -- --\begin_inset Text -- --\layout Standard --\noindent --handles for event queues --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:handle-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlEQAlloc, PtlEQFree, PtlEQGet, PtlEQWait, PtlMDUpdate --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_handle_md_t --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --handles for memory descriptors --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:handle-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlMDAlloc, PtlMDUnlink, PtlMDUpdate, PtlMEAttach, PtlMEAttachAny, PtlMEInsert, -- PtlPut, PtlGet --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_handle_me_t --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --handles for match entries --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:handle-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlMEUnlink --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_handle_ni_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --handles for network interfaces --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:handle-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlNIInit, PtlNIFini, PtlNIStatus, PtlNIDist, PtlEQAlloc, PtlACEntry, PtlPut, -- PtlGet --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- 
-- --\family typewriter --ptl_nid_t --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --node identifiers --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:id-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent -- PtlGetId, PtlACEntry --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_pid_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --process identifier --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:id-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --PtlGetId, PtlACEntry --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_uid_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --user identifier --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:id-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --PtlGetUid, PtlACEntry --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_ins_pos_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --insertion position (before or after) --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:meattach} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlMEAttach, PtlMEAttachAny, PtlMEInsert --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_interface_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --identifiers for network interfaces --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand 
\ref{sec:ni-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlNIInit --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_match_bits_t --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --match (and ignore) bits --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:mb-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlPut, PtlGet --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_md_t --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --memory descriptors --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:md-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlMDAttach, PtlMDUpdate --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_ni_fail_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --network interface-specific failures --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:eq} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --PtlEQGet, PtlEQWait --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_process_id_t --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --process identifiers --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:pid-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlGetId, PtlNIDist, PtlMEAttach, PtlMEAttachAny, 
PtlACEntry, PtlPut, PtlGet -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_pt_index_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --indexes for Portal tables --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:index-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlMEAttach, PtlMEAttachAny, PtlACEntry --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_size_t --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --sizes --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:size-t} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlEQAlloc, PtlPut, PtlGet --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_sr_index_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --indexes for status registers --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:stat-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlNIStatus --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_sr_value_t --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --values in status registers --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:stat-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlNIStatus --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_unlink_t --\end_inset -- -- --\begin_inset Text -- --\layout 
Standard --\noindent --unlink options --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:meattach} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlMDAttach --\end_inset -- -- -- -- --\end_inset -- -- --\end_inset -- -- --\layout Standard -- --Table\SpecialChar ~ -- --\begin_inset LatexCommand \ref{tab:func} -- --\end_inset -- -- presents a summary of the functions defined by the Portals API. -- The first column in this table gives the name for the function, the second -- column gives a brief description of the operation implemented by the function, -- and the third column identifies the section where the function is defined. --\layout Standard -- -- --\begin_inset Float table --placement htbp --wide false --collapsed false -- --\layout Caption -- --Functions Defined by the Portals 3.2 API --\begin_inset LatexCommand \label{tab:func} -- --\end_inset -- -- --\layout Standard -- -- --\begin_inset ERT --status Collapsed -- --\layout Standard -- --\backslash --medskip --\end_inset -- -- --\layout Standard --\align center -- --\size small -- --\begin_inset Tabular -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- --Name --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- Operation --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- Section --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --PtlACEntry --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- update an entry in an access control table --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:ac} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- PtlEQAlloc --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- create an event queue --\end_inset -- -- --\begin_inset Text -- --\layout 
Standard -- -- --\begin_inset LatexCommand \ref{sec:eq} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- PtlEQGet --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- get the next event from an event queue --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:eq} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- PtlEQFree --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- release the resources for an event queue --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:eq} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- PtlEQWait --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- wait for a new event in an event queue --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:eq} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- PtlFini --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- shutdown the Portals API --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:init} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- PtlGet --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- perform a get operation --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:datamovement} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- PtlGetId --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- get the id for the current process --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:pid} -- --\end_inset -- -- --\end_inset -- -- -- -- 
--\begin_inset Text -- --\layout Standard -- -- PtlInit --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- initialize the Portals API --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:init} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- PtlMDAttach --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- create a memory descriptor and attach it to a match entry --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:md} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- PtlMDBind --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- create a free-floating memory descriptor --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:mdbind} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- PtlMDUnlink --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- remove a memory descriptor from a list and release its resources --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:md} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- PtlMDUpdate --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- update a memory descriptor --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:md} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- PtlMEAttach --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --create a match entry and attach it to a Portal table --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:me} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout 
Standard -- --PtlMEAttachAny --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --create a match entry and attach it to a free Portal table entry --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:attachany} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- PtlMEInsert --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- create a match entry and insert it in a list --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:me} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- PtlMEUnlink --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- remove a match entry from a list and release its resources --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:me} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- PtlNIDist --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- get the distance to another process --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:ni} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- PtlNIFini --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- shutdown a network interface --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:ni} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- PtlNIHandle --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- get the network interface handle for an object --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:ni} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- 
PtlNIInit --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- initialize a network interface --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:ni} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- PtlNIStatus --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- read a network interface status register --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:ni} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- PtlPut --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- perform a put operation --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:datamovement} -- --\end_inset -- -- --\end_inset -- -- -- -- --\end_inset -- -- --\end_inset -- -- --\layout Standard -- --Table\SpecialChar ~ -- --\begin_inset LatexCommand \ref{tab:retcodes} -- --\end_inset -- -- summarizes the return codes used by functions defined by the Portals API. -- All of these constants are integer values. -- The first column of this table gives the symbolic name for the constant, -- the second column gives a brief description of the value, and the third -- column identifies the functions that can return this value. 
--\layout Standard -- -- --\begin_inset Float table --placement htbp --wide false --collapsed false -- --\layout Caption -- --Function Return Codes for the Portals 3.2 API --\begin_inset LatexCommand \label{tab:retcodes} -- --\end_inset -- -- --\layout Standard -- -- --\begin_inset ERT --status Collapsed -- --\layout Standard -- --\backslash --medskip --\end_inset -- -- --\layout Standard --\align center -- --\size small -- --\begin_inset Tabular -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --Name --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --Meaning --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --Functions --\series default -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_AC_INV_INDEX --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --invalid access control table index --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent -- PtlACEntry --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_EQ_DROPPED --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --at least one event has been dropped --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent -- PtlEQGet, PtlEQWait --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_EQ_EMPTY --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --no events available in an event queue --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent -- PtlEQGet --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_FAIL --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --error during initialization or cleanup --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent -- PtlInit, PtlFini 
--\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_ILL_MD --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --illegal memory descriptor values --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlMDAttach, PtlMDBind, PtlMDUpdate --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_INIT_DUP --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --duplicate initialization of an interface --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlNIInit --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_INIT_INV --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --initialization of an invalid interface --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlNIInit --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_INUSE --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --the ME already has an MD --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlMDAttach --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_INV_ASIZE --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --invalid access control table size --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlNIInit --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_INV_EQ --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --invalid event queue handle --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlMDUpdate, PtlEQFree, PtlEQGet --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_INV_HANDLE 
--\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --invalid handle --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlNIHandle --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_INV_MD --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --invalid memory descriptor handle --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlMDUnlink, PtlMDUpdate --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_INV_ME --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --invalid match entry handle --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlMDAttach --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_INV_NI --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --invalid network interface handle --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlNIDist, PtlNIFini, PtlMDBind, PtlEQAlloc --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_INV_PROC --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --invalid process identifier --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlNIInit, PtlNIDist, PtlMEAttach, PtlMEInsert, PtlACEntry, PtlPut, PtlGet -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_INV_PTINDEX --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --invalid Portal table index --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent -- PtlMEAttach --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_INV_REG --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --invalid status register --\end_inset -- -- 
--\begin_inset Text -- --\layout Standard --\noindent -- PtlNIStatus --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_INV_SR_INDX --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --invalid status register index --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent -- PtlNIStatus --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_ML_TOOLONG --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --match list too long --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent -- PtlMEAttach, PtlMEInsert --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_MD_INUSE --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --MD has pending operations --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --PtlMDUnlink --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_NOINIT --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --uninitialized API --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent -- --\emph on --all --\emph default --, except PtlInit --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_NOSPACE --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --insufficient memory --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlNIInit, PtlMDAttach, PtlMDBind, PtlEQAlloc, PtlMEAttach, PtlMEInsert -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_NOUPDATE --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- no update was performed --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent -- PtlMDUpdate --\end_inset -- -- -- -- --\begin_inset Text -- --\layout 
Standard -- -- --\family typewriter --PTL_PT_FULL --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --Portal table is full --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --PtlMEAttachAny --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_OK --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- success --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent -- --\emph on --all --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_SEGV --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --addressing violation --\end_inset -- -- --\begin_inset Text -- --\layout Standard --\noindent --PtlNIInit, PtlNIStatus, PtlNIDist, PtlNIHandle, PtlMDBind, PtlMDUpdate, -- PtlEQAlloc, PtlEQGet, PtlEQWait --\end_inset -- -- -- -- --\end_inset -- -- --\end_inset -- -- --\layout Standard -- --Table\SpecialChar ~ -- --\begin_inset LatexCommand \ref{tab:oconsts} -- --\end_inset -- -- summarizes the remaining constant values introduced by the Portals API. -- The first column in this table presents the symbolic name for the constant, -- the second column gives a brief description of the value, the third column -- identifies the type for the value, the fourth column identifies the -- section in which the value is introduced, and the fifth column identifies -- the sections in which the value is referenced. 
--\layout Standard -- -- --\begin_inset Float table --placement htbp --wide false --collapsed false -- --\layout Caption -- --Other Constants Defined by the Portals 3.2 API --\begin_inset LatexCommand \label{tab:oconsts} -- --\end_inset -- -- --\layout Standard -- -- --\begin_inset ERT --status Collapsed -- --\layout Standard -- --\backslash --medskip --\end_inset -- -- --\layout Standard --\align center -- --\size small -- --\begin_inset Tabular -- -- -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --Name --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --Meaning --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --Base type --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --Intr. --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --Ref. --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_ACK_REQ --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --request an acknowledgement --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_ack_req_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:put} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_EQ_NONE --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --a NULL event queue handle --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_handle_eq_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:handle-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:md} -- 
--\end_inset -- --, --\begin_inset LatexCommand \ref{sec:mdupdate} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_EVENT_GET_START --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --get event start --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_event_kind_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:ek-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:get} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_EVENT_GET_END --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --get event end --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_event_kind_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:ek-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:get} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_EVENT_GET_FAIL --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --get event fail --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_event_kind_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:ek-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:get} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_EVENT_PUT_START --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --put event start 
--\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_event_kind_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:ek-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:put} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_EVENT_PUT_END --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --put event end --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_event_kind_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:ek-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:put} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_EVENT_PUT_FAIL --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --put event fail --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_event_kind_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:ek-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:put} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_EVENT_REPLY_START --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --reply event start --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_event_kind_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:ek-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- 
--\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:get} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_EVENT_REPLY_END --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --reply event end --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_event_kind_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:ek-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:get} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_EVENT_REPLY_FAIL --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --reply event fail --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_event_kind_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:ek-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:get} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_EVENT_ACK_START --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --acknowledgement event start --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_event_kind_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:ek-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:put} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_EVENT_ACK_END --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- 
--acknowledgement event end --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_event_kind_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:ek-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:put} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_EVENT_ACK_FAIL --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --acknowledgement event fail --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_event_kind_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:ek-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:put} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_EVENT_SEND_START --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --send event start --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_event_kind_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:ek-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:put} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_EVENT_SEND_END --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --send event end --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_event_kind_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:ek-type} -- --\end_inset -- -- 
--\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:put} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_EVENT_SEND_FAIL --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --send event fail --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_event_kind_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:ek-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:put} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_EVENT_UNLINK --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --unlink event --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_event_kind_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:ek-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:md-type} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_PID_ANY --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --wildcard for process id fields --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_pid_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:id-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:meattach} -- --\end_inset -- --, --\begin_inset LatexCommand \ref{sec:acentry} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family 
typewriter --PTL_NID_ANY --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --wildcard for node id fields --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_nid_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:id-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:meattach} -- --\end_inset -- --, --\begin_inset LatexCommand \ref{sec:acentry} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_UID_ANY --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --wildcard for user id --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_uid_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:id-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:meattach} -- --\end_inset -- --, --\begin_inset LatexCommand \ref{sec:acentry} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_IFACE_DEFAULT --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --default interface --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_interface_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:ni-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_INS_AFTER --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --insert after --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_ins_pos_t --\family 
default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:meinsert} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_INS_BEFORE --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --insert before --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_ins_pos_t --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:meinsert} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_MD_ACK_DISABLE --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --a flag to disable acknowledgements --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --int --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:md-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_MD_MANAGE_REMOTE --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --a flag to enable the use of remote offsets --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --int --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:md-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:put} -- --\end_inset -- --, --\begin_inset LatexCommand \ref{sec:get} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family 
typewriter --PTL_MD_OP_GET --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --a flag to enable get operations --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --int --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:md-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_MD_OP_PUT --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --a flag to enable put operations --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --int --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:md-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_MD_THRESH_INF --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --infinite threshold for a memory descriptor --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --int --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:md-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_MD_TRUNCATE --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --a flag to enable truncation of a request --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --int --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:md-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- 
--\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_NOACK_REQ --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --request no acknowledgement --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_ack_req_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:put} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_PT_INDEX_ANY --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --wildcard for Portal indexes --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_pt_index_t --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:acentry} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_RETAIN --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --disable unlinking --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_unlink_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:mdattach} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_SR_DROP_COUNT --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --index for the dropped count register --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_sr_index_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:stat-type} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset 
Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:nistatus} -- --\end_inset -- -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --PTL_UNLINK --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --enable unlinking --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_unlink_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\begin_inset LatexCommand \ref{sec:mdattach} -- --\end_inset -- -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- --\end_inset -- -- --\end_inset -- -- --\layout Chapter -- --The Semantics of Message Transmission --\begin_inset LatexCommand \label{sec:semantics} -- --\end_inset -- -- --\layout Standard -- --The portals API uses four types of messages: put requests, acknowledgements, -- get requests, and replies. -- In this section, we describe the information passed on the wire for each -- type of message. -- We also describe how this information is used to process incoming messages. --\layout Section -- --Sending Messages --\layout Standard -- --Table\SpecialChar ~ -- --\begin_inset LatexCommand \ref{tab:put-wire} -- --\end_inset -- -- summarizes the information that is transmitted for a put request. -- The first column provides a descriptive name for the information, the second -- column provides the type for this information, the third column identifies -- the source of the information, and the fourth column provides additional -- notes. -- Most information that is transmitted is obtained directly from the --\emph on --PtlPut --\emph default -- operation. -- Notice that the handle for the memory descriptor used in the --\emph on --PtlPut --\emph default -- operation is transmitted even though this value cannot be interpreted by -- the target. 
-- A value of anything other than --\family typewriter --PTL_MD_NONE --\family default --, is interpreted as a request for an acknowledgement. --\layout Standard -- -- --\begin_inset Float table --placement htbp --wide false --collapsed false -- --\layout Caption -- --Information Passed in a Put Request --\begin_inset LatexCommand \label{tab:put-wire} -- --\end_inset -- -- --\layout Standard -- -- --\begin_inset ERT --status Collapsed -- --\layout Standard -- --\backslash --medskip --\end_inset -- -- --\layout Standard --\align center -- --\size small -- --\begin_inset Tabular -- -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --Information --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --Type --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --\emph on --PtlPut --\emph default -- arg --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --Notes --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --operation --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --int --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --indicates a put request --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --initiator --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_process_id_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --local information --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --user --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_uid_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --local information --\end_inset -- -- 
-- -- --\begin_inset Text -- --\layout Standard -- --target --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_process_id_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --target --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --portal index --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_pt_index_t --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --portal --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --cookie --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_ac_index_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --cookie --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --match bits --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_match_bits_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --match_bits --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --offset --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_size_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --offset --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --memory desc --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_handle_md_t 
--\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --mem_desc --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --no ack if --\family typewriter --PTL_MD_NONE --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --length --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_size_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --mem_desc --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --length --\family default -- member --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --data --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family roman --\emph on --bytes --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --mem_desc --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --start --\family default -- and --\family typewriter --length --\family default -- members --\end_inset -- -- -- -- --\end_inset -- -- --\end_inset -- -- --\layout Standard -- --Table\SpecialChar ~ -- --\begin_inset LatexCommand \ref{tab:ack-wire} -- --\end_inset -- -- summarizes the information transmitted in an acknowledgement. -- Most of the information is simply echoed from the put request. -- Notice that the initiator and target are obtained directly from the put -- request, but are swapped in generating the acknowledgement. -- The only new piece of information in the acknowledgement is the manipulated -- length which is determined as the put request is satisfied. 
--\layout Standard -- -- --\begin_inset Float table --placement htbp --wide false --collapsed false -- --\layout Caption -- --Information Passed in an Acknowledgement --\begin_inset LatexCommand \label{tab:ack-wire} -- --\end_inset -- -- --\layout Standard -- -- --\begin_inset ERT --status Collapsed -- --\layout Standard -- --\backslash --medskip --\end_inset -- -- --\layout Standard --\align center -- --\size small -- --\begin_inset Tabular -- -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --Information --\series default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --Type --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --Put Information --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --Notes --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --operation --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --int --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- indicates an acknowledgement --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- initiator --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_process_id_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- target --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- target --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_process_id_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- initiator --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- portal index --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- 
--\family typewriter --ptl_pt_index_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- portal index --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- echo --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- match bits --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_match_bits_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- match bits --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- echo --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- offset --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_size_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- offset --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- echo --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- memory desc --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter -- ptl_handle_md_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- memory desc --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- echo --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- requested length --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter -- ptl_size_t --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- length --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- echo --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- manipulated length --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter -- ptl_size_t --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- obtained from the operation --\end_inset -- -- -- -- --\end_inset -- -- --\end_inset -- -- 
--\layout Standard -- --Table\SpecialChar ~ -- --\begin_inset LatexCommand \ref{tab:get-wire} -- --\end_inset -- -- summarizes the information that is transmitted for a get request. -- Like the information transmitted in a put request, most of the information -- transmitted in a get request is obtained directly from the --\emph on --PtlGet --\emph default -- operation. -- Unlike put requests, get requests do not include the event queue handle. -- In this case, the reply is generated whenever the operation succeeds and -- the memory descriptor must not be unlinked until the reply is received. -- As such, there is no advantage to explicitly sending the event queue handle. --\layout Standard -- -- --\begin_inset Float table --placement htbp --wide false --collapsed false -- --\layout Caption -- --Information Passed in a Get Request --\begin_inset LatexCommand \label{tab:get-wire} -- --\end_inset -- -- --\layout Standard -- -- --\begin_inset ERT --status Collapsed -- --\layout Standard -- --\backslash --medskip --\end_inset -- -- --\layout Standard --\align center -- --\size small -- --\begin_inset Tabular -- -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --Information --\series default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --Type --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --\emph on --PtlGet --\emph default -- argument --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --Notes --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --operation --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --int --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --indicates a get operation --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --initiator --\end_inset -- -- --\begin_inset 
Text -- --\layout Standard -- -- --\family typewriter --ptl_process_id_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --local information --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --user --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_uid_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --local information --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --target --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_process_id_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --target --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --portal index --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_pt_index_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --portal --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --cookie --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_ac_index_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --cookie --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --match bits --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_match_bits_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --match_bits --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset 
-- -- -- -- --\begin_inset Text -- --\layout Standard -- --offset --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_size_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --offset --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --memory desc --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_handle_md_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --mem_desc --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --length --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_size_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --mem_desc --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --length --\family default -- member --\end_inset -- -- -- -- --\end_inset -- -- --\end_inset -- -- --\layout Standard -- --Table\SpecialChar ~ -- --\begin_inset LatexCommand \ref{tab:reply-wire} -- --\end_inset -- -- summarizes the information transmitted in a reply. -- Like an acknowledgement, most of the information is simply echoed from -- the get request. -- The initiator and target are obtained directly from the get request, but -- are swapped in generating the reply. -- The only new information in the reply are the manipulated length -- and the data, which are determined as the get request is satisfied. 
--\layout Standard -- -- --\begin_inset Float table --placement htbp --wide false --collapsed false -- --\layout Caption -- --Information Passed in a Reply --\begin_inset LatexCommand \label{tab:reply-wire} -- --\end_inset -- -- --\layout Standard -- -- --\begin_inset ERT --status Collapsed -- --\layout Standard -- --\backslash --medskip --\end_inset -- -- --\layout Standard --\align center -- --\size small -- --\begin_inset Tabular -- -- -- -- -- -- -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --Information --\series default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --Type --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --Get Information --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\series bold --Notes --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --operation --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --int --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --indicates a reply --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --initiator --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_process_id_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --target --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --target --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_process_id_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --initiator --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --portal index --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- 
--\family typewriter --ptl_pt_index_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --portal index --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --echo --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --match bits --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_match_bits_t --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --match bits --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --echo --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --offset --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_size_t --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --offset --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --echo --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --memory desc --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_handle_md_t --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --memory desc --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --echo --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --requested length --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_size_t --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --length --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --echo --\end_inset -- -- -- -- --\begin_inset Text -- --\layout Standard -- --manipulated length --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\family typewriter --ptl_size_t --\family default -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --obtained from the operation --\end_inset -- -- -- -- --\begin_inset 
Text -- --\layout Standard -- --data --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- -- --\emph on --bytes --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --\end_inset -- -- --\begin_inset Text -- --\layout Standard -- --obtained from the operation --\end_inset -- -- -- -- --\end_inset -- -- --\end_inset -- -- --\layout Section -- --Receiving Messages --\begin_inset LatexCommand \label{sec:receiving} -- --\end_inset -- -- --\layout Standard -- --When an incoming message arrives on a network interface, the communication -- system first checks that the target process identified in the request is -- a valid process that has initialized the network interface (i.e., that the -- target process has a valid Portal table). -- If this test fails, the communication system discards the message and increment --s the dropped message count for the interface. -- The remainder of the processing depends on the type of the incoming message. -- Put and get messages are subject to access control checks and translation -- (searching a match list), while acknowledgement and reply messages bypass -- the access control checks and the translation step. --\layout Standard -- --Acknowledgement messages include a handle for the memory descriptor used -- in the original --\emph on --PtlPut --\emph default -- operation. -- This memory descriptor will identify the event queue where the event should -- be recorded. -- Upon receipt of an acknowledgement, the runtime system only needs to confirm -- that the memory descriptor and event queue still exist and that there is -- space for another event. -- Should any of these conditions fail, the message is simply discarded -- and the dropped message count for the interface is incremented. -- Otherwise, the system builds an acknowledgement event from the information -- in the acknowledgement message and adds it to the event queue. --\layout Standard -- --Reception of reply messages is also relatively straightforward. 
-- Each reply message includes a handle for a memory descriptor. -- If this descriptor exists, it is used to receive the message. -- A reply message will be dropped if the memory descriptor identified in -- the request doesn't exist. -- In this case, the dropped message count for the interface is -- incremented. -- This is the only reason for dropping reply messages. -- Every memory descriptor accepts and truncates incoming reply messages, -- eliminating the other potential reasons for rejecting a reply message. --\layout Standard -- --The critical step in processing an incoming put or get request involves -- mapping the request to a memory descriptor. -- This step starts by using the Portal index in the incoming request to identify -- a list of match entries. -- This list of match entries is searched in order until a match entry is -- found whose match criteria match the match bits in the incoming request -- and whose memory descriptor accepts the request. --\layout Standard -- --Because acknowledgement and reply messages are generated in response to requests -- made by the process receiving these messages, the checks performed by the -- runtime system for acknowledgements and replies are minimal. -- In contrast, put and get messages are generated by remote processes and -- the checks performed for these messages are more extensive. 
-- Incoming put or get messages may be rejected because: --\layout Itemize -- --the Portal index supplied in the request is not valid; --\layout Itemize -- --the cookie supplied in the request is not a valid access control entry; -- --\layout Itemize -- --the access control entry identified by the cookie does not match the identifier -- of the requesting process; --\layout Itemize -- --the access control entry identified by the cookie does not -- match the Portal index supplied in the request; or --\layout Itemize -- --the match bits supplied in the request do not match any of the match entries -- with a memory descriptor that accepts the request. -- --\layout Standard -- --In all cases, if the message is rejected, the incoming message is discarded -- and the dropped message count for the interface is incremented. --\layout Standard -- --A memory descriptor may reject an incoming request for any of the following -- reasons: --\layout Itemize -- --the --\family typewriter --PTL_MD_OP_PUT --\family default -- or --\family typewriter --PTL_MD_OP_GET --\family default -- option has not been enabled and the operation is put or get, respectively; -- --\layout Itemize -- --the length specified in the request is too long for the memory descriptor -- and the --\family typewriter --PTL_MD_TRUNCATE --\family default -- option has not been enabled. --\layout Chapter -- --Examples --\begin_inset LatexCommand \label{sec:examples} -- --\end_inset -- -- --\layout Comment -- --The examples presented in this chapter have not been updated to reflect -- the current API. --\layout Standard -- --In this section we present several examples to illustrate expected usage -- patterns for the Portals 3.2 API. -- The first example describes how to implement parallel servers using the -- features of the Portals 3.2 API. -- This example covers the access control list and the use of remote managed -- offsets. -- The second example presents an approach to dealing with dropped requests. 
-- This example covers aspects of match lists and memory descriptors. -- The final example covers message reception in MPI. -- This example illustrates more sophisticated uses of matching and a procedure -- to update a memory descriptor. --\layout Section -- --Parallel File Servers --\begin_inset LatexCommand \label{sec:expfs} -- --\end_inset -- -- --\layout Standard -- --Figure\SpecialChar ~ -- --\begin_inset LatexCommand \ref{fig:file} -- --\end_inset -- -- illustrates the logical structure of a parallel file server. -- In this case, the parallel server consists of four servers that stripe -- application data across four disks. -- We would like to present applications with the illusion that the file server -- is a single entity. -- We will assume that all of the processes that constitute the parallel server -- have the same user id. --\layout Standard -- -- --\begin_inset Float figure --placement htbp --wide false --collapsed false -- --\layout Standard --\align center -- --\begin_inset Graphics FormatVersion 1 -- filename file.eps -- display color -- size_type 0 -- rotateOrigin center -- lyxsize_type 1 -- lyxwidth 196pt -- lyxheight 147pt --\end_inset -- -- --\layout Caption -- --Parallel File Server --\begin_inset LatexCommand \label{fig:file} -- --\end_inset -- -- --\end_inset -- -- --\layout Standard -- --When an application establishes a connection to the parallel file server, -- it will allocate a Portal and access control list entry for communicating -- with the server. -- The access control list entry will include the Portal and match any process -- in the parallel file server, so all of the file server processes will -- have access to the portal. -- The Portal information and access control entry will be sent to the file -- server at this time. -- If the application and server need to have multiple, concurrent I/O operations, -- they can use additional portals or match entries to keep the operations -- from interfering with one another. 
--\layout Standard -- --When an application initiates an I/O operation, it first builds a memory -- descriptor that describes the memory region involved in the operation. -- This memory descriptor will enable the appropriate operation (put for read -- operations and get for write operations) and enable the use of remote offsets -- (this lets the servers decide where their data should be placed in the -- memory region). -- After creating the memory descriptor and linking it into the appropriate -- Portal entry, the application sends a read or write request (using --\emph on --PtlPut --\emph default --) to one of the file server processes. -- The file server processes can then use put or get operations with the appropria --te offsets to fill or retrieve the contents of the application's buffer. -- To know when the operation has completed, the application can add an event -- queue to the memory descriptor and add up the lengths of the remote operations -- until the sum is the size of the requested I/O operation. --\layout Section -- --Dealing with Dropped Requests --\begin_inset LatexCommand \label{sec:exdrop} -- --\end_inset -- -- --\layout Standard -- --If a process does not anticipate unexpected requests, they will be discarded. -- Applications using the Portals API can query the dropped count for the -- interface to determine the number of requests that have been dropped (see -- Section\SpecialChar ~ -- --\begin_inset LatexCommand \ref{sec:nistatus} -- --\end_inset -- --). -- While this approach minimizes resource consumption, it does not provide -- information that might be critical in debugging the implementation of a -- higher level protocol. --\layout Standard -- --To keep track of more information about dropped requests, we use a memory -- descriptor that truncates each incoming request to zero bytes and logs -- the --\begin_inset Quotes eld --\end_inset -- --dropped --\begin_inset Quotes erd --\end_inset -- -- operations in an event queue. 
-- Note that the operations are not dropped in the Portals sense, because -- the operation succeeds. --\layout Standard -- --The following code fragment illustrates an implementation of this approach. -- In this case, we assume that a thread is launched to execute the function -- --\family typewriter --watch_drop --\family default --. -- This code starts by building an event queue to log truncated operations -- and a memory descriptor to truncate the incoming requests. -- This example only captures --\begin_inset Quotes eld --\end_inset -- --dropped --\begin_inset Quotes erd --\end_inset -- -- requests for a single portal. -- In a more realistic situation, the memory descriptor would be appended -- to the match list for every portal. -- We also assume that the thread is capable of keeping up with the --\begin_inset Quotes eld --\end_inset -- --dropped --\begin_inset Quotes erd --\end_inset -- -- requests. -- If this is not the case, we could use a finite threshold on the memory -- descriptor to capture the first few dropped requests. 
--\layout LyX-Code -- -- --\size small --#include --\newline --#include --\newline --#include --\newline -- --\newline --#define DROP_SIZE 32 /* number of dropped requests to track */ --\newline -- --\newline --int watch_drop( ptl_handle_ni_t ni, ptl_pt_index_t index ) { --\newline -- ptl_handle_eq_t drop_events; --\newline -- ptl_event_t event; --\newline -- ptl_handle_md_t drop_em; --\newline -- ptl_md_t drop_desc; --\newline -- ptl_process_id_t any_proc; --\newline -- ptl_handle_me_t match_any; --\newline -- --\newline -- /* create the event queue */ --\newline -- if( PtlEQAlloc(ni, DROP_SIZE, &drop_events) != PTL_OK ) { --\newline -- fprintf( stderr, "Couldn't create the event queue --\backslash --n" ); --\newline -- exit( 1 ); --\newline -- } --\newline -- --\newline -- /* build a match entry */ --\newline -- any_proc.nid = PTL_ID_ANY; --\newline -- any_proc.pid = PTL_ID_ANY; --\newline -- PtlMEAttach( index, any_proc, 0, ~(ptl_match_bits_t)0, PTL_RETAIN, --\newline -- &match_any ); --\newline -- --\newline -- /* create the memory descriptor */ --\newline -- drop_desc.start = NULL; --\newline -- drop_desc.length = 0; --\newline -- drop_desc.threshold = PTL_MD_THRESH_INF; --\newline -- drop_desc.options = PTL_MD_OP_PUT | PTL_MD_OP_GET | PTL_MD_TRUNCATE; --\newline -- drop_desc.user_ptr = NULL; --\newline -- drop_desc.eventq = drop_events; --\newline -- if( PtlMDAttach(match_any, drop_desc, &drop_em) != PTL_OK ) { --\newline -- fprintf( stderr, "Couldn't create the memory descriptor --\backslash --n" ); --\newline -- exit( 1 ); --\newline -- } --\newline -- --\newline -- /* watch for "dropped" requests */ --\newline -- while( 1 ) { --\newline -- if( PtlEQWait( drop_events, &event ) != PTL_OK ) break; --\newline -- fprintf( stderr, "Dropped request from gid = event.initiator.gid, -- event.initiator.rid ); --\newline -- } --\newline --} --\layout Section -- --Message Transmission in MPI --\begin_inset LatexCommand \label{sec:exmpi} -- --\end_inset -- -- --\layout 
Standard -- --We conclude this section with a fairly extensive example that describes -- an approach to implementing message transmission for MPI. -- Like many MPI implementations, we distinguish two message transmission -- protocols: a short message protocol and a long message protocol. -- We use the constant --\family typewriter --MPI_LONG_LENGTH --\family default -- to determine the size of a long message. --\layout Standard -- --For small messages, the sender simply sends the message and presumes that -- the message will be received (i.e., the receiver has allocated a memory region -- to receive the message body). -- For large messages, the sender also sends the message, but does not presume -- that the message body will be saved. -- Instead, the sender builds a memory descriptor for the message and enables -- get operations on this descriptor. -- If the target does not save the body of the message, it will record an -- event for the put operation. -- When the process later issues a matching MPI receive, it will perform a -- get operation to retrieve the body of the message. --\layout Standard -- --To facilitate receive side matching based on the protocol, we use the most -- significant bit in the match bits to indicate the protocol: 1 for long -- messages and 0 for short messages. --\layout Standard -- --The following code presents a function that implements the send side of -- the protocol. -- The global variable --\family typewriter --EndGet --\family default -- is the last match entry attached to the Portal index used for posting long -- messages. -- This entry does not match any incoming requests (i.e., the memory descriptor -- rejects all get operations) and is built during initialization of the MPI -- library. -- The other global variable, --\family typewriter --MPI_NI --\family default --, is a handle for the network interface used by the MPI implementation. 
--\layout LyX-Code -- -- --\size small --extern ptl_handle_me_t EndGet; --\newline --extern ptl_handle_ni_t MPI_NI; --\newline -- --\newline --void MPIsend( void *buf, ptl_size_t len, void *data, ptl_handle_eq_t eventq, --\newline -- ptl_process_id target, ptl_match_bits_t match ) --\newline --{ --\newline -- ptl_handle_md_t send_handle; --\newline -- ptl_md_t mem_desc; --\newline -- ptl_ack_req_t want_ack; --\newline -- --\newline -- mem_desc.start = buf; --\newline -- mem_desc.length = len; --\newline -- mem_desc.threshold = 1; --\newline -- mem_desc.options = PTL_MD_GET_OP; --\newline -- mem_desc.user_ptr = data; --\newline -- mem_desc.eventq = eventq; --\newline -- --\newline -- if( len >= MPI_LONG_LENGTH ) { --\newline -- ptl_handle_me_t me_handle; --\newline -- --\newline -- /* add a match entry to the end of the get list */ --\newline -- PtlMEInsert( target, match, 0, PTL_UNLINK, PTL_INS_BEFORE, EndGet, -- &me_handle ); --\newline -- PtlMDAttach( me_handle, mem_desc, PTL_UNLINK, NULL ); --\newline -- --\newline -- /* we want an ack for long messages */ --\newline -- want_ack = PTL_ACK_REQ; --\newline -- --\newline -- /* set the protocol bit to indicate that this is a long message -- */ --\newline -- match |= 1<<63; --\newline -- } else { --\newline -- /* we don't want an ack for short messages */ --\newline -- want_ack = PTL_ACK_REQ; --\newline -- --\newline -- /* set the protocol bit to indicate that this is a short message -- */ --\newline -- match &= ~(1<<63); --\newline -- } --\newline -- --\newline -- /* create a memory descriptor and send it */ --\newline -- PtlMDBind( MPI_NI, mem_desc, &send_handle ); --\newline -- PtlPut( send_handle, want_ack, target, MPI_SEND_PINDEX, MPI_AINDEX, match, -- 0 ); --\newline --} --\layout Standard -- --The --\emph on --MPISend --\emph default -- function returns as soon as the message has been scheduled for transmission. 
-- The event queue argument, --\family typewriter --eventq --\family default --, can be used to determine the disposition of the message. -- Assuming that --\family typewriter --eventq --\family default -- is not --\family typewriter --PTL_EQ_NONE --\family default --, a --\family typewriter --PTL_EVENT_SENT --\family default -- event will be recorded for each message as the message is transmitted. -- For small messages, this is the only event that will be recorded in --\family typewriter --eventq --\family default --. -- In contrast, long messages include an explicit request for an acknowledgement. -- If the --\family typewriter --target --\family default -- process has posted a matching receive, the acknowledgement will be sent -- as the message is received. -- If a matching receive has not been posted, the message will be discarded -- and no acknowledgement will be sent. -- When the --\family typewriter --target --\family default -- process later issues a matching receive, the receive will be translated -- into a get operation and a --\family typewriter --PTL_EVENT_GET --\family default -- event will be recorded in --\family typewriter --eventq --\family default --. --\layout Standard -- --Figure\SpecialChar ~ -- --\begin_inset LatexCommand \ref{fig:mpi} -- --\end_inset -- -- illustrates the organization of the match list used for receiving MPI messages. -- The initial entries (not shown in this figure) would be used to match the -- MPI receives that have been preposted by the application. -- The preposted receives are followed by a match entry, --\emph on --RcvMark --\emph default --, that marks the boundary between preposted receives and the memory descriptors -- used for --\begin_inset Quotes eld --\end_inset -- --unexpected --\begin_inset Quotes erd --\end_inset -- -- messages. 
-- The --\emph on --RcvMark --\emph default -- entry is followed by a small collection of match entries that match unexpected -- --\begin_inset Quotes eld --\end_inset -- --short --\begin_inset Quotes erd --\end_inset -- -- messages, i.e., messages that have a 0 in the most significant bit of their -- match bits. -- The memory descriptors associated with these match entries will append -- the incoming message to the associated memory descriptor and record an -- event in an event queue for unexpected messages. -- The unexpected short message matching entries are followed by a match entry -- that will match messages that were not matched by the preceding match entries, -- i.e., the unexpected long messages. -- The memory descriptor associated with this match entry truncates the message -- body and records an event in the event queue for unexpected messages. -- Note that all of the memory descriptors used for unexpected messages share -- a common event queue. -- This makes it possible to process the unexpected messages in the order -- in which they arrived, regardless of which memory descriptor accepted them. --\layout Standard -- -- --\begin_inset Float figure --placement htbp --wide false --collapsed false -- --\layout Standard --\align center -- --\begin_inset Graphics FormatVersion 1 -- filename mpi.eps -- display color -- size_type 0 -- rotateOrigin center -- lyxsize_type 1 -- lyxwidth 389pt -- lyxheight 284pt --\end_inset -- -- --\layout Caption -- --Message Reception in MPI --\begin_inset LatexCommand \label{fig:mpi} -- --\end_inset -- -- --\end_inset -- -- --\layout Standard -- --When the local MPI process posts an MPI receive, we must first search the -- events in the unexpected message queue to see if a matching message has already -- arrived. -- If no matching message is found, a match entry for the receive is inserted -- before the --\emph on --RcvMark --\emph default -- entry--after the match entries for all of the previously posted receives -- and before the match entries for the unexpected messages. 
-- This ensures that preposted receives are matched in the order that they -- were posted (a requirement of MPI). -- --\layout Standard -- --While this strategy respects the temporal semantics of MPI, it introduces -- a race condition: a matching message might arrive after the events in the -- unexpected message queue have been searched, but before the match entry -- for the receive has been inserted in the match list. -- --\layout Standard -- --To avoid this race condition we start by setting the --\family typewriter --threshold --\family default -- of the memory descriptor to 0, making the descriptor inactive. -- We then insert the match entry into the match list and proceed to search -- the events in the unexpected message queue. -- A matching message that arrives as we are searching the unexpected message -- queue will not be accepted by the memory descriptor and, if not matched -- by an earlier match list element, will add an event to the unexpected message -- queue. -- After searching the events in the unexpected message queue, we update the -- memory descriptor, setting the threshold to 1 to activate the memory descriptor. -- This update is predicated on the condition that the unexpected message -- queue is empty. -- We repeat the process of searching the unexpected message queue until the -- update succeeds. --\layout Standard -- --The following code fragment illustrates this approach. -- Because events must be removed from the unexpected message queue to be -- examined, this code fragment assumes the existence of a user-managed event -- list, --\family typewriter --Rcvd --\family default --, for the events that have already been removed from the unexpected message -- queue. -- In an effort to keep the example focused on the basic protocol, we have -- omitted the code that would be needed to manage the memory descriptors -- used for unexpected short messages. 
-- In particular, we simply leave messages in these descriptors until they -- are received by the application. -- In a robust implementation, we would introduce code to ensure that short -- unexpected messages are removed from these memory descriptors so that they -- can be re-used. --\layout LyX-Code -- -- --\size small --extern ptl_handle_eq_t UnexpQueue; --\newline --extern ptl_handle_me_t RcvMark; --\newline --extern ptl_handle_me_t ShortMatch; --\newline -- --\newline --typedef struct event_list_tag { --\newline -- ptl_event_t event; --\newline -- struct event_list_tag* next; --\newline --} event_list; --\newline -- --\newline --extern event_list Rcvd; --\newline -- --\newline --void AppendRcvd( ptl_event_t event ) --\newline --{ --\newline -- /* append an event onto the Rcvd list */ --\newline --} --\newline -- --\newline --int SearchRcvd( void *buf, ptl_size_t len, ptl_process_id_t sender, ptl_match_bi --ts_t match, --\newline -- ptl_match_bits_t ignore, ptl_event_t *event ) --\newline --{ --\newline -- /* Search the Rcvd event queue, looking for a message that matches the -- requested message. --\newline -- * If one is found, remove the event from the Rcvd list and return it. 
-- */ --\newline --} --\newline -- --\newline --typedef enum { RECEIVED, POSTED } receive_state; --\newline -- --\newline --receive_state CopyMsg( void *buf, ptl_size_t &length, ptl_event_t event, -- ptl_md_t md_buf ) --\newline --{ --\newline -- ptl_md_t md_buf; --\newline -- ptl_handle_me_t me_handle; --\newline -- --\newline -- if( event.rlength >= MPI_LONG_LENGTH ) { --\newline -- PtlMDBind( MPI_NI, md_buf, &md_handle ); --\newline -- PtlGet( event.initiator, MPI_GET_PINDEX, 0, event.match_bits, MPI_AINDEX, -- md_handle ); --\newline -- return POSTED; --\newline -- } else { --\newline -- /* copy the message */ --\newline -- if( event.mlength < *length ) *length = event.mlength; --\newline -- memcpy( buf, (char*)event.md_desc.start+event.offset, *length ); --\newline -- return RECEIVED; --\newline -- } --\newline --} --\newline -- --\newline --receive_state MPIreceive( void *buf, ptl_size_t &len, void *MPI_data, ptl_handle --_eq_t eventq, --\newline -- ptl_process_id_t sender, ptl_match_bits_t match, -- ptl_match_bits_t ignore ) --\newline --{ --\newline -- ptl_md_t md_buf; --\newline -- ptl_handle_md_t md_handle; --\newline -- ptl_handle_me_t me_handle; --\newline -- ptl_event_t event; --\newline -- --\newline -- /* build a memory descriptor for the receive */ --\newline -- md_buf.start = buf; --\newline -- md_buf.length = *len; --\newline -- md_buf.threshold = 0; /* temporarily disabled */ --\newline -- md_buf.options = PTL_MD_PUT_OP; --\newline -- md_buf.user_ptr = MPI_data; --\newline -- md_buf.eventq = eventq; --\newline -- --\newline -- /* see if we have already received the message */ --\newline -- if( SearchRcvd(buf, len, sender, match, ignore, &event) ) --\newline -- return CopyMsg( buf, len, event, md_buf ); --\newline -- --\newline -- /* create the match entry and attach the memory descriptor */ --\newline -- PtlMEInsert(sender, match, ignore, PTL_UNLINK, PTL_INS_BEFORE, RcvMark, -- &me_handle); --\newline -- PtlMDAttach( me_handle, md_buf, 
PTL_UNLINK, &md_handle ); --\newline -- --\newline -- md_buf.threshold = 1; --\newline -- do --\newline -- if( PtlEQGet( UnexpQueue, &event ) != PTL_EQ_EMPTY ) { --\newline -- if( MPIMatch(event, match, ignore, sender) ) { --\newline -- return CopyMsg( buf, len, (char*)event.md_desc.start+event.offset, -- md_buf ); --\newline -- } else { --\newline -- AppendRcvd( event ); --\newline -- } --\newline -- } --\newline -- while( PtlMDUpdate(md_handle, NULL, &md_buf, unexp_queue) == PTL_NOUPDATE -- ); --\newline -- return POSTED; --\newline --} --\layout Chapter* -- --Acknowledgments --\layout Standard -- --Several people have contributed to the philosophy, design, and implementation -- of the Portals message passing architecture as it has evolved. -- We acknowledge the following people for their contributions: Al Audette, -- Lee Ann Fisk, David Greenberg, Tramm Hudson, Gabi Istrail, Chu Jong, Mike -- Levenhagen, Jim Otto, Mark Sears, Lance Shuler, Mack Stallcup, Jeff VanDyke, -- Dave van Dresser, Lee Ward, and Stephen Wheat. 
-- --\layout Standard -- -- --\begin_inset LatexCommand \BibTeX[ieee]{portals3} -- --\end_inset -- -- --\the_end diff --cc lnet/doc/put.fig index 5235b6d,5235b6d..0000000 deleted file mode 100644,100644 --- a/lnet/doc/put.fig +++ /dev/null @@@ -1,32 -1,32 +1,0 @@@ --#FIG 3.2 --Landscape --Center --Inches --Letter --100.00 --Single ---2 --1200 2 --6 1350 900 2175 1200 --4 0 0 100 0 0 10 0.0000 0 105 825 1350 1200 Transmission\001 --4 0 0 100 0 0 10 0.0000 0 105 285 1620 1050 Data\001 ---6 --2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 2700 1275 2700 1725 --2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 -- 0 0 1.00 60.00 120.00 -- 900 525 2700 1200 --2 2 0 1 0 7 100 0 -1 3.000 0 0 7 0 0 5 -- 0 300 1200 300 1200 2250 0 2250 0 300 --2 2 0 1 0 7 100 0 -1 3.000 0 0 7 0 0 5 -- 2400 300 3600 300 3600 2250 2400 2250 2400 300 --2 1 1 1 0 7 100 0 -1 4.000 0 0 7 1 0 2 -- 0 0 1.00 60.00 120.00 -- 2699 1788 899 1938 --4 0 0 100 0 0 10 0.0000 0 105 720 2775 1650 Translation\001 --4 1 0 100 0 0 10 0.0000 0 135 555 1800 2025 Optional\001 --4 1 0 100 0 0 10 0.0000 0 135 1170 1800 2175 Acknowledgement\001 --4 0 0 100 0 0 10 0.0000 0 105 405 2850 1500 Portal\001 --4 1 0 100 0 0 10 0.0000 0 135 405 3000 525 Target\001 --4 1 0 100 0 0 10 0.0000 0 105 540 600 525 Initiator\001 diff --cc lnet/include/.cvsignore index 94d3790,94d3790..0000000 deleted file mode 100644,100644 --- a/lnet/include/.cvsignore +++ /dev/null @@@ -1,6 -1,6 +1,0 @@@ --config.h --stamp-h --stamp-h1 --stamp-h.in --Makefile --Makefile.in diff --cc lnet/include/Makefile.am index 2b3eb8c,2b3eb8c..0000000 deleted file mode 100644,100644 --- a/lnet/include/Makefile.am +++ /dev/null @@@ -1,3 -1,3 +1,0 @@@ --SUBDIRS = linux portals -- --EXTRA_DIST = cygwin-ioctl.h diff --cc lnet/include/cygwin-ioctl.h index 8a33957,8a33957..0000000 deleted file mode 100644,100644 --- a/lnet/include/cygwin-ioctl.h +++ /dev/null @@@ -1,81 -1,81 +1,0 @@@ --/* -- * linux/ioctl.h for Linux by H.H. Bergman. 
-- */ -- --#ifndef _ASMI386_IOCTL_H --#define _ASMI386_IOCTL_H -- --/* ioctl command encoding: 32 bits total, command in lower 16 bits, -- * size of the parameter structure in the lower 14 bits of the -- * upper 16 bits. -- * Encoding the size of the parameter structure in the ioctl request -- * is useful for catching programs compiled with old versions -- * and to avoid overwriting user space outside the user buffer area. -- * The highest 2 bits are reserved for indicating the ``access mode''. -- * NOTE: This limits the max parameter size to 16kB -1 ! -- */ -- --/* -- * The following is for compatibility across the various Linux -- * platforms. The i386 ioctl numbering scheme doesn't really enforce -- * a type field. De facto, however, the top 8 bits of the lower 16 -- * bits are indeed used as a type field, so we might just as well make -- * this explicit here. Please be sure to use the decoding macros -- * below from now on. -- */ --#undef _IO --#undef _IOR --#undef _IOW --#undef _IOC --#undef IOC_IN --#undef IOC_OUT -- --#define _IOC_NRBITS 8 --#define _IOC_TYPEBITS 8 --#define _IOC_SIZEBITS 14 --#define _IOC_DIRBITS 2 -- --#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1) --#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1) --#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1) --#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1) -- --#define _IOC_NRSHIFT 0 --#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS) --#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS) --#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS) -- --/* -- * Direction bits. 
-- */ --#define _IOC_NONE 0U --#define _IOC_WRITE 1U --#define _IOC_READ 2U -- --#define _IOC(dir,type,nr,size) \ -- (((dir) << _IOC_DIRSHIFT) | \ -- ((type) << _IOC_TYPESHIFT) | \ -- ((nr) << _IOC_NRSHIFT) | \ -- ((size) << _IOC_SIZESHIFT)) -- --/* used to create numbers */ --#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0) --#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size)) --#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size)) --#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size)) -- --/* used to decode ioctl numbers.. */ --#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK) --#define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK) --#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK) --#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK) -- --/* ...and for the drivers/sound files... */ -- --#define IOC_IN (_IOC_WRITE << _IOC_DIRSHIFT) --#define IOC_OUT (_IOC_READ << _IOC_DIRSHIFT) --#define IOC_INOUT ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT) --#define IOCSIZE_MASK (_IOC_SIZEMASK << _IOC_SIZESHIFT) --#define IOCSIZE_SHIFT (_IOC_SIZESHIFT) -- --#endif /* _ASMI386_IOCTL_H */ diff --cc lnet/include/linux/Makefile.am index 3c28c6e8,3c28c6e8..0000000 deleted file mode 100644,100644 --- a/lnet/include/linux/Makefile.am +++ /dev/null @@@ -1,4 -1,4 +1,0 @@@ --linuxdir = $(includedir)/linux -- --EXTRA_DIST = kp30.h kpr.h libcfs.h lustre_list.h portals_compat25.h \ -- portals_lib.h diff --cc lnet/include/linux/kp30.h index b5f1041,db63a08..0000000 deleted file mode 100644,100644 --- a/lnet/include/linux/kp30.h +++ /dev/null @@@ -1,750 -1,731 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- */ --#ifndef _KP30_INCLUDED --#define _KP30_INCLUDED -- --#include --#define PORTAL_DEBUG - - #ifndef offsetof - # define offsetof(typ,memb) ((int)((char *)&(((typ *)0)->memb))) - #endif - - #define LOWEST_BIT_SET(x) 
((x) & ~((x) - 1)) -- --#ifdef __KERNEL__ --# include --# include --# include --# include --# include --# include --# include --# include --# include --# include -- --#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) --#define schedule_work schedule_task --#define prepare_work(wq,cb,cbdata) \ --do { \ -- INIT_TQUEUE((wq), 0, 0); \ -- PREPARE_TQUEUE((wq), (cb), (cbdata)); \ --} while (0) -- --#define PageUptodate Page_Uptodate --#define our_recalc_sigpending(current) recalc_sigpending(current) --#define num_online_cpus() smp_num_cpus --static inline void our_cond_resched(void) --{ -- if (current->need_resched) -- schedule (); --} --#define work_struct_t struct tq_struct -- --#else -- --#define prepare_work(wq,cb,cbdata) \ --do { \ -- INIT_WORK((wq), (void *)(cb), (void *)(cbdata)); \ --} while (0) --#define wait_on_page wait_on_page_locked --#define our_recalc_sigpending(current) recalc_sigpending() --#define strtok(a,b) strpbrk(a, b) --static inline void our_cond_resched(void) --{ -- cond_resched(); --} --#define work_struct_t struct work_struct -- --#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */ -- --#ifdef PORTAL_DEBUG --extern void kportal_assertion_failed(char *expr, char *file, const char *func, -- const int line); --#define LASSERT(e) ((e) ? 0 : kportal_assertion_failed( #e , __FILE__, \ -- __FUNCTION__, __LINE__)) --#define LASSERTF(cond, fmt...) \ -- do { \ -- if (unlikely(!(cond))) { \ -- portals_debug_msg(DEBUG_SUBSYSTEM, D_EMERG, __FILE__,\ -- __FUNCTION__,__LINE__, CDEBUG_STACK,\ -- "ASSERTION(" #cond ") failed:" fmt);\ -- LBUG(); \ -- } \ -- } while (0) -- --#else --#define LASSERT(e) --#define LASSERTF(cond, fmt...) 
do { } while (0) --#endif -- --#ifdef CONFIG_SMP --#define LASSERT_SPIN_LOCKED(lock) LASSERT(spin_is_locked(lock)) --#else --#define LASSERT_SPIN_LOCKED(lock) do {} while(0) --#endif -- --#ifdef __arch_um__ --#define LBUG_WITH_LOC(file, func, line) \ --do { \ -- CEMERG("LBUG - trying to dump log to /tmp/lustre-log\n"); \ -- portals_debug_dumplog(); \ -- portals_run_lbug_upcall(file, func, line); \ -- panic("LBUG"); \ --} while (0) --#else --#define LBUG_WITH_LOC(file, func, line) \ --do { \ -- CEMERG("LBUG\n"); \ -- CERROR("STACK: %s\n", portals_debug_dumpstack()); \ -- portals_debug_dumplog(); \ -- portals_run_lbug_upcall(file, func, line); \ -- set_task_state(current, TASK_UNINTERRUPTIBLE); \ -- schedule(); \ --} while (0) --#endif /* __arch_um__ */ -- --#define LBUG() LBUG_WITH_LOC(__FILE__, __FUNCTION__, __LINE__) -- --/* -- * Memory -- */ --#ifdef PORTAL_DEBUG --extern atomic_t portal_kmemory; -- --# define portal_kmem_inc(ptr, size) \ --do { \ -- atomic_add(size, &portal_kmemory); \ --} while (0) -- --# define portal_kmem_dec(ptr, size) do { \ -- atomic_sub(size, &portal_kmemory); \ --} while (0) -- --#else --# define portal_kmem_inc(ptr, size) do {} while (0) --# define portal_kmem_dec(ptr, size) do {} while (0) --#endif /* PORTAL_DEBUG */ -- --#define PORTAL_VMALLOC_SIZE 16384 -- --#define PORTAL_ALLOC_GFP(ptr, size, mask) \ --do { \ -- LASSERT(!in_interrupt() || \ -- (size <= PORTAL_VMALLOC_SIZE && mask == GFP_ATOMIC)); \ -- if ((size) > PORTAL_VMALLOC_SIZE) \ -- (ptr) = vmalloc(size); \ -- else \ -- (ptr) = kmalloc((size), (mask)); \ -- if ((ptr) == NULL) { \ -- CERROR("PORTALS: out of memory at %s:%d (tried to alloc '"\ -- #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));\ -- CERROR("PORTALS: %d total bytes allocated by portals\n", \ -- atomic_read(&portal_kmemory)); \ -- } else { \ -- portal_kmem_inc((ptr), (size)); \ -- memset((ptr), 0, (size)); \ -- } \ -- CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d).\n", \ -- (int)(size), (ptr), 
atomic_read (&portal_kmemory)); \ --} while (0) -- --#define PORTAL_ALLOC(ptr, size) \ -- PORTAL_ALLOC_GFP(ptr, size, GFP_NOFS) -- --#define PORTAL_ALLOC_ATOMIC(ptr, size) \ -- PORTAL_ALLOC_GFP(ptr, size, GFP_ATOMIC) -- --#define PORTAL_FREE(ptr, size) \ --do { \ -- int s = (size); \ -- if ((ptr) == NULL) { \ -- CERROR("PORTALS: free NULL '" #ptr "' (%d bytes) at " \ -- "%s:%d\n", s, __FILE__, __LINE__); \ -- break; \ -- } \ -- if (s > PORTAL_VMALLOC_SIZE) \ -- vfree(ptr); \ -- else \ -- kfree(ptr); \ -- portal_kmem_dec((ptr), s); \ -- CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \ -- s, (ptr), atomic_read(&portal_kmemory)); \ --} while (0) -- --/* ------------------------------------------------------------------- */ -- --#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -- --#define PORTAL_SYMBOL_REGISTER(x) inter_module_register(#x, THIS_MODULE, &x) --#define PORTAL_SYMBOL_UNREGISTER(x) inter_module_unregister(#x) -- --#define PORTAL_SYMBOL_GET(x) ((typeof(&x))inter_module_get(#x)) --#define PORTAL_SYMBOL_PUT(x) inter_module_put(#x) -- --#define PORTAL_MODULE_USE MOD_INC_USE_COUNT --#define PORTAL_MODULE_UNUSE MOD_DEC_USE_COUNT --#else -- --#define PORTAL_SYMBOL_REGISTER(x) --#define PORTAL_SYMBOL_UNREGISTER(x) -- --#define PORTAL_SYMBOL_GET(x) symbol_get(x) --#define PORTAL_SYMBOL_PUT(x) symbol_put(x) -- --#define PORTAL_MODULE_USE try_module_get(THIS_MODULE) --#define PORTAL_MODULE_UNUSE module_put(THIS_MODULE) -- --#endif -- --/******************************************************************************/ -- --#ifdef PORTALS_PROFILING --#define prof_enum(FOO) PROF__##FOO --enum { -- prof_enum(our_recvmsg), -- prof_enum(our_sendmsg), -- prof_enum(socknal_recv), -- prof_enum(lib_parse), -- prof_enum(conn_list_walk), -- prof_enum(memcpy), -- prof_enum(lib_finalize), -- prof_enum(pingcli_time), -- prof_enum(gmnal_send), -- prof_enum(gmnal_recv), -- MAX_PROFS --}; -- --struct prof_ent { -- char *str; -- /* hrmph. wrap-tastic. 
*/ -- u32 starts; -- u32 finishes; -- cycles_t total_cycles; -- cycles_t start; -- cycles_t end; --}; -- --extern struct prof_ent prof_ents[MAX_PROFS]; -- --#define PROF_START(FOO) \ -- do { \ -- struct prof_ent *pe = &prof_ents[PROF__##FOO]; \ -- pe->starts++; \ -- pe->start = get_cycles(); \ -- } while (0) -- --#define PROF_FINISH(FOO) \ -- do { \ -- struct prof_ent *pe = &prof_ents[PROF__##FOO]; \ -- pe->finishes++; \ -- pe->end = get_cycles(); \ -- pe->total_cycles += (pe->end - pe->start); \ -- } while (0) --#else /* !PORTALS_PROFILING */ --#define PROF_START(FOO) do {} while(0) --#define PROF_FINISH(FOO) do {} while(0) --#endif /* PORTALS_PROFILING */ -- --/* debug.c */ --extern spinlock_t stack_backtrace_lock; -- --char *portals_debug_dumpstack(void); --void portals_run_upcall(char **argv); --void portals_run_lbug_upcall(char * file, const char *fn, const int line); --void portals_debug_dumplog(void); --int portals_debug_init(unsigned long bufsize); --int portals_debug_cleanup(void); --int portals_debug_clear_buffer(void); --int portals_debug_mark_buffer(char *text); --int portals_debug_set_daemon(unsigned int cmd, unsigned int length, -- char *file, unsigned int size); --__s32 portals_debug_copy_to_user(char *buf, unsigned long len); --#if (__GNUC__) --/* Use the special GNU C __attribute__ hack to have the compiler check the -- * printf style argument string against the actual argument count and -- * types. -- */ --#ifdef printf --# warning printf has been defined as a macro... --# undef printf --#endif --void portals_debug_msg(int subsys, int mask, char *file, const char *fn, -- const int line, unsigned long stack, -- char *format, ...) -- __attribute__ ((format (printf, 7, 8))); --#else --void portals_debug_msg(int subsys, int mask, char *file, const char *fn, -- const int line, unsigned long stack, -- const char *format, ...); --#endif /* __GNUC__ */ --void portals_debug_set_level(unsigned int debug_level); -- --# define fprintf(a, format, b...) 
CDEBUG(D_OTHER, format , ## b) --# define printf(format, b...) CDEBUG(D_OTHER, format , ## b) --# define time(a) CURRENT_TIME -- --extern void kportal_daemonize (char *name); --extern void kportal_blockallsigs (void); -- --#else /* !__KERNEL__ */ --# include --# include --#ifndef __CYGWIN__ --# include --#else --# include --#endif --# include --# include --# include --# include --# ifndef DEBUG_SUBSYSTEM --# define DEBUG_SUBSYSTEM S_UNDEFINED --# endif --# ifdef PORTAL_DEBUG --# undef NDEBUG --# include --# define LASSERT(e) assert(e) - # define LASSERTF(cond, args...) assert(cond) -# define LASSERTF(cond, args...) \ -do { \ - if (!(cond)) \ - CERROR(args); \ - assert(cond); \ -} while (0) --# else --# define LASSERT(e) --# define LASSERTF(cond, args...) do { } while (0) --# endif --# define printk(format, args...) printf (format, ## args) --# define PORTAL_ALLOC(ptr, size) do { (ptr) = malloc(size); } while (0); --# define PORTAL_FREE(a, b) do { free(a); } while (0); --void portals_debug_dumplog(void); --# define portals_debug_msg(subsys, mask, file, fn, line, stack, format, a...) \ -- printf("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format, \ -- (subsys), (mask), (long)time(0), file, fn, line, \ -- getpid() , stack, ## a); - - #undef CWARN - #undef CERROR - #define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a) - #define CERROR(format, a...) CDEBUG(D_ERROR, format, ## a) --#endif -- --/* support decl needed both by kernel and liblustre */ --char *portals_nid2str(int nal, ptl_nid_t nid, char *str); -char *portals_id2str(int nal, ptl_process_id_t nid, char *str); -- --#ifndef CURRENT_TIME --# define CURRENT_TIME time(0) --#endif -- --/******************************************************************************/ --/* Light-weight trace -- * Support for temporary event tracing with minimal Heisenberg effect. 
*/ --#define LWT_SUPPORT 0 -- - #define LWT_MEMORY (64<<20) - #define LWT_MAX_CPUS 4 -#define LWT_MEMORY (16<<20) -- -#if !KLWT_SUPPORT -/* kernel hasn't defined this? */ --typedef struct { - cycles_t lwte_when; - long long lwte_when; -- char *lwte_where; -- void *lwte_task; -- long lwte_p1; -- long lwte_p2; -- long lwte_p3; -- long lwte_p4; - #if BITS_PER_LONG > 32 -# if BITS_PER_LONG > 32 -- long lwte_pad; - #endif -# endif --} lwt_event_t; -#endif /* !KLWT_SUPPORT */ -- --#if LWT_SUPPORT - #ifdef __KERNEL__ - #define LWT_EVENTS_PER_PAGE (PAGE_SIZE / sizeof (lwt_event_t)) -# ifdef __KERNEL__ -# if !KLWT_SUPPORT -- --typedef struct _lwt_page { -- struct list_head lwtp_list; -- struct page *lwtp_page; -- lwt_event_t *lwtp_events; --} lwt_page_t; -- --typedef struct { -- int lwtc_current_index; -- lwt_page_t *lwtc_current_page; --} lwt_cpu_t; -- --extern int lwt_enabled; --extern lwt_cpu_t lwt_cpus[]; - - extern int lwt_init (void); - extern void lwt_fini (void); - extern int lwt_lookup_string (int *size, char *knlptr, - char *usrptr, int usrsize); - extern int lwt_control (int enable, int clear); - extern int lwt_snapshot (cycles_t *now, int *ncpu, int *total_size, - void *user_ptr, int user_size); -- --/* Note that we _don't_ define LWT_EVENT at all if LWT_SUPPORT isn't set. -- * This stuff is meant for finding specific problems; it never stays in -- * production code... 
*/ -- --#define LWTSTR(n) #n --#define LWTWHERE(f,l) f ":" LWTSTR(l) -#define LWT_EVENTS_PER_PAGE (PAGE_SIZE / sizeof (lwt_event_t)) -- --#define LWT_EVENT(p1, p2, p3, p4) \ --do { \ -- unsigned long flags; \ -- lwt_cpu_t *cpu; \ -- lwt_page_t *p; \ -- lwt_event_t *e; \ - \ - local_irq_save (flags); \ -- \ -- if (lwt_enabled) { \ - local_irq_save (flags); \ - \ -- cpu = &lwt_cpus[smp_processor_id()]; \ -- p = cpu->lwtc_current_page; \ -- e = &p->lwtp_events[cpu->lwtc_current_index++]; \ -- \ -- if (cpu->lwtc_current_index >= LWT_EVENTS_PER_PAGE) { \ -- cpu->lwtc_current_page = \ -- list_entry (p->lwtp_list.next, \ -- lwt_page_t, lwtp_list); \ -- cpu->lwtc_current_index = 0; \ -- } \ -- \ -- e->lwte_when = get_cycles(); \ -- e->lwte_where = LWTWHERE(__FILE__,__LINE__); \ -- e->lwte_task = current; \ -- e->lwte_p1 = (long)(p1); \ -- e->lwte_p2 = (long)(p2); \ -- e->lwte_p3 = (long)(p3); \ -- e->lwte_p4 = (long)(p4); \ - } \ -- \ - local_irq_restore (flags); \ - local_irq_restore (flags); \ - } \ --} while (0) - #else /* __KERNEL__ */ - #define LWT_EVENT(p1,p2,p3,p4) /* no userland implementation yet */ - #endif /* __KERNEL__ */ - -#endif /* !KLWT_SUPPORT */ - -extern int lwt_init (void); -extern void lwt_fini (void); -extern int lwt_lookup_string (int *size, char *knlptr, - char *usrptr, int usrsize); -extern int lwt_control (int enable, int clear); -extern int lwt_snapshot (cycles_t *now, int *ncpu, int *total_size, - void *user_ptr, int user_size); -# else /* __KERNEL__ */ -# define LWT_EVENT(p1,p2,p3,p4) /* no userland implementation yet */ -# endif /* __KERNEL__ */ --#endif /* LWT_SUPPORT */ -- --struct portals_device_userstate --{ -- int pdu_memhog_pages; -- struct page *pdu_memhog_root_page; --}; -- --#include -- --/* -- * USER LEVEL STUFF BELOW -- */ -- --#define PORTAL_IOCTL_VERSION 0x00010007 --#define PING_SYNC 0 --#define PING_ASYNC 1 -- --struct portal_ioctl_hdr { -- __u32 ioc_len; -- __u32 ioc_version; --}; -- --struct portals_debug_ioctl_data --{ -- 
struct portal_ioctl_hdr hdr; -- unsigned int subs; -- unsigned int debug; --}; -- --#define PORTAL_IOC_INIT(data) \ --do { \ -- memset(&data, 0, sizeof(data)); \ -- data.ioc_version = PORTAL_IOCTL_VERSION; \ -- data.ioc_len = sizeof(data); \ --} while (0) -- --/* FIXME check conflict with lustre_lib.h */ --#define PTL_IOC_DEBUG_MASK _IOWR('f', 250, long) -- --static inline int portal_ioctl_packlen(struct portal_ioctl_data *data) --{ -- int len = sizeof(*data); -- len += size_round(data->ioc_inllen1); -- len += size_round(data->ioc_inllen2); -- return len; --} -- --static inline int portal_ioctl_is_invalid(struct portal_ioctl_data *data) --{ -- if (data->ioc_len > (1<<30)) { -- CERROR ("PORTALS ioctl: ioc_len larger than 1<<30\n"); -- return 1; -- } -- if (data->ioc_inllen1 > (1<<30)) { -- CERROR ("PORTALS ioctl: ioc_inllen1 larger than 1<<30\n"); -- return 1; -- } -- if (data->ioc_inllen2 > (1<<30)) { -- CERROR ("PORTALS ioctl: ioc_inllen2 larger than 1<<30\n"); -- return 1; -- } -- if (data->ioc_inlbuf1 && !data->ioc_inllen1) { -- CERROR ("PORTALS ioctl: inlbuf1 pointer but 0 length\n"); -- return 1; -- } -- if (data->ioc_inlbuf2 && !data->ioc_inllen2) { -- CERROR ("PORTALS ioctl: inlbuf2 pointer but 0 length\n"); -- return 1; -- } -- if (data->ioc_pbuf1 && !data->ioc_plen1) { -- CERROR ("PORTALS ioctl: pbuf1 pointer but 0 length\n"); -- return 1; -- } -- if (data->ioc_pbuf2 && !data->ioc_plen2) { -- CERROR ("PORTALS ioctl: pbuf2 pointer but 0 length\n"); -- return 1; -- } -- if (data->ioc_plen1 && !data->ioc_pbuf1) { -- CERROR ("PORTALS ioctl: plen1 nonzero but no pbuf1 pointer\n"); -- return 1; -- } -- if (data->ioc_plen2 && !data->ioc_pbuf2) { -- CERROR ("PORTALS ioctl: plen2 nonzero but no pbuf2 pointer\n"); -- return 1; -- } -- if (portal_ioctl_packlen(data) != data->ioc_len ) { -- CERROR ("PORTALS ioctl: packlen != ioc_len\n"); -- return 1; -- } -- if (data->ioc_inllen1 && -- data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') { -- CERROR ("PORTALS ioctl: inlbuf1 
not 0 terminated\n"); -- return 1; -- } -- if (data->ioc_inllen2 && -- data->ioc_bulk[size_round(data->ioc_inllen1) + -- data->ioc_inllen2 - 1] != '\0') { -- CERROR ("PORTALS ioctl: inlbuf2 not 0 terminated\n"); -- return 1; -- } -- return 0; --} -- --#ifndef __KERNEL__ --static inline int portal_ioctl_pack(struct portal_ioctl_data *data, char **pbuf, -- int max) --{ -- char *ptr; -- struct portal_ioctl_data *overlay; -- data->ioc_len = portal_ioctl_packlen(data); -- data->ioc_version = PORTAL_IOCTL_VERSION; -- -- if (*pbuf && portal_ioctl_packlen(data) > max) -- return 1; -- if (*pbuf == NULL) { -- *pbuf = malloc(data->ioc_len); -- } -- if (!*pbuf) -- return 1; -- overlay = (struct portal_ioctl_data *)*pbuf; -- memcpy(*pbuf, data, sizeof(*data)); -- -- ptr = overlay->ioc_bulk; -- if (data->ioc_inlbuf1) -- LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr); -- if (data->ioc_inlbuf2) -- LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr); -- if (portal_ioctl_is_invalid(overlay)) -- return 1; -- -- return 0; --} --#else --#include -- --/* buffer MUST be at least the size of portal_ioctl_hdr */ --static inline int portal_ioctl_getdata(char *buf, char *end, void *arg) --{ -- struct portal_ioctl_hdr *hdr; -- struct portal_ioctl_data *data; -- int err; -- ENTRY; -- -- hdr = (struct portal_ioctl_hdr *)buf; -- data = (struct portal_ioctl_data *)buf; -- -- err = copy_from_user(buf, (void *)arg, sizeof(*hdr)); -- if ( err ) { -- EXIT; -- return err; -- } -- -- if (hdr->ioc_version != PORTAL_IOCTL_VERSION) { -- CERROR ("PORTALS: version mismatch kernel vs application\n"); -- return -EINVAL; -- } -- -- if (hdr->ioc_len + buf >= end) { -- CERROR ("PORTALS: user buffer exceeds kernel buffer\n"); -- return -EINVAL; -- } -- -- -- if (hdr->ioc_len < sizeof(struct portal_ioctl_data)) { -- CERROR ("PORTALS: user buffer too small for ioctl\n"); -- return -EINVAL; -- } -- -- err = copy_from_user(buf, (void *)arg, hdr->ioc_len); -- if ( err ) { -- EXIT; -- return err; -- } -- -- if 
(portal_ioctl_is_invalid(data)) { -- CERROR ("PORTALS: ioctl not correctly formatted\n"); -- return -EINVAL; -- } -- -- if (data->ioc_inllen1) { -- data->ioc_inlbuf1 = &data->ioc_bulk[0]; -- } -- -- if (data->ioc_inllen2) { -- data->ioc_inlbuf2 = &data->ioc_bulk[0] + -- size_round(data->ioc_inllen1); -- } -- -- EXIT; -- return 0; --} --#endif -- --/* ioctls for manipulating snapshots 30- */ --#define IOC_PORTAL_TYPE 'e' --#define IOC_PORTAL_MIN_NR 30 -- --#define IOC_PORTAL_PING _IOWR('e', 30, long) -- --#define IOC_PORTAL_CLEAR_DEBUG _IOWR('e', 32, long) --#define IOC_PORTAL_MARK_DEBUG _IOWR('e', 33, long) --#define IOC_PORTAL_PANIC _IOWR('e', 34, long) --#define IOC_PORTAL_NAL_CMD _IOWR('e', 35, long) --#define IOC_PORTAL_GET_NID _IOWR('e', 36, long) --#define IOC_PORTAL_FAIL_NID _IOWR('e', 37, long) -- --#define IOC_PORTAL_LWT_CONTROL _IOWR('e', 39, long) --#define IOC_PORTAL_LWT_SNAPSHOT _IOWR('e', 40, long) --#define IOC_PORTAL_LWT_LOOKUP_STRING _IOWR('e', 41, long) --#define IOC_PORTAL_MEMHOG _IOWR('e', 42, long) --#define IOC_PORTAL_MAX_NR 42 -- --enum { -- QSWNAL = 1, -- SOCKNAL = 2, -- GMNAL = 3, -- /* 4 unused */ -- TCPNAL = 5, - SCIMACNAL = 6, - ROUTER = 7, - IBNAL = 8, - ROUTER = 6, - OPENIBNAL = 7, -- NAL_ENUM_END_MARKER --}; - - #ifdef __KERNEL__ - extern ptl_handle_ni_t kqswnal_ni; - extern ptl_handle_ni_t ksocknal_ni; - extern ptl_handle_ni_t kgmnal_ni; - extern ptl_handle_ni_t kibnal_ni; - extern ptl_handle_ni_t kscimacnal_ni; - #endif -- - #define PTL_NALFMT_SIZE 26 /* %u:%u.%u.%u.%u (10+4+4+4+3+1) */ -#define PTL_NALFMT_SIZE 30 /* %u:%u.%u.%u.%u,%u (10+4+4+4+3+4+1) */ -- --#define NAL_MAX_NR (NAL_ENUM_END_MARKER - 1) -- --#define NAL_CMD_REGISTER_PEER_FD 100 --#define NAL_CMD_CLOSE_CONNECTION 101 --#define NAL_CMD_REGISTER_MYNID 102 --#define NAL_CMD_PUSH_CONNECTION 103 --#define NAL_CMD_GET_CONN 104 - #define NAL_CMD_DEL_AUTOCONN 105 - #define NAL_CMD_ADD_AUTOCONN 106 - #define NAL_CMD_GET_AUTOCONN 107 -#define NAL_CMD_DEL_PEER 105 -#define 
NAL_CMD_ADD_PEER 106 -#define NAL_CMD_GET_PEER 107 --#define NAL_CMD_GET_TXDESC 108 --#define NAL_CMD_ADD_ROUTE 109 --#define NAL_CMD_DEL_ROUTE 110 --#define NAL_CMD_GET_ROUTE 111 --#define NAL_CMD_NOTIFY_ROUTER 112 -#define NAL_CMD_ADD_INTERFACE 113 -#define NAL_CMD_DEL_INTERFACE 114 -#define NAL_CMD_GET_INTERFACE 115 - -- --enum { -- DEBUG_DAEMON_START = 1, -- DEBUG_DAEMON_STOP = 2, -- DEBUG_DAEMON_PAUSE = 3, -- DEBUG_DAEMON_CONTINUE = 4, - }; - - /* XXX remove to lustre ASAP */ - struct lustre_peer { - ptl_nid_t peer_nid; - ptl_handle_ni_t peer_ni; --}; -- - /* module.c */ - typedef int (*nal_cmd_handler_t)(struct portals_cfg *, void * private); - int kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private); - int kportal_nal_unregister(int nal); -- --enum cfg_record_type { -- PORTALS_CFG_TYPE = 1, -- LUSTRE_CFG_TYPE = 123, --}; -- --typedef int (*cfg_record_cb_t)(enum cfg_record_type, int len, void *data); - int kportal_nal_cmd(struct portals_cfg *); - - ptl_handle_ni_t *kportal_get_ni (int nal); - void kportal_put_ni (int nal); -- --#ifdef __CYGWIN__ --# ifndef BITS_PER_LONG --# if (~0UL) == 0xffffffffUL --# define BITS_PER_LONG 32 --# else --# define BITS_PER_LONG 64 --# endif --# endif --#endif -- --#if BITS_PER_LONG > 32 --# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a) --# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a) --# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a) --#else --# define LI_POISON ((int)0x5a5a5a5a) --# define LL_POISON ((long)0x5a5a5a5a) --# define LP_POISON ((void *)(long)0x5a5a5a5a) --#endif -- --#if defined(__x86_64__) --# define LPU64 "%Lu" --# define LPD64 "%Ld" --# define LPX64 "%#Lx" --# define LPSZ "%lu" --# define LPSSZ "%ld" --#elif (BITS_PER_LONG == 32 || __WORDSIZE == 32) --# define LPU64 "%Lu" --# define LPD64 "%Ld" --# define LPX64 "%#Lx" --# define LPSZ "%u" --# define LPSSZ "%d" --#elif (BITS_PER_LONG == 64 || __WORDSIZE == 64) --# define LPU64 "%lu" --# define LPD64 "%ld" --# define LPX64 "%#lx" --# define 
LPSZ "%lu" --# define LPSSZ "%ld" --#endif --#ifndef LPU64 --# error "No word size defined" --#endif -- --#endif diff --cc lnet/include/linux/kpr.h index ee50b59,1127698..0000000 deleted file mode 100644,100644 --- a/lnet/include/linux/kpr.h +++ /dev/null @@@ -1,191 -1,176 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * * vim:expandtab:shiftwidth=8:tabstop=8: - * */ - * vim:expandtab:shiftwidth=8:tabstop=8: - */ --#ifndef _KPR_H --#define _KPR_H -- - # include /* for ptl_hdr_t */ -# include /* for ptl_hdr_t */ -- --/******************************************************************************/ --/* Kernel Portals Router interface */ -- --typedef void (*kpr_fwd_callback_t)(void *arg, int error); // completion callback -- --/* space for routing targets to stash "stuff" in a forwarded packet */ --typedef union { -- long long _alignment; -- void *_space[16]; /* scale with CPU arch */ --} kprfd_scratch_t; -- --/* Kernel Portals Routing Forwarded message Descriptor */ --typedef struct { -- struct list_head kprfd_list; /* stash in queues (routing target can use) */ -- ptl_nid_t kprfd_target_nid; /* final destination NID */ -- ptl_nid_t kprfd_gateway_nid; /* gateway NID */ -- ptl_hdr_t *kprfd_hdr; /* header in wire byte order */ -- int kprfd_nob; /* # payload bytes */ -- int kprfd_niov; /* # payload frags */ -- ptl_kiov_t *kprfd_kiov; /* payload fragments */ -- void *kprfd_router_arg; /* originating NAL's router arg */ -- kpr_fwd_callback_t kprfd_callback; /* completion callback */ -- void *kprfd_callback_arg; /* completion callback arg */ -- kprfd_scratch_t kprfd_scratch; /* scratchpad for routing targets */ --} kpr_fwd_desc_t; -- --typedef void (*kpr_fwd_t)(void *arg, kpr_fwd_desc_t *fwd); --typedef void (*kpr_notify_t)(void *arg, ptl_nid_t peer, int alive); -- --/* NAL's routing interface (Kernel Portals Routing Nal Interface) */ --typedef const struct { -- int kprni_nalid; /* NAL's id */ -- void *kprni_arg; /* Arg to pass when calling into 
NAL */ -- kpr_fwd_t kprni_fwd; /* NAL's forwarding entrypoint */ -- kpr_notify_t kprni_notify; /* NAL's notification entrypoint */ --} kpr_nal_interface_t; -- --/* Router's routing interface (Kernel Portals Routing Router Interface) */ --typedef const struct { -- /* register the calling NAL with the router and get back the handle for -- * subsequent calls */ -- int (*kprri_register) (kpr_nal_interface_t *nal_interface, -- void **router_arg); -- -- /* ask the router to find a gateway that forwards to 'nid' and is a -- * peer of the calling NAL; assume caller will send 'nob' bytes of -- * payload there */ -- int (*kprri_lookup) (void *router_arg, ptl_nid_t nid, int nob, -- ptl_nid_t *gateway_nid); -- -- /* hand a packet over to the router for forwarding */ -- kpr_fwd_t kprri_fwd_start; -- -- /* hand a packet back to the router for completion */ -- void (*kprri_fwd_done) (void *router_arg, kpr_fwd_desc_t *fwd, -- int error); -- -- /* notify the router about peer state */ -- void (*kprri_notify) (void *router_arg, ptl_nid_t peer, -- int alive, time_t when); -- -- /* the calling NAL is shutting down */ -- void (*kprri_shutdown) (void *router_arg); -- -- /* deregister the calling NAL with the router */ -- void (*kprri_deregister) (void *router_arg); -- --} kpr_router_interface_t; -- --/* Convenient struct for NAL to stash router interface/args */ --typedef struct { -- kpr_router_interface_t *kpr_interface; -- void *kpr_arg; --} kpr_router_t; - - /* Router's control interface (Kernel Portals Routing Control Interface) */ - typedef const struct { - int (*kprci_add_route)(int gateway_nal, ptl_nid_t gateway_nid, - ptl_nid_t lo_nid, ptl_nid_t hi_nid); - int (*kprci_del_route)(int gateway_nal, ptl_nid_t gateway_nid, - ptl_nid_t lo_nid, ptl_nid_t hi_nid); - int (*kprci_get_route)(int index, int *gateway_nal, - ptl_nid_t *gateway, - ptl_nid_t *lo_nid, ptl_nid_t *hi_nid, - int *alive); - int (*kprci_notify)(int gateway_nal, ptl_nid_t gateway_nid, - int alive, time_t when); - } 
kpr_control_interface_t; -- - extern kpr_control_interface_t kpr_control_interface; --extern kpr_router_interface_t kpr_router_interface; -- --static inline int --kpr_register (kpr_router_t *router, kpr_nal_interface_t *nalif) --{ -- int rc; -- -- router->kpr_interface = PORTAL_SYMBOL_GET (kpr_router_interface); -- if (router->kpr_interface == NULL) -- return (-ENOENT); -- -- rc = (router->kpr_interface)->kprri_register (nalif, &router->kpr_arg); -- if (rc != 0) -- router->kpr_interface = NULL; -- -- PORTAL_SYMBOL_PUT (kpr_router_interface); -- return (rc); --} -- --static inline int --kpr_routing (kpr_router_t *router) --{ -- return (router->kpr_interface != NULL); --} -- --static inline int --kpr_lookup (kpr_router_t *router, ptl_nid_t nid, int nob, ptl_nid_t *gateway_nid) --{ -- if (!kpr_routing (router)) -- return (-ENETUNREACH); -- -- return (router->kpr_interface->kprri_lookup(router->kpr_arg, nid, nob, -- gateway_nid)); --} -- --static inline void --kpr_fwd_init (kpr_fwd_desc_t *fwd, ptl_nid_t nid, ptl_hdr_t *hdr, -- int nob, int niov, ptl_kiov_t *kiov, -- kpr_fwd_callback_t callback, void *callback_arg) --{ -- fwd->kprfd_target_nid = nid; -- fwd->kprfd_gateway_nid = nid; -- fwd->kprfd_hdr = hdr; -- fwd->kprfd_nob = nob; -- fwd->kprfd_niov = niov; -- fwd->kprfd_kiov = kiov; -- fwd->kprfd_callback = callback; -- fwd->kprfd_callback_arg = callback_arg; --} -- --static inline void --kpr_fwd_start (kpr_router_t *router, kpr_fwd_desc_t *fwd) --{ -- if (!kpr_routing (router)) -- fwd->kprfd_callback (fwd->kprfd_callback_arg, -ENETUNREACH); -- else -- router->kpr_interface->kprri_fwd_start (router->kpr_arg, fwd); --} -- --static inline void --kpr_fwd_done (kpr_router_t *router, kpr_fwd_desc_t *fwd, int error) --{ -- LASSERT (kpr_routing (router)); -- router->kpr_interface->kprri_fwd_done (router->kpr_arg, fwd, error); --} -- --static inline void --kpr_notify (kpr_router_t *router, -- ptl_nid_t peer, int alive, time_t when) --{ -- if (!kpr_routing (router)) -- 
return; -- -- router->kpr_interface->kprri_notify(router->kpr_arg, peer, alive, when); --} -- --static inline void --kpr_shutdown (kpr_router_t *router) --{ -- if (kpr_routing (router)) -- router->kpr_interface->kprri_shutdown (router->kpr_arg); --} -- --static inline void --kpr_deregister (kpr_router_t *router) --{ -- if (!kpr_routing (router)) -- return; -- router->kpr_interface->kprri_deregister (router->kpr_arg); -- router->kpr_interface = NULL; --} -- --#endif /* _KPR_H */ diff --cc lnet/include/linux/libcfs.h index 66ee471,d1a5c44..0000000 deleted file mode 100644,100644 --- a/lnet/include/linux/libcfs.h +++ /dev/null @@@ -1,303 -1,343 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- */ --#ifndef _LIBCFS_H --#define _LIBCFS_H - -#include - -#ifdef __KERNEL__ -# include -# include -#else -# include -# define do_gettimeofday(tv) gettimeofday(tv, NULL); -typedef unsigned long long cycles_t; -#endif -- --#define PORTAL_DEBUG -- --#ifndef offsetof - # define offsetof(typ,memb) ((int)((char *)&(((typ *)0)->memb))) -# define offsetof(typ,memb) ((unsigned long)((char *)&(((typ *)0)->memb))) --#endif -- --#define LOWEST_BIT_SET(x) ((x) & ~((x) - 1)) - -#ifndef __KERNEL__ -/* Userpace byte flipping */ -# include -# include -# define __swab16(x) bswap_16(x) -# define __swab32(x) bswap_32(x) -# define __swab64(x) bswap_64(x) -# define __swab16s(x) do {*(x) = bswap_16(*(x));} while (0) -# define __swab32s(x) do {*(x) = bswap_32(*(x));} while (0) -# define __swab64s(x) do {*(x) = bswap_64(*(x));} while (0) -# if __BYTE_ORDER == __LITTLE_ENDIAN -# define le16_to_cpu(x) (x) -# define cpu_to_le16(x) (x) -# define le32_to_cpu(x) (x) -# define cpu_to_le32(x) (x) -# define le64_to_cpu(x) (x) -# define cpu_to_le64(x) (x) -# else -# if __BYTE_ORDER == __BIG_ENDIAN -# define le16_to_cpu(x) bswap_16(x) -# define cpu_to_le16(x) bswap_16(x) -# define le32_to_cpu(x) bswap_32(x) -# define cpu_to_le32(x) bswap_32(x) -# 
define le64_to_cpu(x) bswap_64(x) -# define cpu_to_le64(x) bswap_64(x) -# else -# error "Unknown byte order" -# endif /* __BIG_ENDIAN */ -# endif /* __LITTLE_ENDIAN */ -#endif /* ! __KERNEL__ */ -- --/* -- * Debugging -- */ --extern unsigned int portal_subsystem_debug; --extern unsigned int portal_stack; --extern unsigned int portal_debug; --extern unsigned int portal_printk; -- --#include --struct ptldebug_header { -- __u32 ph_len; -- __u32 ph_flags; -- __u32 ph_subsys; -- __u32 ph_mask; -- __u32 ph_cpu_id; -- __u32 ph_sec; -- __u64 ph_usec; -- __u32 ph_stack; -- __u32 ph_pid; -- __u32 ph_extern_pid; -- __u32 ph_line_num; --} __attribute__((packed)); -- --#define PH_FLAG_FIRST_RECORD 1 -- --/* Debugging subsystems (32 bits, non-overlapping) */ --#define S_UNDEFINED 0x00000001 --#define S_MDC 0x00000002 --#define S_MDS 0x00000004 --#define S_OSC 0x00000008 --#define S_OST 0x00000010 --#define S_CLASS 0x00000020 --#define S_LOG 0x00000040 --#define S_LLITE 0x00000080 --#define S_RPC 0x00000100 --#define S_MGMT 0x00000200 --#define S_PORTALS 0x00000400 --#define S_SOCKNAL 0x00000800 --#define S_QSWNAL 0x00001000 --#define S_PINGER 0x00002000 --#define S_FILTER 0x00004000 --#define S_PTLBD 0x00008000 --#define S_ECHO 0x00010000 --#define S_LDLM 0x00020000 --#define S_LOV 0x00040000 --#define S_GMNAL 0x00080000 --#define S_PTLROUTER 0x00100000 --#define S_COBD 0x00200000 - #define S_IBNAL 0x00400000 -#define S_OPENIBNAL 0x00400000 -#define S_SM 0x00800000 -#define S_ASOBD 0x01000000 -#define S_LMV 0x02000000 -#define S_CMOBD 0x04000000 -- --/* If you change these values, please keep portals/utils/debug.c -- * up to date! 
*/ -- --/* Debugging masks (32 bits, non-overlapping) */ --#define D_TRACE 0x00000001 /* ENTRY/EXIT markers */ --#define D_INODE 0x00000002 --#define D_SUPER 0x00000004 --#define D_EXT2 0x00000008 /* anything from ext2_debug */ --#define D_MALLOC 0x00000010 /* print malloc, free information */ --#define D_CACHE 0x00000020 /* cache-related items */ --#define D_INFO 0x00000040 /* general information */ --#define D_IOCTL 0x00000080 /* ioctl related information */ --#define D_BLOCKS 0x00000100 /* ext2 block allocation */ --#define D_NET 0x00000200 /* network communications */ --#define D_WARNING 0x00000400 /* CWARN(...) == CDEBUG (D_WARNING, ...) */ --#define D_BUFFS 0x00000800 --#define D_OTHER 0x00001000 --#define D_DENTRY 0x00002000 --#define D_PORTALS 0x00004000 /* ENTRY/EXIT markers */ --#define D_PAGE 0x00008000 /* bulk page handling */ --#define D_DLMTRACE 0x00010000 --#define D_ERROR 0x00020000 /* CERROR(...) == CDEBUG (D_ERROR, ...) */ --#define D_EMERG 0x00040000 /* CEMERG(...) == CDEBUG (D_EMERG, ...) 
*/ --#define D_HA 0x00080000 /* recovery and failover */ --#define D_RPCTRACE 0x00100000 /* for distributed debugging */ --#define D_VFSTRACE 0x00200000 --#define D_READA 0x00400000 /* read-ahead */ - -#define D_MMAP 0x00800000 -#define D_CONFIG 0x01000000 --#ifdef __KERNEL__ --# include /* THREAD_SIZE */ --#else --# ifndef THREAD_SIZE /* x86_64 has THREAD_SIZE in userspace */ --# define THREAD_SIZE 8192 --# endif --#endif -- --#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5) -- --#ifdef __KERNEL__ --# ifdef __ia64__ --# define CDEBUG_STACK (THREAD_SIZE - \ -- ((unsigned long)__builtin_dwarf_cfa() & \ -- (THREAD_SIZE - 1))) --# else --# define CDEBUG_STACK (THREAD_SIZE - \ -- ((unsigned long)__builtin_frame_address(0) & \ -- (THREAD_SIZE - 1))) - # endif -# endif /* __ia64__ */ -- --#define CHECK_STACK(stack) \ -- do { \ -- if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) { \ -- portals_debug_msg(DEBUG_SUBSYSTEM, D_WARNING, \ -- __FILE__, __FUNCTION__, __LINE__, \ -- (stack),"maximum lustre stack %u\n",\ -- portal_stack = (stack)); \ -- /*panic("LBUG");*/ \ -- } \ -- } while (0) - #else /* __KERNEL__ */ -#else /* !__KERNEL__ */ --#define CHECK_STACK(stack) do { } while(0) --#define CDEBUG_STACK (0L) --#endif /* __KERNEL__ */ -- --#if 1 --#define CDEBUG(mask, format, a...) \ --do { \ -- CHECK_STACK(CDEBUG_STACK); \ -- if (((mask) & (D_ERROR | D_EMERG | D_WARNING)) || \ -- (portal_debug & (mask) && \ -- portal_subsystem_debug & DEBUG_SUBSYSTEM)) \ -- portals_debug_msg(DEBUG_SUBSYSTEM, mask, \ -- __FILE__, __FUNCTION__, __LINE__, \ -- CDEBUG_STACK, format, ## a); \ - } while (0) - - #define CDEBUG_MAX_LIMIT 600 - #define CDEBUG_LIMIT(cdebug_mask, cdebug_format, a...) 
\ - do { \ - static unsigned long cdebug_next; \ - static int cdebug_count, cdebug_delay = 1; \ - \ - CHECK_STACK(CDEBUG_STACK); \ - if (time_after(jiffies, cdebug_next)) { \ - portals_debug_msg(DEBUG_SUBSYSTEM, cdebug_mask, __FILE__, \ - __FUNCTION__, __LINE__, CDEBUG_STACK, \ - cdebug_format, ## a); \ - if (cdebug_count) { \ - portals_debug_msg(DEBUG_SUBSYSTEM, cdebug_mask, \ - __FILE__, __FUNCTION__, __LINE__, \ - 0, cdebug_format, ## a); \ - cdebug_count = 0; \ - } \ - if (time_after(jiffies, cdebug_next+(CDEBUG_MAX_LIMIT+10)*HZ))\ - cdebug_delay = cdebug_delay > 8 ? cdebug_delay/8 : 1; \ - else \ - cdebug_delay = cdebug_delay*2 >= CDEBUG_MAX_LIMIT*HZ? \ - CDEBUG_MAX_LIMIT * HZ : cdebug_delay*2; \ - cdebug_next = jiffies + cdebug_delay; \ - } else { \ - portals_debug_msg(DEBUG_SUBSYSTEM, \ - portal_debug & ~(D_EMERG|D_ERROR|D_WARNING),\ - __FILE__, __FUNCTION__, __LINE__, \ - CDEBUG_STACK, cdebug_format, ## a); \ - cdebug_count++; \ - } \ --} while (0) -- - #define CWARN(format, a...) CDEBUG_LIMIT(D_WARNING, format, ## a) - #define CERROR(format, a...) CDEBUG_LIMIT(D_ERROR, format, ## a) -#define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a) -#define CERROR(format, a...) CDEBUG(D_ERROR, format, ## a) --#define CEMERG(format, a...) 
CDEBUG(D_EMERG, format, ## a) -- --#define GOTO(label, rc) \ --do { \ -- long GOTO__ret = (long)(rc); \ -- CDEBUG(D_TRACE,"Process leaving via %s (rc=%lu : %ld : %lx)\n", \ -- #label, (unsigned long)GOTO__ret, (signed long)GOTO__ret,\ -- (signed long)GOTO__ret); \ -- goto label; \ --} while (0) -- --#define RETURN(rc) \ --do { \ -- typeof(rc) RETURN__ret = (rc); \ -- CDEBUG(D_TRACE, "Process leaving (rc=%lu : %ld : %lx)\n", \ -- (long)RETURN__ret, (long)RETURN__ret, (long)RETURN__ret);\ -- return RETURN__ret; \ --} while (0) -- --#define ENTRY \ --do { \ -- CDEBUG(D_TRACE, "Process entered\n"); \ --} while (0) -- --#define EXIT \ --do { \ -- CDEBUG(D_TRACE, "Process leaving\n"); \ --} while(0) --#else --#define CDEBUG(mask, format, a...) do { } while (0) - #define CWARN(format, a...) do { } while (0) - #define CERROR(format, a...) printk("<3>" format, ## a) - #define CEMERG(format, a...) printk("<0>" format, ## a) -#define CWARN(format, a...) printk(KERN_WARNING format, ## a) -#define CERROR(format, a...) printk(KERN_ERR format, ## a) -#define CEMERG(format, a...) printk(KERN_EMERG format, ## a) --#define GOTO(label, rc) do { (void)(rc); goto label; } while (0) --#define RETURN(rc) return (rc) --#define ENTRY do { } while (0) --#define EXIT do { } while (0) --#endif - -/* initial pid */ -# if CRAY_PORTALS -/* - * - * 1) ptl_pid_t in cray portals is only 16 bits, not 32 bits, therefore this is too - * big. - * - * 2) the implementation of ernal in cray portals further restricts the pid space - * that may be used to 0 <= pid <= 255 (an 8 bit value). Returns an error at nal - * init time for any pid outside this range. Other nals in cray portals don't have - * this restriction. 
- * */ -#define LUSTRE_PTL_PID 9 -# else -#define LUSTRE_PTL_PID 12345 -# endif - -#define LUSTRE_SRV_PTL_PID LUSTRE_PTL_PID -- --#define PORTALS_CFG_VERSION 0x00010001; -- --struct portals_cfg { -- __u32 pcfg_version; -- __u32 pcfg_command; -- -- __u32 pcfg_nal; -- __u32 pcfg_flags; -- -- __u32 pcfg_gw_nal; -- __u64 pcfg_nid; -- __u64 pcfg_nid2; -- __u64 pcfg_nid3; -- __u32 pcfg_id; -- __u32 pcfg_misc; -- __u32 pcfg_fd; -- __u32 pcfg_count; -- __u32 pcfg_size; -- __u32 pcfg_wait; -- -- __u32 pcfg_plen1; /* buffers in userspace */ -- char *pcfg_pbuf1; -- __u32 pcfg_plen2; /* buffers in userspace */ -- char *pcfg_pbuf2; --}; -- --#define PCFG_INIT(pcfg, cmd) \ --do { \ -- memset(&pcfg, 0, sizeof(pcfg)); \ -- pcfg.pcfg_version = PORTALS_CFG_VERSION; \ -- pcfg.pcfg_command = (cmd); \ -- \ --} while (0) - -typedef int (nal_cmd_handler_fn)(struct portals_cfg *, void *); -int libcfs_nal_cmd_register(int nal, nal_cmd_handler_fn *handler, void *arg); -int libcfs_nal_cmd(struct portals_cfg *pcfg); -void libcfs_nal_cmd_unregister(int nal); -- --struct portal_ioctl_data { -- __u32 ioc_len; -- __u32 ioc_version; -- __u64 ioc_nid; -- __u64 ioc_nid2; -- __u64 ioc_nid3; -- __u32 ioc_count; -- __u32 ioc_nal; -- __u32 ioc_nal_cmd; -- __u32 ioc_fd; -- __u32 ioc_id; -- -- __u32 ioc_flags; -- __u32 ioc_size; -- -- __u32 ioc_wait; -- __u32 ioc_timeout; -- __u32 ioc_misc; -- -- __u32 ioc_inllen1; -- char *ioc_inlbuf1; -- __u32 ioc_inllen2; -- char *ioc_inlbuf2; -- -- __u32 ioc_plen1; /* buffers in userspace */ -- char *ioc_pbuf1; -- __u32 ioc_plen2; /* buffers in userspace */ -- char *ioc_pbuf2; -- -- char ioc_bulk[0]; --}; - -- --#ifdef __KERNEL__ -- --#include -- --struct libcfs_ioctl_handler { -- struct list_head item; -- int (*handle_ioctl)(struct portal_ioctl_data *data, -- unsigned int cmd, unsigned long args); --}; -- --#define DECLARE_IOCTL_HANDLER(ident, func) \ -- struct libcfs_ioctl_handler ident = { \ -- .item = LIST_HEAD_INIT(ident.item), \ -- .handle_ioctl = func \ -- } -- 
--int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand); --int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand); -- --#endif -- --#define _LIBCFS_H -- --#endif /* _LIBCFS_H */ diff --cc lnet/include/linux/lustre_list.h index a218f2c,a218f2c..0000000 deleted file mode 100644,100644 --- a/lnet/include/linux/lustre_list.h +++ /dev/null @@@ -1,246 -1,246 +1,0 @@@ --#ifndef _LUSTRE_LIST_H --#define _LUSTRE_LIST_H -- --#ifdef __KERNEL__ --#include --#else --/* -- * Simple doubly linked list implementation. -- * -- * Some of the internal functions ("__xxx") are useful when -- * manipulating whole lists rather than single entries, as -- * sometimes we already know the next/prev entries and we can -- * generate better code by using them directly rather than -- * using the generic single-entry routines. -- */ -- --#define prefetch(a) ((void)a) -- --struct list_head { -- struct list_head *next, *prev; --}; -- --typedef struct list_head list_t; -- --#define LIST_HEAD_INIT(name) { &(name), &(name) } -- --#define LIST_HEAD(name) \ -- struct list_head name = LIST_HEAD_INIT(name) -- --#define INIT_LIST_HEAD(ptr) do { \ -- (ptr)->next = (ptr); (ptr)->prev = (ptr); \ --} while (0) -- --/* -- * Insert a new entry between two known consecutive entries. -- * -- * This is only for internal list manipulation where we know -- * the prev/next entries already! -- */ --static inline void __list_add(struct list_head * new, -- struct list_head * prev, -- struct list_head * next) --{ -- next->prev = new; -- new->next = next; -- new->prev = prev; -- prev->next = new; --} -- --/** -- * list_add - add a new entry -- * @new: new entry to be added -- * @head: list head to add it after -- * -- * Insert a new entry after the specified head. -- * This is good for implementing stacks. 
-- */ --static inline void list_add(struct list_head *new, struct list_head *head) --{ -- __list_add(new, head, head->next); --} -- --/** -- * list_add_tail - add a new entry -- * @new: new entry to be added -- * @head: list head to add it before -- * -- * Insert a new entry before the specified head. -- * This is useful for implementing queues. -- */ --static inline void list_add_tail(struct list_head *new, struct list_head *head) --{ -- __list_add(new, head->prev, head); --} -- --/* -- * Delete a list entry by making the prev/next entries -- * point to each other. -- * -- * This is only for internal list manipulation where we know -- * the prev/next entries already! -- */ --static inline void __list_del(struct list_head * prev, struct list_head * next) --{ -- next->prev = prev; -- prev->next = next; --} -- --/** -- * list_del - deletes entry from list. -- * @entry: the element to delete from the list. -- * Note: list_empty on entry does not return true after this, the entry is in an undefined state. -- */ --static inline void list_del(struct list_head *entry) --{ -- __list_del(entry->prev, entry->next); --} -- --/** -- * list_del_init - deletes entry from list and reinitialize it. -- * @entry: the element to delete from the list. 
-- */ --static inline void list_del_init(struct list_head *entry) --{ -- __list_del(entry->prev, entry->next); -- INIT_LIST_HEAD(entry); --} -- --/** -- * list_move - delete from one list and add as another's head -- * @list: the entry to move -- * @head: the head that will precede our entry -- */ --static inline void list_move(struct list_head *list, struct list_head *head) --{ -- __list_del(list->prev, list->next); -- list_add(list, head); --} -- --/** -- * list_move_tail - delete from one list and add as another's tail -- * @list: the entry to move -- * @head: the head that will follow our entry -- */ --static inline void list_move_tail(struct list_head *list, -- struct list_head *head) --{ -- __list_del(list->prev, list->next); -- list_add_tail(list, head); --} -- --/** -- * list_empty - tests whether a list is empty -- * @head: the list to test. -- */ --static inline int list_empty(struct list_head *head) --{ -- return head->next == head; --} -- --static inline void __list_splice(struct list_head *list, -- struct list_head *head) --{ -- struct list_head *first = list->next; -- struct list_head *last = list->prev; -- struct list_head *at = head->next; -- -- first->prev = head; -- head->next = first; -- -- last->next = at; -- at->prev = last; --} -- --/** -- * list_splice - join two lists -- * @list: the new list to add. -- * @head: the place to add it in the first list. -- */ --static inline void list_splice(struct list_head *list, struct list_head *head) --{ -- if (!list_empty(list)) -- __list_splice(list, head); --} -- --/** -- * list_splice_init - join two lists and reinitialise the emptied list. -- * @list: the new list to add. -- * @head: the place to add it in the first list. 
-- * -- * The list at @list is reinitialised -- */ --static inline void list_splice_init(struct list_head *list, -- struct list_head *head) --{ -- if (!list_empty(list)) { -- __list_splice(list, head); -- INIT_LIST_HEAD(list); -- } --} -- --/** -- * list_entry - get the struct for this entry -- * @ptr: the &struct list_head pointer. -- * @type: the type of the struct this is embedded in. -- * @member: the name of the list_struct within the struct. -- */ --#define list_entry(ptr, type, member) \ -- ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) -- --/** -- * list_for_each - iterate over a list -- * @pos: the &struct list_head to use as a loop counter. -- * @head: the head for your list. -- */ --#define list_for_each(pos, head) \ -- for (pos = (head)->next, prefetch(pos->next); pos != (head); \ -- pos = pos->next, prefetch(pos->next)) -- --/** -- * list_for_each_prev - iterate over a list in reverse order -- * @pos: the &struct list_head to use as a loop counter. -- * @head: the head for your list. -- */ --#define list_for_each_prev(pos, head) \ -- for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \ -- pos = pos->prev, prefetch(pos->prev)) -- --/** -- * list_for_each_safe - iterate over a list safe against removal of list entry -- * @pos: the &struct list_head to use as a loop counter. -- * @n: another &struct list_head to use as temporary storage -- * @head: the head for your list. -- */ --#define list_for_each_safe(pos, n, head) \ -- for (pos = (head)->next, n = pos->next; pos != (head); \ -- pos = n, n = pos->next) -- --/** -- * list_for_each_entry - iterate over list of given type -- * @pos: the type * to use as a loop counter. -- * @head: the head for your list. -- * @member: the name of the list_struct within the struct. 
-- */ --#define list_for_each_entry(pos, head, member) \ -- for (pos = list_entry((head)->next, typeof(*pos), member), \ -- prefetch(pos->member.next); \ -- &pos->member != (head); \ -- pos = list_entry(pos->member.next, typeof(*pos), member), \ -- prefetch(pos->member.next)) -- --/** -- * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry -- * @pos: the type * to use as a loop counter. -- * @n: another type * to use as temporary storage -- * @head: the head for your list. -- * @member: the name of the list_struct within the struct. -- */ --#define list_for_each_entry_safe(pos, n, head, member) \ -- for (pos = list_entry((head)->next, typeof(*pos), member), \ -- n = list_entry(pos->member.next, typeof(*pos), member); \ -- &pos->member != (head); \ -- pos = n, n = list_entry(n->member.next, typeof(*n), member)) -- --#endif /* if !__KERNEL__*/ --#endif /* if !_LUSTRE_LIST_H */ diff --cc lnet/include/linux/portals_compat25.h index 3d0aff0,7fe6dfc..0000000 deleted file mode 100644,100644 --- a/lnet/include/linux/portals_compat25.h +++ /dev/null @@@ -1,92 -1,91 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- */ --#ifndef _PORTALS_COMPAT_H --#define _PORTALS_COMPAT_H -- --// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved --#if SPINLOCK_DEBUG --# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20) --# define SIGNAL_MASK_ASSERT() \ -- LASSERT(current->sighand->siglock.magic == SPINLOCK_MAGIC) --# else --# define SIGNAL_MASK_ASSERT() \ -- LASSERT(current->sigmask_lock.magic == SPINLOCK_MAGIC) --# endif --#else --# define SIGNAL_MASK_ASSERT() --#endif --// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved -- --#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -- --# define SIGNAL_MASK_LOCK(task, flags) \ -- spin_lock_irqsave(&task->sighand->siglock, flags) --# define 
SIGNAL_MASK_UNLOCK(task, flags) \ -- spin_unlock_irqrestore(&task->sighand->siglock, flags) --# define USERMODEHELPER(path, argv, envp) \ -- call_usermodehelper(path, argv, envp, 1) --# define RECALC_SIGPENDING recalc_sigpending() --# define CURRENT_SECONDS get_seconds() - # define smp_num_cpus NR_CPUS -- --#elif defined(CONFIG_RH_2_4_20) /* RH 2.4.x */ -- --# define SIGNAL_MASK_LOCK(task, flags) \ -- spin_lock_irqsave(&task->sighand->siglock, flags) --# define SIGNAL_MASK_UNLOCK(task, flags) \ -- spin_unlock_irqrestore(&task->sighand->siglock, flags) --# define USERMODEHELPER(path, argv, envp) \ -- call_usermodehelper(path, argv, envp) --# define RECALC_SIGPENDING recalc_sigpending() --# define CURRENT_SECONDS CURRENT_TIME -- --# define kernel_text_address(addr) is_kernel_text_address(addr) --extern int is_kernel_text_address(unsigned long addr); -- --#else /* 2.4.x */ -- --# define SIGNAL_MASK_LOCK(task, flags) \ -- spin_lock_irqsave(&task->sigmask_lock, flags) --# define SIGNAL_MASK_UNLOCK(task, flags) \ -- spin_unlock_irqrestore(&task->sigmask_lock, flags) --# define USERMODEHELPER(path, argv, envp) \ -- call_usermodehelper(path, argv, envp) --# define RECALC_SIGPENDING recalc_sigpending(current) --# define CURRENT_SECONDS CURRENT_TIME -- --# define kernel_text_address(addr) is_kernel_text_address(addr) --extern int is_kernel_text_address(unsigned long addr); -- --#endif -- --#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)) --# define THREAD_NAME(comm, len, fmt, a...) \ -- snprintf(comm, len, fmt "|%d", ## a, current->thread.extern_pid) --#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) --# define THREAD_NAME(comm, len, fmt, a...) \ -- snprintf(comm, len,fmt"|%d", ## a,current->thread.mode.tt.extern_pid) --#else --# define THREAD_NAME(comm, len, fmt, a...) 
\ -- snprintf(comm, len, fmt, ## a) --#endif -- --#ifdef HAVE_PAGE_LIST --/* 2.4 alloc_page users can use page->list */ --#define PAGE_LIST_ENTRY list --#define PAGE_LIST(page) ((page)->list) --#else --/* 2.6 alloc_page users can use page->lru */ --#define PAGE_LIST_ENTRY lru --#define PAGE_LIST(page) ((page)->lru) --#endif -- --#ifndef HAVE_CPU_ONLINE --#define cpu_online(cpu) (test_bit(cpu_online_map, &(cpu))) --#endif --#ifndef HAVE_CPUMASK_T --#define cpu_set(cpu, map) (set_bit(cpu, &(map))) --typedef unsigned long cpumask_t; --#endif -- --#endif /* _PORTALS_COMPAT_H */ diff --cc lnet/include/linux/portals_lib.h index 609290d,8778a52..0000000 deleted file mode 100644,100644 --- a/lnet/include/linux/portals_lib.h +++ /dev/null @@@ -1,198 -1,90 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (C) 2001 Cluster File Systems, Inc. -- * -- * This file is part of Lustre, http://www.lustre.org. -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- * -- * Basic library routines. -- * -- */ -- --#ifndef _PORTALS_LIB_H --#define _PORTALS_LIB_H -- --#ifndef __KERNEL__ --# include --#else --# include --#endif -- --#undef MIN --#define MIN(a,b) (((a)<(b)) ? (a): (b)) --#undef MAX --#define MAX(a,b) (((a)>(b)) ? (a): (b)) --#define MKSTR(ptr) ((ptr))? 
(ptr) : "" -- --static inline int size_round (int val) --{ -- return (val + 7) & (~0x7); --} -- --static inline int size_round16(int val) --{ -- return (val + 0xf) & (~0xf); --} -- --static inline int size_round32(int val) --{ -- return (val + 0x1f) & (~0x1f); --} -- --static inline int size_round0(int val) --{ -- if (!val) -- return 0; -- return (val + 1 + 7) & (~0x7); --} -- --static inline size_t round_strlen(char *fset) --{ -- return size_round(strlen(fset) + 1); - } - - #ifdef __KERNEL__ - static inline char *strdup(const char *str) - { - int len = strlen(str) + 1; - char *tmp = kmalloc(len, GFP_KERNEL); - if (tmp) - memcpy(tmp, str, len); - - return tmp; --} - #endif - - #ifdef __KERNEL__ - # define NTOH__u32(var) le32_to_cpu(var) - # define NTOH__u64(var) le64_to_cpu(var) - # define HTON__u32(var) cpu_to_le32(var) - # define HTON__u64(var) cpu_to_le64(var) - #else - # define expansion_u64(var) \ - ({ __u64 ret; \ - switch (sizeof(var)) { \ - case 8: (ret) = (var); break; \ - case 4: (ret) = (__u32)(var); break; \ - case 2: (ret) = (__u16)(var); break; \ - case 1: (ret) = (__u8)(var); break; \ - }; \ - (ret); \ - }) - # define NTOH__u32(var) (var) - # define NTOH__u64(var) (expansion_u64(var)) - # define HTON__u32(var) (var) - # define HTON__u64(var) (expansion_u64(var)) - #endif - - /* - * copy sizeof(type) bytes from pointer to var and move ptr forward. 
- * return EFAULT if pointer goes beyond end - */ - #define UNLOGV(var,type,ptr,end) \ - do { \ - var = *(type *)ptr; \ - ptr += sizeof(type); \ - if (ptr > end ) \ - return -EFAULT; \ - } while (0) - - /* the following two macros convert to little endian */ - /* type MUST be __u32 or __u64 */ - #define LUNLOGV(var,type,ptr,end) \ - do { \ - var = NTOH##type(*(type *)ptr); \ - ptr += sizeof(type); \ - if (ptr > end ) \ - return -EFAULT; \ - } while (0) - - /* now log values */ - #define LOGV(var,type,ptr) \ - do { \ - *((type *)ptr) = var; \ - ptr += sizeof(type); \ - } while (0) - - /* and in network order */ - #define LLOGV(var,type,ptr) \ - do { \ - *((type *)ptr) = HTON##type(var); \ - ptr += sizeof(type); \ - } while (0) - - - /* - * set var to point at (type *)ptr, move ptr forward with sizeof(type) - * return from function with EFAULT if ptr goes beyond end - */ - #define UNLOGP(var,type,ptr,end) \ - do { \ - var = (type *)ptr; \ - ptr += sizeof(type); \ - if (ptr > end ) \ - return -EFAULT; \ - } while (0) - - #define LOGP(var,type,ptr) \ - do { \ - memcpy(ptr, var, sizeof(type)); \ - ptr += sizeof(type); \ - } while (0) - - /* - * set var to point at (char *)ptr, move ptr forward by size_round(len); - * return from function with EFAULT if ptr goes beyond end - */ - #define UNLOGL(var,type,len,ptr,end) \ - do { \ - var = (type *)ptr; \ - ptr += size_round(len * sizeof(type)); \ - if (ptr > end ) \ - return -EFAULT; \ - } while (0) - - #define UNLOGL0(var,type,len,ptr,end) \ - do { \ - UNLOGL(var,type,len,ptr,end); \ - if ( *((char *)ptr - size_round(len) + len - 1) != '\0') \ - return -EFAULT; \ - } while (0) -- --#define LOGL(var,len,ptr) \ --do { \ -- if (var) \ -- memcpy((char *)ptr, (const char *)var, len); \ -- ptr += size_round(len); \ --} while (0) -- --#define LOGU(var,len,ptr) \ --do { \ -- if (var) \ -- memcpy((char *)var, (const char *)ptr, len); \ -- ptr += size_round(len); \ --} while (0) -- --#define LOGL0(var,len,ptr) \ --do { \ -- if (!len) 
\ -- break; \ -- memcpy((char *)ptr, (const char *)var, len); \ -- *((char *)(ptr) + len) = 0; \ -- ptr += size_round(len + 1); \ --} while (0) -- --#endif /* _PORTALS_LIB_H */ diff --cc lnet/include/lnet/Makefile.am index 5ed6090,4043f66..0000000 deleted file mode 100644,100644 --- a/lnet/include/lnet/Makefile.am +++ /dev/null @@@ -1,10 -1,10 +1,0 @@@ --portalsdir=$(includedir)/portals -- --if UTILS --portals_HEADERS = list.h --endif -- - EXTRA_DIST = api.h api-support.h arg-blocks.h defines.h errno.h \ - internal.h lib-dispatch.h lib-nal.h lib-p30.h lib-types.h \ - list.h lltrace.h myrnal.h nal.h nalids.h p30.h ppid.h ptlctl.h \ -EXTRA_DIST = api.h api-support.h build_check.h errno.h \ - internal.h lib-p30.h lib-types.h list.h \ - lltrace.h myrnal.h nal.h nalids.h p30.h ptlctl.h \ -- socknal.h stringtab.h types.h diff --cc lnet/include/lnet/api-support.h index af4a2dc,c5994c6..0000000 deleted file mode 100644,100644 --- a/lnet/include/lnet/api-support.h +++ /dev/null @@@ -1,27 -1,22 +1,0 @@@ - # define DEBUG_SUBSYSTEM S_PORTALS - # define PORTAL_DEBUG - -#include "build_check.h" -- --#ifndef __KERNEL__ --# include --# include --# include --# include -- --/* Lots of POSIX dependencies to support PtlEQWait_timeout */ --# include --# include --# include --#endif -- --#include --#include --#include -- --#include --#include - #include -- - /* Hack for 2.4.18 macro name collision */ - #ifdef yield - #undef yield - #endif diff --cc lnet/include/lnet/api.h index a83749b,56b7b99..0000000 deleted file mode 100644,100644 --- a/lnet/include/lnet/api.h +++ /dev/null @@@ -1,159 -1,138 +1,0 @@@ --#ifndef P30_API_H --#define P30_API_H - -#include "build_check.h" -- --#include -- - #ifndef PTL_NO_WRAP - int PtlInit(void); - int PtlInitialized(void); -int PtlInit(int *); --void PtlFini(void); -- - int PtlNIInit(ptl_interface_t interface, ptl_pt_index_t ptl_size_in, - ptl_ac_index_t acl_size_in, ptl_pid_t requested_pid, - ptl_handle_ni_t * interface_out); -int 
PtlNIInit(ptl_interface_t interface, ptl_pid_t requested_pid, - ptl_ni_limits_t *desired_limits, ptl_ni_limits_t *actual_limits, - ptl_handle_ni_t *interface_out); -- --int PtlNIInitialized(ptl_interface_t); -- --int PtlNIFini(ptl_handle_ni_t interface_in); - - #endif -- --int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id); - -int PtlGetUid(ptl_handle_ni_t ni_handle, ptl_uid_t *uid); -- -- --/* -- * Network interfaces -- */ - - #ifndef PTL_NO_WRAP - int PtlNIBarrier(ptl_handle_ni_t interface_in); - #endif -- --int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in, -- ptl_sr_value_t * status_out); -- --int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in, -- unsigned long *distance_out); -- - #ifndef PTL_NO_WRAP --int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * interface_out); - #endif - -- - /* - * PtlNIDebug: - * - * This is not an official Portals 3 API call. It is provided - * by the reference implementation to allow the maintainers an - * easy way to turn on and off debugging information in the - * library. Do not use it in code that is not intended for use - * with any version other than the portable reference library. - */ - unsigned int PtlNIDebug(ptl_handle_ni_t ni, unsigned int mask_in); -- --/* -- * PtlNIFailNid -- * -- * Not an official Portals 3 API call. It provides a way of simulating -- * communications failures to all (nid == PTL_NID_ANY), or specific peers -- * (via multiple calls), either until further notice (threshold == -1), or -- * for a specific number of messages. Passing a threshold of zero, "heals" -- * the given peer. -- */ --int PtlFailNid (ptl_handle_ni_t ni, ptl_nid_t nid, unsigned int threshold); -- -/* - * PtlSnprintHandle: - * - * This is not an official Portals 3 API call. It is provided - * so that an application can print an opaque handle. 
- */ -void PtlSnprintHandle (char *str, int str_len, ptl_handle_any_t handle); -- --/* -- * Match entries -- */ -- --int PtlMEAttach(ptl_handle_ni_t interface_in, ptl_pt_index_t index_in, -- ptl_process_id_t match_id_in, ptl_match_bits_t match_bits_in, -- ptl_match_bits_t ignore_bits_in, ptl_unlink_t unlink_in, -- ptl_ins_pos_t pos_in, ptl_handle_me_t * handle_out); -- --int PtlMEInsert(ptl_handle_me_t current_in, ptl_process_id_t match_id_in, -- ptl_match_bits_t match_bits_in, ptl_match_bits_t ignore_bits_in, -- ptl_unlink_t unlink_in, ptl_ins_pos_t position_in, -- ptl_handle_me_t * handle_out); -- --int PtlMEUnlink(ptl_handle_me_t current_in); -- --int PtlMEUnlinkList(ptl_handle_me_t current_in); - - int PtlTblDump(ptl_handle_ni_t ni, int index_in); - int PtlMEDump(ptl_handle_me_t current_in); -- -- -- --/* -- * Memory descriptors -- */ -- - #ifndef PTL_NO_WRAP --int PtlMDAttach(ptl_handle_me_t current_in, ptl_md_t md_in, -- ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out); -- --int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in, - ptl_handle_md_t * handle_out); - ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out); -- --int PtlMDUnlink(ptl_handle_md_t md_in); -- --int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t * old_inout, -- ptl_md_t * new_inout, ptl_handle_eq_t testq_in); -- - #endif -- --/* These should not be called by users */ --int PtlMDUpdate_internal(ptl_handle_md_t md_in, ptl_md_t * old_inout, -- ptl_md_t * new_inout, ptl_handle_eq_t testq_in, -- ptl_seq_t sequence_in); -- -- -- -- --/* -- * Event queues -- */ - #ifndef PTL_NO_WRAP - - /* These should be called by users */ --int PtlEQAlloc(ptl_handle_ni_t ni_in, ptl_size_t count_in, - int (*callback) (ptl_event_t * event), - ptl_handle_eq_t * handle_out); - ptl_eq_handler_t handler, - ptl_handle_eq_t *handle_out); --int PtlEQFree(ptl_handle_eq_t eventq_in); - - int PtlEQCount(ptl_handle_eq_t eventq_in, ptl_size_t * count_out); -- --int PtlEQGet(ptl_handle_eq_t eventq_in, ptl_event_t * 
event_out); -- -- --int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t * event_out); -- - int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out, - int timeout); - #endif -int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout, - ptl_event_t *event_out, int *which_out); -- --/* -- * Access Control Table -- */ --int PtlACEntry(ptl_handle_ni_t ni_in, ptl_ac_index_t index_in, -- ptl_process_id_t match_id_in, ptl_pt_index_t portal_in); -- -- --/* -- * Data movement -- */ -- --int PtlPut(ptl_handle_md_t md_in, ptl_ack_req_t ack_req_in, -- ptl_process_id_t target_in, ptl_pt_index_t portal_in, -- ptl_ac_index_t cookie_in, ptl_match_bits_t match_bits_in, -- ptl_size_t offset_in, ptl_hdr_data_t hdr_data_in); -- --int PtlGet(ptl_handle_md_t md_in, ptl_process_id_t target_in, -- ptl_pt_index_t portal_in, ptl_ac_index_t cookie_in, -- ptl_match_bits_t match_bits_in, ptl_size_t offset_in); -- -- -- --#endif diff --cc lnet/include/lnet/errno.h index 08f084a,42f2626..0000000 deleted file mode 100644,100644 --- a/lnet/include/lnet/errno.h +++ /dev/null @@@ -1,60 -1,53 +1,0 @@@ --#ifndef _P30_ERRNO_H_ --#define _P30_ERRNO_H_ -- -#include "build_check.h" --/* -- * include/portals/errno.h -- * -- * Shared error number lists -- */ -- --/* If you change these, you must update the string table in api-errno.c */ --typedef enum { - PTL_OK = 0, - PTL_SEGV = 1, - PTL_OK = 0, - PTL_SEGV = 1, -- - PTL_NOSPACE = 2, - PTL_INUSE = 3, - PTL_VAL_FAILED = 4, - PTL_NO_SPACE = 2, - PTL_ME_IN_USE = 3, - PTL_VAL_FAILED = 4, -- - PTL_NAL_FAILED = 5, - PTL_NOINIT = 6, - PTL_INIT_DUP = 7, - PTL_INIT_INV = 8, - PTL_AC_INV_INDEX = 9, - PTL_NAL_FAILED = 5, - PTL_NO_INIT = 6, - PTL_IFACE_DUP = 7, - PTL_IFACE_INVALID = 8, -- - PTL_INV_ASIZE = 10, - PTL_INV_HANDLE = 11, - PTL_INV_MD = 12, - PTL_INV_ME = 13, - PTL_INV_NI = 14, - PTL_HANDLE_INVALID = 9, - PTL_MD_INVALID = 10, - PTL_ME_INVALID = 11, --/* If you change these, you must update the string table in api-errno.c */ - 
PTL_ILL_MD = 15, - PTL_INV_PROC = 16, - PTL_INV_PSIZE = 17, - PTL_INV_PTINDEX = 18, - PTL_INV_REG = 19, - PTL_PROCESS_INVALID = 12, - PTL_PT_INDEX_INVALID = 13, -- - PTL_INV_SR_INDX = 20, - PTL_ML_TOOLONG = 21, - PTL_ADDR_UNKNOWN = 22, - PTL_INV_EQ = 23, - PTL_EQ_DROPPED = 24, - PTL_SR_INDEX_INVALID = 14, - PTL_EQ_INVALID = 15, - PTL_EQ_DROPPED = 16, -- - PTL_EQ_EMPTY = 25, - PTL_NOUPDATE = 26, - PTL_FAIL = 27, - PTL_NOT_IMPLEMENTED = 28, - PTL_NO_ACK = 29, - PTL_EQ_EMPTY = 17, - PTL_MD_NO_UPDATE = 18, - PTL_FAIL = 19, -- - PTL_IOV_TOO_MANY = 30, - PTL_IOV_TOO_SMALL = 31, - PTL_IOV_INVALID = 20, -- - PTL_EQ_INUSE = 32, - PTL_EQ_IN_USE = 21, -- - PTL_MAX_ERRNO = 32 - PTL_NI_INVALID = 22, - PTL_MD_ILLEGAL = 23, - - PTL_MAX_ERRNO = 24 --} ptl_err_t; --/* If you change these, you must update the string table in api-errno.c */ -- --extern const char *ptl_err_str[]; -- --#endif diff --cc lnet/include/lnet/internal.h index a70b465,eae00a0..0000000 deleted file mode 100644,100644 --- a/lnet/include/lnet/internal.h +++ /dev/null @@@ -1,43 -1,16 +1,0 @@@ --#ifndef _P30_INTERNAL_H_ --#define _P30_INTERNAL_H_ -- -#include "build_check.h" --/* -- * p30/internal.h -- * -- * Internals for the API level library that are not needed -- * by the user application -- */ -- --#include - - extern int ptl_init; /* Has the library be initialized */ - - extern int ptl_ni_init(void); - extern int ptl_me_init(void); - extern int ptl_md_init(void); - extern int ptl_eq_init(void); - - extern int ptl_me_ni_init(nal_t * nal); - extern int ptl_md_ni_init(nal_t * nal); - extern int ptl_eq_ni_init(nal_t * nal); - - extern void ptl_ni_fini(void); - extern void ptl_me_fini(void); - extern void ptl_md_fini(void); - extern void ptl_eq_fini(void); - - extern void ptl_me_ni_fini(nal_t * nal); - extern void ptl_md_ni_fini(nal_t * nal); - extern void ptl_eq_ni_fini(nal_t * nal); -- - static inline ptl_eq_t * - ptl_handle2usereq (ptl_handle_eq_t *handle) - { - /* EQ handles are a little wierd. 
On the "user" side, the cookie - * is just a pointer to a queue of events in shared memory. It's - * cb_eq_handle is the "real" handle which we pass when we - * call do_forward(). */ - return (ptl_eq_t *)((unsigned long)handle->cookie); - } -extern int ptl_init; /* Has the library been initialized */ -- --#endif diff --cc lnet/include/lnet/lib-lnet.h index b1a6e04,4daf219..0000000 deleted file mode 100644,100644 --- a/lnet/include/lnet/lib-lnet.h +++ /dev/null @@@ -1,407 -1,460 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * lib-p30.h -- * -- * Top level include for library side routines -- */ -- --#ifndef _LIB_P30_H_ --#define _LIB_P30_H_ - -#include "build_check.h" -- --#ifdef __KERNEL__ --# include --# include --#else --# include --# include -# include --#endif --#include --#include --#include -#include --#include - #include - #include -- --static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh) --{ -- return (wh->wh_interface_cookie == PTL_WIRE_HANDLE_NONE.wh_interface_cookie && -- wh->wh_object_cookie == PTL_WIRE_HANDLE_NONE.wh_object_cookie); --} -- - #define state_lock(nal,flagsp) \ - do { \ - CDEBUG(D_PORTALS, "taking state lock\n"); \ - nal->cb_cli(nal, flagsp); \ - } while (0) -#ifdef __KERNEL__ -#define LIB_LOCK(nal,flags) \ - spin_lock_irqsave(&(nal)->libnal_ni.ni_lock, flags) -#define LIB_UNLOCK(nal,flags) \ - spin_unlock_irqrestore(&(nal)->libnal_ni.ni_lock, flags) -#else -#define LIB_LOCK(nal,flags) \ - (pthread_mutex_lock(&(nal)->libnal_ni.ni_mutex), (flags) = 0) -#define LIB_UNLOCK(nal,flags) \ - pthread_mutex_unlock(&(nal)->libnal_ni.ni_mutex) -#endif -- - #define state_unlock(nal,flagsp) \ - { \ - CDEBUG(D_PORTALS, "releasing state lock\n"); \ - nal->cb_sti(nal, flagsp); \ - } -- --#ifdef PTL_USE_LIB_FREELIST -- --#define MAX_MES 2048 --#define MAX_MDS 2048 --#define MAX_MSGS 2048 /* Outstanding messages */ --#define MAX_EQS 512 -- - extern int 
lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int nobj, int objsize); - extern void lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl); -extern int lib_freelist_init (lib_nal_t *nal, lib_freelist_t *fl, int nobj, int objsize); -extern void lib_freelist_fini (lib_nal_t *nal, lib_freelist_t *fl); -- --static inline void * --lib_freelist_alloc (lib_freelist_t *fl) --{ - /* ALWAYS called with statelock held */ - /* ALWAYS called with liblock held */ -- lib_freeobj_t *o; -- -- if (list_empty (&fl->fl_list)) -- return (NULL); -- -- o = list_entry (fl->fl_list.next, lib_freeobj_t, fo_list); -- list_del (&o->fo_list); -- return ((void *)&o->fo_contents); --} -- --static inline void --lib_freelist_free (lib_freelist_t *fl, void *obj) --{ - /* ALWAYS called with statelock held */ - /* ALWAYS called with liblock held */ -- lib_freeobj_t *o = list_entry (obj, lib_freeobj_t, fo_contents); -- -- list_add (&o->fo_list, &fl->fl_list); --} -- -- --static inline lib_eq_t * - lib_eq_alloc (nal_cb_t *nal) -lib_eq_alloc (lib_nal_t *nal) --{ - /* NEVER called with statelock held */ - /* NEVER called with liblock held */ -- unsigned long flags; -- lib_eq_t *eq; -- - state_lock (nal, &flags); - eq = (lib_eq_t *)lib_freelist_alloc (&nal->ni.ni_free_eqs); - state_unlock (nal, &flags); - LIB_LOCK (nal, flags); - eq = (lib_eq_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_eqs); - LIB_UNLOCK (nal, flags); -- -- return (eq); --} -- --static inline void - lib_eq_free (nal_cb_t *nal, lib_eq_t *eq) -lib_eq_free (lib_nal_t *nal, lib_eq_t *eq) --{ - /* ALWAYS called with statelock held */ - lib_freelist_free (&nal->ni.ni_free_eqs, eq); - /* ALWAYS called with liblock held */ - lib_freelist_free (&nal->libnal_ni.ni_free_eqs, eq); --} -- --static inline lib_md_t * - lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd) -lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd) --{ - /* NEVER called with statelock held */ - /* NEVER called with liblock held */ -- unsigned long flags; -- lib_md_t *md; -- - 
state_lock (nal, &flags); - md = (lib_md_t *)lib_freelist_alloc (&nal->ni.ni_free_mds); - state_unlock (nal, &flags); - LIB_LOCK (nal, flags); - md = (lib_md_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_mds); - LIB_UNLOCK (nal, flags); -- -- return (md); --} -- --static inline void - lib_md_free (nal_cb_t *nal, lib_md_t *md) -lib_md_free (lib_nal_t *nal, lib_md_t *md) --{ - /* ALWAYS called with statelock held */ - lib_freelist_free (&nal->ni.ni_free_mds, md); - /* ALWAYS called with liblock held */ - lib_freelist_free (&nal->libnal_ni.ni_free_mds, md); --} -- --static inline lib_me_t * - lib_me_alloc (nal_cb_t *nal) -lib_me_alloc (lib_nal_t *nal) --{ - /* NEVER called with statelock held */ - /* NEVER called with liblock held */ -- unsigned long flags; -- lib_me_t *me; -- - state_lock (nal, &flags); - me = (lib_me_t *)lib_freelist_alloc (&nal->ni.ni_free_mes); - state_unlock (nal, &flags); - LIB_LOCK (nal, flags); - me = (lib_me_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_mes); - LIB_UNLOCK (nal, flags); -- -- return (me); --} -- --static inline void - lib_me_free (nal_cb_t *nal, lib_me_t *me) -lib_me_free (lib_nal_t *nal, lib_me_t *me) --{ - /* ALWAYS called with statelock held */ - lib_freelist_free (&nal->ni.ni_free_mes, me); - /* ALWAYS called with liblock held */ - lib_freelist_free (&nal->libnal_ni.ni_free_mes, me); --} -- --static inline lib_msg_t * - lib_msg_alloc (nal_cb_t *nal) -lib_msg_alloc (lib_nal_t *nal) --{ - /* NEVER called with statelock held */ - /* NEVER called with liblock held */ -- unsigned long flags; -- lib_msg_t *msg; -- - state_lock (nal, &flags); - msg = (lib_msg_t *)lib_freelist_alloc (&nal->ni.ni_free_msgs); - state_unlock (nal, &flags); - LIB_LOCK (nal, flags); - msg = (lib_msg_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_msgs); - LIB_UNLOCK (nal, flags); -- -- if (msg != NULL) { -- /* NULL pointers, clear flags etc */ -- memset (msg, 0, sizeof (*msg)); -- msg->ack_wmd = PTL_WIRE_HANDLE_NONE; -- } -- return(msg); --} -- 
--static inline void - lib_msg_free (nal_cb_t *nal, lib_msg_t *msg) -lib_msg_free (lib_nal_t *nal, lib_msg_t *msg) --{ - /* ALWAYS called with statelock held */ - lib_freelist_free (&nal->ni.ni_free_msgs, msg); - /* ALWAYS called with liblock held */ - lib_freelist_free (&nal->libnal_ni.ni_free_msgs, msg); --} -- --#else -- --static inline lib_eq_t * - lib_eq_alloc (nal_cb_t *nal) -lib_eq_alloc (lib_nal_t *nal) --{ - /* NEVER called with statelock held */ - /* NEVER called with liblock held */ -- lib_eq_t *eq; -- -- PORTAL_ALLOC(eq, sizeof(*eq)); -- return (eq); --} -- --static inline void - lib_eq_free (nal_cb_t *nal, lib_eq_t *eq) -lib_eq_free (lib_nal_t *nal, lib_eq_t *eq) --{ - /* ALWAYS called with statelock held */ - /* ALWAYS called with liblock held */ -- PORTAL_FREE(eq, sizeof(*eq)); --} -- --static inline lib_md_t * - lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd) -lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd) --{ - /* NEVER called with statelock held */ - /* NEVER called with liblock held */ -- lib_md_t *md; -- int size; -- int niov; -- -- if ((umd->options & PTL_MD_KIOV) != 0) { - niov = umd->niov; - niov = umd->length; -- size = offsetof(lib_md_t, md_iov.kiov[niov]); -- } else { - niov = ((umd->options & PTL_MD_IOV) != 0) ? - umd->niov : 1; - niov = ((umd->options & PTL_MD_IOVEC) != 0) ? 
- umd->length : 1; -- size = offsetof(lib_md_t, md_iov.iov[niov]); -- } -- -- PORTAL_ALLOC(md, size); -- -- if (md != NULL) { -- /* Set here in case of early free */ -- md->options = umd->options; -- md->md_niov = niov; -- } -- -- return (md); --} -- --static inline void - lib_md_free (nal_cb_t *nal, lib_md_t *md) -lib_md_free (lib_nal_t *nal, lib_md_t *md) --{ - /* ALWAYS called with statelock held */ - /* ALWAYS called with liblock held */ -- int size; -- -- if ((md->options & PTL_MD_KIOV) != 0) -- size = offsetof(lib_md_t, md_iov.kiov[md->md_niov]); -- else -- size = offsetof(lib_md_t, md_iov.iov[md->md_niov]); -- -- PORTAL_FREE(md, size); --} -- --static inline lib_me_t * - lib_me_alloc (nal_cb_t *nal) -lib_me_alloc (lib_nal_t *nal) --{ - /* NEVER called with statelock held */ - /* NEVER called with liblock held */ -- lib_me_t *me; -- -- PORTAL_ALLOC(me, sizeof(*me)); -- return (me); --} -- --static inline void - lib_me_free(nal_cb_t *nal, lib_me_t *me) -lib_me_free(lib_nal_t *nal, lib_me_t *me) --{ - /* ALWAYS called with statelock held */ - /* ALWAYS called with liblock held */ -- PORTAL_FREE(me, sizeof(*me)); --} -- --static inline lib_msg_t * - lib_msg_alloc(nal_cb_t *nal) -lib_msg_alloc(lib_nal_t *nal) --{ - /* NEVER called with statelock held; may be in interrupt... */ - /* NEVER called with liblock held; may be in interrupt... 
*/ -- lib_msg_t *msg; -- -- if (in_interrupt()) -- PORTAL_ALLOC_ATOMIC(msg, sizeof(*msg)); -- else -- PORTAL_ALLOC(msg, sizeof(*msg)); -- -- if (msg != NULL) { -- /* NULL pointers, clear flags etc */ -- memset (msg, 0, sizeof (*msg)); -- msg->ack_wmd = PTL_WIRE_HANDLE_NONE; -- } -- return (msg); --} -- --static inline void - lib_msg_free(nal_cb_t *nal, lib_msg_t *msg) -lib_msg_free(lib_nal_t *nal, lib_msg_t *msg) --{ - /* ALWAYS called with statelock held */ - /* ALWAYS called with liblock held */ -- PORTAL_FREE(msg, sizeof(*msg)); --} --#endif -- - extern lib_handle_t *lib_lookup_cookie (nal_cb_t *nal, __u64 cookie, int type); - extern void lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh, int type); - extern void lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh); -extern lib_handle_t *lib_lookup_cookie (lib_nal_t *nal, __u64 cookie, int type); -extern void lib_initialise_handle (lib_nal_t *nal, lib_handle_t *lh, int type); -extern void lib_invalidate_handle (lib_nal_t *nal, lib_handle_t *lh); -- --static inline void - ptl_eq2handle (ptl_handle_eq_t *handle, lib_eq_t *eq) -ptl_eq2handle (ptl_handle_eq_t *handle, lib_nal_t *nal, lib_eq_t *eq) --{ - handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx; -- handle->cookie = eq->eq_lh.lh_cookie; --} -- --static inline lib_eq_t * - ptl_handle2eq (ptl_handle_eq_t *handle, nal_cb_t *nal) -ptl_handle2eq (ptl_handle_eq_t *handle, lib_nal_t *nal) --{ - /* ALWAYS called with statelock held */ - /* ALWAYS called with liblock held */ -- lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie, -- PTL_COOKIE_TYPE_EQ); -- if (lh == NULL) -- return (NULL); -- -- return (lh_entry (lh, lib_eq_t, eq_lh)); --} -- --static inline void - ptl_md2handle (ptl_handle_md_t *handle, lib_md_t *md) -ptl_md2handle (ptl_handle_md_t *handle, lib_nal_t *nal, lib_md_t *md) --{ - handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx; -- handle->cookie = md->md_lh.lh_cookie; --} -- --static inline lib_md_t * - ptl_handle2md 
(ptl_handle_md_t *handle, nal_cb_t *nal) -ptl_handle2md (ptl_handle_md_t *handle, lib_nal_t *nal) --{ - /* ALWAYS called with statelock held */ - /* ALWAYS called with liblock held */ -- lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie, -- PTL_COOKIE_TYPE_MD); -- if (lh == NULL) -- return (NULL); -- -- return (lh_entry (lh, lib_md_t, md_lh)); --} -- --static inline lib_md_t * - ptl_wire_handle2md (ptl_handle_wire_t *wh, nal_cb_t *nal) -ptl_wire_handle2md (ptl_handle_wire_t *wh, lib_nal_t *nal) --{ - /* ALWAYS called with statelock held */ - /* ALWAYS called with liblock held */ -- lib_handle_t *lh; -- - if (wh->wh_interface_cookie != nal->ni.ni_interface_cookie) - if (wh->wh_interface_cookie != nal->libnal_ni.ni_interface_cookie) -- return (NULL); -- -- lh = lib_lookup_cookie (nal, wh->wh_object_cookie, -- PTL_COOKIE_TYPE_MD); -- if (lh == NULL) -- return (NULL); -- -- return (lh_entry (lh, lib_md_t, md_lh)); --} -- --static inline void - ptl_me2handle (ptl_handle_me_t *handle, lib_me_t *me) -ptl_me2handle (ptl_handle_me_t *handle, lib_nal_t *nal, lib_me_t *me) --{ - handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx; -- handle->cookie = me->me_lh.lh_cookie; --} -- --static inline lib_me_t * - ptl_handle2me (ptl_handle_me_t *handle, nal_cb_t *nal) -ptl_handle2me (ptl_handle_me_t *handle, lib_nal_t *nal) --{ - /* ALWAYS called with statelock held */ - /* ALWAYS called with liblock held */ -- lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie, -- PTL_COOKIE_TYPE_ME); -- if (lh == NULL) -- return (NULL); -- -- return (lh_entry (lh, lib_me_t, me_lh)); --} -- - extern int lib_init(nal_cb_t * cb, ptl_nid_t nid, ptl_pid_t pid, int gsize, - ptl_pt_index_t tbl_size, ptl_ac_index_t ac_size); - extern int lib_fini(nal_cb_t * cb); - extern void lib_dispatch(nal_cb_t * cb, void *private, int index, - void *arg_block, void *ret_block); - extern char *dispatch_name(int index); -extern int lib_init(lib_nal_t *libnal, nal_t *apinal, - ptl_process_id_t pid, 
- ptl_ni_limits_t *desired_limits, - ptl_ni_limits_t *actual_limits); -extern int lib_fini(lib_nal_t *libnal); -- --/* - * When the NAL detects an incoming message, it should call - * lib_parse() decode it. The NAL callbacks will be handed - * the private cookie as a way for the NAL to maintain state - * about which transaction is being processed. An extra parameter, - * lib_cookie will contain the necessary information for - * finalizing the message. - * - * After it has finished the handling the message, it should - * call lib_finalize() with the lib_cookie parameter. - * Call backs will be made to write events, send acks or - * replies and so on. - * When the NAL detects an incoming message header, it should call - * lib_parse() decode it. If the message header is garbage, lib_parse() - * returns immediately with failure, otherwise the NAL callbacks will be - * called to receive the message body. They are handed the private cookie - * as a way for the NAL to maintain state about which transaction is being - * processed. An extra parameter, lib_msg contains the lib-level message - * state for passing to lib_finalize() when the message body has been - * received. 
-- */ - extern void lib_enq_event_locked (nal_cb_t *nal, void *private, -extern void lib_enq_event_locked (lib_nal_t *nal, void *private, -- lib_eq_t *eq, ptl_event_t *ev); - extern void lib_finalize (nal_cb_t *nal, void *private, lib_msg_t *msg, - ptl_err_t status); - extern void lib_parse (nal_cb_t *nal, ptl_hdr_t *hdr, void *private); - extern lib_msg_t *lib_fake_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, - lib_md_t *getmd); - extern void print_hdr (nal_cb_t * nal, ptl_hdr_t * hdr); -extern void lib_finalize (lib_nal_t *nal, void *private, lib_msg_t *msg, - ptl_ni_fail_t ni_fail_type); -extern ptl_err_t lib_parse (lib_nal_t *nal, ptl_hdr_t *hdr, void *private); -extern lib_msg_t *lib_create_reply_msg (lib_nal_t *nal, ptl_nid_t peer_nid, - lib_msg_t *get_msg); -extern void print_hdr (lib_nal_t * nal, ptl_hdr_t * hdr); -- -- --extern ptl_size_t lib_iov_nob (int niov, struct iovec *iov); --extern void lib_copy_iov2buf (char *dest, int niov, struct iovec *iov, -- ptl_size_t offset, ptl_size_t len); --extern void lib_copy_buf2iov (int niov, struct iovec *iov, ptl_size_t offset, -- char *src, ptl_size_t len); --extern int lib_extract_iov (int dst_niov, struct iovec *dst, -- int src_niov, struct iovec *src, -- ptl_size_t offset, ptl_size_t len); -- --extern ptl_size_t lib_kiov_nob (int niov, ptl_kiov_t *iov); --extern void lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, -- ptl_size_t offset, ptl_size_t len); --extern void lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, ptl_size_t offset, -- char *src, ptl_size_t len); --extern int lib_extract_kiov (int dst_niov, ptl_kiov_t *dst, -- int src_niov, ptl_kiov_t *src, -- ptl_size_t offset, ptl_size_t len); -- --extern void lib_assert_wire_constants (void); -- - extern ptl_err_t lib_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, lib_md_t *md, -extern ptl_err_t lib_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, lib_md_t *md, -- ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen); - extern ptl_err_t 
lib_send (nal_cb_t *nal, void *private, lib_msg_t *msg, -extern ptl_err_t lib_send (lib_nal_t *nal, void *private, lib_msg_t *msg, -- ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, -- lib_md_t *md, ptl_size_t offset, ptl_size_t len); -- - extern void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md_in, - ptl_md_t * md_out); - extern void lib_md_unlink(nal_cb_t * nal, lib_md_t * md_in); - extern void lib_me_unlink(nal_cb_t * nal, lib_me_t * me_in); -extern int lib_api_ni_status (nal_t *nal, ptl_sr_index_t sr_idx, - ptl_sr_value_t *status); -extern int lib_api_ni_dist (nal_t *nal, ptl_process_id_t *pid, - unsigned long *dist); - -extern int lib_api_eq_alloc (nal_t *nal, ptl_size_t count, - ptl_eq_handler_t callback, - ptl_handle_eq_t *handle); -extern int lib_api_eq_free(nal_t *nal, ptl_handle_eq_t *eqh); -extern int lib_api_eq_poll (nal_t *nal, - ptl_handle_eq_t *eventqs, int neq, int timeout_ms, - ptl_event_t *event, int *which); - -extern int lib_api_me_attach(nal_t *nal, - ptl_pt_index_t portal, - ptl_process_id_t match_id, - ptl_match_bits_t match_bits, - ptl_match_bits_t ignore_bits, - ptl_unlink_t unlink, ptl_ins_pos_t pos, - ptl_handle_me_t *handle); -extern int lib_api_me_insert(nal_t *nal, - ptl_handle_me_t *current_meh, - ptl_process_id_t match_id, - ptl_match_bits_t match_bits, - ptl_match_bits_t ignore_bits, - ptl_unlink_t unlink, ptl_ins_pos_t pos, - ptl_handle_me_t *handle); -extern int lib_api_me_unlink (nal_t *nal, ptl_handle_me_t *meh); -extern void lib_me_unlink(lib_nal_t *nal, lib_me_t *me); - -extern int lib_api_get_id(nal_t *nal, ptl_process_id_t *pid); - -extern void lib_md_unlink(lib_nal_t *nal, lib_md_t *md); -extern void lib_md_deconstruct(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd); -extern int lib_api_md_attach(nal_t *nal, ptl_handle_me_t *meh, - ptl_md_t *umd, ptl_unlink_t unlink, - ptl_handle_md_t *handle); -extern int lib_api_md_bind(nal_t *nal, ptl_md_t *umd, ptl_unlink_t unlink, - ptl_handle_md_t *handle); -extern int 
lib_api_md_unlink (nal_t *nal, ptl_handle_md_t *mdh); -extern int lib_api_md_update (nal_t *nal, ptl_handle_md_t *mdh, - ptl_md_t *oldumd, ptl_md_t *newumd, - ptl_handle_eq_t *testqh); - -extern int lib_api_get(nal_t *apinal, ptl_handle_md_t *mdh, - ptl_process_id_t *id, - ptl_pt_index_t portal, ptl_ac_index_t ac, - ptl_match_bits_t match_bits, ptl_size_t offset); -extern int lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh, - ptl_ack_req_t ack, ptl_process_id_t *id, - ptl_pt_index_t portal, ptl_ac_index_t ac, - ptl_match_bits_t match_bits, - ptl_size_t offset, ptl_hdr_data_t hdr_data); -extern int lib_api_fail_nid(nal_t *apinal, ptl_nid_t nid, unsigned int threshold); - --#endif diff --cc lnet/include/lnet/lib-p30.h index b1a6e04,4daf219..0000000 deleted file mode 100644,100644 --- a/lnet/include/lnet/lib-p30.h +++ /dev/null @@@ -1,407 -1,460 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * lib-p30.h -- * -- * Top level include for library side routines -- */ -- --#ifndef _LIB_P30_H_ --#define _LIB_P30_H_ - -#include "build_check.h" -- --#ifdef __KERNEL__ --# include --# include --#else --# include --# include -# include --#endif --#include --#include --#include -#include --#include - #include - #include -- --static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh) --{ -- return (wh->wh_interface_cookie == PTL_WIRE_HANDLE_NONE.wh_interface_cookie && -- wh->wh_object_cookie == PTL_WIRE_HANDLE_NONE.wh_object_cookie); --} -- - #define state_lock(nal,flagsp) \ - do { \ - CDEBUG(D_PORTALS, "taking state lock\n"); \ - nal->cb_cli(nal, flagsp); \ - } while (0) -#ifdef __KERNEL__ -#define LIB_LOCK(nal,flags) \ - spin_lock_irqsave(&(nal)->libnal_ni.ni_lock, flags) -#define LIB_UNLOCK(nal,flags) \ - spin_unlock_irqrestore(&(nal)->libnal_ni.ni_lock, flags) -#else -#define LIB_LOCK(nal,flags) \ - (pthread_mutex_lock(&(nal)->libnal_ni.ni_mutex), (flags) = 0) -#define LIB_UNLOCK(nal,flags) \ - 
pthread_mutex_unlock(&(nal)->libnal_ni.ni_mutex) -#endif -- - #define state_unlock(nal,flagsp) \ - { \ - CDEBUG(D_PORTALS, "releasing state lock\n"); \ - nal->cb_sti(nal, flagsp); \ - } -- --#ifdef PTL_USE_LIB_FREELIST -- --#define MAX_MES 2048 --#define MAX_MDS 2048 --#define MAX_MSGS 2048 /* Outstanding messages */ --#define MAX_EQS 512 -- - extern int lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int nobj, int objsize); - extern void lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl); -extern int lib_freelist_init (lib_nal_t *nal, lib_freelist_t *fl, int nobj, int objsize); -extern void lib_freelist_fini (lib_nal_t *nal, lib_freelist_t *fl); -- --static inline void * --lib_freelist_alloc (lib_freelist_t *fl) --{ - /* ALWAYS called with statelock held */ - /* ALWAYS called with liblock held */ -- lib_freeobj_t *o; -- -- if (list_empty (&fl->fl_list)) -- return (NULL); -- -- o = list_entry (fl->fl_list.next, lib_freeobj_t, fo_list); -- list_del (&o->fo_list); -- return ((void *)&o->fo_contents); --} -- --static inline void --lib_freelist_free (lib_freelist_t *fl, void *obj) --{ - /* ALWAYS called with statelock held */ - /* ALWAYS called with liblock held */ -- lib_freeobj_t *o = list_entry (obj, lib_freeobj_t, fo_contents); -- -- list_add (&o->fo_list, &fl->fl_list); --} -- -- --static inline lib_eq_t * - lib_eq_alloc (nal_cb_t *nal) -lib_eq_alloc (lib_nal_t *nal) --{ - /* NEVER called with statelock held */ - /* NEVER called with liblock held */ -- unsigned long flags; -- lib_eq_t *eq; -- - state_lock (nal, &flags); - eq = (lib_eq_t *)lib_freelist_alloc (&nal->ni.ni_free_eqs); - state_unlock (nal, &flags); - LIB_LOCK (nal, flags); - eq = (lib_eq_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_eqs); - LIB_UNLOCK (nal, flags); -- -- return (eq); --} -- --static inline void - lib_eq_free (nal_cb_t *nal, lib_eq_t *eq) -lib_eq_free (lib_nal_t *nal, lib_eq_t *eq) --{ - /* ALWAYS called with statelock held */ - lib_freelist_free (&nal->ni.ni_free_eqs, eq); - 
/* ALWAYS called with liblock held */ - lib_freelist_free (&nal->libnal_ni.ni_free_eqs, eq); --} -- --static inline lib_md_t * - lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd) -lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd) --{ - /* NEVER called with statelock held */ - /* NEVER called with liblock held */ -- unsigned long flags; -- lib_md_t *md; -- - state_lock (nal, &flags); - md = (lib_md_t *)lib_freelist_alloc (&nal->ni.ni_free_mds); - state_unlock (nal, &flags); - LIB_LOCK (nal, flags); - md = (lib_md_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_mds); - LIB_UNLOCK (nal, flags); -- -- return (md); --} -- --static inline void - lib_md_free (nal_cb_t *nal, lib_md_t *md) -lib_md_free (lib_nal_t *nal, lib_md_t *md) --{ - /* ALWAYS called with statelock held */ - lib_freelist_free (&nal->ni.ni_free_mds, md); - /* ALWAYS called with liblock held */ - lib_freelist_free (&nal->libnal_ni.ni_free_mds, md); --} -- --static inline lib_me_t * - lib_me_alloc (nal_cb_t *nal) -lib_me_alloc (lib_nal_t *nal) --{ - /* NEVER called with statelock held */ - /* NEVER called with liblock held */ -- unsigned long flags; -- lib_me_t *me; -- - state_lock (nal, &flags); - me = (lib_me_t *)lib_freelist_alloc (&nal->ni.ni_free_mes); - state_unlock (nal, &flags); - LIB_LOCK (nal, flags); - me = (lib_me_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_mes); - LIB_UNLOCK (nal, flags); -- -- return (me); --} -- --static inline void - lib_me_free (nal_cb_t *nal, lib_me_t *me) -lib_me_free (lib_nal_t *nal, lib_me_t *me) --{ - /* ALWAYS called with statelock held */ - lib_freelist_free (&nal->ni.ni_free_mes, me); - /* ALWAYS called with liblock held */ - lib_freelist_free (&nal->libnal_ni.ni_free_mes, me); --} -- --static inline lib_msg_t * - lib_msg_alloc (nal_cb_t *nal) -lib_msg_alloc (lib_nal_t *nal) --{ - /* NEVER called with statelock held */ - /* NEVER called with liblock held */ -- unsigned long flags; -- lib_msg_t *msg; -- - state_lock (nal, &flags); - msg = (lib_msg_t *)lib_freelist_alloc 
(&nal->ni.ni_free_msgs); - state_unlock (nal, &flags); - LIB_LOCK (nal, flags); - msg = (lib_msg_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_msgs); - LIB_UNLOCK (nal, flags); -- -- if (msg != NULL) { -- /* NULL pointers, clear flags etc */ -- memset (msg, 0, sizeof (*msg)); -- msg->ack_wmd = PTL_WIRE_HANDLE_NONE; -- } -- return(msg); --} -- --static inline void - lib_msg_free (nal_cb_t *nal, lib_msg_t *msg) -lib_msg_free (lib_nal_t *nal, lib_msg_t *msg) --{ - /* ALWAYS called with statelock held */ - lib_freelist_free (&nal->ni.ni_free_msgs, msg); - /* ALWAYS called with liblock held */ - lib_freelist_free (&nal->libnal_ni.ni_free_msgs, msg); --} -- --#else -- --static inline lib_eq_t * - lib_eq_alloc (nal_cb_t *nal) -lib_eq_alloc (lib_nal_t *nal) --{ - /* NEVER called with statelock held */ - /* NEVER called with liblock held */ -- lib_eq_t *eq; -- -- PORTAL_ALLOC(eq, sizeof(*eq)); -- return (eq); --} -- --static inline void - lib_eq_free (nal_cb_t *nal, lib_eq_t *eq) -lib_eq_free (lib_nal_t *nal, lib_eq_t *eq) --{ - /* ALWAYS called with statelock held */ - /* ALWAYS called with liblock held */ -- PORTAL_FREE(eq, sizeof(*eq)); --} -- --static inline lib_md_t * - lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd) -lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd) --{ - /* NEVER called with statelock held */ - /* NEVER called with liblock held */ -- lib_md_t *md; -- int size; -- int niov; -- -- if ((umd->options & PTL_MD_KIOV) != 0) { - niov = umd->niov; - niov = umd->length; -- size = offsetof(lib_md_t, md_iov.kiov[niov]); -- } else { - niov = ((umd->options & PTL_MD_IOV) != 0) ? - umd->niov : 1; - niov = ((umd->options & PTL_MD_IOVEC) != 0) ? 
- umd->length : 1; -- size = offsetof(lib_md_t, md_iov.iov[niov]); -- } -- -- PORTAL_ALLOC(md, size); -- -- if (md != NULL) { -- /* Set here in case of early free */ -- md->options = umd->options; -- md->md_niov = niov; -- } -- -- return (md); --} -- --static inline void - lib_md_free (nal_cb_t *nal, lib_md_t *md) -lib_md_free (lib_nal_t *nal, lib_md_t *md) --{ - /* ALWAYS called with statelock held */ - /* ALWAYS called with liblock held */ -- int size; -- -- if ((md->options & PTL_MD_KIOV) != 0) -- size = offsetof(lib_md_t, md_iov.kiov[md->md_niov]); -- else -- size = offsetof(lib_md_t, md_iov.iov[md->md_niov]); -- -- PORTAL_FREE(md, size); --} -- --static inline lib_me_t * - lib_me_alloc (nal_cb_t *nal) -lib_me_alloc (lib_nal_t *nal) --{ - /* NEVER called with statelock held */ - /* NEVER called with liblock held */ -- lib_me_t *me; -- -- PORTAL_ALLOC(me, sizeof(*me)); -- return (me); --} -- --static inline void - lib_me_free(nal_cb_t *nal, lib_me_t *me) -lib_me_free(lib_nal_t *nal, lib_me_t *me) --{ - /* ALWAYS called with statelock held */ - /* ALWAYS called with liblock held */ -- PORTAL_FREE(me, sizeof(*me)); --} -- --static inline lib_msg_t * - lib_msg_alloc(nal_cb_t *nal) -lib_msg_alloc(lib_nal_t *nal) --{ - /* NEVER called with statelock held; may be in interrupt... */ - /* NEVER called with liblock held; may be in interrupt... 
*/ -- lib_msg_t *msg; -- -- if (in_interrupt()) -- PORTAL_ALLOC_ATOMIC(msg, sizeof(*msg)); -- else -- PORTAL_ALLOC(msg, sizeof(*msg)); -- -- if (msg != NULL) { -- /* NULL pointers, clear flags etc */ -- memset (msg, 0, sizeof (*msg)); -- msg->ack_wmd = PTL_WIRE_HANDLE_NONE; -- } -- return (msg); --} -- --static inline void - lib_msg_free(nal_cb_t *nal, lib_msg_t *msg) -lib_msg_free(lib_nal_t *nal, lib_msg_t *msg) --{ - /* ALWAYS called with statelock held */ - /* ALWAYS called with liblock held */ -- PORTAL_FREE(msg, sizeof(*msg)); --} --#endif -- - extern lib_handle_t *lib_lookup_cookie (nal_cb_t *nal, __u64 cookie, int type); - extern void lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh, int type); - extern void lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh); -extern lib_handle_t *lib_lookup_cookie (lib_nal_t *nal, __u64 cookie, int type); -extern void lib_initialise_handle (lib_nal_t *nal, lib_handle_t *lh, int type); -extern void lib_invalidate_handle (lib_nal_t *nal, lib_handle_t *lh); -- --static inline void - ptl_eq2handle (ptl_handle_eq_t *handle, lib_eq_t *eq) -ptl_eq2handle (ptl_handle_eq_t *handle, lib_nal_t *nal, lib_eq_t *eq) --{ - handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx; -- handle->cookie = eq->eq_lh.lh_cookie; --} -- --static inline lib_eq_t * - ptl_handle2eq (ptl_handle_eq_t *handle, nal_cb_t *nal) -ptl_handle2eq (ptl_handle_eq_t *handle, lib_nal_t *nal) --{ - /* ALWAYS called with statelock held */ - /* ALWAYS called with liblock held */ -- lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie, -- PTL_COOKIE_TYPE_EQ); -- if (lh == NULL) -- return (NULL); -- -- return (lh_entry (lh, lib_eq_t, eq_lh)); --} -- --static inline void - ptl_md2handle (ptl_handle_md_t *handle, lib_md_t *md) -ptl_md2handle (ptl_handle_md_t *handle, lib_nal_t *nal, lib_md_t *md) --{ - handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx; -- handle->cookie = md->md_lh.lh_cookie; --} -- --static inline lib_md_t * - ptl_handle2md 
(ptl_handle_md_t *handle, nal_cb_t *nal) -ptl_handle2md (ptl_handle_md_t *handle, lib_nal_t *nal) --{ - /* ALWAYS called with statelock held */ - /* ALWAYS called with liblock held */ -- lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie, -- PTL_COOKIE_TYPE_MD); -- if (lh == NULL) -- return (NULL); -- -- return (lh_entry (lh, lib_md_t, md_lh)); --} -- --static inline lib_md_t * - ptl_wire_handle2md (ptl_handle_wire_t *wh, nal_cb_t *nal) -ptl_wire_handle2md (ptl_handle_wire_t *wh, lib_nal_t *nal) --{ - /* ALWAYS called with statelock held */ - /* ALWAYS called with liblock held */ -- lib_handle_t *lh; -- - if (wh->wh_interface_cookie != nal->ni.ni_interface_cookie) - if (wh->wh_interface_cookie != nal->libnal_ni.ni_interface_cookie) -- return (NULL); -- -- lh = lib_lookup_cookie (nal, wh->wh_object_cookie, -- PTL_COOKIE_TYPE_MD); -- if (lh == NULL) -- return (NULL); -- -- return (lh_entry (lh, lib_md_t, md_lh)); --} -- --static inline void - ptl_me2handle (ptl_handle_me_t *handle, lib_me_t *me) -ptl_me2handle (ptl_handle_me_t *handle, lib_nal_t *nal, lib_me_t *me) --{ - handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx; -- handle->cookie = me->me_lh.lh_cookie; --} -- --static inline lib_me_t * - ptl_handle2me (ptl_handle_me_t *handle, nal_cb_t *nal) -ptl_handle2me (ptl_handle_me_t *handle, lib_nal_t *nal) --{ - /* ALWAYS called with statelock held */ - /* ALWAYS called with liblock held */ -- lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie, -- PTL_COOKIE_TYPE_ME); -- if (lh == NULL) -- return (NULL); -- -- return (lh_entry (lh, lib_me_t, me_lh)); --} -- - extern int lib_init(nal_cb_t * cb, ptl_nid_t nid, ptl_pid_t pid, int gsize, - ptl_pt_index_t tbl_size, ptl_ac_index_t ac_size); - extern int lib_fini(nal_cb_t * cb); - extern void lib_dispatch(nal_cb_t * cb, void *private, int index, - void *arg_block, void *ret_block); - extern char *dispatch_name(int index); -extern int lib_init(lib_nal_t *libnal, nal_t *apinal, - ptl_process_id_t pid, 
- ptl_ni_limits_t *desired_limits, - ptl_ni_limits_t *actual_limits); -extern int lib_fini(lib_nal_t *libnal); -- --/* - * When the NAL detects an incoming message, it should call - * lib_parse() decode it. The NAL callbacks will be handed - * the private cookie as a way for the NAL to maintain state - * about which transaction is being processed. An extra parameter, - * lib_cookie will contain the necessary information for - * finalizing the message. - * - * After it has finished the handling the message, it should - * call lib_finalize() with the lib_cookie parameter. - * Call backs will be made to write events, send acks or - * replies and so on. - * When the NAL detects an incoming message header, it should call - * lib_parse() decode it. If the message header is garbage, lib_parse() - * returns immediately with failure, otherwise the NAL callbacks will be - * called to receive the message body. They are handed the private cookie - * as a way for the NAL to maintain state about which transaction is being - * processed. An extra parameter, lib_msg contains the lib-level message - * state for passing to lib_finalize() when the message body has been - * received. 
-- */ - extern void lib_enq_event_locked (nal_cb_t *nal, void *private, -extern void lib_enq_event_locked (lib_nal_t *nal, void *private, -- lib_eq_t *eq, ptl_event_t *ev); - extern void lib_finalize (nal_cb_t *nal, void *private, lib_msg_t *msg, - ptl_err_t status); - extern void lib_parse (nal_cb_t *nal, ptl_hdr_t *hdr, void *private); - extern lib_msg_t *lib_fake_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, - lib_md_t *getmd); - extern void print_hdr (nal_cb_t * nal, ptl_hdr_t * hdr); -extern void lib_finalize (lib_nal_t *nal, void *private, lib_msg_t *msg, - ptl_ni_fail_t ni_fail_type); -extern ptl_err_t lib_parse (lib_nal_t *nal, ptl_hdr_t *hdr, void *private); -extern lib_msg_t *lib_create_reply_msg (lib_nal_t *nal, ptl_nid_t peer_nid, - lib_msg_t *get_msg); -extern void print_hdr (lib_nal_t * nal, ptl_hdr_t * hdr); -- -- --extern ptl_size_t lib_iov_nob (int niov, struct iovec *iov); --extern void lib_copy_iov2buf (char *dest, int niov, struct iovec *iov, -- ptl_size_t offset, ptl_size_t len); --extern void lib_copy_buf2iov (int niov, struct iovec *iov, ptl_size_t offset, -- char *src, ptl_size_t len); --extern int lib_extract_iov (int dst_niov, struct iovec *dst, -- int src_niov, struct iovec *src, -- ptl_size_t offset, ptl_size_t len); -- --extern ptl_size_t lib_kiov_nob (int niov, ptl_kiov_t *iov); --extern void lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, -- ptl_size_t offset, ptl_size_t len); --extern void lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, ptl_size_t offset, -- char *src, ptl_size_t len); --extern int lib_extract_kiov (int dst_niov, ptl_kiov_t *dst, -- int src_niov, ptl_kiov_t *src, -- ptl_size_t offset, ptl_size_t len); -- --extern void lib_assert_wire_constants (void); -- - extern ptl_err_t lib_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, lib_md_t *md, -extern ptl_err_t lib_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, lib_md_t *md, -- ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen); - extern ptl_err_t 
lib_send (nal_cb_t *nal, void *private, lib_msg_t *msg, -extern ptl_err_t lib_send (lib_nal_t *nal, void *private, lib_msg_t *msg, -- ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, -- lib_md_t *md, ptl_size_t offset, ptl_size_t len); -- - extern void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md_in, - ptl_md_t * md_out); - extern void lib_md_unlink(nal_cb_t * nal, lib_md_t * md_in); - extern void lib_me_unlink(nal_cb_t * nal, lib_me_t * me_in); -extern int lib_api_ni_status (nal_t *nal, ptl_sr_index_t sr_idx, - ptl_sr_value_t *status); -extern int lib_api_ni_dist (nal_t *nal, ptl_process_id_t *pid, - unsigned long *dist); - -extern int lib_api_eq_alloc (nal_t *nal, ptl_size_t count, - ptl_eq_handler_t callback, - ptl_handle_eq_t *handle); -extern int lib_api_eq_free(nal_t *nal, ptl_handle_eq_t *eqh); -extern int lib_api_eq_poll (nal_t *nal, - ptl_handle_eq_t *eventqs, int neq, int timeout_ms, - ptl_event_t *event, int *which); - -extern int lib_api_me_attach(nal_t *nal, - ptl_pt_index_t portal, - ptl_process_id_t match_id, - ptl_match_bits_t match_bits, - ptl_match_bits_t ignore_bits, - ptl_unlink_t unlink, ptl_ins_pos_t pos, - ptl_handle_me_t *handle); -extern int lib_api_me_insert(nal_t *nal, - ptl_handle_me_t *current_meh, - ptl_process_id_t match_id, - ptl_match_bits_t match_bits, - ptl_match_bits_t ignore_bits, - ptl_unlink_t unlink, ptl_ins_pos_t pos, - ptl_handle_me_t *handle); -extern int lib_api_me_unlink (nal_t *nal, ptl_handle_me_t *meh); -extern void lib_me_unlink(lib_nal_t *nal, lib_me_t *me); - -extern int lib_api_get_id(nal_t *nal, ptl_process_id_t *pid); - -extern void lib_md_unlink(lib_nal_t *nal, lib_md_t *md); -extern void lib_md_deconstruct(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd); -extern int lib_api_md_attach(nal_t *nal, ptl_handle_me_t *meh, - ptl_md_t *umd, ptl_unlink_t unlink, - ptl_handle_md_t *handle); -extern int lib_api_md_bind(nal_t *nal, ptl_md_t *umd, ptl_unlink_t unlink, - ptl_handle_md_t *handle); -extern int 
lib_api_md_unlink (nal_t *nal, ptl_handle_md_t *mdh); -extern int lib_api_md_update (nal_t *nal, ptl_handle_md_t *mdh, - ptl_md_t *oldumd, ptl_md_t *newumd, - ptl_handle_eq_t *testqh); - -extern int lib_api_get(nal_t *apinal, ptl_handle_md_t *mdh, - ptl_process_id_t *id, - ptl_pt_index_t portal, ptl_ac_index_t ac, - ptl_match_bits_t match_bits, ptl_size_t offset); -extern int lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh, - ptl_ack_req_t ack, ptl_process_id_t *id, - ptl_pt_index_t portal, ptl_ac_index_t ac, - ptl_match_bits_t match_bits, - ptl_size_t offset, ptl_hdr_data_t hdr_data); -extern int lib_api_fail_nid(nal_t *apinal, ptl_nid_t nid, unsigned int threshold); - --#endif diff --cc lnet/include/lnet/lib-types.h index d05d3fa,cfcef2b..0000000 deleted file mode 100644,100644 --- a/lnet/include/lnet/lib-types.h +++ /dev/null @@@ -1,267 -1,359 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * p30/lib-types.h -- * -- * Types used by the library side routines that do not need to be -- * exposed to the user application -- */ -- --#ifndef _LIB_TYPES_H_ --#define _LIB_TYPES_H_ - -#include "build_check.h" -- --#include -#include --#ifdef __KERNEL__ --# include --# include --# include --#else --# define PTL_USE_LIB_FREELIST --# include --#endif - - /* struct nal_cb_t is defined in lib-nal.h */ - typedef struct nal_cb_t nal_cb_t; -- --typedef char *user_ptr; --typedef struct lib_msg_t lib_msg_t; --typedef struct lib_ptl_t lib_ptl_t; --typedef struct lib_ac_t lib_ac_t; --typedef struct lib_me_t lib_me_t; --typedef struct lib_md_t lib_md_t; --typedef struct lib_eq_t lib_eq_t; -- --#define WIRE_ATTR __attribute__((packed)) -- --/* The wire handle's interface cookie only matches one network interface in -- * one epoch (i.e. new cookie when the interface restarts or the node -- * reboots). The object cookie only matches one object on that interface -- * during that object's lifetime (i.e. 
no cookie re-use). */ --typedef struct { -- __u64 wh_interface_cookie; -- __u64 wh_object_cookie; --} WIRE_ATTR ptl_handle_wire_t; -- --/* byte-flip insensitive! */ --#define PTL_WIRE_HANDLE_NONE \ --((const ptl_handle_wire_t) {.wh_interface_cookie = -1, .wh_object_cookie = -1}) -- --typedef enum { -- PTL_MSG_ACK = 0, -- PTL_MSG_PUT, -- PTL_MSG_GET, -- PTL_MSG_REPLY, -- PTL_MSG_HELLO, --} ptl_msg_type_t; -- --/* The variant fields of the portals message header are aligned on an 8 -- * byte boundary in the message header. Note that all types used in these -- * wire structs MUST be fixed size and the smaller types are placed at the -- * end. */ --typedef struct ptl_ack { -- ptl_handle_wire_t dst_wmd; -- ptl_match_bits_t match_bits; -- ptl_size_t mlength; --} WIRE_ATTR ptl_ack_t; -- --typedef struct ptl_put { -- ptl_handle_wire_t ack_wmd; -- ptl_match_bits_t match_bits; -- ptl_hdr_data_t hdr_data; -- ptl_pt_index_t ptl_index; -- ptl_size_t offset; --} WIRE_ATTR ptl_put_t; -- --typedef struct ptl_get { -- ptl_handle_wire_t return_wmd; -- ptl_match_bits_t match_bits; -- ptl_pt_index_t ptl_index; -- ptl_size_t src_offset; -- ptl_size_t sink_length; --} WIRE_ATTR ptl_get_t; -- --typedef struct ptl_reply { -- ptl_handle_wire_t dst_wmd; --} WIRE_ATTR ptl_reply_t; -- --typedef struct ptl_hello { -- __u64 incarnation; -- __u32 type; --} WIRE_ATTR ptl_hello_t; -- --typedef struct { -- ptl_nid_t dest_nid; -- ptl_nid_t src_nid; -- ptl_pid_t dest_pid; -- ptl_pid_t src_pid; -- __u32 type; /* ptl_msg_type_t */ -- __u32 payload_length; /* payload data to follow */ -- /*<------__u64 aligned------->*/ -- union { -- ptl_ack_t ack; -- ptl_put_t put; -- ptl_get_t get; -- ptl_reply_t reply; -- ptl_hello_t hello; -- } msg; --} WIRE_ATTR ptl_hdr_t; -- --/* A HELLO message contains the portals magic number and protocol version -- * code in the header's dest_nid, the peer's NID in the src_nid, and -- * PTL_MSG_HELLO in the type field. 
All other common fields are zero -- * (including payload_size; i.e. no payload). -- * This is for use by byte-stream NALs (e.g. TCP/IP) to check the peer is -- * running the same protocol and to find out its NID, so that hosts with -- * multiple IP interfaces can have a single NID. These NALs should exchange -- * HELLO messages when a connection is first established. -- * Individual NALs can put whatever else they fancy in ptl_hdr_t::msg. -- */ --typedef struct { -- __u32 magic; /* PORTALS_PROTO_MAGIC */ -- __u16 version_major; /* increment on incompatible change */ -- __u16 version_minor; /* increment on compatible change */ --} WIRE_ATTR ptl_magicversion_t; -- --#define PORTALS_PROTO_MAGIC 0xeebc0ded -- - #define PORTALS_PROTO_VERSION_MAJOR 0 - #define PORTALS_PROTO_VERSION_MINOR 3 -#define PORTALS_PROTO_VERSION_MAJOR 1 -#define PORTALS_PROTO_VERSION_MINOR 0 -- --typedef struct { -- long recv_count, recv_length, send_count, send_length, drop_count, -- drop_length, msgs_alloc, msgs_max; --} lib_counters_t; -- --/* temporary expedient: limit number of entries in discontiguous MDs */ - #define PTL_MTU (512<<10) - #define PTL_MD_MAX_IOV 128 -#define PTL_MTU (1<<20) -#define PTL_MD_MAX_IOV 256 -- --struct lib_msg_t { -- struct list_head msg_list; -- lib_md_t *md; -- ptl_handle_wire_t ack_wmd; -- ptl_event_t ev; --}; -- --struct lib_ptl_t { -- ptl_pt_index_t size; -- struct list_head *tbl; --}; -- --struct lib_ac_t { -- int next_free; --}; -- --typedef struct { -- struct list_head lh_hash_chain; -- __u64 lh_cookie; --} lib_handle_t; -- --#define lh_entry(ptr, type, member) \ -- ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) -- --struct lib_eq_t { -- struct list_head eq_list; -- lib_handle_t eq_lh; - ptl_seq_t sequence; - ptl_size_t size; - ptl_event_t *base; - ptl_seq_t eq_enq_seq; - ptl_seq_t eq_deq_seq; - ptl_size_t eq_size; - ptl_event_t *eq_events; -- int eq_refcount; - int (*event_callback) (ptl_event_t * event); - ptl_eq_handler_t eq_callback; -- 
void *eq_addrkey; --}; -- --struct lib_me_t { -- struct list_head me_list; -- lib_handle_t me_lh; -- ptl_process_id_t match_id; -- ptl_match_bits_t match_bits, ignore_bits; -- ptl_unlink_t unlink; -- lib_md_t *md; --}; -- --struct lib_md_t { -- struct list_head md_list; -- lib_handle_t md_lh; -- lib_me_t *me; -- user_ptr start; -- ptl_size_t offset; -- ptl_size_t length; -- ptl_size_t max_size; -- int threshold; -- int pending; - ptl_unlink_t unlink; -- unsigned int options; -- unsigned int md_flags; -- void *user_ptr; -- lib_eq_t *eq; -- void *md_addrkey; -- unsigned int md_niov; /* # frags */ -- union { -- struct iovec iov[PTL_MD_MAX_IOV]; -- ptl_kiov_t kiov[PTL_MD_MAX_IOV]; -- } md_iov; --}; -- - #define PTL_MD_FLAG_UNLINK (1 << 0) -#define PTL_MD_FLAG_ZOMBIE (1 << 0) -#define PTL_MD_FLAG_AUTO_UNLINK (1 << 1) - -static inline int lib_md_exhausted (lib_md_t *md) -{ - return (md->threshold == 0 || - ((md->options & PTL_MD_MAX_SIZE) != 0 && - md->offset + md->max_size > md->length)); -} -- --#ifdef PTL_USE_LIB_FREELIST --typedef struct --{ -- void *fl_objs; /* single contiguous array of objects */ -- int fl_nobjs; /* the number of them */ -- int fl_objsize; /* the size (including overhead) of each of them */ -- struct list_head fl_list; /* where they are enqueued */ --} lib_freelist_t; -- --typedef struct --{ -- struct list_head fo_list; /* enqueue on fl_list */ -- void *fo_contents; /* aligned contents */ --} lib_freeobj_t; --#endif -- --typedef struct { -- /* info about peers we are trying to fail */ -- struct list_head tp_list; /* stash in ni.ni_test_peers */ -- ptl_nid_t tp_nid; /* matching nid */ -- unsigned int tp_threshold; /* # failures to simulate */ --} lib_test_peer_t; -- --#define PTL_COOKIE_TYPE_MD 1 --#define PTL_COOKIE_TYPE_ME 2 --#define PTL_COOKIE_TYPE_EQ 3 --#define PTL_COOKIE_TYPES 4 --/* PTL_COOKIE_TYPES must be a power of 2, so the cookie type can be -- * extracted by masking with (PTL_COOKIE_TYPES - 1) */ -- - typedef struct { - int up; - int 
refcnt; - ptl_nid_t nid; - ptl_pid_t pid; - int num_nodes; - unsigned int debug; - lib_ptl_t tbl; - lib_ac_t ac; - lib_counters_t counters; -typedef struct lib_ni -{ - nal_t *ni_api; - ptl_process_id_t ni_pid; - lib_ptl_t ni_portals; - lib_counters_t ni_counters; - ptl_ni_limits_t ni_actual_limits; -- -- int ni_lh_hash_size; /* size of lib handle hash table */ -- struct list_head *ni_lh_hash_table; /* all extant lib handles, this interface */ -- __u64 ni_next_object_cookie; /* cookie generator */ -- __u64 ni_interface_cookie; /* uniquely identifies this ni in this epoch */ -- - struct list_head ni_test_peers; - struct list_head ni_test_peers; -- --#ifdef PTL_USE_LIB_FREELIST - lib_freelist_t ni_free_mes; - lib_freelist_t ni_free_msgs; - lib_freelist_t ni_free_mds; - lib_freelist_t ni_free_eqs; - lib_freelist_t ni_free_mes; - lib_freelist_t ni_free_msgs; - lib_freelist_t ni_free_mds; - lib_freelist_t ni_free_eqs; --#endif - struct list_head ni_active_msgs; - struct list_head ni_active_mds; - struct list_head ni_active_eqs; - - struct list_head ni_active_msgs; - struct list_head ni_active_mds; - struct list_head ni_active_eqs; - -#ifdef __KERNEL__ - spinlock_t ni_lock; - wait_queue_head_t ni_waitq; -#else - pthread_mutex_t ni_mutex; - pthread_cond_t ni_cond; -#endif --} lib_ni_t; - - -typedef struct lib_nal -{ - /* lib-level interface state */ - lib_ni_t libnal_ni; - - /* NAL-private data */ - void *libnal_data; - - /* - * send: Sends a preformatted header and payload data to a - * specified remote process. The payload is scattered over 'niov' - * fragments described by iov, starting at 'offset' for 'mlen' - * bytes. - * NB the NAL may NOT overwrite iov. 
- * PTL_OK on success => NAL has committed to send and will call - * lib_finalize on completion - */ - ptl_err_t (*libnal_send) - (struct lib_nal *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int niov, struct iovec *iov, - size_t offset, size_t mlen); - - /* as send, but with a set of page fragments (NULL if not supported) */ - ptl_err_t (*libnal_send_pages) - (struct lib_nal *nal, void *private, lib_msg_t * cookie, - ptl_hdr_t * hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int niov, ptl_kiov_t *iov, - size_t offset, size_t mlen); - /* - * recv: Receives an incoming message from a remote process. The - * payload is to be received into the scattered buffer of 'niov' - * fragments described by iov, starting at 'offset' for 'mlen' - * bytes. Payload bytes after 'mlen' up to 'rlen' are to be - * discarded. - * NB the NAL may NOT overwrite iov. - * PTL_OK on success => NAL has committed to receive and will call - * lib_finalize on completion - */ - ptl_err_t (*libnal_recv) - (struct lib_nal *nal, void *private, lib_msg_t * cookie, - unsigned int niov, struct iovec *iov, - size_t offset, size_t mlen, size_t rlen); - - /* as recv, but with a set of page fragments (NULL if not supported) */ - ptl_err_t (*libnal_recv_pages) - (struct lib_nal *nal, void *private, lib_msg_t * cookie, - unsigned int niov, ptl_kiov_t *iov, - size_t offset, size_t mlen, size_t rlen); - - /* - * (un)map: Tell the NAL about some memory it will access. - * *addrkey passed to libnal_unmap() is what libnal_map() set it to. - * type of *iov depends on options. - * Set to NULL if not required. 
- */ - ptl_err_t (*libnal_map) - (struct lib_nal *nal, unsigned int niov, struct iovec *iov, - void **addrkey); - void (*libnal_unmap) - (struct lib_nal *nal, unsigned int niov, struct iovec *iov, - void **addrkey); - - /* as (un)map, but with a set of page fragments */ - ptl_err_t (*libnal_map_pages) - (struct lib_nal *nal, unsigned int niov, ptl_kiov_t *iov, - void **addrkey); - void (*libnal_unmap_pages) - (struct lib_nal *nal, unsigned int niov, ptl_kiov_t *iov, - void **addrkey); - - void (*libnal_printf)(struct lib_nal *nal, const char *fmt, ...); - - /* Calculate a network "distance" to given node */ - int (*libnal_dist) (struct lib_nal *nal, ptl_nid_t nid, unsigned long *dist); -} lib_nal_t; -- --#endif diff --cc lnet/include/lnet/list.h index 37d9952,37d9952..0000000 deleted file mode 100644,100644 --- a/lnet/include/lnet/list.h +++ /dev/null @@@ -1,243 -1,243 +1,0 @@@ --#ifndef _LINUX_LIST_H --/* -- * Simple doubly linked list implementation. -- * -- * Some of the internal functions ("__xxx") are useful when -- * manipulating whole lists rather than single entries, as -- * sometimes we already know the next/prev entries and we can -- * generate better code by using them directly rather than -- * using the generic single-entry routines. -- */ -- --struct list_head { -- struct list_head *next, *prev; --}; -- --typedef struct list_head list_t; -- --#define LIST_HEAD_INIT(name) { &(name), &(name) } -- --#define LIST_HEAD(name) \ -- struct list_head name = LIST_HEAD_INIT(name) -- --#define INIT_LIST_HEAD(ptr) do { \ -- (ptr)->next = (ptr); (ptr)->prev = (ptr); \ --} while (0) -- --/* -- * Insert a new entry between two known consecutive entries. -- * -- * This is only for internal list manipulation where we know -- * the prev/next entries already! 
-- */ --static inline void __list_add(struct list_head * new, -- struct list_head * prev, -- struct list_head * next) --{ -- next->prev = new; -- new->next = next; -- new->prev = prev; -- prev->next = new; --} -- --/** -- * list_add - add a new entry -- * @new: new entry to be added -- * @head: list head to add it after -- * -- * Insert a new entry after the specified head. -- * This is good for implementing stacks. -- */ --static inline void list_add(struct list_head *new, struct list_head *head) --{ -- __list_add(new, head, head->next); --} -- --/** -- * list_add_tail - add a new entry -- * @new: new entry to be added -- * @head: list head to add it before -- * -- * Insert a new entry before the specified head. -- * This is useful for implementing queues. -- */ --static inline void list_add_tail(struct list_head *new, struct list_head *head) --{ -- __list_add(new, head->prev, head); --} -- --/* -- * Delete a list entry by making the prev/next entries -- * point to each other. -- * -- * This is only for internal list manipulation where we know -- * the prev/next entries already! -- */ --static inline void __list_del(struct list_head * prev, struct list_head * next) --{ -- next->prev = prev; -- prev->next = next; --} -- --/** -- * list_del - deletes entry from list. -- * @entry: the element to delete from the list. -- * Note: list_empty on entry does not return true after this, the entry is in an undefined state. -- */ --static inline void list_del(struct list_head *entry) --{ -- __list_del(entry->prev, entry->next); --} -- --/** -- * list_del_init - deletes entry from list and reinitialize it. -- * @entry: the element to delete from the list. 
-- */ --static inline void list_del_init(struct list_head *entry) --{ -- __list_del(entry->prev, entry->next); -- INIT_LIST_HEAD(entry); --} --#endif -- --#ifndef list_for_each_entry --/** -- * list_move - delete from one list and add as another's head -- * @list: the entry to move -- * @head: the head that will precede our entry -- */ --static inline void list_move(struct list_head *list, struct list_head *head) --{ -- __list_del(list->prev, list->next); -- list_add(list, head); --} -- --/** -- * list_move_tail - delete from one list and add as another's tail -- * @list: the entry to move -- * @head: the head that will follow our entry -- */ --static inline void list_move_tail(struct list_head *list, -- struct list_head *head) --{ -- __list_del(list->prev, list->next); -- list_add_tail(list, head); --} --#endif -- --#ifndef _LINUX_LIST_H --#define _LINUX_LIST_H --/** -- * list_empty - tests whether a list is empty -- * @head: the list to test. -- */ --static inline int list_empty(struct list_head *head) --{ -- return head->next == head; --} -- --static inline void __list_splice(struct list_head *list, -- struct list_head *head) --{ -- struct list_head *first = list->next; -- struct list_head *last = list->prev; -- struct list_head *at = head->next; -- -- first->prev = head; -- head->next = first; -- -- last->next = at; -- at->prev = last; --} -- --/** -- * list_splice - join two lists -- * @list: the new list to add. -- * @head: the place to add it in the first list. -- */ --static inline void list_splice(struct list_head *list, struct list_head *head) --{ -- if (!list_empty(list)) -- __list_splice(list, head); --} -- --/** -- * list_splice_init - join two lists and reinitialise the emptied list. -- * @list: the new list to add. -- * @head: the place to add it in the first list. 
-- * -- * The list at @list is reinitialised -- */ --static inline void list_splice_init(struct list_head *list, -- struct list_head *head) --{ -- if (!list_empty(list)) { -- __list_splice(list, head); -- INIT_LIST_HEAD(list); -- } --} -- --/** -- * list_entry - get the struct for this entry -- * @ptr: the &struct list_head pointer. -- * @type: the type of the struct this is embedded in. -- * @member: the name of the list_struct within the struct. -- */ --#define list_entry(ptr, type, member) \ -- ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) -- --/** -- * list_for_each - iterate over a list -- * @pos: the &struct list_head to use as a loop counter. -- * @head: the head for your list. -- */ --#define list_for_each(pos, head) \ -- for (pos = (head)->next ; pos != (head); pos = pos->next ) -- --/** -- * list_for_each_prev - iterate over a list in reverse order -- * @pos: the &struct list_head to use as a loop counter. -- * @head: the head for your list. -- */ --#define list_for_each_prev(pos, head) \ -- for (pos = (head)->prev ; pos != (head); pos = pos->prev) -- --/** -- * list_for_each_safe - iterate over a list safe against removal of list entry -- * @pos: the &struct list_head to use as a loop counter. -- * @n: another &struct list_head to use as temporary storage -- * @head: the head for your list. -- */ --#define list_for_each_safe(pos, n, head) \ -- for (pos = (head)->next, n = pos->next; pos != (head); \ -- pos = n, n = pos->next) -- --#endif -- --#ifndef list_for_each_entry --/** -- * list_for_each_entry - iterate over list of given type -- * @pos: the type * to use as a loop counter. -- * @head: the head for your list. -- * @member: the name of the list_struct within the struct. 
-- */ --#define list_for_each_entry(pos, head, member) \ -- for (pos = list_entry((head)->next, typeof(*pos), member); \ -- &pos->member != (head); \ -- pos = list_entry(pos->member.next, typeof(*pos), member)) --#endif -- --#ifndef list_for_each_entry_safe --/** -- * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry -- * @pos: the type * to use as a loop counter. -- * @n: another type * to use as temporary storage -- * @head: the head for your list. -- * @member: the name of the list_struct within the struct. -- */ --#define list_for_each_entry_safe(pos, n, head, member) \ -- for (pos = list_entry((head)->next, typeof(*pos), member), \ -- n = list_entry(pos->member.next, typeof(*pos), member); \ -- &pos->member != (head); \ -- pos = n, n = list_entry(n->member.next, typeof(*n), member)) --#endif diff --cc lnet/include/lnet/lltrace.h index 5f266e2,5f266e2..0000000 deleted file mode 100644,100644 --- a/lnet/include/lnet/lltrace.h +++ /dev/null @@@ -1,175 -1,175 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Compile with: -- * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl -- */ --#ifndef __LTRACE_H_ --#define __LTRACE_H_ -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include -- --static inline int ltrace_write_file(char* fname) --{ -- char* argv[3]; -- -- argv[0] = "debug_kernel"; -- argv[1] = fname; -- argv[2] = "1"; -- -- fprintf(stderr, "[ptlctl] %s %s %s\n", argv[0], argv[1], argv[2]); -- -- return jt_dbg_debug_kernel(3, argv); --} -- --static inline int ltrace_clear() --{ -- char* argv[1]; -- -- argv[0] = "clear"; -- -- fprintf(stderr, "[ptlctl] %s\n", argv[0]); -- -- return jt_dbg_clear_debug_buf(1, argv); --} -- --static inline int ltrace_mark(int indent_level, char* text) --{ -- char* 
argv[2]; -- char mark_buf[PATH_MAX]; -- -- snprintf(mark_buf, PATH_MAX, "====%d=%s", indent_level, text); -- -- argv[0] = "mark"; -- argv[1] = mark_buf; -- return jt_dbg_mark_debug_buf(2, argv); --} -- --static inline int ltrace_applymasks() --{ -- char* argv[2]; -- argv[0] = "list"; -- argv[1] = "applymasks"; -- -- fprintf(stderr, "[ptlctl] %s %s\n", argv[0], argv[1]); -- -- return jt_dbg_list(2, argv); --} -- -- --static inline int ltrace_filter(char* subsys_or_mask) --{ -- char* argv[2]; -- argv[0] = "filter"; -- argv[1] = subsys_or_mask; -- return jt_dbg_filter(2, argv); --} -- --static inline int ltrace_show(char* subsys_or_mask) --{ -- char* argv[2]; -- argv[0] = "show"; -- argv[1] = subsys_or_mask; -- return jt_dbg_show(2, argv); --} -- --static inline int ltrace_start() --{ -- int rc = 0; -- dbg_initialize(0, NULL); --#ifdef PORTALS_DEV_ID -- rc = register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH); --#endif -- ltrace_filter("class"); -- ltrace_filter("socknal"); -- ltrace_filter("qswnal"); -- ltrace_filter("gmnal"); -- ltrace_filter("portals"); -- -- ltrace_show("all_types"); -- ltrace_filter("trace"); -- ltrace_filter("malloc"); -- ltrace_filter("net"); -- ltrace_filter("page"); -- ltrace_filter("other"); -- ltrace_filter("info"); -- ltrace_applymasks(); -- -- return rc; --} -- -- --static inline void ltrace_stop() --{ --#ifdef PORTALS_DEV_ID -- unregister_ioc_dev(PORTALS_DEV_ID); --#endif --} -- --static inline int not_uml() --{ -- /* Return Values: -- * 0 when run under UML -- * 1 when run on host -- * <0 when lookup failed -- */ -- struct stat buf; -- int rc = stat("/dev/ubd", &buf); -- rc = ((rc<0) && (errno == ENOENT)) ? 
1 : rc; -- if (rc<0) { -- fprintf(stderr, "Cannot stat /dev/ubd: %s\n", strerror(errno)); -- rc = 1; /* Assume host */ -- } -- return rc; --} -- --#define LTRACE_MAX_NOB 256 --static inline void ltrace_add_processnames(char* fname) --{ -- char cmdbuf[LTRACE_MAX_NOB]; -- struct timeval tv; -- struct timezone tz; -- int nob; -- int underuml = !not_uml(); -- -- gettimeofday(&tv, &tz); -- -- nob = snprintf(cmdbuf, LTRACE_MAX_NOB, "ps --no-headers -eo \""); -- -- /* Careful - these format strings need to match the CDEBUG -- * formats in portals/linux/debug.c EXACTLY -- */ -- nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, "%02x:%06x:%d:%lu.%06lu ", -- S_RPC >> 24, D_VFSTRACE, 0, tv.tv_sec, tv.tv_usec); -- -- if (underuml && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))) { -- nob += snprintf (cmdbuf+nob, LTRACE_MAX_NOB, -- "(%s:%d:%s() %d | %d+%lu): ", -- "lltrace.h", __LINE__, __FUNCTION__, 0, 0, 0L); -- } -- else { -- nob += snprintf (cmdbuf+nob, LTRACE_MAX_NOB, -- "(%s:%d:%s() %d+%lu): ", -- "lltrace.h", __LINE__, __FUNCTION__, 0, 0L); -- } -- -- nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, " %%p %%c\" >> %s", fname); -- system(cmdbuf); --} -- --#endif diff --cc lnet/include/lnet/lnet.h index 8b1495e,4b8631d..0000000 deleted file mode 100644,100644 --- a/lnet/include/lnet/lnet.h +++ /dev/null @@@ -1,71 -1,26 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- */ --#ifndef _P30_H_ --#define _P30_H_ - -#include "build_check.h" -- --/* -- * p30.h -- * -- * User application interface file -- */ -- --#if defined (__KERNEL__) --#include --#include --#else --#include --#include --#endif -- --#include - #include --#include - #include - - extern int __p30_initialized; /* for libraries & test codes */ - extern int __p30_myr_initialized; /* that don't know if p30 */ - extern int __p30_ip_initialized; /* had been initialized yet */ - extern ptl_handle_ni_t __myr_ni_handle, __ip_ni_handle; - - extern int 
__p30_myr_timeout; /* in seconds, for PtlNIBarrier, */ - extern int __p30_ip_timeout; /* PtlReduce_all, & PtlBroadcast_all */ - - /* - * Debugging flags reserved for the Portals reference library. - * These are not part of the API as described in the SAND report - * but are for the use of the maintainers of the reference implementation. - * - * It is not expected that the real implementations will export - * this functionality. - */ - #define PTL_DEBUG_NONE 0ul - #define PTL_DEBUG_ALL (0x0FFFul) /* Only the Portals flags */ - - #define __bit(x) ((unsigned long) 1<<(x)) - #define PTL_DEBUG_PUT __bit(0) - #define PTL_DEBUG_GET __bit(1) - #define PTL_DEBUG_REPLY __bit(2) - #define PTL_DEBUG_ACK __bit(3) - #define PTL_DEBUG_DROP __bit(4) - #define PTL_DEBUG_REQUEST __bit(5) - #define PTL_DEBUG_DELIVERY __bit(6) - #define PTL_DEBUG_UNLINK __bit(7) - #define PTL_DEBUG_THRESHOLD __bit(8) - #define PTL_DEBUG_API __bit(9) - - /* - * These eight are reserved for the NAL to define - * It should probably give them better names... - */ - #define PTL_DEBUG_NI_ALL (0xF000ul) /* Only the NAL flags */ - #define PTL_DEBUG_NI0 __bit(24) - #define PTL_DEBUG_NI1 __bit(25) - #define PTL_DEBUG_NI2 __bit(26) - #define PTL_DEBUG_NI3 __bit(27) - #define PTL_DEBUG_NI4 __bit(28) - #define PTL_DEBUG_NI5 __bit(29) - #define PTL_DEBUG_NI6 __bit(30) - #define PTL_DEBUG_NI7 __bit(31) -- --#endif diff --cc lnet/include/lnet/lnetctl.h index 12ef47a,a81a371..0000000 deleted file mode 100644,100644 --- a/lnet/include/lnet/lnetctl.h +++ /dev/null @@@ -1,88 -1,102 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (C) 2001, 2002 Cluster File Systems, Inc. 
-- * -- * This file is part of Portals, http://www.sf.net/projects/lustre/ -- * -- * Portals is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Portals is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Portals; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- * -- * header for libptlctl.a -- */ --#ifndef _PTLCTL_H_ --#define _PTLCTL_H_ - -#include -#include -#include -- --#define PORTALS_DEV_ID 0 --#define PORTALS_DEV_PATH "/dev/portals" --#define OBD_DEV_ID 1 --#define OBD_DEV_PATH "/dev/obd" -#define SMFS_DEV_ID 2 -#define SMFS_DEV_PATH "/dev/snapdev" -- --int ptl_name2nal(char *str); --int ptl_parse_ipaddr (__u32 *ipaddrp, char *str); --int ptl_parse_nid (ptl_nid_t *nidp, char *str); --char * ptl_nid2str (char *buffer, ptl_nid_t nid); -- --int ptl_initialize(int argc, char **argv); --int jt_ptl_network(int argc, char **argv); --int jt_ptl_print_autoconnects (int argc, char **argv); --int jt_ptl_add_autoconnect (int argc, char **argv); --int jt_ptl_del_autoconnect (int argc, char **argv); -int jt_ptl_print_interfaces(int argc, char **argv); -int jt_ptl_add_interface(int argc, char **argv); -int jt_ptl_del_interface(int argc, char **argv); -int jt_ptl_print_peers (int argc, char **argv); -int jt_ptl_add_peer (int argc, char **argv); -int jt_ptl_del_peer (int argc, char **argv); --int jt_ptl_print_connections (int argc, char **argv); --int jt_ptl_connect(int argc, char **argv); --int jt_ptl_disconnect(int argc, char **argv); --int jt_ptl_push_connection(int argc, char **argv); --int 
jt_ptl_print_active_txs(int argc, char **argv); --int jt_ptl_ping(int argc, char **argv); --int jt_ptl_shownid(int argc, char **argv); --int jt_ptl_mynid(int argc, char **argv); --int jt_ptl_add_uuid(int argc, char **argv); --int jt_ptl_add_uuid_old(int argc, char **argv); /* backwards compatibility */ --int jt_ptl_close_uuid(int argc, char **argv); --int jt_ptl_del_uuid(int argc, char **argv); --int jt_ptl_rxmem (int argc, char **argv); --int jt_ptl_txmem (int argc, char **argv); --int jt_ptl_nagle (int argc, char **argv); --int jt_ptl_add_route (int argc, char **argv); --int jt_ptl_del_route (int argc, char **argv); --int jt_ptl_notify_router (int argc, char **argv); --int jt_ptl_print_routes (int argc, char **argv); --int jt_ptl_fail_nid (int argc, char **argv); --int jt_ptl_lwt(int argc, char **argv); --int jt_ptl_memhog(int argc, char **argv); -- --int dbg_initialize(int argc, char **argv); --int jt_dbg_filter(int argc, char **argv); --int jt_dbg_show(int argc, char **argv); --int jt_dbg_list(int argc, char **argv); --int jt_dbg_debug_kernel(int argc, char **argv); --int jt_dbg_debug_daemon(int argc, char **argv); --int jt_dbg_debug_file(int argc, char **argv); --int jt_dbg_clear_debug_buf(int argc, char **argv); --int jt_dbg_mark_debug_buf(int argc, char **argv); --int jt_dbg_modules(int argc, char **argv); --int jt_dbg_panic(int argc, char **argv); -- --int ptl_set_cfg_record_cb(cfg_record_cb_t cb); -- --/* l_ioctl.c */ - typedef int (ioc_handler_t)(int dev_id, int opc, void *buf); -typedef int (ioc_handler_t)(int dev_id, unsigned int opc, void *buf); --void set_ioc_handler(ioc_handler_t *handler); --int register_ioc_dev(int dev_id, const char * dev_name); --void unregister_ioc_dev(int dev_id); --int set_ioctl_dump(char * file); - int l_ioctl(int dev_id, int opc, void *buf); - int parse_dump(char * dump_file, int (*ioc_func)(int dev_id, int opc, void *)); -int l_ioctl(int dev_id, unsigned int opc, void *buf); -int parse_dump(char * dump_file, ioc_handler_t 
ioc_func); --int jt_ioc_dump(int argc, char **argv); -extern char *dump_filename; -int dump(int dev_id, unsigned int opc, void *buf); -- --#endif diff --cc lnet/include/lnet/myrnal.h index 13790f7,13790f7..0000000 deleted file mode 100644,100644 --- a/lnet/include/lnet/myrnal.h +++ /dev/null @@@ -1,23 -1,23 +1,0 @@@ --#ifndef MYRNAL_H --#define MYRNAL_H -- --#define MAX_ARGS_LEN (256) --#define MAX_RET_LEN (128) --#define MYRNAL_MAX_ACL_SIZE (64) --#define MYRNAL_MAX_PTL_SIZE (64) -- --#define P3CMD (100) --#define P3SYSCALL (200) --#define P3REGISTER (300) -- --enum { PTL_MLOCKALL }; -- --typedef struct { -- void *args; -- size_t args_len; -- void *ret; -- size_t ret_len; -- int p3cmd; --} myrnal_forward_t; -- --#endif /* MYRNAL_H */ diff --cc lnet/include/lnet/nal.h index 7cb3ab7,bf86569..0000000 deleted file mode 100644,100644 --- a/lnet/include/lnet/nal.h +++ /dev/null @@@ -1,47 -1,87 +1,0 @@@ --#ifndef _NAL_H_ --#define _NAL_H_ - -#include "build_check.h" -- --/* -- * p30/nal.h -- * -- * The API side NAL declarations -- */ -- --#include - - #ifdef yield - #undef yield - #endif -- --typedef struct nal_t nal_t; -- --struct nal_t { - ptl_ni_t ni; - int refct; - void *nal_data; - int *timeout; /* for libp30api users */ - int (*forward) (nal_t * nal, int index, /* Function ID */ - void *args, size_t arg_len, void *ret, size_t ret_len); - /* common interface state */ - int nal_refct; - ptl_handle_ni_t nal_handle; -- - int (*shutdown) (nal_t * nal, int interface); - /* NAL-private data */ - void *nal_data; -- - int (*validate) (nal_t * nal, void *base, size_t extent); - /* NAL API implementation - * NB only nal_ni_init needs to be set when the NAL registers itself */ - int (*nal_ni_init) (nal_t *nal, ptl_pid_t requested_pid, - ptl_ni_limits_t *req, ptl_ni_limits_t *actual); - - void (*nal_ni_fini) (nal_t *nal); -- - void (*yield) (nal_t * nal); - int (*nal_get_id) (nal_t *nal, ptl_process_id_t *id); - int (*nal_ni_status) (nal_t *nal, ptl_sr_index_t register, 
ptl_sr_value_t *status); - int (*nal_ni_dist) (nal_t *nal, ptl_process_id_t *id, unsigned long *distance); - int (*nal_fail_nid) (nal_t *nal, ptl_nid_t nid, unsigned int threshold); -- - void (*lock) (nal_t * nal, unsigned long *flags); - int (*nal_me_attach) (nal_t *nal, ptl_pt_index_t portal, - ptl_process_id_t match_id, - ptl_match_bits_t match_bits, ptl_match_bits_t ignore_bits, - ptl_unlink_t unlink, ptl_ins_pos_t pos, - ptl_handle_me_t *handle); - int (*nal_me_insert) (nal_t *nal, ptl_handle_me_t *me, - ptl_process_id_t match_id, - ptl_match_bits_t match_bits, ptl_match_bits_t ignore_bits, - ptl_unlink_t unlink, ptl_ins_pos_t pos, - ptl_handle_me_t *handle); - int (*nal_me_unlink) (nal_t *nal, ptl_handle_me_t *me); - - int (*nal_md_attach) (nal_t *nal, ptl_handle_me_t *me, - ptl_md_t *md, ptl_unlink_t unlink, - ptl_handle_md_t *handle); - int (*nal_md_bind) (nal_t *nal, - ptl_md_t *md, ptl_unlink_t unlink, - ptl_handle_md_t *handle); - int (*nal_md_unlink) (nal_t *nal, ptl_handle_md_t *md); - int (*nal_md_update) (nal_t *nal, ptl_handle_md_t *md, - ptl_md_t *old_md, ptl_md_t *new_md, - ptl_handle_eq_t *testq); -- - void (*unlock) (nal_t * nal, unsigned long *flags); - }; - int (*nal_eq_alloc) (nal_t *nal, ptl_size_t count, - ptl_eq_handler_t handler, - ptl_handle_eq_t *handle); - int (*nal_eq_free) (nal_t *nal, ptl_handle_eq_t *eq); - int (*nal_eq_poll) (nal_t *nal, - ptl_handle_eq_t *eqs, int neqs, int timeout, - ptl_event_t *event, int *which); -- - typedef nal_t *(ptl_interface_t) (int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid); - extern nal_t *PTL_IFACE_IP(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid); - extern nal_t *PTL_IFACE_MYR(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid); - int (*nal_ace_entry) (nal_t *nal, ptl_ac_index_t index, - ptl_process_id_t match_id, ptl_pt_index_t portal); - - int (*nal_put) (nal_t *nal, ptl_handle_md_t *md, ptl_ack_req_t ack, - ptl_process_id_t *target, ptl_pt_index_t portal, - 
ptl_ac_index_t ac, ptl_match_bits_t match, - ptl_size_t offset, ptl_hdr_data_t hdr_data); - int (*nal_get) (nal_t *nal, ptl_handle_md_t *md, - ptl_process_id_t *target, ptl_pt_index_t portal, - ptl_ac_index_t ac, ptl_match_bits_t match, - ptl_size_t offset); -}; -- - extern nal_t *ptl_hndl2nal(ptl_handle_any_t * any); -extern nal_t *ptl_hndl2nal(ptl_handle_any_t *any); -- - #ifndef PTL_IFACE_DEFAULT - #define PTL_IFACE_DEFAULT (PTL_IFACE_IP) -#ifdef __KERNEL__ -extern int ptl_register_nal(ptl_interface_t interface, nal_t *nal); -extern void ptl_unregister_nal(ptl_interface_t interface); --#endif -- --#endif diff --cc lnet/include/lnet/nalids.h index 1b837b4,55a991b..0000000 deleted file mode 100644,100644 --- a/lnet/include/lnet/nalids.h +++ /dev/null @@@ -1,4 -1,2 +1,0 @@@ - #define PTL_IFACE_TCP 1 - #define PTL_IFACE_ER 2 - #define PTL_IFACE_SS 3 - #define PTL_IFACE_MAX 4 -#include "build_check.h" - diff --cc lnet/include/lnet/p30.h index 8b1495e,4b8631d..0000000 deleted file mode 100644,100644 --- a/lnet/include/lnet/p30.h +++ /dev/null @@@ -1,71 -1,26 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- */ --#ifndef _P30_H_ --#define _P30_H_ - -#include "build_check.h" -- --/* -- * p30.h -- * -- * User application interface file -- */ -- --#if defined (__KERNEL__) --#include --#include --#else --#include --#include --#endif -- --#include - #include --#include - #include - - extern int __p30_initialized; /* for libraries & test codes */ - extern int __p30_myr_initialized; /* that don't know if p30 */ - extern int __p30_ip_initialized; /* had been initialized yet */ - extern ptl_handle_ni_t __myr_ni_handle, __ip_ni_handle; - - extern int __p30_myr_timeout; /* in seconds, for PtlNIBarrier, */ - extern int __p30_ip_timeout; /* PtlReduce_all, & PtlBroadcast_all */ - - /* - * Debugging flags reserved for the Portals reference library. 
- * These are not part of the API as described in the SAND report - * but are for the use of the maintainers of the reference implementation. - * - * It is not expected that the real implementations will export - * this functionality. - */ - #define PTL_DEBUG_NONE 0ul - #define PTL_DEBUG_ALL (0x0FFFul) /* Only the Portals flags */ - - #define __bit(x) ((unsigned long) 1<<(x)) - #define PTL_DEBUG_PUT __bit(0) - #define PTL_DEBUG_GET __bit(1) - #define PTL_DEBUG_REPLY __bit(2) - #define PTL_DEBUG_ACK __bit(3) - #define PTL_DEBUG_DROP __bit(4) - #define PTL_DEBUG_REQUEST __bit(5) - #define PTL_DEBUG_DELIVERY __bit(6) - #define PTL_DEBUG_UNLINK __bit(7) - #define PTL_DEBUG_THRESHOLD __bit(8) - #define PTL_DEBUG_API __bit(9) - - /* - * These eight are reserved for the NAL to define - * It should probably give them better names... - */ - #define PTL_DEBUG_NI_ALL (0xF000ul) /* Only the NAL flags */ - #define PTL_DEBUG_NI0 __bit(24) - #define PTL_DEBUG_NI1 __bit(25) - #define PTL_DEBUG_NI2 __bit(26) - #define PTL_DEBUG_NI3 __bit(27) - #define PTL_DEBUG_NI4 __bit(28) - #define PTL_DEBUG_NI5 __bit(29) - #define PTL_DEBUG_NI6 __bit(30) - #define PTL_DEBUG_NI7 __bit(31) -- --#endif diff --cc lnet/include/lnet/ptlctl.h index 12ef47a,a81a371..0000000 deleted file mode 100644,100644 --- a/lnet/include/lnet/ptlctl.h +++ /dev/null @@@ -1,88 -1,102 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (C) 2001, 2002 Cluster File Systems, Inc. -- * -- * This file is part of Portals, http://www.sf.net/projects/lustre/ -- * -- * Portals is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Portals is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Portals; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- * -- * header for libptlctl.a -- */ --#ifndef _PTLCTL_H_ --#define _PTLCTL_H_ - -#include -#include -#include -- --#define PORTALS_DEV_ID 0 --#define PORTALS_DEV_PATH "/dev/portals" --#define OBD_DEV_ID 1 --#define OBD_DEV_PATH "/dev/obd" -#define SMFS_DEV_ID 2 -#define SMFS_DEV_PATH "/dev/snapdev" -- --int ptl_name2nal(char *str); --int ptl_parse_ipaddr (__u32 *ipaddrp, char *str); --int ptl_parse_nid (ptl_nid_t *nidp, char *str); --char * ptl_nid2str (char *buffer, ptl_nid_t nid); -- --int ptl_initialize(int argc, char **argv); --int jt_ptl_network(int argc, char **argv); --int jt_ptl_print_autoconnects (int argc, char **argv); --int jt_ptl_add_autoconnect (int argc, char **argv); --int jt_ptl_del_autoconnect (int argc, char **argv); -int jt_ptl_print_interfaces(int argc, char **argv); -int jt_ptl_add_interface(int argc, char **argv); -int jt_ptl_del_interface(int argc, char **argv); -int jt_ptl_print_peers (int argc, char **argv); -int jt_ptl_add_peer (int argc, char **argv); -int jt_ptl_del_peer (int argc, char **argv); --int jt_ptl_print_connections (int argc, char **argv); --int jt_ptl_connect(int argc, char **argv); --int jt_ptl_disconnect(int argc, char **argv); --int jt_ptl_push_connection(int argc, char **argv); --int jt_ptl_print_active_txs(int argc, char **argv); --int jt_ptl_ping(int argc, char **argv); --int jt_ptl_shownid(int argc, char **argv); --int jt_ptl_mynid(int argc, char **argv); --int jt_ptl_add_uuid(int argc, char **argv); --int jt_ptl_add_uuid_old(int argc, char **argv); /* backwards compatibility */ --int jt_ptl_close_uuid(int argc, char **argv); --int jt_ptl_del_uuid(int argc, char **argv); --int jt_ptl_rxmem (int argc, char **argv); --int jt_ptl_txmem (int argc, char **argv); --int 
jt_ptl_nagle (int argc, char **argv); --int jt_ptl_add_route (int argc, char **argv); --int jt_ptl_del_route (int argc, char **argv); --int jt_ptl_notify_router (int argc, char **argv); --int jt_ptl_print_routes (int argc, char **argv); --int jt_ptl_fail_nid (int argc, char **argv); --int jt_ptl_lwt(int argc, char **argv); --int jt_ptl_memhog(int argc, char **argv); -- --int dbg_initialize(int argc, char **argv); --int jt_dbg_filter(int argc, char **argv); --int jt_dbg_show(int argc, char **argv); --int jt_dbg_list(int argc, char **argv); --int jt_dbg_debug_kernel(int argc, char **argv); --int jt_dbg_debug_daemon(int argc, char **argv); --int jt_dbg_debug_file(int argc, char **argv); --int jt_dbg_clear_debug_buf(int argc, char **argv); --int jt_dbg_mark_debug_buf(int argc, char **argv); --int jt_dbg_modules(int argc, char **argv); --int jt_dbg_panic(int argc, char **argv); -- --int ptl_set_cfg_record_cb(cfg_record_cb_t cb); -- --/* l_ioctl.c */ - typedef int (ioc_handler_t)(int dev_id, int opc, void *buf); -typedef int (ioc_handler_t)(int dev_id, unsigned int opc, void *buf); --void set_ioc_handler(ioc_handler_t *handler); --int register_ioc_dev(int dev_id, const char * dev_name); --void unregister_ioc_dev(int dev_id); --int set_ioctl_dump(char * file); - int l_ioctl(int dev_id, int opc, void *buf); - int parse_dump(char * dump_file, int (*ioc_func)(int dev_id, int opc, void *)); -int l_ioctl(int dev_id, unsigned int opc, void *buf); -int parse_dump(char * dump_file, ioc_handler_t ioc_func); --int jt_ioc_dump(int argc, char **argv); -extern char *dump_filename; -int dump(int dev_id, unsigned int opc, void *buf); -- --#endif diff --cc lnet/include/lnet/socklnd.h index 27e6f8e,27e6f8e..0000000 deleted file mode 100644,100644 --- a/lnet/include/lnet/socklnd.h +++ /dev/null @@@ -1,14 -1,14 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * -- * -- * #defines shared between socknal implementation 
and utilities -- */ -- --#define SOCKNAL_CONN_NONE (-1) --#define SOCKNAL_CONN_ANY 0 --#define SOCKNAL_CONN_CONTROL 1 --#define SOCKNAL_CONN_BULK_IN 2 --#define SOCKNAL_CONN_BULK_OUT 3 --#define SOCKNAL_CONN_NTYPES 4 diff --cc lnet/include/lnet/stringtab.h index 33e4375,33e4375..0000000 deleted file mode 100644,100644 --- a/lnet/include/lnet/stringtab.h +++ /dev/null @@@ -1,3 -1,3 +1,0 @@@ --/* -- * stringtab.h -- */ diff --cc lnet/include/lnet/types.h index 80995e9,0bada40..0000000 deleted file mode 100644,100644 --- a/lnet/include/lnet/types.h +++ /dev/null @@@ -1,171 -1,193 +1,0 @@@ --#ifndef _P30_TYPES_H_ --#define _P30_TYPES_H_ - - #include -- - #ifdef __KERNEL__ - # include - # include - #else - # include - # define do_gettimeofday(tv) gettimeofday(tv, NULL); - typedef unsigned long long cycles_t; - #endif -#include "build_check.h" -- -#include --#include - -/* This implementation uses the same type for API function return codes and - * the completion status in an event */ -#define PTL_NI_OK PTL_OK -typedef ptl_err_t ptl_ni_fail_t; -- -typedef __u32 ptl_uid_t; -typedef __u32 ptl_jid_t; --typedef __u64 ptl_nid_t; --typedef __u32 ptl_pid_t; --typedef __u32 ptl_pt_index_t; --typedef __u32 ptl_ac_index_t; --typedef __u64 ptl_match_bits_t; --typedef __u64 ptl_hdr_data_t; --typedef __u32 ptl_size_t; - -#define PTL_TIME_FOREVER (-1) -- --typedef struct { -- unsigned long nal_idx; /* which network interface */ -- __u64 cookie; /* which thing on that interface */ --} ptl_handle_any_t; -- --typedef ptl_handle_any_t ptl_handle_ni_t; --typedef ptl_handle_any_t ptl_handle_eq_t; --typedef ptl_handle_any_t ptl_handle_md_t; --typedef ptl_handle_any_t ptl_handle_me_t; -- - #define PTL_HANDLE_NONE \ -#define PTL_INVALID_HANDLE \ -- ((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1}) - #define PTL_EQ_NONE PTL_HANDLE_NONE -#define PTL_EQ_NONE PTL_INVALID_HANDLE -- - static inline int PtlHandleEqual (ptl_handle_any_t h1, ptl_handle_any_t h2) -static inline int 
PtlHandleIsEqual (ptl_handle_any_t h1, ptl_handle_any_t h2) --{ -- return (h1.nal_idx == h2.nal_idx && h1.cookie == h2.cookie); --} -- -#define PTL_UID_ANY ((ptl_uid_t) -1) -#define PTL_JID_ANY ((ptl_jid_t) -1) --#define PTL_NID_ANY ((ptl_nid_t) -1) --#define PTL_PID_ANY ((ptl_pid_t) -1) -- --typedef struct { -- ptl_nid_t nid; -- ptl_pid_t pid; /* node id / process id */ --} ptl_process_id_t; -- --typedef enum { -- PTL_RETAIN = 0, -- PTL_UNLINK --} ptl_unlink_t; -- --typedef enum { -- PTL_INS_BEFORE, -- PTL_INS_AFTER --} ptl_ins_pos_t; - - typedef struct { - struct page *kiov_page; - unsigned int kiov_len; - unsigned int kiov_offset; - } ptl_kiov_t; -- --typedef struct { -- void *start; -- ptl_size_t length; -- int threshold; -- int max_size; -- unsigned int options; -- void *user_ptr; - ptl_handle_eq_t eventq; - unsigned int niov; - ptl_handle_eq_t eq_handle; --} ptl_md_t; -- --/* Options for the MD structure */ - #define PTL_MD_OP_PUT (1 << 0) - #define PTL_MD_OP_GET (1 << 1) - #define PTL_MD_MANAGE_REMOTE (1 << 2) - #define PTL_MD_AUTO_UNLINK (1 << 3) - #define PTL_MD_TRUNCATE (1 << 4) - #define PTL_MD_ACK_DISABLE (1 << 5) - #define PTL_MD_IOV (1 << 6) - #define PTL_MD_MAX_SIZE (1 << 7) - #define PTL_MD_KIOV (1 << 8) -#define PTL_MD_OP_PUT (1 << 0) -#define PTL_MD_OP_GET (1 << 1) -#define PTL_MD_MANAGE_REMOTE (1 << 2) -/* unused (1 << 3) */ -#define PTL_MD_TRUNCATE (1 << 4) -#define PTL_MD_ACK_DISABLE (1 << 5) -#define PTL_MD_IOVEC (1 << 6) -#define PTL_MD_MAX_SIZE (1 << 7) -#define PTL_MD_KIOV (1 << 8) -#define PTL_MD_EVENT_START_DISABLE (1 << 9) -#define PTL_MD_EVENT_END_DISABLE (1 << 10) - -/* For compatibility with Cray Portals */ -#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS 0 -#define PTL_MD_PHYS 0 -- --#define PTL_MD_THRESH_INF (-1) - -/* NB lustre portals uses struct iovec internally! 
*/ -typedef struct iovec ptl_md_iovec_t; - -typedef struct { - struct page *kiov_page; - unsigned int kiov_len; - unsigned int kiov_offset; -} ptl_kiov_t; -- --typedef enum { - PTL_EVENT_GET, - PTL_EVENT_PUT, - PTL_EVENT_REPLY, - PTL_EVENT_GET_START, - PTL_EVENT_GET_END, - - PTL_EVENT_PUT_START, - PTL_EVENT_PUT_END, - - PTL_EVENT_REPLY_START, - PTL_EVENT_REPLY_END, - -- PTL_EVENT_ACK, - PTL_EVENT_SENT, - - PTL_EVENT_SEND_START, - PTL_EVENT_SEND_END, - -- PTL_EVENT_UNLINK, --} ptl_event_kind_t; -- --#define PTL_SEQ_BASETYPE long --typedef unsigned PTL_SEQ_BASETYPE ptl_seq_t; --#define PTL_SEQ_GT(a,b) (((signed PTL_SEQ_BASETYPE)((a) - (b))) > 0) -- --/* XXX -- * cygwin need the pragma line, not clear if it's needed in other places. -- * checking!!! -- */ --#ifdef __CYGWIN__ --#pragma pack(push, 4) --#endif --typedef struct { -- ptl_event_kind_t type; - ptl_err_t status; - int unlinked; -- ptl_process_id_t initiator; - ptl_pt_index_t portal; - ptl_uid_t uid; - ptl_jid_t jid; - ptl_pt_index_t pt_index; -- ptl_match_bits_t match_bits; -- ptl_size_t rlength; - ptl_size_t mlength; - ptl_size_t offset; - ptl_md_t mem_desc; - ptl_size_t mlength; - ptl_size_t offset; - ptl_handle_md_t md_handle; - ptl_md_t md; -- ptl_hdr_data_t hdr_data; - struct timeval arrival_time; - ptl_seq_t link; - ptl_ni_fail_t ni_fail_type; - - int unlinked; -- -- volatile ptl_seq_t sequence; --} ptl_event_t; --#ifdef __CYGWIN__ --#pragma pop --#endif -- --typedef enum { -- PTL_ACK_REQ, -- PTL_NOACK_REQ --} ptl_ack_req_t; - - typedef struct { - volatile ptl_seq_t sequence; - ptl_size_t size; - ptl_event_t *base; - ptl_handle_any_t cb_eq_handle; - } ptl_eq_t; -- - typedef struct { - ptl_eq_t *eq; - } ptl_ni_t; -typedef void (*ptl_eq_handler_t)(ptl_event_t *event); -#define PTL_EQ_HANDLER_NONE NULL -- --typedef struct { - int max_match_entries; /* max number of match entries */ - int max_mem_descriptors; /* max number of memory descriptors */ - int max_event_queues; /* max number of event queues */ - 
int max_atable_index; /* maximum access control list table index */ - int max_ptable_index; /* maximum portals table index */ - int max_mes; - int max_mds; - int max_eqs; - int max_ac_index; - int max_pt_index; - int max_md_iovecs; - int max_me_list; - int max_getput_md; --} ptl_ni_limits_t; -- --/* -- * Status registers -- */ --typedef enum { -- PTL_SR_DROP_COUNT, -- PTL_SR_DROP_LENGTH, -- PTL_SR_RECV_COUNT, -- PTL_SR_RECV_LENGTH, -- PTL_SR_SEND_COUNT, -- PTL_SR_SEND_LENGTH, -- PTL_SR_MSGS_MAX, --} ptl_sr_index_t; -- --typedef int ptl_sr_value_t; - -typedef int ptl_interface_t; -#define PTL_IFACE_DEFAULT (-1) -- --#endif diff --cc lnet/klnds/.cvsignore index f5fd0b0,f5fd0b0..0000000 deleted file mode 100644,100644 --- a/lnet/klnds/.cvsignore +++ /dev/null @@@ -1,5 -1,5 +1,0 @@@ --Makefile --autoMakefile --autoMakefile.in --.*.cmd --.depend diff --cc lnet/klnds/Makefile.in index b5ed168,2a01119..0000000 deleted file mode 100644,100644 --- a/lnet/klnds/Makefile.in +++ /dev/null @@@ -1,6 -1,6 +1,0 @@@ --@BUILD_GMNAL_TRUE@subdir-m += gmnal - @BUILD_IBNAL_TRUE@subdir-m += ibnal -@BUILD_OPENIBNAL_TRUE@subdir-m += openibnal --@BUILD_QSWNAL_TRUE@subdir-m += qswnal --subdir-m += socknal -- --@INCLUDE_RULES@ diff --cc lnet/klnds/Makefile.mk index cd5d9d6,cd5d9d6..0000000 deleted file mode 100644,100644 --- a/lnet/klnds/Makefile.mk +++ /dev/null @@@ -1,4 -1,4 +1,0 @@@ --include $(obj)/../Kernelenv -- --obj-y = socknal/ --# more coming... diff --cc lnet/klnds/autoMakefile.am index 9d04a46,002c169..0000000 deleted file mode 100644,100644 --- a/lnet/klnds/autoMakefile.am +++ /dev/null @@@ -1,6 -1,6 +1,0 @@@ --# Copyright (C) 2001 Cluster File Systems, Inc. --# --# This code is issued under the GNU General Public License. 
--# See the file COPYING in this distribution -- - SUBDIRS = gmnal ibnal qswnal socknal -SUBDIRS = gmnal openibnal qswnal socknal diff --cc lnet/klnds/gmlnd/.cvsignore index 642e2e6,642e2e6..0000000 deleted file mode 100644,100644 --- a/lnet/klnds/gmlnd/.cvsignore +++ /dev/null @@@ -1,10 -1,10 +1,0 @@@ --.deps --Makefile --autoMakefile.in --autoMakefile --*.ko --*.mod.c --.*.cmd --.*.flags --.tmp_versions --.depend diff --cc lnet/klnds/gmlnd/Makefile.in index 89ea361,89ea361..0000000 deleted file mode 100644,100644 --- a/lnet/klnds/gmlnd/Makefile.in +++ /dev/null @@@ -1,6 -1,6 +1,0 @@@ --MODULES := kgmnal --kgmnal-objs := gmnal_api.o gmnal_cb.o gmnal_comm.o gmnal_utils.o gmnal_module.o -- --EXTRA_PRE_CFLAGS := @GMCPPFLAGS@ -- --@INCLUDE_RULES@ diff --cc lnet/klnds/gmlnd/Makefile.mk index b799a47,b799a47..0000000 deleted file mode 100644,100644 --- a/lnet/klnds/gmlnd/Makefile.mk +++ /dev/null @@@ -1,10 -1,10 +1,0 @@@ --# Copyright (C) 2001 Cluster File Systems, Inc. --# --# This code is issued under the GNU General Public License. --# See the file COPYING in this distribution -- --include ../../Kernelenv -- --obj-y += gmnal.o --gmnal-objs := gmnal_api.o gmnal_cb.o gmnal_utils.o gmnal_comm.o gmnal_module.o -- diff --cc lnet/klnds/gmlnd/autoMakefile.am index d8b9edb,d8b9edb..0000000 deleted file mode 100644,100644 --- a/lnet/klnds/gmlnd/autoMakefile.am +++ /dev/null @@@ -1,15 -1,15 +1,0 @@@ --# Copyright (C) 2001 Cluster File Systems, Inc. --# --# This code is issued under the GNU General Public License. 
--# See the file COPYING in this distribution -- --if MODULES --if BUILD_GMNAL --if !CRAY_PORTALS --modulenet_DATA = kgmnal$(KMODEXT) --endif --endif --endif -- --MOSTLYCLEANFILES = *.o *.ko *.mod.c --DIST_SOURCES = $(kgmnal-objs:%.o=%.c) gmnal.h diff --cc lnet/klnds/gmlnd/gmlnd.h index ad46b90,9c4425b..0000000 deleted file mode 100644,100644 --- a/lnet/klnds/gmlnd/gmlnd.h +++ /dev/null @@@ -1,486 -1,455 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2003 Los Alamos National Laboratory (LANL) -- * -- * This file is part of Lustre, http://www.lustre.org/ -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-- */ -- -- --/* -- * Portals GM kernel NAL header file -- * This file makes all declaration and prototypes -- * for the API side and CB side of the NAL -- */ --#ifndef __INCLUDE_GMNAL_H__ --#define __INCLUDE_GMNAL_H__ -- --#ifndef EXPORT_SYMTAB --# define EXPORT_SYMTAB --#endif -- --#include "linux/config.h" --#include "linux/module.h" --#include "linux/tty.h" --#include "linux/kernel.h" --#include "linux/mm.h" --#include "linux/string.h" --#include "linux/stat.h" --#include "linux/errno.h" --#include "linux/locks.h" --#include "linux/unistd.h" --#include "linux/init.h" --#include "linux/sem.h" --#include "linux/vmalloc.h" --#include "linux/sysctl.h" -- --#define DEBUG_SUBSYSTEM S_GMNAL -- --#include "portals/nal.h" --#include "portals/api.h" --#include "portals/errno.h" --#include "linux/kp30.h" --#include "portals/p30.h" -- - #include "portals/lib-nal.h" -#include "portals/nal.h" --#include "portals/lib-p30.h" -- --#define GM_STRONG_TYPES 1 -#ifdef VERSION -#undef VERSION -#endif --#include "gm.h" --#include "gm_internal.h" -- -- -- --/* -- * Defines for the API NAL -- */ -- --/* -- * Small message size is configurable -- * insmod can set small_msg_size -- * which is used to populate nal_data.small_msg_size -- */ --#define GMNAL_SMALL_MESSAGE 1078 --#define GMNAL_LARGE_MESSAGE_INIT 1079 --#define GMNAL_LARGE_MESSAGE_ACK 1080 --#define GMNAL_LARGE_MESSAGE_FINI 1081 -- --extern int gmnal_small_msg_size; --extern int num_rx_threads; --extern int num_stxds; --extern int gm_port; --#define GMNAL_SMALL_MSG_SIZE(a) a->small_msg_size --#define GMNAL_IS_SMALL_MESSAGE(n,a,b,c) gmnal_is_small_msg(n, a, b, c) --#define GMNAL_MAGIC 0x1234abcd --/* -- * The gm_port to use for gmnal -- */ --#define GMNAL_GM_PORT gm_port -- -- --/* -- * Small Transmit Descriptor -- * A structre to keep track of a small transmit operation -- * This structure has a one-to-one relationship with a small -- * transmit buffer (both create by gmnal_stxd_alloc). -- * There are two free list of stxd. 
One for use by clients of the NAL -- * and the other by the NAL rxthreads when doing sends. -- * This helps prevent deadlock caused by stxd starvation. -- */ --typedef struct _gmnal_stxd_t { -- void *buffer; -- int buffer_size; -- gm_size_t gm_size; -- int msg_size; -- int gm_target_node; -- int gm_priority; -- int type; -- struct _gmnal_data_t *nal_data; -- lib_msg_t *cookie; -- int niov; -- struct iovec iov[PTL_MD_MAX_IOV]; -- struct _gmnal_stxd_t *next; -- int rxt; -- int kniov; -- struct iovec *iovec_dup; --} gmnal_stxd_t; -- --/* -- * keeps a transmit token for large transmit (gm_get) -- * and a pointer to rxd that is used as context for large receive -- */ --typedef struct _gmnal_ltxd_t { -- struct _gmnal_ltxd_t *next; -- struct _gmnal_srxd_t *srxd; --} gmnal_ltxd_t; -- -- --/* -- * as for gmnal_stxd_t -- * a hash table in nal_data find srxds from -- * the rx buffer address. hash table populated at init time -- */ --typedef struct _gmnal_srxd_t { -- void *buffer; -- int size; -- gm_size_t gmsize; -- unsigned int gm_source_node; -- gmnal_stxd_t *source_stxd; -- int type; -- int nsiov; -- int nriov; -- struct iovec *riov; -- int ncallbacks; -- spinlock_t callback_lock; -- int callback_status; -- lib_msg_t *cookie; -- struct _gmnal_srxd_t *next; -- struct _gmnal_data_t *nal_data; --} gmnal_srxd_t; -- --/* -- * Header which lmgnal puts at the start of each message -- */ --typedef struct _gmnal_msghdr { -- int magic; -- int type; -- unsigned int sender_node_id; -- gmnal_stxd_t *stxd; -- int niov; -- } gmnal_msghdr_t; --#define GMNAL_MSGHDR_SIZE sizeof(gmnal_msghdr_t) -- --/* -- * the caretaker thread (ct_thread) gets receive events -- * (and other events) from the myrinet device via the GM2 API. -- * caretaker thread populates one work entry for each receive event, -- * puts it on a Q in nal_data and wakes a receive thread to -- * process the receive. -- * Processing a portals receive can involve a transmit operation. 
-- * Because of this the caretaker thread cannot process receives -- * as it may get deadlocked when supply of transmit descriptors -- * is exhausted (as caretaker thread is responsible for replacing -- * transmit descriptors on the free list) -- */ --typedef struct _gmnal_rxtwe { -- void *buffer; -- unsigned snode; -- unsigned sport; -- unsigned type; -- unsigned length; -- struct _gmnal_rxtwe *next; --} gmnal_rxtwe_t; -- --/* -- * 1 receive thread started on each CPU -- */ --#define NRXTHREADS 10 /* max number of receiver threads */ -- --typedef struct _gmnal_data_t { - int refcnt; - spinlock_t cb_lock; -- spinlock_t stxd_lock; -- struct semaphore stxd_token; -- gmnal_stxd_t *stxd; -- spinlock_t rxt_stxd_lock; -- struct semaphore rxt_stxd_token; -- gmnal_stxd_t *rxt_stxd; -- spinlock_t ltxd_lock; -- struct semaphore ltxd_token; -- gmnal_ltxd_t *ltxd; -- spinlock_t srxd_lock; -- struct semaphore srxd_token; -- gmnal_srxd_t *srxd; -- struct gm_hash *srxd_hash; -- nal_t *nal; - nal_cb_t *nal_cb; - lib_nal_t *libnal; -- struct gm_port *gm_port; -- unsigned int gm_local_nid; -- unsigned int gm_global_nid; -- spinlock_t gm_lock; -- long rxthread_pid[NRXTHREADS]; -- int rxthread_stop_flag; -- spinlock_t rxthread_flag_lock; -- long rxthread_flag; -- long ctthread_pid; -- int ctthread_flag; -- gm_alarm_t ctthread_alarm; -- int small_msg_size; -- int small_msg_gmsize; -- gmnal_rxtwe_t *rxtwe_head; -- gmnal_rxtwe_t *rxtwe_tail; -- spinlock_t rxtwe_lock; -- struct semaphore rxtwe_wait; -- struct ctl_table_header *sysctl; --} gmnal_data_t; -- --/* -- * Flags to start/stop and check status of threads -- * each rxthread sets 1 bit (any bit) of the flag on startup -- * and clears 1 bit when exiting -- */ --#define GMNAL_THREAD_RESET 0 --#define GMNAL_THREAD_STOP 666 --#define GMNAL_CTTHREAD_STARTED 333 --#define GMNAL_RXTHREADS_STARTED ( (1<stxd_lock); --#define GMNAL_TXD_LOCK(a) spin_lock(&a->stxd_lock); --#define GMNAL_TXD_UNLOCK(a) spin_unlock(&a->stxd_lock); --#define 
GMNAL_TXD_TOKEN_INIT(a, n) sema_init(&a->stxd_token, n); --#define GMNAL_TXD_GETTOKEN(a) down(&a->stxd_token); --#define GMNAL_TXD_TRYGETTOKEN(a) down_trylock(&a->stxd_token) --#define GMNAL_TXD_RETURNTOKEN(a) up(&a->stxd_token); -- --#define GMNAL_RXT_TXD_LOCK_INIT(a) spin_lock_init(&a->rxt_stxd_lock); --#define GMNAL_RXT_TXD_LOCK(a) spin_lock(&a->rxt_stxd_lock); --#define GMNAL_RXT_TXD_UNLOCK(a) spin_unlock(&a->rxt_stxd_lock); --#define GMNAL_RXT_TXD_TOKEN_INIT(a, n) sema_init(&a->rxt_stxd_token, n); --#define GMNAL_RXT_TXD_GETTOKEN(a) down(&a->rxt_stxd_token); --#define GMNAL_RXT_TXD_TRYGETTOKEN(a) down_trylock(&a->rxt_stxd_token) --#define GMNAL_RXT_TXD_RETURNTOKEN(a) up(&a->rxt_stxd_token); -- --#define GMNAL_LTXD_LOCK_INIT(a) spin_lock_init(&a->ltxd_lock); --#define GMNAL_LTXD_LOCK(a) spin_lock(&a->ltxd_lock); --#define GMNAL_LTXD_UNLOCK(a) spin_unlock(&a->ltxd_lock); --#define GMNAL_LTXD_TOKEN_INIT(a, n) sema_init(&a->ltxd_token, n); --#define GMNAL_LTXD_GETTOKEN(a) down(&a->ltxd_token); --#define GMNAL_LTXD_TRYGETTOKEN(a) down_trylock(&a->ltxd_token) --#define GMNAL_LTXD_RETURNTOKEN(a) up(&a->ltxd_token); -- --#define GMNAL_RXD_LOCK_INIT(a) spin_lock_init(&a->srxd_lock); --#define GMNAL_RXD_LOCK(a) spin_lock(&a->srxd_lock); --#define GMNAL_RXD_UNLOCK(a) spin_unlock(&a->srxd_lock); --#define GMNAL_RXD_TOKEN_INIT(a, n) sema_init(&a->srxd_token, n); --#define GMNAL_RXD_GETTOKEN(a) down(&a->srxd_token); --#define GMNAL_RXD_TRYGETTOKEN(a) down_trylock(&a->srxd_token) --#define GMNAL_RXD_RETURNTOKEN(a) up(&a->srxd_token); -- --#define GMNAL_GM_LOCK_INIT(a) spin_lock_init(&a->gm_lock); --#define GMNAL_GM_LOCK(a) spin_lock(&a->gm_lock); --#define GMNAL_GM_UNLOCK(a) spin_unlock(&a->gm_lock); - #define GMNAL_CB_LOCK_INIT(a) spin_lock_init(&a->cb_lock); -- -- --/* -- * Memory Allocator -- */ -- --/* -- * API NAL -- */ -int gmnal_api_startup(nal_t *, ptl_pid_t, - ptl_ni_limits_t *, ptl_ni_limits_t *); - --int gmnal_api_forward(nal_t *, int, void *, size_t, void *, 
size_t); -- - int gmnal_api_shutdown(nal_t *, int); -void gmnal_api_shutdown(nal_t *); -- --int gmnal_api_validate(nal_t *, void *, size_t); -- - void gmnal_api_yield(nal_t *); -void gmnal_api_yield(nal_t *, unsigned long *, int); -- --void gmnal_api_lock(nal_t *, unsigned long *); -- --void gmnal_api_unlock(nal_t *, unsigned long *); -- -- --#define GMNAL_INIT_NAL(a) do { \ - a->forward = gmnal_api_forward; \ - a->shutdown = gmnal_api_shutdown; \ - a->validate = NULL; \ - a->yield = gmnal_api_yield; \ - a->lock = gmnal_api_lock; \ - a->unlock = gmnal_api_unlock; \ - a->timeout = NULL; \ - a->refct = 1; \ - a->nal_data = NULL; \ - (a)->nal_ni_init = gmnal_api_startup; \ - (a)->nal_ni_fini = gmnal_api_shutdown; \ - (a)->nal_data = NULL; \ -- } while (0) -- -- --/* -- * CB NAL -- */ - - int gmnal_cb_send(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, - int, ptl_nid_t, ptl_pid_t, unsigned int, struct iovec *, size_t); - - int gmnal_cb_send_pages(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, - int, ptl_nid_t, ptl_pid_t, unsigned int, ptl_kiov_t *, size_t); - - int gmnal_cb_recv(nal_cb_t *, void *, lib_msg_t *, - unsigned int, struct iovec *, size_t, size_t); - - int gmnal_cb_recv_pages(nal_cb_t *, void *, lib_msg_t *, - unsigned int, ptl_kiov_t *, size_t, size_t); - - int gmnal_cb_read(nal_cb_t *, void *private, void *, user_ptr, size_t); - - int gmnal_cb_write(nal_cb_t *, void *private, user_ptr, void *, size_t); - - int gmnal_cb_callback(nal_cb_t *, void *, lib_eq_t *, ptl_event_t *); - - void *gmnal_cb_malloc(nal_cb_t *, size_t); - - void gmnal_cb_free(nal_cb_t *, void *, size_t); - - void gmnal_cb_unmap(nal_cb_t *, unsigned int, struct iovec*, void **); -- - int gmnal_cb_map(nal_cb_t *, unsigned int, struct iovec*, void **); -ptl_err_t gmnal_cb_send(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *, - int, ptl_nid_t, ptl_pid_t, unsigned int, struct iovec *, size_t, size_t); -- - void gmnal_cb_printf(nal_cb_t *, const char *fmt, ...); -ptl_err_t 
gmnal_cb_send_pages(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *, - int, ptl_nid_t, ptl_pid_t, unsigned int, ptl_kiov_t *, size_t, size_t); -- - void gmnal_cb_cli(nal_cb_t *, unsigned long *); -ptl_err_t gmnal_cb_recv(lib_nal_t *, void *, lib_msg_t *, - unsigned int, struct iovec *, size_t, size_t, size_t); -- - void gmnal_cb_sti(nal_cb_t *, unsigned long *); -ptl_err_t gmnal_cb_recv_pages(lib_nal_t *, void *, lib_msg_t *, - unsigned int, ptl_kiov_t *, size_t, size_t, size_t); -- - int gmnal_cb_dist(nal_cb_t *, ptl_nid_t, unsigned long *); -int gmnal_cb_dist(lib_nal_t *, ptl_nid_t, unsigned long *); -- - nal_t *gmnal_init(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t rpid); -int gmnal_init(void); -- --void gmnal_fini(void); -- -- -- --#define GMNAL_INIT_NAL_CB(a) do { \ - a->cb_send = gmnal_cb_send; \ - a->cb_send_pages = gmnal_cb_send_pages; \ - a->cb_recv = gmnal_cb_recv; \ - a->cb_recv_pages = gmnal_cb_recv_pages; \ - a->cb_read = gmnal_cb_read; \ - a->cb_write = gmnal_cb_write; \ - a->cb_callback = gmnal_cb_callback; \ - a->cb_malloc = gmnal_cb_malloc; \ - a->cb_free = gmnal_cb_free; \ - a->cb_map = NULL; \ - a->cb_unmap = NULL; \ - a->cb_printf = gmnal_cb_printf; \ - a->cb_cli = gmnal_cb_cli; \ - a->cb_sti = gmnal_cb_sti; \ - a->cb_dist = gmnal_cb_dist; \ - a->nal_data = NULL; \ - a->libnal_send = gmnal_cb_send; \ - a->libnal_send_pages = gmnal_cb_send_pages; \ - a->libnal_recv = gmnal_cb_recv; \ - a->libnal_recv_pages = gmnal_cb_recv_pages; \ - a->libnal_map = NULL; \ - a->libnal_unmap = NULL; \ - a->libnal_dist = gmnal_cb_dist; \ - a->libnal_data = NULL; \ -- } while (0) -- -- --/* -- * Small and Large Transmit and Receive Descriptor Functions -- */ --int gmnal_alloc_txd(gmnal_data_t *); --void gmnal_free_txd(gmnal_data_t *); --gmnal_stxd_t* gmnal_get_stxd(gmnal_data_t *, int); --void gmnal_return_stxd(gmnal_data_t *, gmnal_stxd_t *); --gmnal_ltxd_t* gmnal_get_ltxd(gmnal_data_t *); --void gmnal_return_ltxd(gmnal_data_t *, gmnal_ltxd_t *); -- --int 
gmnal_alloc_srxd(gmnal_data_t *); --void gmnal_free_srxd(gmnal_data_t *); --gmnal_srxd_t* gmnal_get_srxd(gmnal_data_t *, int); --void gmnal_return_srxd(gmnal_data_t *, gmnal_srxd_t *); -- --/* -- * general utility functions -- */ --gmnal_srxd_t *gmnal_rxbuffer_to_srxd(gmnal_data_t *, void*); --void gmnal_stop_rxthread(gmnal_data_t *); --void gmnal_stop_ctthread(gmnal_data_t *); --void gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t); --void gmnal_drop_sends_callback(gm_port_t *, void *, gm_status_t); --void gmnal_resume_sending_callback(gm_port_t *, void *, gm_status_t); --char *gmnal_gm_error(gm_status_t); --char *gmnal_rxevent(gm_recv_event_t*); --int gmnal_is_small_msg(gmnal_data_t*, int, struct iovec*, int); --void gmnal_yield(int); --int gmnal_start_kernel_threads(gmnal_data_t *); -- -- --/* -- * Communication functions -- */ -- --/* -- * Receive threads -- */ --int gmnal_ct_thread(void *); /* caretaker thread */ --int gmnal_rx_thread(void *); /* receive thread */ --int gmnal_pre_receive(gmnal_data_t*, gmnal_rxtwe_t*, int); --int gmnal_rx_bad(gmnal_data_t *, gmnal_rxtwe_t *, gmnal_srxd_t*); --int gmnal_rx_requeue_buffer(gmnal_data_t *, gmnal_srxd_t *); --int gmnal_add_rxtwe(gmnal_data_t *, gm_recv_t *); --gmnal_rxtwe_t * gmnal_get_rxtwe(gmnal_data_t *); --void gmnal_remove_rxtwe(gmnal_data_t *); -- -- --/* -- * Small messages -- */ - int gmnal_small_rx(nal_cb_t *, void *, lib_msg_t *, unsigned int, - struct iovec *, size_t, size_t); - int gmnal_small_tx(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, -int gmnal_small_rx(lib_nal_t *, void *, lib_msg_t *, unsigned int, - struct iovec *, size_t, size_t, size_t); -int gmnal_small_tx(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *, -- int, ptl_nid_t, ptl_pid_t, - unsigned int, struct iovec*, int); - unsigned int, struct iovec*, size_t, int); --void gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t); -- -- -- --/* -- * Large messages -- */ - int gmnal_large_rx(nal_cb_t *, void *, lib_msg_t *, unsigned 
int, - struct iovec *, size_t, size_t); -int gmnal_large_rx(lib_nal_t *, void *, lib_msg_t *, unsigned int, - struct iovec *, size_t, size_t, size_t); -- - int gmnal_large_tx(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, -int gmnal_large_tx(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *, -- int, ptl_nid_t, ptl_pid_t, unsigned int, - struct iovec*, int); - struct iovec*, size_t, int); -- --void gmnal_large_tx_callback(gm_port_t *, void *, gm_status_t); -- --int gmnal_remote_get(gmnal_srxd_t *, int, struct iovec*, int, -- struct iovec*); -- --void gmnal_remote_get_callback(gm_port_t *, void *, gm_status_t); -- --int gmnal_copyiov(int, gmnal_srxd_t *, int, struct iovec*, int, -- struct iovec*); -- --void gmnal_large_tx_ack(gmnal_data_t *, gmnal_srxd_t *); --void gmnal_large_tx_ack_callback(gm_port_t *, void *, gm_status_t); --void gmnal_large_tx_ack_received(gmnal_data_t *, gmnal_srxd_t *); -- --#endif /*__INCLUDE_GMNAL_H__*/ diff --cc lnet/klnds/gmlnd/gmlnd_api.c index 1442aa7,bd6c83e..0000000 deleted file mode 100644,100644 --- a/lnet/klnds/gmlnd/gmlnd_api.c +++ /dev/null @@@ -1,500 -1,424 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2003 Los Alamos National Laboratory (LANL) -- * -- * This file is part of Lustre, http://www.lustre.org/ -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-- */ -- --/* -- * Implements the API NAL functions -- */ -- --#include "gmnal.h" -- -- -- --gmnal_data_t *global_nal_data = NULL; --#define GLOBAL_NID_STR_LEN 16 --char global_nid_str[GLOBAL_NID_STR_LEN] = {0}; -ptl_handle_ni_t kgmnal_ni; - -extern int gmnal_cmd(struct portals_cfg *pcfg, void *private); -- --/* -- * Write the global nid /proc/sys/gmnal/globalnid -- */ --#define GMNAL_SYSCTL 201 --#define GMNAL_SYSCTL_GLOBALNID 1 -- --static ctl_table gmnal_sysctl_table[] = { -- {GMNAL_SYSCTL_GLOBALNID, "globalnid", -- global_nid_str, GLOBAL_NID_STR_LEN, -- 0444, NULL, &proc_dostring}, -- { 0 } --}; -- -- --static ctl_table gmnalnal_top_sysctl_table[] = { -- {GMNAL_SYSCTL, "gmnal", NULL, 0, 0555, gmnal_sysctl_table}, -- { 0 } --}; - - - - - - - /* - * gmnal_api_forward - * This function takes a pack block of arguments from the NAL API - * module and passes them to the NAL CB module. The CB module unpacks - * the args and calls the appropriate function indicated by index. - * Typically this function is used to pass args between kernel and use - * space. 
- * As lgmanl exists entirely in kernel, just pass the arg block directly - * to the NAL CB, buy passing the args to lib_dispatch - * Arguments are - * nal_t nal Our nal - * int index the api function that initiated this call - * void *args packed block of function args - * size_t arg_len length of args block - * void *ret A return value for the API NAL - * size_t ret_len Size of the return value - * - */ - - int - gmnal_api_forward(nal_t *nal, int index, void *args, size_t arg_len, - void *ret, size_t ret_len) - { - - nal_cb_t *nal_cb = NULL; - gmnal_data_t *nal_data = NULL; - - - - - - if (!nal || !args || (index < 0) || (arg_len < 0)) { - CDEBUG(D_ERROR, "Bad args to gmnal_api_forward\n"); - return (PTL_FAIL); - } - - if (ret && (ret_len <= 0)) { - CDEBUG(D_ERROR, "Bad args to gmnal_api_forward\n"); - return (PTL_FAIL); - } - - - if (!nal->nal_data) { - CDEBUG(D_ERROR, "bad nal, no nal data\n"); - return (PTL_FAIL); - } - - nal_data = nal->nal_data; - CDEBUG(D_INFO, "nal_data is [%p]\n", nal_data); - - if (!nal_data->nal_cb) { - CDEBUG(D_ERROR, "bad nal_data, no nal_cb\n"); - return (PTL_FAIL); - } - - nal_cb = nal_data->nal_cb; - CDEBUG(D_INFO, "nal_cb is [%p]\n", nal_cb); - - CDEBUG(D_PORTALS, "gmnal_api_forward calling lib_dispatch\n"); - lib_dispatch(nal_cb, NULL, index, args, ret); - CDEBUG(D_PORTALS, "gmnal_api_forward returns from lib_dispatch\n"); - - return(PTL_OK); - } - -- --/* -- * gmnal_api_shutdown - * nal_refct == 0 => called on last matching PtlNIFini() -- * Close down this interface and free any resources associated with it -- * nal_t nal our nal to shutdown - */ - int - gmnal_api_shutdown(nal_t *nal, int interface) - { - - gmnal_data_t *nal_data = nal->nal_data; - - CDEBUG(D_TRACE, "gmnal_api_shutdown: nal_data [%p]\n", nal_data); - - return(PTL_OK); - } - - - /* - * gmnal_api_validate - * validate a user address for use in communications - * There's nothing to be done here - */ - int - gmnal_api_validate(nal_t *nal, void *base, size_t extent) 
- { - - return(PTL_OK); - } - - - - /* - * gmnal_api_yield - * Give up the processor - */ - void - gmnal_api_yield(nal_t *nal) - { - CDEBUG(D_TRACE, "gmnal_api_yield : nal [%p]\n", nal); - - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - - return; - } - - - - /* - * gmnal_api_lock - * Take a threadsafe lock -- */ --void - gmnal_api_lock(nal_t *nal, unsigned long *flags) -gmnal_api_shutdown(nal_t *nal) --{ - -- gmnal_data_t *nal_data; - nal_cb_t *nal_cb; - - nal_data = nal->nal_data; - nal_cb = nal_data->nal_cb; - lib_nal_t *libnal; -- - nal_cb->cb_cli(nal_cb, flags); - if (nal->nal_refct != 0) - return; - -- - return; - } - LASSERT(nal == global_nal_data->nal); - libnal = (lib_nal_t *)nal->nal_data; - nal_data = (gmnal_data_t *)libnal->libnal_data; - LASSERT(nal_data == global_nal_data); - CDEBUG(D_TRACE, "gmnal_api_shutdown: nal_data [%p]\n", nal_data); -- - /* - * gmnal_api_unlock - * Release a threadsafe lock - */ - void - gmnal_api_unlock(nal_t *nal, unsigned long *flags) - { - gmnal_data_t *nal_data; - nal_cb_t *nal_cb; - /* Stop portals calling our ioctl handler */ - libcfs_nal_cmd_unregister(GMNAL); -- - nal_data = nal->nal_data; - nal_cb = nal_data->nal_cb; - /* XXX for shutdown "under fire" we probably need to set a shutdown - * flag so when lib calls us we fail immediately and dont queue any - * more work but our threads can still call into lib OK. 
THEN - * shutdown our threads, THEN lib_fini() */ - lib_fini(libnal); -- - nal_cb->cb_sti(nal_cb, flags); - gmnal_stop_rxthread(nal_data); - gmnal_stop_ctthread(nal_data); - gmnal_free_txd(nal_data); - gmnal_free_srxd(nal_data); - GMNAL_GM_LOCK(nal_data); - gm_close(nal_data->gm_port); - gm_finalize(); - GMNAL_GM_UNLOCK(nal_data); - if (nal_data->sysctl) - unregister_sysctl_table (nal_data->sysctl); - /* Don't free 'nal'; it's a static struct */ - PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - PORTAL_FREE(libnal, sizeof(lib_nal_t)); -- - return; - global_nal_data = NULL; - PORTAL_MODULE_UNUSE; --} -- -- - nal_t * - gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, - ptl_pid_t rpid) -int -gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid, - ptl_ni_limits_t *requested_limits, - ptl_ni_limits_t *actual_limits) --{ -- - nal_t *nal = NULL; - nal_cb_t *nal_cb = NULL; - lib_nal_t *libnal = NULL; -- gmnal_data_t *nal_data = NULL; -- gmnal_srxd_t *srxd = NULL; -- gm_status_t gm_status; -- unsigned int local_nid = 0, global_nid = 0; - ptl_nid_t portals_nid; - ptl_pid_t portals_pid = 0; - ptl_process_id_t process_id; -- - if (nal->nal_refct != 0) { - if (actual_limits != NULL) { - libnal = (lib_nal_t *)nal->nal_data; - *actual_limits = libnal->libnal_ni.ni_actual_limits; - } - return (PTL_OK); - } -- - CDEBUG(D_TRACE, "gmnal_init : interface [%d], ptl_size [%d], " - "ac_size[%d]\n", interface, ptl_size, ac_size); - /* Called on first PtlNIInit() */ - - CDEBUG(D_TRACE, "startup\n"); -- - LASSERT(global_nal_data == NULL); -- -- PORTAL_ALLOC(nal_data, sizeof(gmnal_data_t)); -- if (!nal_data) { -- CDEBUG(D_ERROR, "can't get memory\n"); - return(NULL); - return(PTL_NO_SPACE); -- } -- memset(nal_data, 0, sizeof(gmnal_data_t)); -- /* -- * set the small message buffer size -- */ - nal_data->refcnt = 1; -- -- CDEBUG(D_INFO, "Allocd and reset nal_data[%p]\n", nal_data); -- CDEBUG(D_INFO, "small_msg_size is [%d]\n", nal_data->small_msg_size); - - 
PORTAL_ALLOC(nal, sizeof(nal_t)); - if (!nal) { - PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - return(NULL); - } - memset(nal, 0, sizeof(nal_t)); - CDEBUG(D_INFO, "Allocd and reset nal[%p]\n", nal); -- - PORTAL_ALLOC(nal_cb, sizeof(nal_cb_t)); - if (!nal_cb) { - PORTAL_FREE(nal, sizeof(nal_t)); - PORTAL_ALLOC(libnal, sizeof(lib_nal_t)); - if (!libnal) { -- PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - return(NULL); - return(PTL_NO_SPACE); -- } - memset(nal_cb, 0, sizeof(nal_cb_t)); - CDEBUG(D_INFO, "Allocd and reset nal_cb[%p]\n", nal_cb); - memset(libnal, 0, sizeof(lib_nal_t)); - CDEBUG(D_INFO, "Allocd and reset libnal[%p]\n", libnal); -- - GMNAL_INIT_NAL(nal); - GMNAL_INIT_NAL_CB(nal_cb); - GMNAL_INIT_NAL_CB(libnal); -- /* -- * String them all together -- */ - nal->nal_data = (void*)nal_data; - nal_cb->nal_data = (void*)nal_data; - libnal->libnal_data = (void*)nal_data; -- nal_data->nal = nal; - nal_data->nal_cb = nal_cb; - nal_data->libnal = libnal; -- - GMNAL_CB_LOCK_INIT(nal_data); -- GMNAL_GM_LOCK_INIT(nal_data); -- -- -- /* -- * initialise the interface, -- */ -- CDEBUG(D_INFO, "Calling gm_init\n"); -- if (gm_init() != GM_SUCCESS) { -- CDEBUG(D_ERROR, "call to gm_init failed\n"); - PORTAL_FREE(nal, sizeof(nal_t)); -- PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); - PORTAL_FREE(libnal, sizeof(lib_nal_t)); - return(PTL_FAIL); -- } -- -- - CDEBUG(D_NET, "Calling gm_open with interface [%d], port [%d], " - "name [%s], version [%d]\n", interface, GMNAL_GM_PORT, - CDEBUG(D_NET, "Calling gm_open with port [%d], " - "name [%s], version [%d]\n", GMNAL_GM_PORT, -- "gmnal", GM_API_VERSION); -- -- GMNAL_GM_LOCK(nal_data); -- gm_status = gm_open(&nal_data->gm_port, 0, GMNAL_GM_PORT, "gmnal", -- GM_API_VERSION); -- GMNAL_GM_UNLOCK(nal_data); -- -- CDEBUG(D_INFO, "gm_open returned [%d]\n", gm_status); -- if (gm_status == GM_SUCCESS) { -- CDEBUG(D_INFO, "gm_open succeeded port[%p]\n", -- nal_data->gm_port); -- } else 
{ -- switch(gm_status) { -- case(GM_INVALID_PARAMETER): -- CDEBUG(D_ERROR, "gm_open Failure. Invalid Parameter\n"); -- break; -- case(GM_BUSY): -- CDEBUG(D_ERROR, "gm_open Failure. GM Busy\n"); -- break; -- case(GM_NO_SUCH_DEVICE): -- CDEBUG(D_ERROR, "gm_open Failure. No such device\n"); -- break; -- case(GM_INCOMPATIBLE_LIB_AND_DRIVER): -- CDEBUG(D_ERROR, "gm_open Failure. Incompatile lib " -- "and driver\n"); -- break; -- case(GM_OUT_OF_MEMORY): -- CDEBUG(D_ERROR, "gm_open Failure. Out of Memory\n"); -- break; -- default: -- CDEBUG(D_ERROR, "gm_open Failure. Unknow error " -- "code [%d]\n", gm_status); -- break; -- } -- GMNAL_GM_LOCK(nal_data); -- gm_finalize(); -- GMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); -- PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); - PORTAL_FREE(libnal, sizeof(lib_nal_t)); - return(PTL_FAIL); -- } -- -- -- nal_data->small_msg_size = gmnal_small_msg_size; -- nal_data->small_msg_gmsize = -- gm_min_size_for_length(gmnal_small_msg_size); -- -- if (gmnal_alloc_srxd(nal_data) != GMNAL_STATUS_OK) { -- CDEBUG(D_ERROR, "Failed to allocate small rx descriptors\n"); -- gmnal_free_txd(nal_data); -- GMNAL_GM_LOCK(nal_data); -- gm_close(nal_data->gm_port); -- gm_finalize(); -- GMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); -- PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); - PORTAL_FREE(libnal, sizeof(lib_nal_t)); - return(PTL_FAIL); -- } -- -- -- /* -- * Hang out a bunch of small receive buffers -- * In fact hang them all out -- */ -- while((srxd = gmnal_get_srxd(nal_data, 0))) { -- CDEBUG(D_NET, "giving [%p] to gm_provide_recvive_buffer\n", -- srxd->buffer); -- GMNAL_GM_LOCK(nal_data); -- gm_provide_receive_buffer_with_tag(nal_data->gm_port, -- srxd->buffer, srxd->gmsize, -- GM_LOW_PRIORITY, 0); -- GMNAL_GM_UNLOCK(nal_data); -- } -- -- /* -- * Allocate pools of small tx buffers and descriptors -- */ -- if 
(gmnal_alloc_txd(nal_data) != GMNAL_STATUS_OK) { -- CDEBUG(D_ERROR, "Failed to allocate small tx descriptors\n"); -- GMNAL_GM_LOCK(nal_data); -- gm_close(nal_data->gm_port); -- gm_finalize(); -- GMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); -- PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); - PORTAL_FREE(libnal, sizeof(lib_nal_t)); - return(PTL_FAIL); -- } -- -- gmnal_start_kernel_threads(nal_data); -- -- while (nal_data->rxthread_flag != GMNAL_RXTHREADS_STARTED) { -- gmnal_yield(1); -- CDEBUG(D_INFO, "Waiting for receive thread signs of life\n"); -- } -- -- CDEBUG(D_INFO, "receive thread seems to have started\n"); -- -- -- /* -- * Initialise the portals library -- */ -- CDEBUG(D_NET, "Getting node id\n"); -- GMNAL_GM_LOCK(nal_data); -- gm_status = gm_get_node_id(nal_data->gm_port, &local_nid); -- GMNAL_GM_UNLOCK(nal_data); -- if (gm_status != GM_SUCCESS) { -- gmnal_stop_rxthread(nal_data); -- gmnal_stop_ctthread(nal_data); -- CDEBUG(D_ERROR, "can't determine node id\n"); -- gmnal_free_txd(nal_data); -- gmnal_free_srxd(nal_data); -- GMNAL_GM_LOCK(nal_data); -- gm_close(nal_data->gm_port); -- gm_finalize(); -- GMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); -- PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); - PORTAL_FREE(libnal, sizeof(lib_nal_t)); - return(PTL_FAIL); -- } - -- nal_data->gm_local_nid = local_nid; -- CDEBUG(D_INFO, "Local node id is [%u]\n", local_nid); - -- GMNAL_GM_LOCK(nal_data); -- gm_status = gm_node_id_to_global_id(nal_data->gm_port, local_nid, -- &global_nid); -- GMNAL_GM_UNLOCK(nal_data); -- if (gm_status != GM_SUCCESS) { -- CDEBUG(D_ERROR, "failed to obtain global id\n"); -- gmnal_stop_rxthread(nal_data); -- gmnal_stop_ctthread(nal_data); -- gmnal_free_txd(nal_data); -- gmnal_free_srxd(nal_data); -- GMNAL_GM_LOCK(nal_data); -- gm_close(nal_data->gm_port); -- gm_finalize(); -- GMNAL_GM_UNLOCK(nal_data); - 
PORTAL_FREE(nal, sizeof(nal_t)); -- PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); - PORTAL_FREE(libnal, sizeof(lib_nal_t)); - return(PTL_FAIL); -- } -- CDEBUG(D_INFO, "Global node id is [%u]\n", global_nid); -- nal_data->gm_global_nid = global_nid; -- snprintf(global_nid_str, GLOBAL_NID_STR_LEN, "%u", global_nid); -- --/* -- pid = gm_getpid(); --*/ - CDEBUG(D_INFO, "portals_pid is [%u]\n", portals_pid); - portals_nid = (unsigned long)global_nid; - CDEBUG(D_INFO, "portals_nid is ["LPU64"]\n", portals_nid); - process_id.pid = requested_pid; - process_id.nid = global_nid; - - CDEBUG(D_INFO, "portals_pid is [%u]\n", process_id.pid); - CDEBUG(D_INFO, "portals_nid is ["LPU64"]\n", process_id.nid); -- -- CDEBUG(D_PORTALS, "calling lib_init\n"); - if (lib_init(nal_cb, portals_nid, portals_pid, 1024, ptl_size, - ac_size) != PTL_OK) { - if (lib_init(libnal, nal, process_id, - requested_limits, actual_limits) != PTL_OK) { -- CDEBUG(D_ERROR, "lib_init failed\n"); -- gmnal_stop_rxthread(nal_data); -- gmnal_stop_ctthread(nal_data); -- gmnal_free_txd(nal_data); -- gmnal_free_srxd(nal_data); -- GMNAL_GM_LOCK(nal_data); -- gm_close(nal_data->gm_port); -- gm_finalize(); -- GMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); -- PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); - PORTAL_FREE(libnal, sizeof(lib_nal_t)); - return(PTL_FAIL); -- -- } - - if (libcfs_nal_cmd_register(GMNAL, &gmnal_cmd, libnal->libnal_data) != 0) { - CDEBUG(D_INFO, "libcfs_nal_cmd_register failed\n"); - - /* XXX these cleanup cases should be restructured to - * minimise duplication... 
*/ - lib_fini(libnal); - - gmnal_stop_rxthread(nal_data); - gmnal_stop_ctthread(nal_data); - gmnal_free_txd(nal_data); - gmnal_free_srxd(nal_data); - GMNAL_GM_LOCK(nal_data); - gm_close(nal_data->gm_port); - gm_finalize(); - GMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - PORTAL_FREE(libnal, sizeof(lib_nal_t)); - return(PTL_FAIL); - } - - /* might be better to initialise this at module load rather than in - * NAL startup */ -- nal_data->sysctl = NULL; -- nal_data->sysctl = register_sysctl_table (gmnalnal_top_sysctl_table, 0); -- -- -- CDEBUG(D_INFO, "gmnal_init finished\n"); -- global_nal_data = nal->nal_data; - return(nal); - - /* no unload now until shutdown */ - PORTAL_MODULE_USE; - - return(PTL_OK); --} - -nal_t the_gm_nal; - -/* - * Called when module loaded - */ -int gmnal_init(void) -{ - int rc; - - memset(&the_gm_nal, 0, sizeof(nal_t)); - CDEBUG(D_INFO, "reset nal[%p]\n", &the_gm_nal); - GMNAL_INIT_NAL(&the_gm_nal); - - rc = ptl_register_nal(GMNAL, &the_gm_nal); - if (rc != PTL_OK) - CERROR("Can't register GMNAL: %d\n", rc); - rc = PtlNIInit(GMNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kgmnal_ni); - if (rc != PTL_OK && rc != PTL_IFACE_DUP) { - ptl_unregister_nal(GMNAL); - return (-ENODEV); - } -- - return (rc); -} -- - -- --/* -- * Called when module removed -- */ --void gmnal_fini() --{ - gmnal_data_t *nal_data = global_nal_data; - nal_t *nal = nal_data->nal; - nal_cb_t *nal_cb = nal_data->nal_cb; - -- CDEBUG(D_TRACE, "gmnal_fini\n"); -- - PtlNIFini(kgmnal_ni); - lib_fini(nal_cb); - LASSERT(global_nal_data == NULL); - PtlNIFini(kgmnal_ni); -- - gmnal_stop_rxthread(nal_data); - gmnal_stop_ctthread(nal_data); - gmnal_free_txd(nal_data); - gmnal_free_srxd(nal_data); - GMNAL_GM_LOCK(nal_data); - gm_close(nal_data->gm_port); - gm_finalize(); - GMNAL_GM_UNLOCK(nal_data); - if (nal_data->sysctl) - unregister_sysctl_table (nal_data->sysctl); - PORTAL_FREE(nal, sizeof(nal_t)); - PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - 
PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - ptl_unregister_nal(GMNAL); --} diff --cc lnet/klnds/gmlnd/gmlnd_cb.c index 1f287468,0ebf437..0000000 deleted file mode 100644,100644 --- a/lnet/klnds/gmlnd/gmlnd_cb.c +++ /dev/null @@@ -1,281 -1,207 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2003 Los Alamos National Laboratory (LANL) -- * -- * This file is part of Lustre, http://www.lustre.org/ -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-- */ -- -- --/* -- * This file implements the nal cb functions -- */ -- -- --#include "gmnal.h" -- - int gmnal_cb_recv(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, - unsigned int niov, struct iovec *iov, size_t mlen, - size_t rlen) -ptl_err_t gmnal_cb_recv(lib_nal_t *libnal, void *private, lib_msg_t *cookie, - unsigned int niov, struct iovec *iov, size_t offset, - size_t mlen, size_t rlen) --{ -- gmnal_srxd_t *srxd = (gmnal_srxd_t*)private; -- int status = PTL_OK; -- -- - CDEBUG(D_TRACE, "gmnal_cb_recv nal_cb [%p], private[%p], cookie[%p], " - "niov[%d], iov [%p], mlen["LPSZ"], rlen["LPSZ"]\n", - nal_cb, private, cookie, niov, iov, mlen, rlen); - CDEBUG(D_TRACE, "gmnal_cb_recv libnal [%p], private[%p], cookie[%p], " - "niov[%d], iov [%p], offset["LPSZ"], mlen["LPSZ"], rlen["LPSZ"]\n", - libnal, private, cookie, niov, iov, offset, mlen, rlen); -- -- switch(srxd->type) { -- case(GMNAL_SMALL_MESSAGE): -- CDEBUG(D_INFO, "gmnal_cb_recv got small message\n"); - status = gmnal_small_rx(nal_cb, private, cookie, niov, - iov, mlen, rlen); - status = gmnal_small_rx(libnal, private, cookie, niov, - iov, offset, mlen, rlen); -- break; -- case(GMNAL_LARGE_MESSAGE_INIT): -- CDEBUG(D_INFO, "gmnal_cb_recv got large message init\n"); - status = gmnal_large_rx(nal_cb, private, cookie, niov, - iov, mlen, rlen); - status = gmnal_large_rx(libnal, private, cookie, niov, - iov, offset, mlen, rlen); -- } -- -- -- CDEBUG(D_INFO, "gmnal_cb_recv gmnal_return status [%d]\n", status); -- return(status); --} -- - int gmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, - unsigned int kniov, ptl_kiov_t *kiov, size_t mlen, - size_t rlen) -ptl_err_t gmnal_cb_recv_pages(lib_nal_t *libnal, void *private, lib_msg_t *cookie, - unsigned int kniov, ptl_kiov_t *kiov, size_t offset, - size_t mlen, size_t rlen) --{ -- gmnal_srxd_t *srxd = (gmnal_srxd_t*)private; -- int status = PTL_OK; -- struct iovec *iovec = NULL, *iovec_dup = NULL; -- int i = 0; -- ptl_kiov_t *kiov_dup = kiov;; 
-- -- - CDEBUG(D_TRACE, "gmnal_cb_recv_pages nal_cb [%p],private[%p], " - "cookie[%p], kniov[%d], kiov [%p], mlen["LPSZ"], rlen["LPSZ"]\n", - nal_cb, private, cookie, kniov, kiov, mlen, rlen); - CDEBUG(D_TRACE, "gmnal_cb_recv_pages libnal [%p],private[%p], " - "cookie[%p], kniov[%d], kiov [%p], offset["LPSZ"], mlen["LPSZ"], rlen["LPSZ"]\n", - libnal, private, cookie, kniov, kiov, offset, mlen, rlen); -- -- if (srxd->type == GMNAL_SMALL_MESSAGE) { -- PORTAL_ALLOC(iovec, sizeof(struct iovec)*kniov); -- if (!iovec) { -- CDEBUG(D_ERROR, "Can't malloc\n"); -- return(GMNAL_STATUS_FAIL); -- } -- iovec_dup = iovec; -- -- /* -- * map each page and create an iovec for it -- */ -- for (i=0; ikiov_page, kiov->kiov_len, -- kiov->kiov_offset); -- iovec->iov_len = kiov->kiov_len; -- CDEBUG(D_INFO, "Calling kmap[%p]", kiov->kiov_page); -- -- iovec->iov_base = kmap(kiov->kiov_page) + -- kiov->kiov_offset; -- -- CDEBUG(D_INFO, "iov_base is [%p]\n", iovec->iov_base); -- iovec++; -- kiov++; -- } -- CDEBUG(D_INFO, "calling gmnal_small_rx\n"); - status = gmnal_small_rx(nal_cb, private, cookie, kniov, - iovec_dup, mlen, rlen); - status = gmnal_small_rx(libnal, private, cookie, kniov, - iovec_dup, offset, mlen, rlen); -- for (i=0; ikiov_page); -- kiov_dup++; -- } -- PORTAL_FREE(iovec_dup, sizeof(struct iovec)*kniov); -- } -- -- -- CDEBUG(D_INFO, "gmnal_return status [%d]\n", status); -- return(status); --} -- -- - int gmnal_cb_send(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, -ptl_err_t gmnal_cb_send(lib_nal_t *libnal, void *private, lib_msg_t *cookie, -- ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int niov, struct iovec *iov, size_t len) - unsigned int niov, struct iovec *iov, size_t offset, size_t len) --{ -- -- gmnal_data_t *nal_data; -- -- - CDEBUG(D_TRACE, "gmnal_cb_send niov[%d] len["LPSZ"] nid["LPU64"]\n", - niov, len, nid); - nal_data = nal_cb->nal_data; - CDEBUG(D_TRACE, "gmnal_cb_send niov[%d] offset["LPSZ"] len["LPSZ"] nid["LPU64"]\n", - niov, 
offset, len, nid); - nal_data = libnal->libnal_data; -- -- if (GMNAL_IS_SMALL_MESSAGE(nal_data, niov, iov, len)) { -- CDEBUG(D_INFO, "This is a small message send\n"); - gmnal_small_tx(nal_cb, private, cookie, hdr, type, nid, pid, - niov, iov, len); - gmnal_small_tx(libnal, private, cookie, hdr, type, nid, pid, - niov, iov, offset, len); -- } else { -- CDEBUG(D_ERROR, "Large message send it is not supported\n"); - lib_finalize(nal_cb, private, cookie, PTL_FAIL); - lib_finalize(libnal, private, cookie, PTL_FAIL); -- return(PTL_FAIL); - gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, pid, - niov, iov, len); - gmnal_large_tx(libnal, private, cookie, hdr, type, nid, pid, - niov, iov, offset, len); -- } -- return(PTL_OK); --} -- - int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, unsigned int kniov, ptl_kiov_t *kiov, size_t len) -ptl_err_t gmnal_cb_send_pages(lib_nal_t *libnal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int kniov, ptl_kiov_t *kiov, size_t offset, size_t len) --{ -- -- int i = 0; -- gmnal_data_t *nal_data; -- struct iovec *iovec = NULL, *iovec_dup = NULL; -- ptl_kiov_t *kiov_dup = kiov; -- - CDEBUG(D_TRACE, "gmnal_cb_send_pages nid ["LPU64"] niov[%d] len["LPSZ"]\n", nid, kniov, len); - nal_data = nal_cb->nal_data; - CDEBUG(D_TRACE, "gmnal_cb_send_pages nid ["LPU64"] niov[%d] offset["LPSZ"] len["LPSZ"]\n", - nid, kniov, offset, len); - nal_data = libnal->libnal_data; -- PORTAL_ALLOC(iovec, kniov*sizeof(struct iovec)); -- iovec_dup = iovec; -- if (GMNAL_IS_SMALL_MESSAGE(nal_data, 0, NULL, len)) { -- CDEBUG(D_INFO, "This is a small message send\n"); -- -- for (i=0; ikiov_page, kiov->kiov_len, -- kiov->kiov_offset); -- -- iovec->iov_base = kmap(kiov->kiov_page) -- + kiov->kiov_offset; -- -- iovec->iov_len = kiov->kiov_len; -- iovec++; -- kiov++; -- } - gmnal_small_tx(nal_cb, private, cookie, hdr, type, nid, - 
pid, kniov, iovec_dup, len); - gmnal_small_tx(libnal, private, cookie, hdr, type, nid, - pid, kniov, iovec_dup, offset, len); -- } else { -- CDEBUG(D_ERROR, "Large message send it is not supported yet\n"); -- return(PTL_FAIL); -- for (i=0; ikiov_page, kiov->kiov_len, -- kiov->kiov_offset); -- -- iovec->iov_base = kmap(kiov->kiov_page) -- + kiov->kiov_offset; -- iovec->iov_len = kiov->kiov_len; -- iovec++; -- kiov++; -- } - gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, - pid, kniov, iovec, len); - gmnal_large_tx(libnal, private, cookie, hdr, type, nid, - pid, kniov, iovec, offset, len); -- } -- for (i=0; ikiov_page); -- kiov_dup++; -- } -- PORTAL_FREE(iovec_dup, kniov*sizeof(struct iovec)); - return(PTL_OK); - } - - int gmnal_cb_read(nal_cb_t *nal_cb, void *private, void *dst, - user_ptr src, size_t len) - { - gm_bcopy(src, dst, len); - return(PTL_OK); - } - - int gmnal_cb_write(nal_cb_t *nal_cb, void *private, user_ptr dst, - void *src, size_t len) - { - gm_bcopy(src, dst, len); - return(PTL_OK); - } - - int gmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq, - ptl_event_t *ev) - { - - if (eq->event_callback != NULL) { - CDEBUG(D_INFO, "found callback\n"); - eq->event_callback(ev); - } - - return(PTL_OK); - } - - void *gmnal_cb_malloc(nal_cb_t *nal_cb, size_t len) - { - void *ptr = NULL; - CDEBUG(D_TRACE, "gmnal_cb_malloc len["LPSZ"]\n", len); - PORTAL_ALLOC(ptr, len); - return(ptr); - } - - void gmnal_cb_free(nal_cb_t *nal_cb, void *buf, size_t len) - { - CDEBUG(D_TRACE, "gmnal_cb_free :: buf[%p] len["LPSZ"]\n", buf, len); - PORTAL_FREE(buf, len); - return; - } - - void gmnal_cb_unmap(nal_cb_t *nal_cb, unsigned int niov, struct iovec *iov, - void **addrkey) - { - return; - } - - int gmnal_cb_map(nal_cb_t *nal_cb, unsigned int niov, struct iovec *iov, - void**addrkey) - { -- return(PTL_OK); - } - - void gmnal_cb_printf(nal_cb_t *nal_cb, const char *fmt, ...) 
- { - CDEBUG(D_TRACE, "gmnal_cb_printf\n"); - printk(fmt); - return; - } - - void gmnal_cb_cli(nal_cb_t *nal_cb, unsigned long *flags) - { - gmnal_data_t *nal_data = (gmnal_data_t*)nal_cb->nal_data; - - spin_lock_irqsave(&nal_data->cb_lock, *flags); - return; - } - - void gmnal_cb_sti(nal_cb_t *nal_cb, unsigned long *flags) - { - gmnal_data_t *nal_data = (gmnal_data_t*)nal_cb->nal_data; - - spin_unlock_irqrestore(&nal_data->cb_lock, *flags); - return; --} -- - int gmnal_cb_dist(nal_cb_t *nal_cb, ptl_nid_t nid, unsigned long *dist) -int gmnal_cb_dist(lib_nal_t *libnal, ptl_nid_t nid, unsigned long *dist) --{ -- CDEBUG(D_TRACE, "gmnal_cb_dist\n"); -- if (dist) -- *dist = 27; -- return(PTL_OK); --} diff --cc lnet/klnds/gmlnd/gmlnd_comm.c index 1bcd9bd,6a8fcbc..0000000 deleted file mode 100644,100644 --- a/lnet/klnds/gmlnd/gmlnd_comm.c +++ /dev/null @@@ -1,1325 -1,1380 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2003 Los Alamos National Laboratory (LANL) -- * -- * This file is part of Lustre, http://www.lustre.org/ -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-- */ -- --/* -- * This file contains all gmnal send and receive functions -- */ -- --#include "gmnal.h" -- --/* -- * The caretaker thread -- * This is main thread of execution for the NAL side -- * This guy waits in gm_blocking_recvive and gets -- * woken up when the myrinet adaptor gets an interrupt. -- * Hands off receive operations to the receive thread -- * This thread Looks after gm_callbacks etc inline. -- */ --int --gmnal_ct_thread(void *arg) --{ -- gmnal_data_t *nal_data; -- gm_recv_event_t *rxevent = NULL; -- gm_recv_t *recv = NULL; -- -- if (!arg) { -- CDEBUG(D_TRACE, "NO nal_data. Exiting\n"); -- return(-1); -- } -- -- nal_data = (gmnal_data_t*)arg; -- CDEBUG(D_TRACE, "nal_data is [%p]\n", arg); -- -- daemonize(); -- -- nal_data->ctthread_flag = GMNAL_CTTHREAD_STARTED; -- -- GMNAL_GM_LOCK(nal_data); -- while(nal_data->ctthread_flag == GMNAL_CTTHREAD_STARTED) { -- CDEBUG(D_NET, "waiting\n"); -- rxevent = gm_blocking_receive_no_spin(nal_data->gm_port); -- if (nal_data->ctthread_flag == GMNAL_THREAD_STOP) { -- CDEBUG(D_INFO, "time to exit\n"); -- break; -- } -- CDEBUG(D_INFO, "got [%s]\n", gmnal_rxevent(rxevent)); -- switch (GM_RECV_EVENT_TYPE(rxevent)) { -- -- case(GM_RECV_EVENT): -- CDEBUG(D_NET, "CTTHREAD:: GM_RECV_EVENT\n"); -- recv = (gm_recv_t*)&rxevent->recv; -- GMNAL_GM_UNLOCK(nal_data); -- gmnal_add_rxtwe(nal_data, recv); -- GMNAL_GM_LOCK(nal_data); -- CDEBUG(D_NET, "CTTHREAD:: Added event to Q\n"); -- break; -- case(_GM_SLEEP_EVENT): -- /* -- * Blocking receive above just returns -- * immediatly with _GM_SLEEP_EVENT -- * Don't know what this is -- */ -- CDEBUG(D_NET, "Sleeping in gm_unknown\n"); -- GMNAL_GM_UNLOCK(nal_data); -- gm_unknown(nal_data->gm_port, rxevent); -- GMNAL_GM_LOCK(nal_data); -- CDEBUG(D_INFO, "Awake from gm_unknown\n"); -- break; -- -- default: -- /* -- * Don't know what this is -- * gm_unknown will make sense of it -- * Should be able to do something with -- * FAST_RECV_EVENTS here. 
-- */ -- CDEBUG(D_NET, "Passing event to gm_unknown\n"); -- GMNAL_GM_UNLOCK(nal_data); -- gm_unknown(nal_data->gm_port, rxevent); -- GMNAL_GM_LOCK(nal_data); -- CDEBUG(D_INFO, "Processed unknown event\n"); -- } -- } -- GMNAL_GM_UNLOCK(nal_data); -- nal_data->ctthread_flag = GMNAL_THREAD_RESET; -- CDEBUG(D_INFO, "thread nal_data [%p] is exiting\n", nal_data); -- return(GMNAL_STATUS_OK); --} -- -- --/* -- * process a receive event -- */ --int gmnal_rx_thread(void *arg) --{ -- gmnal_data_t *nal_data; -- void *buffer; -- gmnal_rxtwe_t *we = NULL; -- -- if (!arg) { -- CDEBUG(D_TRACE, "NO nal_data. Exiting\n"); -- return(-1); -- } -- -- nal_data = (gmnal_data_t*)arg; -- CDEBUG(D_TRACE, "nal_data is [%p]\n", arg); -- -- daemonize(); -- /* -- * set 1 bit for each thread started -- * doesn't matter which bit -- */ -- spin_lock(&nal_data->rxthread_flag_lock); -- if (nal_data->rxthread_flag) -- nal_data->rxthread_flag=nal_data->rxthread_flag*2 + 1; -- else -- nal_data->rxthread_flag = 1; -- CDEBUG(D_INFO, "rxthread flag is [%ld]\n", nal_data->rxthread_flag); -- spin_unlock(&nal_data->rxthread_flag_lock); -- -- while(nal_data->rxthread_stop_flag != GMNAL_THREAD_STOP) { -- CDEBUG(D_NET, "RXTHREAD:: Receive thread waiting\n"); -- we = gmnal_get_rxtwe(nal_data); -- if (!we) { -- CDEBUG(D_INFO, "Receive thread time to exit\n"); -- break; -- } -- -- buffer = we->buffer; -- switch(((gmnal_msghdr_t*)buffer)->type) { -- case(GMNAL_SMALL_MESSAGE): -- gmnal_pre_receive(nal_data, we, -- GMNAL_SMALL_MESSAGE); -- break; -- case(GMNAL_LARGE_MESSAGE_INIT): -- gmnal_pre_receive(nal_data, we, -- GMNAL_LARGE_MESSAGE_INIT); -- break; -- case(GMNAL_LARGE_MESSAGE_ACK): -- gmnal_pre_receive(nal_data, we, -- GMNAL_LARGE_MESSAGE_ACK); -- break; -- default: -- CDEBUG(D_ERROR, "Unsupported message type\n"); -- gmnal_rx_bad(nal_data, we, NULL); -- } -- PORTAL_FREE(we, sizeof(gmnal_rxtwe_t)); -- } -- -- spin_lock(&nal_data->rxthread_flag_lock); -- nal_data->rxthread_flag/=2; -- CDEBUG(D_INFO, "rxthread 
flag is [%ld]\n", nal_data->rxthread_flag); -- spin_unlock(&nal_data->rxthread_flag_lock); -- CDEBUG(D_INFO, "thread nal_data [%p] is exiting\n", nal_data); -- return(GMNAL_STATUS_OK); --} -- -- -- --/* -- * Start processing a small message receive -- * Get here from gmnal_receive_thread -- * Hand off to lib_parse, which calls cb_recv -- * which hands back to gmnal_small_receive -- * Deal with all endian stuff here. -- */ --int --gmnal_pre_receive(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, int gmnal_type) --{ -- gmnal_srxd_t *srxd = NULL; -- void *buffer = NULL; -- unsigned int snode, sport, type, length; -- gmnal_msghdr_t *gmnal_msghdr; -- ptl_hdr_t *portals_hdr; - int rc; -- -- CDEBUG(D_INFO, "nal_data [%p], we[%p] type [%d]\n", -- nal_data, we, gmnal_type); -- -- buffer = we->buffer; -- snode = we->snode; -- sport = we->sport; -- type = we->type; -- buffer = we->buffer; -- length = we->length; -- -- gmnal_msghdr = (gmnal_msghdr_t*)buffer; -- portals_hdr = (ptl_hdr_t*)(buffer+GMNAL_MSGHDR_SIZE); -- -- CDEBUG(D_INFO, "rx_event:: Sender node [%d], Sender Port [%d], " -- "type [%d], length [%d], buffer [%p]\n", -- snode, sport, type, length, buffer); -- CDEBUG(D_INFO, "gmnal_msghdr:: Sender node [%u], magic [%d], " -- "gmnal_type [%d]\n", gmnal_msghdr->sender_node_id, -- gmnal_msghdr->magic, gmnal_msghdr->type); -- CDEBUG(D_INFO, "portals_hdr:: Sender node ["LPD64"], " -- "dest_node ["LPD64"]\n", portals_hdr->src_nid, -- portals_hdr->dest_nid); -- -- -- /* -- * Get a receive descriptor for this message -- */ -- srxd = gmnal_rxbuffer_to_srxd(nal_data, buffer); -- CDEBUG(D_INFO, "Back from gmnal_rxbuffer_to_srxd\n"); - srxd->nal_data = nal_data; -- if (!srxd) { -- CDEBUG(D_ERROR, "Failed to get receive descriptor\n"); - lib_parse(nal_data->nal_cb, portals_hdr, srxd); - /* I think passing a NULL srxd to lib_parse will crash - * gmnal_recv() */ - LBUG(); - lib_parse(nal_data->libnal, portals_hdr, srxd); -- return(GMNAL_STATUS_FAIL); -- } -- -- /* -- * no need to bother 
portals library with this -- */ -- if (gmnal_type == GMNAL_LARGE_MESSAGE_ACK) { -- gmnal_large_tx_ack_received(nal_data, srxd); -- return(GMNAL_STATUS_OK); -- } -- - srxd->nal_data = nal_data; -- srxd->type = gmnal_type; -- srxd->nsiov = gmnal_msghdr->niov; -- srxd->gm_source_node = gmnal_msghdr->sender_node_id; -- -- CDEBUG(D_PORTALS, "Calling lib_parse buffer is [%p]\n", -- buffer+GMNAL_MSGHDR_SIZE); -- /* -- * control passes to lib, which calls cb_recv -- * cb_recv is responsible for returning the buffer -- * for future receive -- */ - lib_parse(nal_data->nal_cb, portals_hdr, srxd); - rc = lib_parse(nal_data->libnal, portals_hdr, srxd); - - if (rc != PTL_OK) { - /* I just received garbage; take appropriate action... */ - LBUG(); - } -- -- return(GMNAL_STATUS_OK); --} -- -- -- --/* -- * After a receive has been processed, -- * hang out the receive buffer again. -- * This implicitly returns a receive token. -- */ --int --gmnal_rx_requeue_buffer(gmnal_data_t *nal_data, gmnal_srxd_t *srxd) --{ -- CDEBUG(D_TRACE, "gmnal_rx_requeue_buffer\n"); -- -- CDEBUG(D_NET, "requeueing srxd[%p] nal_data[%p]\n", srxd, nal_data); -- -- GMNAL_GM_LOCK(nal_data); -- gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer, -- srxd->gmsize, GM_LOW_PRIORITY, 0 ); -- GMNAL_GM_UNLOCK(nal_data); -- -- return(GMNAL_STATUS_OK); --} -- -- --/* -- * Handle a bad message -- * A bad message is one we don't expect or can't interpret -- */ --int --gmnal_rx_bad(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, gmnal_srxd_t *srxd) --{ -- CDEBUG(D_TRACE, "Can't handle message\n"); -- -- if (!srxd) -- srxd = gmnal_rxbuffer_to_srxd(nal_data, -- we->buffer); -- if (srxd) { -- gmnal_rx_requeue_buffer(nal_data, srxd); -- } else { -- CDEBUG(D_ERROR, "Can't find a descriptor for this buffer\n"); -- /* -- * get rid of it ? -- */ -- return(GMNAL_STATUS_FAIL); -- } -- -- return(GMNAL_STATUS_OK); --} -- -- -- --/* -- * Process a small message receive. 
-- * Get here from gmnal_receive_thread, gmnal_pre_receive -- * lib_parse, cb_recv -- * Put data from prewired receive buffer into users buffer(s) -- * Hang out the receive buffer again for another receive -- * Call lib_finalize -- */ --int - gmnal_small_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, - unsigned int niov, struct iovec *iov, size_t mlen, size_t rlen) -gmnal_small_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie, - unsigned int niov, struct iovec *iov, size_t offset, size_t mlen, size_t rlen) --{ -- gmnal_srxd_t *srxd = NULL; -- void *buffer = NULL; - gmnal_data_t *nal_data = (gmnal_data_t*)nal_cb->nal_data; - gmnal_data_t *nal_data = (gmnal_data_t*)libnal->libnal_data; -- -- -- CDEBUG(D_TRACE, "niov [%d] mlen["LPSZ"]\n", niov, mlen); -- -- if (!private) { -- CDEBUG(D_ERROR, "gmnal_small_rx no context\n"); - lib_finalize(nal_cb, private, cookie, PTL_FAIL); - lib_finalize(libnal, private, cookie, PTL_FAIL); -- return(PTL_FAIL); -- } -- -- srxd = (gmnal_srxd_t*)private; -- buffer = srxd->buffer; -- buffer += sizeof(gmnal_msghdr_t); -- buffer += sizeof(ptl_hdr_t); -- -- while(niov--) { - CDEBUG(D_INFO, "processing [%p] len ["LPSZ"]\n", iov, - iov->iov_len); - gm_bcopy(buffer, iov->iov_base, iov->iov_len); - buffer += iov->iov_len; - iov++; - if (offset >= iov->iov_len) { - offset -= iov->iov_len; - } else if (offset > 0) { - CDEBUG(D_INFO, "processing [%p] base [%p] len %d, " - "offset %d, len ["LPSZ"]\n", iov, - iov->iov_base + offset, iov->iov_len, offset, - iov->iov_len - offset); - gm_bcopy(buffer, iov->iov_base + offset, - iov->iov_len - offset); - offset = 0; - buffer += iov->iov_len - offset; - } else { - CDEBUG(D_INFO, "processing [%p] len ["LPSZ"]\n", iov, - iov->iov_len); - gm_bcopy(buffer, iov->iov_base, iov->iov_len); - buffer += iov->iov_len; - } - iov++; -- } -- -- -- /* -- * let portals library know receive is complete -- */ -- CDEBUG(D_PORTALS, "calling lib_finalize\n"); - lib_finalize(nal_cb, private, cookie, PTL_OK); - 
lib_finalize(libnal, private, cookie, PTL_OK); -- /* -- * return buffer so it can be used again -- */ -- CDEBUG(D_NET, "calling gm_provide_receive_buffer\n"); -- GMNAL_GM_LOCK(nal_data); -- gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer, -- srxd->gmsize, GM_LOW_PRIORITY, 0); -- GMNAL_GM_UNLOCK(nal_data); -- -- return(PTL_OK); --} -- -- --/* -- * Start a small transmit. -- * Get a send token (and wired transmit buffer). -- * Copy data from senders buffer to wired buffer and -- * initiate gm_send from the wired buffer. -- * The callback function informs when the send is complete. -- */ --int - gmnal_small_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, -gmnal_small_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie, -- ptl_hdr_t *hdr, int type, ptl_nid_t global_nid, ptl_pid_t pid, - unsigned int niov, struct iovec *iov, int size) - unsigned int niov, struct iovec *iov, size_t offset, int size) --{ - gmnal_data_t *nal_data = (gmnal_data_t*)nal_cb->nal_data; - gmnal_data_t *nal_data = (gmnal_data_t*)libnal->libnal_data; -- gmnal_stxd_t *stxd = NULL; -- void *buffer = NULL; -- gmnal_msghdr_t *msghdr = NULL; -- int tot_size = 0; -- unsigned int local_nid; -- gm_status_t gm_status = GM_SUCCESS; -- - CDEBUG(D_TRACE, "gmnal_small_tx nal_cb [%p] private [%p] cookie [%p] " - CDEBUG(D_TRACE, "gmnal_small_tx libnal [%p] private [%p] cookie [%p] " -- "hdr [%p] type [%d] global_nid ["LPU64"] pid [%d] niov [%d] " - "iov [%p] size [%d]\n", nal_cb, private, cookie, hdr, type, - "iov [%p] size [%d]\n", libnal, private, cookie, hdr, type, -- global_nid, pid, niov, iov, size); -- -- CDEBUG(D_INFO, "portals_hdr:: dest_nid ["LPU64"], src_nid ["LPU64"]\n", -- hdr->dest_nid, hdr->src_nid); -- -- if (!nal_data) { -- CDEBUG(D_ERROR, "no nal_data\n"); -- return(GMNAL_STATUS_FAIL); -- } else { -- CDEBUG(D_INFO, "nal_data [%p]\n", nal_data); -- } -- -- GMNAL_GM_LOCK(nal_data); -- gm_status = gm_global_id_to_node_id(nal_data->gm_port, global_nid, -- &local_nid); -- 
GMNAL_GM_UNLOCK(nal_data); -- if (gm_status != GM_SUCCESS) { -- CDEBUG(D_ERROR, "Failed to obtain local id\n"); -- return(GMNAL_STATUS_FAIL); -- } -- CDEBUG(D_INFO, "Local Node_id is [%u][%x]\n", local_nid, local_nid); -- -- stxd = gmnal_get_stxd(nal_data, 1); -- CDEBUG(D_INFO, "stxd [%p]\n", stxd); -- -- stxd->type = GMNAL_SMALL_MESSAGE; -- stxd->cookie = cookie; -- -- /* -- * Copy gmnal_msg_hdr and portals header to the transmit buffer -- * Then copy the data in -- */ -- buffer = stxd->buffer; -- msghdr = (gmnal_msghdr_t*)buffer; -- -- msghdr->magic = GMNAL_MAGIC; -- msghdr->type = GMNAL_SMALL_MESSAGE; -- msghdr->sender_node_id = nal_data->gm_global_nid; -- CDEBUG(D_INFO, "processing msghdr at [%p]\n", buffer); -- -- buffer += sizeof(gmnal_msghdr_t); -- -- CDEBUG(D_INFO, "processing portals hdr at [%p]\n", buffer); -- gm_bcopy(hdr, buffer, sizeof(ptl_hdr_t)); -- -- buffer += sizeof(ptl_hdr_t); -- -- while(niov--) { - CDEBUG(D_INFO, "processing iov [%p] len ["LPSZ"] to [%p]\n", - iov, iov->iov_len, buffer); - gm_bcopy(iov->iov_base, buffer, iov->iov_len); - buffer+= iov->iov_len; - iov++; - if (offset >= iov->iov_len) { - offset -= iov->iov_len; - } else if (offset > 0) { - CDEBUG(D_INFO, "processing iov [%p] base [%p] len ["LPSZ"] to [%p]\n", - iov, iov->iov_base + offset, iov->iov_len - offset, buffer); - gm_bcopy(iov->iov_base + offset, buffer, iov->iov_len - offset); - buffer+= iov->iov_len - offset; - offset = 0; - } else { - CDEBUG(D_INFO, "processing iov [%p] len ["LPSZ"] to [%p]\n", - iov, iov->iov_len, buffer); - gm_bcopy(iov->iov_base, buffer, iov->iov_len); - buffer+= iov->iov_len; - } - iov++; -- } -- -- CDEBUG(D_INFO, "sending\n"); -- tot_size = size+sizeof(ptl_hdr_t)+sizeof(gmnal_msghdr_t); -- stxd->msg_size = tot_size; -- -- -- CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] " -- "gmsize [%lu] msize [%d] global_nid ["LPU64"] local_nid[%d] " -- "stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size, -- stxd->msg_size, global_nid, 
local_nid, stxd); -- -- GMNAL_GM_LOCK(nal_data); -- stxd->gm_priority = GM_LOW_PRIORITY; -- stxd->gm_target_node = local_nid; -- gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer, -- stxd->gm_size, stxd->msg_size, -- GM_LOW_PRIORITY, local_nid, -- gmnal_small_tx_callback, (void*)stxd); -- GMNAL_GM_UNLOCK(nal_data); -- CDEBUG(D_INFO, "done\n"); -- -- return(PTL_OK); --} -- -- --/* -- * A callback to indicate the small transmit operation is compete -- * Check for erros and try to deal with them. -- * Call lib_finalise to inform the client application that the send -- * is complete and the memory can be reused. -- * Return the stxd when finished with it (returns a send token) -- */ --void --gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status) --{ -- gmnal_stxd_t *stxd = (gmnal_stxd_t*)context; -- lib_msg_t *cookie = stxd->cookie; -- gmnal_data_t *nal_data = (gmnal_data_t*)stxd->nal_data; - nal_cb_t *nal_cb = nal_data->nal_cb; - lib_nal_t *libnal = nal_data->libnal; -- -- if (!stxd) { -- CDEBUG(D_TRACE, "send completion event for unknown stxd\n"); -- return; -- } -- if (status != GM_SUCCESS) { -- CDEBUG(D_ERROR, "Result of send stxd [%p] is [%s]\n", -- stxd, gmnal_gm_error(status)); -- } -- -- switch(status) { -- case(GM_SUCCESS): -- break; -- -- -- -- case(GM_SEND_DROPPED): -- /* -- * do a resend on the dropped ones -- */ -- CDEBUG(D_ERROR, "send stxd [%p] was dropped " -- "resending\n", context); -- GMNAL_GM_LOCK(nal_data); -- gm_send_to_peer_with_callback(nal_data->gm_port, -- stxd->buffer, -- stxd->gm_size, -- stxd->msg_size, -- stxd->gm_priority, -- stxd->gm_target_node, -- gmnal_small_tx_callback, -- context); -- GMNAL_GM_UNLOCK(nal_data); -- -- return; -- case(GM_TIMED_OUT): -- case(GM_SEND_TIMED_OUT): -- /* -- * drop these ones -- */ -- CDEBUG(D_INFO, "calling gm_drop_sends\n"); -- GMNAL_GM_LOCK(nal_data); -- gm_drop_sends(nal_data->gm_port, stxd->gm_priority, -- stxd->gm_target_node, GMNAL_GM_PORT, -- 
gmnal_drop_sends_callback, context); -- GMNAL_GM_UNLOCK(nal_data); -- -- return; -- -- -- /* -- * abort on these ? -- */ -- case(GM_TRY_AGAIN): -- case(GM_INTERRUPTED): -- case(GM_FAILURE): -- case(GM_INPUT_BUFFER_TOO_SMALL): -- case(GM_OUTPUT_BUFFER_TOO_SMALL): -- case(GM_BUSY): -- case(GM_MEMORY_FAULT): -- case(GM_INVALID_PARAMETER): -- case(GM_OUT_OF_MEMORY): -- case(GM_INVALID_COMMAND): -- case(GM_PERMISSION_DENIED): -- case(GM_INTERNAL_ERROR): -- case(GM_UNATTACHED): -- case(GM_UNSUPPORTED_DEVICE): -- case(GM_SEND_REJECTED): -- case(GM_SEND_TARGET_PORT_CLOSED): -- case(GM_SEND_TARGET_NODE_UNREACHABLE): -- case(GM_SEND_PORT_CLOSED): -- case(GM_NODE_ID_NOT_YET_SET): -- case(GM_STILL_SHUTTING_DOWN): -- case(GM_CLONE_BUSY): -- case(GM_NO_SUCH_DEVICE): -- case(GM_ABORTED): -- case(GM_INCOMPATIBLE_LIB_AND_DRIVER): -- case(GM_UNTRANSLATED_SYSTEM_ERROR): -- case(GM_ACCESS_DENIED): -- case(GM_NO_DRIVER_SUPPORT): -- case(GM_PTE_REF_CNT_OVERFLOW): -- case(GM_NOT_SUPPORTED_IN_KERNEL): -- case(GM_NOT_SUPPORTED_ON_ARCH): -- case(GM_NO_MATCH): -- case(GM_USER_ERROR): -- case(GM_DATA_CORRUPTED): -- case(GM_HARDWARE_FAULT): -- case(GM_SEND_ORPHANED): -- case(GM_MINOR_OVERFLOW): -- case(GM_PAGE_TABLE_FULL): -- case(GM_UC_ERROR): -- case(GM_INVALID_PORT_NUMBER): -- case(GM_DEV_NOT_FOUND): -- case(GM_FIRMWARE_NOT_RUNNING): -- case(GM_YP_NO_MATCH): -- default: -- CDEBUG(D_ERROR, "Unknown send error\n"); -- gm_resume_sending(nal_data->gm_port, stxd->gm_priority, -- stxd->gm_target_node, GMNAL_GM_PORT, -- gmnal_resume_sending_callback, context); -- return; -- -- } -- -- /* -- * TO DO -- * If this is a large message init, -- * we're not finished with the data yet, -- * so can't call lib_finalise. -- * However, we're also holding on to a -- * stxd here (to keep track of the source -- * iovec only). Should use another structure -- * to keep track of iovec and return stxd to -- * free list earlier. 
-- */ -- if (stxd->type == GMNAL_LARGE_MESSAGE_INIT) { -- CDEBUG(D_INFO, "large transmit done\n"); -- return; -- } -- gmnal_return_stxd(nal_data, stxd); - lib_finalize(nal_cb, stxd, cookie, PTL_OK); - lib_finalize(libnal, stxd, cookie, PTL_OK); -- return; --} -- --/* -- * After an error on the port -- * call this to allow future sends to complete -- */ --void gmnal_resume_sending_callback(struct gm_port *gm_port, void *context, -- gm_status_t status) --{ -- gmnal_data_t *nal_data; -- gmnal_stxd_t *stxd = (gmnal_stxd_t*)context; -- CDEBUG(D_TRACE, "status is [%d] context is [%p]\n", status, context); -- gmnal_return_stxd(stxd->nal_data, stxd); -- return; --} -- -- --void gmnal_drop_sends_callback(struct gm_port *gm_port, void *context, -- gm_status_t status) --{ -- gmnal_stxd_t *stxd = (gmnal_stxd_t*)context; -- gmnal_data_t *nal_data = stxd->nal_data; -- -- CDEBUG(D_TRACE, "status is [%d] context is [%p]\n", status, context); -- if (status == GM_SUCCESS) { -- GMNAL_GM_LOCK(nal_data); -- gm_send_to_peer_with_callback(gm_port, stxd->buffer, -- stxd->gm_size, stxd->msg_size, -- stxd->gm_priority, -- stxd->gm_target_node, -- gmnal_small_tx_callback, -- context); -- GMNAL_GM_LOCK(nal_data); -- } else { -- CDEBUG(D_ERROR, "send_to_peer status for stxd [%p] is " -- "[%d][%s]\n", stxd, status, gmnal_gm_error(status)); -- } -- -- -- return; --} -- -- --/* -- * Begine a large transmit. -- * Do a gm_register of the memory pointed to by the iovec -- * and send details to the receiver. The receiver does a gm_get -- * to pull the data and sends and ack when finished. Upon receipt of -- * this ack, deregister the memory. Only 1 send token is required here. 
-- */ --int - gmnal_large_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, -gmnal_large_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie, -- ptl_hdr_t *hdr, int type, ptl_nid_t global_nid, ptl_pid_t pid, - unsigned int niov, struct iovec *iov, int size) - unsigned int niov, struct iovec *iov, size_t offset, int size) --{ -- -- gmnal_data_t *nal_data; -- gmnal_stxd_t *stxd = NULL; -- void *buffer = NULL; -- gmnal_msghdr_t *msghdr = NULL; -- unsigned int local_nid; -- int mlen = 0; /* the size of the init message data */ -- struct iovec *iov_dup = NULL; -- gm_status_t gm_status; -- int niov_dup; -- -- - CDEBUG(D_TRACE, "gmnal_large_tx nal_cb [%p] private [%p], cookie [%p] " - CDEBUG(D_TRACE, "gmnal_large_tx libnal [%p] private [%p], cookie [%p] " -- "hdr [%p], type [%d] global_nid ["LPU64"], pid [%d], niov [%d], " - "iov [%p], size [%d]\n", nal_cb, private, cookie, hdr, type, - "iov [%p], size [%d]\n", libnal, private, cookie, hdr, type, -- global_nid, pid, niov, iov, size); -- - if (nal_cb) - nal_data = (gmnal_data_t*)nal_cb->nal_data; - if (libnal) - nal_data = (gmnal_data_t*)libnal->libnal_data; -- else { - CDEBUG(D_ERROR, "no nal_cb.\n"); - CDEBUG(D_ERROR, "no libnal.\n"); -- return(GMNAL_STATUS_FAIL); -- } -- -- -- /* -- * Get stxd and buffer. Put local address of data in buffer, -- * send local addresses to target, -- * wait for the target node to suck the data over. 
-- * The stxd is used to ren -- */ -- stxd = gmnal_get_stxd(nal_data, 1); -- CDEBUG(D_INFO, "stxd [%p]\n", stxd); -- -- stxd->type = GMNAL_LARGE_MESSAGE_INIT; -- stxd->cookie = cookie; -- -- /* -- * Copy gmnal_msg_hdr and portals header to the transmit buffer -- * Then copy the iov in -- */ -- buffer = stxd->buffer; -- msghdr = (gmnal_msghdr_t*)buffer; -- -- CDEBUG(D_INFO, "processing msghdr at [%p]\n", buffer); -- -- msghdr->magic = GMNAL_MAGIC; -- msghdr->type = GMNAL_LARGE_MESSAGE_INIT; -- msghdr->sender_node_id = nal_data->gm_global_nid; -- msghdr->stxd = stxd; -- msghdr->niov = niov ; -- buffer += sizeof(gmnal_msghdr_t); -- mlen = sizeof(gmnal_msghdr_t); -- CDEBUG(D_INFO, "mlen is [%d]\n", mlen); -- -- -- CDEBUG(D_INFO, "processing portals hdr at [%p]\n", buffer); -- -- gm_bcopy(hdr, buffer, sizeof(ptl_hdr_t)); -- buffer += sizeof(ptl_hdr_t); -- mlen += sizeof(ptl_hdr_t); -- CDEBUG(D_INFO, "mlen is [%d]\n", mlen); - - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - niov--; - iov++; - } - - LASSERT(offset >= 0); - /* - * Store the iovs in the stxd for we can get - * them later if we need them - */ - stxd->iov[0].iov_base = iov->iov_base + offset; - stxd->iov[0].iov_len = iov->iov_len - offset; - CDEBUG(D_NET, "Copying iov [%p] to [%p], niov=%d\n", iov, stxd->iov, niov); - if (niov > 1) - gm_bcopy(&iov[1], &stxd->iov[1], (niov-1)*sizeof(struct iovec)); - stxd->niov = niov; -- -- /* -- * copy the iov to the buffer so target knows -- * where to get the data from -- */ -- CDEBUG(D_INFO, "processing iov to [%p]\n", buffer); - gm_bcopy(iov, buffer, niov*sizeof(struct iovec)); - mlen += niov*(sizeof(struct iovec)); - gm_bcopy(stxd->iov, buffer, stxd->niov*sizeof(struct iovec)); - mlen += stxd->niov*(sizeof(struct iovec)); -- CDEBUG(D_INFO, "mlen is [%d]\n", mlen); - - - /* - * Store the iovs in the stxd for we can get - * them later if we need them - */ - CDEBUG(D_NET, "Copying iov [%p] to [%p]\n", iov, stxd->iov); - gm_bcopy(iov, stxd->iov, 
niov*sizeof(struct iovec)); - stxd->niov = niov; -- - -- /* -- * register the memory so the NIC can get hold of the data -- * This is a slow process. it'd be good to overlap it -- * with something else. -- */ - iov = stxd->iov; -- iov_dup = iov; -- niov_dup = niov; -- while(niov--) { -- CDEBUG(D_INFO, "Registering memory [%p] len ["LPSZ"] \n", -- iov->iov_base, iov->iov_len); -- GMNAL_GM_LOCK(nal_data); -- gm_status = gm_register_memory(nal_data->gm_port, -- iov->iov_base, iov->iov_len); -- if (gm_status != GM_SUCCESS) { -- GMNAL_GM_UNLOCK(nal_data); -- CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] " -- "for memory [%p] len ["LPSZ"]\n", -- gm_status, gmnal_gm_error(gm_status), -- iov->iov_base, iov->iov_len); -- GMNAL_GM_LOCK(nal_data); -- while (iov_dup != iov) { -- gm_deregister_memory(nal_data->gm_port, -- iov_dup->iov_base, -- iov_dup->iov_len); -- iov_dup++; -- } -- GMNAL_GM_UNLOCK(nal_data); -- gmnal_return_stxd(nal_data, stxd); -- return(PTL_FAIL); -- } -- -- GMNAL_GM_UNLOCK(nal_data); -- iov++; -- } -- -- /* -- * Send the init message to the target -- */ -- CDEBUG(D_INFO, "sending mlen [%d]\n", mlen); -- GMNAL_GM_LOCK(nal_data); -- gm_status = gm_global_id_to_node_id(nal_data->gm_port, global_nid, -- &local_nid); -- if (gm_status != GM_SUCCESS) { -- GMNAL_GM_UNLOCK(nal_data); -- CDEBUG(D_ERROR, "Failed to obtain local id\n"); -- gmnal_return_stxd(nal_data, stxd); -- /* TO DO deregister memory on failure */ -- return(GMNAL_STATUS_FAIL); -- } -- CDEBUG(D_INFO, "Local Node_id is [%d]\n", local_nid); -- gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer, -- stxd->gm_size, mlen, GM_LOW_PRIORITY, -- local_nid, gmnal_large_tx_callback, -- (void*)stxd); -- GMNAL_GM_UNLOCK(nal_data); -- -- CDEBUG(D_INFO, "done\n"); -- -- return(PTL_OK); --} -- --/* -- * Callback function indicates that send of buffer with -- * large message iovec has completed (or failed). 
-- */ --void --gmnal_large_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status) --{ -- gmnal_small_tx_callback(gm_port, context, status); -- --} -- -- -- --/* -- * Have received a buffer that contains an iovec of the sender. -- * Do a gm_register_memory of the receivers buffer and then do a get -- * data from the sender. -- */ --int - gmnal_large_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, - unsigned int nriov, struct iovec *riov, size_t mlen, - size_t rlen) -gmnal_large_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie, - unsigned int nriov, struct iovec *riov, size_t offset, - size_t mlen, size_t rlen) --{ - gmnal_data_t *nal_data = nal_cb->nal_data; - gmnal_data_t *nal_data = libnal->libnal_data; -- gmnal_srxd_t *srxd = (gmnal_srxd_t*)private; -- void *buffer = NULL; -- struct iovec *riov_dup; -- int nriov_dup; -- gmnal_msghdr_t *msghdr = NULL; -- gm_status_t gm_status; -- - CDEBUG(D_TRACE, "gmnal_large_rx :: nal_cb[%p], private[%p], " - CDEBUG(D_TRACE, "gmnal_large_rx :: libnal[%p], private[%p], " -- "cookie[%p], niov[%d], iov[%p], mlen["LPSZ"], rlen["LPSZ"]\n", - nal_cb, private, cookie, nriov, riov, mlen, rlen); - libnal, private, cookie, nriov, riov, mlen, rlen); -- -- if (!srxd) { -- CDEBUG(D_ERROR, "gmnal_large_rx no context\n"); - lib_finalize(nal_cb, private, cookie, PTL_FAIL); - lib_finalize(libnal, private, cookie, PTL_FAIL); -- return(PTL_FAIL); -- } -- -- buffer = srxd->buffer; -- msghdr = (gmnal_msghdr_t*)buffer; -- buffer += sizeof(gmnal_msghdr_t); -- buffer += sizeof(ptl_hdr_t); -- -- /* -- * Store the senders stxd address in the srxd for this message -- * The gmnal_large_message_ack needs it to notify the sender -- * the pull of data is complete -- */ -- srxd->source_stxd = msghdr->stxd; -- -- /* -- * Register the receivers memory -- * get the data, -- * tell the sender that we got the data -- * then tell the receiver we got the data -- * TO DO -- * If the iovecs match, could interleave -- * gm_registers and gm_gets 
for each element - */ - while (offset >= riov->iov_len) { - offset -= riov->iov_len; - riov++; - nriov--; - } - LASSERT (nriov >= 0); - LASSERT (offset >= 0); - /* - * do this so the final gm_get callback can deregister the memory -- */ - PORTAL_ALLOC(srxd->riov, nriov*(sizeof(struct iovec))); - - srxd->riov[0].iov_base = riov->iov_base + offset; - srxd->riov[0].iov_len = riov->iov_len - offset; - if (nriov > 1) - gm_bcopy(&riov[1], &srxd->riov[1], (nriov-1)*(sizeof(struct iovec))); - srxd->nriov = nriov; - - riov = srxd->riov; -- nriov_dup = nriov; -- riov_dup = riov; -- while(nriov--) { -- CDEBUG(D_INFO, "Registering memory [%p] len ["LPSZ"] \n", -- riov->iov_base, riov->iov_len); -- GMNAL_GM_LOCK(nal_data); -- gm_status = gm_register_memory(nal_data->gm_port, -- riov->iov_base, riov->iov_len); -- if (gm_status != GM_SUCCESS) { -- GMNAL_GM_UNLOCK(nal_data); -- CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] " -- "for memory [%p] len ["LPSZ"]\n", -- gm_status, gmnal_gm_error(gm_status), -- riov->iov_base, riov->iov_len); -- GMNAL_GM_LOCK(nal_data); -- while (riov_dup != riov) { -- gm_deregister_memory(nal_data->gm_port, -- riov_dup->iov_base, -- riov_dup->iov_len); -- riov_dup++; -- } -- GMNAL_GM_LOCK(nal_data); -- /* -- * give back srxd and buffer. 
Send NACK to sender -- */ - PORTAL_FREE(srxd->riov, nriov_dup*(sizeof(struct iovec))); -- return(PTL_FAIL); -- } -- GMNAL_GM_UNLOCK(nal_data); -- riov++; -- } - /* - * do this so the final gm_get callback can deregister the memory - */ - PORTAL_ALLOC(srxd->riov, nriov_dup*(sizeof(struct iovec))); - gm_bcopy(riov_dup, srxd->riov, nriov_dup*(sizeof(struct iovec))); - srxd->nriov = nriov_dup; -- -- /* -- * now do gm_get to get the data -- */ -- srxd->cookie = cookie; -- if (gmnal_remote_get(srxd, srxd->nsiov, (struct iovec*)buffer, -- nriov_dup, riov_dup) != GMNAL_STATUS_OK) { -- CDEBUG(D_ERROR, "can't get the data"); -- } -- -- CDEBUG(D_INFO, "lgmanl_large_rx done\n"); -- -- return(PTL_OK); --} -- -- --/* -- * Perform a number of remote gets as part of receiving -- * a large message. -- * The final one to complete (i.e. the last callback to get called) -- * tidies up. -- * gm_get requires a send token. -- */ --int --gmnal_remote_get(gmnal_srxd_t *srxd, int nsiov, struct iovec *siov, -- int nriov, struct iovec *riov) --{ -- -- int ncalls = 0; -- -- CDEBUG(D_TRACE, "gmnal_remote_get srxd[%p], nriov[%d], riov[%p], " -- "nsiov[%d], siov[%p]\n", srxd, nriov, riov, nsiov, siov); -- -- -- ncalls = gmnal_copyiov(0, srxd, nsiov, siov, nriov, riov); -- if (ncalls < 0) { -- CDEBUG(D_ERROR, "there's something wrong with the iovecs\n"); -- return(GMNAL_STATUS_FAIL); -- } -- CDEBUG(D_INFO, "gmnal_remote_get ncalls [%d]\n", ncalls); -- spin_lock_init(&srxd->callback_lock); -- srxd->ncallbacks = ncalls; -- srxd->callback_status = 0; -- -- ncalls = gmnal_copyiov(1, srxd, nsiov, siov, nriov, riov); -- if (ncalls < 0) { -- CDEBUG(D_ERROR, "there's something wrong with the iovecs\n"); -- return(GMNAL_STATUS_FAIL); -- } -- -- return(GMNAL_STATUS_OK); -- --} -- -- --/* -- * pull data from source node (source iovec) to a local iovec. -- * The iovecs may not match which adds the complications below. 
-- * Count the number of gm_gets that will be required to the callbacks -- * can determine who is the last one. -- */ --int --gmnal_copyiov(int do_copy, gmnal_srxd_t *srxd, int nsiov, -- struct iovec *siov, int nriov, struct iovec *riov) --{ -- -- int ncalls = 0; -- int slen = siov->iov_len, rlen = riov->iov_len; -- char *sbuf = siov->iov_base, *rbuf = riov->iov_base; -- unsigned long sbuf_long; -- gm_remote_ptr_t remote_ptr = 0; -- unsigned int source_node; -- gmnal_ltxd_t *ltxd = NULL; -- gmnal_data_t *nal_data = srxd->nal_data; -- -- CDEBUG(D_TRACE, "copy[%d] nal_data[%p]\n", do_copy, nal_data); -- if (do_copy) { -- if (!nal_data) { -- CDEBUG(D_ERROR, "Bad args No nal_data\n"); -- return(GMNAL_STATUS_FAIL); -- } -- GMNAL_GM_LOCK(nal_data); -- if (gm_global_id_to_node_id(nal_data->gm_port, -- srxd->gm_source_node, -- &source_node) != GM_SUCCESS) { -- -- CDEBUG(D_ERROR, "cannot resolve global_id [%u] " -- "to local node_id\n", srxd->gm_source_node); -- GMNAL_GM_UNLOCK(nal_data); -- return(GMNAL_STATUS_FAIL); -- } -- GMNAL_GM_UNLOCK(nal_data); -- /* -- * We need a send token to use gm_get -- * getting an stxd gets us a send token. -- * the stxd is used as the context to the -- * callback function (so stxd can be returned). 
-- * Set pointer in stxd to srxd so callback count in srxd -- * can be decremented to find last callback to complete -- */ -- CDEBUG(D_INFO, "gmnal_copyiov source node is G[%u]L[%d]\n", -- srxd->gm_source_node, source_node); -- } -- -- do { -- CDEBUG(D_INFO, "sbuf[%p] slen[%d] rbuf[%p], rlen[%d]\n", -- sbuf, slen, rbuf, rlen); -- if (slen > rlen) { -- ncalls++; -- if (do_copy) { -- CDEBUG(D_INFO, "slen>rlen\n"); -- ltxd = gmnal_get_ltxd(nal_data); -- ltxd->srxd = srxd; -- GMNAL_GM_LOCK(nal_data); -- /* -- * funny business to get rid -- * of compiler warning -- */ -- sbuf_long = (unsigned long) sbuf; -- remote_ptr = (gm_remote_ptr_t)sbuf_long; -- gm_get(nal_data->gm_port, remote_ptr, rbuf, -- rlen, GM_LOW_PRIORITY, source_node, -- GMNAL_GM_PORT, -- gmnal_remote_get_callback, ltxd); -- GMNAL_GM_UNLOCK(nal_data); -- } -- /* -- * at the end of 1 iov element -- */ -- sbuf+=rlen; -- slen-=rlen; -- riov++; -- nriov--; -- rbuf = riov->iov_base; -- rlen = riov->iov_len; -- } else if (rlen > slen) { -- ncalls++; -- if (do_copy) { -- CDEBUG(D_INFO, "slensrxd = srxd; -- GMNAL_GM_LOCK(nal_data); -- sbuf_long = (unsigned long) sbuf; -- remote_ptr = (gm_remote_ptr_t)sbuf_long; -- gm_get(nal_data->gm_port, remote_ptr, rbuf, -- slen, GM_LOW_PRIORITY, source_node, -- GMNAL_GM_PORT, -- gmnal_remote_get_callback, ltxd); -- GMNAL_GM_UNLOCK(nal_data); -- } -- /* -- * at end of siov element -- */ -- rbuf+=slen; -- rlen-=slen; -- siov++; -- sbuf = siov->iov_base; -- slen = siov->iov_len; -- } else { -- ncalls++; -- if (do_copy) { -- CDEBUG(D_INFO, "rlen=slen\n"); -- ltxd = gmnal_get_ltxd(nal_data); -- ltxd->srxd = srxd; -- GMNAL_GM_LOCK(nal_data); -- sbuf_long = (unsigned long) sbuf; -- remote_ptr = (gm_remote_ptr_t)sbuf_long; -- gm_get(nal_data->gm_port, remote_ptr, rbuf, -- rlen, GM_LOW_PRIORITY, source_node, -- GMNAL_GM_PORT, -- gmnal_remote_get_callback, ltxd); -- GMNAL_GM_UNLOCK(nal_data); -- } -- /* -- * at end of siov and riov element -- */ -- siov++; -- sbuf = siov->iov_base; -- 
slen = siov->iov_len; -- riov++; -- nriov--; -- rbuf = riov->iov_base; -- rlen = riov->iov_len; -- } -- -- } while (nriov); -- return(ncalls); --} -- -- --/* -- * The callback function that is invoked after each gm_get call completes. -- * Multiple callbacks may be invoked for 1 transaction, only the final -- * callback has work to do. -- */ --void --gmnal_remote_get_callback(gm_port_t *gm_port, void *context, -- gm_status_t status) --{ -- -- gmnal_ltxd_t *ltxd = (gmnal_ltxd_t*)context; -- gmnal_srxd_t *srxd = ltxd->srxd; - nal_cb_t *nal_cb = srxd->nal_data->nal_cb; - lib_nal_t *libnal = srxd->nal_data->libnal; -- int lastone; -- struct iovec *riov; -- int nriov; -- gmnal_data_t *nal_data; -- -- CDEBUG(D_TRACE, "called for context [%p]\n", context); -- -- if (status != GM_SUCCESS) { -- CDEBUG(D_ERROR, "reports error [%d][%s]\n", status, -- gmnal_gm_error(status)); -- } -- -- spin_lock(&srxd->callback_lock); -- srxd->ncallbacks--; -- srxd->callback_status |= status; -- lastone = srxd->ncallbacks?0:1; -- spin_unlock(&srxd->callback_lock); -- nal_data = srxd->nal_data; -- -- /* -- * everyone returns a send token -- */ -- gmnal_return_ltxd(nal_data, ltxd); -- -- if (!lastone) { -- CDEBUG(D_ERROR, "NOT final callback context[%p]\n", srxd); -- return; -- } -- -- /* -- * Let our client application proceed -- */ -- CDEBUG(D_ERROR, "final callback context[%p]\n", srxd); - lib_finalize(nal_cb, srxd, srxd->cookie, PTL_OK); - lib_finalize(libnal, srxd, srxd->cookie, PTL_OK); -- -- /* -- * send an ack to the sender to let him know we got the data -- */ -- gmnal_large_tx_ack(nal_data, srxd); -- -- /* -- * Unregister the memory that was used -- * This is a very slow business (slower then register) -- */ -- nriov = srxd->nriov; -- riov = srxd->riov; -- GMNAL_GM_LOCK(nal_data); -- while (nriov--) { -- CDEBUG(D_ERROR, "deregister memory [%p]\n", riov->iov_base); -- if (gm_deregister_memory(srxd->nal_data->gm_port, -- riov->iov_base, riov->iov_len)) { -- CDEBUG(D_ERROR, "failed to 
deregister memory [%p]\n", -- riov->iov_base); -- } -- riov++; -- } -- GMNAL_GM_UNLOCK(nal_data); -- PORTAL_FREE(srxd->riov, sizeof(struct iovec)*nriov); -- -- /* -- * repost the receive buffer (return receive token) -- */ -- GMNAL_GM_LOCK(nal_data); -- gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer, -- srxd->gmsize, GM_LOW_PRIORITY, 0); -- GMNAL_GM_UNLOCK(nal_data); -- -- return; --} -- -- --/* -- * Called on target node. -- * After pulling data from a source node -- * send an ack message to indicate the large transmit is complete. -- */ --void --gmnal_large_tx_ack(gmnal_data_t *nal_data, gmnal_srxd_t *srxd) --{ -- -- gmnal_stxd_t *stxd; -- gmnal_msghdr_t *msghdr; -- void *buffer = NULL; -- unsigned int local_nid; -- gm_status_t gm_status = GM_SUCCESS; -- -- CDEBUG(D_TRACE, "srxd[%p] target_node [%u]\n", srxd, -- srxd->gm_source_node); -- -- GMNAL_GM_LOCK(nal_data); -- gm_status = gm_global_id_to_node_id(nal_data->gm_port, -- srxd->gm_source_node, &local_nid); -- GMNAL_GM_UNLOCK(nal_data); -- if (gm_status != GM_SUCCESS) { -- CDEBUG(D_ERROR, "Failed to obtain local id\n"); -- return; -- } -- CDEBUG(D_INFO, "Local Node_id is [%u][%x]\n", local_nid, local_nid); -- -- stxd = gmnal_get_stxd(nal_data, 1); -- CDEBUG(D_TRACE, "gmnal_large_tx_ack got stxd[%p]\n", stxd); -- -- stxd->nal_data = nal_data; -- stxd->type = GMNAL_LARGE_MESSAGE_ACK; -- -- /* -- * Copy gmnal_msg_hdr and portals header to the transmit buffer -- * Then copy the data in -- */ -- buffer = stxd->buffer; -- msghdr = (gmnal_msghdr_t*)buffer; -- -- /* -- * Add in the address of the original stxd from the sender node -- * so it knows which thread to notify. 
-- */ -- msghdr->magic = GMNAL_MAGIC; -- msghdr->type = GMNAL_LARGE_MESSAGE_ACK; -- msghdr->sender_node_id = nal_data->gm_global_nid; -- msghdr->stxd = srxd->source_stxd; -- CDEBUG(D_INFO, "processing msghdr at [%p]\n", buffer); -- -- CDEBUG(D_INFO, "sending\n"); -- stxd->msg_size= sizeof(gmnal_msghdr_t); -- -- -- CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] " -- "gmsize [%lu] msize [%d] global_nid [%u] local_nid[%d] " -- "stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size, -- stxd->msg_size, srxd->gm_source_node, local_nid, stxd); -- GMNAL_GM_LOCK(nal_data); -- stxd->gm_priority = GM_LOW_PRIORITY; -- stxd->gm_target_node = local_nid; -- gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer, -- stxd->gm_size, stxd->msg_size, -- GM_LOW_PRIORITY, local_nid, -- gmnal_large_tx_ack_callback, -- (void*)stxd); -- -- GMNAL_GM_UNLOCK(nal_data); -- CDEBUG(D_INFO, "gmnal_large_tx_ack :: done\n"); -- -- return; --} -- -- --/* -- * A callback to indicate the small transmit operation is compete -- * Check for errors and try to deal with them. -- * Call lib_finalise to inform the client application that the -- * send is complete and the memory can be reused. -- * Return the stxd when finished with it (returns a send token) -- */ --void --gmnal_large_tx_ack_callback(gm_port_t *gm_port, void *context, -- gm_status_t status) --{ -- gmnal_stxd_t *stxd = (gmnal_stxd_t*)context; -- gmnal_data_t *nal_data = (gmnal_data_t*)stxd->nal_data; -- -- if (!stxd) { -- CDEBUG(D_ERROR, "send completion event for unknown stxd\n"); -- return; -- } -- CDEBUG(D_TRACE, "send completion event for stxd [%p] status is [%d]\n", -- stxd, status); -- gmnal_return_stxd(stxd->nal_data, stxd); -- -- GMNAL_GM_UNLOCK(nal_data); -- return; --} -- --/* -- * Indicates the large transmit operation is compete. -- * Called on transmit side (means data has been pulled by receiver -- * or failed). 
-- * Call lib_finalise to inform the client application that the send -- * is complete, deregister the memory and return the stxd. -- * Finally, report the rx buffer that the ack message was delivered in. -- */ --void --gmnal_large_tx_ack_received(gmnal_data_t *nal_data, gmnal_srxd_t *srxd) --{ - nal_cb_t *nal_cb = nal_data->nal_cb; - lib_nal_t *libnal = nal_data->libnal; -- gmnal_stxd_t *stxd = NULL; -- gmnal_msghdr_t *msghdr = NULL; -- void *buffer = NULL; -- struct iovec *iov; -- -- -- CDEBUG(D_TRACE, "gmnal_large_tx_ack_received buffer [%p]\n", buffer); -- -- buffer = srxd->buffer; -- msghdr = (gmnal_msghdr_t*)buffer; -- stxd = msghdr->stxd; -- -- CDEBUG(D_INFO, "gmnal_large_tx_ack_received stxd [%p]\n", stxd); -- - lib_finalize(nal_cb, stxd, stxd->cookie, PTL_OK); - lib_finalize(libnal, stxd, stxd->cookie, PTL_OK); -- -- /* -- * extract the iovec from the stxd, deregister the memory. -- * free the space used to store the iovec -- */ -- iov = stxd->iov; -- while(stxd->niov--) { -- CDEBUG(D_INFO, "deregister memory [%p] size ["LPSZ"]\n", -- iov->iov_base, iov->iov_len); -- GMNAL_GM_LOCK(nal_data); -- gm_deregister_memory(nal_data->gm_port, iov->iov_base, -- iov->iov_len); -- GMNAL_GM_UNLOCK(nal_data); -- iov++; -- } -- -- /* -- * return the send token -- * TO DO It is bad to hold onto the send token so long? 
-- */ -- gmnal_return_stxd(nal_data, stxd); -- -- -- /* -- * requeue the receive buffer -- */ -- gmnal_rx_requeue_buffer(nal_data, srxd); -- -- -- return; --} diff --cc lnet/klnds/gmlnd/gmlnd_module.c index 31f6819,3aca90f..0000000 deleted file mode 100644,100644 --- a/lnet/klnds/gmlnd/gmlnd_module.c +++ /dev/null @@@ -1,149 -1,134 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2003 Los Alamos National Laboratory (LANL) -- * -- * This file is part of Lustre, http://www.lustre.org/ -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --#include "gmnal.h" -- -- --int gmnal_small_msg_size = 525312; --/* -- * -1 indicates default value. -- * This is 1 thread per cpu -- * See start_kernel_threads -- */ --int num_rx_threads = -1; --int num_stxds = 5; --int gm_port = 4; - - ptl_handle_ni_t kgmnal_ni; - -- --int --gmnal_cmd(struct portals_cfg *pcfg, void *private) --{ -- gmnal_data_t *nal_data = NULL; -- char *name = NULL; -- int nid = -2; -- int gnid; -- gm_status_t gm_status; -- -- -- CDEBUG(D_TRACE, "gmnal_cmd [%d] private [%p]\n", -- pcfg->pcfg_command, private); -- nal_data = (gmnal_data_t*)private; -- switch(pcfg->pcfg_command) { -- /* -- * just reuse already defined GET_NID. 
Should define GMNAL version -- */ -- case(GMNAL_IOC_GET_GNID): -- -- PORTAL_ALLOC(name, pcfg->pcfg_plen1); -- copy_from_user(name, pcfg->pcfg_pbuf1, pcfg->pcfg_plen1); -- -- GMNAL_GM_LOCK(nal_data); - nid = gm_host_name_to_node_id(nal_data->gm_port, name); - //nid = gm_host_name_to_node_id(nal_data->gm_port, name); - gm_status = gm_host_name_to_node_id_ex (nal_data->gm_port, 0, name, &nid); -- GMNAL_GM_UNLOCK(nal_data); - CDEBUG(D_INFO, "Local node id is [%d]\n", nid); - if (gm_status != GM_SUCCESS) { - CDEBUG(D_INFO, "gm_host_name_to_node_id_ex(...host %s) failed[%d]\n", - name, gm_status); - return (-1); - } else - CDEBUG(D_INFO, "Local node %s id is [%d]\n", name, nid); -- GMNAL_GM_LOCK(nal_data); -- gm_status = gm_node_id_to_global_id(nal_data->gm_port, -- nid, &gnid); -- GMNAL_GM_UNLOCK(nal_data); -- if (gm_status != GM_SUCCESS) { -- CDEBUG(D_INFO, "gm_node_id_to_global_id failed[%d]\n", -- gm_status); -- return(-1); -- } -- CDEBUG(D_INFO, "Global node is is [%u][%x]\n", gnid, gnid); -- copy_to_user(pcfg->pcfg_pbuf2, &gnid, pcfg->pcfg_plen2); -- break; -- default: -- CDEBUG(D_INFO, "gmnal_cmd UNKNOWN[%d]\n", pcfg->pcfg_command); -- pcfg->pcfg_nid2 = -1; -- } -- -- -- return(0); --} -- -- --static int __init --gmnal_load(void) --{ -- int status; -- CDEBUG(D_TRACE, "This is the gmnal module initialisation routine\n"); - -- -- -- CDEBUG(D_INFO, "Calling gmnal_init\n"); - status = PtlNIInit(gmnal_init, 32, 4, 0, &kgmnal_ni); - status = gmnal_init(); -- if (status == PTL_OK) { - CDEBUG(D_INFO, "Portals GMNAL initialised ok kgmnal_ni\n"); - CDEBUG(D_INFO, "Portals GMNAL initialised ok\n"); -- } else { -- CDEBUG(D_INFO, "Portals GMNAL Failed to initialise\n"); - return(1); - return(-ENODEV); -- - } - - CDEBUG(D_INFO, "Calling kportal_nal_register\n"); - /* - * global_nal_data is set by gmnal_init - */ - if (kportal_nal_register(GMNAL, &gmnal_cmd, global_nal_data) != 0) { - CDEBUG(D_INFO, "kportal_nal_register failed\n"); - return(1); -- } -- - CDEBUG(D_INFO, "Calling 
PORTAL_SYMBOL_REGISTER\n"); - PORTAL_SYMBOL_REGISTER(kgmnal_ni); -- CDEBUG(D_INFO, "This is the end of the gmnal init routine"); -- -- -- return(0); --} -- -- --static void __exit --gmnal_unload(void) --{ - - kportal_nal_unregister(GMNAL); - PORTAL_SYMBOL_UNREGISTER(kgmnal_ni); -- gmnal_fini(); - global_nal_data = NULL; -- return; --} -- -- --module_init(gmnal_load); -- --module_exit(gmnal_unload); - - EXPORT_SYMBOL(kgmnal_ni); -- --MODULE_PARM(gmnal_small_msg_size, "i"); --MODULE_PARM(num_rx_threads, "i"); --MODULE_PARM(num_stxds, "i"); --MODULE_PARM(gm_port, "i"); -- --MODULE_AUTHOR("Morgan Doyle"); -- --MODULE_DESCRIPTION("A Portals kernel NAL for Myrinet GM."); -- --MODULE_LICENSE("GPL"); diff --cc lnet/klnds/gmlnd/gmlnd_utils.c index 6a52319,6a52319..0000000 deleted file mode 100644,100644 --- a/lnet/klnds/gmlnd/gmlnd_utils.c +++ /dev/null @@@ -1,1075 -1,1075 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2003 Los Alamos National Laboratory (LANL) -- * -- * This file is part of Lustre, http://www.lustre.org/ -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ --/* -- * All utilities required by lgmanl -- */ -- --#include "gmnal.h" -- --/* -- * Am I one of the gmnal rxthreads ? 
-- */ --int --gmnal_is_rxthread(gmnal_data_t *nal_data) --{ -- int i; -- for (i=0; irxthread_pid[i] == current->pid) -- return(1); -- } -- return(0); --} -- -- --/* -- * Allocate tx descriptors/tokens (large and small) -- * allocate a number of small tx buffers and register with GM -- * so they are wired and set up for DMA. This is a costly operation. -- * Also allocate a corrosponding descriptor to keep track of -- * the buffer. -- * Put all small descriptors on singly linked list to be available to send -- * function. -- * Allocate the rest of the available tx tokens for large messages. These will be -- * used to do gm_gets in gmnal_copyiov -- */ --int --gmnal_alloc_txd(gmnal_data_t *nal_data) --{ -- int ntx= 0, nstx= 0, nrxt_stx= 0, -- nltx= 0, i = 0; -- gmnal_stxd_t *txd = NULL; -- gmnal_ltxd_t *ltxd = NULL; -- void *txbuffer = NULL; -- -- CDEBUG(D_TRACE, "gmnal_alloc_small tx\n"); -- -- GMNAL_GM_LOCK(nal_data); -- /* -- * total number of transmit tokens -- */ -- ntx = gm_num_send_tokens(nal_data->gm_port); -- GMNAL_GM_UNLOCK(nal_data); -- CDEBUG(D_INFO, "total number of send tokens available is [%d]\n", ntx); -- -- /* -- * allocate a number for small sends -- * num_stxds from gmnal_module.c -- */ -- nstx = num_stxds; -- /* -- * give that number plus 1 to the receive threads -- */ -- nrxt_stx = nstx + 1; -- -- /* -- * give the rest for gm_gets -- */ -- nltx = ntx - (nrxt_stx + nstx); -- if (nltx < 1) { -- CDEBUG(D_ERROR, "No tokens available for large messages\n"); -- return(GMNAL_STATUS_FAIL); -- } -- -- -- /* -- * A semaphore is initialised with the -- * number of transmit tokens available. -- * To get a stxd, acquire the token semaphore. 
-- * this decrements the available token count -- * (if no tokens you block here, someone returning a -- * stxd will release the semaphore and wake you) -- * When token is obtained acquire the spinlock -- * to manipulate the list -- */ -- GMNAL_TXD_TOKEN_INIT(nal_data, nstx); -- GMNAL_TXD_LOCK_INIT(nal_data); -- GMNAL_RXT_TXD_TOKEN_INIT(nal_data, nrxt_stx); -- GMNAL_RXT_TXD_LOCK_INIT(nal_data); -- GMNAL_LTXD_TOKEN_INIT(nal_data, nltx); -- GMNAL_LTXD_LOCK_INIT(nal_data); -- -- for (i=0; i<=nstx; i++) { -- PORTAL_ALLOC(txd, sizeof(gmnal_stxd_t)); -- if (!txd) { -- CDEBUG(D_ERROR, "Failed to malloc txd [%d]\n", i); -- return(GMNAL_STATUS_NOMEM); -- } -- GMNAL_GM_LOCK(nal_data); -- txbuffer = gm_dma_malloc(nal_data->gm_port, -- GMNAL_SMALL_MSG_SIZE(nal_data)); -- GMNAL_GM_UNLOCK(nal_data); -- if (!txbuffer) { -- CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d]," -- " size [%d]\n", i, -- GMNAL_SMALL_MSG_SIZE(nal_data)); -- PORTAL_FREE(txd, sizeof(gmnal_stxd_t)); -- return(GMNAL_STATUS_FAIL); -- } -- txd->buffer = txbuffer; -- txd->buffer_size = GMNAL_SMALL_MSG_SIZE(nal_data); -- txd->gm_size = gm_min_size_for_length(txd->buffer_size); -- txd->nal_data = (struct _gmnal_data_t*)nal_data; -- txd->rxt = 0; -- -- txd->next = nal_data->stxd; -- nal_data->stxd = txd; -- CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], " -- "size [%d]\n", txd, txd->buffer, txd->buffer_size); -- } -- -- for (i=0; i<=nrxt_stx; i++) { -- PORTAL_ALLOC(txd, sizeof(gmnal_stxd_t)); -- if (!txd) { -- CDEBUG(D_ERROR, "Failed to malloc txd [%d]\n", i); -- return(GMNAL_STATUS_NOMEM); -- } -- GMNAL_GM_LOCK(nal_data); -- txbuffer = gm_dma_malloc(nal_data->gm_port, -- GMNAL_SMALL_MSG_SIZE(nal_data)); -- GMNAL_GM_UNLOCK(nal_data); -- if (!txbuffer) { -- CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d]," -- " size [%d]\n", i, -- GMNAL_SMALL_MSG_SIZE(nal_data)); -- PORTAL_FREE(txd, sizeof(gmnal_stxd_t)); -- return(GMNAL_STATUS_FAIL); -- } -- txd->buffer = txbuffer; -- txd->buffer_size = 
GMNAL_SMALL_MSG_SIZE(nal_data); -- txd->gm_size = gm_min_size_for_length(txd->buffer_size); -- txd->nal_data = (struct _gmnal_data_t*)nal_data; -- txd->rxt = 1; -- -- txd->next = nal_data->rxt_stxd; -- nal_data->rxt_stxd = txd; -- CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], " -- "size [%d]\n", txd, txd->buffer, txd->buffer_size); -- } -- -- /* -- * string together large tokens -- */ -- for (i=0; i<=nltx ; i++) { -- PORTAL_ALLOC(ltxd, sizeof(gmnal_ltxd_t)); -- ltxd->next = nal_data->ltxd; -- nal_data->ltxd = ltxd; -- } -- return(GMNAL_STATUS_OK); --} -- --/* Free the list of wired and gm_registered small tx buffers and -- * the tx descriptors that go along with them. -- */ --void --gmnal_free_txd(gmnal_data_t *nal_data) --{ -- gmnal_stxd_t *txd = nal_data->stxd, *_txd = NULL; -- gmnal_ltxd_t *ltxd = NULL, *_ltxd = NULL; -- -- CDEBUG(D_TRACE, "gmnal_free_small tx\n"); -- -- while(txd) { -- CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], " -- "size [%d]\n", txd, txd->buffer, txd->buffer_size); -- _txd = txd; -- txd = txd->next; -- GMNAL_GM_LOCK(nal_data); -- gm_dma_free(nal_data->gm_port, _txd->buffer); -- GMNAL_GM_UNLOCK(nal_data); -- PORTAL_FREE(_txd, sizeof(gmnal_stxd_t)); -- } -- txd = nal_data->rxt_stxd; -- while(txd) { -- CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], " -- "size [%d]\n", txd, txd->buffer, txd->buffer_size); -- _txd = txd; -- txd = txd->next; -- GMNAL_GM_LOCK(nal_data); -- gm_dma_free(nal_data->gm_port, _txd->buffer); -- GMNAL_GM_UNLOCK(nal_data); -- PORTAL_FREE(_txd, sizeof(gmnal_stxd_t)); -- } -- ltxd = nal_data->ltxd; -- while(txd) { -- _ltxd = ltxd; -- ltxd = ltxd->next; -- PORTAL_FREE(_ltxd, sizeof(gmnal_ltxd_t)); -- } -- -- return; --} -- -- --/* -- * Get a txd from the list -- * This get us a wired and gm_registered small tx buffer. -- * This implicitly gets us a send token also. 
-- */ --gmnal_stxd_t * --gmnal_get_stxd(gmnal_data_t *nal_data, int block) --{ -- -- gmnal_stxd_t *txd = NULL; -- pid_t pid = current->pid; -- -- -- CDEBUG(D_TRACE, "gmnal_get_stxd nal_data [%p] block[%d] pid [%d]\n", -- nal_data, block, pid); -- -- if (gmnal_is_rxthread(nal_data)) { -- CDEBUG(D_INFO, "RXTHREAD Attempting to get token\n"); -- GMNAL_RXT_TXD_GETTOKEN(nal_data); -- GMNAL_RXT_TXD_LOCK(nal_data); -- txd = nal_data->rxt_stxd; -- nal_data->rxt_stxd = txd->next; -- GMNAL_RXT_TXD_UNLOCK(nal_data); -- CDEBUG(D_INFO, "RXTHREAD got [%p], head is [%p]\n", -- txd, nal_data->rxt_stxd); -- txd->kniov = 0; -- txd->rxt = 1; -- } else { -- if (block) { -- CDEBUG(D_INFO, "Attempting to get token\n"); -- GMNAL_TXD_GETTOKEN(nal_data); -- CDEBUG(D_PORTALS, "Got token\n"); -- } else { -- if (GMNAL_TXD_TRYGETTOKEN(nal_data)) { -- CDEBUG(D_ERROR, "can't get token\n"); -- return(NULL); -- } -- } -- GMNAL_TXD_LOCK(nal_data); -- txd = nal_data->stxd; -- nal_data->stxd = txd->next; -- GMNAL_TXD_UNLOCK(nal_data); -- CDEBUG(D_INFO, "got [%p], head is [%p]\n", txd, -- nal_data->stxd); -- txd->kniov = 0; -- } /* general txd get */ -- return(txd); --} -- --/* -- * Return a txd to the list -- */ --void --gmnal_return_stxd(gmnal_data_t *nal_data, gmnal_stxd_t *txd) --{ -- CDEBUG(D_TRACE, "nal_data [%p], txd[%p] rxt[%d]\n", nal_data, -- txd, txd->rxt); -- -- /* -- * this transmit descriptor is -- * for the rxthread -- */ -- if (txd->rxt) { -- GMNAL_RXT_TXD_LOCK(nal_data); -- txd->next = nal_data->rxt_stxd; -- nal_data->rxt_stxd = txd; -- GMNAL_RXT_TXD_UNLOCK(nal_data); -- GMNAL_RXT_TXD_RETURNTOKEN(nal_data); -- CDEBUG(D_INFO, "Returned stxd to rxthread list\n"); -- } else { -- GMNAL_TXD_LOCK(nal_data); -- txd->next = nal_data->stxd; -- nal_data->stxd = txd; -- GMNAL_TXD_UNLOCK(nal_data); -- GMNAL_TXD_RETURNTOKEN(nal_data); -- CDEBUG(D_INFO, "Returned stxd to general list\n"); -- } -- return; --} -- -- --/* -- * Get a large transmit descriptor from the free list -- * This implicitly 
gets us a transmit token . -- * always wait for one. -- */ --gmnal_ltxd_t * --gmnal_get_ltxd(gmnal_data_t *nal_data) --{ -- -- gmnal_ltxd_t *ltxd = NULL; -- -- CDEBUG(D_TRACE, "nal_data [%p]\n", nal_data); -- -- GMNAL_LTXD_GETTOKEN(nal_data); -- GMNAL_LTXD_LOCK(nal_data); -- ltxd = nal_data->ltxd; -- nal_data->ltxd = ltxd->next; -- GMNAL_LTXD_UNLOCK(nal_data); -- CDEBUG(D_INFO, "got [%p], head is [%p]\n", ltxd, nal_data->ltxd); -- return(ltxd); --} -- --/* -- * Return an ltxd to the list -- */ --void --gmnal_return_ltxd(gmnal_data_t *nal_data, gmnal_ltxd_t *ltxd) --{ -- CDEBUG(D_TRACE, "nal_data [%p], ltxd[%p]\n", nal_data, ltxd); -- -- GMNAL_LTXD_LOCK(nal_data); -- ltxd->next = nal_data->ltxd; -- nal_data->ltxd = ltxd; -- GMNAL_LTXD_UNLOCK(nal_data); -- GMNAL_LTXD_RETURNTOKEN(nal_data); -- return; --} --/* -- * allocate a number of small rx buffers and register with GM -- * so they are wired and set up for DMA. This is a costly operation. -- * Also allocate a corrosponding descriptor to keep track of -- * the buffer. -- * Put all descriptors on singly linked list to be available to -- * receive thread. 
-- */ --int --gmnal_alloc_srxd(gmnal_data_t *nal_data) --{ -- int nrx = 0, nsrx = 0, i = 0; -- gmnal_srxd_t *rxd = NULL; -- void *rxbuffer = NULL; -- -- CDEBUG(D_TRACE, "gmnal_alloc_small rx\n"); -- -- GMNAL_GM_LOCK(nal_data); -- nrx = gm_num_receive_tokens(nal_data->gm_port); -- GMNAL_GM_UNLOCK(nal_data); -- CDEBUG(D_INFO, "total number of receive tokens available is [%d]\n", -- nrx); -- -- nsrx = nrx/2; -- nsrx = 12; -- /* -- * make the number of rxds twice our total -- * number of stxds plus 1 -- */ -- nsrx = num_stxds*2 + 2; -- -- CDEBUG(D_INFO, "Allocated [%d] receive tokens to small messages\n", -- nsrx); -- -- -- GMNAL_GM_LOCK(nal_data); -- nal_data->srxd_hash = gm_create_hash(gm_hash_compare_ptrs, -- gm_hash_hash_ptr, 0, 0, nsrx, 0); -- GMNAL_GM_UNLOCK(nal_data); -- if (!nal_data->srxd_hash) { -- CDEBUG(D_ERROR, "Failed to create hash table\n"); -- return(GMNAL_STATUS_NOMEM); -- } -- -- GMNAL_RXD_TOKEN_INIT(nal_data, nsrx); -- GMNAL_RXD_LOCK_INIT(nal_data); -- -- for (i=0; i<=nsrx; i++) { -- PORTAL_ALLOC(rxd, sizeof(gmnal_srxd_t)); -- if (!rxd) { -- CDEBUG(D_ERROR, "Failed to malloc rxd [%d]\n", i); -- return(GMNAL_STATUS_NOMEM); -- } --#if 0 -- PORTAL_ALLOC(rxbuffer, GMNAL_SMALL_MSG_SIZE(nal_data)); -- if (!rxbuffer) { -- CDEBUG(D_ERROR, "Failed to malloc rxbuffer [%d], " -- "size [%d]\n", i, -- GMNAL_SMALL_MSG_SIZE(nal_data)); -- PORTAL_FREE(rxd, sizeof(gmnal_srxd_t)); -- return(GMNAL_STATUS_FAIL); -- } -- CDEBUG(D_NET, "Calling gm_register_memory with port [%p] " -- "rxbuffer [%p], size [%d]\n", nal_data->gm_port, -- rxbuffer, GMNAL_SMALL_MSG_SIZE(nal_data)); -- GMNAL_GM_LOCK(nal_data); -- gm_status = gm_register_memory(nal_data->gm_port, rxbuffer, -- GMNAL_SMALL_MSG_SIZE(nal_data)); -- GMNAL_GM_UNLOCK(nal_data); -- if (gm_status != GM_SUCCESS) { -- CDEBUG(D_ERROR, "gm_register_memory failed buffer [%p]," -- " index [%d]\n", rxbuffer, i); -- switch(gm_status) { -- case(GM_FAILURE): -- CDEBUG(D_ERROR, "GM_FAILURE\n"); -- break; -- 
case(GM_PERMISSION_DENIED): -- CDEBUG(D_ERROR, "PERMISSION_DENIED\n"); -- break; -- case(GM_INVALID_PARAMETER): -- CDEBUG(D_ERROR, "INVALID_PARAMETER\n"); -- break; -- default: -- CDEBUG(D_ERROR, "Unknown error[%d]\n", -- gm_status); -- break; -- -- } -- return(GMNAL_STATUS_FAIL); -- } --#else -- GMNAL_GM_LOCK(nal_data); -- rxbuffer = gm_dma_malloc(nal_data->gm_port, -- GMNAL_SMALL_MSG_SIZE(nal_data)); -- GMNAL_GM_UNLOCK(nal_data); -- if (!rxbuffer) { -- CDEBUG(D_ERROR, "Failed to gm_dma_malloc rxbuffer [%d]," -- " size [%d]\n", i, -- GMNAL_SMALL_MSG_SIZE(nal_data)); -- PORTAL_FREE(rxd, sizeof(gmnal_srxd_t)); -- return(GMNAL_STATUS_FAIL); -- } --#endif -- -- rxd->buffer = rxbuffer; -- rxd->size = GMNAL_SMALL_MSG_SIZE(nal_data); -- rxd->gmsize = gm_min_size_for_length(rxd->size); -- -- if (gm_hash_insert(nal_data->srxd_hash, -- (void*)rxbuffer, (void*)rxd)) { -- -- CDEBUG(D_ERROR, "failed to create hash entry rxd[%p] " -- "for rxbuffer[%p]\n", rxd, rxbuffer); -- return(GMNAL_STATUS_FAIL); -- } -- -- rxd->next = nal_data->srxd; -- nal_data->srxd = rxd; -- CDEBUG(D_INFO, "Registered rxd [%p] with buffer [%p], " -- "size [%d]\n", rxd, rxd->buffer, rxd->size); -- } -- -- return(GMNAL_STATUS_OK); --} -- -- -- --/* Free the list of wired and gm_registered small rx buffers and the -- * rx descriptors that go along with them. 
-- */ --void --gmnal_free_srxd(gmnal_data_t *nal_data) --{ -- gmnal_srxd_t *rxd = nal_data->srxd, *_rxd = NULL; -- -- CDEBUG(D_TRACE, "gmnal_free_small rx\n"); -- -- while(rxd) { -- CDEBUG(D_INFO, "Freeing rxd [%p] buffer [%p], size [%d]\n", -- rxd, rxd->buffer, rxd->size); -- _rxd = rxd; -- rxd = rxd->next; -- --#if 0 -- GMNAL_GM_LOCK(nal_data); -- gm_deregister_memory(nal_data->gm_port, _rxd->buffer, -- _rxd->size); -- GMNAL_GM_UNLOCK(nal_data); -- PORTAL_FREE(_rxd->buffer, GMNAL_SMALL_RXBUFFER_SIZE); --#else -- GMNAL_GM_LOCK(nal_data); -- gm_dma_free(nal_data->gm_port, _rxd->buffer); -- GMNAL_GM_UNLOCK(nal_data); --#endif -- PORTAL_FREE(_rxd, sizeof(gmnal_srxd_t)); -- } -- return; --} -- -- --/* -- * Get a rxd from the free list -- * This get us a wired and gm_registered small rx buffer. -- * This implicitly gets us a receive token also. -- */ --gmnal_srxd_t * --gmnal_get_srxd(gmnal_data_t *nal_data, int block) --{ -- -- gmnal_srxd_t *rxd = NULL; -- CDEBUG(D_TRACE, "nal_data [%p] block [%d]\n", nal_data, block); -- -- if (block) { -- GMNAL_RXD_GETTOKEN(nal_data); -- } else { -- if (GMNAL_RXD_TRYGETTOKEN(nal_data)) { -- CDEBUG(D_INFO, "gmnal_get_srxd Can't get token\n"); -- return(NULL); -- } -- } -- GMNAL_RXD_LOCK(nal_data); -- rxd = nal_data->srxd; -- if (rxd) -- nal_data->srxd = rxd->next; -- GMNAL_RXD_UNLOCK(nal_data); -- CDEBUG(D_INFO, "got [%p], head is [%p]\n", rxd, nal_data->srxd); -- return(rxd); --} -- --/* -- * Return an rxd to the list -- */ --void --gmnal_return_srxd(gmnal_data_t *nal_data, gmnal_srxd_t *rxd) --{ -- CDEBUG(D_TRACE, "nal_data [%p], rxd[%p]\n", nal_data, rxd); -- -- GMNAL_RXD_LOCK(nal_data); -- rxd->next = nal_data->srxd; -- nal_data->srxd = rxd; -- GMNAL_RXD_UNLOCK(nal_data); -- GMNAL_RXD_RETURNTOKEN(nal_data); -- return; --} -- --/* -- * Given a pointer to a srxd find -- * the relevant descriptor for it -- * This is done by searching a hash -- * list that is created when the srxd's -- * are created -- */ --gmnal_srxd_t * 
--gmnal_rxbuffer_to_srxd(gmnal_data_t *nal_data, void *rxbuffer) --{ -- gmnal_srxd_t *srxd = NULL; -- CDEBUG(D_TRACE, "nal_data [%p], rxbuffer [%p]\n", nal_data, rxbuffer); -- srxd = gm_hash_find(nal_data->srxd_hash, rxbuffer); -- CDEBUG(D_INFO, "srxd is [%p]\n", srxd); -- return(srxd); --} -- -- --void --gmnal_stop_rxthread(gmnal_data_t *nal_data) --{ -- int delay = 30; -- -- -- -- CDEBUG(D_TRACE, "Attempting to stop rxthread nal_data [%p]\n", -- nal_data); -- -- nal_data->rxthread_stop_flag = GMNAL_THREAD_STOP; -- -- gmnal_remove_rxtwe(nal_data); -- /* -- * kick the thread -- */ -- up(&nal_data->rxtwe_wait); -- -- while(nal_data->rxthread_flag != GMNAL_THREAD_RESET && delay--) { -- CDEBUG(D_INFO, "gmnal_stop_rxthread sleeping\n"); -- gmnal_yield(1); -- up(&nal_data->rxtwe_wait); -- } -- -- if (nal_data->rxthread_flag != GMNAL_THREAD_RESET) { -- CDEBUG(D_ERROR, "I don't know how to wake the thread\n"); -- } else { -- CDEBUG(D_INFO, "rx thread seems to have stopped\n"); -- } --} -- --void --gmnal_stop_ctthread(gmnal_data_t *nal_data) --{ -- int delay = 15; -- -- -- -- CDEBUG(D_TRACE, "Attempting to stop ctthread nal_data [%p]\n", -- nal_data); -- -- nal_data->ctthread_flag = GMNAL_THREAD_STOP; -- GMNAL_GM_LOCK(nal_data); -- gm_set_alarm(nal_data->gm_port, &nal_data->ctthread_alarm, 10, -- NULL, NULL); -- GMNAL_GM_UNLOCK(nal_data); -- -- while(nal_data->ctthread_flag == GMNAL_THREAD_STOP && delay--) { -- CDEBUG(D_INFO, "gmnal_stop_ctthread sleeping\n"); -- gmnal_yield(1); -- } -- -- if (nal_data->ctthread_flag == GMNAL_THREAD_STOP) { -- CDEBUG(D_ERROR, "I DON'T KNOW HOW TO WAKE THE THREAD\n"); -- } else { -- CDEBUG(D_INFO, "CT THREAD SEEMS TO HAVE STOPPED\n"); -- } --} -- -- -- --char * --gmnal_gm_error(gm_status_t status) --{ -- return(gm_strerror(status)); -- -- switch(status) { -- case(GM_SUCCESS): -- return("SUCCESS"); -- case(GM_FAILURE): -- return("FAILURE"); -- case(GM_INPUT_BUFFER_TOO_SMALL): -- return("INPUT_BUFFER_TOO_SMALL"); -- 
case(GM_OUTPUT_BUFFER_TOO_SMALL): -- return("OUTPUT_BUFFER_TOO_SMALL"); -- case(GM_TRY_AGAIN ): -- return("TRY_AGAIN"); -- case(GM_BUSY): -- return("BUSY"); -- case(GM_MEMORY_FAULT): -- return("MEMORY_FAULT"); -- case(GM_INTERRUPTED): -- return("INTERRUPTED"); -- case(GM_INVALID_PARAMETER): -- return("INVALID_PARAMETER"); -- case(GM_OUT_OF_MEMORY): -- return("OUT_OF_MEMORY"); -- case(GM_INVALID_COMMAND): -- return("INVALID_COMMAND"); -- case(GM_PERMISSION_DENIED): -- return("PERMISSION_DENIED"); -- case(GM_INTERNAL_ERROR): -- return("INTERNAL_ERROR"); -- case(GM_UNATTACHED): -- return("UNATTACHED"); -- case(GM_UNSUPPORTED_DEVICE): -- return("UNSUPPORTED_DEVICE"); -- case(GM_SEND_TIMED_OUT): -- return("GM_SEND_TIMEDOUT"); -- case(GM_SEND_REJECTED): -- return("GM_SEND_REJECTED"); -- case(GM_SEND_TARGET_PORT_CLOSED): -- return("GM_SEND_TARGET_PORT_CLOSED"); -- case(GM_SEND_TARGET_NODE_UNREACHABLE): -- return("GM_SEND_TARGET_NODE_UNREACHABLE"); -- case(GM_SEND_DROPPED): -- return("GM_SEND_DROPPED"); -- case(GM_SEND_PORT_CLOSED): -- return("GM_SEND_PORT_CLOSED"); -- case(GM_NODE_ID_NOT_YET_SET): -- return("GM_NODE_ID_NOT_YET_SET"); -- case(GM_STILL_SHUTTING_DOWN): -- return("GM_STILL_SHUTTING_DOWN"); -- case(GM_CLONE_BUSY): -- return("GM_CLONE_BUSY"); -- case(GM_NO_SUCH_DEVICE): -- return("GM_NO_SUCH_DEVICE"); -- case(GM_ABORTED): -- return("GM_ABORTED"); -- case(GM_INCOMPATIBLE_LIB_AND_DRIVER): -- return("GM_INCOMPATIBLE_LIB_AND_DRIVER"); -- case(GM_UNTRANSLATED_SYSTEM_ERROR): -- return("GM_UNTRANSLATED_SYSTEM_ERROR"); -- case(GM_ACCESS_DENIED): -- return("GM_ACCESS_DENIED"); -- -- --/* -- * These ones are in the docs but aren't in the header file -- case(GM_DEV_NOT_FOUND): -- return("GM_DEV_NOT_FOUND"); -- case(GM_INVALID_PORT_NUMBER): -- return("GM_INVALID_PORT_NUMBER"); -- case(GM_UC_ERROR): -- return("GM_US_ERROR"); -- case(GM_PAGE_TABLE_FULL): -- return("GM_PAGE_TABLE_FULL"); -- case(GM_MINOR_OVERFLOW): -- return("GM_MINOR_OVERFLOW"); -- case(GM_SEND_ORPHANED): -- 
return("GM_SEND_ORPHANED"); -- case(GM_HARDWARE_FAULT): -- return("GM_HARDWARE_FAULT"); -- case(GM_DATA_CORRUPTED): -- return("GM_DATA_CORRUPTED"); -- case(GM_TIMED_OUT): -- return("GM_TIMED_OUT"); -- case(GM_USER_ERROR): -- return("GM_USER_ERROR"); -- case(GM_NO_MATCH): -- return("GM_NOMATCH"); -- case(GM_NOT_SUPPORTED_IN_KERNEL): -- return("GM_NOT_SUPPORTED_IN_KERNEL"); -- case(GM_NOT_SUPPORTED_ON_ARCH): -- return("GM_NOT_SUPPORTED_ON_ARCH"); -- case(GM_PTE_REF_CNT_OVERFLOW): -- return("GM_PTR_REF_CNT_OVERFLOW"); -- case(GM_NO_DRIVER_SUPPORT): -- return("GM_NO_DRIVER_SUPPORT"); -- case(GM_FIRMWARE_NOT_RUNNING): -- return("GM_FIRMWARE_NOT_RUNNING"); -- -- * These ones are in the docs but aren't in the header file -- */ -- default: -- return("UNKNOWN GM ERROR CODE"); -- } --} -- -- --char * --gmnal_rxevent(gm_recv_event_t *ev) --{ -- short event; -- event = GM_RECV_EVENT_TYPE(ev); -- switch(event) { -- case(GM_NO_RECV_EVENT): -- return("GM_NO_RECV_EVENT"); -- case(GM_SENDS_FAILED_EVENT): -- return("GM_SEND_FAILED_EVENT"); -- case(GM_ALARM_EVENT): -- return("GM_ALARM_EVENT"); -- case(GM_SENT_EVENT): -- return("GM_SENT_EVENT"); -- case(_GM_SLEEP_EVENT): -- return("_GM_SLEEP_EVENT"); -- case(GM_RAW_RECV_EVENT): -- return("GM_RAW_RECV_EVENT"); -- case(GM_BAD_SEND_DETECTED_EVENT): -- return("GM_BAD_SEND_DETECTED_EVENT"); -- case(GM_SEND_TOKEN_VIOLATION_EVENT): -- return("GM_SEND_TOKEN_VIOLATION_EVENT"); -- case(GM_RECV_TOKEN_VIOLATION_EVENT): -- return("GM_RECV_TOKEN_VIOLATION_EVENT"); -- case(GM_BAD_RECV_TOKEN_EVENT): -- return("GM_BAD_RECV_TOKEN_EVENT"); -- case(GM_ALARM_VIOLATION_EVENT): -- return("GM_ALARM_VIOLATION_EVENT"); -- case(GM_RECV_EVENT): -- return("GM_RECV_EVENT"); -- case(GM_HIGH_RECV_EVENT): -- return("GM_HIGH_RECV_EVENT"); -- case(GM_PEER_RECV_EVENT): -- return("GM_PEER_RECV_EVENT"); -- case(GM_HIGH_PEER_RECV_EVENT): -- return("GM_HIGH_PEER_RECV_EVENT"); -- case(GM_FAST_RECV_EVENT): -- return("GM_FAST_RECV_EVENT"); -- case(GM_FAST_HIGH_RECV_EVENT): -- 
return("GM_FAST_HIGH_RECV_EVENT"); -- case(GM_FAST_PEER_RECV_EVENT): -- return("GM_FAST_PEER_RECV_EVENT"); -- case(GM_FAST_HIGH_PEER_RECV_EVENT): -- return("GM_FAST_HIGH_PEER_RECV_EVENT"); -- case(GM_REJECTED_SEND_EVENT): -- return("GM_REJECTED_SEND_EVENT"); -- case(GM_ORPHANED_SEND_EVENT): -- return("GM_ORPHANED_SEND_EVENT"); -- case(GM_BAD_RESEND_DETECTED_EVENT): -- return("GM_BAD_RESEND_DETETED_EVENT"); -- case(GM_DROPPED_SEND_EVENT): -- return("GM_DROPPED_SEND_EVENT"); -- case(GM_BAD_SEND_VMA_EVENT): -- return("GM_BAD_SEND_VMA_EVENT"); -- case(GM_BAD_RECV_VMA_EVENT): -- return("GM_BAD_RECV_VMA_EVENT"); -- case(_GM_FLUSHED_ALARM_EVENT): -- return("GM_FLUSHED_ALARM_EVENT"); -- case(GM_SENT_TOKENS_EVENT): -- return("GM_SENT_TOKENS_EVENTS"); -- case(GM_IGNORE_RECV_EVENT): -- return("GM_IGNORE_RECV_EVENT"); -- case(GM_ETHERNET_RECV_EVENT): -- return("GM_ETHERNET_RECV_EVENT"); -- case(GM_NEW_NO_RECV_EVENT): -- return("GM_NEW_NO_RECV_EVENT"); -- case(GM_NEW_SENDS_FAILED_EVENT): -- return("GM_NEW_SENDS_FAILED_EVENT"); -- case(GM_NEW_ALARM_EVENT): -- return("GM_NEW_ALARM_EVENT"); -- case(GM_NEW_SENT_EVENT): -- return("GM_NEW_SENT_EVENT"); -- case(_GM_NEW_SLEEP_EVENT): -- return("GM_NEW_SLEEP_EVENT"); -- case(GM_NEW_RAW_RECV_EVENT): -- return("GM_NEW_RAW_RECV_EVENT"); -- case(GM_NEW_BAD_SEND_DETECTED_EVENT): -- return("GM_NEW_BAD_SEND_DETECTED_EVENT"); -- case(GM_NEW_SEND_TOKEN_VIOLATION_EVENT): -- return("GM_NEW_SEND_TOKEN_VIOLATION_EVENT"); -- case(GM_NEW_RECV_TOKEN_VIOLATION_EVENT): -- return("GM_NEW_RECV_TOKEN_VIOLATION_EVENT"); -- case(GM_NEW_BAD_RECV_TOKEN_EVENT): -- return("GM_NEW_BAD_RECV_TOKEN_EVENT"); -- case(GM_NEW_ALARM_VIOLATION_EVENT): -- return("GM_NEW_ALARM_VIOLATION_EVENT"); -- case(GM_NEW_RECV_EVENT): -- return("GM_NEW_RECV_EVENT"); -- case(GM_NEW_HIGH_RECV_EVENT): -- return("GM_NEW_HIGH_RECV_EVENT"); -- case(GM_NEW_PEER_RECV_EVENT): -- return("GM_NEW_PEER_RECV_EVENT"); -- case(GM_NEW_HIGH_PEER_RECV_EVENT): -- return("GM_NEW_HIGH_PEER_RECV_EVENT"); -- 
case(GM_NEW_FAST_RECV_EVENT): -- return("GM_NEW_FAST_RECV_EVENT"); -- case(GM_NEW_FAST_HIGH_RECV_EVENT): -- return("GM_NEW_FAST_HIGH_RECV_EVENT"); -- case(GM_NEW_FAST_PEER_RECV_EVENT): -- return("GM_NEW_FAST_PEER_RECV_EVENT"); -- case(GM_NEW_FAST_HIGH_PEER_RECV_EVENT): -- return("GM_NEW_FAST_HIGH_PEER_RECV_EVENT"); -- case(GM_NEW_REJECTED_SEND_EVENT): -- return("GM_NEW_REJECTED_SEND_EVENT"); -- case(GM_NEW_ORPHANED_SEND_EVENT): -- return("GM_NEW_ORPHANED_SEND_EVENT"); -- case(_GM_NEW_PUT_NOTIFICATION_EVENT): -- return("_GM_NEW_PUT_NOTIFICATION_EVENT"); -- case(GM_NEW_FREE_SEND_TOKEN_EVENT): -- return("GM_NEW_FREE_SEND_TOKEN_EVENT"); -- case(GM_NEW_FREE_HIGH_SEND_TOKEN_EVENT): -- return("GM_NEW_FREE_HIGH_SEND_TOKEN_EVENT"); -- case(GM_NEW_BAD_RESEND_DETECTED_EVENT): -- return("GM_NEW_BAD_RESEND_DETECTED_EVENT"); -- case(GM_NEW_DROPPED_SEND_EVENT): -- return("GM_NEW_DROPPED_SEND_EVENT"); -- case(GM_NEW_BAD_SEND_VMA_EVENT): -- return("GM_NEW_BAD_SEND_VMA_EVENT"); -- case(GM_NEW_BAD_RECV_VMA_EVENT): -- return("GM_NEW_BAD_RECV_VMA_EVENT"); -- case(_GM_NEW_FLUSHED_ALARM_EVENT): -- return("GM_NEW_FLUSHED_ALARM_EVENT"); -- case(GM_NEW_SENT_TOKENS_EVENT): -- return("GM_NEW_SENT_TOKENS_EVENT"); -- case(GM_NEW_IGNORE_RECV_EVENT): -- return("GM_NEW_IGNORE_RECV_EVENT"); -- case(GM_NEW_ETHERNET_RECV_EVENT): -- return("GM_NEW_ETHERNET_RECV_EVENT"); -- default: -- return("Unknown Recv event"); --#if 0 -- case(/* _GM_PUT_NOTIFICATION_EVENT */ -- case(/* GM_FREE_SEND_TOKEN_EVENT */ -- case(/* GM_FREE_HIGH_SEND_TOKEN_EVENT */ --#endif -- } --} -- -- --void --gmnal_yield(int delay) --{ -- set_current_state(TASK_INTERRUPTIBLE); -- schedule_timeout(delay); --} -- --int --gmnal_is_small_msg(gmnal_data_t *nal_data, int niov, struct iovec *iov, -- int len) --{ -- -- CDEBUG(D_TRACE, "len [%d] limit[%d]\n", len, -- GMNAL_SMALL_MSG_SIZE(nal_data)); -- -- if ((len + sizeof(ptl_hdr_t) + sizeof(gmnal_msghdr_t)) -- < GMNAL_SMALL_MSG_SIZE(nal_data)) { -- -- CDEBUG(D_INFO, "Yep, small message\n"); 
-- return(1); -- } else { -- CDEBUG(D_ERROR, "No, not small message\n"); -- /* -- * could be made up of lots of little ones ! -- */ -- return(0); -- } -- --} -- --/* -- * extract info from the receive event. -- * Have to do this before the next call to gm_receive -- * Deal with all endian stuff here. -- * Then stick work entry on list where rxthreads -- * can get it to complete the receive -- */ --int --gmnal_add_rxtwe(gmnal_data_t *nal_data, gm_recv_t *recv) --{ -- gmnal_rxtwe_t *we = NULL; -- -- CDEBUG(D_NET, "adding entry to list\n"); -- -- PORTAL_ALLOC(we, sizeof(gmnal_rxtwe_t)); -- if (!we) { -- CDEBUG(D_ERROR, "failed to malloc\n"); -- return(GMNAL_STATUS_FAIL); -- } -- we->buffer = gm_ntohp(recv->buffer); -- we->snode = (int)gm_ntoh_u16(recv->sender_node_id); -- we->sport = (int)gm_ntoh_u8(recv->sender_port_id); -- we->type = (int)gm_ntoh_u8(recv->type); -- we->length = (int)gm_ntohl(recv->length); -- -- spin_lock(&nal_data->rxtwe_lock); -- if (nal_data->rxtwe_tail) { -- nal_data->rxtwe_tail->next = we; -- } else { -- nal_data->rxtwe_head = we; -- nal_data->rxtwe_tail = we; -- } -- nal_data->rxtwe_tail = we; -- spin_unlock(&nal_data->rxtwe_lock); -- -- up(&nal_data->rxtwe_wait); -- return(GMNAL_STATUS_OK); --} -- --void --gmnal_remove_rxtwe(gmnal_data_t *nal_data) --{ -- gmnal_rxtwe_t *_we, *we = nal_data->rxtwe_head; -- -- CDEBUG(D_NET, "removing all work list entries\n"); -- -- spin_lock(&nal_data->rxtwe_lock); -- CDEBUG(D_NET, "Got lock\n"); -- while (we) { -- _we = we; -- we = we->next; -- PORTAL_FREE(_we, sizeof(gmnal_rxtwe_t)); -- } -- spin_unlock(&nal_data->rxtwe_lock); -- nal_data->rxtwe_head = NULL; -- nal_data->rxtwe_tail = NULL; --} -- --gmnal_rxtwe_t * --gmnal_get_rxtwe(gmnal_data_t *nal_data) --{ -- gmnal_rxtwe_t *we = NULL; -- -- CDEBUG(D_NET, "Getting entry to list\n"); -- -- do { -- down(&nal_data->rxtwe_wait); -- if (nal_data->rxthread_stop_flag == GMNAL_THREAD_STOP) { -- /* -- * time to stop -- * TO DO some one free the work entries -- */ 
-- return(NULL); -- } -- spin_lock(&nal_data->rxtwe_lock); -- if (nal_data->rxtwe_head) { -- CDEBUG(D_INFO, "Got a work entry\n"); -- we = nal_data->rxtwe_head; -- nal_data->rxtwe_head = we->next; -- if (!nal_data->rxtwe_head) -- nal_data->rxtwe_tail = NULL; -- } else { -- CDEBUG(D_WARNING, "woken but no work\n"); -- } -- spin_unlock(&nal_data->rxtwe_lock); -- } while (!we); -- -- CDEBUG(D_INFO, "Returning we[%p]\n", we); -- return(we); --} -- -- --/* -- * Start the caretaker thread and a number of receiver threads -- * The caretaker thread gets events from the gm library. -- * It passes receive events to the receiver threads via a work list. -- * It processes other events itself in gm_unknown. These will be -- * callback events or sleeps. -- */ --int --gmnal_start_kernel_threads(gmnal_data_t *nal_data) --{ -- -- int threads = 0; -- /* -- * the alarm is used to wake the caretaker thread from -- * gm_unknown call (sleeping) to exit it. -- */ -- CDEBUG(D_NET, "Initializing caretaker thread alarm and flag\n"); -- gm_initialize_alarm(&nal_data->ctthread_alarm); -- nal_data->ctthread_flag = GMNAL_THREAD_RESET; -- -- -- CDEBUG(D_INFO, "Starting caretaker thread\n"); -- nal_data->ctthread_pid = -- kernel_thread(gmnal_ct_thread, (void*)nal_data, 0); -- if (nal_data->ctthread_pid <= 0) { -- CDEBUG(D_ERROR, "Caretaker thread failed to start\n"); -- return(GMNAL_STATUS_FAIL); -- } -- -- while (nal_data->rxthread_flag != GMNAL_THREAD_RESET) { -- gmnal_yield(1); -- CDEBUG(D_INFO, "Waiting for caretaker thread signs of life\n"); -- } -- -- CDEBUG(D_INFO, "caretaker thread has started\n"); -- -- -- /* -- * Now start a number of receiver threads -- * these treads get work to do from the caretaker (ct) thread -- */ -- nal_data->rxthread_flag = GMNAL_THREAD_RESET; -- nal_data->rxthread_stop_flag = GMNAL_THREAD_RESET; -- -- for (threads=0; threadsrxthread_pid[threads] = -1; -- spin_lock_init(&nal_data->rxtwe_lock); -- spin_lock_init(&nal_data->rxthread_flag_lock); -- 
sema_init(&nal_data->rxtwe_wait, 0); -- nal_data->rxtwe_head = NULL; -- nal_data->rxtwe_tail = NULL; -- /* -- * If the default number of receive threades isn't -- * modified at load time, then start one thread per cpu -- */ -- if (num_rx_threads == -1) -- num_rx_threads = smp_num_cpus; -- CDEBUG(D_INFO, "Starting [%d] receive threads\n", num_rx_threads); -- for (threads=0; threadsrxthread_pid[threads] = -- kernel_thread(gmnal_rx_thread, (void*)nal_data, 0); -- if (nal_data->rxthread_pid[threads] <= 0) { -- CDEBUG(D_ERROR, "Receive thread failed to start\n"); -- gmnal_stop_rxthread(nal_data); -- gmnal_stop_ctthread(nal_data); -- return(GMNAL_STATUS_FAIL); -- } -- } -- -- for (;;) { -- spin_lock(&nal_data->rxthread_flag_lock); -- if (nal_data->rxthread_flag == GMNAL_RXTHREADS_STARTED) { -- spin_unlock(&nal_data->rxthread_flag_lock); -- break; -- } -- spin_unlock(&nal_data->rxthread_flag_lock); -- gmnal_yield(1); -- } -- -- CDEBUG(D_INFO, "receive threads seem to have started\n"); -- -- return(GMNAL_STATUS_OK); --} diff --cc lnet/klnds/qswlnd/Makefile.in index 60d09c8,d27240c..0000000 deleted file mode 100644,100644 --- a/lnet/klnds/qswlnd/Makefile.in +++ /dev/null @@@ -1,6 -1,6 +1,0 @@@ --MODULES := kqswnal --kqswnal-objs := qswnal.o qswnal_cb.o -- - EXTRA_PRE_CFLAGS := @QSWCPPFLAGS@ -I/usr/include -EXTRA_POST_CFLAGS := @QSWCPPFLAGS@ -I/usr/include -- --@INCLUDE_RULES@ diff --cc lnet/klnds/qswlnd/qswlnd.c index e7691a0,16123c2..0000000 deleted file mode 100644,100644 --- a/lnet/klnds/qswlnd/qswlnd.c +++ /dev/null @@@ -1,814 -1,829 +1,0 @@@ --/* -- * Copyright (C) 2002 Cluster File Systems, Inc. -- * Author: Eric Barton -- * -- * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL) -- * W. 
Marcus Miller - Based on ksocknal -- * -- * This file is part of Portals, http://www.sf.net/projects/lustre/ -- * -- * Portals is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Portals is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Portals; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- * -- */ -- --#include "qswnal.h" -- - ptl_handle_ni_t kqswnal_ni; --nal_t kqswnal_api; --kqswnal_data_t kqswnal_data; -ptl_handle_ni_t kqswnal_ni; -kqswnal_tunables_t kqswnal_tunables; -- --kpr_nal_interface_t kqswnal_router_interface = { -- kprni_nalid: QSWNAL, -- kprni_arg: NULL, -- kprni_fwd: kqswnal_fwd_packet, -- kprni_notify: NULL, /* we're connectionless */ --}; -- --#if CONFIG_SYSCTL --#define QSWNAL_SYSCTL 201 -- --#define QSWNAL_SYSCTL_OPTIMIZED_GETS 1 --#define QSWNAL_SYSCTL_COPY_SMALL_FWD 2 -- --static ctl_table kqswnal_ctl_table[] = { - {QSWNAL_SYSCTL_OPTIMIZED_GETS, "optimized_gets", - &kqswnal_data.kqn_optimized_gets, sizeof (int), - {QSWNAL_SYSCTL_OPTIMIZED_GETS, "optimized_puts", - &kqswnal_tunables.kqn_optimized_puts, sizeof (int), -- 0644, NULL, &proc_dointvec}, - {QSWNAL_SYSCTL_COPY_SMALL_FWD, "copy_small_fwd", - &kqswnal_data.kqn_copy_small_fwd, sizeof (int), - {QSWNAL_SYSCTL_OPTIMIZED_GETS, "optimized_gets", - &kqswnal_tunables.kqn_optimized_gets, sizeof (int), -- 0644, NULL, &proc_dointvec}, -- {0} --}; -- --static ctl_table kqswnal_top_ctl_table[] = { -- {QSWNAL_SYSCTL, "qswnal", NULL, 0, 0555, kqswnal_ctl_table}, -- {0} --}; --#endif - - static int - kqswnal_forward(nal_t *nal, - int id, - void 
*args, size_t args_len, - void *ret, size_t ret_len) - { - kqswnal_data_t *k = nal->nal_data; - nal_cb_t *nal_cb = k->kqn_cb; - - LASSERT (nal == &kqswnal_api); - LASSERT (k == &kqswnal_data); - LASSERT (nal_cb == &kqswnal_lib); - - lib_dispatch(nal_cb, k, id, args, ret); /* nal needs k */ - return (PTL_OK); - } - - static void - kqswnal_lock (nal_t *nal, unsigned long *flags) - { - kqswnal_data_t *k = nal->nal_data; - nal_cb_t *nal_cb = k->kqn_cb; - - LASSERT (nal == &kqswnal_api); - LASSERT (k == &kqswnal_data); - LASSERT (nal_cb == &kqswnal_lib); - - nal_cb->cb_cli(nal_cb,flags); - } - - static void - kqswnal_unlock(nal_t *nal, unsigned long *flags) - { - kqswnal_data_t *k = nal->nal_data; - nal_cb_t *nal_cb = k->kqn_cb; - - LASSERT (nal == &kqswnal_api); - LASSERT (k == &kqswnal_data); - LASSERT (nal_cb == &kqswnal_lib); - - nal_cb->cb_sti(nal_cb,flags); - } - - static int - kqswnal_shutdown(nal_t *nal, int ni) - { - CDEBUG (D_NET, "shutdown\n"); - - LASSERT (nal == &kqswnal_api); - return (0); - } - - static void - kqswnal_yield( nal_t *nal ) - { - CDEBUG (D_NET, "yield\n"); - - if (need_resched()) - schedule(); - return; - } - - static nal_t * - kqswnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, - ptl_pid_t requested_pid) - { - ptl_nid_t mynid = kqswnal_elanid2nid (kqswnal_data.kqn_elanid); - int nnids = kqswnal_data.kqn_nnodes; - - CDEBUG(D_NET, "calling lib_init with nid "LPX64" of %d\n", mynid, nnids); - - lib_init(&kqswnal_lib, mynid, 0, nnids, ptl_size, ac_size); - - return (&kqswnal_api); - } -- --int --kqswnal_get_tx_desc (struct portals_cfg *pcfg) --{ -- unsigned long flags; -- struct list_head *tmp; -- kqswnal_tx_t *ktx; - ptl_hdr_t *hdr; -- int index = pcfg->pcfg_count; -- int rc = -ENOENT; -- -- spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags); -- -- list_for_each (tmp, &kqswnal_data.kqn_activetxds) { -- if (index-- != 0) -- continue; -- -- ktx = list_entry (tmp, kqswnal_tx_t, ktx_list); - hdr = (ptl_hdr_t 
*)ktx->ktx_buffer; -- -- pcfg->pcfg_pbuf1 = (char *)ktx; - pcfg->pcfg_count = NTOH__u32(ktx->ktx_wire_hdr->type); - pcfg->pcfg_size = NTOH__u32(ktx->ktx_wire_hdr->payload_length); - pcfg->pcfg_nid = NTOH__u64(ktx->ktx_wire_hdr->dest_nid); - pcfg->pcfg_count = le32_to_cpu(hdr->type); - pcfg->pcfg_size = le32_to_cpu(hdr->payload_length); - pcfg->pcfg_nid = le64_to_cpu(hdr->dest_nid); -- pcfg->pcfg_nid2 = ktx->ktx_nid; -- pcfg->pcfg_misc = ktx->ktx_launcher; -- pcfg->pcfg_flags = (list_empty (&ktx->ktx_delayed_list) ? 0 : 1) | -- (!ktx->ktx_isnblk ? 0 : 2) | -- (ktx->ktx_state << 2); -- rc = 0; -- break; -- } -- -- spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); -- return (rc); --} -- --int --kqswnal_cmd (struct portals_cfg *pcfg, void *private) --{ -- LASSERT (pcfg != NULL); -- -- switch (pcfg->pcfg_command) { -- case NAL_CMD_GET_TXDESC: -- return (kqswnal_get_tx_desc (pcfg)); -- -- case NAL_CMD_REGISTER_MYNID: -- CDEBUG (D_IOCTL, "setting NID offset to "LPX64" (was "LPX64")\n", -- pcfg->pcfg_nid - kqswnal_data.kqn_elanid, -- kqswnal_data.kqn_nid_offset); -- kqswnal_data.kqn_nid_offset = -- pcfg->pcfg_nid - kqswnal_data.kqn_elanid; - kqswnal_lib.ni.nid = pcfg->pcfg_nid; - kqswnal_lib.libnal_ni.ni_pid.nid = pcfg->pcfg_nid; -- return (0); -- -- default: -- return (-EINVAL); -- } --} -- - void __exit - kqswnal_finalise (void) -static void -kqswnal_shutdown(nal_t *nal) --{ - kqswnal_tx_t *ktx; - kqswnal_rx_t *krx; - unsigned long flags; - int do_lib_fini = 0; - - /* NB The first ref was this module! 
*/ - if (nal->nal_refct != 0) { - PORTAL_MODULE_UNUSE; - return; - } - - CDEBUG (D_NET, "shutdown\n"); - LASSERT (nal == &kqswnal_api); -- -- switch (kqswnal_data.kqn_init) -- { -- default: -- LASSERT (0); -- -- case KQN_INIT_ALL: - #if CONFIG_SYSCTL - if (kqswnal_data.kqn_sysctl != NULL) - unregister_sysctl_table (kqswnal_data.kqn_sysctl); - #endif - PORTAL_SYMBOL_UNREGISTER (kqswnal_ni); - kportal_nal_unregister(QSWNAL); - libcfs_nal_cmd_unregister(QSWNAL); -- /* fall through */ -- - case KQN_INIT_PTL: - PtlNIFini (kqswnal_ni); - lib_fini (&kqswnal_lib); - case KQN_INIT_LIB: - do_lib_fini = 1; -- /* fall through */ -- -- case KQN_INIT_DATA: -- break; -- -- case KQN_INIT_NOTHING: -- return; -- } -- -- /**********************************************************************/ - /* Make router stop her calling me and fail any more call-ins */ - /* Tell router we're shutting down. Any router calls my threads - * make will now fail immediately and the router will stop calling - * into me. */ -- kpr_shutdown (&kqswnal_data.kqn_router); - - -- /**********************************************************************/ - /* flag threads we've started to terminate and wait for all to ack */ - - /* Signal the start of shutdown... 
*/ - spin_lock_irqsave(&kqswnal_data.kqn_idletxd_lock, flags); -- kqswnal_data.kqn_shuttingdown = 1; - wake_up_all (&kqswnal_data.kqn_sched_waitq); - spin_unlock_irqrestore(&kqswnal_data.kqn_idletxd_lock, flags); -- - while (atomic_read (&kqswnal_data.kqn_nthreads_running) != 0) { - CDEBUG(D_NET, "waiting for %d threads to start shutting down\n", - atomic_read (&kqswnal_data.kqn_nthreads_running)); - wake_up_all(&kqswnal_data.kqn_idletxd_waitq); - - /**********************************************************************/ - /* wait for sends that have allocated a tx desc to launch or give up */ - while (atomic_read (&kqswnal_data.kqn_pending_txs) != 0) { - CDEBUG(D_NET, "waiting for %d pending sends\n", - atomic_read (&kqswnal_data.kqn_pending_txs)); -- set_current_state (TASK_UNINTERRUPTIBLE); -- schedule_timeout (HZ); -- } -- -- /**********************************************************************/ -- /* close elan comms */ --#if MULTIRAIL_EKC - /* Shut down receivers first; rx callbacks might try sending... */ -- if (kqswnal_data.kqn_eprx_small != NULL) -- ep_free_rcvr (kqswnal_data.kqn_eprx_small); -- -- if (kqswnal_data.kqn_eprx_large != NULL) -- ep_free_rcvr (kqswnal_data.kqn_eprx_large); - - /* NB ep_free_rcvr() returns only after we've freed off all receive - * buffers (see shutdown handling in kqswnal_requeue_rx()). This - * means we must have completed any messages we passed to - * lib_parse() or kpr_fwd_start(). */ -- -- if (kqswnal_data.kqn_eptx != NULL) -- ep_free_xmtr (kqswnal_data.kqn_eptx); -- - /* freeing the xmtr completes all txs pdq */ - /* NB ep_free_xmtr() returns only after all outstanding transmits - * have called their callback... 
*/ -- LASSERT(list_empty(&kqswnal_data.kqn_activetxds)); --#else - /* "Old" EKC just pretends to shutdown cleanly but actually - * provides no guarantees */ -- if (kqswnal_data.kqn_eprx_small != NULL) -- ep_remove_large_rcvr (kqswnal_data.kqn_eprx_small); -- -- if (kqswnal_data.kqn_eprx_large != NULL) -- ep_remove_large_rcvr (kqswnal_data.kqn_eprx_large); -- -- /* wait for transmits to complete */ -- while (!list_empty(&kqswnal_data.kqn_activetxds)) { -- CWARN("waiting for active transmits to complete\n"); -- set_current_state(TASK_UNINTERRUPTIBLE); -- schedule_timeout(HZ); -- } -- -- if (kqswnal_data.kqn_eptx != NULL) -- ep_free_large_xmtr (kqswnal_data.kqn_eptx); --#endif -- /**********************************************************************/ -- /* flag threads to terminate, wake them and wait for them to die */ - -- kqswnal_data.kqn_shuttingdown = 2; -- wake_up_all (&kqswnal_data.kqn_sched_waitq); -- -- while (atomic_read (&kqswnal_data.kqn_nthreads) != 0) { -- CDEBUG(D_NET, "waiting for %d threads to terminate\n", -- atomic_read (&kqswnal_data.kqn_nthreads)); -- set_current_state (TASK_UNINTERRUPTIBLE); -- schedule_timeout (HZ); -- } -- -- /**********************************************************************/ -- /* No more threads. No more portals, router or comms callbacks! -- * I control the horizontals and the verticals... 
-- */ -- --#if MULTIRAIL_EKC -- LASSERT (list_empty (&kqswnal_data.kqn_readyrxds)); - LASSERT (list_empty (&kqswnal_data.kqn_delayedtxds)); - LASSERT (list_empty (&kqswnal_data.kqn_delayedfwds)); --#endif -- -- /**********************************************************************/ - /* Complete any blocked forwarding packets with error - /* Complete any blocked forwarding packets, with error -- */ -- -- while (!list_empty (&kqswnal_data.kqn_idletxd_fwdq)) -- { -- kpr_fwd_desc_t *fwd = list_entry (kqswnal_data.kqn_idletxd_fwdq.next, - kpr_fwd_desc_t, kprfd_list); - list_del (&fwd->kprfd_list); - kpr_fwd_done (&kqswnal_data.kqn_router, fwd, -EHOSTUNREACH); - } - - while (!list_empty (&kqswnal_data.kqn_delayedfwds)) - { - kpr_fwd_desc_t *fwd = list_entry (kqswnal_data.kqn_delayedfwds.next, -- kpr_fwd_desc_t, kprfd_list); -- list_del (&fwd->kprfd_list); - kpr_fwd_done (&kqswnal_data.kqn_router, fwd, -EHOSTUNREACH); - kpr_fwd_done (&kqswnal_data.kqn_router, fwd, -ESHUTDOWN); -- } -- -- /**********************************************************************/ - /* Wait for router to complete any packets I sent her - */ - /* finalise router and portals lib */ -- -- kpr_deregister (&kqswnal_data.kqn_router); -- - if (do_lib_fini) - lib_fini (&kqswnal_lib); -- -- /**********************************************************************/ -- /* Unmap message buffers and free all descriptors and buffers -- */ -- --#if MULTIRAIL_EKC -- /* FTTB, we need to unmap any remaining mapped memory. When -- * ep_dvma_release() get fixed (and releases any mappings in the -- * region), we can delete all the code from here --------> */ -- - for (ktx = kqswnal_data.kqn_txds; ktx != NULL; ktx =ktx->ktx_alloclist){ - /* If ktx has a buffer, it got mapped; unmap now. 
NB only - * the pre-mapped stuff is still mapped since all tx descs - * must be idle */ - if (kqswnal_data.kqn_txds != NULL) { - int i; -- - if (ktx->ktx_buffer != NULL) - ep_dvma_unload(kqswnal_data.kqn_ep, - kqswnal_data.kqn_ep_tx_nmh, - &ktx->ktx_ebuffer); - for (i = 0; i < KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS; i++) { - kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i]; - - /* If ktx has a buffer, it got mapped; unmap now. - * NB only the pre-mapped stuff is still mapped - * since all tx descs must be idle */ - - if (ktx->ktx_buffer != NULL) - ep_dvma_unload(kqswnal_data.kqn_ep, - kqswnal_data.kqn_ep_tx_nmh, - &ktx->ktx_ebuffer); - } -- } -- - for (krx = kqswnal_data.kqn_rxds; krx != NULL; krx =krx->krx_alloclist){ - /* If krx_kiov[0].kiov_page got allocated, it got mapped. - * NB subsequent pages get merged */ - if (kqswnal_data.kqn_rxds != NULL) { - int i; -- - if (krx->krx_kiov[0].kiov_page != NULL) - ep_dvma_unload(kqswnal_data.kqn_ep, - kqswnal_data.kqn_ep_rx_nmh, - &krx->krx_elanbuffer); - for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++) { - kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i]; - - /* If krx_kiov[0].kiov_page got allocated, it got mapped. 
- * NB subsequent pages get merged */ - - if (krx->krx_kiov[0].kiov_page != NULL) - ep_dvma_unload(kqswnal_data.kqn_ep, - kqswnal_data.kqn_ep_rx_nmh, - &krx->krx_elanbuffer); - } -- } -- /* <----------- to here */ -- -- if (kqswnal_data.kqn_ep_rx_nmh != NULL) - ep_dvma_release(kqswnal_data.kqn_ep,kqswnal_data.kqn_ep_rx_nmh); - ep_dvma_release(kqswnal_data.kqn_ep, kqswnal_data.kqn_ep_rx_nmh); -- -- if (kqswnal_data.kqn_ep_tx_nmh != NULL) - ep_dvma_release(kqswnal_data.kqn_ep,kqswnal_data.kqn_ep_tx_nmh); - ep_dvma_release(kqswnal_data.kqn_ep, kqswnal_data.kqn_ep_tx_nmh); --#else -- if (kqswnal_data.kqn_eprxdmahandle != NULL) -- { -- elan3_dvma_unload(kqswnal_data.kqn_ep->DmaState, -- kqswnal_data.kqn_eprxdmahandle, 0, -- KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL + -- KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE); -- -- elan3_dma_release(kqswnal_data.kqn_ep->DmaState, -- kqswnal_data.kqn_eprxdmahandle); -- } -- -- if (kqswnal_data.kqn_eptxdmahandle != NULL) -- { -- elan3_dvma_unload(kqswnal_data.kqn_ep->DmaState, -- kqswnal_data.kqn_eptxdmahandle, 0, -- KQSW_NTXMSGPAGES * (KQSW_NTXMSGS + -- KQSW_NNBLK_TXMSGS)); -- -- elan3_dma_release(kqswnal_data.kqn_ep->DmaState, -- kqswnal_data.kqn_eptxdmahandle); -- } --#endif -- - while (kqswnal_data.kqn_txds != NULL) { - ktx = kqswnal_data.kqn_txds; - if (kqswnal_data.kqn_txds != NULL) - { - int i; -- - if (ktx->ktx_buffer != NULL) - PORTAL_FREE(ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE); - for (i = 0; i < KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS; i++) - { - kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i]; -- - kqswnal_data.kqn_txds = ktx->ktx_alloclist; - PORTAL_FREE(ktx, sizeof(*ktx)); - if (ktx->ktx_buffer != NULL) - PORTAL_FREE(ktx->ktx_buffer, - KQSW_TX_BUFFER_SIZE); - } - - PORTAL_FREE(kqswnal_data.kqn_txds, - sizeof (kqswnal_tx_t) * (KQSW_NTXMSGS + - KQSW_NNBLK_TXMSGS)); -- } -- - while (kqswnal_data.kqn_rxds != NULL) { - int i; - if (kqswnal_data.kqn_rxds != NULL) - { - int i; - int j; -- - krx = kqswnal_data.kqn_rxds; - for (i = 0; i 
< krx->krx_npages; i++) - if (krx->krx_kiov[i].kiov_page != NULL) - __free_page (krx->krx_kiov[i].kiov_page); - for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++) - { - kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i]; -- - kqswnal_data.kqn_rxds = krx->krx_alloclist; - PORTAL_FREE(krx, sizeof (*krx)); - for (j = 0; j < krx->krx_npages; j++) - if (krx->krx_kiov[j].kiov_page != NULL) - __free_page (krx->krx_kiov[j].kiov_page); - } - - PORTAL_FREE(kqswnal_data.kqn_rxds, - sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL + - KQSW_NRXMSGS_LARGE)); -- } -- -- /* resets flags, pointers to NULL etc */ -- memset(&kqswnal_data, 0, sizeof (kqswnal_data)); -- -- CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read(&portal_kmemory)); -- -- printk (KERN_INFO "Lustre: Routing QSW NAL unloaded (final mem %d)\n", -- atomic_read(&portal_kmemory)); --} -- - static int __init - kqswnal_initialise (void) -static int -kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, - ptl_ni_limits_t *requested_limits, - ptl_ni_limits_t *actual_limits) --{ --#if MULTIRAIL_EKC -- EP_RAILMASK all_rails = EP_RAILMASK_ALL; --#else -- ELAN3_DMA_REQUEST dmareq; --#endif -- int rc; -- int i; - kqswnal_rx_t *krx; - kqswnal_tx_t *ktx; -- int elan_page_idx; - ptl_process_id_t my_process_id; -- int pkmem = atomic_read(&portal_kmemory); -- - LASSERT (kqswnal_data.kqn_init == KQN_INIT_NOTHING); - LASSERT (nal == &kqswnal_api); -- - CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&portal_kmemory)); - if (nal->nal_refct != 0) { - if (actual_limits != NULL) - *actual_limits = kqswnal_lib.libnal_ni.ni_actual_limits; - /* This module got the first ref */ - PORTAL_MODULE_USE; - return (PTL_OK); - } -- - kqswnal_api.forward = kqswnal_forward; - kqswnal_api.shutdown = kqswnal_shutdown; - kqswnal_api.yield = kqswnal_yield; - kqswnal_api.validate = NULL; /* our api validate is a NOOP */ - kqswnal_api.lock = kqswnal_lock; - kqswnal_api.unlock = kqswnal_unlock; - kqswnal_api.nal_data = &kqswnal_data; - LASSERT 
(kqswnal_data.kqn_init == KQN_INIT_NOTHING); -- - kqswnal_lib.nal_data = &kqswnal_data; - CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&portal_kmemory)); -- - memset(&kqswnal_rpc_success, 0, sizeof(kqswnal_rpc_success)); - memset(&kqswnal_rpc_failed, 0, sizeof(kqswnal_rpc_failed)); - #if MULTIRAIL_EKC - kqswnal_rpc_failed.Data[0] = -ECONNREFUSED; - #else - kqswnal_rpc_failed.Status = -ECONNREFUSED; - #endif -- /* ensure all pointers NULL etc */ -- memset (&kqswnal_data, 0, sizeof (kqswnal_data)); - - kqswnal_data.kqn_optimized_gets = KQSW_OPTIMIZED_GETS; - kqswnal_data.kqn_copy_small_fwd = KQSW_COPY_SMALL_FWD; - - kqswnal_data.kqn_cb = &kqswnal_lib; -- -- INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds); -- INIT_LIST_HEAD (&kqswnal_data.kqn_nblk_idletxds); -- INIT_LIST_HEAD (&kqswnal_data.kqn_activetxds); -- spin_lock_init (&kqswnal_data.kqn_idletxd_lock); -- init_waitqueue_head (&kqswnal_data.kqn_idletxd_waitq); -- INIT_LIST_HEAD (&kqswnal_data.kqn_idletxd_fwdq); -- -- INIT_LIST_HEAD (&kqswnal_data.kqn_delayedfwds); -- INIT_LIST_HEAD (&kqswnal_data.kqn_delayedtxds); -- INIT_LIST_HEAD (&kqswnal_data.kqn_readyrxds); -- -- spin_lock_init (&kqswnal_data.kqn_sched_lock); -- init_waitqueue_head (&kqswnal_data.kqn_sched_waitq); -- - spin_lock_init (&kqswnal_data.kqn_statelock); - /* Leave kqn_rpc_success zeroed */ -#if MULTIRAIL_EKC - kqswnal_data.kqn_rpc_failed.Data[0] = -ECONNREFUSED; -#else - kqswnal_data.kqn_rpc_failed.Status = -ECONNREFUSED; -#endif -- -- /* pointers/lists/locks initialised */ -- kqswnal_data.kqn_init = KQN_INIT_DATA; - - --#if MULTIRAIL_EKC -- kqswnal_data.kqn_ep = ep_system(); -- if (kqswnal_data.kqn_ep == NULL) { -- CERROR("Can't initialise EKC\n"); - return (-ENODEV); - kqswnal_shutdown(nal); - return (PTL_IFACE_INVALID); -- } -- -- if (ep_waitfor_nodeid(kqswnal_data.kqn_ep) == ELAN_INVALID_NODE) { -- CERROR("Can't get elan ID\n"); - kqswnal_finalise(); - return (-ENODEV); - kqswnal_shutdown(nal); - return (PTL_IFACE_INVALID); -- } --#else -- 
/**********************************************************************/ -- /* Find the first Elan device */ -- -- kqswnal_data.kqn_ep = ep_device (0); -- if (kqswnal_data.kqn_ep == NULL) -- { -- CERROR ("Can't get elan device 0\n"); - return (-ENODEV); - kqswnal_shutdown(nal); - return (PTL_IFACE_INVALID); -- } --#endif -- -- kqswnal_data.kqn_nid_offset = 0; -- kqswnal_data.kqn_nnodes = ep_numnodes (kqswnal_data.kqn_ep); -- kqswnal_data.kqn_elanid = ep_nodeid (kqswnal_data.kqn_ep); -- -- /**********************************************************************/ -- /* Get the transmitter */ -- -- kqswnal_data.kqn_eptx = ep_alloc_xmtr (kqswnal_data.kqn_ep); -- if (kqswnal_data.kqn_eptx == NULL) -- { -- CERROR ("Can't allocate transmitter\n"); - kqswnal_finalise (); - return (-ENOMEM); - kqswnal_shutdown (nal); - return (PTL_NO_SPACE); -- } -- -- /**********************************************************************/ -- /* Get the receivers */ -- -- kqswnal_data.kqn_eprx_small = ep_alloc_rcvr (kqswnal_data.kqn_ep, -- EP_MSG_SVC_PORTALS_SMALL, -- KQSW_EP_ENVELOPES_SMALL); -- if (kqswnal_data.kqn_eprx_small == NULL) -- { -- CERROR ("Can't install small msg receiver\n"); - kqswnal_finalise (); - return (-ENOMEM); - kqswnal_shutdown (nal); - return (PTL_NO_SPACE); -- } -- -- kqswnal_data.kqn_eprx_large = ep_alloc_rcvr (kqswnal_data.kqn_ep, -- EP_MSG_SVC_PORTALS_LARGE, -- KQSW_EP_ENVELOPES_LARGE); -- if (kqswnal_data.kqn_eprx_large == NULL) -- { -- CERROR ("Can't install large msg receiver\n"); - kqswnal_finalise (); - return (-ENOMEM); - kqswnal_shutdown (nal); - return (PTL_NO_SPACE); -- } -- -- /**********************************************************************/ -- /* Reserve Elan address space for transmit descriptors NB we may -- * either send the contents of associated buffers immediately, or -- * map them for the peer to suck/blow... 
*/ --#if MULTIRAIL_EKC -- kqswnal_data.kqn_ep_tx_nmh = -- ep_dvma_reserve(kqswnal_data.kqn_ep, -- KQSW_NTXMSGPAGES*(KQSW_NTXMSGS+KQSW_NNBLK_TXMSGS), -- EP_PERM_WRITE); -- if (kqswnal_data.kqn_ep_tx_nmh == NULL) { -- CERROR("Can't reserve tx dma space\n"); - kqswnal_finalise(); - return (-ENOMEM); - kqswnal_shutdown(nal); - return (PTL_NO_SPACE); -- } --#else -- dmareq.Waitfn = DDI_DMA_SLEEP; -- dmareq.ElanAddr = (E3_Addr) 0; -- dmareq.Attr = PTE_LOAD_LITTLE_ENDIAN; -- dmareq.Perm = ELAN_PERM_REMOTEWRITE; -- -- rc = elan3_dma_reserve(kqswnal_data.kqn_ep->DmaState, -- KQSW_NTXMSGPAGES*(KQSW_NTXMSGS+KQSW_NNBLK_TXMSGS), -- &dmareq, &kqswnal_data.kqn_eptxdmahandle); -- if (rc != DDI_SUCCESS) -- { -- CERROR ("Can't reserve rx dma space\n"); - kqswnal_finalise (); - return (-ENOMEM); - kqswnal_shutdown (nal); - return (PTL_NO_SPACE); -- } --#endif -- /**********************************************************************/ -- /* Reserve Elan address space for receive buffers */ --#if MULTIRAIL_EKC -- kqswnal_data.kqn_ep_rx_nmh = -- ep_dvma_reserve(kqswnal_data.kqn_ep, -- KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL + -- KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE, -- EP_PERM_WRITE); -- if (kqswnal_data.kqn_ep_tx_nmh == NULL) { -- CERROR("Can't reserve rx dma space\n"); - kqswnal_finalise(); - return (-ENOMEM); - kqswnal_shutdown(nal); - return (PTL_NO_SPACE); -- } --#else -- dmareq.Waitfn = DDI_DMA_SLEEP; -- dmareq.ElanAddr = (E3_Addr) 0; -- dmareq.Attr = PTE_LOAD_LITTLE_ENDIAN; -- dmareq.Perm = ELAN_PERM_REMOTEWRITE; -- -- rc = elan3_dma_reserve (kqswnal_data.kqn_ep->DmaState, -- KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL + -- KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE, -- &dmareq, &kqswnal_data.kqn_eprxdmahandle); -- if (rc != DDI_SUCCESS) -- { -- CERROR ("Can't reserve rx dma space\n"); - kqswnal_finalise (); - return (-ENOMEM); - kqswnal_shutdown (nal); - return (PTL_NO_SPACE); -- } --#endif -- /**********************************************************************/ 
-- /* Allocate/Initialise transmit descriptors */ -- - kqswnal_data.kqn_txds = NULL; - PORTAL_ALLOC(kqswnal_data.kqn_txds, - sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS)); - if (kqswnal_data.kqn_txds == NULL) - { - kqswnal_shutdown (nal); - return (PTL_NO_SPACE); - } - - /* clear flags, null pointers etc */ - memset(kqswnal_data.kqn_txds, 0, - sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS)); -- for (i = 0; i < (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS); i++) -- { -- int premapped_pages; - kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i]; -- int basepage = i * KQSW_NTXMSGPAGES; - - PORTAL_ALLOC (ktx, sizeof(*ktx)); - if (ktx == NULL) { - kqswnal_finalise (); - return (-ENOMEM); - } - - ktx->ktx_alloclist = kqswnal_data.kqn_txds; - kqswnal_data.kqn_txds = ktx; -- -- PORTAL_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE); -- if (ktx->ktx_buffer == NULL) -- { - kqswnal_finalise (); - return (-ENOMEM); - kqswnal_shutdown (nal); - return (PTL_NO_SPACE); -- } -- -- /* Map pre-allocated buffer NOW, to save latency on transmit */ -- premapped_pages = kqswnal_pages_spanned(ktx->ktx_buffer, -- KQSW_TX_BUFFER_SIZE); --#if MULTIRAIL_EKC -- ep_dvma_load(kqswnal_data.kqn_ep, NULL, -- ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE, -- kqswnal_data.kqn_ep_tx_nmh, basepage, -- &all_rails, &ktx->ktx_ebuffer); --#else -- elan3_dvma_kaddr_load (kqswnal_data.kqn_ep->DmaState, -- kqswnal_data.kqn_eptxdmahandle, -- ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE, -- basepage, &ktx->ktx_ebuffer); --#endif -- ktx->ktx_basepage = basepage + premapped_pages; /* message mapping starts here */ -- ktx->ktx_npages = KQSW_NTXMSGPAGES - premapped_pages; /* for this many pages */ -- -- INIT_LIST_HEAD (&ktx->ktx_delayed_list); -- -- ktx->ktx_state = KTX_IDLE; -#if MULTIRAIL_EKC - ktx->ktx_rail = -1; /* unset rail */ -#endif -- ktx->ktx_isnblk = (i >= KQSW_NTXMSGS); -- list_add_tail (&ktx->ktx_list, -- ktx->ktx_isnblk ? 
&kqswnal_data.kqn_nblk_idletxds : -- &kqswnal_data.kqn_idletxds); -- } -- -- /**********************************************************************/ -- /* Allocate/Initialise receive descriptors */ -- - kqswnal_data.kqn_rxds = NULL; - PORTAL_ALLOC (kqswnal_data.kqn_rxds, - sizeof (kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE)); - if (kqswnal_data.kqn_rxds == NULL) - { - kqswnal_shutdown (nal); - return (PTL_NO_SPACE); - } - - memset(kqswnal_data.kqn_rxds, 0, /* clear flags, null pointers etc */ - sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL+KQSW_NRXMSGS_LARGE)); - -- elan_page_idx = 0; -- for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++) -- { --#if MULTIRAIL_EKC -- EP_NMD elanbuffer; --#else -- E3_Addr elanbuffer; --#endif -- int j; - - PORTAL_ALLOC(krx, sizeof(*krx)); - if (krx == NULL) { - kqswnal_finalise(); - return (-ENOSPC); - } - - krx->krx_alloclist = kqswnal_data.kqn_rxds; - kqswnal_data.kqn_rxds = krx; - kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i]; -- -- if (i < KQSW_NRXMSGS_SMALL) -- { -- krx->krx_npages = KQSW_NRXMSGPAGES_SMALL; -- krx->krx_eprx = kqswnal_data.kqn_eprx_small; -- } -- else -- { -- krx->krx_npages = KQSW_NRXMSGPAGES_LARGE; -- krx->krx_eprx = kqswnal_data.kqn_eprx_large; -- } -- -- LASSERT (krx->krx_npages > 0); -- for (j = 0; j < krx->krx_npages; j++) -- { -- struct page *page = alloc_page(GFP_KERNEL); -- -- if (page == NULL) { - kqswnal_finalise (); - return (-ENOMEM); - kqswnal_shutdown (nal); - return (PTL_NO_SPACE); -- } -- -- krx->krx_kiov[j].kiov_page = page; -- LASSERT(page_address(page) != NULL); -- --#if MULTIRAIL_EKC -- ep_dvma_load(kqswnal_data.kqn_ep, NULL, -- page_address(page), -- PAGE_SIZE, kqswnal_data.kqn_ep_rx_nmh, -- elan_page_idx, &all_rails, &elanbuffer); -- -- if (j == 0) { -- krx->krx_elanbuffer = elanbuffer; -- } else { -- rc = ep_nmd_merge(&krx->krx_elanbuffer, -- &krx->krx_elanbuffer, -- &elanbuffer); -- /* NB contiguous mapping */ -- LASSERT(rc); -- } --#else -- 
elan3_dvma_kaddr_load(kqswnal_data.kqn_ep->DmaState, -- kqswnal_data.kqn_eprxdmahandle, -- page_address(page), -- PAGE_SIZE, elan_page_idx, -- &elanbuffer); -- if (j == 0) -- krx->krx_elanbuffer = elanbuffer; -- -- /* NB contiguous mapping */ -- LASSERT (elanbuffer == krx->krx_elanbuffer + j * PAGE_SIZE); --#endif -- elan_page_idx++; -- -- } -- } -- LASSERT (elan_page_idx == -- (KQSW_NRXMSGS_SMALL * KQSW_NRXMSGPAGES_SMALL) + -- (KQSW_NRXMSGS_LARGE * KQSW_NRXMSGPAGES_LARGE)); -- -- /**********************************************************************/ -- /* Network interface ready to initialise */ -- - rc = PtlNIInit(kqswnal_init, 32, 4, 0, &kqswnal_ni); - if (rc != 0) - my_process_id.nid = kqswnal_elanid2nid(kqswnal_data.kqn_elanid); - my_process_id.pid = requested_pid; - - rc = lib_init(&kqswnal_lib, nal, my_process_id, - requested_limits, actual_limits); - if (rc != PTL_OK) -- { - CERROR ("PtlNIInit failed %d\n", rc); - kqswnal_finalise (); - return (-ENOMEM); - CERROR ("lib_init failed %d\n", rc); - kqswnal_shutdown (nal); - return (rc); -- } -- - kqswnal_data.kqn_init = KQN_INIT_PTL; - kqswnal_data.kqn_init = KQN_INIT_LIB; -- -- /**********************************************************************/ -- /* Queue receives, now that it's OK to run their completion callbacks */ -- - for (krx = kqswnal_data.kqn_rxds; krx != NULL; krx =krx->krx_alloclist){ - for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++) - { - kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i]; - -- /* NB this enqueue can allocate/sleep (attr == 0) */ - krx->krx_state = KRX_POSTED; --#if MULTIRAIL_EKC -- rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx, -- &krx->krx_elanbuffer, 0); --#else -- rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx, -- krx->krx_elanbuffer, -- krx->krx_npages * PAGE_SIZE, 0); --#endif -- if (rc != EP_SUCCESS) -- { -- CERROR ("failed ep_queue_receive %d\n", rc); - kqswnal_finalise (); - return (-ENOMEM); - kqswnal_shutdown (nal); - return 
(PTL_FAIL); -- } -- } -- -- /**********************************************************************/ -- /* Spawn scheduling threads */ -- for (i = 0; i < num_online_cpus(); i++) { -- rc = kqswnal_thread_start (kqswnal_scheduler, NULL); -- if (rc != 0) -- { -- CERROR ("failed to spawn scheduling thread: %d\n", rc); - kqswnal_finalise (); - return (rc); - kqswnal_shutdown (nal); - return (PTL_FAIL); -- } -- } -- -- /**********************************************************************/ -- /* Connect to the router */ -- rc = kpr_register (&kqswnal_data.kqn_router, &kqswnal_router_interface); -- CDEBUG(D_NET, "Can't initialise routing interface (rc = %d): not routing\n",rc); -- - rc = kportal_nal_register (QSWNAL, &kqswnal_cmd, NULL); - rc = libcfs_nal_cmd_register (QSWNAL, &kqswnal_cmd, NULL); -- if (rc != 0) { -- CERROR ("Can't initialise command interface (rc = %d)\n", rc); - kqswnal_finalise (); - return (rc); - kqswnal_shutdown (nal); - return (PTL_FAIL); -- } - - #if CONFIG_SYSCTL - /* Press on regardless even if registering sysctl doesn't work */ - kqswnal_data.kqn_sysctl = register_sysctl_table (kqswnal_top_ctl_table, 0); - #endif -- - PORTAL_SYMBOL_REGISTER(kqswnal_ni); -- kqswnal_data.kqn_init = KQN_INIT_ALL; -- -- printk(KERN_INFO "Lustre: Routing QSW NAL loaded on node %d of %d " -- "(Routing %s, initial mem %d)\n", -- kqswnal_data.kqn_elanid, kqswnal_data.kqn_nnodes, -- kpr_routing (&kqswnal_data.kqn_router) ? 
"enabled" : "disabled", -- pkmem); -- - return (0); - return (PTL_OK); -} - -void __exit -kqswnal_finalise (void) -{ -#if CONFIG_SYSCTL - if (kqswnal_tunables.kqn_sysctl != NULL) - unregister_sysctl_table (kqswnal_tunables.kqn_sysctl); -#endif - PtlNIFini(kqswnal_ni); - - ptl_unregister_nal(QSWNAL); --} - -static int __init -kqswnal_initialise (void) -{ - int rc; - - kqswnal_api.nal_ni_init = kqswnal_startup; - kqswnal_api.nal_ni_fini = kqswnal_shutdown; - - /* Initialise dynamic tunables to defaults once only */ - kqswnal_tunables.kqn_optimized_puts = KQSW_OPTIMIZED_PUTS; - kqswnal_tunables.kqn_optimized_gets = KQSW_OPTIMIZED_GETS; - - rc = ptl_register_nal(QSWNAL, &kqswnal_api); - if (rc != PTL_OK) { - CERROR("Can't register QSWNAL: %d\n", rc); - return (-ENOMEM); /* or something... */ - } - - /* Pure gateways, and the workaround for 'EKC blocks forever until - * the service is active' want the NAL started up at module load - * time... */ - rc = PtlNIInit(QSWNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kqswnal_ni); - if (rc != PTL_OK && rc != PTL_IFACE_DUP) { - ptl_unregister_nal(QSWNAL); - return (-ENODEV); - } -- -#if CONFIG_SYSCTL - /* Press on regardless even if registering sysctl doesn't work */ - kqswnal_tunables.kqn_sysctl = - register_sysctl_table (kqswnal_top_ctl_table, 0); -#endif - return (0); -} -- --MODULE_AUTHOR("Cluster File Systems, Inc. "); --MODULE_DESCRIPTION("Kernel Quadrics/Elan NAL v1.01"); --MODULE_LICENSE("GPL"); -- --module_init (kqswnal_initialise); --module_exit (kqswnal_finalise); - - EXPORT_SYMBOL (kqswnal_ni); diff --cc lnet/klnds/qswlnd/qswlnd.h index f96893f,438edc6..0000000 deleted file mode 100644,100644 --- a/lnet/klnds/qswlnd/qswlnd.h +++ /dev/null @@@ -1,360 -1,374 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (C) 2001 Cluster File Systems, Inc. -- * -- * This file is part of Lustre, http://www.lustre.org. 
-- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- * - * Basic library routines. - * Basic library routines. -- * -- */ -- --#ifndef _QSWNAL_H --#define _QSWNAL_H --#ifndef EXPORT_SYMTAB --# define EXPORT_SYMTAB --#endif -- --#include --#undef printf /* nasty QSW #define */ -- --#include --#include -- --#if MULTIRAIL_EKC --# include --#else --# include --# include --# include --# include --# include --# include --# include --# include --# include --#endif -- --#include --#include --#include --#include --#include --#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) --#include /* wait_on_buffer */ --#else --#include /* wait_on_buffer */ --#endif --#include --#include --#include -- --#include --#include -- --#include --#include --#include --#include --#include --#include -- --#define DEBUG_SUBSYSTEM S_QSWNAL -- --#include --#include --#include --#include --#include -- --#define KQSW_CHECKSUM 0 --#if KQSW_CHECKSUM --typedef unsigned long kqsw_csum_t; --#define KQSW_CSUM_SIZE (2 * sizeof (kqsw_csum_t)) --#else --#define KQSW_CSUM_SIZE 0 --#endif --#define KQSW_HDR_SIZE (sizeof (ptl_hdr_t) + KQSW_CSUM_SIZE) -- --/* -- * Performance Tuning defines -- * NB no mention of PAGE_SIZE for interoperability -- */ --#define KQSW_MAXPAYLOAD PTL_MTU --#define KQSW_SMALLPAYLOAD ((4<<10) - KQSW_HDR_SIZE) /* small/large ep receiver breakpoint */ -- --#define KQSW_TX_MAXCONTIG (1<<10) /* largest payload 
that gets made contiguous on transmit */ -- --#define KQSW_NTXMSGS 8 /* # normal transmit messages */ --#define KQSW_NNBLK_TXMSGS 256 /* # reserved transmit messages if can't block */ -- --#define KQSW_NRXMSGS_LARGE 64 /* # large receive buffers */ --#define KQSW_EP_ENVELOPES_LARGE 128 /* # large ep envelopes */ -- --#define KQSW_NRXMSGS_SMALL 256 /* # small receive buffers */ --#define KQSW_EP_ENVELOPES_SMALL 2048 /* # small ep envelopes */ -- --#define KQSW_RESCHED 100 /* # busy loops that forces scheduler to yield */ -- - #define KQSW_OPTIMIZED_GETS 1 /* optimized gets? */ -#define KQSW_OPTIMIZED_GETS 1 /* optimize gets >= this size */ -#define KQSW_OPTIMIZED_PUTS (32<<10) /* optimize puts >= this size */ --#define KQSW_COPY_SMALL_FWD 0 /* copy small fwd messages to pre-mapped buffer? */ -- --/* -- * derived constants -- */ -- --#define KQSW_TX_BUFFER_SIZE (KQSW_HDR_SIZE + KQSW_TX_MAXCONTIG) --/* The pre-allocated tx buffer (hdr + small payload) */ -- --#define KQSW_NTXMSGPAGES (btopr(KQSW_TX_BUFFER_SIZE) + 1 + btopr(KQSW_MAXPAYLOAD) + 1) --/* Reserve elan address space for pre-allocated and pre-mapped transmit -- * buffer and a full payload too. 
Extra pages allow for page alignment */ -- --#define KQSW_NRXMSGPAGES_SMALL (btopr(KQSW_HDR_SIZE + KQSW_SMALLPAYLOAD)) --/* receive hdr/payload always contiguous and page aligned */ --#define KQSW_NRXMSGBYTES_SMALL (KQSW_NRXMSGPAGES_SMALL * PAGE_SIZE) -- --#define KQSW_NRXMSGPAGES_LARGE (btopr(KQSW_HDR_SIZE + KQSW_MAXPAYLOAD)) --/* receive hdr/payload always contiguous and page aligned */ --#define KQSW_NRXMSGBYTES_LARGE (KQSW_NRXMSGPAGES_LARGE * PAGE_SIZE) --/* biggest complete packet we can receive (or transmit) */ -- --/* Remote memory descriptor */ --typedef struct --{ -- __u32 kqrmd_nfrag; /* # frags */ --#if MULTIRAIL_EKC -- EP_NMD kqrmd_frag[0]; /* actual frags */ --#else -- EP_IOVEC kqrmd_frag[0]; /* actual frags */ --#endif --} kqswnal_remotemd_t; -- - typedef struct kqswnal_rx -typedef struct --{ -- struct list_head krx_list; /* enqueue -> thread */ - struct kqswnal_rx *krx_alloclist; /* stack in kqn_rxds */ -- EP_RCVR *krx_eprx; /* port to post receives to */ -- EP_RXD *krx_rxd; /* receive descriptor (for repost) */ --#if MULTIRAIL_EKC -- EP_NMD krx_elanbuffer; /* contiguous Elan buffer */ --#else -- E3_Addr krx_elanbuffer; /* contiguous Elan buffer */ --#endif -- int krx_npages; /* # pages in receive buffer */ -- int krx_nob; /* Number Of Bytes received into buffer */ -- int krx_rpc_reply_needed; /* peer waiting for EKC RPC reply */ - int krx_rpc_reply_sent; /* rpc reply sent */ - int krx_rpc_reply_status; /* what status to send */ - int krx_state; /* what this RX is doing */ -- atomic_t krx_refcount; /* how to tell when rpc is done */ -- kpr_fwd_desc_t krx_fwd; /* embedded forwarding descriptor */ -- ptl_kiov_t krx_kiov[KQSW_NRXMSGPAGES_LARGE]; /* buffer frags */ --} kqswnal_rx_t; -- - typedef struct kqswnal_tx -#define KRX_POSTED 1 /* receiving */ -#define KRX_PARSE 2 /* ready to be parsed */ -#define KRX_COMPLETING 3 /* waiting to be completed */ - - -typedef struct --{ -- struct list_head ktx_list; /* enqueue idle/active */ -- struct list_head 
ktx_delayed_list; /* enqueue delayedtxds */ - struct kqswnal_tx *ktx_alloclist; /* stack in kqn_txds */ -- unsigned int ktx_isnblk:1; /* reserved descriptor? */ -- unsigned int ktx_state:7; /* What I'm doing */ -- unsigned int ktx_firsttmpfrag:1; /* ktx_frags[0] is in my ebuffer ? 0 : 1 */ -- uint32_t ktx_basepage; /* page offset in reserved elan tx vaddrs for mapping pages */ -- int ktx_npages; /* pages reserved for mapping messages */ -- int ktx_nmappedpages; /* # pages mapped for current message */ -- int ktx_port; /* destination ep port */ -- ptl_nid_t ktx_nid; /* destination node */ - void *ktx_args[2]; /* completion passthru */ - void *ktx_args[3]; /* completion passthru */ -- char *ktx_buffer; /* pre-allocated contiguous buffer for hdr + small payloads */ -- unsigned long ktx_launchtime; /* when (in jiffies) the transmit was launched */ -- -- /* debug/info fields */ -- pid_t ktx_launcher; /* pid of launching process */ - ptl_hdr_t *ktx_wire_hdr; /* portals header (wire endian) */ -- -- int ktx_nfrag; /* # message frags */ --#if MULTIRAIL_EKC - int ktx_rail; /* preferred rail */ -- EP_NMD ktx_ebuffer; /* elan mapping of ktx_buffer */ -- EP_NMD ktx_frags[EP_MAXFRAG];/* elan mapping of msg frags */ --#else -- E3_Addr ktx_ebuffer; /* elan address of ktx_buffer */ -- EP_IOVEC ktx_frags[EP_MAXFRAG];/* msg frags (elan vaddrs) */ --#endif --} kqswnal_tx_t; -- --#define KTX_IDLE 0 /* on kqn_(nblk_)idletxds */ - #define KTX_SENDING 1 /* local send */ - #define KTX_FORWARDING 2 /* routing a packet */ - #define KTX_GETTING 3 /* local optimised get */ -#define KTX_FORWARDING 1 /* sending a forwarded packet */ -#define KTX_SENDING 2 /* normal send */ -#define KTX_GETTING 3 /* sending optimised get */ -#define KTX_PUTTING 4 /* sending optimised put */ -#define KTX_RDMAING 5 /* handling optimised put/get */ - -typedef struct -{ - /* dynamic tunables... */ - int kqn_optimized_puts; /* optimized PUTs? */ - int kqn_optimized_gets; /* optimized GETs? 
*/ -#if CONFIG_SYSCTL - struct ctl_table_header *kqn_sysctl; /* sysctl interface */ -#endif -} kqswnal_tunables_t; -- --typedef struct --{ -- char kqn_init; /* what's been initialised */ -- char kqn_shuttingdown; /* I'm trying to shut down */ - atomic_t kqn_nthreads; /* # threads not terminated */ - atomic_t kqn_nthreads_running;/* # threads still running */ - - int kqn_optimized_gets; /* optimized GETs? */ - int kqn_copy_small_fwd; /* fwd small msgs from pre-allocated buffer? */ - atomic_t kqn_nthreads; /* # threads running */ -- - #if CONFIG_SYSCTL - struct ctl_table_header *kqn_sysctl; /* sysctl interface */ - #endif - kqswnal_rx_t *kqn_rxds; /* stack of all the receive descriptors */ - kqswnal_tx_t *kqn_txds; /* stack of all the transmit descriptors */ - kqswnal_rx_t *kqn_rxds; /* all the receive descriptors */ - kqswnal_tx_t *kqn_txds; /* all the transmit descriptors */ -- -- struct list_head kqn_idletxds; /* transmit descriptors free to use */ -- struct list_head kqn_nblk_idletxds; /* reserved free transmit descriptors */ -- struct list_head kqn_activetxds; /* transmit descriptors being used */ -- spinlock_t kqn_idletxd_lock; /* serialise idle txd access */ -- wait_queue_head_t kqn_idletxd_waitq; /* sender blocks here waiting for idle txd */ -- struct list_head kqn_idletxd_fwdq; /* forwarded packets block here waiting for idle txd */ - atomic_t kqn_pending_txs; /* # transmits being prepped */ -- -- spinlock_t kqn_sched_lock; /* serialise packet schedulers */ -- wait_queue_head_t kqn_sched_waitq; /* scheduler blocks here */ -- -- struct list_head kqn_readyrxds; /* rxds full of data */ -- struct list_head kqn_delayedfwds; /* delayed forwards */ -- struct list_head kqn_delayedtxds; /* delayed transmits */ -- - spinlock_t kqn_statelock; /* cb_cli/cb_sti */ - nal_cb_t *kqn_cb; /* -> kqswnal_lib */ --#if MULTIRAIL_EKC -- EP_SYS *kqn_ep; /* elan system */ -- EP_NMH *kqn_ep_tx_nmh; /* elan reserved tx vaddrs */ -- EP_NMH *kqn_ep_rx_nmh; /* elan reserved rx vaddrs */ 
--#else -- EP_DEV *kqn_ep; /* elan device */ -- ELAN3_DMA_HANDLE *kqn_eptxdmahandle; /* elan reserved tx vaddrs */ -- ELAN3_DMA_HANDLE *kqn_eprxdmahandle; /* elan reserved rx vaddrs */ --#endif -- EP_XMTR *kqn_eptx; /* elan transmitter */ -- EP_RCVR *kqn_eprx_small; /* elan receiver (small messages) */ -- EP_RCVR *kqn_eprx_large; /* elan receiver (large messages) */ -- kpr_router_t kqn_router; /* connection to Kernel Portals Router module */ -- -- ptl_nid_t kqn_nid_offset; /* this cluster's NID offset */ -- int kqn_nnodes; /* this cluster's size */ -- int kqn_elanid; /* this nodes's elan ID */ - - EP_STATUSBLK kqn_rpc_success; /* preset RPC reply status blocks */ - EP_STATUSBLK kqn_rpc_failed; --} kqswnal_data_t; -- --/* kqn_init state */ --#define KQN_INIT_NOTHING 0 /* MUST BE ZERO so zeroed state is initialised OK */ --#define KQN_INIT_DATA 1 - #define KQN_INIT_PTL 2 -#define KQN_INIT_LIB 2 --#define KQN_INIT_ALL 3 - - extern nal_cb_t kqswnal_lib; - extern nal_t kqswnal_api; - extern kqswnal_data_t kqswnal_data; -- - /* global pre-prepared replies to keep off the stack */ - extern EP_STATUSBLK kqswnal_rpc_success; - extern EP_STATUSBLK kqswnal_rpc_failed; -extern lib_nal_t kqswnal_lib; -extern nal_t kqswnal_api; -extern kqswnal_tunables_t kqswnal_tunables; -extern kqswnal_data_t kqswnal_data; -- --extern int kqswnal_thread_start (int (*fn)(void *arg), void *arg); --extern void kqswnal_rxhandler(EP_RXD *rxd); --extern int kqswnal_scheduler (void *); --extern void kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd); - extern void kqswnal_dma_reply_complete (EP_RXD *rxd); - extern void kqswnal_requeue_rx (kqswnal_rx_t *krx); -extern void kqswnal_rx_done (kqswnal_rx_t *krx); -- --static inline ptl_nid_t --kqswnal_elanid2nid (int elanid) --{ -- return (kqswnal_data.kqn_nid_offset + elanid); --} -- --static inline int --kqswnal_nid2elanid (ptl_nid_t nid) --{ -- /* not in this cluster? 
*/ -- if (nid < kqswnal_data.kqn_nid_offset || -- nid >= kqswnal_data.kqn_nid_offset + kqswnal_data.kqn_nnodes) -- return (-1); -- -- return (nid - kqswnal_data.kqn_nid_offset); -} - -static inline ptl_nid_t -kqswnal_rx_nid(kqswnal_rx_t *krx) -{ - return (kqswnal_elanid2nid(ep_rxd_node(krx->krx_rxd))); --} -- --static inline int --kqswnal_pages_spanned (void *base, int nob) --{ -- unsigned long first_page = ((unsigned long)base) >> PAGE_SHIFT; -- unsigned long last_page = (((unsigned long)base) + (nob - 1)) >> PAGE_SHIFT; -- -- LASSERT (last_page >= first_page); /* can't wrap address space */ -- return (last_page - first_page + 1); --} -- --#if KQSW_CHECKSUM --static inline kqsw_csum_t kqsw_csum (kqsw_csum_t sum, void *base, int nob) --{ -- unsigned char *ptr = (unsigned char *)base; -- -- while (nob-- > 0) -- sum += *ptr++; -- -- return (sum); --} --#endif -- - static inline void kqswnal_rx_done (kqswnal_rx_t *krx) -static inline void kqswnal_rx_decref (kqswnal_rx_t *krx) --{ -- LASSERT (atomic_read (&krx->krx_refcount) > 0); -- if (atomic_dec_and_test (&krx->krx_refcount)) - kqswnal_requeue_rx(krx); - kqswnal_rx_done(krx); --} -- --#if MULTIRAIL_EKC --# ifndef EP_RAILMASK_ALL --# error "old (unsupported) version of EKC headers" --# endif --#else --/* multirail defines these in */ --#define EP_MSG_SVC_PORTALS_SMALL (0x10) /* Portals over elan port number (large payloads) */ --#define EP_MSG_SVC_PORTALS_LARGE (0x11) /* Portals over elan port number (small payloads) */ --/* NB small/large message sizes are GLOBAL constants */ -- --/* A minimal attempt to minimise inline #ifdeffing */ -- --#define EP_SUCCESS ESUCCESS --#define EP_ENOMEM ENOMEM -- --static inline EP_XMTR * --ep_alloc_xmtr(EP_DEV *e) --{ -- return (ep_alloc_large_xmtr(e)); --} -- --static inline EP_RCVR * --ep_alloc_rcvr(EP_DEV *e, int svc, int nenv) --{ -- return (ep_install_large_rcvr(e, svc, nenv)); --} -- --static inline void --ep_free_xmtr(EP_XMTR *x) --{ -- ep_free_large_xmtr(x); --} -- --static 
inline void --ep_free_rcvr(EP_RCVR *r) --{ -- ep_remove_large_rcvr(r); --} --#endif -- --#endif /* _QSWNAL_H */ diff --cc lnet/klnds/qswlnd/qswlnd_cb.c index 08453a0,97b5a26..0000000 deleted file mode 100644,100644 --- a/lnet/klnds/qswlnd/qswlnd_cb.c +++ /dev/null @@@ -1,1852 -1,2000 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (C) 2002 Cluster File Systems, Inc. -- * Author: Eric Barton -- * -- * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL) -- * W. Marcus Miller - Based on ksocknal -- * -- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ -- * -- * Portals is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Portals is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Portals; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-- * -- */ -- --#include "qswnal.h" - - EP_STATUSBLK kqswnal_rpc_success; - EP_STATUSBLK kqswnal_rpc_failed; -- --/* -- * LIB functions follow -- * -- */ - static ptl_err_t - kqswnal_read(nal_cb_t *nal, void *private, void *dst_addr, user_ptr src_addr, - size_t len) - { - CDEBUG (D_NET, LPX64": reading "LPSZ" bytes from %p -> %p\n", - nal->ni.nid, len, src_addr, dst_addr ); - memcpy( dst_addr, src_addr, len ); - - return (PTL_OK); - } - - static ptl_err_t - kqswnal_write(nal_cb_t *nal, void *private, user_ptr dst_addr, void *src_addr, - size_t len) - { - CDEBUG (D_NET, LPX64": writing "LPSZ" bytes from %p -> %p\n", - nal->ni.nid, len, src_addr, dst_addr ); - memcpy( dst_addr, src_addr, len ); - - return (PTL_OK); - } - - static void * - kqswnal_malloc(nal_cb_t *nal, size_t len) - { - void *buf; - - PORTAL_ALLOC(buf, len); - return (buf); - } - - static void - kqswnal_free(nal_cb_t *nal, void *buf, size_t len) - { - PORTAL_FREE(buf, len); - } - - static void - kqswnal_printf (nal_cb_t * nal, const char *fmt, ...) 
- { - va_list ap; - char msg[256]; - - va_start (ap, fmt); - vsnprintf (msg, sizeof (msg), fmt, ap); /* sprint safely */ - va_end (ap); - - msg[sizeof (msg) - 1] = 0; /* ensure terminated */ - - CDEBUG (D_NET, "%s", msg); - } - - #if (defined(CONFIG_SPARC32) || defined(CONFIG_SPARC64)) - # error "Can't save/restore irq contexts in different procedures" - #endif - - static void - kqswnal_cli(nal_cb_t *nal, unsigned long *flags) - { - kqswnal_data_t *data= nal->nal_data; - - spin_lock_irqsave(&data->kqn_statelock, *flags); - } - - - static void - kqswnal_sti(nal_cb_t *nal, unsigned long *flags) - { - kqswnal_data_t *data= nal->nal_data; - - spin_unlock_irqrestore(&data->kqn_statelock, *flags); - } - - --static int - kqswnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist) -kqswnal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist) --{ - if (nid == nal->ni.nid) - if (nid == nal->libnal_ni.ni_pid.nid) -- *dist = 0; /* it's me */ -- else if (kqswnal_nid2elanid (nid) >= 0) -- *dist = 1; /* it's my peer */ -- else -- *dist = 2; /* via router */ -- return (0); --} -- --void --kqswnal_notify_peer_down(kqswnal_tx_t *ktx) --{ -- struct timeval now; -- time_t then; -- -- do_gettimeofday (&now); -- then = now.tv_sec - (jiffies - ktx->ktx_launchtime)/HZ; -- -- kpr_notify(&kqswnal_data.kqn_router, ktx->ktx_nid, 0, then); --} -- --void --kqswnal_unmap_tx (kqswnal_tx_t *ktx) --{ --#if MULTIRAIL_EKC -- int i; - - ktx->ktx_rail = -1; /* unset rail */ --#endif -- -- if (ktx->ktx_nmappedpages == 0) -- return; -- --#if MULTIRAIL_EKC -- CDEBUG(D_NET, "%p unloading %d frags starting at %d\n", -- ktx, ktx->ktx_nfrag, ktx->ktx_firsttmpfrag); -- -- for (i = ktx->ktx_firsttmpfrag; i < ktx->ktx_nfrag; i++) -- ep_dvma_unload(kqswnal_data.kqn_ep, -- kqswnal_data.kqn_ep_tx_nmh, -- &ktx->ktx_frags[i]); --#else -- CDEBUG (D_NET, "%p[%d] unloading pages %d for %d\n", -- ktx, ktx->ktx_nfrag, ktx->ktx_basepage, ktx->ktx_nmappedpages); -- -- LASSERT (ktx->ktx_nmappedpages <= 
ktx->ktx_npages); -- LASSERT (ktx->ktx_basepage + ktx->ktx_nmappedpages <= -- kqswnal_data.kqn_eptxdmahandle->NumDvmaPages); -- -- elan3_dvma_unload(kqswnal_data.kqn_ep->DmaState, -- kqswnal_data.kqn_eptxdmahandle, -- ktx->ktx_basepage, ktx->ktx_nmappedpages); --#endif -- ktx->ktx_nmappedpages = 0; --} -- --int --kqswnal_map_tx_kiov (kqswnal_tx_t *ktx, int offset, int nob, int niov, ptl_kiov_t *kiov) --{ -- int nfrags = ktx->ktx_nfrag; -- int nmapped = ktx->ktx_nmappedpages; -- int maxmapped = ktx->ktx_npages; -- uint32_t basepage = ktx->ktx_basepage + nmapped; -- char *ptr; --#if MULTIRAIL_EKC -- EP_RAILMASK railmask; - int rail = ep_xmtr_prefrail(kqswnal_data.kqn_eptx, - EP_RAILMASK_ALL, - kqswnal_nid2elanid(ktx->ktx_nid)); - - int rail; - - if (ktx->ktx_rail < 0) - ktx->ktx_rail = ep_xmtr_prefrail(kqswnal_data.kqn_eptx, - EP_RAILMASK_ALL, - kqswnal_nid2elanid(ktx->ktx_nid)); - rail = ktx->ktx_rail; -- if (rail < 0) { -- CERROR("No rails available for "LPX64"\n", ktx->ktx_nid); -- return (-ENETDOWN); -- } -- railmask = 1 << rail; --#endif -- LASSERT (nmapped <= maxmapped); -- LASSERT (nfrags >= ktx->ktx_firsttmpfrag); -- LASSERT (nfrags <= EP_MAXFRAG); -- LASSERT (niov > 0); -- LASSERT (nob > 0); -- -- /* skip complete frags before 'offset' */ -- while (offset >= kiov->kiov_len) { -- offset -= kiov->kiov_len; -- kiov++; -- niov--; -- LASSERT (niov > 0); -- } -- -- do { -- int fraglen = kiov->kiov_len - offset; -- - /* nob exactly spans the iovs */ - LASSERT (fraglen <= nob); - /* each frag fits in a page */ - /* each page frag is contained in one page */ -- LASSERT (kiov->kiov_offset + kiov->kiov_len <= PAGE_SIZE); - - if (fraglen > nob) - fraglen = nob; -- -- nmapped++; -- if (nmapped > maxmapped) { -- CERROR("Can't map message in %d pages (max %d)\n", -- nmapped, maxmapped); -- return (-EMSGSIZE); -- } -- -- if (nfrags == EP_MAXFRAG) { -- CERROR("Message too fragmented in Elan VM (max %d frags)\n", -- EP_MAXFRAG); -- return (-EMSGSIZE); -- } -- -- /* XXX this 
is really crap, but we'll have to kmap until -- * EKC has a page (rather than vaddr) mapping interface */ -- -- ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset + offset; -- -- CDEBUG(D_NET, -- "%p[%d] loading %p for %d, page %d, %d total\n", -- ktx, nfrags, ptr, fraglen, basepage, nmapped); -- --#if MULTIRAIL_EKC -- ep_dvma_load(kqswnal_data.kqn_ep, NULL, -- ptr, fraglen, -- kqswnal_data.kqn_ep_tx_nmh, basepage, -- &railmask, &ktx->ktx_frags[nfrags]); -- -- if (nfrags == ktx->ktx_firsttmpfrag || -- !ep_nmd_merge(&ktx->ktx_frags[nfrags - 1], -- &ktx->ktx_frags[nfrags - 1], -- &ktx->ktx_frags[nfrags])) { -- /* new frag if this is the first or can't merge */ -- nfrags++; -- } --#else -- elan3_dvma_kaddr_load (kqswnal_data.kqn_ep->DmaState, -- kqswnal_data.kqn_eptxdmahandle, -- ptr, fraglen, -- basepage, &ktx->ktx_frags[nfrags].Base); -- -- if (nfrags > 0 && /* previous frag mapped */ -- ktx->ktx_frags[nfrags].Base == /* contiguous with this one */ -- (ktx->ktx_frags[nfrags-1].Base + ktx->ktx_frags[nfrags-1].Len)) -- /* just extend previous */ -- ktx->ktx_frags[nfrags - 1].Len += fraglen; -- else { -- ktx->ktx_frags[nfrags].Len = fraglen; -- nfrags++; /* new frag */ -- } --#endif -- -- kunmap (kiov->kiov_page); -- -- /* keep in loop for failure case */ -- ktx->ktx_nmappedpages = nmapped; -- -- basepage++; -- kiov++; -- niov--; -- nob -= fraglen; -- offset = 0; -- -- /* iov must not run out before end of data */ -- LASSERT (nob == 0 || niov > 0); -- -- } while (nob > 0); -- -- ktx->ktx_nfrag = nfrags; -- CDEBUG (D_NET, "%p got %d frags over %d pages\n", -- ktx, ktx->ktx_nfrag, ktx->ktx_nmappedpages); -- -- return (0); --} -- --int --kqswnal_map_tx_iov (kqswnal_tx_t *ktx, int offset, int nob, -- int niov, struct iovec *iov) --{ -- int nfrags = ktx->ktx_nfrag; -- int nmapped = ktx->ktx_nmappedpages; -- int maxmapped = ktx->ktx_npages; -- uint32_t basepage = ktx->ktx_basepage + nmapped; --#if MULTIRAIL_EKC -- EP_RAILMASK railmask; - int rail = 
ep_xmtr_prefrail(kqswnal_data.kqn_eptx, - EP_RAILMASK_ALL, - kqswnal_nid2elanid(ktx->ktx_nid)); - int rail; -- - if (ktx->ktx_rail < 0) - ktx->ktx_rail = ep_xmtr_prefrail(kqswnal_data.kqn_eptx, - EP_RAILMASK_ALL, - kqswnal_nid2elanid(ktx->ktx_nid)); - rail = ktx->ktx_rail; -- if (rail < 0) { -- CERROR("No rails available for "LPX64"\n", ktx->ktx_nid); -- return (-ENETDOWN); -- } -- railmask = 1 << rail; --#endif -- LASSERT (nmapped <= maxmapped); -- LASSERT (nfrags >= ktx->ktx_firsttmpfrag); -- LASSERT (nfrags <= EP_MAXFRAG); -- LASSERT (niov > 0); -- LASSERT (nob > 0); -- -- /* skip complete frags before offset */ -- while (offset >= iov->iov_len) { -- offset -= iov->iov_len; -- iov++; -- niov--; -- LASSERT (niov > 0); -- } -- -- do { -- int fraglen = iov->iov_len - offset; - long npages = kqswnal_pages_spanned (iov->iov_base, fraglen); - - /* nob exactly spans the iovs */ - LASSERT (fraglen <= nob); - long npages; -- - if (fraglen > nob) - fraglen = nob; - npages = kqswnal_pages_spanned (iov->iov_base, fraglen); - -- nmapped += npages; -- if (nmapped > maxmapped) { -- CERROR("Can't map message in %d pages (max %d)\n", -- nmapped, maxmapped); -- return (-EMSGSIZE); -- } -- -- if (nfrags == EP_MAXFRAG) { -- CERROR("Message too fragmented in Elan VM (max %d frags)\n", -- EP_MAXFRAG); -- return (-EMSGSIZE); -- } -- -- CDEBUG(D_NET, -- "%p[%d] loading %p for %d, pages %d for %ld, %d total\n", -- ktx, nfrags, iov->iov_base + offset, fraglen, -- basepage, npages, nmapped); -- --#if MULTIRAIL_EKC -- ep_dvma_load(kqswnal_data.kqn_ep, NULL, -- iov->iov_base + offset, fraglen, -- kqswnal_data.kqn_ep_tx_nmh, basepage, -- &railmask, &ktx->ktx_frags[nfrags]); -- -- if (nfrags == ktx->ktx_firsttmpfrag || -- !ep_nmd_merge(&ktx->ktx_frags[nfrags - 1], -- &ktx->ktx_frags[nfrags - 1], -- &ktx->ktx_frags[nfrags])) { -- /* new frag if this is the first or can't merge */ -- nfrags++; -- } --#else -- elan3_dvma_kaddr_load (kqswnal_data.kqn_ep->DmaState, -- 
kqswnal_data.kqn_eptxdmahandle, -- iov->iov_base + offset, fraglen, -- basepage, &ktx->ktx_frags[nfrags].Base); -- -- if (nfrags > 0 && /* previous frag mapped */ -- ktx->ktx_frags[nfrags].Base == /* contiguous with this one */ -- (ktx->ktx_frags[nfrags-1].Base + ktx->ktx_frags[nfrags-1].Len)) -- /* just extend previous */ -- ktx->ktx_frags[nfrags - 1].Len += fraglen; -- else { -- ktx->ktx_frags[nfrags].Len = fraglen; -- nfrags++; /* new frag */ -- } --#endif -- -- /* keep in loop for failure case */ -- ktx->ktx_nmappedpages = nmapped; -- -- basepage += npages; -- iov++; -- niov--; -- nob -= fraglen; -- offset = 0; -- -- /* iov must not run out before end of data */ -- LASSERT (nob == 0 || niov > 0); -- -- } while (nob > 0); -- -- ktx->ktx_nfrag = nfrags; -- CDEBUG (D_NET, "%p got %d frags over %d pages\n", -- ktx, ktx->ktx_nfrag, ktx->ktx_nmappedpages); -- -- return (0); --} -- -- --void --kqswnal_put_idle_tx (kqswnal_tx_t *ktx) --{ -- kpr_fwd_desc_t *fwd = NULL; -- unsigned long flags; -- -- kqswnal_unmap_tx (ktx); /* release temporary mappings */ -- ktx->ktx_state = KTX_IDLE; -- -- spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags); -- -- list_del (&ktx->ktx_list); /* take off active list */ -- -- if (ktx->ktx_isnblk) { -- /* reserved for non-blocking tx */ -- list_add (&ktx->ktx_list, &kqswnal_data.kqn_nblk_idletxds); -- spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); -- return; -- } -- -- list_add (&ktx->ktx_list, &kqswnal_data.kqn_idletxds); -- -- /* anything blocking for a tx descriptor? */ - if (!list_empty(&kqswnal_data.kqn_idletxd_fwdq)) /* forwarded packet? */ - if (!kqswnal_data.kqn_shuttingdown && - !list_empty(&kqswnal_data.kqn_idletxd_fwdq)) /* forwarded packet? 
*/ -- { -- CDEBUG(D_NET,"wakeup fwd\n"); -- -- fwd = list_entry (kqswnal_data.kqn_idletxd_fwdq.next, -- kpr_fwd_desc_t, kprfd_list); -- list_del (&fwd->kprfd_list); -- } -- -- wake_up (&kqswnal_data.kqn_idletxd_waitq); -- -- spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); -- -- if (fwd == NULL) -- return; -- -- /* schedule packet for forwarding again */ -- spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); -- -- list_add_tail (&fwd->kprfd_list, &kqswnal_data.kqn_delayedfwds); -- wake_up (&kqswnal_data.kqn_sched_waitq); -- -- spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags); --} -- --kqswnal_tx_t * --kqswnal_get_idle_tx (kpr_fwd_desc_t *fwd, int may_block) --{ -- unsigned long flags; -- kqswnal_tx_t *ktx = NULL; -- -- for (;;) { -- spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags); - - if (kqswnal_data.kqn_shuttingdown) - break; -- -- /* "normal" descriptor is free */ -- if (!list_empty (&kqswnal_data.kqn_idletxds)) { -- ktx = list_entry (kqswnal_data.kqn_idletxds.next, -- kqswnal_tx_t, ktx_list); -- break; -- } - - /* "normal" descriptor pool is empty */ -- - if (fwd != NULL) { /* forwarded packet => queue for idle txd */ - CDEBUG (D_NET, "blocked fwd [%p]\n", fwd); - list_add_tail (&fwd->kprfd_list, - &kqswnal_data.kqn_idletxd_fwdq); - if (fwd != NULL) /* forwarded packet? 
*/ -- break; - } -- -- /* doing a local transmit */ -- if (!may_block) { -- if (list_empty (&kqswnal_data.kqn_nblk_idletxds)) { -- CERROR ("intr tx desc pool exhausted\n"); -- break; -- } -- -- ktx = list_entry (kqswnal_data.kqn_nblk_idletxds.next, -- kqswnal_tx_t, ktx_list); -- break; -- } -- -- /* block for idle tx */ -- -- spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); -- -- CDEBUG (D_NET, "blocking for tx desc\n"); -- wait_event (kqswnal_data.kqn_idletxd_waitq, - !list_empty (&kqswnal_data.kqn_idletxds)); - !list_empty (&kqswnal_data.kqn_idletxds) || - kqswnal_data.kqn_shuttingdown); -- } -- -- if (ktx != NULL) { -- list_del (&ktx->ktx_list); -- list_add (&ktx->ktx_list, &kqswnal_data.kqn_activetxds); -- ktx->ktx_launcher = current->pid; - atomic_inc(&kqswnal_data.kqn_pending_txs); - } else if (fwd != NULL) { - /* queue forwarded packet until idle txd available */ - CDEBUG (D_NET, "blocked fwd [%p]\n", fwd); - list_add_tail (&fwd->kprfd_list, - &kqswnal_data.kqn_idletxd_fwdq); -- } -- -- spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); -- -- /* Idle descs can't have any mapped (as opposed to pre-mapped) pages */ -- LASSERT (ktx == NULL || ktx->ktx_nmappedpages == 0); -- -- return (ktx); --} -- --void --kqswnal_tx_done (kqswnal_tx_t *ktx, int error) --{ - lib_msg_t *msg; - lib_msg_t *repmsg = NULL; - -- switch (ktx->ktx_state) { -- case KTX_FORWARDING: /* router asked me to forward this packet */ -- kpr_fwd_done (&kqswnal_data.kqn_router, -- (kpr_fwd_desc_t *)ktx->ktx_args[0], error); -- break; -- - case KTX_SENDING: /* packet sourced locally */ - lib_finalize (&kqswnal_lib, ktx->ktx_args[0], - case KTX_RDMAING: /* optimized GET/PUT handled */ - case KTX_PUTTING: /* optimized PUT sent */ - case KTX_SENDING: /* normal send */ - lib_finalize (&kqswnal_lib, NULL, -- (lib_msg_t *)ktx->ktx_args[1], - (error == 0) ? PTL_OK : - (error == -ENOMEM) ? PTL_NOSPACE : PTL_FAIL); - (error == 0) ? 
PTL_OK : PTL_FAIL); -- break; - - case KTX_GETTING: /* Peer has DMA-ed direct? */ - msg = (lib_msg_t *)ktx->ktx_args[1]; -- - if (error == 0) { - repmsg = lib_fake_reply_msg (&kqswnal_lib, - ktx->ktx_nid, msg->md); - if (repmsg == NULL) - error = -ENOMEM; - } - - if (error == 0) { - lib_finalize (&kqswnal_lib, ktx->ktx_args[0], - msg, PTL_OK); - lib_finalize (&kqswnal_lib, NULL, repmsg, PTL_OK); - } else { - lib_finalize (&kqswnal_lib, ktx->ktx_args[0], msg, - (error == -ENOMEM) ? PTL_NOSPACE : PTL_FAIL); - } - case KTX_GETTING: /* optimized GET sent & REPLY received */ - /* Complete the GET with success since we can't avoid - * delivering a REPLY event; we committed to it when we - * launched the GET */ - lib_finalize (&kqswnal_lib, NULL, - (lib_msg_t *)ktx->ktx_args[1], PTL_OK); - lib_finalize (&kqswnal_lib, NULL, - (lib_msg_t *)ktx->ktx_args[2], - (error == 0) ? PTL_OK : PTL_FAIL); -- break; -- -- default: -- LASSERT (0); -- } -- -- kqswnal_put_idle_tx (ktx); --} -- --static void --kqswnal_txhandler(EP_TXD *txd, void *arg, int status) --{ -- kqswnal_tx_t *ktx = (kqswnal_tx_t *)arg; -- -- LASSERT (txd != NULL); -- LASSERT (ktx != NULL); -- -- CDEBUG(D_NET, "txd %p, arg %p status %d\n", txd, arg, status); -- -- if (status != EP_SUCCESS) { -- -- CERROR ("Tx completion to "LPX64" failed: %d\n", -- ktx->ktx_nid, status); -- -- kqswnal_notify_peer_down(ktx); -- status = -EHOSTDOWN; -- - } else if (ktx->ktx_state == KTX_GETTING) { - /* RPC completed OK; what did our peer put in the status - } else switch (ktx->ktx_state) { - - case KTX_GETTING: - case KTX_PUTTING: - /* RPC completed OK; but what did our peer put in the status -- * block? 
*/ --#if MULTIRAIL_EKC -- status = ep_txd_statusblk(txd)->Data[0]; --#else -- status = ep_txd_statusblk(txd)->Status; --#endif - } else { - break; - - case KTX_FORWARDING: - case KTX_SENDING: -- status = 0; - break; - - default: - LBUG(); - break; -- } -- -- kqswnal_tx_done (ktx, status); --} -- --int --kqswnal_launch (kqswnal_tx_t *ktx) --{ -- /* Don't block for transmit descriptor if we're in interrupt context */ -- int attr = in_interrupt() ? (EP_NO_SLEEP | EP_NO_ALLOC) : 0; -- int dest = kqswnal_nid2elanid (ktx->ktx_nid); -- unsigned long flags; -- int rc; -- -- ktx->ktx_launchtime = jiffies; - - if (kqswnal_data.kqn_shuttingdown) - return (-ESHUTDOWN); -- -- LASSERT (dest >= 0); /* must be a peer */ - if (ktx->ktx_state == KTX_GETTING) { - /* NB ktx_frag[0] is the GET hdr + kqswnal_remotemd_t. The - * other frags are the GET sink which we obviously don't - * send here :) */ - --#if MULTIRAIL_EKC - if (ktx->ktx_nmappedpages != 0) - attr = EP_SET_PREFRAIL(attr, ktx->ktx_rail); -#endif - - switch (ktx->ktx_state) { - case KTX_GETTING: - case KTX_PUTTING: - /* NB ktx_frag[0] is the GET/PUT hdr + kqswnal_remotemd_t. 
- * The other frags are the payload, awaiting RDMA */ -- rc = ep_transmit_rpc(kqswnal_data.kqn_eptx, dest, -- ktx->ktx_port, attr, -- kqswnal_txhandler, ktx, -- NULL, ktx->ktx_frags, 1); - #else - rc = ep_transmit_rpc(kqswnal_data.kqn_eptx, dest, - ktx->ktx_port, attr, kqswnal_txhandler, - ktx, NULL, ktx->ktx_frags, 1); - #endif - } else { - break; - - case KTX_FORWARDING: - case KTX_SENDING: --#if MULTIRAIL_EKC -- rc = ep_transmit_message(kqswnal_data.kqn_eptx, dest, -- ktx->ktx_port, attr, -- kqswnal_txhandler, ktx, -- NULL, ktx->ktx_frags, ktx->ktx_nfrag); --#else -- rc = ep_transmit_large(kqswnal_data.kqn_eptx, dest, -- ktx->ktx_port, attr, -- kqswnal_txhandler, ktx, -- ktx->ktx_frags, ktx->ktx_nfrag); --#endif - break; - - default: - LBUG(); - rc = -EINVAL; /* no compiler warning please */ - break; -- } -- -- switch (rc) { -- case EP_SUCCESS: /* success */ -- return (0); -- -- case EP_ENOMEM: /* can't allocate ep txd => queue for later */ - LASSERT (in_interrupt()); - -- spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); -- -- list_add_tail (&ktx->ktx_delayed_list, &kqswnal_data.kqn_delayedtxds); -- wake_up (&kqswnal_data.kqn_sched_waitq); -- -- spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags); -- return (0); -- -- default: /* fatal error */ -- CERROR ("Tx to "LPX64" failed: %d\n", ktx->ktx_nid, rc); -- kqswnal_notify_peer_down(ktx); -- return (-EHOSTUNREACH); -- } --} -- -#if 0 --static char * --hdr_type_string (ptl_hdr_t *hdr) --{ -- switch (hdr->type) { -- case PTL_MSG_ACK: -- return ("ACK"); -- case PTL_MSG_PUT: -- return ("PUT"); -- case PTL_MSG_GET: -- return ("GET"); -- case PTL_MSG_REPLY: -- return ("REPLY"); -- default: -- return (""); -- } --} -- --static void --kqswnal_cerror_hdr(ptl_hdr_t * hdr) --{ -- char *type_str = hdr_type_string (hdr); -- -- CERROR("P3 Header at %p of type %s length %d\n", hdr, type_str, - NTOH__u32(hdr->payload_length)); - CERROR(" From nid/pid "LPU64"/%u\n", NTOH__u64(hdr->src_nid), - 
NTOH__u32(hdr->src_pid)); - CERROR(" To nid/pid "LPU64"/%u\n", NTOH__u64(hdr->dest_nid), - NTOH__u32(hdr->dest_pid)); - le32_to_cpu(hdr->payload_length)); - CERROR(" From nid/pid "LPU64"/%u\n", le64_to_cpu(hdr->src_nid), - le32_to_cpu(hdr->src_pid)); - CERROR(" To nid/pid "LPU64"/%u\n", le64_to_cpu(hdr->dest_nid), - le32_to_cpu(hdr->dest_pid)); -- - switch (NTOH__u32(hdr->type)) { - switch (le32_to_cpu(hdr->type)) { -- case PTL_MSG_PUT: -- CERROR(" Ptl index %d, ack md "LPX64"."LPX64", " -- "match bits "LPX64"\n", - NTOH__u32 (hdr->msg.put.ptl_index), - le32_to_cpu(hdr->msg.put.ptl_index), -- hdr->msg.put.ack_wmd.wh_interface_cookie, -- hdr->msg.put.ack_wmd.wh_object_cookie, - NTOH__u64 (hdr->msg.put.match_bits)); - le64_to_cpu(hdr->msg.put.match_bits)); -- CERROR(" offset %d, hdr data "LPX64"\n", - NTOH__u32(hdr->msg.put.offset), - le32_to_cpu(hdr->msg.put.offset), -- hdr->msg.put.hdr_data); -- break; -- -- case PTL_MSG_GET: -- CERROR(" Ptl index %d, return md "LPX64"."LPX64", " -- "match bits "LPX64"\n", - NTOH__u32 (hdr->msg.get.ptl_index), - le32_to_cpu(hdr->msg.get.ptl_index), -- hdr->msg.get.return_wmd.wh_interface_cookie, -- hdr->msg.get.return_wmd.wh_object_cookie, -- hdr->msg.get.match_bits); -- CERROR(" Length %d, src offset %d\n", - NTOH__u32 (hdr->msg.get.sink_length), - NTOH__u32 (hdr->msg.get.src_offset)); - le32_to_cpu(hdr->msg.get.sink_length), - le32_to_cpu(hdr->msg.get.src_offset)); -- break; -- -- case PTL_MSG_ACK: -- CERROR(" dst md "LPX64"."LPX64", manipulated length %d\n", -- hdr->msg.ack.dst_wmd.wh_interface_cookie, -- hdr->msg.ack.dst_wmd.wh_object_cookie, - NTOH__u32 (hdr->msg.ack.mlength)); - le32_to_cpu(hdr->msg.ack.mlength)); -- break; -- -- case PTL_MSG_REPLY: -- CERROR(" dst md "LPX64"."LPX64"\n", -- hdr->msg.reply.dst_wmd.wh_interface_cookie, -- hdr->msg.reply.dst_wmd.wh_object_cookie); -- } -- --} /* end of print_hdr() */ -#endif -- --#if !MULTIRAIL_EKC --void --kqswnal_print_eiov (int how, char *str, int n, EP_IOVEC *iov) --{ -- int 
i; -- -- CDEBUG (how, "%s: %d\n", str, n); -- for (i = 0; i < n; i++) { -- CDEBUG (how, " %08x for %d\n", iov[i].Base, iov[i].Len); -- } --} -- --int --kqswnal_eiovs2datav (int ndv, EP_DATAVEC *dv, -- int nsrc, EP_IOVEC *src, -- int ndst, EP_IOVEC *dst) --{ -- int count; -- int nob; -- -- LASSERT (ndv > 0); -- LASSERT (nsrc > 0); -- LASSERT (ndst > 0); -- -- for (count = 0; count < ndv; count++, dv++) { -- -- if (nsrc == 0 || ndst == 0) { -- if (nsrc != ndst) { -- /* For now I'll barf on any left over entries */ -- CERROR ("mismatched src and dst iovs\n"); -- return (-EINVAL); -- } -- return (count); -- } -- -- nob = (src->Len < dst->Len) ? src->Len : dst->Len; -- dv->Len = nob; -- dv->Source = src->Base; -- dv->Dest = dst->Base; -- -- if (nob >= src->Len) { -- src++; -- nsrc--; -- } else { -- src->Len -= nob; -- src->Base += nob; -- } -- -- if (nob >= dst->Len) { -- dst++; -- ndst--; -- } else { -- src->Len -= nob; -- src->Base += nob; -- } -- } -- -- CERROR ("DATAVEC too small\n"); -- return (-E2BIG); -} -#else -int -kqswnal_check_rdma (int nlfrag, EP_NMD *lfrag, - int nrfrag, EP_NMD *rfrag) -{ - int i; - - if (nlfrag != nrfrag) { - CERROR("Can't cope with unequal # frags: %d local %d remote\n", - nlfrag, nrfrag); - return (-EINVAL); - } - - for (i = 0; i < nlfrag; i++) - if (lfrag[i].nmd_len != rfrag[i].nmd_len) { - CERROR("Can't cope with unequal frags %d(%d):" - " %d local %d remote\n", - i, nlfrag, lfrag[i].nmd_len, rfrag[i].nmd_len); - return (-EINVAL); - } - - return (0); --} --#endif -- - int - kqswnal_dma_reply (kqswnal_tx_t *ktx, int nfrag, - struct iovec *iov, ptl_kiov_t *kiov, - int offset, int nob) -kqswnal_remotemd_t * -kqswnal_parse_rmd (kqswnal_rx_t *krx, int type, ptl_nid_t expected_nid) --{ - kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0]; -- char *buffer = (char *)page_address(krx->krx_kiov[0].kiov_page); - ptl_hdr_t *hdr = (ptl_hdr_t *)buffer; -- kqswnal_remotemd_t *rmd = (kqswnal_remotemd_t *)(buffer + KQSW_HDR_SIZE); - int rc; - #if 
MULTIRAIL_EKC - int i; - #else - EP_DATAVEC datav[EP_MAXFRAG]; - int ndatav; - #endif - LASSERT (krx->krx_rpc_reply_needed); - LASSERT ((iov == NULL) != (kiov == NULL)); - ptl_nid_t nid = kqswnal_rx_nid(krx); -- - /* see kqswnal_sendmsg comment regarding endian-ness */ - /* Note (1) lib_parse has already flipped hdr. - * (2) RDMA addresses are sent in native endian-ness. When - * EKC copes with different endian nodes, I'll fix this (and - * eat my hat :) */ - - LASSERT (krx->krx_nob >= sizeof(*hdr)); - - if (hdr->type != type) { - CERROR ("Unexpected optimized get/put type %d (%d expected)" - "from "LPX64"\n", hdr->type, type, nid); - return (NULL); - } - - if (hdr->src_nid != nid) { - CERROR ("Unexpected optimized get/put source NID " - LPX64" from "LPX64"\n", hdr->src_nid, nid); - return (NULL); - } - - LASSERT (nid == expected_nid); - -- if (buffer + krx->krx_nob < (char *)(rmd + 1)) { -- /* msg too small to discover rmd size */ -- CERROR ("Incoming message [%d] too small for RMD (%d needed)\n", -- krx->krx_nob, (int)(((char *)(rmd + 1)) - buffer)); - return (-EINVAL); - return (NULL); -- } - - -- if (buffer + krx->krx_nob < (char *)&rmd->kqrmd_frag[rmd->kqrmd_nfrag]) { -- /* rmd doesn't fit in the incoming message */ -- CERROR ("Incoming message [%d] too small for RMD[%d] (%d needed)\n", -- krx->krx_nob, rmd->kqrmd_nfrag, -- (int)(((char *)&rmd->kqrmd_frag[rmd->kqrmd_nfrag]) - buffer)); - return (-EINVAL); - return (NULL); -- } -- - /* Map the source data... */ - return (rmd); -} - -void -kqswnal_rdma_store_complete (EP_RXD *rxd) -{ - int status = ep_rxd_status(rxd); - kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd); - kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0]; - - CDEBUG((status == EP_SUCCESS) ? 
D_NET : D_ERROR, - "rxd %p, ktx %p, status %d\n", rxd, ktx, status); - - LASSERT (ktx->ktx_state == KTX_RDMAING); - LASSERT (krx->krx_rxd == rxd); - LASSERT (krx->krx_rpc_reply_needed); - - krx->krx_rpc_reply_needed = 0; - kqswnal_rx_decref (krx); - - /* free ktx & finalize() its lib_msg_t */ - kqswnal_tx_done(ktx, (status == EP_SUCCESS) ? 0 : -ECONNABORTED); -} - -void -kqswnal_rdma_fetch_complete (EP_RXD *rxd) -{ - /* Completed fetching the PUT data */ - int status = ep_rxd_status(rxd); - kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd); - kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0]; - unsigned long flags; - - CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR, - "rxd %p, ktx %p, status %d\n", rxd, ktx, status); - - LASSERT (ktx->ktx_state == KTX_RDMAING); - LASSERT (krx->krx_rxd == rxd); - LASSERT (krx->krx_rpc_reply_needed); - - /* Set the RPC completion status */ - status = (status == EP_SUCCESS) ? 0 : -ECONNABORTED; - krx->krx_rpc_reply_status = status; - - /* free ktx & finalize() its lib_msg_t */ - kqswnal_tx_done(ktx, status); - - if (!in_interrupt()) { - /* OK to complete the RPC now (iff I had the last ref) */ - kqswnal_rx_decref (krx); - return; - } - - LASSERT (krx->krx_state == KRX_PARSE); - krx->krx_state = KRX_COMPLETING; - - /* Complete the RPC in thread context */ - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - - list_add_tail (&krx->krx_list, &kqswnal_data.kqn_readyrxds); - wake_up (&kqswnal_data.kqn_sched_waitq); - - spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags); -} - -int -kqswnal_rdma (kqswnal_rx_t *krx, lib_msg_t *libmsg, int type, - int niov, struct iovec *iov, ptl_kiov_t *kiov, - size_t offset, size_t len) -{ - kqswnal_remotemd_t *rmd; - kqswnal_tx_t *ktx; - int eprc; - int rc; -#if !MULTIRAIL_EKC - EP_DATAVEC datav[EP_MAXFRAG]; - int ndatav; -#endif - - LASSERT (type == PTL_MSG_GET || type == PTL_MSG_PUT); - /* Not both mapped and paged payload */ - LASSERT (iov == NULL || kiov == NULL); - /* RPC 
completes with failure by default */ - LASSERT (krx->krx_rpc_reply_needed); - LASSERT (krx->krx_rpc_reply_status != 0); - - rmd = kqswnal_parse_rmd(krx, type, libmsg->ev.initiator.nid); - if (rmd == NULL) - return (-EPROTO); - - if (len == 0) { - /* data got truncated to nothing. */ - lib_finalize(&kqswnal_lib, krx, libmsg, PTL_OK); - /* Let kqswnal_rx_done() complete the RPC with success */ - krx->krx_rpc_reply_status = 0; - return (0); - } - - /* NB I'm using 'ktx' just to map the local RDMA buffers; I'm not - actually sending a portals message with it */ - ktx = kqswnal_get_idle_tx(NULL, 0); - if (ktx == NULL) { - CERROR ("Can't get txd for RDMA with "LPX64"\n", - libmsg->ev.initiator.nid); - return (-ENOMEM); - } - - ktx->ktx_state = KTX_RDMAING; - ktx->ktx_nid = libmsg->ev.initiator.nid; - ktx->ktx_args[0] = krx; - ktx->ktx_args[1] = libmsg; - -#if MULTIRAIL_EKC - /* Map on the rail the RPC prefers */ - ktx->ktx_rail = ep_rcvr_prefrail(krx->krx_eprx, - ep_rxd_railmask(krx->krx_rxd)); -#endif - - /* Start mapping at offset 0 (we're not mapping any headers) */ -- ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 0; - -- if (kiov != NULL) - rc = kqswnal_map_tx_kiov (ktx, offset, nob, nfrag, kiov); - rc = kqswnal_map_tx_kiov(ktx, offset, len, niov, kiov); -- else - rc = kqswnal_map_tx_iov (ktx, offset, nob, nfrag, iov); - rc = kqswnal_map_tx_iov(ktx, offset, len, niov, iov); -- -- if (rc != 0) { - CERROR ("Can't map source data: %d\n", rc); - return (rc); - CERROR ("Can't map local RDMA data: %d\n", rc); - goto out; -- } -- --#if MULTIRAIL_EKC - if (ktx->ktx_nfrag != rmd->kqrmd_nfrag) { - CERROR("Can't cope with unequal # frags: %d local %d remote\n", - ktx->ktx_nfrag, rmd->kqrmd_nfrag); - return (-EINVAL); - rc = kqswnal_check_rdma (ktx->ktx_nfrag, ktx->ktx_frags, - rmd->kqrmd_nfrag, rmd->kqrmd_frag); - if (rc != 0) { - CERROR ("Incompatible RDMA descriptors\n"); - goto out; -- } - - for (i = 0; i < rmd->kqrmd_nfrag; i++) - if (ktx->ktx_frags[i].nmd_len != 
rmd->kqrmd_frag[i].nmd_len) { - CERROR("Can't cope with unequal frags %d(%d):" - " %d local %d remote\n", - i, rmd->kqrmd_nfrag, - ktx->ktx_frags[i].nmd_len, - rmd->kqrmd_frag[i].nmd_len); - return (-EINVAL); - } --#else - ndatav = kqswnal_eiovs2datav (EP_MAXFRAG, datav, - ktx->ktx_nfrag, ktx->ktx_frags, - rmd->kqrmd_nfrag, rmd->kqrmd_frag); - switch (type) { - default: - LBUG(); - - case PTL_MSG_GET: - ndatav = kqswnal_eiovs2datav(EP_MAXFRAG, datav, - ktx->ktx_nfrag, ktx->ktx_frags, - rmd->kqrmd_nfrag, rmd->kqrmd_frag); - break; - - case PTL_MSG_PUT: - ndatav = kqswnal_eiovs2datav(EP_MAXFRAG, datav, - rmd->kqrmd_nfrag, rmd->kqrmd_frag, - ktx->ktx_nfrag, ktx->ktx_frags); - break; - } - -- if (ndatav < 0) { -- CERROR ("Can't create datavec: %d\n", ndatav); - return (ndatav); - rc = ndatav; - goto out; -- } --#endif -- - /* Our caller will start to race with kqswnal_dma_reply_complete... */ - LASSERT (atomic_read (&krx->krx_refcount) == 1); - atomic_set (&krx->krx_refcount, 2); - LASSERT (atomic_read(&krx->krx_refcount) > 0); - /* Take an extra ref for the completion callback */ - atomic_inc(&krx->krx_refcount); -- - #if MULTIRAIL_EKC - rc = ep_complete_rpc(krx->krx_rxd, kqswnal_dma_reply_complete, ktx, - &kqswnal_rpc_success, - ktx->ktx_frags, rmd->kqrmd_frag, rmd->kqrmd_nfrag); - if (rc == EP_SUCCESS) - return (0); - switch (type) { - default: - LBUG(); -- - /* Well we tried... 
*/ - krx->krx_rpc_reply_needed = 0; - case PTL_MSG_GET: -#if MULTIRAIL_EKC - eprc = ep_complete_rpc(krx->krx_rxd, - kqswnal_rdma_store_complete, ktx, - &kqswnal_data.kqn_rpc_success, - ktx->ktx_frags, rmd->kqrmd_frag, rmd->kqrmd_nfrag); --#else - rc = ep_complete_rpc (krx->krx_rxd, kqswnal_dma_reply_complete, ktx, - &kqswnal_rpc_success, datav, ndatav); - if (rc == EP_SUCCESS) - return (0); - - /* "old" EKC destroys rxd on failed completion */ - krx->krx_rxd = NULL; - eprc = ep_complete_rpc (krx->krx_rxd, - kqswnal_rdma_store_complete, ktx, - &kqswnal_data.kqn_rpc_success, - datav, ndatav); - if (eprc != EP_SUCCESS) /* "old" EKC destroys rxd on failed completion */ - krx->krx_rxd = NULL; --#endif - - CERROR("can't complete RPC: %d\n", rc); - if (eprc != EP_SUCCESS) { - CERROR("can't complete RPC: %d\n", eprc); - /* don't re-attempt RPC completion */ - krx->krx_rpc_reply_needed = 0; - rc = -ECONNABORTED; - } - break; - - case PTL_MSG_PUT: -#if MULTIRAIL_EKC - eprc = ep_rpc_get (krx->krx_rxd, - kqswnal_rdma_fetch_complete, ktx, - rmd->kqrmd_frag, ktx->ktx_frags, ktx->ktx_nfrag); -#else - eprc = ep_rpc_get (krx->krx_rxd, - kqswnal_rdma_fetch_complete, ktx, - datav, ndatav); -#endif - if (eprc != EP_SUCCESS) { - CERROR("ep_rpc_get failed: %d\n", eprc); - rc = -ECONNABORTED; - } - break; - } -- - /* reset refcount back to 1: we're not going to be racing with - * kqswnal_dma_reply_complete. 
*/ - atomic_set (&krx->krx_refcount, 1); - out: - if (rc != 0) { - kqswnal_rx_decref(krx); /* drop callback's ref */ - kqswnal_put_idle_tx (ktx); - } -- - return (-ECONNABORTED); - atomic_dec(&kqswnal_data.kqn_pending_txs); - return (rc); --} -- --static ptl_err_t - kqswnal_sendmsg (nal_cb_t *nal, -kqswnal_sendmsg (lib_nal_t *nal, -- void *private, -- lib_msg_t *libmsg, -- ptl_hdr_t *hdr, -- int type, -- ptl_nid_t nid, -- ptl_pid_t pid, -- unsigned int payload_niov, -- struct iovec *payload_iov, -- ptl_kiov_t *payload_kiov, -- size_t payload_offset, -- size_t payload_nob) --{ -- kqswnal_tx_t *ktx; -- int rc; -- ptl_nid_t targetnid; --#if KQSW_CHECKSUM -- int i; -- kqsw_csum_t csum; -- int sumoff; -- int sumnob; --#endif - /* NB 1. hdr is in network byte order */ - /* 2. 'private' depends on the message type */ -- -- CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid: "LPX64 -- " pid %u\n", payload_nob, payload_niov, nid, pid); -- -- LASSERT (payload_nob == 0 || payload_niov > 0); -- LASSERT (payload_niov <= PTL_MD_MAX_IOV); -- -- /* It must be OK to kmap() if required */ -- LASSERT (payload_kiov == NULL || !in_interrupt ()); -- /* payload is either all vaddrs or all pages */ -- LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); - - -- if (payload_nob > KQSW_MAXPAYLOAD) { -- CERROR ("request exceeds MTU size "LPSZ" (max %u).\n", -- payload_nob, KQSW_MAXPAYLOAD); -- return (PTL_FAIL); - } - - if (type == PTL_MSG_REPLY && /* can I look in 'private' */ - ((kqswnal_rx_t *)private)->krx_rpc_reply_needed) { /* is it an RPC */ - /* Must be a REPLY for an optimized GET */ - rc = kqswnal_rdma ((kqswnal_rx_t *)private, libmsg, PTL_MSG_GET, - payload_niov, payload_iov, payload_kiov, - payload_offset, payload_nob); - return ((rc == 0) ? PTL_OK : PTL_FAIL); -- } -- -- targetnid = nid; -- if (kqswnal_nid2elanid (nid) < 0) { /* Can't send direct: find gateway? 
*/ -- rc = kpr_lookup (&kqswnal_data.kqn_router, nid, -- sizeof (ptl_hdr_t) + payload_nob, &targetnid); -- if (rc != 0) { -- CERROR("Can't route to "LPX64": router error %d\n", -- nid, rc); -- return (PTL_FAIL); -- } -- if (kqswnal_nid2elanid (targetnid) < 0) { -- CERROR("Bad gateway "LPX64" for "LPX64"\n", -- targetnid, nid); -- return (PTL_FAIL); -- } -- } -- -- /* I may not block for a transmit descriptor if I might block the -- * receiver, or an interrupt handler. */ -- ktx = kqswnal_get_idle_tx(NULL, !(type == PTL_MSG_ACK || -- type == PTL_MSG_REPLY || -- in_interrupt())); -- if (ktx == NULL) { - kqswnal_cerror_hdr (hdr); - return (PTL_NOSPACE); - CERROR ("Can't get txd for msg type %d for "LPX64"\n", - type, libmsg->ev.initiator.nid); - return (PTL_NO_SPACE); -- } -- - ktx->ktx_state = KTX_SENDING; -- ktx->ktx_nid = targetnid; -- ktx->ktx_args[0] = private; -- ktx->ktx_args[1] = libmsg; - - if (type == PTL_MSG_REPLY && - ((kqswnal_rx_t *)private)->krx_rpc_reply_needed) { - if (nid != targetnid || - kqswnal_nid2elanid(nid) != - ep_rxd_node(((kqswnal_rx_t *)private)->krx_rxd)) { - CERROR("Optimized reply nid conflict: " - "nid "LPX64" via "LPX64" elanID %d\n", - nid, targetnid, - ep_rxd_node(((kqswnal_rx_t *)private)->krx_rxd)); - return (PTL_FAIL); - } - - /* peer expects RPC completion with GET data */ - rc = kqswnal_dma_reply (ktx, payload_niov, - payload_iov, payload_kiov, - payload_offset, payload_nob); - if (rc == 0) - return (PTL_OK); - - CERROR ("Can't DMA reply to "LPX64": %d\n", nid, rc); - kqswnal_put_idle_tx (ktx); - return (PTL_FAIL); - } - ktx->ktx_args[2] = NULL; /* set when a GET commits to REPLY */ -- -- memcpy (ktx->ktx_buffer, hdr, sizeof (*hdr)); /* copy hdr from caller's stack */ - ktx->ktx_wire_hdr = (ptl_hdr_t *)ktx->ktx_buffer; -- --#if KQSW_CHECKSUM -- csum = kqsw_csum (0, (char *)hdr, sizeof (*hdr)); -- memcpy (ktx->ktx_buffer + sizeof (*hdr), &csum, sizeof (csum)); -- for (csum = 0, i = 0, sumoff = payload_offset, sumnob = 
payload_nob; sumnob > 0; i++) { -- LASSERT(i < niov); -- if (payload_kiov != NULL) { -- ptl_kiov_t *kiov = &payload_kiov[i]; -- -- if (sumoff >= kiov->kiov_len) { -- sumoff -= kiov->kiov_len; -- } else { -- char *addr = ((char *)kmap (kiov->kiov_page)) + -- kiov->kiov_offset + sumoff; -- int fragnob = kiov->kiov_len - sumoff; -- -- csum = kqsw_csum(csum, addr, MIN(sumnob, fragnob)); -- sumnob -= fragnob; -- sumoff = 0; -- kunmap(kiov->kiov_page); -- } -- } else { -- struct iovec *iov = &payload_iov[i]; -- -- if (sumoff > iov->iov_len) { -- sumoff -= iov->iov_len; -- } else { -- char *addr = iov->iov_base + sumoff; -- int fragnob = iov->iov_len - sumoff; -- -- csum = kqsw_csum(csum, addr, MIN(sumnob, fragnob)); -- sumnob -= fragnob; -- sumoff = 0; -- } -- } -- } -- memcpy(ktx->ktx_buffer + sizeof(*hdr) + sizeof(csum), &csum, sizeof(csum)); --#endif -- - if (kqswnal_data.kqn_optimized_gets && - type == PTL_MSG_GET && /* doing a GET */ - nid == targetnid) { /* not forwarding */ - /* The first frag will be the pre-mapped buffer for (at least) the - * portals header. */ - ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1; - - if (nid == targetnid && /* not forwarding */ - ((type == PTL_MSG_GET && /* optimize GET? */ - kqswnal_tunables.kqn_optimized_gets != 0 && - le32_to_cpu(hdr->msg.get.sink_length) >= kqswnal_tunables.kqn_optimized_gets) || - (type == PTL_MSG_PUT && /* optimize PUT? */ - kqswnal_tunables.kqn_optimized_puts != 0 && - payload_nob >= kqswnal_tunables.kqn_optimized_puts))) { -- lib_md_t *md = libmsg->md; -- kqswnal_remotemd_t *rmd = (kqswnal_remotemd_t *)(ktx->ktx_buffer + KQSW_HDR_SIZE); -- - /* Optimised path: I send over the Elan vaddrs of the get - * sink buffers, and my peer DMAs directly into them. - /* Optimised path: I send over the Elan vaddrs of the local - * buffers, and my peer DMAs directly to/from them. -- * -- * First I set up ktx as if it was going to send this -- * payload, (it needs to map it anyway). 
This fills -- * ktx_frags[1] and onward with the network addresses -- * of the GET sink frags. I copy these into ktx_buffer, - * immediately after the header, and send that as my GET - * message. - * - * Note that the addresses are sent in native endian-ness. - * When EKC copes with different endian nodes, I'll fix - * this (and eat my hat :) */ - * immediately after the header, and send that as my - * message. */ -- - ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1; - ktx->ktx_state = KTX_GETTING; - ktx->ktx_state = (type == PTL_MSG_PUT) ? KTX_PUTTING : KTX_GETTING; -- -- if ((libmsg->md->options & PTL_MD_KIOV) != 0) -- rc = kqswnal_map_tx_kiov (ktx, 0, md->length, -- md->md_niov, md->md_iov.kiov); -- else -- rc = kqswnal_map_tx_iov (ktx, 0, md->length, -- md->md_niov, md->md_iov.iov); - - if (rc < 0) { - kqswnal_put_idle_tx (ktx); - return (PTL_FAIL); - } - if (rc != 0) - goto out; -- -- rmd->kqrmd_nfrag = ktx->ktx_nfrag - 1; -- -- payload_nob = offsetof(kqswnal_remotemd_t, -- kqrmd_frag[rmd->kqrmd_nfrag]); -- LASSERT (KQSW_HDR_SIZE + payload_nob <= KQSW_TX_BUFFER_SIZE); -- --#if MULTIRAIL_EKC -- memcpy(&rmd->kqrmd_frag[0], &ktx->ktx_frags[1], -- rmd->kqrmd_nfrag * sizeof(EP_NMD)); -- -- ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer, -- 0, KQSW_HDR_SIZE + payload_nob); --#else -- memcpy(&rmd->kqrmd_frag[0], &ktx->ktx_frags[1], -- rmd->kqrmd_nfrag * sizeof(EP_IOVEC)); -- -- ktx->ktx_frags[0].Base = ktx->ktx_ebuffer; -- ktx->ktx_frags[0].Len = KQSW_HDR_SIZE + payload_nob; --#endif - if (type == PTL_MSG_GET) { - /* Allocate reply message now while I'm in thread context */ - ktx->ktx_args[2] = lib_create_reply_msg (&kqswnal_lib, - nid, libmsg); - if (ktx->ktx_args[2] == NULL) - goto out; - - /* NB finalizing the REPLY message is my - * responsibility now, whatever happens. 
*/ - } - -- } else if (payload_nob <= KQSW_TX_MAXCONTIG) { -- -- /* small message: single frag copied into the pre-mapped buffer */ -- - ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1; - ktx->ktx_state = KTX_SENDING; --#if MULTIRAIL_EKC -- ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer, -- 0, KQSW_HDR_SIZE + payload_nob); --#else -- ktx->ktx_frags[0].Base = ktx->ktx_ebuffer; -- ktx->ktx_frags[0].Len = KQSW_HDR_SIZE + payload_nob; --#endif -- if (payload_nob > 0) { -- if (payload_kiov != NULL) -- lib_copy_kiov2buf (ktx->ktx_buffer + KQSW_HDR_SIZE, -- payload_niov, payload_kiov, -- payload_offset, payload_nob); -- else -- lib_copy_iov2buf (ktx->ktx_buffer + KQSW_HDR_SIZE, -- payload_niov, payload_iov, -- payload_offset, payload_nob); -- } -- } else { -- -- /* large message: multiple frags: first is hdr in pre-mapped buffer */ -- - ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1; - ktx->ktx_state = KTX_SENDING; --#if MULTIRAIL_EKC -- ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer, -- 0, KQSW_HDR_SIZE); --#else -- ktx->ktx_frags[0].Base = ktx->ktx_ebuffer; -- ktx->ktx_frags[0].Len = KQSW_HDR_SIZE; --#endif -- if (payload_kiov != NULL) -- rc = kqswnal_map_tx_kiov (ktx, payload_offset, payload_nob, -- payload_niov, payload_kiov); -- else -- rc = kqswnal_map_tx_iov (ktx, payload_offset, payload_nob, -- payload_niov, payload_iov); - if (rc != 0) { - kqswnal_put_idle_tx (ktx); - return (PTL_FAIL); - } - if (rc != 0) - goto out; -- } -- -- ktx->ktx_port = (payload_nob <= KQSW_SMALLPAYLOAD) ? -- EP_MSG_SVC_PORTALS_SMALL : EP_MSG_SVC_PORTALS_LARGE; -- -- rc = kqswnal_launch (ktx); - if (rc != 0) { /* failed? */ - CERROR ("Failed to send packet to "LPX64": %d\n", targetnid, rc); - - out: - CDEBUG(rc == 0 ? D_NET : D_ERROR, - "%s "LPSZ" bytes to "LPX64" via "LPX64": rc %d\n", - rc == 0 ? 
"Sent" : "Failed to send", - payload_nob, nid, targetnid, rc); - - if (rc != 0) { - if (ktx->ktx_state == KTX_GETTING && - ktx->ktx_args[2] != NULL) { - /* We committed to reply, but there was a problem - * launching the GET. We can't avoid delivering a - * REPLY event since we committed above, so we - * pretend the GET succeeded but the REPLY - * failed. */ - rc = 0; - lib_finalize (&kqswnal_lib, private, libmsg, PTL_OK); - lib_finalize (&kqswnal_lib, private, - (lib_msg_t *)ktx->ktx_args[2], PTL_FAIL); - } - -- kqswnal_put_idle_tx (ktx); - return (PTL_FAIL); -- } - - CDEBUG(D_NET, "sent "LPSZ" bytes to "LPX64" via "LPX64"\n", - payload_nob, nid, targetnid); - return (PTL_OK); - - atomic_dec(&kqswnal_data.kqn_pending_txs); - return (rc == 0 ? PTL_OK : PTL_FAIL); --} -- --static ptl_err_t - kqswnal_send (nal_cb_t *nal, -kqswnal_send (lib_nal_t *nal, -- void *private, -- lib_msg_t *libmsg, -- ptl_hdr_t *hdr, -- int type, -- ptl_nid_t nid, -- ptl_pid_t pid, -- unsigned int payload_niov, -- struct iovec *payload_iov, -- size_t payload_offset, -- size_t payload_nob) --{ -- return (kqswnal_sendmsg (nal, private, libmsg, hdr, type, nid, pid, -- payload_niov, payload_iov, NULL, -- payload_offset, payload_nob)); --} -- --static ptl_err_t - kqswnal_send_pages (nal_cb_t *nal, -kqswnal_send_pages (lib_nal_t *nal, -- void *private, -- lib_msg_t *libmsg, -- ptl_hdr_t *hdr, -- int type, -- ptl_nid_t nid, -- ptl_pid_t pid, -- unsigned int payload_niov, -- ptl_kiov_t *payload_kiov, -- size_t payload_offset, -- size_t payload_nob) --{ -- return (kqswnal_sendmsg (nal, private, libmsg, hdr, type, nid, pid, -- payload_niov, NULL, payload_kiov, -- payload_offset, payload_nob)); --} -- --void --kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) --{ -- int rc; -- kqswnal_tx_t *ktx; -- ptl_kiov_t *kiov = fwd->kprfd_kiov; -- int niov = fwd->kprfd_niov; -- int nob = fwd->kprfd_nob; -- ptl_nid_t nid = fwd->kprfd_gateway_nid; -- --#if KQSW_CHECKSUM -- CERROR ("checksums for forwarded packets 
not implemented\n"); -- LBUG (); --#endif -- /* The router wants this NAL to forward a packet */ -- CDEBUG (D_NET, "forwarding [%p] to "LPX64", payload: %d frags %d bytes\n", -- fwd, nid, niov, nob); -- -- ktx = kqswnal_get_idle_tx (fwd, 0); -- if (ktx == NULL) /* can't get txd right now */ -- return; /* fwd will be scheduled when tx desc freed */ -- - if (nid == kqswnal_lib.ni.nid) /* gateway is me */ - if (nid == kqswnal_lib.libnal_ni.ni_pid.nid) /* gateway is me */ -- nid = fwd->kprfd_target_nid; /* target is final dest */ -- -- if (kqswnal_nid2elanid (nid) < 0) { -- CERROR("Can't forward [%p] to "LPX64": not a peer\n", fwd, nid); -- rc = -EHOSTUNREACH; - goto failed; - goto out; -- } -- -- /* copy hdr into pre-mapped buffer */ -- memcpy(ktx->ktx_buffer, fwd->kprfd_hdr, sizeof(ptl_hdr_t)); - ktx->ktx_wire_hdr = (ptl_hdr_t *)ktx->ktx_buffer; -- -- ktx->ktx_port = (nob <= KQSW_SMALLPAYLOAD) ? -- EP_MSG_SVC_PORTALS_SMALL : EP_MSG_SVC_PORTALS_LARGE; -- ktx->ktx_nid = nid; -- ktx->ktx_state = KTX_FORWARDING; -- ktx->ktx_args[0] = fwd; -- ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1; -- -- if (nob <= KQSW_TX_MAXCONTIG) -- { -- /* send payload from ktx's pre-mapped contiguous buffer */ --#if MULTIRAIL_EKC -- ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer, -- 0, KQSW_HDR_SIZE + nob); --#else -- ktx->ktx_frags[0].Base = ktx->ktx_ebuffer; -- ktx->ktx_frags[0].Len = KQSW_HDR_SIZE + nob; --#endif -- if (nob > 0) -- lib_copy_kiov2buf(ktx->ktx_buffer + KQSW_HDR_SIZE, -- niov, kiov, 0, nob); -- } -- else -- { -- /* zero copy payload */ --#if MULTIRAIL_EKC -- ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer, -- 0, KQSW_HDR_SIZE); --#else -- ktx->ktx_frags[0].Base = ktx->ktx_ebuffer; -- ktx->ktx_frags[0].Len = KQSW_HDR_SIZE; --#endif -- rc = kqswnal_map_tx_kiov (ktx, 0, nob, niov, kiov); -- if (rc != 0) - goto failed; - goto out; -- } -- -- rc = kqswnal_launch (ktx); - if (rc == 0) - return; - out: - if (rc != 0) { - CERROR ("Failed to forward [%p] to "LPX64": %d\n", fwd, 
nid, rc); -- - failed: - LASSERT (rc != 0); - CERROR ("Failed to forward [%p] to "LPX64": %d\n", fwd, nid, rc); - /* complete now (with failure) */ - kqswnal_tx_done (ktx, rc); - } -- - kqswnal_put_idle_tx (ktx); - /* complete now (with failure) */ - kpr_fwd_done (&kqswnal_data.kqn_router, fwd, rc); - atomic_dec(&kqswnal_data.kqn_pending_txs); --} -- --void --kqswnal_fwd_callback (void *arg, int error) --{ -- kqswnal_rx_t *krx = (kqswnal_rx_t *)arg; -- -- /* The router has finished forwarding this packet */ -- -- if (error != 0) -- { -- ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_kiov[0].kiov_page); -- -- CERROR("Failed to route packet from "LPX64" to "LPX64": %d\n", - NTOH__u64(hdr->src_nid), NTOH__u64(hdr->dest_nid),error); - le64_to_cpu(hdr->src_nid), le64_to_cpu(hdr->dest_nid),error); -- } -- - kqswnal_requeue_rx (krx); - LASSERT (atomic_read(&krx->krx_refcount) == 1); - kqswnal_rx_decref (krx); --} -- --void - kqswnal_dma_reply_complete (EP_RXD *rxd) -kqswnal_requeue_rx (kqswnal_rx_t *krx) --{ - int status = ep_rxd_status(rxd); - kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd); - kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0]; - lib_msg_t *msg = (lib_msg_t *)ktx->ktx_args[1]; - - CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR, - "rxd %p, ktx %p, status %d\n", rxd, ktx, status); - LASSERT (atomic_read(&krx->krx_refcount) == 0); - LASSERT (!krx->krx_rpc_reply_needed); -- - LASSERT (krx->krx_rxd == rxd); - LASSERT (krx->krx_rpc_reply_needed); - krx->krx_state = KRX_POSTED; -- - krx->krx_rpc_reply_needed = 0; - kqswnal_rx_done (krx); -#if MULTIRAIL_EKC - if (kqswnal_data.kqn_shuttingdown) { - /* free EKC rxd on shutdown */ - ep_complete_receive(krx->krx_rxd); - } else { - /* repost receive */ - ep_requeue_receive(krx->krx_rxd, - kqswnal_rxhandler, krx, - &krx->krx_elanbuffer, 0); - } -#else - if (kqswnal_data.kqn_shuttingdown) - return; -- - lib_finalize (&kqswnal_lib, NULL, msg, - (status == EP_SUCCESS) ? 
PTL_OK : PTL_FAIL); - kqswnal_put_idle_tx (ktx); - if (krx->krx_rxd == NULL) { - /* We had a failed ep_complete_rpc() which nukes the - * descriptor in "old" EKC */ - int eprc = ep_queue_receive(krx->krx_eprx, - kqswnal_rxhandler, krx, - krx->krx_elanbuffer, - krx->krx_npages * PAGE_SIZE, 0); - LASSERT (eprc == EP_SUCCESS); - /* We don't handle failure here; it's incredibly rare - * (never reported?) and only happens with "old" EKC */ - } else { - ep_requeue_receive(krx->krx_rxd, kqswnal_rxhandler, krx, - krx->krx_elanbuffer, - krx->krx_npages * PAGE_SIZE); - } -#endif --} -- --void --kqswnal_rpc_complete (EP_RXD *rxd) --{ -- int status = ep_rxd_status(rxd); -- kqswnal_rx_t *krx = (kqswnal_rx_t *)ep_rxd_arg(rxd); -- -- CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR, -- "rxd %p, krx %p, status %d\n", rxd, krx, status); -- -- LASSERT (krx->krx_rxd == rxd); -- LASSERT (krx->krx_rpc_reply_needed); -- -- krx->krx_rpc_reply_needed = 0; -- kqswnal_requeue_rx (krx); --} -- --void - kqswnal_requeue_rx (kqswnal_rx_t *krx) -kqswnal_rx_done (kqswnal_rx_t *krx) --{ - int rc; - int rc; - EP_STATUSBLK *sblk; -- -- LASSERT (atomic_read(&krx->krx_refcount) == 0); -- -- if (krx->krx_rpc_reply_needed) { - /* We've not completed the peer's RPC yet... */ - sblk = (krx->krx_rpc_reply_status == 0) ? - &kqswnal_data.kqn_rpc_success : - &kqswnal_data.kqn_rpc_failed; -- - /* We failed to complete the peer's optimized GET (e.g. we - * couldn't map the source buffers). We complete the - * peer's EKC rpc now with failure. */ - LASSERT (!in_interrupt()); --#if MULTIRAIL_EKC - rc = ep_complete_rpc(krx->krx_rxd, kqswnal_rpc_complete, krx, - &kqswnal_rpc_failed, NULL, NULL, 0); - rc = ep_complete_rpc(krx->krx_rxd, - kqswnal_rpc_complete, krx, - sblk, NULL, NULL, 0); -- if (rc == EP_SUCCESS) -- return; - - CERROR("can't complete RPC: %d\n", rc); --#else - if (krx->krx_rxd != NULL) { - /* We didn't try (and fail) to complete earlier... 
*/ - rc = ep_complete_rpc(krx->krx_rxd, - kqswnal_rpc_complete, krx, - &kqswnal_rpc_failed, NULL, 0); - if (rc == EP_SUCCESS) - return; - - CERROR("can't complete RPC: %d\n", rc); - } - - /* NB the old ep_complete_rpc() frees rxd on failure, so we - * have to requeue from scratch here, unless we're shutting - * down */ - if (kqswnal_data.kqn_shuttingdown) - rc = ep_complete_rpc(krx->krx_rxd, - kqswnal_rpc_complete, krx, - sblk, NULL, 0); - if (rc == EP_SUCCESS) -- return; -- - rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx, - krx->krx_elanbuffer, - krx->krx_npages * PAGE_SIZE, 0); - LASSERT (rc == EP_SUCCESS); - /* We don't handle failure here; it's incredibly rare - * (never reported?) and only happens with "old" EKC */ - return; - /* "old" EKC destroys rxd on failed completion */ - krx->krx_rxd = NULL; --#endif - CERROR("can't complete RPC: %d\n", rc); - krx->krx_rpc_reply_needed = 0; -- } -- - #if MULTIRAIL_EKC - if (kqswnal_data.kqn_shuttingdown) { - /* free EKC rxd on shutdown */ - ep_complete_receive(krx->krx_rxd); - } else { - /* repost receive */ - ep_requeue_receive(krx->krx_rxd, kqswnal_rxhandler, krx, - &krx->krx_elanbuffer, 0); - } - #else - /* don't actually requeue on shutdown */ - if (!kqswnal_data.kqn_shuttingdown) - ep_requeue_receive(krx->krx_rxd, kqswnal_rxhandler, krx, - krx->krx_elanbuffer, krx->krx_npages * PAGE_SIZE); - #endif - kqswnal_requeue_rx(krx); --} -- --void - kqswnal_rx (kqswnal_rx_t *krx) -kqswnal_parse (kqswnal_rx_t *krx) --{ -- ptl_hdr_t *hdr = (ptl_hdr_t *) page_address(krx->krx_kiov[0].kiov_page); - ptl_nid_t dest_nid = NTOH__u64 (hdr->dest_nid); - ptl_nid_t dest_nid = le64_to_cpu(hdr->dest_nid); -- int payload_nob; -- int nob; -- int niov; -- - LASSERT (atomic_read(&krx->krx_refcount) == 0); - LASSERT (atomic_read(&krx->krx_refcount) == 1); -- - if (dest_nid == kqswnal_lib.ni.nid) { /* It's for me :) */ - atomic_set(&krx->krx_refcount, 1); - lib_parse (&kqswnal_lib, hdr, krx); - kqswnal_rx_done(krx); - if 
(dest_nid == kqswnal_lib.libnal_ni.ni_pid.nid) { /* It's for me :) */ - /* I ignore parse errors since I'm not consuming a byte - * stream */ - (void)lib_parse (&kqswnal_lib, hdr, krx); - - /* Drop my ref; any RDMA activity takes an additional ref */ - kqswnal_rx_decref(krx); -- return; -- } -- --#if KQSW_CHECKSUM - CERROR ("checksums for forwarded packets not implemented\n"); - LBUG (); - LASSERTF (0, "checksums for forwarded packets not implemented\n"); --#endif - -- if (kqswnal_nid2elanid (dest_nid) >= 0) /* should have gone direct to peer */ -- { -- CERROR("dropping packet from "LPX64" for "LPX64 - ": target is peer\n", NTOH__u64(hdr->src_nid), dest_nid); - ": target is peer\n", le64_to_cpu(hdr->src_nid), dest_nid); -- - kqswnal_requeue_rx (krx); - kqswnal_rx_decref (krx); -- return; -- } -- -- nob = payload_nob = krx->krx_nob - KQSW_HDR_SIZE; -- niov = 0; -- if (nob > 0) { -- krx->krx_kiov[0].kiov_offset = KQSW_HDR_SIZE; -- krx->krx_kiov[0].kiov_len = MIN(PAGE_SIZE - KQSW_HDR_SIZE, nob); -- niov = 1; -- nob -= PAGE_SIZE - KQSW_HDR_SIZE; -- -- while (nob > 0) { -- LASSERT (niov < krx->krx_npages); -- -- krx->krx_kiov[niov].kiov_offset = 0; -- krx->krx_kiov[niov].kiov_len = MIN(PAGE_SIZE, nob); -- niov++; -- nob -= PAGE_SIZE; -- } -- } -- -- kpr_fwd_init (&krx->krx_fwd, dest_nid, -- hdr, payload_nob, niov, krx->krx_kiov, -- kqswnal_fwd_callback, krx); -- -- kpr_fwd_start (&kqswnal_data.kqn_router, &krx->krx_fwd); --} -- --/* Receive Interrupt Handler: posts to schedulers */ --void --kqswnal_rxhandler(EP_RXD *rxd) --{ -- unsigned long flags; -- int nob = ep_rxd_len (rxd); -- int status = ep_rxd_status (rxd); -- kqswnal_rx_t *krx = (kqswnal_rx_t *)ep_rxd_arg (rxd); -- -- CDEBUG(D_NET, "kqswnal_rxhandler: rxd %p, krx %p, nob %d, status %d\n", -- rxd, krx, nob, status); -- -- LASSERT (krx != NULL); - - LASSERT (krx->krx_state = KRX_POSTED); - - krx->krx_state = KRX_PARSE; -- krx->krx_rxd = rxd; -- krx->krx_nob = nob; --#if MULTIRAIL_EKC -- krx->krx_rpc_reply_needed 
= (status != EP_SHUTDOWN) && ep_rxd_isrpc(rxd); --#else -- krx->krx_rpc_reply_needed = ep_rxd_isrpc(rxd); --#endif - - /* Default to failure if an RPC reply is requested but not handled */ - krx->krx_rpc_reply_status = -EPROTO; - atomic_set (&krx->krx_refcount, 1); - -- /* must receive a whole header to be able to parse */ -- if (status != EP_SUCCESS || nob < sizeof (ptl_hdr_t)) -- { -- /* receives complete with failure when receiver is removed */ --#if MULTIRAIL_EKC -- if (status == EP_SHUTDOWN) -- LASSERT (kqswnal_data.kqn_shuttingdown); -- else -- CERROR("receive status failed with status %d nob %d\n", -- ep_rxd_status(rxd), nob); --#else -- if (!kqswnal_data.kqn_shuttingdown) -- CERROR("receive status failed with status %d nob %d\n", -- ep_rxd_status(rxd), nob); --#endif - kqswnal_requeue_rx (krx); - kqswnal_rx_decref(krx); -- return; -- } -- -- if (!in_interrupt()) { - kqswnal_rx (krx); - kqswnal_parse(krx); -- return; -- } -- -- spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); -- -- list_add_tail (&krx->krx_list, &kqswnal_data.kqn_readyrxds); -- wake_up (&kqswnal_data.kqn_sched_waitq); -- -- spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags); --} -- --#if KQSW_CHECKSUM --void --kqswnal_csum_error (kqswnal_rx_t *krx, int ishdr) --{ -- ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_kiov[0].kiov_page); -- -- CERROR ("%s checksum mismatch %p: dnid "LPX64", snid "LPX64 -- ", dpid %d, spid %d, type %d\n", -- ishdr ? 
"Header" : "Payload", krx, - NTOH__u64(hdr->dest_nid), NTOH__u64(hdr->src_nid) - NTOH__u32(hdr->dest_pid), NTOH__u32(hdr->src_pid), - NTOH__u32(hdr->type)); - le64_to_cpu(hdr->dest_nid), le64_to_cpu(hdr->src_nid) - le32_to_cpu(hdr->dest_pid), le32_to_cpu(hdr->src_pid), - le32_to_cpu(hdr->type)); -- - switch (NTOH__u32 (hdr->type)) - switch (le32_to_cpu(hdr->type)) -- { -- case PTL_MSG_ACK: -- CERROR("ACK: mlen %d dmd "LPX64"."LPX64" match "LPX64 -- " len %u\n", - NTOH__u32(hdr->msg.ack.mlength), - le32_to_cpu(hdr->msg.ack.mlength), -- hdr->msg.ack.dst_wmd.handle_cookie, -- hdr->msg.ack.dst_wmd.handle_idx, - NTOH__u64(hdr->msg.ack.match_bits), - NTOH__u32(hdr->msg.ack.length)); - le64_to_cpu(hdr->msg.ack.match_bits), - le32_to_cpu(hdr->msg.ack.length)); -- break; -- case PTL_MSG_PUT: -- CERROR("PUT: ptl %d amd "LPX64"."LPX64" match "LPX64 -- " len %u off %u data "LPX64"\n", - NTOH__u32(hdr->msg.put.ptl_index), - le32_to_cpu(hdr->msg.put.ptl_index), -- hdr->msg.put.ack_wmd.handle_cookie, -- hdr->msg.put.ack_wmd.handle_idx, - NTOH__u64(hdr->msg.put.match_bits), - NTOH__u32(hdr->msg.put.length), - NTOH__u32(hdr->msg.put.offset), - le64_to_cpu(hdr->msg.put.match_bits), - le32_to_cpu(hdr->msg.put.length), - le32_to_cpu(hdr->msg.put.offset), -- hdr->msg.put.hdr_data); -- break; -- case PTL_MSG_GET: -- CERROR ("GET: <>\n"); -- break; -- case PTL_MSG_REPLY: -- CERROR ("REPLY: <>\n"); -- break; -- default: -- CERROR ("TYPE?: <>\n"); -- } --} --#endif -- --static ptl_err_t - kqswnal_recvmsg (nal_cb_t *nal, -kqswnal_recvmsg (lib_nal_t *nal, -- void *private, -- lib_msg_t *libmsg, -- unsigned int niov, -- struct iovec *iov, -- ptl_kiov_t *kiov, -- size_t offset, -- size_t mlen, -- size_t rlen) --{ -- kqswnal_rx_t *krx = (kqswnal_rx_t *)private; -- char *buffer = page_address(krx->krx_kiov[0].kiov_page); - ptl_hdr_t *hdr = (ptl_hdr_t *)buffer; -- int page; -- char *page_ptr; -- int page_nob; -- char *iov_ptr; -- int iov_nob; -- int frag; - int rc; --#if KQSW_CHECKSUM -- 
kqsw_csum_t senders_csum; -- kqsw_csum_t payload_csum = 0; - kqsw_csum_t hdr_csum = kqsw_csum(0, buffer, sizeof(ptl_hdr_t)); - kqsw_csum_t hdr_csum = kqsw_csum(0, hdr, sizeof(*hdr)); -- size_t csum_len = mlen; -- int csum_frags = 0; -- int csum_nob = 0; -- static atomic_t csum_counter; -- int csum_verbose = (atomic_read(&csum_counter)%1000001) == 0; -- -- atomic_inc (&csum_counter); -- -- memcpy (&senders_csum, buffer + sizeof (ptl_hdr_t), sizeof (kqsw_csum_t)); -- if (senders_csum != hdr_csum) -- kqswnal_csum_error (krx, 1); --#endif - /* NB lib_parse() has already flipped *hdr */ - -- CDEBUG(D_NET,"kqswnal_recv, mlen="LPSZ", rlen="LPSZ"\n", mlen, rlen); - - if (krx->krx_rpc_reply_needed && - hdr->type == PTL_MSG_PUT) { - /* This must be an optimized PUT */ - rc = kqswnal_rdma (krx, libmsg, PTL_MSG_PUT, - niov, iov, kiov, offset, mlen); - return (rc == 0 ? PTL_OK : PTL_FAIL); - } -- -- /* What was actually received must be >= payload. */ -- LASSERT (mlen <= rlen); -- if (krx->krx_nob < KQSW_HDR_SIZE + mlen) { -- CERROR("Bad message size: have %d, need %d + %d\n", -- krx->krx_nob, (int)KQSW_HDR_SIZE, (int)mlen); -- return (PTL_FAIL); -- } -- -- /* It must be OK to kmap() if required */ -- LASSERT (kiov == NULL || !in_interrupt ()); -- /* Either all pages or all vaddrs */ -- LASSERT (!(kiov != NULL && iov != NULL)); -- -- if (mlen != 0) { -- page = 0; -- page_ptr = buffer + KQSW_HDR_SIZE; -- page_nob = PAGE_SIZE - KQSW_HDR_SIZE; -- -- LASSERT (niov > 0); -- -- if (kiov != NULL) { -- /* skip complete frags */ -- while (offset >= kiov->kiov_len) { -- offset -= kiov->kiov_len; -- kiov++; -- niov--; -- LASSERT (niov > 0); -- } -- iov_ptr = ((char *)kmap (kiov->kiov_page)) + -- kiov->kiov_offset + offset; -- iov_nob = kiov->kiov_len - offset; -- } else { -- /* skip complete frags */ -- while (offset >= iov->iov_len) { -- offset -= iov->iov_len; -- iov++; -- niov--; -- LASSERT (niov > 0); -- } -- iov_ptr = iov->iov_base + offset; -- iov_nob = iov->iov_len - offset; -- } 
-- -- for (;;) -- { -- frag = mlen; -- if (frag > page_nob) -- frag = page_nob; -- if (frag > iov_nob) -- frag = iov_nob; -- -- memcpy (iov_ptr, page_ptr, frag); --#if KQSW_CHECKSUM -- payload_csum = kqsw_csum (payload_csum, iov_ptr, frag); -- csum_nob += frag; -- csum_frags++; --#endif -- mlen -= frag; -- if (mlen == 0) -- break; -- -- page_nob -= frag; -- if (page_nob != 0) -- page_ptr += frag; -- else -- { -- page++; -- LASSERT (page < krx->krx_npages); -- page_ptr = page_address(krx->krx_kiov[page].kiov_page); -- page_nob = PAGE_SIZE; -- } -- -- iov_nob -= frag; -- if (iov_nob != 0) -- iov_ptr += frag; -- else if (kiov != NULL) { -- kunmap (kiov->kiov_page); -- kiov++; -- niov--; -- LASSERT (niov > 0); -- iov_ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset; -- iov_nob = kiov->kiov_len; -- } else { -- iov++; -- niov--; -- LASSERT (niov > 0); -- iov_ptr = iov->iov_base; -- iov_nob = iov->iov_len; -- } -- } -- -- if (kiov != NULL) -- kunmap (kiov->kiov_page); -- } -- --#if KQSW_CHECKSUM -- memcpy (&senders_csum, buffer + sizeof(ptl_hdr_t) + sizeof(kqsw_csum_t), -- sizeof(kqsw_csum_t)); -- -- if (csum_len != rlen) -- CERROR("Unable to checksum data in user's buffer\n"); -- else if (senders_csum != payload_csum) -- kqswnal_csum_error (krx, 0); -- -- if (csum_verbose) -- CERROR("hdr csum %lx, payload_csum %lx, csum_frags %d, " -- "csum_nob %d\n", -- hdr_csum, payload_csum, csum_frags, csum_nob); --#endif -- lib_finalize(nal, private, libmsg, PTL_OK); -- -- return (PTL_OK); --} -- --static ptl_err_t - kqswnal_recv(nal_cb_t *nal, -kqswnal_recv(lib_nal_t *nal, -- void *private, -- lib_msg_t *libmsg, -- unsigned int niov, -- struct iovec *iov, -- size_t offset, -- size_t mlen, -- size_t rlen) --{ -- return (kqswnal_recvmsg(nal, private, libmsg, -- niov, iov, NULL, -- offset, mlen, rlen)); --} -- --static ptl_err_t - kqswnal_recv_pages (nal_cb_t *nal, -kqswnal_recv_pages (lib_nal_t *nal, -- void *private, -- lib_msg_t *libmsg, -- unsigned int niov, -- 
ptl_kiov_t *kiov, -- size_t offset, -- size_t mlen, -- size_t rlen) --{ -- return (kqswnal_recvmsg(nal, private, libmsg, -- niov, NULL, kiov, -- offset, mlen, rlen)); --} -- --int --kqswnal_thread_start (int (*fn)(void *arg), void *arg) --{ -- long pid = kernel_thread (fn, arg, 0); -- -- if (pid < 0) -- return ((int)pid); -- -- atomic_inc (&kqswnal_data.kqn_nthreads); - atomic_inc (&kqswnal_data.kqn_nthreads_running); -- return (0); --} -- --void --kqswnal_thread_fini (void) --{ -- atomic_dec (&kqswnal_data.kqn_nthreads); --} -- --int --kqswnal_scheduler (void *arg) --{ -- kqswnal_rx_t *krx; -- kqswnal_tx_t *ktx; -- kpr_fwd_desc_t *fwd; -- unsigned long flags; -- int rc; -- int counter = 0; - int shuttingdown = 0; -- int did_something; -- -- kportal_daemonize ("kqswnal_sched"); -- kportal_blockallsigs (); -- -- spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); -- -- for (;;) -- { - if (kqswnal_data.kqn_shuttingdown != shuttingdown) { - - if (kqswnal_data.kqn_shuttingdown == 2) - break; - - /* During stage 1 of shutdown we are still responsive - * to receives */ - - atomic_dec (&kqswnal_data.kqn_nthreads_running); - shuttingdown = kqswnal_data.kqn_shuttingdown; - } - -- did_something = 0; -- -- if (!list_empty (&kqswnal_data.kqn_readyrxds)) -- { -- krx = list_entry(kqswnal_data.kqn_readyrxds.next, -- kqswnal_rx_t, krx_list); -- list_del (&krx->krx_list); -- spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, -- flags); -- - kqswnal_rx (krx); - switch (krx->krx_state) { - case KRX_PARSE: - kqswnal_parse (krx); - break; - case KRX_COMPLETING: - /* Drop last ref to reply to RPC and requeue */ - LASSERT (krx->krx_rpc_reply_needed); - kqswnal_rx_decref (krx); - break; - default: - LBUG(); - } -- -- did_something = 1; -- spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags); -- } -- - if (!shuttingdown && - !list_empty (&kqswnal_data.kqn_delayedtxds)) - if (!list_empty (&kqswnal_data.kqn_delayedtxds)) -- { -- ktx = list_entry(kqswnal_data.kqn_delayedtxds.next, -- 
kqswnal_tx_t, ktx_list); -- list_del_init (&ktx->ktx_delayed_list); -- spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, -- flags); -- -- rc = kqswnal_launch (ktx); - if (rc != 0) /* failed: ktx_nid down? */ - { - if (rc != 0) { -- CERROR("Failed delayed transmit to "LPX64 -- ": %d\n", ktx->ktx_nid, rc); -- kqswnal_tx_done (ktx, rc); -- } - atomic_dec (&kqswnal_data.kqn_pending_txs); -- -- did_something = 1; -- spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); -- } -- - if (!shuttingdown & - !list_empty (&kqswnal_data.kqn_delayedfwds)) - if (!list_empty (&kqswnal_data.kqn_delayedfwds)) -- { -- fwd = list_entry (kqswnal_data.kqn_delayedfwds.next, kpr_fwd_desc_t, kprfd_list); -- list_del (&fwd->kprfd_list); -- spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags); -- - /* If we're shutting down, this will just requeue fwd on kqn_idletxd_fwdq */ -- kqswnal_fwd_packet (NULL, fwd); -- -- did_something = 1; -- spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); -- } -- - /* nothing to do or hogging CPU */ - /* nothing to do or hogging CPU */ -- if (!did_something || counter++ == KQSW_RESCHED) { -- spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, -- flags); -- -- counter = 0; -- -- if (!did_something) { - if (kqswnal_data.kqn_shuttingdown == 2) { - /* We only exit in stage 2 of shutdown when - * there's nothing left to do */ - break; - } -- rc = wait_event_interruptible (kqswnal_data.kqn_sched_waitq, - kqswnal_data.kqn_shuttingdown != shuttingdown || - kqswnal_data.kqn_shuttingdown == 2 || -- !list_empty(&kqswnal_data.kqn_readyrxds) || -- !list_empty(&kqswnal_data.kqn_delayedtxds) || -- !list_empty(&kqswnal_data.kqn_delayedfwds)); -- LASSERT (rc == 0); -- } else if (need_resched()) -- schedule (); -- -- spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); -- } -- } - - spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags); -- -- kqswnal_thread_fini (); -- return (0); --} -- - nal_cb_t kqswnal_lib = -lib_nal_t kqswnal_lib = --{ - 
nal_data: &kqswnal_data, /* NAL private data */ - cb_send: kqswnal_send, - cb_send_pages: kqswnal_send_pages, - cb_recv: kqswnal_recv, - cb_recv_pages: kqswnal_recv_pages, - cb_read: kqswnal_read, - cb_write: kqswnal_write, - cb_malloc: kqswnal_malloc, - cb_free: kqswnal_free, - cb_printf: kqswnal_printf, - cb_cli: kqswnal_cli, - cb_sti: kqswnal_sti, - cb_dist: kqswnal_dist - libnal_data: &kqswnal_data, /* NAL private data */ - libnal_send: kqswnal_send, - libnal_send_pages: kqswnal_send_pages, - libnal_recv: kqswnal_recv, - libnal_recv_pages: kqswnal_recv_pages, - libnal_dist: kqswnal_dist --}; diff --cc lnet/klnds/scimaclnd/.cvsignore index 48b17e9,48b17e9..0000000 deleted file mode 100644,100644 --- a/lnet/klnds/scimaclnd/.cvsignore +++ /dev/null @@@ -1,10 -1,10 +1,0 @@@ --.deps --Makefile --autoMakefile.in --autoMakefile --*.ko --*.mod.c --.*.flags --.*.cmd --.tmp_versions --.depend diff --cc lnet/klnds/socklnd/.cvsignore index 5ed596b,5ed596b..0000000 deleted file mode 100644,100644 --- a/lnet/klnds/socklnd/.cvsignore +++ /dev/null @@@ -1,10 -1,10 +1,0 @@@ --.deps --Makefile --.*.cmd --autoMakefile.in --autoMakefile --*.ko --*.mod.c --.*.flags --.tmp_versions --.depend diff --cc lnet/klnds/socklnd/Makefile.in index 633b455,633b455..0000000 deleted file mode 100644,100644 --- a/lnet/klnds/socklnd/Makefile.in +++ /dev/null @@@ -1,8 -1,8 +1,0 @@@ --MODULES := ksocknal --ksocknal-objs := socknal.o socknal_cb.o -- --# If you don't build with -O2, your modules won't insert, becahse htonl is --# just special that way. --EXTRA_POST_CFLAGS := -O2 -- --@INCLUDE_RULES@ diff --cc lnet/klnds/socklnd/autoMakefile.am index 070b649,070b649..0000000 deleted file mode 100644,100644 --- a/lnet/klnds/socklnd/autoMakefile.am +++ /dev/null @@@ -1,13 -1,13 +1,0 @@@ --# Copyright (C) 2001 Cluster File Systems, Inc. --# --# This code is issued under the GNU General Public License. 
--# See the file COPYING in this distribution -- --if MODULES --if !CRAY_PORTALS --modulenet_DATA = ksocknal$(KMODEXT) --endif --endif -- --MOSTLYCLEANFILES = *.o *.ko *.mod.c --DIST_SOURCES = $(ksocknal-objs:%.o=%.c) socknal.h diff --cc lnet/klnds/socklnd/socklnd.c index bbe19cf,2a0ef11..0000000 deleted file mode 100644,100644 --- a/lnet/klnds/socklnd/socklnd.c +++ /dev/null @@@ -1,1765 -1,2529 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (C) 2001, 2002 Cluster File Systems, Inc. -- * Author: Zach Brown -- * Author: Peter J. Braam -- * Author: Phil Schwan -- * Author: Eric Barton -- * -- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ -- * -- * Portals is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Portals is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Portals; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-- */ -- --#include "socknal.h" -- -nal_t ksocknal_api; -ksock_nal_data_t ksocknal_data; --ptl_handle_ni_t ksocknal_ni; - static nal_t ksocknal_api; - #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) - ksock_nal_data_t ksocknal_data; - #else - static ksock_nal_data_t ksocknal_data; - #endif -ksock_tunables_t ksocknal_tunables; -- --kpr_nal_interface_t ksocknal_router_interface = { -- kprni_nalid: SOCKNAL, -- kprni_arg: &ksocknal_data, -- kprni_fwd: ksocknal_fwd_packet, -- kprni_notify: ksocknal_notify, --}; -- -#ifdef CONFIG_SYSCTL --#define SOCKNAL_SYSCTL 200 -- - #define SOCKNAL_SYSCTL_TIMEOUT 1 - #define SOCKNAL_SYSCTL_EAGER_ACK 2 - #define SOCKNAL_SYSCTL_ZERO_COPY 3 - #define SOCKNAL_SYSCTL_TYPED 4 - #define SOCKNAL_SYSCTL_MIN_BULK 5 -#define SOCKNAL_SYSCTL_TIMEOUT 1 -#define SOCKNAL_SYSCTL_EAGER_ACK 2 -#define SOCKNAL_SYSCTL_ZERO_COPY 3 -#define SOCKNAL_SYSCTL_TYPED 4 -#define SOCKNAL_SYSCTL_MIN_BULK 5 -#define SOCKNAL_SYSCTL_BUFFER_SIZE 6 -#define SOCKNAL_SYSCTL_NAGLE 7 -#define SOCKNAL_SYSCTL_IRQ_AFFINITY 8 -#define SOCKNAL_SYSCTL_KEEPALIVE_IDLE 9 -#define SOCKNAL_SYSCTL_KEEPALIVE_COUNT 10 -#define SOCKNAL_SYSCTL_KEEPALIVE_INTVL 11 -- --static ctl_table ksocknal_ctl_table[] = { -- {SOCKNAL_SYSCTL_TIMEOUT, "timeout", - &ksocknal_data.ksnd_io_timeout, sizeof (int), - &ksocknal_tunables.ksnd_io_timeout, sizeof (int), -- 0644, NULL, &proc_dointvec}, -- {SOCKNAL_SYSCTL_EAGER_ACK, "eager_ack", - &ksocknal_data.ksnd_eager_ack, sizeof (int), - &ksocknal_tunables.ksnd_eager_ack, sizeof (int), -- 0644, NULL, &proc_dointvec}, --#if SOCKNAL_ZC -- {SOCKNAL_SYSCTL_ZERO_COPY, "zero_copy", - &ksocknal_data.ksnd_zc_min_frag, sizeof (int), - &ksocknal_tunables.ksnd_zc_min_frag, sizeof (int), -- 0644, NULL, &proc_dointvec}, --#endif -- {SOCKNAL_SYSCTL_TYPED, "typed", - &ksocknal_data.ksnd_typed_conns, sizeof (int), - &ksocknal_tunables.ksnd_typed_conns, sizeof (int), -- 0644, NULL, &proc_dointvec}, -- {SOCKNAL_SYSCTL_MIN_BULK, "min_bulk", - &ksocknal_data.ksnd_min_bulk, 
sizeof (int), - &ksocknal_tunables.ksnd_min_bulk, sizeof (int), - 0644, NULL, &proc_dointvec}, - {SOCKNAL_SYSCTL_BUFFER_SIZE, "buffer_size", - &ksocknal_tunables.ksnd_buffer_size, sizeof(int), - 0644, NULL, &proc_dointvec}, - {SOCKNAL_SYSCTL_NAGLE, "nagle", - &ksocknal_tunables.ksnd_nagle, sizeof(int), - 0644, NULL, &proc_dointvec}, -#if CPU_AFFINITY - {SOCKNAL_SYSCTL_IRQ_AFFINITY, "irq_affinity", - &ksocknal_tunables.ksnd_irq_affinity, sizeof(int), - 0644, NULL, &proc_dointvec}, -#endif - {SOCKNAL_SYSCTL_KEEPALIVE_IDLE, "keepalive_idle", - &ksocknal_tunables.ksnd_keepalive_idle, sizeof(int), - 0644, NULL, &proc_dointvec}, - {SOCKNAL_SYSCTL_KEEPALIVE_COUNT, "keepalive_count", - &ksocknal_tunables.ksnd_keepalive_count, sizeof(int), - 0644, NULL, &proc_dointvec}, - {SOCKNAL_SYSCTL_KEEPALIVE_INTVL, "keepalive_intvl", - &ksocknal_tunables.ksnd_keepalive_intvl, sizeof(int), -- 0644, NULL, &proc_dointvec}, -- { 0 } --}; -- --static ctl_table ksocknal_top_ctl_table[] = { -- {SOCKNAL_SYSCTL, "socknal", NULL, 0, 0555, ksocknal_ctl_table}, -- { 0 } --}; - - int - ksocknal_api_forward(nal_t *nal, int id, void *args, size_t args_len, - void *ret, size_t ret_len) - { - ksock_nal_data_t *k; - nal_cb_t *nal_cb; - - k = nal->nal_data; - nal_cb = k->ksnd_nal_cb; - - lib_dispatch(nal_cb, k, id, args, ret); /* ksocknal_send needs k */ - return PTL_OK; - } - - int - ksocknal_api_shutdown(nal_t *nal, int ni) - { - return PTL_OK; - } - - void - ksocknal_api_yield(nal_t *nal) - { - our_cond_resched(); - return; - } - - void - ksocknal_api_lock(nal_t *nal, unsigned long *flags) - { - ksock_nal_data_t *k; - nal_cb_t *nal_cb; - - k = nal->nal_data; - nal_cb = k->ksnd_nal_cb; - nal_cb->cb_cli(nal_cb,flags); - } - - void - ksocknal_api_unlock(nal_t *nal, unsigned long *flags) - { - ksock_nal_data_t *k; - nal_cb_t *nal_cb; - - k = nal->nal_data; - nal_cb = k->ksnd_nal_cb; - nal_cb->cb_sti(nal_cb,flags); - } - - nal_t * - ksocknal_init(int interface, ptl_pt_index_t ptl_size, - ptl_ac_index_t 
ac_size, ptl_pid_t requested_pid) - { - CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n", (ptl_nid_t)0); - lib_init(&ksocknal_lib, (ptl_nid_t)0, 0, 10, ptl_size, ac_size); - return (&ksocknal_api); - } - - /* - * EXTRA functions follow - */ -#endif -- --int --ksocknal_set_mynid(ptl_nid_t nid) --{ - lib_ni_t *ni = &ksocknal_lib.ni; - lib_ni_t *ni = &ksocknal_lib.libnal_ni; -- -- /* FIXME: we have to do this because we call lib_init() at module -- * insertion time, which is before we have 'mynid' available. lib_init -- * sets the NAL's nid, which it uses to tell other nodes where packets -- * are coming from. This is not a very graceful solution to this -- * problem. */ -- -- CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n", - nid, ni->nid); - nid, ni->ni_pid.nid); -- - ni->nid = nid; - ni->ni_pid.nid = nid; -- return (0); --} -- --void --ksocknal_bind_irq (unsigned int irq) --{ --#if (defined(CONFIG_SMP) && CPU_AFFINITY) -- int bind; -- int cpu; -- unsigned long flags; -- char cmdline[64]; -- ksock_irqinfo_t *info; -- char *argv[] = {"/bin/sh", -- "-c", -- cmdline, -- NULL}; -- char *envp[] = {"HOME=/", -- "PATH=/sbin:/bin:/usr/sbin:/usr/bin", -- NULL}; -- -- LASSERT (irq < NR_IRQS); -- if (irq == 0) /* software NIC or affinity disabled */ -- return; -- -- info = &ksocknal_data.ksnd_irqinfo[irq]; -- -- write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); -- -- LASSERT (info->ksni_valid); -- bind = !info->ksni_bound; -- info->ksni_bound = 1; -- -- write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags); -- -- if (!bind) /* bound already */ -- return; -- -- cpu = ksocknal_irqsched2cpu(info->ksni_sched); -- snprintf (cmdline, sizeof (cmdline), -- "echo %d > /proc/irq/%u/smp_affinity", 1 << cpu, irq); -- -- printk (KERN_INFO "Lustre: Binding irq %u to CPU %d with cmd: %s\n", -- irq, cpu, cmdline); -- -- /* FIXME: Find a better method of setting IRQ affinity... 
-- */ -- -- USERMODEHELPER(argv[0], argv, envp); --#endif -} - -ksock_interface_t * -ksocknal_ip2iface(__u32 ip) -{ - int i; - ksock_interface_t *iface; - - for (i = 0; i < ksocknal_data.ksnd_ninterfaces; i++) { - LASSERT(i < SOCKNAL_MAX_INTERFACES); - iface = &ksocknal_data.ksnd_interfaces[i]; - - if (iface->ksni_ipaddr == ip) - return (iface); - } - - return (NULL); --} -- --ksock_route_t * - ksocknal_create_route (__u32 ipaddr, int port, int buffer_size, - int irq_affinity, int eager) -ksocknal_create_route (__u32 ipaddr, int port) --{ -- ksock_route_t *route; -- -- PORTAL_ALLOC (route, sizeof (*route)); -- if (route == NULL) -- return (NULL); -- -- atomic_set (&route->ksnr_refcount, 1); - route->ksnr_sharecount = 0; -- route->ksnr_peer = NULL; -- route->ksnr_timeout = jiffies; -- route->ksnr_retry_interval = SOCKNAL_MIN_RECONNECT_INTERVAL; -- route->ksnr_ipaddr = ipaddr; -- route->ksnr_port = port; - route->ksnr_buffer_size = buffer_size; - route->ksnr_irq_affinity = irq_affinity; - route->ksnr_eager = eager; -- route->ksnr_connecting = 0; -- route->ksnr_connected = 0; -- route->ksnr_deleted = 0; -- route->ksnr_conn_count = 0; - route->ksnr_share_count = 0; -- -- return (route); --} -- --void --ksocknal_destroy_route (ksock_route_t *route) --{ - LASSERT (route->ksnr_sharecount == 0); - -- if (route->ksnr_peer != NULL) -- ksocknal_put_peer (route->ksnr_peer); -- -- PORTAL_FREE (route, sizeof (*route)); --} -- --void --ksocknal_put_route (ksock_route_t *route) --{ -- CDEBUG (D_OTHER, "putting route[%p] (%d)\n", -- route, atomic_read (&route->ksnr_refcount)); -- -- LASSERT (atomic_read (&route->ksnr_refcount) > 0); -- if (!atomic_dec_and_test (&route->ksnr_refcount)) -- return; -- -- ksocknal_destroy_route (route); --} -- --ksock_peer_t * --ksocknal_create_peer (ptl_nid_t nid) --{ -- ksock_peer_t *peer; -- -- LASSERT (nid != PTL_NID_ANY); -- -- PORTAL_ALLOC (peer, sizeof (*peer)); -- if (peer == NULL) -- return (NULL); -- - memset (peer, 0, sizeof (*peer)); - 
memset (peer, 0, sizeof (*peer)); /* NULL pointers/clear flags etc */ -- -- peer->ksnp_nid = nid; -- atomic_set (&peer->ksnp_refcount, 1); /* 1 ref for caller */ -- peer->ksnp_closing = 0; -- INIT_LIST_HEAD (&peer->ksnp_conns); -- INIT_LIST_HEAD (&peer->ksnp_routes); -- INIT_LIST_HEAD (&peer->ksnp_tx_queue); -- -- atomic_inc (&ksocknal_data.ksnd_npeers); -- return (peer); --} -- --void --ksocknal_destroy_peer (ksock_peer_t *peer) --{ -- CDEBUG (D_NET, "peer "LPX64" %p deleted\n", peer->ksnp_nid, peer); -- -- LASSERT (atomic_read (&peer->ksnp_refcount) == 0); -- LASSERT (list_empty (&peer->ksnp_conns)); -- LASSERT (list_empty (&peer->ksnp_routes)); -- LASSERT (list_empty (&peer->ksnp_tx_queue)); -- -- PORTAL_FREE (peer, sizeof (*peer)); -- -- /* NB a peer's connections and autoconnect routes keep a reference -- * on their peer until they are destroyed, so we can be assured -- * that _all_ state to do with this peer has been cleaned up when -- * its refcount drops to zero. */ -- atomic_dec (&ksocknal_data.ksnd_npeers); --} -- --void --ksocknal_put_peer (ksock_peer_t *peer) --{ -- CDEBUG (D_OTHER, "putting peer[%p] -> "LPX64" (%d)\n", -- peer, peer->ksnp_nid, -- atomic_read (&peer->ksnp_refcount)); -- -- LASSERT (atomic_read (&peer->ksnp_refcount) > 0); -- if (!atomic_dec_and_test (&peer->ksnp_refcount)) -- return; -- -- ksocknal_destroy_peer (peer); --} -- --ksock_peer_t * --ksocknal_find_peer_locked (ptl_nid_t nid) --{ -- struct list_head *peer_list = ksocknal_nid2peerlist (nid); -- struct list_head *tmp; -- ksock_peer_t *peer; -- -- list_for_each (tmp, peer_list) { -- -- peer = list_entry (tmp, ksock_peer_t, ksnp_list); -- -- LASSERT (!peer->ksnp_closing); - LASSERT (!(list_empty (&peer->ksnp_routes) && - list_empty (&peer->ksnp_conns))); -- -- if (peer->ksnp_nid != nid) -- continue; -- -- CDEBUG(D_NET, "got peer [%p] -> "LPX64" (%d)\n", -- peer, nid, atomic_read (&peer->ksnp_refcount)); -- return (peer); -- } -- return (NULL); --} -- --ksock_peer_t * 
--ksocknal_get_peer (ptl_nid_t nid) --{ -- ksock_peer_t *peer; -- -- read_lock (&ksocknal_data.ksnd_global_lock); -- peer = ksocknal_find_peer_locked (nid); -- if (peer != NULL) /* +1 ref for caller? */ -- atomic_inc (&peer->ksnp_refcount); -- read_unlock (&ksocknal_data.ksnd_global_lock); -- -- return (peer); --} -- --void --ksocknal_unlink_peer_locked (ksock_peer_t *peer) --{ - int i; - __u32 ip; - - for (i = 0; i < peer->ksnp_n_passive_ips; i++) { - LASSERT (i < SOCKNAL_MAX_INTERFACES); - ip = peer->ksnp_passive_ips[i]; - - ksocknal_ip2iface(ip)->ksni_npeers--; - } - - LASSERT (list_empty(&peer->ksnp_conns)); - LASSERT (list_empty(&peer->ksnp_routes)); -- LASSERT (!peer->ksnp_closing); -- peer->ksnp_closing = 1; -- list_del (&peer->ksnp_list); -- /* lose peerlist's ref */ -- ksocknal_put_peer (peer); --} -- - ksock_route_t * - ksocknal_get_route_by_idx (int index) -int -ksocknal_get_peer_info (int index, ptl_nid_t *nid, - __u32 *myip, __u32 *peer_ip, int *port, - int *conn_count, int *share_count) --{ -- ksock_peer_t *peer; -- struct list_head *ptmp; -- ksock_route_t *route; -- struct list_head *rtmp; -- int i; - int j; - int rc = -ENOENT; -- -- read_lock (&ksocknal_data.ksnd_global_lock); -- -- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - -- list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) { -- peer = list_entry (ptmp, ksock_peer_t, ksnp_list); -- - LASSERT (!(list_empty (&peer->ksnp_routes) && - list_empty (&peer->ksnp_conns))); - if (peer->ksnp_n_passive_ips == 0 && - list_empty(&peer->ksnp_routes)) { - if (index-- > 0) - continue; - - *nid = peer->ksnp_nid; - *myip = 0; - *peer_ip = 0; - *port = 0; - *conn_count = 0; - *share_count = 0; - rc = 0; - goto out; - } -- - for (j = 0; j < peer->ksnp_n_passive_ips; j++) { - if (index-- > 0) - continue; - - *nid = peer->ksnp_nid; - *myip = peer->ksnp_passive_ips[j]; - *peer_ip = 0; - *port = 0; - *conn_count = 0; - *share_count = 0; - rc = 0; - goto out; - } - -- list_for_each (rtmp, 
&peer->ksnp_routes) { -- if (index-- > 0) -- continue; -- - route = list_entry (rtmp, ksock_route_t, ksnr_list); - atomic_inc (&route->ksnr_refcount); - read_unlock (&ksocknal_data.ksnd_global_lock); - return (route); - route = list_entry(rtmp, ksock_route_t, - ksnr_list); - - *nid = peer->ksnp_nid; - *myip = route->ksnr_myipaddr; - *peer_ip = route->ksnr_ipaddr; - *port = route->ksnr_port; - *conn_count = route->ksnr_conn_count; - *share_count = route->ksnr_share_count; - rc = 0; - goto out; -- } -- } -- } - - out: -- read_unlock (&ksocknal_data.ksnd_global_lock); - return (NULL); - return (rc); -} - -void -ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn) -{ - ksock_peer_t *peer = route->ksnr_peer; - int type = conn->ksnc_type; - ksock_interface_t *iface; - - conn->ksnc_route = route; - atomic_inc (&route->ksnr_refcount); - - if (route->ksnr_myipaddr != conn->ksnc_myipaddr) { - if (route->ksnr_myipaddr == 0) { - /* route wasn't bound locally yet (the initial route) */ - CWARN("Binding "LPX64" %u.%u.%u.%u to %u.%u.%u.%u\n", - peer->ksnp_nid, - HIPQUAD(route->ksnr_ipaddr), - HIPQUAD(conn->ksnc_myipaddr)); - } else { - CWARN("Rebinding "LPX64" %u.%u.%u.%u from " - "%u.%u.%u.%u to %u.%u.%u.%u\n", - peer->ksnp_nid, - HIPQUAD(route->ksnr_ipaddr), - HIPQUAD(route->ksnr_myipaddr), - HIPQUAD(conn->ksnc_myipaddr)); - - iface = ksocknal_ip2iface(route->ksnr_myipaddr); - if (iface != NULL) - iface->ksni_nroutes--; - } - route->ksnr_myipaddr = conn->ksnc_myipaddr; - iface = ksocknal_ip2iface(route->ksnr_myipaddr); - if (iface != NULL) - iface->ksni_nroutes++; - } - - route->ksnr_connected |= (1<ksnr_connecting &= ~(1<ksnr_conn_count++; - - /* Successful connection => further attempts can - * proceed immediately */ - route->ksnr_timeout = jiffies; - route->ksnr_retry_interval = SOCKNAL_MIN_RECONNECT_INTERVAL; -} - -void -ksocknal_add_route_locked (ksock_peer_t *peer, ksock_route_t *route) -{ - struct list_head *tmp; - ksock_conn_t *conn; - int 
type; - ksock_route_t *route2; - - LASSERT (route->ksnr_peer == NULL); - LASSERT (route->ksnr_connecting == 0); - LASSERT (route->ksnr_connected == 0); - - /* LASSERT(unique) */ - list_for_each(tmp, &peer->ksnp_routes) { - route2 = list_entry(tmp, ksock_route_t, ksnr_list); - - if (route2->ksnr_ipaddr == route->ksnr_ipaddr) { - CERROR ("Duplicate route "LPX64" %u.%u.%u.%u\n", - peer->ksnp_nid, HIPQUAD(route->ksnr_ipaddr)); - LBUG(); - } - } - - route->ksnr_peer = peer; - atomic_inc (&peer->ksnp_refcount); - /* peer's routelist takes over my ref on 'route' */ - list_add_tail(&route->ksnr_list, &peer->ksnp_routes); - - list_for_each(tmp, &peer->ksnp_conns) { - conn = list_entry(tmp, ksock_conn_t, ksnc_list); - type = conn->ksnc_type; - - if (conn->ksnc_ipaddr != route->ksnr_ipaddr) - continue; - - ksocknal_associate_route_conn_locked(route, conn); - /* keep going (typed routes) */ - } -} - -void -ksocknal_del_route_locked (ksock_route_t *route) -{ - ksock_peer_t *peer = route->ksnr_peer; - ksock_interface_t *iface; - ksock_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; - - LASSERT (!route->ksnr_deleted); - - /* Close associated conns */ - list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) { - conn = list_entry(ctmp, ksock_conn_t, ksnc_list); - - if (conn->ksnc_route != route) - continue; - - ksocknal_close_conn_locked (conn, 0); - } - - if (route->ksnr_myipaddr != 0) { - iface = ksocknal_ip2iface(route->ksnr_myipaddr); - if (iface != NULL) - iface->ksni_nroutes--; - } - - route->ksnr_deleted = 1; - list_del (&route->ksnr_list); - ksocknal_put_route (route); /* drop peer's ref */ - - if (list_empty (&peer->ksnp_routes) && - list_empty (&peer->ksnp_conns)) { - /* I've just removed the last autoconnect route of a peer - * with no active connections */ - ksocknal_unlink_peer_locked (peer); - } --} -- --int - ksocknal_add_route (ptl_nid_t nid, __u32 ipaddr, int port, int bufnob, - int bind_irq, int share, int eager) -ksocknal_add_peer (ptl_nid_t nid, 
__u32 ipaddr, int port) --{ -- unsigned long flags; - struct list_head *tmp; -- ksock_peer_t *peer; -- ksock_peer_t *peer2; -- ksock_route_t *route; - struct list_head *rtmp; -- ksock_route_t *route2; -- -- if (nid == PTL_NID_ANY) -- return (-EINVAL); -- -- /* Have a brand new peer ready... */ -- peer = ksocknal_create_peer (nid); -- if (peer == NULL) -- return (-ENOMEM); -- - route = ksocknal_create_route (ipaddr, port, bufnob, - bind_irq, eager); - route = ksocknal_create_route (ipaddr, port); -- if (route == NULL) { -- ksocknal_put_peer (peer); -- return (-ENOMEM); -- } -- -- write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); -- -- peer2 = ksocknal_find_peer_locked (nid); -- if (peer2 != NULL) { -- ksocknal_put_peer (peer); -- peer = peer2; -- } else { - /* peer table takes existing ref on peer */ - list_add (&peer->ksnp_list, - ksocknal_nid2peerlist (nid)); - /* peer table takes my ref on peer */ - list_add_tail (&peer->ksnp_list, - ksocknal_nid2peerlist (nid)); -- } -- -- route2 = NULL; - if (share) { - /* check for existing route to this NID via this ipaddr */ - list_for_each (rtmp, &peer->ksnp_routes) { - route2 = list_entry (rtmp, ksock_route_t, ksnr_list); - - if (route2->ksnr_ipaddr == ipaddr) - break; - - route2 = NULL; - } - list_for_each (tmp, &peer->ksnp_routes) { - route2 = list_entry(tmp, ksock_route_t, ksnr_list); - - if (route2->ksnr_ipaddr == ipaddr) - break; - - route2 = NULL; -- } - - if (route2 != NULL) { - ksocknal_put_route (route); - route = route2; - if (route2 == NULL) { - ksocknal_add_route_locked(peer, route); - route->ksnr_share_count++; -- } else { - /* route takes a ref on peer */ - route->ksnr_peer = peer; - atomic_inc (&peer->ksnp_refcount); - /* peer's route list takes existing ref on route */ - list_add_tail (&route->ksnr_list, &peer->ksnp_routes); - ksocknal_put_route(route); - route2->ksnr_share_count++; -- } - - route->ksnr_sharecount++; -- -- write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags); -- -- 
return (0); --} -- --void - ksocknal_del_route_locked (ksock_route_t *route, int share, int keep_conn) -ksocknal_del_peer_locked (ksock_peer_t *peer, __u32 ip, int single_share) --{ - ksock_peer_t *peer = route->ksnr_peer; -- ksock_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; - ksock_route_t *route; - struct list_head *tmp; - struct list_head *nxt; - int nshared; -- - if (!share) - route->ksnr_sharecount = 0; - else { - route->ksnr_sharecount--; - if (route->ksnr_sharecount != 0) - return; - } - LASSERT (!peer->ksnp_closing); -- - list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) { - conn = list_entry(ctmp, ksock_conn_t, ksnc_list); - list_for_each_safe (tmp, nxt, &peer->ksnp_routes) { - route = list_entry(tmp, ksock_route_t, ksnr_list); -- - if (conn->ksnc_route != route) - if (single_share && route->ksnr_share_count == 0) -- continue; - - if (!keep_conn) { - ksocknal_close_conn_locked (conn, 0); - - /* no match */ - if (!(ip == 0 || route->ksnr_ipaddr == ip)) -- continue; - - if (!single_share) - route->ksnr_share_count = 0; - else if (route->ksnr_share_count > 0) - route->ksnr_share_count--; - - if (route->ksnr_share_count == 0) { - /* This deletes associated conns too */ - ksocknal_del_route_locked (route); -- } -- - /* keeping the conn; just dissociate it and route... 
*/ - conn->ksnc_route = NULL; - ksocknal_put_route (route); /* drop conn's ref on route */ - if (single_share) - break; -- } - - route->ksnr_deleted = 1; - list_del (&route->ksnr_list); - ksocknal_put_route (route); /* drop peer's ref */ -- - if (list_empty (&peer->ksnp_routes) && - list_empty (&peer->ksnp_conns)) { - /* I've just removed the last autoconnect route of a peer - * with no active connections */ - ksocknal_unlink_peer_locked (peer); - nshared = 0; - list_for_each_safe (tmp, nxt, &peer->ksnp_routes) { - route = list_entry(tmp, ksock_route_t, ksnr_list); - nshared += route->ksnr_share_count; - } - - if (nshared == 0) { - /* remove everything else if there are no explicit entries - * left */ - - list_for_each_safe (tmp, nxt, &peer->ksnp_routes) { - route = list_entry(tmp, ksock_route_t, ksnr_list); - - /* we should only be removing auto-entries */ - LASSERT(route->ksnr_share_count == 0); - ksocknal_del_route_locked (route); - } - - list_for_each_safe (tmp, nxt, &peer->ksnp_conns) { - conn = list_entry(tmp, ksock_conn_t, ksnc_list); - - ksocknal_close_conn_locked(conn, 0); - } -- } - - /* NB peer unlinks itself when last conn/route is removed */ --} -- --int - ksocknal_del_route (ptl_nid_t nid, __u32 ipaddr, int share, int keep_conn) -ksocknal_del_peer (ptl_nid_t nid, __u32 ip, int single_share) --{ -- unsigned long flags; -- struct list_head *ptmp; -- struct list_head *pnxt; -- ksock_peer_t *peer; - struct list_head *rtmp; - struct list_head *rnxt; - ksock_route_t *route; -- int lo; -- int hi; -- int i; -- int rc = -ENOENT; -- -- write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); -- -- if (nid != PTL_NID_ANY) -- lo = hi = ksocknal_nid2peerlist(nid) - ksocknal_data.ksnd_peers; -- else { -- lo = 0; -- hi = ksocknal_data.ksnd_peer_hash_size - 1; -- } -- -- for (i = lo; i <= hi; i++) { -- list_for_each_safe (ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) { -- peer = list_entry (ptmp, ksock_peer_t, ksnp_list); -- -- if (!(nid == PTL_NID_ANY || 
peer->ksnp_nid == nid)) -- continue; - - list_for_each_safe (rtmp, rnxt, &peer->ksnp_routes) { - route = list_entry (rtmp, ksock_route_t, - ksnr_list); -- - if (!(ipaddr == 0 || - route->ksnr_ipaddr == ipaddr)) - continue; - ksocknal_del_peer_locked (peer, ip, single_share); - rc = 0; /* matched! */ -- - ksocknal_del_route_locked (route, share, keep_conn); - rc = 0; /* matched something */ - if (share) - goto out; - } - if (single_share) - break; -- } -- } - out: - -- write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags); -- -- return (rc); --} -- --ksock_conn_t * --ksocknal_get_conn_by_idx (int index) --{ -- ksock_peer_t *peer; -- struct list_head *ptmp; -- ksock_conn_t *conn; -- struct list_head *ctmp; -- int i; -- -- read_lock (&ksocknal_data.ksnd_global_lock); -- -- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { -- list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) { -- peer = list_entry (ptmp, ksock_peer_t, ksnp_list); -- - LASSERT (!(list_empty (&peer->ksnp_routes) && - list_empty (&peer->ksnp_conns))); - LASSERT (!peer->ksnp_closing); -- -- list_for_each (ctmp, &peer->ksnp_conns) { -- if (index-- > 0) -- continue; -- -- conn = list_entry (ctmp, ksock_conn_t, ksnc_list); -- atomic_inc (&conn->ksnc_refcount); -- read_unlock (&ksocknal_data.ksnd_global_lock); -- return (conn); -- } -- } -- } -- -- read_unlock (&ksocknal_data.ksnd_global_lock); -- return (NULL); --} -- - void - ksocknal_get_peer_addr (ksock_conn_t *conn) -int -ksocknal_get_conn_addrs (ksock_conn_t *conn) --{ -- struct sockaddr_in sin; -- int len = sizeof (sin); -- int rc; -- -- rc = conn->ksnc_sock->ops->getname (conn->ksnc_sock, -- (struct sockaddr *)&sin, &len, 2); -- /* Didn't need the {get,put}connsock dance to deref ksnc_sock... 
*/ -- LASSERT (!conn->ksnc_closing); - LASSERT (len <= sizeof (sin)); -- -- if (rc != 0) { -- CERROR ("Error %d getting sock peer IP\n", rc); - return; - return rc; -- } -- -- conn->ksnc_ipaddr = ntohl (sin.sin_addr.s_addr); -- conn->ksnc_port = ntohs (sin.sin_port); - - rc = conn->ksnc_sock->ops->getname (conn->ksnc_sock, - (struct sockaddr *)&sin, &len, 0); - if (rc != 0) { - CERROR ("Error %d getting sock local IP\n", rc); - return rc; - } - - conn->ksnc_myipaddr = ntohl (sin.sin_addr.s_addr); - - return 0; --} -- --unsigned int - ksocknal_conn_irq (ksock_conn_t *conn) -ksocknal_sock_irq (struct socket *sock) --{ -- int irq = 0; -- struct dst_entry *dst; -- - dst = sk_dst_get (conn->ksnc_sock->sk); - if (!ksocknal_tunables.ksnd_irq_affinity) - return 0; - - dst = sk_dst_get (sock->sk); -- if (dst != NULL) { -- if (dst->dev != NULL) { -- irq = dst->dev->irq; -- if (irq >= NR_IRQS) { -- CERROR ("Unexpected IRQ %x\n", irq); -- irq = 0; -- } -- } -- dst_release (dst); -- } -- - /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */ - LASSERT (!conn->ksnc_closing); -- return (irq); --} -- --ksock_sched_t * --ksocknal_choose_scheduler_locked (unsigned int irq) --{ -- ksock_sched_t *sched; -- ksock_irqinfo_t *info; -- int i; -- -- LASSERT (irq < NR_IRQS); -- info = &ksocknal_data.ksnd_irqinfo[irq]; -- -- if (irq != 0 && /* hardware NIC */ -- info->ksni_valid) { /* already set up */ -- return (&ksocknal_data.ksnd_schedulers[info->ksni_sched]); -- } -- -- /* software NIC (irq == 0) || not associated with a scheduler yet. -- * Choose the CPU with the fewest connections... 
*/ -- sched = &ksocknal_data.ksnd_schedulers[0]; - for (i = 1; i < SOCKNAL_N_SCHED; i++) - for (i = 1; i < ksocknal_data.ksnd_nschedulers; i++) -- if (sched->kss_nconns > -- ksocknal_data.ksnd_schedulers[i].kss_nconns) -- sched = &ksocknal_data.ksnd_schedulers[i]; -- -- if (irq != 0) { /* Hardware NIC */ -- info->ksni_valid = 1; -- info->ksni_sched = sched - ksocknal_data.ksnd_schedulers; -- -- /* no overflow... */ -- LASSERT (info->ksni_sched == sched - ksocknal_data.ksnd_schedulers); -- } -- -- return (sched); --} -- --int - ksocknal_create_conn (ksock_route_t *route, struct socket *sock, - int bind_irq, int type) -ksocknal_local_ipvec (__u32 *ipaddrs) -{ - int i; - int nip; - - read_lock (&ksocknal_data.ksnd_global_lock); - - nip = ksocknal_data.ksnd_ninterfaces; - for (i = 0; i < nip; i++) { - LASSERT (i < SOCKNAL_MAX_INTERFACES); - - ipaddrs[i] = ksocknal_data.ksnd_interfaces[i].ksni_ipaddr; - LASSERT (ipaddrs[i] != 0); - } - - read_unlock (&ksocknal_data.ksnd_global_lock); - return (nip); -} - -int -ksocknal_match_peerip (ksock_interface_t *iface, __u32 *ips, int nips) -{ - int best_netmatch = 0; - int best_xor = 0; - int best = -1; - int this_xor; - int this_netmatch; - int i; - - for (i = 0; i < nips; i++) { - if (ips[i] == 0) - continue; - - this_xor = (ips[i] ^ iface->ksni_ipaddr); - this_netmatch = ((this_xor & iface->ksni_netmask) == 0) ? 
1 : 0; - - if (!(best < 0 || - best_netmatch < this_netmatch || - (best_netmatch == this_netmatch && - best_xor > this_xor))) - continue; - - best = i; - best_netmatch = this_netmatch; - best_xor = this_xor; - } - - LASSERT (best >= 0); - return (best); -} - -int -ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips) -{ - rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; - unsigned long flags; - ksock_interface_t *iface; - ksock_interface_t *best_iface; - int n_ips; - int i; - int j; - int k; - __u32 ip; - __u32 xor; - int this_netmatch; - int best_netmatch; - int best_npeers; - - /* CAVEAT EMPTOR: We do all our interface matching with an - * exclusive hold of global lock at IRQ priority. We're only - * expecting to be dealing with small numbers of interfaces, so the - * O(n**3)-ness shouldn't matter */ - - /* Also note that I'm not going to return more than n_peerips - * interfaces, even if I have more myself */ - - write_lock_irqsave(global_lock, flags); - - LASSERT (n_peerips <= SOCKNAL_MAX_INTERFACES); - LASSERT (ksocknal_data.ksnd_ninterfaces <= SOCKNAL_MAX_INTERFACES); - - n_ips = MIN(n_peerips, ksocknal_data.ksnd_ninterfaces); - - for (i = 0; peer->ksnp_n_passive_ips < n_ips; i++) { - /* ^ yes really... */ - - /* If we have any new interfaces, first tick off all the - * peer IPs that match old interfaces, then choose new - * interfaces to match the remaining peer IPS. - * We don't forget interfaces we've stopped using; we might - * start using them again... */ - - if (i < peer->ksnp_n_passive_ips) { - /* Old interface. 
*/ - ip = peer->ksnp_passive_ips[i]; - best_iface = ksocknal_ip2iface(ip); - - /* peer passive ips are kept up to date */ - LASSERT(best_iface != NULL); - } else { - /* choose a new interface */ - LASSERT (i == peer->ksnp_n_passive_ips); - - best_iface = NULL; - best_netmatch = 0; - best_npeers = 0; - - for (j = 0; j < ksocknal_data.ksnd_ninterfaces; j++) { - iface = &ksocknal_data.ksnd_interfaces[j]; - ip = iface->ksni_ipaddr; - - for (k = 0; k < peer->ksnp_n_passive_ips; k++) - if (peer->ksnp_passive_ips[k] == ip) - break; - - if (k < peer->ksnp_n_passive_ips) /* using it already */ - continue; - - k = ksocknal_match_peerip(iface, peerips, n_peerips); - xor = (ip ^ peerips[k]); - this_netmatch = ((xor & iface->ksni_netmask) == 0) ? 1 : 0; - - if (!(best_iface == NULL || - best_netmatch < this_netmatch || - (best_netmatch == this_netmatch && - best_npeers > iface->ksni_npeers))) - continue; - - best_iface = iface; - best_netmatch = this_netmatch; - best_npeers = iface->ksni_npeers; - } - - best_iface->ksni_npeers++; - ip = best_iface->ksni_ipaddr; - peer->ksnp_passive_ips[i] = ip; - peer->ksnp_n_passive_ips = i+1; - } - - LASSERT (best_iface != NULL); - - /* mark the best matching peer IP used */ - j = ksocknal_match_peerip(best_iface, peerips, n_peerips); - peerips[j] = 0; - } - - /* Overwrite input peer IP addresses */ - memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips)); - - write_unlock_irqrestore(global_lock, flags); - - return (n_ips); -} - -void -ksocknal_create_routes(ksock_peer_t *peer, int port, - __u32 *peer_ipaddrs, int npeer_ipaddrs) -{ - ksock_route_t *newroute = NULL; - rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; - unsigned long flags; - struct list_head *rtmp; - ksock_route_t *route; - ksock_interface_t *iface; - ksock_interface_t *best_iface; - int best_netmatch; - int this_netmatch; - int best_nroutes; - int i; - int j; - - /* CAVEAT EMPTOR: We do all our interface matching with an - * exclusive hold of global lock at 
IRQ priority. We're only - * expecting to be dealing with small numbers of interfaces, so the - * O(n**3)-ness here shouldn't matter */ - - write_lock_irqsave(global_lock, flags); - - LASSERT (npeer_ipaddrs <= SOCKNAL_MAX_INTERFACES); - - for (i = 0; i < npeer_ipaddrs; i++) { - if (newroute != NULL) { - newroute->ksnr_ipaddr = peer_ipaddrs[i]; - } else { - write_unlock_irqrestore(global_lock, flags); - - newroute = ksocknal_create_route(peer_ipaddrs[i], port); - if (newroute == NULL) - return; - - write_lock_irqsave(global_lock, flags); - } - - /* Already got a route? */ - route = NULL; - list_for_each(rtmp, &peer->ksnp_routes) { - route = list_entry(rtmp, ksock_route_t, ksnr_list); - - if (route->ksnr_ipaddr == newroute->ksnr_ipaddr) - break; - - route = NULL; - } - if (route != NULL) - continue; - - best_iface = NULL; - best_nroutes = 0; - best_netmatch = 0; - - LASSERT (ksocknal_data.ksnd_ninterfaces <= SOCKNAL_MAX_INTERFACES); - - /* Select interface to connect from */ - for (j = 0; j < ksocknal_data.ksnd_ninterfaces; j++) { - iface = &ksocknal_data.ksnd_interfaces[j]; - - /* Using this interface already? */ - list_for_each(rtmp, &peer->ksnp_routes) { - route = list_entry(rtmp, ksock_route_t, ksnr_list); - - if (route->ksnr_myipaddr == iface->ksni_ipaddr) - break; - - route = NULL; - } - if (route != NULL) - continue; - - this_netmatch = (((iface->ksni_ipaddr ^ - newroute->ksnr_ipaddr) & - iface->ksni_netmask) == 0) ? 
1 : 0; - - if (!(best_iface == NULL || - best_netmatch < this_netmatch || - (best_netmatch == this_netmatch && - best_nroutes > iface->ksni_nroutes))) - continue; - - best_iface = iface; - best_netmatch = this_netmatch; - best_nroutes = iface->ksni_nroutes; - } - - if (best_iface == NULL) - continue; - - newroute->ksnr_myipaddr = best_iface->ksni_ipaddr; - best_iface->ksni_nroutes++; - - ksocknal_add_route_locked(peer, newroute); - newroute = NULL; - } - - write_unlock_irqrestore(global_lock, flags); - if (newroute != NULL) - ksocknal_put_route(newroute); -} - -int -ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type) --{ - int passive = (type == SOCKNAL_CONN_NONE); - rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; - __u32 ipaddrs[SOCKNAL_MAX_INTERFACES]; - int nipaddrs; -- ptl_nid_t nid; - struct list_head *tmp; -- __u64 incarnation; -- unsigned long flags; -- ksock_conn_t *conn; - ksock_peer_t *peer; - ksock_conn_t *conn2; - ksock_peer_t *peer = NULL; -- ksock_peer_t *peer2; -- ksock_sched_t *sched; -- unsigned int irq; -- ksock_tx_t *tx; -- int rc; -- -- /* NB, sock has an associated file since (a) this connection might -- * have been created in userland and (b) we need to refcount the -- * socket so that we don't close it while I/O is being done on -- * it, and sock->file has that pre-cooked... 
*/ -- LASSERT (sock->file != NULL); -- LASSERT (file_count(sock->file) > 0); - LASSERT (route == NULL || !passive); -- -- rc = ksocknal_setup_sock (sock); -- if (rc != 0) -- return (rc); - - if (route == NULL) { - /* acceptor or explicit connect */ - nid = PTL_NID_ANY; - } else { - LASSERT (type != SOCKNAL_CONN_NONE); - /* autoconnect: expect this nid on exchange */ - nid = route->ksnr_peer->ksnp_nid; - } -- - rc = ksocknal_hello (sock, &nid, &type, &incarnation); - if (rc != 0) - return (rc); - - peer = NULL; - if (route == NULL) { /* not autoconnect */ - /* Assume this socket connects to a brand new peer */ - peer = ksocknal_create_peer (nid); - if (peer == NULL) - return (-ENOMEM); - } - irq = ksocknal_sock_irq (sock); -- -- PORTAL_ALLOC(conn, sizeof(*conn)); - if (conn == NULL) { - if (peer != NULL) - ksocknal_put_peer (peer); - if (conn == NULL) -- return (-ENOMEM); - } -- -- memset (conn, 0, sizeof (*conn)); -- conn->ksnc_peer = NULL; -- conn->ksnc_route = NULL; -- conn->ksnc_sock = sock; -- conn->ksnc_type = type; - conn->ksnc_incarnation = incarnation; -- conn->ksnc_saved_data_ready = sock->sk->sk_data_ready; -- conn->ksnc_saved_write_space = sock->sk->sk_write_space; -- atomic_set (&conn->ksnc_refcount, 1); /* 1 ref for me */ -- -- conn->ksnc_rx_ready = 0; -- conn->ksnc_rx_scheduled = 0; -- ksocknal_new_packet (conn, 0); -- -- INIT_LIST_HEAD (&conn->ksnc_tx_queue); -- conn->ksnc_tx_ready = 0; -- conn->ksnc_tx_scheduled = 0; -- atomic_set (&conn->ksnc_tx_nob, 0); -- - ksocknal_get_peer_addr (conn); - /* stash conn's local and remote addrs */ - rc = ksocknal_get_conn_addrs (conn); - if (rc != 0) - goto failed_0; -- - irq = ksocknal_conn_irq (conn); - if (!passive) { - /* Active connection sends HELLO eagerly */ - rc = ksocknal_local_ipvec(ipaddrs); - if (rc < 0) - goto failed_0; - nipaddrs = rc; -- - write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); - rc = ksocknal_send_hello (conn, ipaddrs, nipaddrs); - if (rc != 0) - goto failed_0; - } - - /* 
Find out/confirm peer's NID and connection type and get the - * vector of interfaces she's willing to let me connect to */ - nid = (route == NULL) ? PTL_NID_ANY : route->ksnr_peer->ksnp_nid; - rc = ksocknal_recv_hello (conn, &nid, &incarnation, ipaddrs); - if (rc < 0) - goto failed_0; - nipaddrs = rc; - LASSERT (nid != PTL_NID_ANY); -- -- if (route != NULL) { - /* Autoconnected! */ - LASSERT ((route->ksnr_connected & (1 << type)) == 0); - LASSERT ((route->ksnr_connecting & (1 << type)) != 0); - peer = route->ksnr_peer; - atomic_inc(&peer->ksnp_refcount); - } else { - peer = ksocknal_create_peer(nid); - if (peer == NULL) { - rc = -ENOMEM; - goto failed_0; - } -- - if (route->ksnr_deleted) { - /* This conn was autoconnected, but the autoconnect - * route got deleted while it was being - * established! */ - write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, - flags); - PORTAL_FREE (conn, sizeof (*conn)); - return (-ESTALE); - write_lock_irqsave(global_lock, flags); - - peer2 = ksocknal_find_peer_locked(nid); - if (peer2 == NULL) { - /* NB this puts an "empty" peer in the peer - * table (which takes my ref) */ - list_add_tail(&peer->ksnp_list, - ksocknal_nid2peerlist(nid)); - } else { - ksocknal_put_peer(peer); - peer = peer2; -- } - /* +1 ref for me */ - atomic_inc(&peer->ksnp_refcount); -- - write_unlock_irqrestore(global_lock, flags); - } - - if (!passive) { - ksocknal_create_routes(peer, conn->ksnc_port, - ipaddrs, nipaddrs); - rc = 0; - } else { - rc = ksocknal_select_ips(peer, ipaddrs, nipaddrs); - LASSERT (rc >= 0); - rc = ksocknal_send_hello (conn, ipaddrs, rc); - } - if (rc < 0) - goto failed_1; - - write_lock_irqsave (global_lock, flags); -- - /* associate conn/route */ - conn->ksnc_route = route; - atomic_inc (&route->ksnr_refcount); - if (peer->ksnp_closing || - (route != NULL && route->ksnr_deleted)) { - /* route/peer got closed under me */ - rc = -ESTALE; - goto failed_2; - } -- - route->ksnr_connecting &= ~(1 << type); - route->ksnr_connected |= 
(1 << type); - route->ksnr_conn_count++; - route->ksnr_retry_interval = SOCKNAL_MIN_RECONNECT_INTERVAL; - /* Refuse to duplicate an existing connection (both sides might - * autoconnect at once), unless this is a loopback connection */ - if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) { - list_for_each(tmp, &peer->ksnp_conns) { - conn2 = list_entry(tmp, ksock_conn_t, ksnc_list); -- - peer = route->ksnr_peer; - } else { - /* Not an autoconnected connection; see if there is an - * existing peer for this NID */ - peer2 = ksocknal_find_peer_locked (nid); - if (peer2 != NULL) { - ksocknal_put_peer (peer); - peer = peer2; - } else { - list_add (&peer->ksnp_list, - ksocknal_nid2peerlist (nid)); - /* peer list takes over existing ref */ - if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr || - conn2->ksnc_myipaddr != conn->ksnc_myipaddr || - conn2->ksnc_type != conn->ksnc_type || - conn2->ksnc_incarnation != incarnation) - continue; - - CWARN("Not creating duplicate connection to " - "%u.%u.%u.%u type %d\n", - HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_type); - rc = -EALREADY; - goto failed_2; -- } -- } -- - /* Give conn a ref on sock->file since we're going to return success */ - get_file(sock->file); - /* If the connection created by this route didn't bind to the IP - * address the route connected to, the connection/route matching - * code below probably isn't going to work. */ - if (route != NULL && - route->ksnr_ipaddr != conn->ksnc_ipaddr) { - CERROR("Route "LPX64" %u.%u.%u.%u connected to %u.%u.%u.%u\n", - peer->ksnp_nid, - HIPQUAD(route->ksnr_ipaddr), - HIPQUAD(conn->ksnc_ipaddr)); - } -- - LASSERT (!peer->ksnp_closing); - /* Search for a route corresponding to the new connection and - * create an association. This allows incoming connections created - * by routes in my peer to match my own route entries so I don't - * continually create duplicate routes. 
*/ - list_for_each (tmp, &peer->ksnp_routes) { - route = list_entry(tmp, ksock_route_t, ksnr_list); -- - conn->ksnc_peer = peer; - atomic_inc (&peer->ksnp_refcount); - if (route->ksnr_ipaddr != conn->ksnc_ipaddr) - continue; - - ksocknal_associate_route_conn_locked(route, conn); - break; - } - - conn->ksnc_peer = peer; /* conn takes my ref on peer */ - conn->ksnc_incarnation = incarnation; -- peer->ksnp_last_alive = jiffies; -- peer->ksnp_error = 0; - - sched = ksocknal_choose_scheduler_locked (irq); - sched->kss_nconns++; - conn->ksnc_scheduler = sched; -- -- /* Set the deadline for the outgoing HELLO to drain */ - conn->ksnc_tx_bufnob = sock->sk->sk_wmem_queued; -- conn->ksnc_tx_deadline = jiffies + - ksocknal_data.ksnd_io_timeout * HZ; - ksocknal_tunables.ksnd_io_timeout * HZ; - mb(); /* order with adding to peer's conn list */ -- -- list_add (&conn->ksnc_list, &peer->ksnp_conns); -- atomic_inc (&conn->ksnc_refcount); - - sched = ksocknal_choose_scheduler_locked (irq); - sched->kss_nconns++; - conn->ksnc_scheduler = sched; -- -- /* NB my callbacks block while I hold ksnd_global_lock */ -- sock->sk->sk_user_data = conn; -- sock->sk->sk_data_ready = ksocknal_data_ready; -- sock->sk->sk_write_space = ksocknal_write_space; -- -- /* Take all the packets blocking for a connection. -- * NB, it might be nicer to share these blocked packets among any - * other connections that are becoming established, however that - * confuses the normal packet launching operation, which selects a - * connection and queues the packet on it without needing an - * exclusive lock on ksnd_global_lock. */ - * other connections that are becoming established. 
*/ -- while (!list_empty (&peer->ksnp_tx_queue)) { -- tx = list_entry (peer->ksnp_tx_queue.next, -- ksock_tx_t, tx_list); -- -- list_del (&tx->tx_list); -- ksocknal_queue_tx_locked (tx, conn); -- } - - rc = ksocknal_close_stale_conns_locked (peer, incarnation); - - write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags); -- - rc = ksocknal_close_stale_conns_locked(peer, incarnation); -- if (rc != 0) -- CERROR ("Closed %d stale conns to nid "LPX64" ip %d.%d.%d.%d\n", -- rc, conn->ksnc_peer->ksnp_nid, -- HIPQUAD(conn->ksnc_ipaddr)); -- - if (bind_irq) /* irq binding required */ - ksocknal_bind_irq (irq); - write_unlock_irqrestore (global_lock, flags); - - ksocknal_bind_irq (irq); -- -- /* Call the callbacks right now to get things going. */ - ksocknal_data_ready (sock->sk, 0); - ksocknal_write_space (sock->sk); - if (ksocknal_getconnsock(conn) == 0) { - ksocknal_data_ready (sock->sk, 0); - ksocknal_write_space (sock->sk); - ksocknal_putconnsock(conn); - } -- - CDEBUG(D_IOCTL, "conn [%p] registered for nid "LPX64" ip %d.%d.%d.%d\n", - conn, conn->ksnc_peer->ksnp_nid, HIPQUAD(conn->ksnc_ipaddr)); - CWARN("New conn nid:"LPX64" [type:%d] %u.%u.%u.%u -> %u.%u.%u.%u/%d" - " incarnation:"LPX64" sched[%d]/%d\n", - nid, conn->ksnc_type, HIPQUAD(conn->ksnc_myipaddr), - HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port, incarnation, - (int)(conn->ksnc_scheduler - ksocknal_data.ksnd_schedulers), irq); -- -- ksocknal_put_conn (conn); -- return (0); - - failed_2: - if (!peer->ksnp_closing && - list_empty (&peer->ksnp_conns) && - list_empty (&peer->ksnp_routes)) - ksocknal_unlink_peer_locked(peer); - write_unlock_irqrestore(global_lock, flags); - - failed_1: - ksocknal_put_peer (peer); - - failed_0: - PORTAL_FREE (conn, sizeof(*conn)); - - LASSERT (rc != 0); - return (rc); --} -- --void --ksocknal_close_conn_locked (ksock_conn_t *conn, int error) --{ -- /* This just does the immmediate housekeeping, and queues the -- * connection for the reaper to terminate. 
-- * Caller holds ksnd_global_lock exclusively in irq context */ - ksock_peer_t *peer = conn->ksnc_peer; - ksock_route_t *route; - ksock_peer_t *peer = conn->ksnc_peer; - ksock_route_t *route; - ksock_conn_t *conn2; - struct list_head *tmp; -- -- LASSERT (peer->ksnp_error == 0); -- LASSERT (!conn->ksnc_closing); -- conn->ksnc_closing = 1; -- atomic_inc (&ksocknal_data.ksnd_nclosing_conns); -- - /* ksnd_deathrow_conns takes over peer's ref */ - list_del (&conn->ksnc_list); - -- route = conn->ksnc_route; -- if (route != NULL) { -- /* dissociate conn from route... */ -- LASSERT (!route->ksnr_deleted); -- LASSERT ((route->ksnr_connecting & (1 << conn->ksnc_type)) == 0); -- LASSERT ((route->ksnr_connected & (1 << conn->ksnc_type)) != 0); -- - route->ksnr_connected &= ~(1 << conn->ksnc_type); - conn2 = NULL; - list_for_each(tmp, &peer->ksnp_conns) { - conn2 = list_entry(tmp, ksock_conn_t, ksnc_list); - - if (conn2->ksnc_route == route && - conn2->ksnc_type == conn->ksnc_type) - break; - - conn2 = NULL; - } - if (conn2 == NULL) - route->ksnr_connected &= ~(1 << conn->ksnc_type); - -- conn->ksnc_route = NULL; -- -#if 0 /* irrelevent with only eager routes */ -- list_del (&route->ksnr_list); /* make route least favourite */ -- list_add_tail (&route->ksnr_list, &peer->ksnp_routes); - -#endif -- ksocknal_put_route (route); /* drop conn's ref on route */ -- } - - /* ksnd_deathrow_conns takes over peer's ref */ - list_del (&conn->ksnc_list); -- -- if (list_empty (&peer->ksnp_conns)) { -- /* No more connections to this peer */ -- -- peer->ksnp_error = error; /* stash last conn close reason */ -- -- if (list_empty (&peer->ksnp_routes)) { -- /* I've just closed last conn belonging to a -- * non-autoconnecting peer */ -- ksocknal_unlink_peer_locked (peer); -- } -- } -- -- spin_lock (&ksocknal_data.ksnd_reaper_lock); -- -- list_add_tail (&conn->ksnc_list, &ksocknal_data.ksnd_deathrow_conns); -- wake_up (&ksocknal_data.ksnd_reaper_waitq); -- -- spin_unlock 
(&ksocknal_data.ksnd_reaper_lock); --} -- --void --ksocknal_terminate_conn (ksock_conn_t *conn) --{ -- /* This gets called by the reaper (guaranteed thread context) to -- * disengage the socket from its callbacks and close it. -- * ksnc_refcount will eventually hit zero, and then the reaper will -- * destroy it. */ -- unsigned long flags; -- ksock_peer_t *peer = conn->ksnc_peer; -- ksock_sched_t *sched = conn->ksnc_scheduler; -- struct timeval now; -- time_t then = 0; -- int notify = 0; -- -- LASSERT(conn->ksnc_closing); -- -- /* wake up the scheduler to "send" all remaining packets to /dev/null */ -- spin_lock_irqsave(&sched->kss_lock, flags); -- -- if (!conn->ksnc_tx_scheduled && -- !list_empty(&conn->ksnc_tx_queue)){ -- list_add_tail (&conn->ksnc_tx_list, -- &sched->kss_tx_conns); -- /* a closing conn is always ready to tx */ -- conn->ksnc_tx_ready = 1; -- conn->ksnc_tx_scheduled = 1; -- /* extra ref for scheduler */ -- atomic_inc (&conn->ksnc_refcount); -- -- wake_up (&sched->kss_waitq); -- } -- -- spin_unlock_irqrestore (&sched->kss_lock, flags); -- -- /* serialise with callbacks */ -- write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); -- -- /* Remove conn's network callbacks. -- * NB I _have_ to restore the callback, rather than storing a noop, -- * since the socket could survive past this module being unloaded!! */ -- conn->ksnc_sock->sk->sk_data_ready = conn->ksnc_saved_data_ready; -- conn->ksnc_sock->sk->sk_write_space = conn->ksnc_saved_write_space; -- -- /* A callback could be in progress already; they hold a read lock -- * on ksnd_global_lock (to serialise with me) and NOOP if -- * sk_user_data is NULL. */ -- conn->ksnc_sock->sk->sk_user_data = NULL; -- -- /* OK, so this conn may not be completely disengaged from its -- * scheduler yet, but it _has_ committed to terminate... 
*/ -- conn->ksnc_scheduler->kss_nconns--; -- -- if (peer->ksnp_error != 0) { -- /* peer's last conn closed in error */ -- LASSERT (list_empty (&peer->ksnp_conns)); -- -- /* convert peer's last-known-alive timestamp from jiffies */ -- do_gettimeofday (&now); -- then = now.tv_sec - (jiffies - peer->ksnp_last_alive)/HZ; -- notify = 1; -- } -- -- write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags); -- -- /* The socket is closed on the final put; either here, or in -- * ksocknal_{send,recv}msg(). Since we set up the linger2 option -- * when the connection was established, this will close the socket -- * immediately, aborting anything buffered in it. Any hung -- * zero-copy transmits will therefore complete in finite time. */ -- ksocknal_putconnsock (conn); -- -- if (notify) -- kpr_notify (&ksocknal_data.ksnd_router, peer->ksnp_nid, -- 0, then); --} -- --void --ksocknal_destroy_conn (ksock_conn_t *conn) --{ -- /* Final coup-de-grace of the reaper */ -- CDEBUG (D_NET, "connection %p\n", conn); -- -- LASSERT (atomic_read (&conn->ksnc_refcount) == 0); -- LASSERT (conn->ksnc_route == NULL); -- LASSERT (!conn->ksnc_tx_scheduled); -- LASSERT (!conn->ksnc_rx_scheduled); -- LASSERT (list_empty(&conn->ksnc_tx_queue)); -- -- /* complete current receive if any */ -- switch (conn->ksnc_rx_state) { -- case SOCKNAL_RX_BODY: -- CERROR("Completing partial receive from "LPX64 -- ", ip %d.%d.%d.%d:%d, with error\n", -- conn->ksnc_peer->ksnp_nid, -- HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); -- lib_finalize (&ksocknal_lib, NULL, conn->ksnc_cookie, PTL_FAIL); -- break; -- case SOCKNAL_RX_BODY_FWD: -- ksocknal_fmb_callback (conn->ksnc_cookie, -ECONNABORTED); -- break; -- case SOCKNAL_RX_HEADER: -- case SOCKNAL_RX_SLOP: -- break; -- default: -- LBUG (); -- break; -- } -- -- ksocknal_put_peer (conn->ksnc_peer); -- -- PORTAL_FREE (conn, sizeof (*conn)); -- atomic_dec (&ksocknal_data.ksnd_nclosing_conns); --} -- --void --ksocknal_put_conn (ksock_conn_t *conn) --{ -- unsigned long 
flags; -- -- CDEBUG (D_OTHER, "putting conn[%p] -> "LPX64" (%d)\n", -- conn, conn->ksnc_peer->ksnp_nid, -- atomic_read (&conn->ksnc_refcount)); -- -- LASSERT (atomic_read (&conn->ksnc_refcount) > 0); -- if (!atomic_dec_and_test (&conn->ksnc_refcount)) -- return; -- -- spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags); -- -- list_add (&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns); -- wake_up (&ksocknal_data.ksnd_reaper_waitq); -- -- spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags); --} -- --int --ksocknal_close_peer_conns_locked (ksock_peer_t *peer, __u32 ipaddr, int why) --{ -- ksock_conn_t *conn; -- struct list_head *ctmp; -- struct list_head *cnxt; -- int count = 0; -- -- list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) { -- conn = list_entry (ctmp, ksock_conn_t, ksnc_list); -- -- if (ipaddr == 0 || -- conn->ksnc_ipaddr == ipaddr) { -- count++; -- ksocknal_close_conn_locked (conn, why); -- } -- } -- -- return (count); --} -- --int --ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation) --{ -- ksock_conn_t *conn; -- struct list_head *ctmp; -- struct list_head *cnxt; -- int count = 0; -- -- list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) { -- conn = list_entry (ctmp, ksock_conn_t, ksnc_list); -- -- if (conn->ksnc_incarnation == incarnation) -- continue; - - CWARN("Closing stale conn nid:"LPX64" ip:%08x/%d " - "incarnation:"LPX64"("LPX64")\n", - peer->ksnp_nid, conn->ksnc_ipaddr, conn->ksnc_port, - conn->ksnc_incarnation, incarnation); -- -- count++; -- ksocknal_close_conn_locked (conn, -ESTALE); -- } -- -- return (count); --} -- --int --ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why) --{ -- ksock_peer_t *peer = conn->ksnc_peer; -- __u32 ipaddr = conn->ksnc_ipaddr; -- unsigned long flags; -- int count; -- -- write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); -- -- count = ksocknal_close_peer_conns_locked (peer, ipaddr, why); -- -- write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, 
flags); -- -- return (count); --} -- --int --ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr) --{ -- unsigned long flags; -- ksock_peer_t *peer; -- struct list_head *ptmp; -- struct list_head *pnxt; -- int lo; -- int hi; -- int i; -- int count = 0; -- -- write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); -- -- if (nid != PTL_NID_ANY) -- lo = hi = ksocknal_nid2peerlist(nid) - ksocknal_data.ksnd_peers; -- else { -- lo = 0; -- hi = ksocknal_data.ksnd_peer_hash_size - 1; -- } -- -- for (i = lo; i <= hi; i++) { -- list_for_each_safe (ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) { -- -- peer = list_entry (ptmp, ksock_peer_t, ksnp_list); -- -- if (!(nid == PTL_NID_ANY || nid == peer->ksnp_nid)) -- continue; -- -- count += ksocknal_close_peer_conns_locked (peer, ipaddr, 0); -- } -- } -- -- write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags); -- -- /* wildcards always succeed */ -- if (nid == PTL_NID_ANY || ipaddr == 0) -- return (0); -- -- return (count == 0 ? -ENOENT : 0); --} -- --void --ksocknal_notify (void *arg, ptl_nid_t gw_nid, int alive) --{ -- /* The router is telling me she's been notified of a change in -- * gateway state.... */ -- -- CDEBUG (D_NET, "gw "LPX64" %s\n", gw_nid, alive ? "up" : "down"); -- -- if (!alive) { -- /* If the gateway crashed, close all open connections... */ -- ksocknal_close_matching_conns (gw_nid, 0); -- return; -- } -- -- /* ...otherwise do nothing. We can only establish new connections -- * if we have autroutes, and these connect on demand. 
*/ --} -- --#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) --struct tcp_opt *sock2tcp_opt(struct sock *sk) --{ -- return &(sk->tp_pinfo.af_tcp); --} --#else --struct tcp_opt *sock2tcp_opt(struct sock *sk) --{ -- struct tcp_sock *s = (struct tcp_sock *)sk; -- return &s->tcp; --} --#endif -- --void --ksocknal_push_conn (ksock_conn_t *conn) --{ -- struct sock *sk; -- struct tcp_opt *tp; -- int nonagle; -- int val = 1; -- int rc; -- mm_segment_t oldmm; -- -- rc = ksocknal_getconnsock (conn); -- if (rc != 0) /* being shut down */ -- return; -- -- sk = conn->ksnc_sock->sk; -- tp = sock2tcp_opt(sk); -- -- lock_sock (sk); -- nonagle = tp->nonagle; -- tp->nonagle = 1; -- release_sock (sk); -- -- oldmm = get_fs (); -- set_fs (KERNEL_DS); -- -- rc = sk->sk_prot->setsockopt (sk, SOL_TCP, TCP_NODELAY, -- (char *)&val, sizeof (val)); -- LASSERT (rc == 0); -- -- set_fs (oldmm); -- -- lock_sock (sk); -- tp->nonagle = nonagle; -- release_sock (sk); -- -- ksocknal_putconnsock (conn); --} -- --void --ksocknal_push_peer (ksock_peer_t *peer) --{ -- int index; -- int i; -- struct list_head *tmp; -- ksock_conn_t *conn; -- -- for (index = 0; ; index++) { -- read_lock (&ksocknal_data.ksnd_global_lock); -- -- i = 0; -- conn = NULL; -- -- list_for_each (tmp, &peer->ksnp_conns) { -- if (i++ == index) { -- conn = list_entry (tmp, ksock_conn_t, ksnc_list); -- atomic_inc (&conn->ksnc_refcount); -- break; -- } -- } -- -- read_unlock (&ksocknal_data.ksnd_global_lock); -- -- if (conn == NULL) -- break; -- -- ksocknal_push_conn (conn); -- ksocknal_put_conn (conn); -- } --} -- --int --ksocknal_push (ptl_nid_t nid) --{ -- ksock_peer_t *peer; -- struct list_head *tmp; -- int index; -- int i; -- int j; -- int rc = -ENOENT; -- -- if (nid != PTL_NID_ANY) { -- peer = ksocknal_get_peer (nid); -- -- if (peer != NULL) { -- rc = 0; -- ksocknal_push_peer (peer); -- ksocknal_put_peer (peer); -- } -- return (rc); -- } -- -- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { -- for (j = 0; ; j++) { -- 
read_lock (&ksocknal_data.ksnd_global_lock); -- -- index = 0; -- peer = NULL; -- -- list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) { -- if (index++ == j) { -- peer = list_entry(tmp, ksock_peer_t, -- ksnp_list); -- atomic_inc (&peer->ksnp_refcount); -- break; -- } -- } -- -- read_unlock (&ksocknal_data.ksnd_global_lock); -- -- if (peer != NULL) { -- rc = 0; -- ksocknal_push_peer (peer); -- ksocknal_put_peer (peer); -- } -- } -- -- } -- -- return (rc); --} -- --int - ksocknal_cmd(struct portals_cfg *pcfg, void * private) -ksocknal_add_interface(__u32 ipaddress, __u32 netmask) --{ - int rc = -EINVAL; - unsigned long flags; - ksock_interface_t *iface; - int rc; - int i; - int j; - struct list_head *ptmp; - ksock_peer_t *peer; - struct list_head *rtmp; - ksock_route_t *route; -- - LASSERT (pcfg != NULL); - if (ipaddress == 0 || - netmask == 0) - return (-EINVAL); - - write_lock_irqsave(&ksocknal_data.ksnd_global_lock, flags); - - iface = ksocknal_ip2iface(ipaddress); - if (iface != NULL) { - /* silently ignore dups */ - rc = 0; - } else if (ksocknal_data.ksnd_ninterfaces == SOCKNAL_MAX_INTERFACES) { - rc = -ENOSPC; - } else { - iface = &ksocknal_data.ksnd_interfaces[ksocknal_data.ksnd_ninterfaces++]; - - iface->ksni_ipaddr = ipaddress; - iface->ksni_netmask = netmask; - iface->ksni_nroutes = 0; - iface->ksni_npeers = 0; - - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) { - peer = list_entry(ptmp, ksock_peer_t, ksnp_list); - - for (j = 0; i < peer->ksnp_n_passive_ips; j++) - if (peer->ksnp_passive_ips[j] == ipaddress) - iface->ksni_npeers++; - - list_for_each(rtmp, &peer->ksnp_routes) { - route = list_entry(rtmp, ksock_route_t, ksnr_list); - - if (route->ksnr_myipaddr == ipaddress) - iface->ksni_nroutes++; - } - } - } - - rc = 0; - /* NB only new connections will pay attention to the new interface! 
*/ - } - - write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, flags); - - return (rc); -} - -void -ksocknal_peer_del_interface_locked(ksock_peer_t *peer, __u32 ipaddr) -{ - struct list_head *tmp; - struct list_head *nxt; - ksock_route_t *route; - ksock_conn_t *conn; - int i; - int j; - - for (i = 0; i < peer->ksnp_n_passive_ips; i++) - if (peer->ksnp_passive_ips[i] == ipaddr) { - for (j = i+1; j < peer->ksnp_n_passive_ips; j++) - peer->ksnp_passive_ips[j-1] = - peer->ksnp_passive_ips[j]; - peer->ksnp_n_passive_ips--; - break; - } - - list_for_each_safe(tmp, nxt, &peer->ksnp_routes) { - route = list_entry (tmp, ksock_route_t, ksnr_list); - - if (route->ksnr_myipaddr != ipaddr) - continue; - - if (route->ksnr_share_count != 0) { - /* Manually created; keep, but unbind */ - route->ksnr_myipaddr = 0; - } else { - ksocknal_del_route_locked(route); - } - } - - list_for_each_safe(tmp, nxt, &peer->ksnp_conns) { - conn = list_entry(tmp, ksock_conn_t, ksnc_list); - - if (conn->ksnc_myipaddr == ipaddr) - ksocknal_close_conn_locked (conn, 0); - } -} - -int -ksocknal_del_interface(__u32 ipaddress) -{ - int rc = -ENOENT; - unsigned long flags; - struct list_head *tmp; - struct list_head *nxt; - ksock_peer_t *peer; - __u32 this_ip; - int i; - int j; - - write_lock_irqsave(&ksocknal_data.ksnd_global_lock, flags); - - for (i = 0; i < ksocknal_data.ksnd_ninterfaces; i++) { - this_ip = ksocknal_data.ksnd_interfaces[i].ksni_ipaddr; - - if (!(ipaddress == 0 || - ipaddress == this_ip)) - continue; - - rc = 0; - - for (j = i+1; j < ksocknal_data.ksnd_ninterfaces; j++) - ksocknal_data.ksnd_interfaces[j-1] = - ksocknal_data.ksnd_interfaces[j]; - - ksocknal_data.ksnd_ninterfaces--; - - for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) { - list_for_each_safe(tmp, nxt, &ksocknal_data.ksnd_peers[j]) { - peer = list_entry(tmp, ksock_peer_t, ksnp_list); - - ksocknal_peer_del_interface_locked(peer, this_ip); - } - } - } - - write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, 
flags); - - return (rc); -} - -int -ksocknal_cmd(struct portals_cfg *pcfg, void * private) -{ - int rc; -- -- switch(pcfg->pcfg_command) { - case NAL_CMD_GET_AUTOCONN: { - ksock_route_t *route = ksocknal_get_route_by_idx (pcfg->pcfg_count); - case NAL_CMD_GET_INTERFACE: { - ksock_interface_t *iface; -- - if (route == NULL) - read_lock (&ksocknal_data.ksnd_global_lock); - - if (pcfg->pcfg_count < 0 || - pcfg->pcfg_count >= ksocknal_data.ksnd_ninterfaces) { -- rc = -ENOENT; - else { - } else { -- rc = 0; - pcfg->pcfg_nid = route->ksnr_peer->ksnp_nid; - pcfg->pcfg_id = route->ksnr_ipaddr; - pcfg->pcfg_misc = route->ksnr_port; - pcfg->pcfg_count = route->ksnr_conn_count; - pcfg->pcfg_size = route->ksnr_buffer_size; - pcfg->pcfg_wait = route->ksnr_sharecount; - pcfg->pcfg_flags = (route->ksnr_irq_affinity ? 2 : 0) | - (route->ksnr_eager ? 4 : 0); - ksocknal_put_route (route); - iface = &ksocknal_data.ksnd_interfaces[pcfg->pcfg_count]; - - pcfg->pcfg_id = iface->ksni_ipaddr; - pcfg->pcfg_misc = iface->ksni_netmask; - pcfg->pcfg_fd = iface->ksni_npeers; - pcfg->pcfg_count = iface->ksni_nroutes; -- } - - read_unlock (&ksocknal_data.ksnd_global_lock); -- break; -- } - case NAL_CMD_ADD_AUTOCONN: { - rc = ksocknal_add_route (pcfg->pcfg_nid, pcfg->pcfg_id, - pcfg->pcfg_misc, pcfg->pcfg_size, - (pcfg->pcfg_flags & 0x02) != 0, - (pcfg->pcfg_flags & 0x04) != 0, - (pcfg->pcfg_flags & 0x08) != 0); - case NAL_CMD_ADD_INTERFACE: { - rc = ksocknal_add_interface(pcfg->pcfg_id, /* IP address */ - pcfg->pcfg_misc); /* net mask */ -- break; -- } - case NAL_CMD_DEL_AUTOCONN: { - rc = ksocknal_del_route (pcfg->pcfg_nid, pcfg->pcfg_id, - (pcfg->pcfg_flags & 1) != 0, - (pcfg->pcfg_flags & 2) != 0); - case NAL_CMD_DEL_INTERFACE: { - rc = ksocknal_del_interface(pcfg->pcfg_id); /* IP address */ - break; - } - case NAL_CMD_GET_PEER: { - ptl_nid_t nid = 0; - __u32 myip = 0; - __u32 ip = 0; - int port = 0; - int conn_count = 0; - int share_count = 0; - - rc = 
ksocknal_get_peer_info(pcfg->pcfg_count, &nid, - &myip, &ip, &port, - &conn_count, &share_count); - pcfg->pcfg_nid = nid; - pcfg->pcfg_size = myip; - pcfg->pcfg_id = ip; - pcfg->pcfg_misc = port; - pcfg->pcfg_count = conn_count; - pcfg->pcfg_wait = share_count; - break; - } - case NAL_CMD_ADD_PEER: { - rc = ksocknal_add_peer (pcfg->pcfg_nid, - pcfg->pcfg_id, /* IP */ - pcfg->pcfg_misc); /* port */ - break; - } - case NAL_CMD_DEL_PEER: { - rc = ksocknal_del_peer (pcfg->pcfg_nid, - pcfg->pcfg_id, /* IP */ - pcfg->pcfg_flags); /* single_share? */ -- break; -- } -- case NAL_CMD_GET_CONN: { -- ksock_conn_t *conn = ksocknal_get_conn_by_idx (pcfg->pcfg_count); -- -- if (conn == NULL) -- rc = -ENOENT; -- else { - int txmem; - int rxmem; - int nagle; - - ksocknal_get_conn_tunables(conn, &txmem, &rxmem, &nagle); - -- rc = 0; - pcfg->pcfg_nid = conn->ksnc_peer->ksnp_nid; - pcfg->pcfg_id = conn->ksnc_ipaddr; - pcfg->pcfg_misc = conn->ksnc_port; - pcfg->pcfg_flags = conn->ksnc_type; - pcfg->pcfg_nid = conn->ksnc_peer->ksnp_nid; - pcfg->pcfg_id = conn->ksnc_ipaddr; - pcfg->pcfg_misc = conn->ksnc_port; - pcfg->pcfg_fd = conn->ksnc_myipaddr; - pcfg->pcfg_flags = conn->ksnc_type; - pcfg->pcfg_gw_nal = conn->ksnc_scheduler - - ksocknal_data.ksnd_schedulers; - pcfg->pcfg_count = txmem; - pcfg->pcfg_size = rxmem; - pcfg->pcfg_wait = nagle; -- ksocknal_put_conn (conn); -- } -- break; -- } -- case NAL_CMD_REGISTER_PEER_FD: { -- struct socket *sock = sockfd_lookup (pcfg->pcfg_fd, &rc); -- int type = pcfg->pcfg_misc; -- -- if (sock == NULL) -- break; -- -- switch (type) { -- case SOCKNAL_CONN_NONE: -- case SOCKNAL_CONN_ANY: -- case SOCKNAL_CONN_CONTROL: -- case SOCKNAL_CONN_BULK_IN: -- case SOCKNAL_CONN_BULK_OUT: - rc = ksocknal_create_conn(NULL, sock, pcfg->pcfg_flags, type); - rc = ksocknal_create_conn(NULL, sock, type); - break; -- default: - rc = -EINVAL; -- break; -- } -- if (rc != 0) -- fput (sock->file); -- break; -- } -- case NAL_CMD_CLOSE_CONNECTION: { -- rc = 
ksocknal_close_matching_conns (pcfg->pcfg_nid, -- pcfg->pcfg_id); -- break; -- } -- case NAL_CMD_REGISTER_MYNID: { -- rc = ksocknal_set_mynid (pcfg->pcfg_nid); -- break; -- } -- case NAL_CMD_PUSH_CONNECTION: { -- rc = ksocknal_push (pcfg->pcfg_nid); -- break; -- } - default: - rc = -EINVAL; - break; -- } -- -- return rc; --} -- --void --ksocknal_free_fmbs (ksock_fmb_pool_t *p) --{ -- int npages = p->fmp_buff_pages; -- ksock_fmb_t *fmb; -- int i; -- -- LASSERT (list_empty(&p->fmp_blocked_conns)); -- LASSERT (p->fmp_nactive_fmbs == 0); -- -- while (!list_empty(&p->fmp_idle_fmbs)) { -- -- fmb = list_entry(p->fmp_idle_fmbs.next, -- ksock_fmb_t, fmb_list); -- -- for (i = 0; i < npages; i++) -- if (fmb->fmb_kiov[i].kiov_page != NULL) -- __free_page(fmb->fmb_kiov[i].kiov_page); -- -- list_del(&fmb->fmb_list); -- PORTAL_FREE(fmb, offsetof(ksock_fmb_t, fmb_kiov[npages])); -- } --} -- --void --ksocknal_free_buffers (void) --{ -- ksocknal_free_fmbs(&ksocknal_data.ksnd_small_fmp); -- ksocknal_free_fmbs(&ksocknal_data.ksnd_large_fmp); -- -- LASSERT (atomic_read(&ksocknal_data.ksnd_nactive_ltxs) == 0); -- -- if (ksocknal_data.ksnd_schedulers != NULL) -- PORTAL_FREE (ksocknal_data.ksnd_schedulers, - sizeof (ksock_sched_t) * SOCKNAL_N_SCHED); - sizeof (ksock_sched_t) * ksocknal_data.ksnd_nschedulers); -- -- PORTAL_FREE (ksocknal_data.ksnd_peers, -- sizeof (struct list_head) * -- ksocknal_data.ksnd_peer_hash_size); --} -- --void - ksocknal_module_fini (void) -ksocknal_api_shutdown (nal_t *nal) --{ - int i; - ksock_sched_t *sched; - int i; - - if (nal->nal_refct != 0) { - /* This module got the first ref */ - PORTAL_MODULE_UNUSE; - return; - } -- -- CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", -- atomic_read (&portal_kmemory)); - - LASSERT(nal == &ksocknal_api); -- -- switch (ksocknal_data.ksnd_init) { -- default: -- LASSERT (0); -- -- case SOCKNAL_INIT_ALL: - #if CONFIG_SYSCTL - if (ksocknal_data.ksnd_sysctl != NULL) - unregister_sysctl_table (ksocknal_data.ksnd_sysctl); - 
#endif - kportal_nal_unregister(SOCKNAL); - PORTAL_SYMBOL_UNREGISTER (ksocknal_ni); - libcfs_nal_cmd_unregister(SOCKNAL); - - ksocknal_data.ksnd_init = SOCKNAL_INIT_LIB; -- /* fall through */ -- - case SOCKNAL_INIT_PTL: - case SOCKNAL_INIT_LIB: -- /* No more calls to ksocknal_cmd() to create new -- * autoroutes/connections since we're being unloaded. */ - PtlNIFini(ksocknal_ni); -- - /* Delete all autoroute entries */ - ksocknal_del_route(PTL_NID_ANY, 0, 0, 0); - /* Delete all peers */ - ksocknal_del_peer(PTL_NID_ANY, 0, 0); -- - /* Delete all connections */ - ksocknal_close_matching_conns (PTL_NID_ANY, 0); - -- /* Wait for all peer state to clean up */ -- i = 2; -- while (atomic_read (&ksocknal_data.ksnd_npeers) != 0) { -- i++; -- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */ -- "waiting for %d peers to disconnect\n", -- atomic_read (&ksocknal_data.ksnd_npeers)); -- set_current_state (TASK_UNINTERRUPTIBLE); -- schedule_timeout (HZ); -- } -- -- /* Tell lib we've stopped calling into her. 
*/ -- lib_fini(&ksocknal_lib); - - ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA; -- /* fall through */ -- -- case SOCKNAL_INIT_DATA: - /* Module refcount only gets to zero when all peers - * have been closed so all lists must be empty */ -- LASSERT (atomic_read (&ksocknal_data.ksnd_npeers) == 0); -- LASSERT (ksocknal_data.ksnd_peers != NULL); -- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { -- LASSERT (list_empty (&ksocknal_data.ksnd_peers[i])); -- } -- LASSERT (list_empty (&ksocknal_data.ksnd_enomem_conns)); -- LASSERT (list_empty (&ksocknal_data.ksnd_zombie_conns)); -- LASSERT (list_empty (&ksocknal_data.ksnd_autoconnectd_routes)); -- LASSERT (list_empty (&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns)); -- LASSERT (list_empty (&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns)); -- -- if (ksocknal_data.ksnd_schedulers != NULL) - for (i = 0; i < SOCKNAL_N_SCHED; i++) { - for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) { -- ksock_sched_t *kss = -- &ksocknal_data.ksnd_schedulers[i]; -- -- LASSERT (list_empty (&kss->kss_tx_conns)); -- LASSERT (list_empty (&kss->kss_rx_conns)); -- LASSERT (kss->kss_nconns == 0); -- } -- -- /* stop router calling me */ -- kpr_shutdown (&ksocknal_data.ksnd_router); -- -- /* flag threads to terminate; wake and wait for them to die */ -- ksocknal_data.ksnd_shuttingdown = 1; -- wake_up_all (&ksocknal_data.ksnd_autoconnectd_waitq); -- wake_up_all (&ksocknal_data.ksnd_reaper_waitq); -- - for (i = 0; i < SOCKNAL_N_SCHED; i++) - wake_up_all(&ksocknal_data.ksnd_schedulers[i].kss_waitq); - for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) { - sched = &ksocknal_data.ksnd_schedulers[i]; - wake_up_all(&sched->kss_waitq); - } -- - while (atomic_read (&ksocknal_data.ksnd_nthreads) != 0) { - CDEBUG (D_NET, "waitinf for %d threads to terminate\n", - atomic_read (&ksocknal_data.ksnd_nthreads)); - i = 4; - read_lock(&ksocknal_data.ksnd_global_lock); - while (ksocknal_data.ksnd_nthreads != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? 
D_WARNING : D_NET, /* power of 2? */ - "waiting for %d threads to terminate\n", - ksocknal_data.ksnd_nthreads); - read_unlock(&ksocknal_data.ksnd_global_lock); -- set_current_state (TASK_UNINTERRUPTIBLE); -- schedule_timeout (HZ); - read_lock(&ksocknal_data.ksnd_global_lock); -- } - read_unlock(&ksocknal_data.ksnd_global_lock); -- -- kpr_deregister (&ksocknal_data.ksnd_router); -- -- ksocknal_free_buffers(); - - ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING; -- /* fall through */ -- -- case SOCKNAL_INIT_NOTHING: -- break; -- } -- -- CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n", -- atomic_read (&portal_kmemory)); -- -- printk(KERN_INFO "Lustre: Routing socket NAL unloaded (final mem %d)\n", -- atomic_read(&portal_kmemory)); --} -- -- - void __init -void --ksocknal_init_incarnation (void) --{ -- struct timeval tv; -- -- /* The incarnation number is the time this module loaded and it -- * identifies this particular instance of the socknal. Hopefully -- * we won't be able to reboot more frequently than 1MHz for the -- * forseeable future :) */ -- -- do_gettimeofday(&tv); -- -- ksocknal_data.ksnd_incarnation = -- (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; --} -- - int __init - ksocknal_module_init (void) -int -ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid, - ptl_ni_limits_t *requested_limits, - ptl_ni_limits_t *actual_limits) --{ - int pkmem = atomic_read(&portal_kmemory); - int rc; - int i; - int j; - ptl_process_id_t process_id; - int pkmem = atomic_read(&portal_kmemory); - int rc; - int i; - int j; -- - /* packet descriptor must fit in a router descriptor's scratchpad */ - LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t)); - /* the following must be sizeof(int) for proc_dointvec() */ - LASSERT(sizeof (ksocknal_data.ksnd_io_timeout) == sizeof (int)); - LASSERT(sizeof (ksocknal_data.ksnd_eager_ack) == sizeof (int)); - /* check ksnr_connected/connecting field large enough */ - LASSERT(SOCKNAL_CONN_NTYPES <= 4); - - LASSERT 
(ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); - LASSERT (nal == &ksocknal_api); -- - ksocknal_api.forward = ksocknal_api_forward; - ksocknal_api.shutdown = ksocknal_api_shutdown; - ksocknal_api.yield = ksocknal_api_yield; - ksocknal_api.validate = NULL; /* our api validate is a NOOP */ - ksocknal_api.lock = ksocknal_api_lock; - ksocknal_api.unlock = ksocknal_api_unlock; - ksocknal_api.nal_data = &ksocknal_data; - if (nal->nal_refct != 0) { - if (actual_limits != NULL) - *actual_limits = ksocknal_lib.libnal_ni.ni_actual_limits; - /* This module got the first ref */ - PORTAL_MODULE_USE; - return (PTL_OK); - } -- - ksocknal_lib.nal_data = &ksocknal_data; - LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); -- -- memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */ -- - ksocknal_data.ksnd_io_timeout = SOCKNAL_IO_TIMEOUT; - ksocknal_data.ksnd_eager_ack = SOCKNAL_EAGER_ACK; - ksocknal_data.ksnd_typed_conns = SOCKNAL_TYPED_CONNS; - ksocknal_data.ksnd_min_bulk = SOCKNAL_MIN_BULK; - #if SOCKNAL_ZC - ksocknal_data.ksnd_zc_min_frag = SOCKNAL_ZC_MIN_FRAG; - #endif -- ksocknal_init_incarnation(); -- -- ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE; -- PORTAL_ALLOC (ksocknal_data.ksnd_peers, -- sizeof (struct list_head) * ksocknal_data.ksnd_peer_hash_size); -- if (ksocknal_data.ksnd_peers == NULL) -- return (-ENOMEM); -- -- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) -- INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]); -- -- rwlock_init(&ksocknal_data.ksnd_global_lock); - - ksocknal_data.ksnd_nal_cb = &ksocknal_lib; - spin_lock_init (&ksocknal_data.ksnd_nal_cb_lock); -- -- spin_lock_init(&ksocknal_data.ksnd_small_fmp.fmp_lock); -- INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_idle_fmbs); -- INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns); -- ksocknal_data.ksnd_small_fmp.fmp_buff_pages = SOCKNAL_SMALL_FWD_PAGES; -- -- spin_lock_init(&ksocknal_data.ksnd_large_fmp.fmp_lock); -- 
INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs); -- INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns); -- ksocknal_data.ksnd_large_fmp.fmp_buff_pages = SOCKNAL_LARGE_FWD_PAGES; -- -- spin_lock_init (&ksocknal_data.ksnd_reaper_lock); -- INIT_LIST_HEAD (&ksocknal_data.ksnd_enomem_conns); -- INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns); -- INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns); -- init_waitqueue_head(&ksocknal_data.ksnd_reaper_waitq); -- -- spin_lock_init (&ksocknal_data.ksnd_autoconnectd_lock); -- INIT_LIST_HEAD (&ksocknal_data.ksnd_autoconnectd_routes); -- init_waitqueue_head(&ksocknal_data.ksnd_autoconnectd_waitq); -- -- /* NB memset above zeros whole of ksocknal_data, including -- * ksocknal_data.ksnd_irqinfo[all].ksni_valid */ -- -- /* flag lists/ptrs/locks initialised */ -- ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA; -- - ksocknal_data.ksnd_nschedulers = ksocknal_nsched(); -- PORTAL_ALLOC(ksocknal_data.ksnd_schedulers, - sizeof(ksock_sched_t) * SOCKNAL_N_SCHED); - sizeof(ksock_sched_t) * ksocknal_data.ksnd_nschedulers); -- if (ksocknal_data.ksnd_schedulers == NULL) { - ksocknal_module_fini (); - ksocknal_api_shutdown (nal); -- return (-ENOMEM); -- } -- - for (i = 0; i < SOCKNAL_N_SCHED; i++) { - for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) { -- ksock_sched_t *kss = &ksocknal_data.ksnd_schedulers[i]; -- -- spin_lock_init (&kss->kss_lock); -- INIT_LIST_HEAD (&kss->kss_rx_conns); -- INIT_LIST_HEAD (&kss->kss_tx_conns); --#if SOCKNAL_ZC -- INIT_LIST_HEAD (&kss->kss_zctxdone_list); --#endif -- init_waitqueue_head (&kss->kss_waitq); -- } -- - rc = PtlNIInit(ksocknal_init, 32, 4, 0, &ksocknal_ni); - if (rc != 0) { - CERROR("ksocknal: PtlNIInit failed: error %d\n", rc); - ksocknal_module_fini (); - /* NB we have to wait to be told our true NID... 
*/ - process_id.pid = requested_pid; - process_id.nid = 0; - - rc = lib_init(&ksocknal_lib, nal, process_id, - requested_limits, actual_limits); - if (rc != PTL_OK) { - CERROR("lib_init failed: error %d\n", rc); - ksocknal_api_shutdown (nal); -- return (rc); -- } - PtlNIDebug(ksocknal_ni, ~0); -- - ksocknal_data.ksnd_init = SOCKNAL_INIT_PTL; // flag PtlNIInit() called - ksocknal_data.ksnd_init = SOCKNAL_INIT_LIB; // flag lib_init() called -- - for (i = 0; i < SOCKNAL_N_SCHED; i++) { - for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) { -- rc = ksocknal_thread_start (ksocknal_scheduler, -- &ksocknal_data.ksnd_schedulers[i]); -- if (rc != 0) { -- CERROR("Can't spawn socknal scheduler[%d]: %d\n", -- i, rc); - ksocknal_module_fini (); - ksocknal_api_shutdown (nal); -- return (rc); -- } -- } -- -- for (i = 0; i < SOCKNAL_N_AUTOCONNECTD; i++) { -- rc = ksocknal_thread_start (ksocknal_autoconnectd, (void *)((long)i)); -- if (rc != 0) { -- CERROR("Can't spawn socknal autoconnectd: %d\n", rc); - ksocknal_module_fini (); - ksocknal_api_shutdown (nal); -- return (rc); -- } -- } -- -- rc = ksocknal_thread_start (ksocknal_reaper, NULL); -- if (rc != 0) { -- CERROR ("Can't spawn socknal reaper: %d\n", rc); - ksocknal_module_fini (); - ksocknal_api_shutdown (nal); -- return (rc); -- } -- -- rc = kpr_register(&ksocknal_data.ksnd_router, -- &ksocknal_router_interface); -- if (rc != 0) { -- CDEBUG(D_NET, "Can't initialise routing interface " -- "(rc = %d): not routing\n", rc); -- } else { - /* Only allocate forwarding buffers if I'm on a gateway */ - /* Only allocate forwarding buffers if there's a router */ -- -- for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS + -- SOCKNAL_LARGE_FWD_NMSGS); i++) { -- ksock_fmb_t *fmb; -- ksock_fmb_pool_t *pool; -- -- -- if (i < SOCKNAL_SMALL_FWD_NMSGS) -- pool = &ksocknal_data.ksnd_small_fmp; -- else -- pool = &ksocknal_data.ksnd_large_fmp; -- -- PORTAL_ALLOC(fmb, offsetof(ksock_fmb_t, -- fmb_kiov[pool->fmp_buff_pages])); -- if (fmb == NULL) { - 
ksocknal_module_fini(); - ksocknal_api_shutdown(nal); -- return (-ENOMEM); -- } -- -- fmb->fmb_pool = pool; -- -- for (j = 0; j < pool->fmp_buff_pages; j++) { -- fmb->fmb_kiov[j].kiov_page = alloc_page(GFP_KERNEL); -- -- if (fmb->fmb_kiov[j].kiov_page == NULL) { - ksocknal_module_fini (); - ksocknal_api_shutdown (nal); -- return (-ENOMEM); -- } -- -- LASSERT(page_address(fmb->fmb_kiov[j].kiov_page) != NULL); -- } -- -- list_add(&fmb->fmb_list, &pool->fmp_idle_fmbs); -- } -- } -- - rc = kportal_nal_register(SOCKNAL, &ksocknal_cmd, NULL); - rc = libcfs_nal_cmd_register(SOCKNAL, &ksocknal_cmd, NULL); -- if (rc != 0) { -- CERROR ("Can't initialise command interface (rc = %d)\n", rc); - ksocknal_module_fini (); - ksocknal_api_shutdown (nal); -- return (rc); -- } - - PORTAL_SYMBOL_REGISTER(ksocknal_ni); -- - #ifdef CONFIG_SYSCTL - /* Press on regardless even if registering sysctl doesn't work */ - ksocknal_data.ksnd_sysctl = register_sysctl_table (ksocknal_top_ctl_table, 0); - #endif -- /* flag everything initialised */ -- ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL; -- -- printk(KERN_INFO "Lustre: Routing socket NAL loaded " - "(Routing %s, initial mem %d)\n", - "(Routing %s, initial mem %d, incarnation "LPX64")\n", -- kpr_routing (&ksocknal_data.ksnd_router) ? 
- "enabled" : "disabled", pkmem); - "enabled" : "disabled", pkmem, ksocknal_data.ksnd_incarnation); - - return (0); -} - -void __exit -ksocknal_module_fini (void) -{ -#ifdef CONFIG_SYSCTL - if (ksocknal_tunables.ksnd_sysctl != NULL) - unregister_sysctl_table (ksocknal_tunables.ksnd_sysctl); -#endif - PtlNIFini(ksocknal_ni); - - ptl_unregister_nal(SOCKNAL); -} - -int __init -ksocknal_module_init (void) -{ - int rc; - - /* packet descriptor must fit in a router descriptor's scratchpad */ - LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t)); - /* the following must be sizeof(int) for proc_dointvec() */ - LASSERT(sizeof (ksocknal_tunables.ksnd_io_timeout) == sizeof (int)); - LASSERT(sizeof (ksocknal_tunables.ksnd_eager_ack) == sizeof (int)); - LASSERT(sizeof (ksocknal_tunables.ksnd_typed_conns) == sizeof (int)); - LASSERT(sizeof (ksocknal_tunables.ksnd_min_bulk) == sizeof (int)); - LASSERT(sizeof (ksocknal_tunables.ksnd_buffer_size) == sizeof (int)); - LASSERT(sizeof (ksocknal_tunables.ksnd_nagle) == sizeof (int)); - LASSERT(sizeof (ksocknal_tunables.ksnd_keepalive_idle) == sizeof (int)); - LASSERT(sizeof (ksocknal_tunables.ksnd_keepalive_count) == sizeof (int)); - LASSERT(sizeof (ksocknal_tunables.ksnd_keepalive_intvl) == sizeof (int)); -#if CPU_AFFINITY - LASSERT(sizeof (ksocknal_tunables.ksnd_irq_affinity) == sizeof (int)); -#endif -#if SOCKNAL_ZC - LASSERT(sizeof (ksocknal_tunables.ksnd_zc_min_frag) == sizeof (int)); -#endif - /* check ksnr_connected/connecting field large enough */ - LASSERT(SOCKNAL_CONN_NTYPES <= 4); - - ksocknal_api.nal_ni_init = ksocknal_api_startup; - ksocknal_api.nal_ni_fini = ksocknal_api_shutdown; - - /* Initialise dynamic tunables to defaults once only */ - ksocknal_tunables.ksnd_io_timeout = SOCKNAL_IO_TIMEOUT; - ksocknal_tunables.ksnd_eager_ack = SOCKNAL_EAGER_ACK; - ksocknal_tunables.ksnd_typed_conns = SOCKNAL_TYPED_CONNS; - ksocknal_tunables.ksnd_min_bulk = SOCKNAL_MIN_BULK; - ksocknal_tunables.ksnd_buffer_size = 
SOCKNAL_BUFFER_SIZE; - ksocknal_tunables.ksnd_nagle = SOCKNAL_NAGLE; - ksocknal_tunables.ksnd_keepalive_idle = SOCKNAL_KEEPALIVE_IDLE; - ksocknal_tunables.ksnd_keepalive_count = SOCKNAL_KEEPALIVE_COUNT; - ksocknal_tunables.ksnd_keepalive_intvl = SOCKNAL_KEEPALIVE_INTVL; -#if CPU_AFFINITY - ksocknal_tunables.ksnd_irq_affinity = SOCKNAL_IRQ_AFFINITY; -#endif -#if SOCKNAL_ZC - ksocknal_tunables.ksnd_zc_min_frag = SOCKNAL_ZC_MIN_FRAG; -#endif - - rc = ptl_register_nal(SOCKNAL, &ksocknal_api); - if (rc != PTL_OK) { - CERROR("Can't register SOCKNAL: %d\n", rc); - return (-ENOMEM); /* or something... */ - } -- - /* Pure gateways want the NAL started up at module load time... */ - rc = PtlNIInit(SOCKNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &ksocknal_ni); - if (rc != PTL_OK && rc != PTL_IFACE_DUP) { - ptl_unregister_nal(SOCKNAL); - return (-ENODEV); - } - -#ifdef CONFIG_SYSCTL - /* Press on regardless even if registering sysctl doesn't work */ - ksocknal_tunables.ksnd_sysctl = - register_sysctl_table (ksocknal_top_ctl_table, 0); -#endif -- return (0); --} -- --MODULE_AUTHOR("Cluster File Systems, Inc. "); --MODULE_DESCRIPTION("Kernel TCP Socket NAL v0.01"); --MODULE_LICENSE("GPL"); -- --module_init(ksocknal_module_init); --module_exit(ksocknal_module_fini); -- - EXPORT_SYMBOL (ksocknal_ni); diff --cc lnet/klnds/socklnd/socklnd.h index 2bef800,0a5266a..0000000 deleted file mode 100644,100644 --- a/lnet/klnds/socklnd/socklnd.h +++ /dev/null @@@ -1,479 -1,516 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (C) 2001, 2002 Cluster File Systems, Inc. -- * Author: Zach Brown -- * Author: Peter J. 
Braam -- * Author: Phil Schwan -- * Author: Eric Barton -- * -- * This file is part of Portals, http://www.sf.net/projects/lustre/ -- * -- * Portals is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Portals is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Portals; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- * -- */ -- --#define DEBUG_PORTAL_ALLOC --#ifndef EXPORT_SYMTAB --# define EXPORT_SYMTAB --#endif -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include -- --#include --#include --#include -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include -- --#define DEBUG_SUBSYSTEM S_SOCKNAL -- --#include --#include --#include --#include --#include --#include --#include -#include -- - #define SOCKNAL_N_SCHED ksocknal_nsched() /* # socknal schedulers */ -#include --#define SOCKNAL_N_AUTOCONNECTD 4 /* # socknal autoconnect daemons */ -- --#define SOCKNAL_MIN_RECONNECT_INTERVAL HZ /* first failed connection retry... */ --#define SOCKNAL_MAX_RECONNECT_INTERVAL (60*HZ) /* ...exponentially increasing to this */ -- --/* default vals for runtime tunables */ --#define SOCKNAL_IO_TIMEOUT 50 /* default comms timeout (seconds) */ --#define SOCKNAL_EAGER_ACK 0 /* default eager ack (boolean) */ --#define SOCKNAL_TYPED_CONNS 1 /* unidirectional large, bidirectional small? 
*/ --#define SOCKNAL_ZC_MIN_FRAG (2<<10) /* default smallest zerocopy fragment */ --#define SOCKNAL_MIN_BULK (1<<10) /* smallest "large" message */ - #define SOCKNAL_USE_KEEPALIVES 0 /* use tcp/ip keepalive? */ -#define SOCKNAL_BUFFER_SIZE (8<<20) /* default socket buffer size */ -#define SOCKNAL_NAGLE 0 /* enable/disable NAGLE? */ -#define SOCKNAL_IRQ_AFFINITY 1 /* enable/disable IRQ affinity? */ -#define SOCKNAL_KEEPALIVE_IDLE 0 /* # seconds idle before 1st probe */ -#define SOCKNAL_KEEPALIVE_COUNT 10 /* # unanswered probes to determine peer death */ -#define SOCKNAL_KEEPALIVE_INTVL 1 /* seconds between probes */ -- --#define SOCKNAL_PEER_HASH_SIZE 101 /* # peer lists */ -- --#define SOCKNAL_SMALL_FWD_NMSGS 128 /* # small messages I can be forwarding at any time */ --#define SOCKNAL_LARGE_FWD_NMSGS 64 /* # large messages I can be forwarding at any time */ -- --#define SOCKNAL_SMALL_FWD_PAGES 1 /* # pages in a small message fwd buffer */ -- --#define SOCKNAL_LARGE_FWD_PAGES (PAGE_ALIGN(PTL_MTU) >> PAGE_SHIFT) -- /* # pages in a large message fwd buffer */ -- --#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */ --#define SOCKNAL_ENOMEM_RETRY 1 /* jiffies between retries */ - -#define SOCKNAL_MAX_INTERFACES 16 /* Largest number of interfaces we bind */ - -#define SOCKNAL_ROUND_ROBIN 0 /* round robin / load balance */ -- --#define SOCKNAL_TX_LOW_WATER(sk) (((sk)->sk_sndbuf*8)/10) -- --#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,72)) -# define sk_allocation allocation --# define sk_data_ready data_ready --# define sk_write_space write_space --# define sk_user_data user_data --# define sk_prot prot --# define sk_sndbuf sndbuf --# define sk_socket socket --#endif -- --#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) --# define sk_wmem_queued wmem_queued -# define sk_err err --#endif -- --typedef struct /* pool of forwarding buffers */ --{ -- spinlock_t fmp_lock; /* serialise */ -- struct list_head fmp_idle_fmbs; /* free buffers */ -- struct list_head 
fmp_blocked_conns; /* connections waiting for a buffer */ -- int fmp_nactive_fmbs; /* # buffers in use */ -- int fmp_buff_pages; /* # pages per buffer */ --} ksock_fmb_pool_t; -- -- --typedef struct /* per scheduler state */ --{ -- spinlock_t kss_lock; /* serialise */ -- struct list_head kss_rx_conns; /* conn waiting to be read */ -- struct list_head kss_tx_conns; /* conn waiting to be written */ --#if SOCKNAL_ZC -- struct list_head kss_zctxdone_list; /* completed ZC transmits */ --#endif -- wait_queue_head_t kss_waitq; /* where scheduler sleeps */ -- int kss_nconns; /* # connections assigned to this scheduler */ --} ksock_sched_t; -- - typedef struct { -typedef struct -{ -- int ksni_valid:1; /* been set yet? */ -- int ksni_bound:1; /* bound to a cpu yet? */ -- int ksni_sched:6; /* which scheduler (assumes < 64) */ --} ksock_irqinfo_t; -- - typedef struct { - int ksnd_init; /* initialisation state */ -typedef struct -{ - __u32 ksni_ipaddr; /* interface's IP address */ - __u32 ksni_netmask; /* interface's network mask */ - int ksni_nroutes; /* # routes using (active) */ - int ksni_npeers; /* # peers using (passive) */ -} ksock_interface_t; - -typedef struct -{ -- int ksnd_io_timeout; /* "stuck" socket timeout (seconds) */ -- int ksnd_eager_ack; /* make TCP ack eagerly? */ -- int ksnd_typed_conns; /* drive sockets by type? */ -- int ksnd_min_bulk; /* smallest "large" message */ - int ksnd_buffer_size; /* socket buffer size */ - int ksnd_nagle; /* enable NAGLE? */ - int ksnd_irq_affinity; /* enable IRQ affinity? 
*/ - int ksnd_keepalive_idle; /* # idle secs before 1st probe */ - int ksnd_keepalive_count; /* # probes */ - int ksnd_keepalive_intvl; /* time between probes */ --#if SOCKNAL_ZC -- unsigned int ksnd_zc_min_frag; /* minimum zero copy frag size */ --#endif -- struct ctl_table_header *ksnd_sysctl; /* sysctl interface */ -} ksock_tunables_t; - -typedef struct -{ - int ksnd_init; /* initialisation state */ -- __u64 ksnd_incarnation; /* my epoch */ -- -- rwlock_t ksnd_global_lock; /* stabilize peer/conn ops */ -- struct list_head *ksnd_peers; /* hash table of all my known peers */ -- int ksnd_peer_hash_size; /* size of ksnd_peers */ - - nal_cb_t *ksnd_nal_cb; - spinlock_t ksnd_nal_cb_lock; /* lib cli/sti lock */ -- - atomic_t ksnd_nthreads; /* # live threads */ - int ksnd_nthreads; /* # live threads */ -- int ksnd_shuttingdown; /* tell threads to exit */ - ksock_sched_t *ksnd_schedulers; /* scheduler state */ - int ksnd_nschedulers; /* # schedulers */ - ksock_sched_t *ksnd_schedulers; /* their state */ -- -- atomic_t ksnd_npeers; /* total # peers extant */ -- atomic_t ksnd_nclosing_conns; /* # closed conns extant */ -- -- kpr_router_t ksnd_router; /* THE router */ -- -- ksock_fmb_pool_t ksnd_small_fmp; /* small message forwarding buffers */ -- ksock_fmb_pool_t ksnd_large_fmp; /* large message forwarding buffers */ -- -- atomic_t ksnd_nactive_ltxs; /* #active ltxs */ -- -- struct list_head ksnd_deathrow_conns; /* conns to be closed */ -- struct list_head ksnd_zombie_conns; /* conns to be freed */ -- struct list_head ksnd_enomem_conns; /* conns to be retried */ -- wait_queue_head_t ksnd_reaper_waitq; /* reaper sleeps here */ -- unsigned long ksnd_reaper_waketime; /* when reaper will wake */ -- spinlock_t ksnd_reaper_lock; /* serialise */ -- -- int ksnd_enomem_tx; /* test ENOMEM sender */ -- int ksnd_stall_tx; /* test sluggish sender */ -- int ksnd_stall_rx; /* test sluggish receiver */ -- -- struct list_head ksnd_autoconnectd_routes; /* routes waiting to be connected */ 
-- wait_queue_head_t ksnd_autoconnectd_waitq; /* autoconnectds sleep here */ -- spinlock_t ksnd_autoconnectd_lock; /* serialise */ -- -- ksock_irqinfo_t ksnd_irqinfo[NR_IRQS];/* irq->scheduler lookup */ - - int ksnd_ninterfaces; - ksock_interface_t ksnd_interfaces[SOCKNAL_MAX_INTERFACES]; /* published interfaces */ --} ksock_nal_data_t; -- --#define SOCKNAL_INIT_NOTHING 0 --#define SOCKNAL_INIT_DATA 1 - #define SOCKNAL_INIT_PTL 2 -#define SOCKNAL_INIT_LIB 2 --#define SOCKNAL_INIT_ALL 3 -- --/* A packet just assembled for transmission is represented by 1 or more -- * struct iovec fragments (the first frag contains the portals header), -- * followed by 0 or more ptl_kiov_t fragments. -- * -- * On the receive side, initially 1 struct iovec fragment is posted for -- * receive (the header). Once the header has been received, the payload is -- * received into either struct iovec or ptl_kiov_t fragments, depending on -- * what the header matched or whether the message needs forwarding. */ -- --struct ksock_conn; /* forward ref */ --struct ksock_peer; /* forward ref */ --struct ksock_route; /* forward ref */ -- --typedef struct /* transmit packet */ --{ -- struct list_head tx_list; /* queue on conn for transmission etc */ -- char tx_isfwd; /* forwarding / sourced here */ -- int tx_nob; /* # packet bytes */ -- int tx_resid; /* residual bytes */ -- int tx_niov; /* # packet iovec frags */ -- struct iovec *tx_iov; /* packet iovec frags */ -- int tx_nkiov; /* # packet page frags */ -- ptl_kiov_t *tx_kiov; /* packet page frags */ -- struct ksock_conn *tx_conn; /* owning conn */ -- ptl_hdr_t *tx_hdr; /* packet header (for debug only) */ --#if SOCKNAL_ZC -- zccd_t tx_zccd; /* zero copy callback descriptor */ --#endif --} ksock_tx_t; -- --typedef struct /* forwarded packet */ --{ -- ksock_tx_t ftx_tx; /* send info */ -- struct iovec ftx_iov; /* hdr iovec */ --} ksock_ftx_t; -- --#define KSOCK_ZCCD_2_TX(ptr) list_entry (ptr, ksock_tx_t, tx_zccd) --/* network zero copy callback 
descriptor embedded in ksock_tx_t */ -- --typedef struct /* locally transmitted packet */ --{ -- ksock_tx_t ltx_tx; /* send info */ -- void *ltx_private; /* lib_finalize() callback arg */ -- void *ltx_cookie; /* lib_finalize() callback arg */ -- ptl_hdr_t ltx_hdr; /* buffer for packet header */ -- int ltx_desc_size; /* bytes allocated for this desc */ -- struct iovec ltx_iov[1]; /* iov for hdr + payload */ -- ptl_kiov_t ltx_kiov[0]; /* kiov for payload */ --} ksock_ltx_t; -- --#define KSOCK_TX_2_KPR_FWD_DESC(ptr) list_entry ((kprfd_scratch_t *)ptr, kpr_fwd_desc_t, kprfd_scratch) --/* forwarded packets (router->socknal) embedded in kpr_fwd_desc_t::kprfd_scratch */ -- --#define KSOCK_TX_2_KSOCK_LTX(ptr) list_entry (ptr, ksock_ltx_t, ltx_tx) --/* local packets (lib->socknal) embedded in ksock_ltx_t::ltx_tx */ -- --/* NB list_entry() is used here as convenient macro for calculating a -- * pointer to a struct from the address of a member. */ -- --typedef struct /* Kernel portals Socket Forwarding message buffer */ --{ /* (socknal->router) */ -- struct list_head fmb_list; /* queue idle */ -- kpr_fwd_desc_t fmb_fwd; /* router's descriptor */ -- ksock_fmb_pool_t *fmb_pool; /* owning pool */ -- struct ksock_peer *fmb_peer; /* peer received from */ -- ptl_hdr_t fmb_hdr; /* message header */ -- ptl_kiov_t fmb_kiov[0]; /* payload frags */ --} ksock_fmb_t; -- --/* space for the rx frag descriptors; we either read a single contiguous -- * header, or up to PTL_MD_MAX_IOV frags of payload of either type. 
*/ --typedef union { -- struct iovec iov[PTL_MD_MAX_IOV]; -- ptl_kiov_t kiov[PTL_MD_MAX_IOV]; --} ksock_rxiovspace_t; -- --#define SOCKNAL_RX_HEADER 1 /* reading header */ --#define SOCKNAL_RX_BODY 2 /* reading body (to deliver here) */ --#define SOCKNAL_RX_BODY_FWD 3 /* reading body (to forward) */ --#define SOCKNAL_RX_SLOP 4 /* skipping body */ --#define SOCKNAL_RX_GET_FMB 5 /* scheduled for forwarding */ --#define SOCKNAL_RX_FMB_SLEEP 6 /* blocked waiting for a fwd desc */ -- --typedef struct ksock_conn --{ -- struct ksock_peer *ksnc_peer; /* owning peer */ -- struct ksock_route *ksnc_route; /* owning route */ -- struct list_head ksnc_list; /* stash on peer's conn list */ -- struct socket *ksnc_sock; /* actual socket */ -- void *ksnc_saved_data_ready; /* socket's original data_ready() callback */ -- void *ksnc_saved_write_space; /* socket's original write_space() callback */ -- atomic_t ksnc_refcount; /* # users */ -- ksock_sched_t *ksnc_scheduler; /* who schedules this connection */ - __u32 ksnc_myipaddr; /* my IP */ -- __u32 ksnc_ipaddr; /* peer's IP */ -- int ksnc_port; /* peer's port */ -- int ksnc_closing; /* being shut down */ -- int ksnc_type; /* type of connection */ -- __u64 ksnc_incarnation; /* peer's incarnation */ -- -- /* reader */ -- struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */ -- unsigned long ksnc_rx_deadline; /* when (in jiffies) receive times out */ -- int ksnc_rx_started; /* started receiving a message */ -- int ksnc_rx_ready; /* data ready to read */ -- int ksnc_rx_scheduled; /* being progressed */ -- int ksnc_rx_state; /* what is being read */ -- int ksnc_rx_nob_left; /* # bytes to next hdr/body */ -- int ksnc_rx_nob_wanted; /* bytes actually wanted */ -- int ksnc_rx_niov; /* # iovec frags */ -- struct iovec *ksnc_rx_iov; /* the iovec frags */ -- int ksnc_rx_nkiov; /* # page frags */ -- ptl_kiov_t *ksnc_rx_kiov; /* the page frags */ -- ksock_rxiovspace_t ksnc_rx_iov_space; /* space for frag 
descriptors */ -- void *ksnc_cookie; /* rx lib_finalize passthru arg */ -- ptl_hdr_t ksnc_hdr; /* where I read headers into */ -- -- /* WRITER */ -- struct list_head ksnc_tx_list; /* where I enq waiting for output space */ -- struct list_head ksnc_tx_queue; /* packets waiting to be sent */ -- unsigned long ksnc_tx_deadline; /* when (in jiffies) tx times out */ - int ksnc_tx_bufnob; /* send buffer marker */ -- atomic_t ksnc_tx_nob; /* # bytes queued */ -- int ksnc_tx_ready; /* write space */ -- int ksnc_tx_scheduled; /* being progressed */ --} ksock_conn_t; -- --#define KSNR_TYPED_ROUTES ((1 << SOCKNAL_CONN_CONTROL) | \ -- (1 << SOCKNAL_CONN_BULK_IN) | \ -- (1 << SOCKNAL_CONN_BULK_OUT)) -- --typedef struct ksock_route --{ -- struct list_head ksnr_list; /* chain on peer route list */ -- struct list_head ksnr_connect_list; /* chain on autoconnect list */ -- struct ksock_peer *ksnr_peer; /* owning peer */ -- atomic_t ksnr_refcount; /* # users */ - int ksnr_sharecount; /* lconf usage counter */ -- unsigned long ksnr_timeout; /* when (in jiffies) reconnection can happen next */ -- unsigned int ksnr_retry_interval; /* how long between retries */ - __u32 ksnr_ipaddr; /* an IP address for this peer */ - __u32 ksnr_myipaddr; /* my IP */ - __u32 ksnr_ipaddr; /* IP address to connect to */ -- int ksnr_port; /* port to connect to */ - int ksnr_buffer_size; /* size of socket buffers */ - unsigned int ksnr_irq_affinity:1; /* set affinity? */ - unsigned int ksnr_eager:1; /* connect eagery? */ -- unsigned int ksnr_connecting:4; /* autoconnects in progress by type */ -- unsigned int ksnr_connected:4; /* connections established by type */ -- unsigned int ksnr_deleted:1; /* been removed from peer? */ - unsigned int ksnr_share_count; /* created explicitly? 
*/ -- int ksnr_conn_count; /* # conns established by this route */ --} ksock_route_t; -- --typedef struct ksock_peer --{ -- struct list_head ksnp_list; /* stash on global peer list */ -- ptl_nid_t ksnp_nid; /* who's on the other end(s) */ -- atomic_t ksnp_refcount; /* # users */ - int ksnp_sharecount; /* lconf usage counter */ -- int ksnp_closing; /* being closed */ -- int ksnp_error; /* errno on closing last conn */ -- struct list_head ksnp_conns; /* all active connections */ -- struct list_head ksnp_routes; /* routes */ -- struct list_head ksnp_tx_queue; /* waiting packets */ -- unsigned long ksnp_last_alive; /* when (in jiffies) I was last alive */ - int ksnp_n_passive_ips; /* # of... */ - __u32 ksnp_passive_ips[SOCKNAL_MAX_INTERFACES]; /* preferred local interfaces */ --} ksock_peer_t; -- -- - extern nal_cb_t ksocknal_lib; -extern lib_nal_t ksocknal_lib; --extern ksock_nal_data_t ksocknal_data; -extern ksock_tunables_t ksocknal_tunables; -- --static inline struct list_head * - ksocknal_nid2peerlist (ptl_nid_t nid) -ksocknal_nid2peerlist (ptl_nid_t nid) --{ -- unsigned int hash = ((unsigned int)nid) % ksocknal_data.ksnd_peer_hash_size; - - -- return (&ksocknal_data.ksnd_peers [hash]); --} -- --static inline int - ksocknal_getconnsock (ksock_conn_t *conn) -ksocknal_getconnsock (ksock_conn_t *conn) --{ -- int rc = -ESHUTDOWN; - - -- read_lock (&ksocknal_data.ksnd_global_lock); -- if (!conn->ksnc_closing) { -- rc = 0; -- get_file (conn->ksnc_sock->file); -- } -- read_unlock (&ksocknal_data.ksnd_global_lock); -- -- return (rc); --} -- --static inline void --ksocknal_putconnsock (ksock_conn_t *conn) --{ -- fput (conn->ksnc_sock->file); --} -- --#ifndef CONFIG_SMP - static inline -static inline --int ksocknal_nsched(void) --{ -- return 1; --} --#else --#include --# if !(defined(CONFIG_X86) && (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,21))) || defined(CONFIG_X86_64) || (LUSTRE_KERNEL_VERSION < 39) || ((LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) && 
!defined(CONFIG_X86_HT)) --static inline int --ksocknal_nsched(void) --{ -- return num_online_cpus(); --} -- --static inline int --ksocknal_sched2cpu(int i) --{ -- return i; --} -- --static inline int --ksocknal_irqsched2cpu(int i) --{ -- return i; --} - # else -# else --static inline int --ksocknal_nsched(void) --{ -- if (smp_num_siblings == 1) -- return (num_online_cpus()); -- -- /* We need to know if this assumption is crap */ -- LASSERT (smp_num_siblings == 2); -- return (num_online_cpus()/2); --} -- --static inline int --ksocknal_sched2cpu(int i) --{ -- if (smp_num_siblings == 1) -- return i; - - -- return (i * 2); --} -- --static inline int --ksocknal_irqsched2cpu(int i) --{ - if (smp_num_siblings == 1) - return ksocknal_sched2cpu(i); - -- return (ksocknal_sched2cpu(i) + 1); --} --# endif --#endif -- --extern void ksocknal_put_route (ksock_route_t *route); --extern void ksocknal_put_peer (ksock_peer_t *peer); --extern ksock_peer_t *ksocknal_find_peer_locked (ptl_nid_t nid); --extern ksock_peer_t *ksocknal_get_peer (ptl_nid_t nid); --extern int ksocknal_del_route (ptl_nid_t nid, __u32 ipaddr, -- int single, int keep_conn); --extern int ksocknal_create_conn (ksock_route_t *route, - struct socket *sock, int bind_irq, int type); - struct socket *sock, int type); --extern void ksocknal_close_conn_locked (ksock_conn_t *conn, int why); --extern void ksocknal_terminate_conn (ksock_conn_t *conn); --extern void ksocknal_destroy_conn (ksock_conn_t *conn); --extern void ksocknal_put_conn (ksock_conn_t *conn); --extern int ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation); --extern int ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why); --extern int ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr); -- --extern void ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn); --extern void ksocknal_tx_done (ksock_tx_t *tx, int asynch); --extern void ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd); --extern void 
ksocknal_fmb_callback (void *arg, int error); --extern void ksocknal_notify (void *arg, ptl_nid_t gw_nid, int alive); --extern int ksocknal_thread_start (int (*fn)(void *arg), void *arg); --extern int ksocknal_new_packet (ksock_conn_t *conn, int skip); --extern int ksocknal_scheduler (void *arg); --extern void ksocknal_data_ready(struct sock *sk, int n); --extern void ksocknal_write_space(struct sock *sk); --extern int ksocknal_autoconnectd (void *arg); --extern int ksocknal_reaper (void *arg); -extern int ksocknal_get_conn_tunables (ksock_conn_t *conn, int *txmem, - int *rxmem, int *nagle); --extern int ksocknal_setup_sock (struct socket *sock); - extern int ksocknal_hello (struct socket *sock, - ptl_nid_t *nid, int *type, __u64 *incarnation); -extern int ksocknal_send_hello (ksock_conn_t *conn, __u32 *ipaddrs, int nipaddrs); -extern int ksocknal_recv_hello (ksock_conn_t *conn, - ptl_nid_t *nid, __u64 *incarnation, __u32 *ipaddrs); diff --cc lnet/klnds/socklnd/socklnd_cb.c index f6ac855,b22d501..0000000 deleted file mode 100644,100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ /dev/null @@@ -1,2753 -1,2794 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (C) 2001, 2002 Cluster File Systems, Inc. -- * Author: Zach Brown -- * Author: Peter J. Braam -- * Author: Phil Schwan -- * Author: Eric Barton -- * -- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ -- * -- * Portals is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Portals is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. 
-- * -- * You should have received a copy of the GNU General Public License -- * along with Portals; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --#include "socknal.h" --#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) --# include --#endif -- --/* -- * LIB functions follow -- * -- */ - ptl_err_t - ksocknal_read(nal_cb_t *nal, void *private, void *dst_addr, - user_ptr src_addr, size_t len) - { - CDEBUG(D_NET, LPX64": reading %ld bytes from %p -> %p\n", - nal->ni.nid, (long)len, src_addr, dst_addr); - - memcpy( dst_addr, src_addr, len ); - return PTL_OK; - } - - ptl_err_t - ksocknal_write(nal_cb_t *nal, void *private, user_ptr dst_addr, - void *src_addr, size_t len) - { - CDEBUG(D_NET, LPX64": writing %ld bytes from %p -> %p\n", - nal->ni.nid, (long)len, src_addr, dst_addr); - - memcpy( dst_addr, src_addr, len ); - return PTL_OK; - } - - void * - ksocknal_malloc(nal_cb_t *nal, size_t len) - { - void *buf; - - PORTAL_ALLOC(buf, len); - - if (buf != NULL) - memset(buf, 0, len); - - return (buf); - } - - void - ksocknal_free(nal_cb_t *nal, void *buf, size_t len) - { - PORTAL_FREE(buf, len); - } - - void - ksocknal_printf(nal_cb_t *nal, const char *fmt, ...) 
- { - va_list ap; - char msg[256]; - - va_start (ap, fmt); - vsnprintf (msg, sizeof (msg), fmt, ap); /* sprint safely */ - va_end (ap); - - msg[sizeof (msg) - 1] = 0; /* ensure terminated */ - - CDEBUG (D_NET, "%s", msg); - } - - void - ksocknal_cli(nal_cb_t *nal, unsigned long *flags) - { - ksock_nal_data_t *data = nal->nal_data; - - /* OK to ignore 'flags'; we're only ever serialise threads and - * never need to lock out interrupts */ - spin_lock(&data->ksnd_nal_cb_lock); - } - - void - ksocknal_sti(nal_cb_t *nal, unsigned long *flags) - { - ksock_nal_data_t *data; - data = nal->nal_data; - - spin_unlock(&data->ksnd_nal_cb_lock); - } - --int - ksocknal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist) -ksocknal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist) --{ -- /* I would guess that if ksocknal_get_peer (nid) == NULL, -- and we're not routing, then 'nid' is very distant :) */ - if ( nal->ni.nid == nid ) { - if (nal->libnal_ni.ni_pid.nid == nid) { -- *dist = 0; -- } else { -- *dist = 1; -- } -- -- return 0; --} -- --void --ksocknal_free_ltx (ksock_ltx_t *ltx) --{ -- atomic_dec(&ksocknal_data.ksnd_nactive_ltxs); -- PORTAL_FREE(ltx, ltx->ltx_desc_size); --} -- --#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC) --struct page * --ksocknal_kvaddr_to_page (unsigned long vaddr) --{ -- struct page *page; -- -- if (vaddr >= VMALLOC_START && -- vaddr < VMALLOC_END) -- page = vmalloc_to_page ((void *)vaddr); --#if CONFIG_HIGHMEM -- else if (vaddr >= PKMAP_BASE && -- vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) -- page = vmalloc_to_page ((void *)vaddr); -- /* in 2.4 ^ just walks the page tables */ --#endif -- else -- page = virt_to_page (vaddr); -- -- if (page == NULL || -- !VALID_PAGE (page)) -- return (NULL); -- -- return (page); --} --#endif -- --int --ksocknal_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) --{ -- struct socket *sock = conn->ksnc_sock; -- struct iovec *iov = tx->tx_iov; -- int fragsize = iov->iov_len; -- unsigned long vaddr = (unsigned 
long)iov->iov_base; -- int more = (tx->tx_niov > 1) || -- (tx->tx_nkiov > 0) || -- (!list_empty (&conn->ksnc_tx_queue)); --#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC) -- int offset = vaddr & (PAGE_SIZE - 1); -- int zcsize = MIN (fragsize, PAGE_SIZE - offset); -- struct page *page; --#endif -- int rc; -- -- /* NB we can't trust socket ops to either consume our iovs -- * or leave them alone, so we only send 1 frag at a time. */ -- LASSERT (fragsize <= tx->tx_resid); -- LASSERT (tx->tx_niov > 0); -- --#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC) -- if (zcsize >= ksocknal_data.ksnd_zc_min_frag && -- (sock->sk->route_caps & NETIF_F_SG) && -- (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) && -- (page = ksocknal_kvaddr_to_page (vaddr)) != NULL) { -- -- CDEBUG(D_NET, "vaddr %p, page %p->%p + offset %x for %d\n", -- (void *)vaddr, page, page_address(page), offset, zcsize); -- -- if (fragsize > zcsize) { -- more = 1; -- fragsize = zcsize; -- } -- -- rc = tcp_sendpage_zccd(sock, page, offset, zcsize, -- more ? (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT, -- &tx->tx_zccd); -- } else --#endif -- { -- /* NB don't pass tx's iov; sendmsg may or may not update it */ -- struct iovec fragiov = { .iov_base = (void *)vaddr, -- .iov_len = fragsize}; -- struct msghdr msg = { -- .msg_name = NULL, -- .msg_namelen = 0, -- .msg_iov = &fragiov, -- .msg_iovlen = 1, -- .msg_control = NULL, -- .msg_controllen = 0, -- .msg_flags = more ? (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT -- }; -- mm_segment_t oldmm = get_fs(); -- -- set_fs (KERNEL_DS); -- rc = sock_sendmsg(sock, &msg, fragsize); -- set_fs (oldmm); -- } -- -- if (rc > 0) { -- tx->tx_resid -= rc; -- -- if (rc < iov->iov_len) { -- /* didn't send whole iov entry... 
*/ -- iov->iov_base = (void *)(vaddr + rc); -- iov->iov_len -= rc; -- } else { -- tx->tx_iov++; -- tx->tx_niov--; -- } -- } -- -- return (rc); --} -- --int --ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) --{ -- struct socket *sock = conn->ksnc_sock; -- ptl_kiov_t *kiov = tx->tx_kiov; -- int fragsize = kiov->kiov_len; -- struct page *page = kiov->kiov_page; -- int offset = kiov->kiov_offset; -- int more = (tx->tx_nkiov > 1) || -- (!list_empty (&conn->ksnc_tx_queue)); -- int rc; -- -- /* NB we can't trust socket ops to either consume our iovs -- * or leave them alone, so we only send 1 frag at a time. */ -- LASSERT (fragsize <= tx->tx_resid); -- LASSERT (offset + fragsize <= PAGE_SIZE); -- LASSERT (tx->tx_niov == 0); -- LASSERT (tx->tx_nkiov > 0); -- --#if SOCKNAL_ZC - if (fragsize >= ksocknal_data.ksnd_zc_min_frag && - if (fragsize >= ksocknal_tunables.ksnd_zc_min_frag && -- (sock->sk->route_caps & NETIF_F_SG) && -- (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM))) { -- -- CDEBUG(D_NET, "page %p + offset %x for %d\n", -- page, offset, fragsize); -- -- rc = tcp_sendpage_zccd(sock, page, offset, fragsize, -- more ? (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT, -- &tx->tx_zccd); -- } else --#endif -- { -- char *addr = ((char *)kmap (page)) + offset; -- struct iovec fragiov = {.iov_base = addr, -- .iov_len = fragsize}; -- struct msghdr msg = { -- .msg_name = NULL, -- .msg_namelen = 0, -- .msg_iov = &fragiov, -- .msg_iovlen = 1, -- .msg_control = NULL, -- .msg_controllen = 0, -- .msg_flags = more ? 
(MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT -- }; -- mm_segment_t oldmm = get_fs(); -- -- set_fs (KERNEL_DS); -- rc = sock_sendmsg(sock, &msg, fragsize); -- set_fs (oldmm); -- -- kunmap (page); -- } -- -- if (rc > 0) { -- tx->tx_resid -= rc; -- -- if (rc < fragsize) { -- kiov->kiov_offset = offset + rc; -- kiov->kiov_len = fragsize - rc; -- } else { -- tx->tx_kiov++; -- tx->tx_nkiov--; -- } -- } -- -- return (rc); --} -- --int --ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx) --{ -- int rc; - int bufnob; -- -- if (ksocknal_data.ksnd_stall_tx != 0) { -- set_current_state (TASK_UNINTERRUPTIBLE); -- schedule_timeout (ksocknal_data.ksnd_stall_tx * HZ); -- } -- -- LASSERT (tx->tx_resid != 0); -- -- rc = ksocknal_getconnsock (conn); -- if (rc != 0) { -- LASSERT (conn->ksnc_closing); -- return (-ESHUTDOWN); -- } -- -- do { -- if (ksocknal_data.ksnd_enomem_tx > 0) { -- /* testing... */ -- ksocknal_data.ksnd_enomem_tx--; -- rc = -EAGAIN; -- } else if (tx->tx_niov != 0) { -- rc = ksocknal_send_iov (conn, tx); -- } else { -- rc = ksocknal_send_kiov (conn, tx); - } - - bufnob = conn->ksnc_sock->sk->sk_wmem_queued; - if (rc > 0) /* sent something? */ - conn->ksnc_tx_bufnob += rc; /* account it */ - - if (bufnob < conn->ksnc_tx_bufnob) { - /* allocated send buffer bytes < computed; infer - * something got ACKed */ - conn->ksnc_tx_deadline = jiffies + - ksocknal_tunables.ksnd_io_timeout * HZ; - conn->ksnc_peer->ksnp_last_alive = jiffies; - conn->ksnc_tx_bufnob = bufnob; - mb(); -- } -- -- if (rc <= 0) { -- /* Didn't write anything. -- * -- * NB: rc == 0 and rc == -EAGAIN both mean try -- * again later (linux stack returns -EAGAIN for -- * this, but Adaptech TOE returns 0). -- * -- * Also, sends never fail with -ENOMEM, just -- * -EAGAIN, but with the added bonus that we can't -- * expect write_space() to call us back to tell us -- * when to try sending again. We use the -- * SOCK_NOSPACE flag to diagnose... 
*/ -- -- LASSERT(rc != -ENOMEM); -- -- if (rc == 0 || rc == -EAGAIN) { -- if (test_bit(SOCK_NOSPACE, -- &conn->ksnc_sock->flags)) { -- rc = -EAGAIN; -- } else { -- static int counter; -- -- counter++; -- if ((counter & (-counter)) == counter) -- CWARN("%d ENOMEM tx %p\n", -- counter, conn); -- rc = -ENOMEM; -- } -- } -- break; -- } -- - /* socket's wmem_queued now includes 'rc' bytes */ - atomic_sub (rc, &conn->ksnc_tx_nob); -- rc = 0; - - /* Consider the connection alive since we managed to chuck - * more data into it. Really, we'd like to consider it - * alive only when the peer ACKs something, but - * write_space() only gets called back while SOCK_NOSPACE - * is set. Instead, we presume peer death has occurred if - * the socket doesn't drain within a timout */ - conn->ksnc_tx_deadline = jiffies + - ksocknal_data.ksnd_io_timeout * HZ; - conn->ksnc_peer->ksnp_last_alive = jiffies; -- -- } while (tx->tx_resid != 0); -- -- ksocknal_putconnsock (conn); -- return (rc); --} -- --void --ksocknal_eager_ack (ksock_conn_t *conn) --{ -- int opt = 1; -- mm_segment_t oldmm = get_fs(); -- struct socket *sock = conn->ksnc_sock; -- -- /* Remind the socket to ACK eagerly. If I don't, the socket might -- * think I'm about to send something it could piggy-back the ACK -- * on, introducing delay in completing zero-copy sends in my -- * peer. 
*/ -- -- set_fs(KERNEL_DS); -- sock->ops->setsockopt (sock, SOL_TCP, TCP_QUICKACK, -- (char *)&opt, sizeof (opt)); -- set_fs(oldmm); --} -- --int --ksocknal_recv_iov (ksock_conn_t *conn) --{ -- struct iovec *iov = conn->ksnc_rx_iov; -- int fragsize = iov->iov_len; -- unsigned long vaddr = (unsigned long)iov->iov_base; -- struct iovec fragiov = { .iov_base = (void *)vaddr, -- .iov_len = fragsize}; -- struct msghdr msg = { -- .msg_name = NULL, -- .msg_namelen = 0, -- .msg_iov = &fragiov, -- .msg_iovlen = 1, -- .msg_control = NULL, -- .msg_controllen = 0, -- .msg_flags = 0 -- }; -- mm_segment_t oldmm = get_fs(); -- int rc; -- -- /* NB we can't trust socket ops to either consume our iovs -- * or leave them alone, so we only receive 1 frag at a time. */ -- LASSERT (conn->ksnc_rx_niov > 0); -- LASSERT (fragsize <= conn->ksnc_rx_nob_wanted); -- -- set_fs (KERNEL_DS); -- rc = sock_recvmsg (conn->ksnc_sock, &msg, fragsize, MSG_DONTWAIT); -- /* NB this is just a boolean............................^ */ -- set_fs (oldmm); -- -- if (rc <= 0) -- return (rc); -- -- /* received something... 
*/ -- conn->ksnc_peer->ksnp_last_alive = jiffies; -- conn->ksnc_rx_deadline = jiffies + - ksocknal_data.ksnd_io_timeout * HZ; - ksocknal_tunables.ksnd_io_timeout * HZ; -- mb(); /* order with setting rx_started */ -- conn->ksnc_rx_started = 1; -- -- conn->ksnc_rx_nob_wanted -= rc; -- conn->ksnc_rx_nob_left -= rc; -- -- if (rc < fragsize) { -- iov->iov_base = (void *)(vaddr + rc); -- iov->iov_len = fragsize - rc; -- return (-EAGAIN); -- } -- -- conn->ksnc_rx_iov++; -- conn->ksnc_rx_niov--; -- return (1); --} -- --int --ksocknal_recv_kiov (ksock_conn_t *conn) --{ -- ptl_kiov_t *kiov = conn->ksnc_rx_kiov; -- struct page *page = kiov->kiov_page; -- int offset = kiov->kiov_offset; -- int fragsize = kiov->kiov_len; -- unsigned long vaddr = ((unsigned long)kmap (page)) + offset; -- struct iovec fragiov = { .iov_base = (void *)vaddr, -- .iov_len = fragsize}; -- struct msghdr msg = { -- .msg_name = NULL, -- .msg_namelen = 0, -- .msg_iov = &fragiov, -- .msg_iovlen = 1, -- .msg_control = NULL, -- .msg_controllen = 0, -- .msg_flags = 0 -- }; -- mm_segment_t oldmm = get_fs(); -- int rc; -- -- /* NB we can't trust socket ops to either consume our iovs -- * or leave them alone, so we only receive 1 frag at a time. */ -- LASSERT (fragsize <= conn->ksnc_rx_nob_wanted); -- LASSERT (conn->ksnc_rx_nkiov > 0); -- LASSERT (offset + fragsize <= PAGE_SIZE); -- -- set_fs (KERNEL_DS); -- rc = sock_recvmsg (conn->ksnc_sock, &msg, fragsize, MSG_DONTWAIT); -- /* NB this is just a boolean............................^ */ -- set_fs (oldmm); -- -- kunmap (page); -- -- if (rc <= 0) -- return (rc); -- -- /* received something... 
*/ -- conn->ksnc_peer->ksnp_last_alive = jiffies; -- conn->ksnc_rx_deadline = jiffies + - ksocknal_data.ksnd_io_timeout * HZ; - ksocknal_tunables.ksnd_io_timeout * HZ; -- mb(); /* order with setting rx_started */ -- conn->ksnc_rx_started = 1; -- -- conn->ksnc_rx_nob_wanted -= rc; -- conn->ksnc_rx_nob_left -= rc; -- -- if (rc < fragsize) { -- kiov->kiov_offset = offset + rc; -- kiov->kiov_len = fragsize - rc; -- return (-EAGAIN); -- } -- -- conn->ksnc_rx_kiov++; -- conn->ksnc_rx_nkiov--; -- return (1); --} -- --int --ksocknal_receive (ksock_conn_t *conn) --{ -- /* Return 1 on success, 0 on EOF, < 0 on error. -- * Caller checks ksnc_rx_nob_wanted to determine -- * progress/completion. */ -- int rc; -- ENTRY; -- -- if (ksocknal_data.ksnd_stall_rx != 0) { -- set_current_state (TASK_UNINTERRUPTIBLE); -- schedule_timeout (ksocknal_data.ksnd_stall_rx * HZ); -- } -- -- rc = ksocknal_getconnsock (conn); -- if (rc != 0) { -- LASSERT (conn->ksnc_closing); -- return (-ESHUTDOWN); -- } -- -- for (;;) { -- if (conn->ksnc_rx_niov != 0) -- rc = ksocknal_recv_iov (conn); -- else -- rc = ksocknal_recv_kiov (conn); -- -- if (rc <= 0) { -- /* error/EOF or partial receive */ -- if (rc == -EAGAIN) { -- rc = 1; -- } else if (rc == 0 && conn->ksnc_rx_started) { -- /* EOF in the middle of a message */ -- rc = -EPROTO; -- } -- break; -- } -- -- /* Completed a fragment */ -- -- if (conn->ksnc_rx_nob_wanted == 0) { -- /* Completed a message segment (header or payload) */ - if ((ksocknal_data.ksnd_eager_ack & conn->ksnc_type) != 0 && - if ((ksocknal_tunables.ksnd_eager_ack & conn->ksnc_type) != 0 && -- (conn->ksnc_rx_state == SOCKNAL_RX_BODY || -- conn->ksnc_rx_state == SOCKNAL_RX_BODY_FWD)) { -- /* Remind the socket to ack eagerly... 
*/ -- ksocknal_eager_ack(conn); -- } -- rc = 1; -- break; -- } -- } -- -- ksocknal_putconnsock (conn); -- RETURN (rc); --} -- --#if SOCKNAL_ZC --void --ksocknal_zc_callback (zccd_t *zcd) --{ -- ksock_tx_t *tx = KSOCK_ZCCD_2_TX(zcd); -- ksock_sched_t *sched = tx->tx_conn->ksnc_scheduler; -- unsigned long flags; -- ENTRY; -- -- /* Schedule tx for cleanup (can't do it now due to lock conflicts) */ -- -- spin_lock_irqsave (&sched->kss_lock, flags); -- -- list_add_tail (&tx->tx_list, &sched->kss_zctxdone_list); -- wake_up (&sched->kss_waitq); -- -- spin_unlock_irqrestore (&sched->kss_lock, flags); -- EXIT; --} --#endif -- --void --ksocknal_tx_done (ksock_tx_t *tx, int asynch) --{ -- ksock_ltx_t *ltx; -- ENTRY; -- -- if (tx->tx_conn != NULL) { - /* This tx got queued on a conn; do the accounting... */ - atomic_sub (tx->tx_nob, &tx->tx_conn->ksnc_tx_nob); --#if SOCKNAL_ZC -- /* zero copy completion isn't always from -- * process_transmit() so it needs to keep a ref on -- * tx_conn... */ -- if (asynch) -- ksocknal_put_conn (tx->tx_conn); --#else -- LASSERT (!asynch); --#endif -- } -- -- if (tx->tx_isfwd) { /* was a forwarded packet? */ -- kpr_fwd_done (&ksocknal_data.ksnd_router, -- KSOCK_TX_2_KPR_FWD_DESC (tx), -- (tx->tx_resid == 0) ? 0 : -ECONNABORTED); -- EXIT; -- return; -- } -- -- /* local send */ -- ltx = KSOCK_TX_2_KSOCK_LTX (tx); -- -- lib_finalize (&ksocknal_lib, ltx->ltx_private, ltx->ltx_cookie, -- (tx->tx_resid == 0) ? PTL_OK : PTL_FAIL); -- -- ksocknal_free_ltx (ltx); -- EXIT; --} -- --void --ksocknal_tx_launched (ksock_tx_t *tx) --{ --#if SOCKNAL_ZC -- if (atomic_read (&tx->tx_zccd.zccd_count) != 1) { -- ksock_conn_t *conn = tx->tx_conn; -- -- /* zccd skbufs are still in-flight. First take a ref on -- * conn, so it hangs about for ksocknal_tx_done... 
*/ -- atomic_inc (&conn->ksnc_refcount); -- -- /* ...then drop the initial ref on zccd, so the zero copy -- * callback can occur */ -- zccd_put (&tx->tx_zccd); -- return; -- } --#endif -- /* Any zero-copy-ness (if any) has completed; I can complete the -- * transmit now, avoiding an extra schedule */ -- ksocknal_tx_done (tx, 0); --} -- --int --ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx) --{ -- unsigned long flags; -- int rc; -- -- rc = ksocknal_transmit (conn, tx); -- -- CDEBUG (D_NET, "send(%d) %d\n", tx->tx_resid, rc); -- -- if (tx->tx_resid == 0) { -- /* Sent everything OK */ -- LASSERT (rc == 0); -- -- ksocknal_tx_launched (tx); -- return (0); -- } -- -- if (rc == -EAGAIN) -- return (rc); -- -- if (rc == -ENOMEM) { -- /* Queue on ksnd_enomem_conns for retry after a timeout */ -- spin_lock_irqsave(&ksocknal_data.ksnd_reaper_lock, flags); -- -- /* enomem list takes over scheduler's ref... */ -- LASSERT (conn->ksnc_tx_scheduled); -- list_add_tail(&conn->ksnc_tx_list, -- &ksocknal_data.ksnd_enomem_conns); -- if (!time_after_eq(jiffies + SOCKNAL_ENOMEM_RETRY, -- ksocknal_data.ksnd_reaper_waketime)) -- wake_up (&ksocknal_data.ksnd_reaper_waitq); -- -- spin_unlock_irqrestore(&ksocknal_data.ksnd_reaper_lock, flags); -- return (rc); -- } -- -- /* Actual error */ -- LASSERT (rc < 0); -- -- if (!conn->ksnc_closing) -- CERROR("[%p] Error %d on write to "LPX64 -- " ip %d.%d.%d.%d:%d\n", conn, rc, -- conn->ksnc_peer->ksnp_nid, -- HIPQUAD(conn->ksnc_ipaddr), -- conn->ksnc_port); -- -- ksocknal_close_conn_and_siblings (conn, rc); -- ksocknal_tx_launched (tx); -- -- return (rc); --} -- --void --ksocknal_launch_autoconnect_locked (ksock_route_t *route) --{ -- unsigned long flags; -- -- /* called holding write lock on ksnd_global_lock */ -- -- LASSERT (!route->ksnr_deleted); -- LASSERT ((route->ksnr_connected & (1 << SOCKNAL_CONN_ANY)) == 0); -- LASSERT ((route->ksnr_connected & KSNR_TYPED_ROUTES) != KSNR_TYPED_ROUTES); - LASSERT (!route->ksnr_connecting); - 
LASSERT (route->ksnr_connecting == 0); -- - if (ksocknal_data.ksnd_typed_conns) - if (ksocknal_tunables.ksnd_typed_conns) -- route->ksnr_connecting = -- KSNR_TYPED_ROUTES & ~route->ksnr_connected; -- else -- route->ksnr_connecting = (1 << SOCKNAL_CONN_ANY); -- -- atomic_inc (&route->ksnr_refcount); /* extra ref for asynchd */ -- -- spin_lock_irqsave (&ksocknal_data.ksnd_autoconnectd_lock, flags); -- -- list_add_tail (&route->ksnr_connect_list, -- &ksocknal_data.ksnd_autoconnectd_routes); -- wake_up (&ksocknal_data.ksnd_autoconnectd_waitq); -- -- spin_unlock_irqrestore (&ksocknal_data.ksnd_autoconnectd_lock, flags); --} -- --ksock_peer_t * --ksocknal_find_target_peer_locked (ksock_tx_t *tx, ptl_nid_t nid) --{ -- char ipbuf[PTL_NALFMT_SIZE]; -- ptl_nid_t target_nid; -- int rc; -- ksock_peer_t *peer = ksocknal_find_peer_locked (nid); -- -- if (peer != NULL) -- return (peer); -- -- if (tx->tx_isfwd) { -- CERROR ("Can't send packet to "LPX64 -- " %s: routed target is not a peer\n", -- nid, portals_nid2str(SOCKNAL, nid, ipbuf)); -- return (NULL); -- } -- -- rc = kpr_lookup (&ksocknal_data.ksnd_router, nid, tx->tx_nob, -- &target_nid); -- if (rc != 0) { -- CERROR ("Can't route to "LPX64" %s: router error %d\n", -- nid, portals_nid2str(SOCKNAL, nid, ipbuf), rc); -- return (NULL); -- } -- -- peer = ksocknal_find_peer_locked (target_nid); -- if (peer != NULL) -- return (peer); -- -- CERROR ("Can't send packet to "LPX64" %s: no peer entry\n", -- target_nid, portals_nid2str(SOCKNAL, target_nid, ipbuf)); -- return (NULL); --} -- --ksock_conn_t * --ksocknal_find_conn_locked (ksock_tx_t *tx, ksock_peer_t *peer) --{ -- struct list_head *tmp; -- ksock_conn_t *typed = NULL; -- int tnob = 0; -- ksock_conn_t *fallback = NULL; -- int fnob = 0; - ksock_conn_t *conn; -- - /* Find the conn with the shortest tx queue */ -- list_for_each (tmp, &peer->ksnp_conns) { -- ksock_conn_t *c = list_entry(tmp, ksock_conn_t, ksnc_list); -#if SOCKNAL_ROUND_ROBIN - const int nob = 0; -#else -- int nob = 
atomic_read(&c->ksnc_tx_nob) + -- c->ksnc_sock->sk->sk_wmem_queued; - -#endif -- LASSERT (!c->ksnc_closing); -- -- if (fallback == NULL || nob < fnob) { -- fallback = c; -- fnob = nob; -- } -- - if (!ksocknal_data.ksnd_typed_conns) - if (!ksocknal_tunables.ksnd_typed_conns) -- continue; -- -- switch (c->ksnc_type) { -- default: -- LBUG(); -- case SOCKNAL_CONN_ANY: -- break; -- case SOCKNAL_CONN_BULK_IN: -- continue; -- case SOCKNAL_CONN_BULK_OUT: - if (tx->tx_nob < ksocknal_data.ksnd_min_bulk) - if (tx->tx_nob < ksocknal_tunables.ksnd_min_bulk) -- continue; -- break; -- case SOCKNAL_CONN_CONTROL: - if (tx->tx_nob >= ksocknal_data.ksnd_min_bulk) - if (tx->tx_nob >= ksocknal_tunables.ksnd_min_bulk) -- continue; -- break; -- } -- -- if (typed == NULL || nob < tnob) { -- typed = c; -- tnob = nob; -- } -- } -- -- /* prefer the typed selection */ - return ((typed != NULL) ? typed : fallback); - conn = (typed != NULL) ? typed : fallback; - -#if SOCKNAL_ROUND_ROBIN - if (conn != NULL) { - /* round-robin all else being equal */ - list_del (&conn->ksnc_list); - list_add_tail (&conn->ksnc_list, &peer->ksnp_conns); - } -#endif - return conn; --} -- --void --ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn) --{ -- unsigned long flags; -- ksock_sched_t *sched = conn->ksnc_scheduler; -- -- /* called holding global lock (read or irq-write) and caller may -- * not have dropped this lock between finding conn and calling me, -- * so we don't need the {get,put}connsock dance to deref -- * ksnc_sock... */ -- LASSERT(!conn->ksnc_closing); -- LASSERT(tx->tx_resid == tx->tx_nob); -- -- CDEBUG (D_NET, "Sending to "LPX64" ip %d.%d.%d.%d:%d\n", -- conn->ksnc_peer->ksnp_nid, -- HIPQUAD(conn->ksnc_ipaddr), -- conn->ksnc_port); -- -- atomic_add (tx->tx_nob, &conn->ksnc_tx_nob); -- tx->tx_conn = conn; -- --#if SOCKNAL_ZC -- zccd_init (&tx->tx_zccd, ksocknal_zc_callback); -- /* NB this sets 1 ref on zccd, so the callback can only occur after -- * I've released this ref. 
*/ --#endif -- spin_lock_irqsave (&sched->kss_lock, flags); -- - conn->ksnc_tx_deadline = jiffies + - ksocknal_data.ksnd_io_timeout * HZ; - mb(); /* order with list_add_tail */ - if (list_empty(&conn->ksnc_tx_queue) && - conn->ksnc_sock->sk->sk_wmem_queued == 0) { - /* First packet starts the timeout */ - conn->ksnc_tx_deadline = jiffies + - ksocknal_tunables.ksnd_io_timeout * HZ; - conn->ksnc_tx_bufnob = 0; - mb(); /* order with adding to tx_queue */ - } -- -- list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue); -- -- if (conn->ksnc_tx_ready && /* able to send */ -- !conn->ksnc_tx_scheduled) { /* not scheduled to send */ -- /* +1 ref for scheduler */ -- atomic_inc (&conn->ksnc_refcount); -- list_add_tail (&conn->ksnc_tx_list, -- &sched->kss_tx_conns); -- conn->ksnc_tx_scheduled = 1; -- wake_up (&sched->kss_waitq); -- } -- -- spin_unlock_irqrestore (&sched->kss_lock, flags); --} -- --ksock_route_t * --ksocknal_find_connectable_route_locked (ksock_peer_t *peer) --{ -- struct list_head *tmp; -- ksock_route_t *route; - ksock_route_t *candidate = NULL; - int found = 0; -- int bits; -- -- list_for_each (tmp, &peer->ksnp_routes) { -- route = list_entry (tmp, ksock_route_t, ksnr_list); -- bits = route->ksnr_connected; - - if ((bits & KSNR_TYPED_ROUTES) == KSNR_TYPED_ROUTES || - (bits & (1 << SOCKNAL_CONN_ANY)) != 0 || - route->ksnr_connecting != 0) { - /* All typed connections have been established, or - * an untyped connection has been established, or - * connections are currently being established */ - found = 1; - - /* All typed connections established? */ - if ((bits & KSNR_TYPED_ROUTES) == KSNR_TYPED_ROUTES) -- continue; - } - - /* Untyped connection established? */ - if ((bits & (1 << SOCKNAL_CONN_ANY)) != 0) - continue; - - /* connection being established? */ - if (route->ksnr_connecting != 0) - continue; -- -- /* too soon to retry this guy? 
*/ -- if (!time_after_eq (jiffies, route->ksnr_timeout)) -- continue; -- - /* always do eager routes */ - if (route->ksnr_eager) - return (route); - - if (candidate == NULL) { - /* If we don't find any other route that is fully - * connected or connecting, the first connectable - * route is returned. If it fails to connect, it - * will get placed at the end of the list */ - candidate = route; - } - return (route); -- } - - return (found ? NULL : candidate); - - return (NULL); --} -- --ksock_route_t * --ksocknal_find_connecting_route_locked (ksock_peer_t *peer) --{ -- struct list_head *tmp; -- ksock_route_t *route; -- -- list_for_each (tmp, &peer->ksnp_routes) { -- route = list_entry (tmp, ksock_route_t, ksnr_list); -- -- if (route->ksnr_connecting != 0) -- return (route); -- } -- -- return (NULL); --} -- --int --ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid) --{ -- unsigned long flags; -- ksock_peer_t *peer; -- ksock_conn_t *conn; -- ksock_route_t *route; -- rwlock_t *g_lock; -- -- /* Ensure the frags we've been given EXACTLY match the number of -- * bytes we want to send. Many TCP/IP stacks disregard any total -- * size parameters passed to them and just look at the frags. -- * -- * We always expect at least 1 mapped fragment containing the -- * complete portals header. 
*/ -- LASSERT (lib_iov_nob (tx->tx_niov, tx->tx_iov) + -- lib_kiov_nob (tx->tx_nkiov, tx->tx_kiov) == tx->tx_nob); -- LASSERT (tx->tx_niov >= 1); -- LASSERT (tx->tx_iov[0].iov_len >= sizeof (ptl_hdr_t)); -- -- CDEBUG (D_NET, "packet %p type %d, nob %d niov %d nkiov %d\n", -- tx, ((ptl_hdr_t *)tx->tx_iov[0].iov_base)->type, -- tx->tx_nob, tx->tx_niov, tx->tx_nkiov); -- -- tx->tx_conn = NULL; /* only set when assigned a conn */ -- tx->tx_resid = tx->tx_nob; -- tx->tx_hdr = (ptl_hdr_t *)tx->tx_iov[0].iov_base; -- -- g_lock = &ksocknal_data.ksnd_global_lock; -#if !SOCKNAL_ROUND_ROBIN -- read_lock (g_lock); - - -- peer = ksocknal_find_target_peer_locked (tx, nid); -- if (peer == NULL) { -- read_unlock (g_lock); -- return (-EHOSTUNREACH); -- } -- -- if (ksocknal_find_connectable_route_locked(peer) == NULL) { -- conn = ksocknal_find_conn_locked (tx, peer); -- if (conn != NULL) { -- /* I've got no autoconnect routes that need to be -- * connecting and I do have an actual connection... */ -- ksocknal_queue_tx_locked (tx, conn); -- read_unlock (g_lock); -- return (0); -- } -- } - - /* Making one or more connections; I'll need a write lock... */ - - atomic_inc (&peer->ksnp_refcount); /* +1 ref for me while I unlock */ - - /* I'll need a write lock... */ -- read_unlock (g_lock); - write_lock_irqsave (g_lock, flags); - - if (peer->ksnp_closing) { /* peer deleted as I blocked! 
*/ - write_unlock_irqrestore (g_lock, flags); - ksocknal_put_peer (peer); -#endif - write_lock_irqsave(g_lock, flags); - - peer = ksocknal_find_target_peer_locked (tx, nid); - if (peer == NULL) { - write_unlock_irqrestore(g_lock, flags); -- return (-EHOSTUNREACH); -- } - ksocknal_put_peer (peer); /* drop ref I got above */ -- -- for (;;) { -- /* launch any/all autoconnections that need it */ -- route = ksocknal_find_connectable_route_locked (peer); -- if (route == NULL) -- break; -- -- ksocknal_launch_autoconnect_locked (route); -- } -- -- conn = ksocknal_find_conn_locked (tx, peer); -- if (conn != NULL) { -- /* Connection exists; queue message on it */ -- ksocknal_queue_tx_locked (tx, conn); -- write_unlock_irqrestore (g_lock, flags); -- return (0); -- } -- -- route = ksocknal_find_connecting_route_locked (peer); -- if (route != NULL) { -- /* At least 1 connection is being established; queue the -- * message... */ -- list_add_tail (&tx->tx_list, &peer->ksnp_tx_queue); -- write_unlock_irqrestore (g_lock, flags); -- return (0); -- } -- -- write_unlock_irqrestore (g_lock, flags); -- return (-EHOSTUNREACH); --} -- --ptl_err_t - ksocknal_sendmsg(nal_cb_t *nal, -ksocknal_sendmsg(lib_nal_t *nal, -- void *private, -- lib_msg_t *cookie, -- ptl_hdr_t *hdr, -- int type, -- ptl_nid_t nid, -- ptl_pid_t pid, -- unsigned int payload_niov, -- struct iovec *payload_iov, -- ptl_kiov_t *payload_kiov, -- size_t payload_offset, -- size_t payload_nob) --{ -- ksock_ltx_t *ltx; -- int desc_size; -- int rc; -- -- /* NB 'private' is different depending on what we're sending. -- * Just ignore it... 
*/ -- -- CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid:"LPX64 -- " pid %d\n", payload_nob, payload_niov, nid , pid); -- -- LASSERT (payload_nob == 0 || payload_niov > 0); -- LASSERT (payload_niov <= PTL_MD_MAX_IOV); -- -- /* It must be OK to kmap() if required */ -- LASSERT (payload_kiov == NULL || !in_interrupt ()); -- /* payload is either all vaddrs or all pages */ -- LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); -- -- if (payload_iov != NULL) -- desc_size = offsetof(ksock_ltx_t, ltx_iov[1 + payload_niov]); -- else -- desc_size = offsetof(ksock_ltx_t, ltx_kiov[payload_niov]); -- -- if (in_interrupt() || -- type == PTL_MSG_ACK || -- type == PTL_MSG_REPLY) { -- /* Can't block if in interrupt or responding to an incoming -- * message */ -- PORTAL_ALLOC_ATOMIC(ltx, desc_size); -- } else { -- PORTAL_ALLOC(ltx, desc_size); -- } -- -- if (ltx == NULL) { -- CERROR("Can't allocate tx desc type %d size %d %s\n", -- type, desc_size, in_interrupt() ? "(intr)" : ""); - return (PTL_NOSPACE); - return (PTL_NO_SPACE); -- } -- -- atomic_inc(&ksocknal_data.ksnd_nactive_ltxs); -- -- ltx->ltx_desc_size = desc_size; -- -- /* We always have 1 mapped frag for the header */ -- ltx->ltx_tx.tx_iov = ltx->ltx_iov; -- ltx->ltx_iov[0].iov_base = <x->ltx_hdr; -- ltx->ltx_iov[0].iov_len = sizeof(*hdr); -- ltx->ltx_hdr = *hdr; -- -- ltx->ltx_private = private; -- ltx->ltx_cookie = cookie; -- -- ltx->ltx_tx.tx_isfwd = 0; -- ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_nob; -- -- if (payload_iov != NULL) { -- /* payload is all mapped */ -- ltx->ltx_tx.tx_kiov = NULL; -- ltx->ltx_tx.tx_nkiov = 0; -- -- ltx->ltx_tx.tx_niov = -- 1 + lib_extract_iov(payload_niov, <x->ltx_iov[1], -- payload_niov, payload_iov, -- payload_offset, payload_nob); -- } else { -- /* payload is all pages */ -- ltx->ltx_tx.tx_niov = 1; -- -- ltx->ltx_tx.tx_kiov = ltx->ltx_kiov; -- ltx->ltx_tx.tx_nkiov = -- lib_extract_kiov(payload_niov, ltx->ltx_kiov, -- payload_niov, payload_kiov, -- payload_offset, 
payload_nob); -- } -- -- rc = ksocknal_launch_packet(<x->ltx_tx, nid); -- if (rc == 0) -- return (PTL_OK); -- -- ksocknal_free_ltx(ltx); -- return (PTL_FAIL); --} -- --ptl_err_t - ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie, -ksocknal_send (lib_nal_t *nal, void *private, lib_msg_t *cookie, -- ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, -- unsigned int payload_niov, struct iovec *payload_iov, -- size_t payload_offset, size_t payload_len) --{ -- return (ksocknal_sendmsg(nal, private, cookie, -- hdr, type, nid, pid, -- payload_niov, payload_iov, NULL, -- payload_offset, payload_len)); --} -- --ptl_err_t - ksocknal_send_pages (nal_cb_t *nal, void *private, lib_msg_t *cookie, -ksocknal_send_pages (lib_nal_t *nal, void *private, lib_msg_t *cookie, -- ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, -- unsigned int payload_niov, ptl_kiov_t *payload_kiov, -- size_t payload_offset, size_t payload_len) --{ -- return (ksocknal_sendmsg(nal, private, cookie, -- hdr, type, nid, pid, -- payload_niov, NULL, payload_kiov, -- payload_offset, payload_len)); --} -- --void --ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) --{ -- ptl_nid_t nid = fwd->kprfd_gateway_nid; -- ksock_ftx_t *ftx = (ksock_ftx_t *)&fwd->kprfd_scratch; -- int rc; -- -- CDEBUG (D_NET, "Forwarding [%p] -> "LPX64" ("LPX64"))\n", fwd, -- fwd->kprfd_gateway_nid, fwd->kprfd_target_nid); -- -- /* I'm the gateway; must be the last hop */ - if (nid == ksocknal_lib.ni.nid) - if (nid == ksocknal_lib.libnal_ni.ni_pid.nid) -- nid = fwd->kprfd_target_nid; -- -- /* setup iov for hdr */ -- ftx->ftx_iov.iov_base = fwd->kprfd_hdr; -- ftx->ftx_iov.iov_len = sizeof(ptl_hdr_t); -- -- ftx->ftx_tx.tx_isfwd = 1; /* This is a forwarding packet */ -- ftx->ftx_tx.tx_nob = sizeof(ptl_hdr_t) + fwd->kprfd_nob; -- ftx->ftx_tx.tx_niov = 1; -- ftx->ftx_tx.tx_iov = &ftx->ftx_iov; -- ftx->ftx_tx.tx_nkiov = fwd->kprfd_niov; -- ftx->ftx_tx.tx_kiov = fwd->kprfd_kiov; -- -- rc = ksocknal_launch_packet 
(&ftx->ftx_tx, nid); -- if (rc != 0) -- kpr_fwd_done (&ksocknal_data.ksnd_router, fwd, rc); --} -- --int --ksocknal_thread_start (int (*fn)(void *arg), void *arg) --{ - long pid = kernel_thread (fn, arg, 0); - long pid = kernel_thread (fn, arg, 0); - unsigned long flags; -- -- if (pid < 0) -- return ((int)pid); -- - atomic_inc (&ksocknal_data.ksnd_nthreads); - write_lock_irqsave(&ksocknal_data.ksnd_global_lock, flags); - ksocknal_data.ksnd_nthreads++; - write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, flags); -- return (0); --} -- --void --ksocknal_thread_fini (void) --{ - atomic_dec (&ksocknal_data.ksnd_nthreads); - unsigned long flags; - - write_lock_irqsave(&ksocknal_data.ksnd_global_lock, flags); - ksocknal_data.ksnd_nthreads--; - write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, flags); --} -- --void --ksocknal_fmb_callback (void *arg, int error) --{ -- ksock_fmb_t *fmb = (ksock_fmb_t *)arg; -- ksock_fmb_pool_t *fmp = fmb->fmb_pool; -- ptl_hdr_t *hdr = &fmb->fmb_hdr; -- ksock_conn_t *conn = NULL; -- ksock_sched_t *sched; -- unsigned long flags; -- char ipbuf[PTL_NALFMT_SIZE]; -- char ipbuf2[PTL_NALFMT_SIZE]; -- -- if (error != 0) -- CERROR("Failed to route packet from " -- LPX64" %s to "LPX64" %s: %d\n", - NTOH__u64(hdr->src_nid), - portals_nid2str(SOCKNAL, NTOH__u64(hdr->src_nid), ipbuf), - NTOH__u64(hdr->dest_nid), - portals_nid2str(SOCKNAL, NTOH__u64(hdr->dest_nid), ipbuf2), - le64_to_cpu(hdr->src_nid), - portals_nid2str(SOCKNAL, le64_to_cpu(hdr->src_nid), ipbuf), - le64_to_cpu(hdr->dest_nid), - portals_nid2str(SOCKNAL, le64_to_cpu(hdr->dest_nid), ipbuf2), -- error); -- else -- CDEBUG (D_NET, "routed packet from "LPX64" to "LPX64": OK\n", - NTOH__u64 (hdr->src_nid), NTOH__u64 (hdr->dest_nid)); - le64_to_cpu(hdr->src_nid), le64_to_cpu(hdr->dest_nid)); -- -- /* drop peer ref taken on init */ -- ksocknal_put_peer (fmb->fmb_peer); -- -- spin_lock_irqsave (&fmp->fmp_lock, flags); -- -- list_add (&fmb->fmb_list, &fmp->fmp_idle_fmbs); -- 
fmp->fmp_nactive_fmbs--; -- -- if (!list_empty (&fmp->fmp_blocked_conns)) { -- conn = list_entry (fmb->fmb_pool->fmp_blocked_conns.next, -- ksock_conn_t, ksnc_rx_list); -- list_del (&conn->ksnc_rx_list); -- } -- -- spin_unlock_irqrestore (&fmp->fmp_lock, flags); -- -- if (conn == NULL) -- return; -- -- CDEBUG (D_NET, "Scheduling conn %p\n", conn); -- LASSERT (conn->ksnc_rx_scheduled); -- LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_FMB_SLEEP); -- -- conn->ksnc_rx_state = SOCKNAL_RX_GET_FMB; -- -- sched = conn->ksnc_scheduler; -- -- spin_lock_irqsave (&sched->kss_lock, flags); -- -- list_add_tail (&conn->ksnc_rx_list, &sched->kss_rx_conns); -- wake_up (&sched->kss_waitq); -- -- spin_unlock_irqrestore (&sched->kss_lock, flags); --} -- --ksock_fmb_t * --ksocknal_get_idle_fmb (ksock_conn_t *conn) --{ -- int payload_nob = conn->ksnc_rx_nob_left; -- unsigned long flags; -- ksock_fmb_pool_t *pool; -- ksock_fmb_t *fmb; -- -- LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB); -- LASSERT (kpr_routing(&ksocknal_data.ksnd_router)); -- -- if (payload_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE) -- pool = &ksocknal_data.ksnd_small_fmp; -- else -- pool = &ksocknal_data.ksnd_large_fmp; -- -- spin_lock_irqsave (&pool->fmp_lock, flags); -- -- if (!list_empty (&pool->fmp_idle_fmbs)) { -- fmb = list_entry(pool->fmp_idle_fmbs.next, -- ksock_fmb_t, fmb_list); -- list_del (&fmb->fmb_list); -- pool->fmp_nactive_fmbs++; -- spin_unlock_irqrestore (&pool->fmp_lock, flags); -- -- return (fmb); -- } -- -- /* deschedule until fmb free */ -- -- conn->ksnc_rx_state = SOCKNAL_RX_FMB_SLEEP; -- -- list_add_tail (&conn->ksnc_rx_list, -- &pool->fmp_blocked_conns); -- -- spin_unlock_irqrestore (&pool->fmp_lock, flags); -- return (NULL); --} -- --int --ksocknal_init_fmb (ksock_conn_t *conn, ksock_fmb_t *fmb) --{ -- int payload_nob = conn->ksnc_rx_nob_left; - ptl_nid_t dest_nid = NTOH__u64 (conn->ksnc_hdr.dest_nid); - ptl_nid_t dest_nid = le64_to_cpu(conn->ksnc_hdr.dest_nid); -- int niov = 0; -- int nob = 
payload_nob; -- -- LASSERT (conn->ksnc_rx_scheduled); -- LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB); -- LASSERT (conn->ksnc_rx_nob_wanted == conn->ksnc_rx_nob_left); -- LASSERT (payload_nob >= 0); -- LASSERT (payload_nob <= fmb->fmb_pool->fmp_buff_pages * PAGE_SIZE); -- LASSERT (sizeof (ptl_hdr_t) < PAGE_SIZE); -- LASSERT (fmb->fmb_kiov[0].kiov_offset == 0); -- -- /* Take a ref on the conn's peer to prevent module unload before -- * forwarding completes. */ -- fmb->fmb_peer = conn->ksnc_peer; -- atomic_inc (&conn->ksnc_peer->ksnp_refcount); -- -- /* Copy the header we just read into the forwarding buffer. If -- * there's payload, start reading reading it into the buffer, -- * otherwise the forwarding buffer can be kicked off -- * immediately. */ -- fmb->fmb_hdr = conn->ksnc_hdr; -- -- while (nob > 0) { -- LASSERT (niov < fmb->fmb_pool->fmp_buff_pages); -- LASSERT (fmb->fmb_kiov[niov].kiov_offset == 0); -- fmb->fmb_kiov[niov].kiov_len = MIN (PAGE_SIZE, nob); -- nob -= PAGE_SIZE; -- niov++; -- } -- -- kpr_fwd_init(&fmb->fmb_fwd, dest_nid, &fmb->fmb_hdr, -- payload_nob, niov, fmb->fmb_kiov, -- ksocknal_fmb_callback, fmb); -- -- if (payload_nob == 0) { /* got complete packet already */ -- CDEBUG (D_NET, "%p "LPX64"->"LPX64" fwd_start (immediate)\n", - conn, NTOH__u64 (conn->ksnc_hdr.src_nid), dest_nid); - conn, le64_to_cpu(conn->ksnc_hdr.src_nid), dest_nid); -- -- kpr_fwd_start (&ksocknal_data.ksnd_router, &fmb->fmb_fwd); -- -- ksocknal_new_packet (conn, 0); /* on to next packet */ -- return (1); -- } -- -- conn->ksnc_cookie = fmb; /* stash fmb for later */ -- conn->ksnc_rx_state = SOCKNAL_RX_BODY_FWD; /* read in the payload */ -- -- /* Set up conn->ksnc_rx_kiov to read the payload into fmb's kiov-ed -- * buffer */ -- LASSERT (niov <= sizeof(conn->ksnc_rx_iov_space)/sizeof(ptl_kiov_t)); -- -- conn->ksnc_rx_niov = 0; -- conn->ksnc_rx_nkiov = niov; -- conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov; -- memcpy(conn->ksnc_rx_kiov, fmb->fmb_kiov, niov * 
sizeof(ptl_kiov_t)); -- -- CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d reading body\n", conn, - NTOH__u64 (conn->ksnc_hdr.src_nid), dest_nid, payload_nob); - le64_to_cpu(conn->ksnc_hdr.src_nid), dest_nid, payload_nob); -- return (0); --} -- --void --ksocknal_fwd_parse (ksock_conn_t *conn) --{ -- ksock_peer_t *peer; - ptl_nid_t dest_nid = NTOH__u64 (conn->ksnc_hdr.dest_nid); - ptl_nid_t src_nid = NTOH__u64 (conn->ksnc_hdr.src_nid); - int body_len = NTOH__u32 (conn->ksnc_hdr.payload_length); - ptl_nid_t dest_nid = le64_to_cpu(conn->ksnc_hdr.dest_nid); - ptl_nid_t src_nid = le64_to_cpu(conn->ksnc_hdr.src_nid); - int body_len = le32_to_cpu(conn->ksnc_hdr.payload_length); -- char str[PTL_NALFMT_SIZE]; -- char str2[PTL_NALFMT_SIZE]; -- -- CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d parsing header\n", conn, -- src_nid, dest_nid, conn->ksnc_rx_nob_left); -- -- LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER); -- LASSERT (conn->ksnc_rx_scheduled); -- -- if (body_len < 0) { /* length corrupt (overflow) */ -- CERROR("dropping packet from "LPX64" (%s) for "LPX64" (%s): " -- "packet size %d illegal\n", -- src_nid, portals_nid2str(TCPNAL, src_nid, str), -- dest_nid, portals_nid2str(TCPNAL, dest_nid, str2), -- body_len); -- -- ksocknal_new_packet (conn, 0); /* on to new packet */ -- return; -- } -- -- if (!kpr_routing(&ksocknal_data.ksnd_router)) { /* not forwarding */ -- CERROR("dropping packet from "LPX64" (%s) for "LPX64 -- " (%s): not forwarding\n", -- src_nid, portals_nid2str(TCPNAL, src_nid, str), -- dest_nid, portals_nid2str(TCPNAL, dest_nid, str2)); -- /* on to new packet (skip this one's body) */ -- ksocknal_new_packet (conn, body_len); -- return; -- } -- -- if (body_len > PTL_MTU) { /* too big to forward */ -- CERROR ("dropping packet from "LPX64" (%s) for "LPX64 -- "(%s): packet size %d too big\n", -- src_nid, portals_nid2str(TCPNAL, src_nid, str), -- dest_nid, portals_nid2str(TCPNAL, dest_nid, str2), -- body_len); -- /* on to new packet (skip this one's body) */ -- 
ksocknal_new_packet (conn, body_len); -- return; -- } -- -- /* should have gone direct */ -- peer = ksocknal_get_peer (conn->ksnc_hdr.dest_nid); -- if (peer != NULL) { -- CERROR ("dropping packet from "LPX64" (%s) for "LPX64 -- "(%s): target is a peer\n", -- src_nid, portals_nid2str(TCPNAL, src_nid, str), -- dest_nid, portals_nid2str(TCPNAL, dest_nid, str2)); -- ksocknal_put_peer (peer); /* drop ref from get above */ -- -- /* on to next packet (skip this one's body) */ -- ksocknal_new_packet (conn, body_len); -- return; -- } -- -- conn->ksnc_rx_state = SOCKNAL_RX_GET_FMB; /* Getting FMB now */ -- conn->ksnc_rx_nob_left = body_len; /* stash packet size */ -- conn->ksnc_rx_nob_wanted = body_len; /* (no slop) */ --} -- --int --ksocknal_new_packet (ksock_conn_t *conn, int nob_to_skip) --{ -- static char ksocknal_slop_buffer[4096]; -- -- int nob; -- int niov; -- int skipped; -- -- if (nob_to_skip == 0) { /* right at next packet boundary now */ -- conn->ksnc_rx_started = 0; -- mb (); /* racing with timeout thread */ -- -- conn->ksnc_rx_state = SOCKNAL_RX_HEADER; -- conn->ksnc_rx_nob_wanted = sizeof (ptl_hdr_t); -- conn->ksnc_rx_nob_left = sizeof (ptl_hdr_t); -- -- conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space; -- conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_hdr; -- conn->ksnc_rx_iov[0].iov_len = sizeof (ptl_hdr_t); -- conn->ksnc_rx_niov = 1; -- -- conn->ksnc_rx_kiov = NULL; -- conn->ksnc_rx_nkiov = 0; -- return (1); -- } -- -- /* Set up to skip as much a possible now. 
If there's more left -- * (ran out of iov entries) we'll get called again */ -- -- conn->ksnc_rx_state = SOCKNAL_RX_SLOP; -- conn->ksnc_rx_nob_left = nob_to_skip; -- conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space; -- skipped = 0; -- niov = 0; -- -- do { -- nob = MIN (nob_to_skip, sizeof (ksocknal_slop_buffer)); -- -- conn->ksnc_rx_iov[niov].iov_base = ksocknal_slop_buffer; -- conn->ksnc_rx_iov[niov].iov_len = nob; -- niov++; -- skipped += nob; -- nob_to_skip -=nob; -- -- } while (nob_to_skip != 0 && /* mustn't overflow conn's rx iov */ -- niov < sizeof(conn->ksnc_rx_iov_space) / sizeof (struct iovec)); -- -- conn->ksnc_rx_niov = niov; -- conn->ksnc_rx_kiov = NULL; -- conn->ksnc_rx_nkiov = 0; -- conn->ksnc_rx_nob_wanted = skipped; -- return (0); --} -- --int --ksocknal_process_receive (ksock_conn_t *conn) --{ -- ksock_fmb_t *fmb; -- int rc; -- -- LASSERT (atomic_read (&conn->ksnc_refcount) > 0); -- -- /* doesn't need a forwarding buffer */ -- if (conn->ksnc_rx_state != SOCKNAL_RX_GET_FMB) -- goto try_read; -- -- get_fmb: -- fmb = ksocknal_get_idle_fmb (conn); -- if (fmb == NULL) { -- /* conn descheduled waiting for idle fmb */ -- return (0); -- } -- -- if (ksocknal_init_fmb (conn, fmb)) { -- /* packet forwarded */ -- return (0); -- } -- -- try_read: -- /* NB: sched lock NOT held */ -- LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER || -- conn->ksnc_rx_state == SOCKNAL_RX_BODY || -- conn->ksnc_rx_state == SOCKNAL_RX_BODY_FWD || -- conn->ksnc_rx_state == SOCKNAL_RX_SLOP); -- -- LASSERT (conn->ksnc_rx_nob_wanted > 0); -- -- rc = ksocknal_receive(conn); -- -- if (rc <= 0) { -- LASSERT (rc != -EAGAIN); -- -- if (rc == 0) -- CWARN ("[%p] EOF from "LPX64" ip %d.%d.%d.%d:%d\n", -- conn, conn->ksnc_peer->ksnp_nid, -- HIPQUAD(conn->ksnc_ipaddr), -- conn->ksnc_port); -- else if (!conn->ksnc_closing) -- CERROR ("[%p] Error %d on read from "LPX64 -- " ip %d.%d.%d.%d:%d\n", -- conn, rc, conn->ksnc_peer->ksnp_nid, -- HIPQUAD(conn->ksnc_ipaddr), -- conn->ksnc_port); 
-- -- ksocknal_close_conn_and_siblings (conn, rc); -- return (rc == 0 ? -ESHUTDOWN : rc); -- } -- -- if (conn->ksnc_rx_nob_wanted != 0) { -- /* short read */ -- return (-EAGAIN); -- } -- -- switch (conn->ksnc_rx_state) { -- case SOCKNAL_RX_HEADER: - if (conn->ksnc_hdr.type != HTON__u32(PTL_MSG_HELLO) && - NTOH__u64(conn->ksnc_hdr.dest_nid) != ksocknal_lib.ni.nid) { - if (conn->ksnc_hdr.type != cpu_to_le32(PTL_MSG_HELLO) && - le64_to_cpu(conn->ksnc_hdr.dest_nid) != - ksocknal_lib.libnal_ni.ni_pid.nid) { -- /* This packet isn't for me */ -- ksocknal_fwd_parse (conn); -- switch (conn->ksnc_rx_state) { -- case SOCKNAL_RX_HEADER: /* skipped (zero payload) */ -- return (0); /* => come back later */ -- case SOCKNAL_RX_SLOP: /* skipping packet's body */ -- goto try_read; /* => go read it */ -- case SOCKNAL_RX_GET_FMB: /* forwarding */ -- goto get_fmb; /* => go get a fwd msg buffer */ -- default: -- LBUG (); -- } -- /* Not Reached */ -- } -- -- /* sets wanted_len, iovs etc */ - lib_parse(&ksocknal_lib, &conn->ksnc_hdr, conn); - rc = lib_parse(&ksocknal_lib, &conn->ksnc_hdr, conn); - - if (rc != PTL_OK) { - /* I just received garbage: give up on this conn */ - ksocknal_close_conn_and_siblings (conn, rc); - return (-EPROTO); - } -- -- if (conn->ksnc_rx_nob_wanted != 0) { /* need to get payload? */ -- conn->ksnc_rx_state = SOCKNAL_RX_BODY; -- goto try_read; /* go read the payload */ -- } -- /* Fall through (completed packet for me) */ -- -- case SOCKNAL_RX_BODY: -- /* payload all received */ -- lib_finalize(&ksocknal_lib, NULL, conn->ksnc_cookie, PTL_OK); -- /* Fall through */ -- -- case SOCKNAL_RX_SLOP: -- /* starting new packet? 
*/ -- if (ksocknal_new_packet (conn, conn->ksnc_rx_nob_left)) -- return (0); /* come back later */ -- goto try_read; /* try to finish reading slop now */ -- -- case SOCKNAL_RX_BODY_FWD: -- /* payload all received */ -- CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (got body)\n", - conn, NTOH__u64 (conn->ksnc_hdr.src_nid), - NTOH__u64 (conn->ksnc_hdr.dest_nid), - conn, le64_to_cpu(conn->ksnc_hdr.src_nid), - le64_to_cpu(conn->ksnc_hdr.dest_nid), -- conn->ksnc_rx_nob_left); -- -- /* forward the packet. NB ksocknal_init_fmb() put fmb into -- * conn->ksnc_cookie */ -- fmb = (ksock_fmb_t *)conn->ksnc_cookie; -- kpr_fwd_start (&ksocknal_data.ksnd_router, &fmb->fmb_fwd); -- -- /* no slop in forwarded packets */ -- LASSERT (conn->ksnc_rx_nob_left == 0); -- -- ksocknal_new_packet (conn, 0); /* on to next packet */ -- return (0); /* (later) */ -- -- default: -- break; -- } -- -- /* Not Reached */ -- LBUG (); -- return (-EINVAL); /* keep gcc happy */ --} -- --ptl_err_t - ksocknal_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, -ksocknal_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, -- unsigned int niov, struct iovec *iov, -- size_t offset, size_t mlen, size_t rlen) --{ -- ksock_conn_t *conn = (ksock_conn_t *)private; -- -- LASSERT (mlen <= rlen); -- LASSERT (niov <= PTL_MD_MAX_IOV); -- -- conn->ksnc_cookie = msg; -- conn->ksnc_rx_nob_wanted = mlen; -- conn->ksnc_rx_nob_left = rlen; -- -- conn->ksnc_rx_nkiov = 0; -- conn->ksnc_rx_kiov = NULL; -- conn->ksnc_rx_iov = conn->ksnc_rx_iov_space.iov; -- conn->ksnc_rx_niov = -- lib_extract_iov(PTL_MD_MAX_IOV, conn->ksnc_rx_iov, -- niov, iov, offset, mlen); -- -- LASSERT (mlen == -- lib_iov_nob (conn->ksnc_rx_niov, conn->ksnc_rx_iov) + -- lib_kiov_nob (conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov)); -- -- return (PTL_OK); --} -- --ptl_err_t - ksocknal_recv_pages (nal_cb_t *nal, void *private, lib_msg_t *msg, -ksocknal_recv_pages (lib_nal_t *nal, void *private, lib_msg_t *msg, -- unsigned int niov, ptl_kiov_t *kiov, -- size_t 
offset, size_t mlen, size_t rlen) --{ -- ksock_conn_t *conn = (ksock_conn_t *)private; -- -- LASSERT (mlen <= rlen); -- LASSERT (niov <= PTL_MD_MAX_IOV); -- -- conn->ksnc_cookie = msg; -- conn->ksnc_rx_nob_wanted = mlen; -- conn->ksnc_rx_nob_left = rlen; -- -- conn->ksnc_rx_niov = 0; -- conn->ksnc_rx_iov = NULL; -- conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov; -- conn->ksnc_rx_nkiov = -- lib_extract_kiov(PTL_MD_MAX_IOV, conn->ksnc_rx_kiov, -- niov, kiov, offset, mlen); -- -- LASSERT (mlen == -- lib_iov_nob (conn->ksnc_rx_niov, conn->ksnc_rx_iov) + -- lib_kiov_nob (conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov)); -- -- return (PTL_OK); -} - -static inline int -ksocknal_sched_cansleep(ksock_sched_t *sched) -{ - unsigned long flags; - int rc; - - spin_lock_irqsave(&sched->kss_lock, flags); - - rc = (!ksocknal_data.ksnd_shuttingdown && -#if SOCKNAL_ZC - list_empty(&sched->kss_zctxdone_list) && -#endif - list_empty(&sched->kss_rx_conns) && - list_empty(&sched->kss_tx_conns)); - - spin_unlock_irqrestore(&sched->kss_lock, flags); - return (rc); --} -- --int ksocknal_scheduler (void *arg) --{ -- ksock_sched_t *sched = (ksock_sched_t *)arg; -- ksock_conn_t *conn; -- ksock_tx_t *tx; -- unsigned long flags; -- int rc; -- int nloops = 0; -- int id = sched - ksocknal_data.ksnd_schedulers; -- char name[16]; -- -- snprintf (name, sizeof (name),"ksocknald_%02d", id); -- kportal_daemonize (name); -- kportal_blockallsigs (); -- --#if (CONFIG_SMP && CPU_AFFINITY) - id = ksocknal_sched2cpu(id); -- if (cpu_online(id)) { -- cpumask_t m; -- cpu_set(id, m); -- set_cpus_allowed(current, m); -- } else { - CERROR ("Can't set CPU affinity for %s\n", name); - CERROR ("Can't set CPU affinity for %s to %d\n", name, id); -- } --#endif /* CONFIG_SMP && CPU_AFFINITY */ -- -- spin_lock_irqsave (&sched->kss_lock, flags); -- -- while (!ksocknal_data.ksnd_shuttingdown) { -- int did_something = 0; -- -- /* Ensure I progress everything semi-fairly */ -- -- if (!list_empty (&sched->kss_rx_conns)) { -- conn 
= list_entry(sched->kss_rx_conns.next, -- ksock_conn_t, ksnc_rx_list); -- list_del(&conn->ksnc_rx_list); -- -- LASSERT(conn->ksnc_rx_scheduled); -- LASSERT(conn->ksnc_rx_ready); -- -- /* clear rx_ready in case receive isn't complete. -- * Do it BEFORE we call process_recv, since -- * data_ready can set it any time after we release -- * kss_lock. */ -- conn->ksnc_rx_ready = 0; -- spin_unlock_irqrestore(&sched->kss_lock, flags); -- -- rc = ksocknal_process_receive(conn); -- -- spin_lock_irqsave(&sched->kss_lock, flags); -- -- /* I'm the only one that can clear this flag */ -- LASSERT(conn->ksnc_rx_scheduled); -- -- /* Did process_receive get everything it wanted? */ -- if (rc == 0) -- conn->ksnc_rx_ready = 1; -- -- if (conn->ksnc_rx_state == SOCKNAL_RX_FMB_SLEEP || -- conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB) { -- /* Conn blocked for a forwarding buffer. -- * It will get queued for my attention when -- * one becomes available (and it might just -- * already have been!). Meanwhile my ref -- * on it stays put. */ -- } else if (conn->ksnc_rx_ready) { -- /* reschedule for rx */ -- list_add_tail (&conn->ksnc_rx_list, -- &sched->kss_rx_conns); -- } else { -- conn->ksnc_rx_scheduled = 0; -- /* drop my ref */ -- ksocknal_put_conn(conn); -- } -- -- did_something = 1; -- } -- -- if (!list_empty (&sched->kss_tx_conns)) { -- conn = list_entry(sched->kss_tx_conns.next, -- ksock_conn_t, ksnc_tx_list); -- list_del (&conn->ksnc_tx_list); -- -- LASSERT(conn->ksnc_tx_scheduled); -- LASSERT(conn->ksnc_tx_ready); -- LASSERT(!list_empty(&conn->ksnc_tx_queue)); -- -- tx = list_entry(conn->ksnc_tx_queue.next, -- ksock_tx_t, tx_list); -- /* dequeue now so empty list => more to send */ -- list_del(&tx->tx_list); -- -- /* Clear tx_ready in case send isn't complete. Do -- * it BEFORE we call process_transmit, since -- * write_space can set it any time after we release -- * kss_lock. 
*/ -- conn->ksnc_tx_ready = 0; -- spin_unlock_irqrestore (&sched->kss_lock, flags); -- -- rc = ksocknal_process_transmit(conn, tx); -- -- spin_lock_irqsave (&sched->kss_lock, flags); -- -- if (rc == -ENOMEM || rc == -EAGAIN) { -- /* Incomplete send: replace tx on HEAD of tx_queue */ -- list_add (&tx->tx_list, &conn->ksnc_tx_queue); -- } else { -- /* Complete send; assume space for more */ -- conn->ksnc_tx_ready = 1; -- } -- -- if (rc == -ENOMEM) { -- /* Do nothing; after a short timeout, this -- * conn will be reposted on kss_tx_conns. */ -- } else if (conn->ksnc_tx_ready && -- !list_empty (&conn->ksnc_tx_queue)) { -- /* reschedule for tx */ -- list_add_tail (&conn->ksnc_tx_list, -- &sched->kss_tx_conns); -- } else { -- conn->ksnc_tx_scheduled = 0; -- /* drop my ref */ -- ksocknal_put_conn (conn); -- } -- -- did_something = 1; -- } --#if SOCKNAL_ZC -- if (!list_empty (&sched->kss_zctxdone_list)) { -- ksock_tx_t *tx = -- list_entry(sched->kss_zctxdone_list.next, -- ksock_tx_t, tx_list); -- did_something = 1; -- -- list_del (&tx->tx_list); -- spin_unlock_irqrestore (&sched->kss_lock, flags); -- -- ksocknal_tx_done (tx, 1); -- -- spin_lock_irqsave (&sched->kss_lock, flags); -- } --#endif -- if (!did_something || /* nothing to do */ -- ++nloops == SOCKNAL_RESCHED) { /* hogging CPU? 
*/ -- spin_unlock_irqrestore (&sched->kss_lock, flags); -- -- nloops = 0; -- -- if (!did_something) { /* wait for something to do */ - #if SOCKNAL_ZC - rc = wait_event_interruptible (sched->kss_waitq, - ksocknal_data.ksnd_shuttingdown || - !list_empty(&sched->kss_rx_conns) || - !list_empty(&sched->kss_tx_conns) || - !list_empty(&sched->kss_zctxdone_list)); - #else -- rc = wait_event_interruptible (sched->kss_waitq, - ksocknal_data.ksnd_shuttingdown || - !list_empty(&sched->kss_rx_conns) || - !list_empty(&sched->kss_tx_conns)); - #endif - !ksocknal_sched_cansleep(sched)); -- LASSERT (rc == 0); -- } else -- our_cond_resched(); -- -- spin_lock_irqsave (&sched->kss_lock, flags); -- } -- } -- -- spin_unlock_irqrestore (&sched->kss_lock, flags); -- ksocknal_thread_fini (); -- return (0); --} -- --void --ksocknal_data_ready (struct sock *sk, int n) --{ -- unsigned long flags; -- ksock_conn_t *conn; -- ksock_sched_t *sched; -- ENTRY; -- -- /* interleave correctly with closing sockets... */ -- read_lock (&ksocknal_data.ksnd_global_lock); -- -- conn = sk->sk_user_data; -- if (conn == NULL) { /* raced with ksocknal_terminate_conn */ -- LASSERT (sk->sk_data_ready != &ksocknal_data_ready); -- sk->sk_data_ready (sk, n); -- } else { -- sched = conn->ksnc_scheduler; -- -- spin_lock_irqsave (&sched->kss_lock, flags); -- -- conn->ksnc_rx_ready = 1; -- -- if (!conn->ksnc_rx_scheduled) { /* not being progressed */ -- list_add_tail(&conn->ksnc_rx_list, -- &sched->kss_rx_conns); -- conn->ksnc_rx_scheduled = 1; -- /* extra ref for scheduler */ -- atomic_inc (&conn->ksnc_refcount); -- -- wake_up (&sched->kss_waitq); -- } -- -- spin_unlock_irqrestore (&sched->kss_lock, flags); -- } -- -- read_unlock (&ksocknal_data.ksnd_global_lock); -- -- EXIT; --} -- --void --ksocknal_write_space (struct sock *sk) --{ -- unsigned long flags; -- ksock_conn_t *conn; -- ksock_sched_t *sched; -- -- /* interleave correctly with closing sockets... 
*/ -- read_lock (&ksocknal_data.ksnd_global_lock); -- -- conn = sk->sk_user_data; -- -- CDEBUG(D_NET, "sk %p wspace %d low water %d conn %p%s%s%s\n", -- sk, tcp_wspace(sk), SOCKNAL_TX_LOW_WATER(sk), conn, -- (conn == NULL) ? "" : (conn->ksnc_tx_ready ? -- " ready" : " blocked"), -- (conn == NULL) ? "" : (conn->ksnc_tx_scheduled ? -- " scheduled" : " idle"), -- (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ? -- " empty" : " queued")); -- -- if (conn == NULL) { /* raced with ksocknal_terminate_conn */ -- LASSERT (sk->sk_write_space != &ksocknal_write_space); -- sk->sk_write_space (sk); -- -- read_unlock (&ksocknal_data.ksnd_global_lock); -- return; -- } -- -- if (tcp_wspace(sk) >= SOCKNAL_TX_LOW_WATER(sk)) { /* got enough space */ -- clear_bit (SOCK_NOSPACE, &sk->sk_socket->flags); -- -- sched = conn->ksnc_scheduler; -- -- spin_lock_irqsave (&sched->kss_lock, flags); -- -- conn->ksnc_tx_ready = 1; -- -- if (!conn->ksnc_tx_scheduled && // not being progressed -- !list_empty(&conn->ksnc_tx_queue)){//packets to send -- list_add_tail (&conn->ksnc_tx_list, -- &sched->kss_tx_conns); -- conn->ksnc_tx_scheduled = 1; -- /* extra ref for scheduler */ -- atomic_inc (&conn->ksnc_refcount); -- -- wake_up (&sched->kss_waitq); -- } -- -- spin_unlock_irqrestore (&sched->kss_lock, flags); -- } -- -- read_unlock (&ksocknal_data.ksnd_global_lock); --} -- --int --ksocknal_sock_write (struct socket *sock, void *buffer, int nob) --{ -- int rc; -- mm_segment_t oldmm = get_fs(); -- -- while (nob > 0) { -- struct iovec iov = { -- .iov_base = buffer, -- .iov_len = nob -- }; -- struct msghdr msg = { -- .msg_name = NULL, -- .msg_namelen = 0, -- .msg_iov = &iov, -- .msg_iovlen = 1, -- .msg_control = NULL, -- .msg_controllen = 0, -- .msg_flags = 0 -- }; -- -- set_fs (KERNEL_DS); -- rc = sock_sendmsg (sock, &msg, iov.iov_len); -- set_fs (oldmm); -- -- if (rc < 0) -- return (rc); -- -- if (rc == 0) { -- CERROR ("Unexpected zero rc\n"); -- return (-ECONNABORTED); -- } -- -- buffer = 
((char *)buffer) + rc; -- nob -= rc; -- } -- -- return (0); --} -- --int --ksocknal_sock_read (struct socket *sock, void *buffer, int nob) --{ -- int rc; -- mm_segment_t oldmm = get_fs(); -- -- while (nob > 0) { -- struct iovec iov = { -- .iov_base = buffer, -- .iov_len = nob -- }; -- struct msghdr msg = { -- .msg_name = NULL, -- .msg_namelen = 0, -- .msg_iov = &iov, -- .msg_iovlen = 1, -- .msg_control = NULL, -- .msg_controllen = 0, -- .msg_flags = 0 -- }; -- -- set_fs (KERNEL_DS); -- rc = sock_recvmsg (sock, &msg, iov.iov_len, 0); -- set_fs (oldmm); -- -- if (rc < 0) -- return (rc); -- -- if (rc == 0) -- return (-ECONNABORTED); -- -- buffer = ((char *)buffer) + rc; -- nob -= rc; -- } -- -- return (0); --} -- --int - ksocknal_hello (struct socket *sock, ptl_nid_t *nid, int *type, - __u64 *incarnation) -ksocknal_send_hello (ksock_conn_t *conn, __u32 *ipaddrs, int nipaddrs) --{ - int rc; - /* CAVEAT EMPTOR: this byte flips 'ipaddrs' */ - struct socket *sock = conn->ksnc_sock; -- ptl_hdr_t hdr; -- ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid; - char ipbuf[PTL_NALFMT_SIZE]; - char ipbuf2[PTL_NALFMT_SIZE]; - int i; - int rc; -- - LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid)); - LASSERT (conn->ksnc_type != SOCKNAL_CONN_NONE); - LASSERT (nipaddrs <= SOCKNAL_MAX_INTERFACES); -- - memset (&hdr, 0, sizeof (hdr)); - hmv->magic = __cpu_to_le32 (PORTALS_PROTO_MAGIC); - hmv->version_major = __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR); - hmv->version_minor = __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR); - /* No need for getconnsock/putconnsock */ - LASSERT (!conn->ksnc_closing); -- - hdr.src_nid = __cpu_to_le64 (ksocknal_lib.ni.nid); - hdr.type = __cpu_to_le32 (PTL_MSG_HELLO); - LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid)); - hmv->magic = cpu_to_le32 (PORTALS_PROTO_MAGIC); - hmv->version_major = cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR); - hmv->version_minor = cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR); -- - hdr.msg.hello.type = __cpu_to_le32 (*type); - 
hdr.src_nid = cpu_to_le64 (ksocknal_lib.libnal_ni.ni_pid.nid); - hdr.type = cpu_to_le32 (PTL_MSG_HELLO); - hdr.payload_length = cpu_to_le32 (nipaddrs * sizeof(*ipaddrs)); - - hdr.msg.hello.type = cpu_to_le32 (conn->ksnc_type); -- hdr.msg.hello.incarnation = - __cpu_to_le64 (ksocknal_data.ksnd_incarnation); - cpu_to_le64 (ksocknal_data.ksnd_incarnation); -- - /* Assume sufficient socket buffering for this message */ - rc = ksocknal_sock_write (sock, &hdr, sizeof (hdr)); - /* Receiver is eager */ - rc = ksocknal_sock_write (sock, &hdr, sizeof(hdr)); -- if (rc != 0) { - CERROR ("Error %d sending HELLO to "LPX64" %s\n", - rc, *nid, portals_nid2str(SOCKNAL, *nid, ipbuf)); - CERROR ("Error %d sending HELLO hdr to %u.%u.%u.%u/%d\n", - rc, HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); -- return (rc); - } - - if (nipaddrs == 0) - return (0); - - for (i = 0; i < nipaddrs; i++) { - ipaddrs[i] = __cpu_to_le32 (ipaddrs[i]); - } - - rc = ksocknal_sock_write (sock, ipaddrs, nipaddrs * sizeof(*ipaddrs)); - if (rc != 0) - CERROR ("Error %d sending HELLO payload (%d)" - " to %u.%u.%u.%u/%d\n", rc, nipaddrs, - HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); - return (rc); -} - -int -ksocknal_invert_type(int type) -{ - switch (type) - { - case SOCKNAL_CONN_ANY: - case SOCKNAL_CONN_CONTROL: - return (type); - case SOCKNAL_CONN_BULK_IN: - return SOCKNAL_CONN_BULK_OUT; - case SOCKNAL_CONN_BULK_OUT: - return SOCKNAL_CONN_BULK_IN; - default: - return (SOCKNAL_CONN_NONE); -- } -} - -int -ksocknal_recv_hello (ksock_conn_t *conn, ptl_nid_t *nid, - __u64 *incarnation, __u32 *ipaddrs) -{ - struct socket *sock = conn->ksnc_sock; - int rc; - int nips; - int i; - int type; - ptl_hdr_t hdr; - ptl_magicversion_t *hmv; - - hmv = (ptl_magicversion_t *)&hdr.dest_nid; - LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid)); -- -- rc = ksocknal_sock_read (sock, hmv, sizeof (*hmv)); -- if (rc != 0) { - CERROR ("Error %d reading HELLO from "LPX64" %s\n", - rc, *nid, portals_nid2str(SOCKNAL, *nid, ipbuf)); - 
CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n", - rc, HIPQUAD(conn->ksnc_ipaddr)); -- return (rc); -- } -- - if (hmv->magic != __le32_to_cpu (PORTALS_PROTO_MAGIC)) { - CERROR ("Bad magic %#08x (%#08x expected) from "LPX64" %s\n", - __cpu_to_le32 (hmv->magic), PORTALS_PROTO_MAGIC, *nid, - portals_nid2str(SOCKNAL, *nid, ipbuf)); - if (hmv->magic != le32_to_cpu (PORTALS_PROTO_MAGIC)) { - CERROR ("Bad magic %#08x (%#08x expected) from %u.%u.%u.%u\n", - __cpu_to_le32 (hmv->magic), PORTALS_PROTO_MAGIC, - HIPQUAD(conn->ksnc_ipaddr)); -- return (-EPROTO); -- } -- - if (hmv->version_major != __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR) || - hmv->version_minor != __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR)) { - if (hmv->version_major != cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR) || - hmv->version_minor != cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR)) { -- CERROR ("Incompatible protocol version %d.%d (%d.%d expected)" - " from "LPX64" %s\n", - __le16_to_cpu (hmv->version_major), - __le16_to_cpu (hmv->version_minor), - " from %u.%u.%u.%u\n", - le16_to_cpu (hmv->version_major), - le16_to_cpu (hmv->version_minor), -- PORTALS_PROTO_VERSION_MAJOR, -- PORTALS_PROTO_VERSION_MINOR, - *nid, portals_nid2str(SOCKNAL, *nid, ipbuf)); - HIPQUAD(conn->ksnc_ipaddr)); -- return (-EPROTO); -- } -- - #if (PORTALS_PROTO_VERSION_MAJOR != 0) - # error "This code only understands protocol version 0.x" -#if (PORTALS_PROTO_VERSION_MAJOR != 1) -# error "This code only understands protocol version 1.x" --#endif - /* version 0 sends magic/version as the dest_nid of a 'hello' header, - * so read the rest of it in now... */ - /* version 1 sends magic/version as the dest_nid of a 'hello' - * header, followed by payload full of interface IP addresses. - * Read the rest of it in now... 
*/ -- -- rc = ksocknal_sock_read (sock, hmv + 1, sizeof (hdr) - sizeof (*hmv)); -- if (rc != 0) { - CERROR ("Error %d reading rest of HELLO hdr from "LPX64" %s\n", - rc, *nid, portals_nid2str(SOCKNAL, *nid, ipbuf)); - CERROR ("Error %d reading rest of HELLO hdr from %u.%u.%u.%u\n", - rc, HIPQUAD(conn->ksnc_ipaddr)); -- return (rc); -- } -- -- /* ...and check we got what we expected */ - if (hdr.type != __cpu_to_le32 (PTL_MSG_HELLO) || - hdr.payload_length != __cpu_to_le32 (0)) { - CERROR ("Expecting a HELLO hdr with 0 payload," - " but got type %d with %d payload from "LPX64" %s\n", - __le32_to_cpu (hdr.type), - __le32_to_cpu (hdr.payload_length), *nid, - portals_nid2str(SOCKNAL, *nid, ipbuf)); - if (hdr.type != cpu_to_le32 (PTL_MSG_HELLO)) { - CERROR ("Expecting a HELLO hdr," - " but got type %d from %u.%u.%u.%u\n", - le32_to_cpu (hdr.type), - HIPQUAD(conn->ksnc_ipaddr)); -- return (-EPROTO); -- } -- - if (__le64_to_cpu(hdr.src_nid) == PTL_NID_ANY) { - CERROR("Expecting a HELLO hdr with a NID, but got PTL_NID_ANY\n"); - if (le64_to_cpu(hdr.src_nid) == PTL_NID_ANY) { - CERROR("Expecting a HELLO hdr with a NID, but got PTL_NID_ANY" - "from %u.%u.%u.%u\n", HIPQUAD(conn->ksnc_ipaddr)); -- return (-EPROTO); -- } -- -- if (*nid == PTL_NID_ANY) { /* don't know peer's nid yet */ - *nid = __le64_to_cpu(hdr.src_nid); - } else if (*nid != __le64_to_cpu (hdr.src_nid)) { - CERROR ("Connected to nid "LPX64" %s, but expecting "LPX64" %s\n", - __le64_to_cpu (hdr.src_nid), - portals_nid2str(SOCKNAL, - __le64_to_cpu(hdr.src_nid), - ipbuf), - *nid, portals_nid2str(SOCKNAL, *nid, ipbuf2)); - *nid = le64_to_cpu(hdr.src_nid); - } else if (*nid != le64_to_cpu (hdr.src_nid)) { - CERROR ("Connected to nid "LPX64"@%u.%u.%u.%u " - "but expecting "LPX64"\n", - le64_to_cpu (hdr.src_nid), - HIPQUAD(conn->ksnc_ipaddr), *nid); -- return (-EPROTO); -- } -- - if (*type == SOCKNAL_CONN_NONE) { - type = __le32_to_cpu(hdr.msg.hello.type); - - if (conn->ksnc_type == SOCKNAL_CONN_NONE) { -- /* I've 
accepted this connection; peer determines type */ - *type = __le32_to_cpu(hdr.msg.hello.type); - switch (*type) { - case SOCKNAL_CONN_ANY: - case SOCKNAL_CONN_CONTROL: - break; - case SOCKNAL_CONN_BULK_IN: - *type = SOCKNAL_CONN_BULK_OUT; - break; - case SOCKNAL_CONN_BULK_OUT: - *type = SOCKNAL_CONN_BULK_IN; - break; - default: - CERROR ("Unexpected type %d from "LPX64" %s\n", - *type, *nid, - portals_nid2str(SOCKNAL, *nid, ipbuf)); - conn->ksnc_type = ksocknal_invert_type(type); - if (conn->ksnc_type == SOCKNAL_CONN_NONE) { - CERROR ("Unexpected type %d from "LPX64"@%u.%u.%u.%u\n", - type, *nid, HIPQUAD(conn->ksnc_ipaddr)); -- return (-EPROTO); -- } - } else if (__le32_to_cpu(hdr.msg.hello.type) != SOCKNAL_CONN_NONE) { - CERROR ("Mismatched types: me %d "LPX64" %s %d\n", - *type, *nid, portals_nid2str(SOCKNAL, *nid, ipbuf), - __le32_to_cpu(hdr.msg.hello.type)); - } else if (ksocknal_invert_type(type) != conn->ksnc_type) { - CERROR ("Mismatched types: me %d, "LPX64"@%u.%u.%u.%u %d\n", - conn->ksnc_type, *nid, HIPQUAD(conn->ksnc_ipaddr), - le32_to_cpu(hdr.msg.hello.type)); -- return (-EPROTO); -- } -- - *incarnation = __le64_to_cpu(hdr.msg.hello.incarnation); - *incarnation = le64_to_cpu(hdr.msg.hello.incarnation); -- - return (0); - nips = __le32_to_cpu (hdr.payload_length) / sizeof (__u32); - - if (nips > SOCKNAL_MAX_INTERFACES || - nips * sizeof(__u32) != __le32_to_cpu (hdr.payload_length)) { - CERROR("Bad payload length %d from "LPX64"@%u.%u.%u.%u\n", - __le32_to_cpu (hdr.payload_length), - *nid, HIPQUAD(conn->ksnc_ipaddr)); - } - - if (nips == 0) - return (0); - - rc = ksocknal_sock_read (sock, ipaddrs, nips * sizeof(*ipaddrs)); - if (rc != 0) { - CERROR ("Error %d reading IPs from "LPX64"@%u.%u.%u.%u\n", - rc, *nid, HIPQUAD(conn->ksnc_ipaddr)); - return (rc); - } - - for (i = 0; i < nips; i++) { - ipaddrs[i] = __le32_to_cpu(ipaddrs[i]); - - if (ipaddrs[i] == 0) { - CERROR("Zero IP[%d] from "LPX64"@%u.%u.%u.%u\n", - i, *nid, HIPQUAD(conn->ksnc_ipaddr)); - 
return (-EPROTO); - } - } - - return (nips); -} - -int -ksocknal_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle) -{ - mm_segment_t oldmm = get_fs (); - struct socket *sock = conn->ksnc_sock; - int len; - int rc; - - rc = ksocknal_getconnsock (conn); - if (rc != 0) { - LASSERT (conn->ksnc_closing); - *txmem = *rxmem = *nagle = 0; - return (-ESHUTDOWN); - } - - set_fs (KERNEL_DS); - - len = sizeof(*txmem); - rc = sock_getsockopt(sock, SOL_SOCKET, SO_SNDBUF, - (char *)txmem, &len); - if (rc == 0) { - len = sizeof(*rxmem); - rc = sock_getsockopt(sock, SOL_SOCKET, SO_RCVBUF, - (char *)rxmem, &len); - } - if (rc == 0) { - len = sizeof(*nagle); - rc = sock->ops->getsockopt(sock, SOL_TCP, TCP_NODELAY, - (char *)nagle, &len); - } - - set_fs (oldmm); - ksocknal_putconnsock (conn); - - if (rc == 0) - *nagle = !*nagle; - else - *txmem = *rxmem = *nagle = 0; - - return (rc); --} -- --int --ksocknal_setup_sock (struct socket *sock) --{ -- mm_segment_t oldmm = get_fs (); -- int rc; -- int option; - int keep_idle; - int keep_intvl; - int keep_count; - int do_keepalive; -- struct linger linger; -- - #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -- sock->sk->sk_allocation = GFP_NOFS; - #else - sock->sk->allocation = GFP_NOFS; - #endif -- -- /* Ensure this socket aborts active sends immediately when we close -- * it. 
*/ -- -- linger.l_onoff = 0; -- linger.l_linger = 0; -- -- set_fs (KERNEL_DS); -- rc = sock_setsockopt (sock, SOL_SOCKET, SO_LINGER, -- (char *)&linger, sizeof (linger)); -- set_fs (oldmm); -- if (rc != 0) { -- CERROR ("Can't set SO_LINGER: %d\n", rc); -- return (rc); -- } -- -- option = -1; -- set_fs (KERNEL_DS); -- rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_LINGER2, -- (char *)&option, sizeof (option)); -- set_fs (oldmm); -- if (rc != 0) { -- CERROR ("Can't set SO_LINGER2: %d\n", rc); -- return (rc); -- } -- - #if SOCKNAL_USE_KEEPALIVES - /* Keepalives: If 3/4 of the timeout elapses, start probing every - * second until the timeout elapses. */ - if (!ksocknal_tunables.ksnd_nagle) { - option = 1; - - set_fs (KERNEL_DS); - rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_NODELAY, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't disable nagle: %d\n", rc); - return (rc); - } - } - - if (ksocknal_tunables.ksnd_buffer_size > 0) { - option = ksocknal_tunables.ksnd_buffer_size; - - set_fs (KERNEL_DS); - rc = sock_setsockopt (sock, SOL_SOCKET, SO_SNDBUF, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set send buffer %d: %d\n", - option, rc); - return (rc); - } -- - option = (ksocknal_data.ksnd_io_timeout * 3) / 4; - set_fs (KERNEL_DS); - rc = sock_setsockopt (sock, SOL_SOCKET, SO_RCVBUF, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set receive buffer %d: %d\n", - option, rc); - return (rc); - } - } - - /* snapshot tunables */ - keep_idle = ksocknal_tunables.ksnd_keepalive_idle; - keep_count = ksocknal_tunables.ksnd_keepalive_count; - keep_intvl = ksocknal_tunables.ksnd_keepalive_intvl; - - do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0); - - option = (do_keepalive ? 
1 : 0); -- set_fs (KERNEL_DS); - rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPIDLE, - (char *)&option, sizeof (option)); - rc = sock_setsockopt (sock, SOL_SOCKET, SO_KEEPALIVE, - (char *)&option, sizeof (option)); -- set_fs (oldmm); -- if (rc != 0) { - CERROR ("Can't set TCP_KEEPIDLE: %d\n", rc); - CERROR ("Can't set SO_KEEPALIVE: %d\n", rc); -- return (rc); -- } - - option = 1; - - if (!do_keepalive) - return (0); - -- set_fs (KERNEL_DS); - rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPINTVL, - (char *)&option, sizeof (option)); - rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPIDLE, - (char *)&keep_idle, sizeof (keep_idle)); -- set_fs (oldmm); -- if (rc != 0) { - CERROR ("Can't set TCP_KEEPINTVL: %d\n", rc); - CERROR ("Can't set TCP_KEEPIDLE: %d\n", rc); -- return (rc); -- } - - option = ksocknal_data.ksnd_io_timeout / 4; - -- set_fs (KERNEL_DS); - rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPCNT, - (char *)&option, sizeof (option)); - rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPINTVL, - (char *)&keep_intvl, sizeof (keep_intvl)); -- set_fs (oldmm); -- if (rc != 0) { -- CERROR ("Can't set TCP_KEEPINTVL: %d\n", rc); -- return (rc); -- } -- - option = 1; -- set_fs (KERNEL_DS); - rc = sock_setsockopt (sock, SOL_SOCKET, SO_KEEPALIVE, - (char *)&option, sizeof (option)); - rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPCNT, - (char *)&keep_count, sizeof (keep_count)); -- set_fs (oldmm); -- if (rc != 0) { - CERROR ("Can't set SO_KEEPALIVE: %d\n", rc); - CERROR ("Can't set TCP_KEEPCNT: %d\n", rc); -- return (rc); -- } - #endif - -- return (0); --} -- - static int - ksocknal_connect_sock(struct socket **sockp, int *may_retry, - ksock_route_t *route, int local_port) -int -ksocknal_connect_peer (ksock_route_t *route, int type) --{ - struct sockaddr_in locaddr; - struct sockaddr_in srvaddr; - struct socket *sock; - int rc; - int option; - struct sockaddr_in ipaddr; -- mm_segment_t oldmm = get_fs(); -- struct timeval tv; - - memset(&locaddr, 0, 
sizeof(locaddr)); - locaddr.sin_family = AF_INET; - locaddr.sin_port = htons(local_port); - locaddr.sin_addr.s_addr = INADDR_ANY; - - memset (&srvaddr, 0, sizeof (srvaddr)); - srvaddr.sin_family = AF_INET; - srvaddr.sin_port = htons (route->ksnr_port); - srvaddr.sin_addr.s_addr = htonl (route->ksnr_ipaddr); - - *may_retry = 0; - - int fd; - struct socket *sock; - int rc; - -- rc = sock_create (PF_INET, SOCK_STREAM, 0, &sock); - *sockp = sock; -- if (rc != 0) { -- CERROR ("Can't create autoconnect socket: %d\n", rc); -- return (rc); -- } -- -- /* Ugh; have to map_fd for compatibility with sockets passed in -- * from userspace. And we actually need the sock->file refcounting -- * that this gives you :) */ -- - rc = sock_map_fd (sock); - if (rc < 0) { - fd = sock_map_fd (sock); - if (fd < 0) { -- sock_release (sock); - CERROR ("sock_map_fd error %d\n", rc); - return (rc); - CERROR ("sock_map_fd error %d\n", fd); - return (fd); -- } -- - /* NB the file descriptor (rc) now owns the ref on sock->file */ - /* NB the fd now owns the ref on sock->file */ -- LASSERT (sock->file != NULL); - LASSERT (file_count(sock->file) == 1); - - get_file(sock->file); /* extra ref makes sock->file */ - sys_close(rc); /* survive this close */ - - /* Still got a single ref on sock->file */ -- LASSERT (file_count(sock->file) == 1); -- -- /* Set the socket timeouts, so our connection attempt completes in -- * finite time */ - tv.tv_sec = ksocknal_data.ksnd_io_timeout; - tv.tv_sec = ksocknal_tunables.ksnd_io_timeout; -- tv.tv_usec = 0; -- -- set_fs (KERNEL_DS); -- rc = sock_setsockopt (sock, SOL_SOCKET, SO_SNDTIMEO, -- (char *)&tv, sizeof (tv)); -- set_fs (oldmm); -- if (rc != 0) { - CERROR ("Can't set send timeout %d: %d\n", - ksocknal_data.ksnd_io_timeout, rc); - goto failed; - CERROR ("Can't set send timeout %d: %d\n", - ksocknal_tunables.ksnd_io_timeout, rc); - goto out; -- } -- -- set_fs (KERNEL_DS); -- rc = sock_setsockopt (sock, SOL_SOCKET, SO_RCVTIMEO, -- (char *)&tv, sizeof (tv)); -- 
set_fs (oldmm); -- if (rc != 0) { -- CERROR ("Can't set receive timeout %d: %d\n", - ksocknal_data.ksnd_io_timeout, rc); - goto failed; - ksocknal_tunables.ksnd_io_timeout, rc); - goto out; -- } -- - option = 1; - set_fs (KERNEL_DS); - rc = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc); - goto failed; - } - - if (route->ksnr_buffer_size != 0) { - option = route->ksnr_buffer_size; - set_fs (KERNEL_DS); - rc = sock_setsockopt (sock, SOL_SOCKET, SO_SNDBUF, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set send buffer %d: %d\n", - route->ksnr_buffer_size, rc); - goto failed; - } - if (route->ksnr_myipaddr != 0) { - /* Bind to the local IP address */ - memset (&ipaddr, 0, sizeof (ipaddr)); - ipaddr.sin_family = AF_INET; - ipaddr.sin_port = htons (0); /* ANY */ - ipaddr.sin_addr.s_addr = htonl(route->ksnr_myipaddr); -- - set_fs (KERNEL_DS); - rc = sock_setsockopt (sock, SOL_SOCKET, SO_RCVBUF, - (char *)&option, sizeof (option)); - set_fs (oldmm); - rc = sock->ops->bind (sock, (struct sockaddr *)&ipaddr, - sizeof (ipaddr)); -- if (rc != 0) { - CERROR ("Can't set receive buffer %d: %d\n", - route->ksnr_buffer_size, rc); - goto failed; - CERROR ("Can't bind to local IP %u.%u.%u.%u: %d\n", - HIPQUAD(route->ksnr_myipaddr), rc); - goto out; -- } - } - - rc = sock->ops->bind(sock, - (struct sockaddr *)&locaddr, sizeof(locaddr)); - if (rc == -EADDRINUSE) { - CDEBUG(D_NET, "Port %d already in use\n", local_port); - *may_retry = 1; - goto failed; -- } - - memset (&ipaddr, 0, sizeof (ipaddr)); - ipaddr.sin_family = AF_INET; - ipaddr.sin_port = htons (route->ksnr_port); - ipaddr.sin_addr.s_addr = htonl (route->ksnr_ipaddr); - - rc = sock->ops->connect (sock, (struct sockaddr *)&ipaddr, - sizeof (ipaddr), sock->file->f_flags); -- if (rc != 0) { - CERROR("Error trying to bind to reserved port %d: %d\n", - 
local_port, rc); - goto failed; - CERROR ("Can't connect to nid "LPX64 - " local IP: %u.%u.%u.%u," - " remote IP: %u.%u.%u.%u/%d: %d\n", - route->ksnr_peer->ksnp_nid, - HIPQUAD(route->ksnr_myipaddr), - HIPQUAD(route->ksnr_ipaddr), - route->ksnr_port, rc); - goto out; -- } - - rc = sock->ops->connect(sock, - (struct sockaddr *)&srvaddr, sizeof(srvaddr), - sock->file->f_flags); - if (rc == 0) - return 0; - - /* EADDRNOTAVAIL probably means we're already connected to the same - * peer/port on the same local port on a differently typed - * connection. Let our caller retry with a different local - * port... */ - *may_retry = (rc == -EADDRNOTAVAIL); - - CDEBUG(*may_retry ? D_NET : D_ERROR, - "Error %d connecting to %u.%u.%u.%u/%d\n", rc, - HIPQUAD(route->ksnr_ipaddr), route->ksnr_port); - - failed: - fput(sock->file); - return rc; - } - - int - ksocknal_connect_peer (ksock_route_t *route, int type) - { - struct socket *sock; - int rc; - int port; - int may_retry; - - /* Iterate through reserved ports. When typed connections are - * used, we will need to bind to multiple ports, but we only know - * this at connect time. But, by that time we've already called - * bind() so we need a new socket. */ - - for (port = 1023; port > 512; --port) { - - rc = ksocknal_connect_sock(&sock, &may_retry, route, port); -- - if (rc == 0) { - rc = ksocknal_create_conn(route, sock, - route->ksnr_irq_affinity, type); - fput(sock->file); - return rc; - } - - if (!may_retry) - return rc; - rc = ksocknal_create_conn (route, sock, type); - if (rc == 0) { - /* Take an extra ref on sock->file to compensate for the - * upcoming close which will lose fd's ref on it. 
*/ - get_file (sock->file); -- } -- - CERROR("Out of ports trying to bind to a reserved port\n"); - return (-EADDRINUSE); - out: - sys_close (fd); - return (rc); --} -- --void --ksocknal_autoconnect (ksock_route_t *route) --{ -- LIST_HEAD (zombies); -- ksock_tx_t *tx; -- ksock_peer_t *peer; -- unsigned long flags; -- int rc; -- int type; -- -- for (;;) { -- for (type = 0; type < SOCKNAL_CONN_NTYPES; type++) -- if ((route->ksnr_connecting & (1 << type)) != 0) -- break; -- LASSERT (type < SOCKNAL_CONN_NTYPES); -- -- rc = ksocknal_connect_peer (route, type); -- -- if (rc != 0) -- break; -- -- /* successfully autoconnected: create_conn did the -- * route/conn binding and scheduled any blocked packets */ -- -- if (route->ksnr_connecting == 0) { -- /* No more connections required */ -- return; -- } -- } -- -- /* Connection attempt failed */ -- -- write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); -- -- peer = route->ksnr_peer; -- route->ksnr_connecting = 0; -- -- /* This is a retry rather than a new connection */ -- LASSERT (route->ksnr_retry_interval != 0); -- route->ksnr_timeout = jiffies + route->ksnr_retry_interval; -- route->ksnr_retry_interval = MIN (route->ksnr_retry_interval * 2, -- SOCKNAL_MAX_RECONNECT_INTERVAL); -- -- if (!list_empty (&peer->ksnp_tx_queue) && -- ksocknal_find_connecting_route_locked (peer) == NULL) { -- LASSERT (list_empty (&peer->ksnp_conns)); -- -- /* None of the connections that the blocked packets are -- * waiting for have been successful. Complete them now... 
*/ -- do { -- tx = list_entry (peer->ksnp_tx_queue.next, -- ksock_tx_t, tx_list); -- list_del (&tx->tx_list); -- list_add_tail (&tx->tx_list, &zombies); -- } while (!list_empty (&peer->ksnp_tx_queue)); -- } -- - /* make this route least-favourite for re-selection */ -#if 0 /* irrelevent with only eager routes */ -- if (!route->ksnr_deleted) { - /* make this route least-favourite for re-selection */ -- list_del(&route->ksnr_list); -- list_add_tail(&route->ksnr_list, &peer->ksnp_routes); -- } - -#endif -- write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags); -- -- while (!list_empty (&zombies)) { -- char ipbuf[PTL_NALFMT_SIZE]; -- char ipbuf2[PTL_NALFMT_SIZE]; -- tx = list_entry (zombies.next, ksock_tx_t, tx_list); -- -- CERROR ("Deleting packet type %d len %d ("LPX64" %s->"LPX64" %s)\n", - NTOH__u32 (tx->tx_hdr->type), - NTOH__u32 (tx->tx_hdr->payload_length), - NTOH__u64 (tx->tx_hdr->src_nid), - le32_to_cpu (tx->tx_hdr->type), - le32_to_cpu (tx->tx_hdr->payload_length), - le64_to_cpu (tx->tx_hdr->src_nid), -- portals_nid2str(SOCKNAL, - NTOH__u64(tx->tx_hdr->src_nid), - le64_to_cpu(tx->tx_hdr->src_nid), -- ipbuf), - NTOH__u64 (tx->tx_hdr->dest_nid), - le64_to_cpu (tx->tx_hdr->dest_nid), -- portals_nid2str(SOCKNAL, - NTOH__u64(tx->tx_hdr->src_nid), - le64_to_cpu(tx->tx_hdr->src_nid), -- ipbuf2)); -- -- list_del (&tx->tx_list); -- /* complete now */ -- ksocknal_tx_done (tx, 0); -- } --} -- --int --ksocknal_autoconnectd (void *arg) --{ -- long id = (long)arg; -- char name[16]; -- unsigned long flags; -- ksock_route_t *route; -- int rc; -- -- snprintf (name, sizeof (name), "ksocknal_ad%02ld", id); -- kportal_daemonize (name); -- kportal_blockallsigs (); -- -- spin_lock_irqsave (&ksocknal_data.ksnd_autoconnectd_lock, flags); -- -- while (!ksocknal_data.ksnd_shuttingdown) { -- -- if (!list_empty (&ksocknal_data.ksnd_autoconnectd_routes)) { -- route = list_entry (ksocknal_data.ksnd_autoconnectd_routes.next, -- ksock_route_t, ksnr_connect_list); - - -- list_del 
(&route->ksnr_connect_list); -- spin_unlock_irqrestore (&ksocknal_data.ksnd_autoconnectd_lock, flags); -- -- ksocknal_autoconnect (route); -- ksocknal_put_route (route); -- - spin_lock_irqsave (&ksocknal_data.ksnd_autoconnectd_lock, flags); - spin_lock_irqsave(&ksocknal_data.ksnd_autoconnectd_lock, - flags); -- continue; -- } - - spin_unlock_irqrestore (&ksocknal_data.ksnd_autoconnectd_lock, flags); -- - rc = wait_event_interruptible (ksocknal_data.ksnd_autoconnectd_waitq, - ksocknal_data.ksnd_shuttingdown || - !list_empty (&ksocknal_data.ksnd_autoconnectd_routes)); - spin_unlock_irqrestore(&ksocknal_data.ksnd_autoconnectd_lock, - flags); -- - spin_lock_irqsave (&ksocknal_data.ksnd_autoconnectd_lock, flags); - rc = wait_event_interruptible(ksocknal_data.ksnd_autoconnectd_waitq, - ksocknal_data.ksnd_shuttingdown || - !list_empty(&ksocknal_data.ksnd_autoconnectd_routes)); - - spin_lock_irqsave(&ksocknal_data.ksnd_autoconnectd_lock, flags); -- } -- -- spin_unlock_irqrestore (&ksocknal_data.ksnd_autoconnectd_lock, flags); -- -- ksocknal_thread_fini (); -- return (0); --} -- --ksock_conn_t * --ksocknal_find_timed_out_conn (ksock_peer_t *peer) --{ -- /* We're called with a shared lock on ksnd_global_lock */ -- ksock_conn_t *conn; -- struct list_head *ctmp; - ksock_sched_t *sched; -- -- list_for_each (ctmp, &peer->ksnp_conns) { -- conn = list_entry (ctmp, ksock_conn_t, ksnc_list); - sched = conn->ksnc_scheduler; -- -- /* Don't need the {get,put}connsock dance to deref ksnc_sock... */ -- LASSERT (!conn->ksnc_closing); - - - if (conn->ksnc_sock->sk->sk_err != 0) { - /* Something (e.g. 
failed keepalive) set the socket error */ - atomic_inc (&conn->ksnc_refcount); - CERROR ("Socket error %d: "LPX64" %p %d.%d.%d.%d\n", - conn->ksnc_sock->sk->sk_err, peer->ksnp_nid, - conn, HIPQUAD(conn->ksnc_ipaddr)); - return (conn); - } - -- if (conn->ksnc_rx_started && -- time_after_eq (jiffies, conn->ksnc_rx_deadline)) { -- /* Timed out incomplete incoming message */ -- atomic_inc (&conn->ksnc_refcount); -- CERROR ("Timed out RX from "LPX64" %p %d.%d.%d.%d\n", - peer->ksnp_nid, conn, HIPQUAD(conn->ksnc_ipaddr)); - peer->ksnp_nid,conn,HIPQUAD(conn->ksnc_ipaddr)); -- return (conn); -- } - - -- if ((!list_empty (&conn->ksnc_tx_queue) || -- conn->ksnc_sock->sk->sk_wmem_queued != 0) && -- time_after_eq (jiffies, conn->ksnc_tx_deadline)) { - /* Timed out messages queued for sending, or - * messages buffered in the socket's send buffer */ - /* Timed out messages queued for sending or - * buffered in the socket's send buffer */ -- atomic_inc (&conn->ksnc_refcount); - CERROR ("Timed out TX to "LPX64" %s%d %p %d.%d.%d.%d\n", - peer->ksnp_nid, - CERROR ("Timed out TX to "LPX64" %s%d %p %d.%d.%d.%d\n", - peer->ksnp_nid, -- list_empty (&conn->ksnc_tx_queue) ? "" : "Q ", -- conn->ksnc_sock->sk->sk_wmem_queued, conn, -- HIPQUAD(conn->ksnc_ipaddr)); -- return (conn); -- } -- } -- -- return (NULL); --} -- --void --ksocknal_check_peer_timeouts (int idx) --{ -- struct list_head *peers = &ksocknal_data.ksnd_peers[idx]; -- struct list_head *ptmp; -- ksock_peer_t *peer; -- ksock_conn_t *conn; -- -- again: -- /* NB. We expect to have a look at all the peers and not find any -- * connections to time out, so we just use a shared lock while we -- * take a look... 
*/ -- read_lock (&ksocknal_data.ksnd_global_lock); -- -- list_for_each (ptmp, peers) { -- peer = list_entry (ptmp, ksock_peer_t, ksnp_list); -- conn = ksocknal_find_timed_out_conn (peer); -- -- if (conn != NULL) { -- read_unlock (&ksocknal_data.ksnd_global_lock); -- -- CERROR ("Timeout out conn->"LPX64" ip %d.%d.%d.%d:%d\n", -- peer->ksnp_nid, -- HIPQUAD(conn->ksnc_ipaddr), -- conn->ksnc_port); -- ksocknal_close_conn_and_siblings (conn, -ETIMEDOUT); -- -- /* NB we won't find this one again, but we can't -- * just proceed with the next peer, since we dropped -- * ksnd_global_lock and it might be dead already! */ -- ksocknal_put_conn (conn); -- goto again; -- } -- } -- -- read_unlock (&ksocknal_data.ksnd_global_lock); --} -- --int --ksocknal_reaper (void *arg) --{ -- wait_queue_t wait; -- unsigned long flags; -- ksock_conn_t *conn; -- ksock_sched_t *sched; -- struct list_head enomem_conns; -- int nenomem_conns; -- int timeout; -- int i; -- int peer_index = 0; -- unsigned long deadline = jiffies; -- -- kportal_daemonize ("ksocknal_reaper"); -- kportal_blockallsigs (); -- -- INIT_LIST_HEAD(&enomem_conns); -- init_waitqueue_entry (&wait, current); -- -- spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags); -- -- while (!ksocknal_data.ksnd_shuttingdown) { -- -- if (!list_empty (&ksocknal_data.ksnd_deathrow_conns)) { -- conn = list_entry (ksocknal_data.ksnd_deathrow_conns.next, -- ksock_conn_t, ksnc_list); -- list_del (&conn->ksnc_list); -- -- spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags); -- -- ksocknal_terminate_conn (conn); -- ksocknal_put_conn (conn); -- -- spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags); -- continue; -- } -- -- if (!list_empty (&ksocknal_data.ksnd_zombie_conns)) { -- conn = list_entry (ksocknal_data.ksnd_zombie_conns.next, -- ksock_conn_t, ksnc_list); -- list_del (&conn->ksnc_list); -- -- spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags); -- -- ksocknal_destroy_conn (conn); -- -- spin_lock_irqsave 
(&ksocknal_data.ksnd_reaper_lock, flags); -- continue; -- } -- -- if (!list_empty (&ksocknal_data.ksnd_enomem_conns)) { -- list_add(&enomem_conns, &ksocknal_data.ksnd_enomem_conns); -- list_del_init(&ksocknal_data.ksnd_enomem_conns); -- } -- -- spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags); -- -- /* reschedule all the connections that stalled with ENOMEM... */ -- nenomem_conns = 0; -- while (!list_empty (&enomem_conns)) { -- conn = list_entry (enomem_conns.next, -- ksock_conn_t, ksnc_tx_list); -- list_del (&conn->ksnc_tx_list); -- -- sched = conn->ksnc_scheduler; -- -- spin_lock_irqsave (&sched->kss_lock, flags); -- -- LASSERT (conn->ksnc_tx_scheduled); -- conn->ksnc_tx_ready = 1; -- list_add_tail (&conn->ksnc_tx_list, &sched->kss_tx_conns); -- wake_up (&sched->kss_waitq); -- -- spin_unlock_irqrestore (&sched->kss_lock, flags); -- nenomem_conns++; -- } -- -- /* careful with the jiffy wrap... */ -- while ((timeout = (int)(deadline - jiffies)) <= 0) { -- const int n = 4; -- const int p = 1; -- int chunk = ksocknal_data.ksnd_peer_hash_size; -- -- /* Time to check for timeouts on a few more peers: I do -- * checks every 'p' seconds on a proportion of the peer -- * table and I need to check every connection 'n' times -- * within a timeout interval, to ensure I detect a -- * timeout on any connection within (n+1)/n times the -- * timeout interval. */ -- - if (ksocknal_data.ksnd_io_timeout > n * p) - if (ksocknal_tunables.ksnd_io_timeout > n * p) -- chunk = (chunk * n * p) / - ksocknal_data.ksnd_io_timeout; - ksocknal_tunables.ksnd_io_timeout; -- if (chunk == 0) -- chunk = 1; -- -- for (i = 0; i < chunk; i++) { -- ksocknal_check_peer_timeouts (peer_index); -- peer_index = (peer_index + 1) % -- ksocknal_data.ksnd_peer_hash_size; -- } -- -- deadline += p * HZ; -- } -- -- if (nenomem_conns != 0) { -- /* Reduce my timeout if I rescheduled ENOMEM conns. -- * This also prevents me getting woken immediately -- * if any go back on my enomem list. 
*/ -- timeout = SOCKNAL_ENOMEM_RETRY; -- } -- ksocknal_data.ksnd_reaper_waketime = jiffies + timeout; -- - add_wait_queue (&ksocknal_data.ksnd_reaper_waitq, &wait); -- set_current_state (TASK_INTERRUPTIBLE); - add_wait_queue (&ksocknal_data.ksnd_reaper_waitq, &wait); -- -- if (!ksocknal_data.ksnd_shuttingdown && -- list_empty (&ksocknal_data.ksnd_deathrow_conns) && -- list_empty (&ksocknal_data.ksnd_zombie_conns)) -- schedule_timeout (timeout); -- -- set_current_state (TASK_RUNNING); -- remove_wait_queue (&ksocknal_data.ksnd_reaper_waitq, &wait); -- -- spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags); -- } -- -- spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags); -- -- ksocknal_thread_fini (); -- return (0); --} -- - nal_cb_t ksocknal_lib = { - nal_data: &ksocknal_data, /* NAL private data */ - cb_send: ksocknal_send, - cb_send_pages: ksocknal_send_pages, - cb_recv: ksocknal_recv, - cb_recv_pages: ksocknal_recv_pages, - cb_read: ksocknal_read, - cb_write: ksocknal_write, - cb_malloc: ksocknal_malloc, - cb_free: ksocknal_free, - cb_printf: ksocknal_printf, - cb_cli: ksocknal_cli, - cb_sti: ksocknal_sti, - cb_dist: ksocknal_dist -lib_nal_t ksocknal_lib = { - libnal_data: &ksocknal_data, /* NAL private data */ - libnal_send: ksocknal_send, - libnal_send_pages: ksocknal_send_pages, - libnal_recv: ksocknal_recv, - libnal_recv_pages: ksocknal_recv_pages, - libnal_dist: ksocknal_dist --}; diff --cc lnet/libcfs/.cvsignore index df12db6,c6f0aa4..0000000 deleted file mode 100644,100644 --- a/lnet/libcfs/.cvsignore +++ /dev/null @@@ -1,12 -1,11 +1,0 @@@ --.deps --Makefile --link-stamp --.*.cmd --autoMakefile.in --autoMakefile - sources --*.ko --*.mod.c --.*.flags --.tmp_versions --.depend diff --cc lnet/libcfs/Makefile.in index 598adc1,0967123..0000000 deleted file mode 100644,100644 --- a/lnet/libcfs/Makefile.in +++ /dev/null @@@ -1,9 -1,4 +1,0 @@@ - MODULES = portals -MODULES = libcfs --libcfs-objs := debug.o lwt.o module.o proc.o tracefile.o - - 
api-sources := $(wildcard @LUSTRE@/portals/portals/api-*.c) - lib-sources := $(wildcard @LUSTRE@/portals/portals/lib-*.c) - - portals-objs += $(libcfs-objs) $(patsubst %.c,%.o,$(notdir $(api-sources) $(lib-sources))) -- --@INCLUDE_RULES@ diff --cc lnet/libcfs/Makefile.mk index 8ecf3c9,8ecf3c9..0000000 deleted file mode 100644,100644 --- a/lnet/libcfs/Makefile.mk +++ /dev/null @@@ -1,9 -1,9 +1,0 @@@ --# Copyright (C) 2001 Cluster File Systems, Inc. --# --# This code is issued under the GNU General Public License. --# See the file COPYING in this distribution -- --include fs/lustre/portals/Kernelenv -- --obj-y += libcfs.o --libcfs-objs := module.o proc.o debug.o lwt.o tracefile.o diff --cc lnet/libcfs/autoMakefile.am index cacd769,9c27693..0000000 deleted file mode 100644,100644 --- a/lnet/libcfs/autoMakefile.am +++ /dev/null @@@ -1,20 -1,11 +1,0 @@@ --# Copyright (C) 2001, 2002 Cluster File Systems, Inc. --# --# This code is issued under the GNU General Public License. --# See the file COPYING in this distribution -- --if MODULES - modulenet_DATA := portals$(KMODEXT) -modulenet_DATA := libcfs$(KMODEXT) --endif - - sources: - rm -f sources - @for i in $(api-sources) $(lib-sources) ; do \ - echo ln -sf $$i . ; \ - ln -sf $$i . || exit 1 ; \ - done - touch sources -- --MOSTLYCLEANFILES = *.o *.ko *.mod.c - CLEANFILES = sources lib-*.c api-*.c - DIST_SOURCES = $(libcfs-objs:%.o=%.c) *.h -DIST_SOURCES = $(libcfs-objs:%.o=%.c) tracefile.h diff --cc lnet/libcfs/debug.c index 3e5531a,c56f76f..0000000 deleted file mode 100644,100644 --- a/lnet/libcfs/debug.c +++ /dev/null @@@ -1,372 -1,407 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (C) 2002 Cluster File Systems, Inc. -- * Author: Phil Schwan -- * -- * This file is part of Lustre, http://www.lustre.org. 
-- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --#ifndef EXPORT_SYMTAB --# define EXPORT_SYMTAB --#endif -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include -- --#include --#include --#include --#include --#include --#include -- --# define DEBUG_SUBSYSTEM S_PORTALS -- --#include --#include --#include -- --#include "tracefile.h" -- --#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) --#include --#endif -- --unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL | - S_GMNAL | S_IBNAL); - S_GMNAL | S_OPENIBNAL); --EXPORT_SYMBOL(portal_subsystem_debug); -- --unsigned int portal_debug = (D_WARNING | D_DLMTRACE | D_ERROR | D_EMERG | D_HA | -- D_RPCTRACE | D_VFSTRACE); --EXPORT_SYMBOL(portal_debug); -- --unsigned int portal_printk; --EXPORT_SYMBOL(portal_printk); -- --unsigned int portal_stack; --EXPORT_SYMBOL(portal_stack); -- --#ifdef __KERNEL__ --atomic_t portal_kmemory = ATOMIC_INIT(0); --EXPORT_SYMBOL(portal_kmemory); --#endif -- --static DECLARE_WAIT_QUEUE_HEAD(debug_ctlwq); -- --char debug_file_path[1024] = "/tmp/lustre-log"; --static char debug_file_name[1024]; --static int handled_panic; /* to avoid recursive calls to notifiers */ --char portals_upcall[1024] = "/usr/lib/lustre/portals_upcall"; -- --int portals_do_debug_dumplog(void 
*arg) --{ -- void *journal_info; -- -- kportal_daemonize(""); -- -- reparent_to_init(); -- journal_info = current->journal_info; -- current->journal_info = NULL; -- -- snprintf(debug_file_name, sizeof(debug_file_path) - 1, -- "%s.%ld.%ld", debug_file_path, CURRENT_SECONDS, (long)arg); - printk(KERN_ALERT "LustreError: dumping log to %s\n", debug_file_name); -- tracefile_dump_all_pages(debug_file_name); -- -- current->journal_info = journal_info; -- wake_up(&debug_ctlwq); -- return 0; --} -- --void portals_debug_dumplog(void) --{ -- int rc; -- DECLARE_WAITQUEUE(wait, current); -- ENTRY; -- -- /* we're being careful to ensure that the kernel thread is -- * able to set our state to running as it exits before we -- * get to schedule() */ -- set_current_state(TASK_INTERRUPTIBLE); -- add_wait_queue(&debug_ctlwq, &wait); -- -- rc = kernel_thread(portals_do_debug_dumplog, (void *)(long)current->pid, -- CLONE_VM | CLONE_FS | CLONE_FILES); -- if (rc < 0) -- printk(KERN_ERR "LustreError: cannot start log dump thread: " -- "%d\n", rc); -- else -- schedule(); -- -- /* be sure to teardown if kernel_thread() failed */ -- remove_wait_queue(&debug_ctlwq, &wait); -- set_current_state(TASK_RUNNING); --} -- --static int panic_dumplog(struct notifier_block *self, unsigned long unused1, -- void *unused2) --{ -- if (handled_panic) -- return 0; -- else -- handled_panic = 1; -- -- if (in_interrupt()) { -- trace_debug_print(); -- return 0; -- } -- -- while (current->lock_depth >= 0) -- unlock_kernel(); -- portals_debug_dumplog(); -- return 0; --} -- --static struct notifier_block lustre_panic_notifier = { -- notifier_call : panic_dumplog, -- next : NULL, -- priority : 10000 --}; -- --int portals_debug_init(unsigned long bufsize) --{ -- notifier_chain_register(&panic_notifier_list, &lustre_panic_notifier); -- return tracefile_init(); --} -- --int portals_debug_cleanup(void) --{ -- tracefile_exit(); -- notifier_chain_unregister(&panic_notifier_list, &lustre_panic_notifier); -- return 0; --} 
-- --int portals_debug_clear_buffer(void) --{ -- trace_flush_pages(); -- return 0; --} -- --/* Debug markers, although printed by S_PORTALS -- * should not be be marked as such. */ --#undef DEBUG_SUBSYSTEM --#define DEBUG_SUBSYSTEM S_UNDEFINED --int portals_debug_mark_buffer(char *text) --{ -- CDEBUG(D_TRACE,"***************************************************\n"); - CDEBUG(D_WARNING, "DEBUG MARKER: %s\n", text); - CWARN("DEBUG MARKER: %s\n", text); -- CDEBUG(D_TRACE,"***************************************************\n"); -- -- return 0; --} --#undef DEBUG_SUBSYSTEM --#define DEBUG_SUBSYSTEM S_PORTALS -- --void portals_debug_set_level(unsigned int debug_level) --{ - printk("Lustre: Setting portals debug level to %08x\n", debug_level); - printk(KERN_WARNING "Lustre: Setting portals debug level to %08x\n", - debug_level); -- portal_debug = debug_level; --} -- --void portals_run_upcall(char **argv) --{ -- int rc; -- int argc; -- char *envp[] = { -- "HOME=/", -- "PATH=/sbin:/bin:/usr/sbin:/usr/bin", -- NULL}; -- ENTRY; -- -- argv[0] = portals_upcall; -- argc = 1; -- while (argv[argc] != NULL) -- argc++; -- -- LASSERT(argc >= 2); -- -- rc = USERMODEHELPER(argv[0], argv, envp); -- if (rc < 0) { -- CERROR("Error %d invoking portals upcall %s %s%s%s%s%s%s%s%s; " -- "check /proc/sys/portals/upcall\n", -- rc, argv[0], argv[1], -- argc < 3 ? "" : ",", argc < 3 ? "" : argv[2], -- argc < 4 ? "" : ",", argc < 4 ? "" : argv[3], -- argc < 5 ? "" : ",", argc < 5 ? "" : argv[4], -- argc < 6 ? "" : ",..."); -- } else { -- CERROR("Invoked portals upcall %s %s%s%s%s%s%s%s%s\n", -- argv[0], argv[1], -- argc < 3 ? "" : ",", argc < 3 ? "" : argv[2], -- argc < 4 ? "" : ",", argc < 4 ? "" : argv[3], -- argc < 5 ? "" : ",", argc < 5 ? "" : argv[4], -- argc < 6 ? 
"" : ",..."); -- } --} -- --void portals_run_lbug_upcall(char *file, const char *fn, const int line) --{ -- char *argv[6]; -- char buf[32]; -- -- ENTRY; -- snprintf (buf, sizeof buf, "%d", line); -- -- argv[1] = "LBUG"; -- argv[2] = file; -- argv[3] = (char *)fn; -- argv[4] = buf; -- argv[5] = NULL; -- -- portals_run_upcall (argv); --} -- --char *portals_nid2str(int nal, ptl_nid_t nid, char *str) --{ - if (nid == PTL_NID_ANY) { - snprintf(str, PTL_NALFMT_SIZE - 1, "%s", - "PTL_NID_ANY"); - return str; - } - -- switch(nal){ --/* XXX this could be a nal method of some sort, 'cept it's config -- * dependent whether (say) socknal NIDs are actually IP addresses... */ --#ifndef CRAY_PORTALS -- case TCPNAL: -- /* userspace NAL */ - case OPENIBNAL: -- case SOCKNAL: -- snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u", -- (__u32)(nid >> 32), HIPQUAD(nid)); -- break; -- case QSWNAL: -- case GMNAL: - case IBNAL: - case SCIMACNAL: -- snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u", -- (__u32)(nid >> 32), (__u32)nid); -- break; --#endif -- default: -- snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx", -- nal, (long long)nid); - break; - } - return str; -} -/* bug #4615 */ -char *portals_id2str(int nal, ptl_process_id_t id, char *str) -{ - switch(nal){ -#ifndef CRAY_PORTALS - case TCPNAL: - /* userspace NAL */ - case OPENIBNAL: - case SOCKNAL: - snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u,%u", - (__u32)(id.nid >> 32), HIPQUAD((id.nid)) , id.pid); - break; - case QSWNAL: - case GMNAL: - snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u,%u", - (__u32)(id.nid >> 32), (__u32)id.nid, id.pid); - break; -#endif - default: - snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? 
%llx,%lx", - nal, (long long)id.nid, (long)id.pid ); - break; -- } -- return str; --} - -- --#ifdef __KERNEL__ --char stack_backtrace[LUSTRE_TRACE_SIZE]; --spinlock_t stack_backtrace_lock = SPIN_LOCK_UNLOCKED; -- --#if defined(__arch_um__) -- --char *portals_debug_dumpstack(void) --{ -- asm("int $3"); -- return "dump stack\n"; --} -- --#elif defined(__i386__) -- --#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) --extern int lookup_symbol(unsigned long address, char *buf, int buflen); --const char *kallsyms_lookup(unsigned long addr, -- unsigned long *symbolsize, -- unsigned long *offset, -- char **modname, char *namebuf) --{ -- int rc = lookup_symbol(addr, namebuf, 128); -- if (rc == -ENOSYS) -- return NULL; -- return namebuf; --} --#endif -- --char *portals_debug_dumpstack(void) --{ -- unsigned long esp = current->thread.esp, addr; -- unsigned long *stack = (unsigned long *)&esp; -- char *buf = stack_backtrace, *pbuf = buf; -- int size; -- -- /* User space on another CPU? */ -- if ((esp ^ (unsigned long)current) & (PAGE_MASK << 1)){ -- buf[0] = '\0'; -- goto out; -- } -- -- size = sprintf(pbuf, " Call Trace: "); -- pbuf += size; -- while (((long) stack & (THREAD_SIZE - 1)) != 0) { -- addr = *stack++; -- if (kernel_text_address(addr)) { -- const char *sym_name; -- char *modname, buffer[128]; -- unsigned long junk, offset; -- -- sym_name = kallsyms_lookup(addr, &junk, &offset, -- &modname, buffer); -- if (sym_name == NULL) { -- if (buf + LUSTRE_TRACE_SIZE <= pbuf + 12) -- break; -- size = sprintf(pbuf, "[<%08lx>] ", addr); -- } else { -- if (buf + LUSTRE_TRACE_SIZE -- /* fix length + sizeof('\0') */ -- <= pbuf + strlen(buffer) + 28 + 1) -- break; -- size = sprintf(pbuf, "([<%08lx>] %s (0x%p)) ", -- addr, buffer, stack - 1); -- } -- pbuf += size; -- } -- } --out: -- return buf; --} -- --#else /* !__arch_um__ && !__i386__ */ -- --char *portals_debug_dumpstack(void) --{ - return "dump_stack\n"; - char *buf = stack_backtrace; - buf[0] = '\0'; - return buf; --} -- 
--#endif /* __arch_um__ */ --struct task_struct *portals_current(void) --{ -- CWARN("current task struct is %p\n", current); -- return current; --} -- --EXPORT_SYMBOL(stack_backtrace_lock); --EXPORT_SYMBOL(portals_debug_dumpstack); --EXPORT_SYMBOL(portals_current); --#endif /* __KERNEL__ */ -- --EXPORT_SYMBOL(portals_debug_dumplog); --EXPORT_SYMBOL(portals_debug_set_level); --EXPORT_SYMBOL(portals_run_upcall); --EXPORT_SYMBOL(portals_run_lbug_upcall); --EXPORT_SYMBOL(portals_nid2str); -EXPORT_SYMBOL(portals_id2str); diff --cc lnet/libcfs/lwt.c index a24423e,3f6a9c2..0000000 deleted file mode 100644,100644 --- a/lnet/libcfs/lwt.c +++ /dev/null @@@ -1,248 -1,268 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (C) 2003 Cluster File Systems, Inc. -- * Author: Eric Barton -- * -- * This file is part of Lustre, http://www.lustre.org. -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-- */ -- --#ifndef EXPORT_SYMTAB --# define EXPORT_SYMTAB --#endif -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include -- --#define DEBUG_SUBSYSTEM S_PORTALS -- --#include -- --#if LWT_SUPPORT -- -#if !KLWT_SUPPORT --int lwt_enabled; -lwt_cpu_t lwt_cpus[NR_CPUS]; -#endif - --int lwt_pages_per_cpu; - lwt_cpu_t lwt_cpus[LWT_MAX_CPUS]; -- --/* NB only root is allowed to retrieve LWT info; it's an open door into the -- * kernel... */ -- --int --lwt_lookup_string (int *size, char *knl_ptr, -- char *user_ptr, int user_size) --{ -- int maxsize = 128; -- -- /* knl_ptr was retrieved from an LWT snapshot and the caller wants to -- * turn it into a string. NB we can crash with an access violation -- * trying to determine the string length, so we're trusting our -- * caller... */ -- -- if (!capable(CAP_SYS_ADMIN)) -- return (-EPERM); -- -- if (user_size > 0 && -- maxsize > user_size) -- maxsize = user_size; -- -- *size = strnlen (knl_ptr, maxsize - 1) + 1; -- -- if (user_ptr != NULL) { -- if (user_size < 4) -- return (-EINVAL); -- -- if (copy_to_user (user_ptr, knl_ptr, *size)) -- return (-EFAULT); -- -- /* Did I truncate the string? 
*/ -- if (knl_ptr[*size - 1] != 0) -- copy_to_user (user_ptr + *size - 4, "...", 4); -- } -- -- return (0); --} -- --int --lwt_control (int enable, int clear) --{ -- lwt_page_t *p; -- int i; -- int j; -- -- if (!capable(CAP_SYS_ADMIN)) -- return (-EPERM); -- - if (clear) - for (i = 0; i < num_online_cpus(); i++) { - p = lwt_cpus[i].lwtc_current_page; - if (!enable) { - LWT_EVENT(0,0,0,0); - lwt_enabled = 0; - mb(); - /* give people some time to stop adding traces */ - schedule_timeout(10); - } -- - for (j = 0; j < lwt_pages_per_cpu; j++) { - memset (p->lwtp_events, 0, PAGE_SIZE); - for (i = 0; i < num_online_cpus(); i++) { - p = lwt_cpus[i].lwtc_current_page; -- - p = list_entry (p->lwtp_list.next, - lwt_page_t, lwtp_list); - } - if (p == NULL) - return (-ENODATA); - - if (!clear) - continue; - - for (j = 0; j < lwt_pages_per_cpu; j++) { - memset (p->lwtp_events, 0, PAGE_SIZE); - - p = list_entry (p->lwtp_list.next, - lwt_page_t, lwtp_list); - } -- } -- - lwt_enabled = enable; - mb(); - if (!enable) { - /* give people some time to stop adding traces */ - schedule_timeout(10); - if (enable) { - lwt_enabled = 1; - mb(); - LWT_EVENT(0,0,0,0); -- } -- -- return (0); --} -- --int --lwt_snapshot (cycles_t *now, int *ncpu, int *total_size, -- void *user_ptr, int user_size) --{ -- const int events_per_page = PAGE_SIZE / sizeof(lwt_event_t); -- const int bytes_per_page = events_per_page * sizeof(lwt_event_t); -- lwt_page_t *p; -- int i; -- int j; -- -- if (!capable(CAP_SYS_ADMIN)) -- return (-EPERM); -- -- *ncpu = num_online_cpus(); -- *total_size = num_online_cpus() * lwt_pages_per_cpu * bytes_per_page; -- *now = get_cycles(); -- -- if (user_ptr == NULL) -- return (0); -- -- for (i = 0; i < num_online_cpus(); i++) { -- p = lwt_cpus[i].lwtc_current_page; - - if (p == NULL) - return (-ENODATA); -- -- for (j = 0; j < lwt_pages_per_cpu; j++) { -- if (copy_to_user(user_ptr, p->lwtp_events, -- bytes_per_page)) -- return (-EFAULT); -- -- user_ptr = ((char *)user_ptr) + 
bytes_per_page; -- p = list_entry(p->lwtp_list.next, -- lwt_page_t, lwtp_list); -- -- } -- } -- -- return (0); --} -- --int --lwt_init () --{ -- int i; -- int j; - - for (i = 0; i < num_online_cpus(); i++) - if (lwt_cpus[i].lwtc_current_page != NULL) - return (-EALREADY); -- - if (num_online_cpus() > LWT_MAX_CPUS) { - CERROR ("Too many CPUs\n"); - return (-EINVAL); - } - LASSERT (!lwt_enabled); -- -- /* NULL pointers, zero scalars */ -- memset (lwt_cpus, 0, sizeof (lwt_cpus)); -- lwt_pages_per_cpu = LWT_MEMORY / (num_online_cpus() * PAGE_SIZE); -- -- for (i = 0; i < num_online_cpus(); i++) -- for (j = 0; j < lwt_pages_per_cpu; j++) { -- struct page *page = alloc_page (GFP_KERNEL); -- lwt_page_t *lwtp; -- -- if (page == NULL) { -- CERROR ("Can't allocate page\n"); -- lwt_fini (); -- return (-ENOMEM); -- } -- -- PORTAL_ALLOC(lwtp, sizeof (*lwtp)); -- if (lwtp == NULL) { -- CERROR ("Can't allocate lwtp\n"); -- __free_page(page); -- lwt_fini (); -- return (-ENOMEM); -- } -- -- lwtp->lwtp_page = page; -- lwtp->lwtp_events = page_address(page); -- memset (lwtp->lwtp_events, 0, PAGE_SIZE); -- -- if (j == 0) { -- INIT_LIST_HEAD (&lwtp->lwtp_list); -- lwt_cpus[i].lwtc_current_page = lwtp; -- } else { -- list_add (&lwtp->lwtp_list, -- &lwt_cpus[i].lwtc_current_page->lwtp_list); -- } -- } -- -- lwt_enabled = 1; -- mb(); - - LWT_EVENT(0,0,0,0); -- -- return (0); --} -- --void --lwt_fini () --{ -- int i; - - if (num_online_cpus() > LWT_MAX_CPUS) - return; -- - lwt_control(0, 0); - -- for (i = 0; i < num_online_cpus(); i++) -- while (lwt_cpus[i].lwtc_current_page != NULL) { -- lwt_page_t *lwtp = lwt_cpus[i].lwtc_current_page; -- -- if (list_empty (&lwtp->lwtp_list)) { -- lwt_cpus[i].lwtc_current_page = NULL; -- } else { -- lwt_cpus[i].lwtc_current_page = -- list_entry (lwtp->lwtp_list.next, -- lwt_page_t, lwtp_list); -- -- list_del (&lwtp->lwtp_list); -- } -- -- __free_page (lwtp->lwtp_page); -- PORTAL_FREE (lwtp, sizeof (*lwtp)); -- } --} -- --EXPORT_SYMBOL(lwt_enabled); 
--EXPORT_SYMBOL(lwt_cpus); -- --EXPORT_SYMBOL(lwt_init); --EXPORT_SYMBOL(lwt_fini); --EXPORT_SYMBOL(lwt_lookup_string); --EXPORT_SYMBOL(lwt_control); --EXPORT_SYMBOL(lwt_snapshot); --#endif diff --cc lnet/libcfs/module.c index f1d086b,3703013..0000000 deleted file mode 100644,100644 --- a/lnet/libcfs/module.c +++ /dev/null @@@ -1,816 -1,601 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (C) 2001, 2002 Cluster File Systems, Inc. -- * -- * This file is part of Lustre, http://www.lustre.org. -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-- */ -- --#ifndef EXPORT_SYMTAB --# define EXPORT_SYMTAB --#endif --#define DEBUG_SUBSYSTEM S_PORTALS -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include -- --#include --#include -- --#include --#include --#include --#include --#include -- --#include --#include --#include - #include --#include -- --#define PORTAL_MINOR 240 - - extern void (kping_client)(struct portal_ioctl_data *); -- --struct nal_cmd_handler { - nal_cmd_handler_t nch_handler; - void * nch_private; - int nch_number; - nal_cmd_handler_fn *nch_handler; - void *nch_private; --}; -- - static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1]; -static struct nal_cmd_handler nal_cmd[16]; --static DECLARE_MUTEX(nal_cmd_sem); -- --#ifdef PORTAL_DEBUG --void kportal_assertion_failed(char *expr, char *file, const char *func, -- const int line) --{ -- portals_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK, -- "ASSERTION(%s) failed\n", expr); -- LBUG_WITH_LOC(file, func, line); --} --#endif -- --void --kportal_daemonize (char *str) --{ --#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,63)) -- daemonize(str); --#else -- daemonize(); -- snprintf (current->comm, sizeof (current->comm), "%s", str); --#endif --} -- --void --kportal_memhog_free (struct portals_device_userstate *pdu) --{ -- struct page **level0p = &pdu->pdu_memhog_root_page; -- struct page **level1p; -- struct page **level2p; -- int count1; -- int count2; -- -- if (*level0p != NULL) { -- -- level1p = (struct page **)page_address(*level0p); -- count1 = 0; -- -- while (count1 < PAGE_SIZE/sizeof(struct page *) && -- *level1p != NULL) { -- -- level2p = (struct page **)page_address(*level1p); -- count2 = 0; -- -- while (count2 < PAGE_SIZE/sizeof(struct page *) && -- *level2p != NULL) { -- -- __free_page(*level2p); -- pdu->pdu_memhog_pages--; -- level2p++; -- count2++; -- } -- -- __free_page(*level1p); -- pdu->pdu_memhog_pages--; -- level1p++; -- count1++; -- } -- -- __free_page(*level0p); -- 
pdu->pdu_memhog_pages--; -- -- *level0p = NULL; -- } -- -- LASSERT (pdu->pdu_memhog_pages == 0); --} -- --int --kportal_memhog_alloc (struct portals_device_userstate *pdu, int npages, int flags) --{ -- struct page **level0p; -- struct page **level1p; -- struct page **level2p; -- int count1; -- int count2; -- -- LASSERT (pdu->pdu_memhog_pages == 0); -- LASSERT (pdu->pdu_memhog_root_page == NULL); -- -- if (npages < 0) -- return -EINVAL; -- -- if (npages == 0) -- return 0; -- -- level0p = &pdu->pdu_memhog_root_page; -- *level0p = alloc_page(flags); -- if (*level0p == NULL) -- return -ENOMEM; -- pdu->pdu_memhog_pages++; -- -- level1p = (struct page **)page_address(*level0p); -- count1 = 0; -- memset(level1p, 0, PAGE_SIZE); -- -- while (pdu->pdu_memhog_pages < npages && -- count1 < PAGE_SIZE/sizeof(struct page *)) { -- -- if (signal_pending(current)) -- return (-EINTR); -- -- *level1p = alloc_page(flags); -- if (*level1p == NULL) -- return -ENOMEM; -- pdu->pdu_memhog_pages++; -- -- level2p = (struct page **)page_address(*level1p); -- count2 = 0; -- memset(level2p, 0, PAGE_SIZE); -- -- while (pdu->pdu_memhog_pages < npages && -- count2 < PAGE_SIZE/sizeof(struct page *)) { -- -- if (signal_pending(current)) -- return (-EINTR); -- -- *level2p = alloc_page(flags); -- if (*level2p == NULL) -- return (-ENOMEM); -- pdu->pdu_memhog_pages++; -- -- level2p++; -- count2++; -- } -- -- level1p++; -- count1++; -- } -- -- return 0; --} -- --void --kportal_blockallsigs () --{ -- unsigned long flags; -- -- SIGNAL_MASK_LOCK(current, flags); -- sigfillset(¤t->blocked); -- RECALC_SIGPENDING; -- SIGNAL_MASK_UNLOCK(current, flags); --} -- --/* called when opening /dev/device */ - static int kportal_psdev_open(struct inode * inode, struct file * file) -static int libcfs_psdev_open(struct inode * inode, struct file * file) --{ -- struct portals_device_userstate *pdu; -- ENTRY; -- -- if (!inode) -- RETURN(-EINVAL); -- -- PORTAL_MODULE_USE; -- -- PORTAL_ALLOC(pdu, sizeof(*pdu)); -- if (pdu != 
NULL) { -- pdu->pdu_memhog_pages = 0; -- pdu->pdu_memhog_root_page = NULL; -- } -- file->private_data = pdu; -- -- RETURN(0); --} -- --/* called when closing /dev/device */ - static int kportal_psdev_release(struct inode * inode, struct file * file) -static int libcfs_psdev_release(struct inode * inode, struct file * file) --{ -- struct portals_device_userstate *pdu; -- ENTRY; -- -- if (!inode) -- RETURN(-EINVAL); -- -- pdu = file->private_data; -- if (pdu != NULL) { -- kportal_memhog_free(pdu); -- PORTAL_FREE(pdu, sizeof(*pdu)); -- } -- -- PORTAL_MODULE_UNUSE; -- RETURN(0); --} -- --static inline void freedata(void *data, int len) --{ -- PORTAL_FREE(data, len); - } - - static int - kportal_add_route(int gateway_nalid, ptl_nid_t gateway_nid, - ptl_nid_t lo_nid, ptl_nid_t hi_nid) - { - int rc; - kpr_control_interface_t *ci; - - ci = (kpr_control_interface_t *) PORTAL_SYMBOL_GET (kpr_control_interface); - if (ci == NULL) - return (-ENODEV); - - rc = ci->kprci_add_route (gateway_nalid, gateway_nid, lo_nid, hi_nid); - - PORTAL_SYMBOL_PUT(kpr_control_interface); - return (rc); - } - - static int - kportal_del_route(int gw_nalid, ptl_nid_t gw_nid, - ptl_nid_t lo, ptl_nid_t hi) - { - int rc; - kpr_control_interface_t *ci; - - ci = (kpr_control_interface_t *)PORTAL_SYMBOL_GET(kpr_control_interface); - if (ci == NULL) - return (-ENODEV); - - rc = ci->kprci_del_route (gw_nalid, gw_nid, lo, hi); - - PORTAL_SYMBOL_PUT(kpr_control_interface); - return (rc); --} -- - static int - kportal_notify_router (int gw_nalid, ptl_nid_t gw_nid, - int alive, time_t when) -struct nal_cmd_handler * -libcfs_find_nal_cmd_handler(int nal) --{ - int rc; - kpr_control_interface_t *ci; - - /* No error if router not preset. Sysadmin is allowed to notify - * _everywhere_ when a NID boots or crashes, even if they know - * nothing of the peer. 
*/ - ci = (kpr_control_interface_t *)PORTAL_SYMBOL_GET(kpr_control_interface); - if (ci == NULL) - return (0); - int i; -- - rc = ci->kprci_notify (gw_nalid, gw_nid, alive, when); - for (i = 0; i < sizeof(nal_cmd)/sizeof(nal_cmd[0]); i++) - if (nal_cmd[i].nch_handler != NULL && - nal_cmd[i].nch_number == nal) - return (&nal_cmd[i]); -- - PORTAL_SYMBOL_PUT(kpr_control_interface); - return (rc); - return (NULL); --} -- - static int - kportal_get_route(int index, __u32 *gateway_nalidp, ptl_nid_t *gateway_nidp, - ptl_nid_t *lo_nidp, ptl_nid_t *hi_nidp, int *alivep) -int -libcfs_nal_cmd_register(int nal, nal_cmd_handler_fn *handler, void *private) --{ - int gateway_nalid; - ptl_nid_t gateway_nid; - ptl_nid_t lo_nid; - ptl_nid_t hi_nid; - int alive; - int rc; - kpr_control_interface_t *ci; - struct nal_cmd_handler *cmd; - int i; - int rc; -- - ci = (kpr_control_interface_t *) PORTAL_SYMBOL_GET(kpr_control_interface); - if (ci == NULL) - return (-ENODEV); - CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler); -- - rc = ci->kprci_get_route(index, &gateway_nalid, &gateway_nid, - &lo_nid, &hi_nid, &alive); - down(&nal_cmd_sem); -- - if (rc == 0) { - CDEBUG(D_IOCTL, "got route [%d] %d "LPX64":"LPX64" - "LPX64", %s\n", - index, gateway_nalid, gateway_nid, lo_nid, hi_nid, - alive ? 
"up" : "down"); - if (libcfs_find_nal_cmd_handler(nal) != NULL) { - up (&nal_cmd_sem); - return (-EBUSY); - } -- - *gateway_nalidp = (__u32)gateway_nalid; - *gateway_nidp = gateway_nid; - *lo_nidp = lo_nid; - *hi_nidp = hi_nid; - *alivep = alive; - cmd = NULL; - for (i = 0; i < sizeof(nal_cmd)/sizeof(nal_cmd[0]); i++) - if (nal_cmd[i].nch_handler == NULL) { - cmd = &nal_cmd[i]; - break; - } - - if (cmd == NULL) { - rc = -EBUSY; - } else { - rc = 0; - cmd->nch_number = nal; - cmd->nch_handler = handler; - cmd->nch_private = private; -- } -- - PORTAL_SYMBOL_PUT (kpr_control_interface); - return (rc); - up(&nal_cmd_sem); - - return rc; --} -EXPORT_SYMBOL(libcfs_nal_cmd_register); -- - static int - kportal_router_cmd(struct portals_cfg *pcfg, void * private) -void -libcfs_nal_cmd_unregister(int nal) --{ - int err = -EINVAL; - ENTRY; - - switch(pcfg->pcfg_command) { - default: - CDEBUG(D_IOCTL, "Inappropriate cmd: %d\n", pcfg->pcfg_command); - break; - - case NAL_CMD_ADD_ROUTE: - CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n", - pcfg->pcfg_nal, pcfg->pcfg_nid, - pcfg->pcfg_nid2, pcfg->pcfg_nid3); - err = kportal_add_route(pcfg->pcfg_gw_nal, pcfg->pcfg_nid, - pcfg->pcfg_nid2, pcfg->pcfg_nid3); - break; - struct nal_cmd_handler *cmd; -- - case NAL_CMD_DEL_ROUTE: - CDEBUG (D_IOCTL, "Removing routes via [%d] "LPU64" : "LPU64" - "LPU64"\n", - pcfg->pcfg_gw_nal, pcfg->pcfg_nid, - pcfg->pcfg_nid2, pcfg->pcfg_nid3); - err = kportal_del_route (pcfg->pcfg_gw_nal, pcfg->pcfg_nid, - pcfg->pcfg_nid2, pcfg->pcfg_nid3); - break; - CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal); -- - case NAL_CMD_NOTIFY_ROUTER: { - CDEBUG (D_IOCTL, "Notifying peer [%d] "LPU64" %s @ %ld\n", - pcfg->pcfg_gw_nal, pcfg->pcfg_nid, - pcfg->pcfg_flags ? 
"Enabling" : "Disabling", - (time_t)pcfg->pcfg_nid3); - - err = kportal_notify_router (pcfg->pcfg_gw_nal, pcfg->pcfg_nid, - pcfg->pcfg_flags, - (time_t)pcfg->pcfg_nid3); - break; - } - - case NAL_CMD_GET_ROUTE: - CDEBUG (D_IOCTL, "Getting route [%d]\n", pcfg->pcfg_count); - err = kportal_get_route(pcfg->pcfg_count, &pcfg->pcfg_gw_nal, - &pcfg->pcfg_nid, - &pcfg->pcfg_nid2, &pcfg->pcfg_nid3, - &pcfg->pcfg_flags); - break; - } - RETURN(err); - down(&nal_cmd_sem); - cmd = libcfs_find_nal_cmd_handler(nal); - LASSERT (cmd != NULL); - cmd->nch_handler = NULL; - cmd->nch_private = NULL; - up(&nal_cmd_sem); --} -EXPORT_SYMBOL(libcfs_nal_cmd_unregister); -- --int - kportal_nal_cmd(struct portals_cfg *pcfg) -libcfs_nal_cmd(struct portals_cfg *pcfg) --{ - struct nal_cmd_handler *cmd; -- __u32 nal = pcfg->pcfg_nal; - int rc = -EINVAL; - - int rc = -EINVAL; -- ENTRY; -- -- down(&nal_cmd_sem); - if (nal > 0 && nal <= NAL_MAX_NR && nal_cmd[nal].nch_handler) { - cmd = libcfs_find_nal_cmd_handler(nal); - if (cmd != NULL) { -- CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal, -- pcfg->pcfg_command); - rc = nal_cmd[nal].nch_handler(pcfg, nal_cmd[nal].nch_private); - } else { - CERROR("invalid nal: %d, cmd: %d\n", nal, pcfg->pcfg_command); - rc = cmd->nch_handler(pcfg, cmd->nch_private); -- } -- up(&nal_cmd_sem); - RETURN(rc); - } - - ptl_handle_ni_t * - kportal_get_ni (int nal) - { -- - switch (nal) - { - case QSWNAL: - return (PORTAL_SYMBOL_GET(kqswnal_ni)); - case SOCKNAL: - return (PORTAL_SYMBOL_GET(ksocknal_ni)); - case GMNAL: - return (PORTAL_SYMBOL_GET(kgmnal_ni)); - case IBNAL: - return (PORTAL_SYMBOL_GET(kibnal_ni)); - case TCPNAL: - /* userspace NAL */ - return (NULL); - case SCIMACNAL: - return (PORTAL_SYMBOL_GET(kscimacnal_ni)); - default: - /* A warning to a naive caller */ - CERROR ("unknown nal: %d\n", nal); - return (NULL); - } - RETURN(rc); --} - - void - kportal_put_ni (int nal) - { -EXPORT_SYMBOL(libcfs_nal_cmd); -- - switch (nal) - { - case QSWNAL: - 
PORTAL_SYMBOL_PUT(kqswnal_ni); - break; - case SOCKNAL: - PORTAL_SYMBOL_PUT(ksocknal_ni); - break; - case GMNAL: - PORTAL_SYMBOL_PUT(kgmnal_ni); - break; - case IBNAL: - PORTAL_SYMBOL_PUT(kibnal_ni); - break; - case TCPNAL: - /* A lesson to a malicious caller */ - LBUG (); - case SCIMACNAL: - PORTAL_SYMBOL_PUT(kscimacnal_ni); - break; - default: - CERROR ("unknown nal: %d\n", nal); - } - } -static DECLARE_RWSEM(ioctl_list_sem); -static LIST_HEAD(ioctl_list); -- - int - kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private) -int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand) --{ -- int rc = 0; - - CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler); - down_read(&ioctl_list_sem); - if (!list_empty(&hand->item)) - rc = -EBUSY; - up_read(&ioctl_list_sem); -- - if (nal > 0 && nal <= NAL_MAX_NR) { - down(&nal_cmd_sem); - if (nal_cmd[nal].nch_handler != NULL) - rc = -EBUSY; - else { - nal_cmd[nal].nch_handler = handler; - nal_cmd[nal].nch_private = private; - } - up(&nal_cmd_sem); - if (rc == 0) { - down_write(&ioctl_list_sem); - list_add_tail(&hand->item, &ioctl_list); - up_write(&ioctl_list_sem); -- } - return rc; - RETURN(0); --} -EXPORT_SYMBOL(libcfs_register_ioctl); -- - int - kportal_nal_unregister(int nal) -int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand) --{ -- int rc = 0; - - CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal); - down_read(&ioctl_list_sem); - if (list_empty(&hand->item)) - rc = -ENOENT; - up_read(&ioctl_list_sem); -- - if (nal > 0 && nal <= NAL_MAX_NR) { - down(&nal_cmd_sem); - nal_cmd[nal].nch_handler = NULL; - nal_cmd[nal].nch_private = NULL; - up(&nal_cmd_sem); - if (rc == 0) { - down_write(&ioctl_list_sem); - list_del_init(&hand->item); - up_write(&ioctl_list_sem); -- } - return rc; - RETURN(0); --} - -EXPORT_SYMBOL(libcfs_deregister_ioctl); -- - static int kportal_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) -static int libcfs_ioctl(struct inode *inode, 
struct file *file, - unsigned int cmd, unsigned long arg) --{ - int err = 0; - int err = -EINVAL; -- char buf[1024]; -- struct portal_ioctl_data *data; - char str[PTL_NALFMT_SIZE]; - -- ENTRY; -- -- if (current->fsuid != 0) -- RETURN(err = -EACCES); -- -- if ( _IOC_TYPE(cmd) != IOC_PORTAL_TYPE || -- _IOC_NR(cmd) < IOC_PORTAL_MIN_NR || -- _IOC_NR(cmd) > IOC_PORTAL_MAX_NR ) { -- CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n", -- _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd)); -- RETURN(-EINVAL); -- } -- -- if (portal_ioctl_getdata(buf, buf + 800, (void *)arg)) { -- CERROR("PORTALS ioctl: data error\n"); -- RETURN(-EINVAL); -- } -- -- data = (struct portal_ioctl_data *)buf; -- -- switch (cmd) { -- case IOC_PORTAL_CLEAR_DEBUG: -- portals_debug_clear_buffer(); -- RETURN(0); -- case IOC_PORTAL_PANIC: -- if (!capable (CAP_SYS_BOOT)) -- RETURN (-EPERM); -- panic("debugctl-invoked panic"); -- RETURN(0); -- case IOC_PORTAL_MARK_DEBUG: -- if (data->ioc_inlbuf1 == NULL || -- data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0') -- RETURN(-EINVAL); -- portals_debug_mark_buffer(data->ioc_inlbuf1); - RETURN(0); - case IOC_PORTAL_PING: { - void (*ping)(struct portal_ioctl_data *); - - CDEBUG(D_IOCTL, "doing %d pings to nid "LPX64" (%s)\n", - data->ioc_count, data->ioc_nid, - portals_nid2str(data->ioc_nal, data->ioc_nid, str)); - ping = PORTAL_SYMBOL_GET(kping_client); - if (!ping) - CERROR("PORTAL_SYMBOL_GET failed\n"); - else { - ping(data); - PORTAL_SYMBOL_PUT(kping_client); - } -- RETURN(0); - } - - case IOC_PORTAL_GET_NID: { - const ptl_handle_ni_t *nip; - ptl_process_id_t pid; - - CDEBUG (D_IOCTL, "Getting nid for nal [%d]\n", data->ioc_nal); - - nip = kportal_get_ni (data->ioc_nal); - if (nip == NULL) - RETURN (-EINVAL); - - err = PtlGetId (*nip, &pid); - LASSERT (err == PTL_OK); - kportal_put_ni (data->ioc_nal); - - data->ioc_nid = pid.nid; - if (copy_to_user ((char *)arg, data, sizeof (*data))) - err = -EFAULT; - break; - } - - case IOC_PORTAL_NAL_CMD: { - struct 
portals_cfg pcfg; - - LASSERT (data->ioc_plen1 == sizeof(pcfg)); - err = copy_from_user(&pcfg, (void *)data->ioc_pbuf1, - sizeof(pcfg)); - if ( err ) { - EXIT; - return err; - } - - CDEBUG (D_IOCTL, "nal command nal %d cmd %d\n", pcfg.pcfg_nal, - pcfg.pcfg_command); - err = kportal_nal_cmd(&pcfg); - if (err == 0) { - if (copy_to_user((char *)data->ioc_pbuf1, &pcfg, - sizeof (pcfg))) - err = -EFAULT; - if (copy_to_user((char *)arg, data, sizeof (*data))) - err = -EFAULT; - } - break; - } - case IOC_PORTAL_FAIL_NID: { - const ptl_handle_ni_t *nip; - - CDEBUG (D_IOCTL, "fail nid: [%d] "LPU64" count %d\n", - data->ioc_nal, data->ioc_nid, data->ioc_count); - - nip = kportal_get_ni (data->ioc_nal); - if (nip == NULL) - return (-EINVAL); - - err = PtlFailNid (*nip, data->ioc_nid, data->ioc_count); - kportal_put_ni (data->ioc_nal); - break; - } --#if LWT_SUPPORT -- case IOC_PORTAL_LWT_CONTROL: -- err = lwt_control (data->ioc_flags, data->ioc_misc); -- break; -- - case IOC_PORTAL_LWT_SNAPSHOT: - err = lwt_snapshot (&data->ioc_nid, - &data->ioc_count, &data->ioc_misc, - case IOC_PORTAL_LWT_SNAPSHOT: { - cycles_t now; - int ncpu; - int total_size; - - err = lwt_snapshot (&now, &ncpu, &total_size, -- data->ioc_pbuf1, data->ioc_plen1); - data->ioc_nid = now; - data->ioc_count = ncpu; - data->ioc_misc = total_size; - - /* Hedge against broken user/kernel typedefs (e.g. 
cycles_t) */ - data->ioc_nid = sizeof(lwt_event_t); - data->ioc_nid2 = offsetof(lwt_event_t, lwte_where); - -- if (err == 0 && -- copy_to_user((char *)arg, data, sizeof (*data))) -- err = -EFAULT; -- break; - } -- -- case IOC_PORTAL_LWT_LOOKUP_STRING: -- err = lwt_lookup_string (&data->ioc_count, data->ioc_pbuf1, -- data->ioc_pbuf2, data->ioc_plen2); -- if (err == 0 && -- copy_to_user((char *)arg, data, sizeof (*data))) -- err = -EFAULT; -- break; --#endif - case IOC_PORTAL_NAL_CMD: { - struct portals_cfg pcfg; - - if (data->ioc_plen1 != sizeof(pcfg)) { - CERROR("Bad ioc_plen1 %d (wanted %d)\n", - data->ioc_plen1, sizeof(pcfg)); - err = -EINVAL; - break; - } - - if (copy_from_user(&pcfg, (void *)data->ioc_pbuf1, - sizeof(pcfg))) { - err = -EFAULT; - break; - } - - CDEBUG (D_IOCTL, "nal command nal %d cmd %d\n", pcfg.pcfg_nal, - pcfg.pcfg_command); - err = libcfs_nal_cmd(&pcfg); - - if (err == 0 && - copy_to_user((char *)data->ioc_pbuf1, &pcfg, - sizeof (pcfg))) - err = -EFAULT; - break; - } - -- case IOC_PORTAL_MEMHOG: -- if (!capable (CAP_SYS_ADMIN)) -- err = -EPERM; -- else if (file->private_data == NULL) { -- err = -EINVAL; -- } else { -- kportal_memhog_free(file->private_data); -- err = kportal_memhog_alloc(file->private_data, -- data->ioc_count, -- data->ioc_flags); -- if (err != 0) -- kportal_memhog_free(file->private_data); -- } -- break; -- - default: - default: { - struct libcfs_ioctl_handler *hand; -- err = -EINVAL; - break; - down_read(&ioctl_list_sem); - list_for_each_entry(hand, &ioctl_list, item) { - err = hand->handle_ioctl(data, cmd, arg); - if (err != -EINVAL) - break; - } - up_read(&ioctl_list_sem); - } break; -- } -- -- RETURN(err); --} -- -- - static struct file_operations portalsdev_fops = { - ioctl: kportal_ioctl, - open: kportal_psdev_open, - release: kportal_psdev_release -static struct file_operations libcfs_fops = { - ioctl: libcfs_ioctl, - open: libcfs_psdev_open, - release: libcfs_psdev_release --}; -- -- - static struct miscdevice 
portal_dev = { -static struct miscdevice libcfs_dev = { -- PORTAL_MINOR, -- "portals", - &portalsdev_fops - &libcfs_fops --}; -- --extern int insert_proc(void); --extern void remove_proc(void); --MODULE_AUTHOR("Peter J. Braam "); --MODULE_DESCRIPTION("Portals v3.1"); --MODULE_LICENSE("GPL"); -- - static int init_kportals_module(void) -static int init_libcfs_module(void) --{ -- int rc; -- -- rc = portals_debug_init(5 * 1024 * 1024); -- if (rc < 0) { -- printk(KERN_ERR "LustreError: portals_debug_init: %d\n", rc); -- return (rc); -- } -- --#if LWT_SUPPORT -- rc = lwt_init(); -- if (rc != 0) { -- CERROR("lwt_init: error %d\n", rc); -- goto cleanup_debug; -- } --#endif - sema_init(&nal_cmd_sem, 1); - - rc = misc_register(&portal_dev); - rc = misc_register(&libcfs_dev); -- if (rc) { -- CERROR("misc_register: error %d\n", rc); -- goto cleanup_lwt; - } - - rc = PtlInit(); - if (rc) { - CERROR("PtlInit: error %d\n", rc); - goto cleanup_deregister; -- } -- -- rc = insert_proc(); -- if (rc) { -- CERROR("insert_proc: error %d\n", rc); - goto cleanup_fini; - } - - rc = kportal_nal_register(ROUTER, kportal_router_cmd, NULL); - if (rc) { - CERROR("kportal_nal_registre: ROUTER error %d\n", rc); - goto cleanup_proc; - goto cleanup_deregister; -- } -- -- CDEBUG (D_OTHER, "portals setup OK\n"); -- return (0); -- - cleanup_proc: - remove_proc(); - cleanup_fini: - PtlFini(); -- cleanup_deregister: - misc_deregister(&portal_dev); - misc_deregister(&libcfs_dev); -- cleanup_lwt: --#if LWT_SUPPORT -- lwt_fini(); -- cleanup_debug: --#endif -- portals_debug_cleanup(); -- return rc; --} -- - static void exit_kportals_module(void) -static void exit_libcfs_module(void) --{ -- int rc; -- - kportal_nal_unregister(ROUTER); -- remove_proc(); - PtlFini(); -- -- CDEBUG(D_MALLOC, "before Portals cleanup: kmem %d\n", -- atomic_read(&portal_kmemory)); -- - rc = misc_deregister(&portal_dev); - rc = misc_deregister(&libcfs_dev); -- if (rc) -- CERROR("misc_deregister error %d\n", rc); -- --#if LWT_SUPPORT 
-- lwt_fini(); --#endif -- -- if (atomic_read(&portal_kmemory) != 0) -- CERROR("Portals memory leaked: %d bytes\n", -- atomic_read(&portal_kmemory)); -- -- rc = portals_debug_cleanup(); -- if (rc) -- printk(KERN_ERR "LustreError: portals_debug_cleanup: %d\n", rc); --} -- - EXPORT_SYMBOL(lib_dispatch); - EXPORT_SYMBOL(PtlMEAttach); - EXPORT_SYMBOL(PtlMEInsert); - EXPORT_SYMBOL(PtlMEUnlink); - EXPORT_SYMBOL(PtlEQAlloc); - EXPORT_SYMBOL(PtlMDAttach); - EXPORT_SYMBOL(PtlMDUnlink); - EXPORT_SYMBOL(PtlNIInit); - EXPORT_SYMBOL(PtlNIFini); - EXPORT_SYMBOL(PtlNIDebug); - EXPORT_SYMBOL(PtlInit); - EXPORT_SYMBOL(PtlFini); - EXPORT_SYMBOL(PtlPut); - EXPORT_SYMBOL(PtlGet); - EXPORT_SYMBOL(ptl_err_str); - EXPORT_SYMBOL(PtlEQWait); - EXPORT_SYMBOL(PtlEQFree); - EXPORT_SYMBOL(PtlEQGet); - EXPORT_SYMBOL(PtlGetId); - EXPORT_SYMBOL(PtlMDBind); - EXPORT_SYMBOL(lib_iov_nob); - EXPORT_SYMBOL(lib_copy_iov2buf); - EXPORT_SYMBOL(lib_copy_buf2iov); - EXPORT_SYMBOL(lib_extract_iov); - EXPORT_SYMBOL(lib_kiov_nob); - EXPORT_SYMBOL(lib_copy_kiov2buf); - EXPORT_SYMBOL(lib_copy_buf2kiov); - EXPORT_SYMBOL(lib_extract_kiov); - EXPORT_SYMBOL(lib_finalize); - EXPORT_SYMBOL(lib_parse); - EXPORT_SYMBOL(lib_fake_reply_msg); - EXPORT_SYMBOL(lib_init); - EXPORT_SYMBOL(lib_fini); - EXPORT_SYMBOL(dispatch_name); --EXPORT_SYMBOL(kportal_daemonize); --EXPORT_SYMBOL(kportal_blockallsigs); - EXPORT_SYMBOL(kportal_nal_register); - EXPORT_SYMBOL(kportal_nal_unregister); --EXPORT_SYMBOL(kportal_assertion_failed); - EXPORT_SYMBOL(kportal_get_ni); - EXPORT_SYMBOL(kportal_put_ni); - EXPORT_SYMBOL(kportal_nal_cmd); -- - module_init(init_kportals_module); - module_exit (exit_kportals_module); -module_init(init_libcfs_module); -module_exit(exit_libcfs_module); diff --cc lnet/libcfs/proc.c index 4b39902,aa9cfa8..0000000 deleted file mode 100644,100644 --- a/lnet/libcfs/proc.c +++ /dev/null @@@ -1,317 -1,321 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: 
-- * -- * Copyright (C) 2001, 2002 Cluster File Systems, Inc. -- * Author: Zach Brown -- * Author: Peter J. Braam -- * Author: Phil Schwan -- * -- * This file is part of Lustre, http://www.lustre.org. -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --#ifndef EXPORT_SYMTAB --# define EXPORT_SYMTAB --#endif -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include -- --#include --#include -- --#include --#include --#include --#include --#include --#include -- --#include --#include -- --# define DEBUG_SUBSYSTEM S_PORTALS -- --#include --#include --#include "tracefile.h" -- --static struct ctl_table_header *portals_table_header = NULL; --extern char debug_file_path[1024]; --extern char portals_upcall[1024]; -- --#define PSDEV_PORTALS (0x100) - #define PSDEV_DEBUG 1 /* control debugging */ - #define PSDEV_SUBSYSTEM_DEBUG 2 /* control debugging */ - #define PSDEV_PRINTK 3 /* force all errors to console */ - #define PSDEV_CONSOLE 4 /* allow _any_ messages to console */ - #define PSDEV_DEBUG_PATH 5 /* crashdump log location */ - #define PSDEV_DEBUG_DUMP_PATH 6 /* crashdump tracelog location */ - #define PSDEV_PORTALS_UPCALL 7 /* User mode upcall script */ -enum { - PSDEV_DEBUG = 1, /* control debugging */ - PSDEV_SUBSYSTEM_DEBUG, /* control debugging */ - PSDEV_PRINTK, /* force all errors to console */ - 
PSDEV_CONSOLE, /* allow _any_ messages to console */ - PSDEV_DEBUG_PATH, /* crashdump log location */ - PSDEV_DEBUG_DUMP_PATH, /* crashdump tracelog location */ - PSDEV_PORTALS_UPCALL, /* User mode upcall script */ - PSDEV_PORTALS_MEMUSED, /* bytes currently PORTAL_ALLOCated */ -}; -- - #define PORTALS_PRIMARY_CTLCNT 7 - static struct ctl_table portals_table[PORTALS_PRIMARY_CTLCNT + 1] = { -static struct ctl_table portals_table[] = { -- {PSDEV_DEBUG, "debug", &portal_debug, sizeof(int), 0644, NULL, -- &proc_dointvec}, -- {PSDEV_SUBSYSTEM_DEBUG, "subsystem_debug", &portal_subsystem_debug, -- sizeof(int), 0644, NULL, &proc_dointvec}, -- {PSDEV_PRINTK, "printk", &portal_printk, sizeof(int), 0644, NULL, -- &proc_dointvec}, -- {PSDEV_DEBUG_PATH, "debug_path", debug_file_path, -- sizeof(debug_file_path), 0644, NULL, &proc_dostring, &sysctl_string}, -- {PSDEV_PORTALS_UPCALL, "upcall", portals_upcall, -- sizeof(portals_upcall), 0644, NULL, &proc_dostring, -- &sysctl_string}, - {PSDEV_PORTALS_MEMUSED, "memused", (int *)&portal_kmemory.counter, - sizeof(int), 0644, NULL, &proc_dointvec}, -- {0} --}; -- --static struct ctl_table top_table[2] = { -- {PSDEV_PORTALS, "portals", NULL, 0, 0555, portals_table}, -- {0} --}; -- -- --#ifdef PORTALS_PROFILING --/* -- * profiling stuff. we do this statically for now 'cause its simple, -- * but we could do some tricks with elf sections to have this array -- * automatically built. -- */ --#define def_prof(FOO) [PROF__##FOO] = {#FOO, 0, } -- --struct prof_ent prof_ents[] = { -- def_prof(our_recvmsg), -- def_prof(our_sendmsg), -- def_prof(socknal_recv), -- def_prof(lib_parse), -- def_prof(conn_list_walk), -- def_prof(memcpy), -- def_prof(lib_finalize), -- def_prof(pingcli_time), -- def_prof(gmnal_send), -- def_prof(gmnal_recv), --}; -- --EXPORT_SYMBOL(prof_ents); -- --/* -- * this function is as crazy as the proc filling api -- * requires. -- * -- * buffer: page allocated for us to scribble in. 
the -- * data returned to the user will be taken from here. -- * *start: address of the pointer that will tell the -- * caller where in buffer the data the user wants is. -- * ppos: offset in the entire /proc file that the user -- * currently wants. -- * wanted: the amount of data the user wants. -- * -- * while going, 'curpos' is the offset in the entire -- * file where we currently are. We only actually -- * start filling buffer when we get to a place in -- * the file that the user cares about. -- * -- * we take care to only sprintf when the user cares because -- * we're holding a lock while we do this. -- * -- * we're smart and know that we generate fixed size lines. -- * we only start writing to the buffer when the user cares. -- * This is unpredictable because we don't snapshot the -- * list between calls that are filling in a file from -- * the list. The list could change mid read and the -- * output will look very weird indeed. oh well. -- */ -- --static int prof_read_proc(char *buffer, char **start, off_t ppos, int wanted, -- int *eof, void *data) --{ -- int len = 0, i; -- int curpos; -- char *header = "Interval Cycles_per (Starts Finishes Total)\n"; -- int header_len = strlen(header); -- char *format = "%-15s %.12Ld (%.12d %.12d %.12Ld)"; -- int line_len = (15 + 1 + 12 + 2 + 12 + 1 + 12 + 1 + 12 + 1); -- -- *start = buffer; -- -- if (ppos < header_len) { -- int diff = MIN(header_len, wanted); -- memcpy(buffer, header + ppos, diff); -- len += diff; -- ppos += diff; -- } -- -- if (len >= wanted) -- goto out; -- -- curpos = header_len; -- -- for ( i = 0; i < MAX_PROFS ; i++) { -- int copied; -- struct prof_ent *pe = &prof_ents[i]; -- long long cycles_per; -- /* -- * find the part of the array that the buffer wants -- */ -- if (ppos >= (curpos + line_len)) { -- curpos += line_len; -- continue; -- } -- /* the clever caller split a line */ -- if (ppos > curpos) { -- *start = buffer + (ppos - curpos); -- } -- -- if (pe->finishes == 0) -- cycles_per = 0; -- else 
-- { -- cycles_per = pe->total_cycles; -- do_div (cycles_per, pe->finishes); -- } -- -- copied = sprintf(buffer + len, format, pe->str, cycles_per, -- pe->starts, pe->finishes, pe->total_cycles); -- -- len += copied; -- -- /* pad to line len, -1 for \n */ -- if ((copied < line_len-1)) { -- int diff = (line_len-1) - copied; -- memset(buffer + len, ' ', diff); -- len += diff; -- copied += diff; -- } -- -- buffer[len++]= '\n'; -- -- /* bail if we have enough */ -- if (((buffer + len) - *start) >= wanted) -- break; -- -- curpos += line_len; -- } -- -- /* lameness */ -- if (i == MAX_PROFS) -- *eof = 1; -- out: -- -- return MIN(((buffer + len) - *start), wanted); --} -- --/* -- * all kids love /proc :/ -- */ --static unsigned char basedir[]="net/portals"; --#endif /* PORTALS_PROFILING */ -- --int insert_proc(void) --{ -- struct proc_dir_entry *ent; --#if PORTALS_PROFILING -- unsigned char dir[128]; -- -- if (ARRAY_SIZE(prof_ents) != MAX_PROFS) { -- CERROR("profiling enum and array are out of sync.\n"); -- return -1; -- } -- -- /* -- * This is pretty lame. assuming that failure just -- * means that they already existed. 
-- */ -- strcat(dir, basedir); -- create_proc_entry(dir, S_IFDIR, 0); -- -- strcat(dir, "/cycles"); -- ent = create_proc_entry(dir, 0, 0); -- if (!ent) { -- CERROR("couldn't register %s?\n", dir); -- return -1; -- } -- -- ent->data = NULL; -- ent->read_proc = prof_read_proc; --#endif /* PORTALS_PROFILING */ -- --#ifdef CONFIG_SYSCTL -- if (!portals_table_header) -- portals_table_header = register_sysctl_table(top_table, 0); --#endif -- -- ent = create_proc_entry("sys/portals/dump_kernel", 0, NULL); -- if (ent == NULL) { -- CERROR("couldn't register dump_kernel\n"); -- return -1; -- } -- ent->write_proc = trace_dk; -- -- ent = create_proc_entry("sys/portals/daemon_file", 0, NULL); -- if (ent == NULL) { -- CERROR("couldn't register daemon_file\n"); -- return -1; -- } -- ent->write_proc = trace_write_daemon_file; -- ent->read_proc = trace_read_daemon_file; -- - ent = create_proc_entry("sys/portals/debug_mb", 0, NULL); - ent = create_proc_entry("sys/portals/debug_size", 0, NULL); -- if (ent == NULL) { - CERROR("couldn't register debug_mb\n"); - CERROR("couldn't register debug_size\n"); -- return -1; -- } - ent->write_proc = trace_write_debug_mb; - ent->read_proc = trace_read_debug_mb; - ent->write_proc = trace_write_debug_size; - ent->read_proc = trace_read_debug_size; -- -- return 0; --} -- --void remove_proc(void) --{ --#if PORTALS_PROFILING -- unsigned char dir[128]; -- int end; -- -- dir[0]='\0'; -- strcat(dir, basedir); -- -- end = strlen(dir); -- -- strcat(dir, "/cycles"); -- remove_proc_entry(dir, 0); -- -- dir[end] = '\0'; -- remove_proc_entry(dir, 0); --#endif /* PORTALS_PROFILING */ -- -- remove_proc_entry("sys/portals/dump_kernel", NULL); -- remove_proc_entry("sys/portals/daemon_file", NULL); - remove_proc_entry("sys/portals/debug_mb", NULL); - remove_proc_entry("sys/portals/debug_size", NULL); -- --#ifdef CONFIG_SYSCTL -- if (portals_table_header) -- unregister_sysctl_table(portals_table_header); -- portals_table_header = NULL; --#endif --} diff --cc 
lnet/libcfs/tracefile.c index a064605,562abcf..0000000 deleted file mode 100644,100644 --- a/lnet/libcfs/tracefile.c +++ /dev/null @@@ -1,876 -1,858 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (C) 2004 Cluster File Systems, Inc. -- * Author: Zach Brown -- * Author: Phil Schwan -- * -- * This file is part of Lustre, http://www.lustre.org. -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-- */ -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#ifdef HAVE_MM_INLINE --#include --#endif -- --#define DEBUG_SUBSYSTEM S_PORTALS -- --#include --#include -#include --#include -- - #define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT)) -#define TCD_MAX_PAGES 1280 -- --/* XXX move things up to the top, comment */ -- --static union { -- struct trace_cpu_data { -- struct list_head tcd_pages; -- unsigned long tcd_cur_pages; -- -- struct list_head tcd_daemon_pages; -- unsigned long tcd_cur_daemon_pages; -- -- unsigned long tcd_max_pages; -- int tcd_shutting_down; -- } tcd; -- char __pad[SMP_CACHE_BYTES]; --} trace_data[NR_CPUS] __cacheline_aligned; -- --struct page_collection { -- struct list_head pc_pages; -- spinlock_t pc_lock; -- int pc_want_daemon_pages; --}; -- --struct tracefiled_ctl { -- struct completion tctl_start; -- struct completion tctl_stop; -- wait_queue_head_t tctl_waitq; -- pid_t tctl_pid; -- atomic_t tctl_shutdown; --}; -- - #define TRACEFILE_SIZE (500 << 20) --static DECLARE_RWSEM(tracefile_sem); --static char *tracefile = NULL; - static long long tracefile_size = TRACEFILE_SIZE; --static struct tracefiled_ctl trace_tctl; --static DECLARE_MUTEX(trace_thread_sem); --static int thread_running = 0; -- --#ifndef get_cpu --#define get_cpu() smp_processor_id() --#define put_cpu() do { } while (0) --#endif -- --#define trace_get_tcd(FLAGS) ({ \ -- struct trace_cpu_data *__ret; \ -- int __cpu = get_cpu(); \ -- local_irq_save(FLAGS); \ -- __ret = &trace_data[__cpu].tcd; \ -- __ret; \ --}) -- --#define trace_put_tcd(TCD, FLAGS) do { \ -- local_irq_restore(FLAGS); \ -- put_cpu(); \ --} while (0) -- --static void put_pages_on_daemon_list_on_cpu(void *info); -- --/* return a page that has 'len' bytes left at the end */ --static struct page *trace_get_page(struct trace_cpu_data *tcd, -- unsigned long len) --{ -- struct page *page = NULL; -- -- if (len > PAGE_SIZE) { -- printk(KERN_ERR "cowardly refusing to write 
%lu bytes in a " -- "page\n", len); -- return NULL; -- } -- -- if (!list_empty(&tcd->tcd_pages)) { -- page = list_entry(tcd->tcd_pages.prev, struct page, -- PAGE_LIST_ENTRY); -- if (page->index + len <= PAGE_SIZE) -- return page; -- } -- -- if (tcd->tcd_cur_pages < tcd->tcd_max_pages) { -- page = alloc_page(GFP_ATOMIC); -- if (page == NULL) { -- /* the kernel should print a message for us. fall back -- * to using the last page in the ring buffer. */ -- goto ring_buffer; - return NULL; -- } -- page->index = 0; -- page->mapping = (void *)(long)smp_processor_id(); -- list_add_tail(&PAGE_LIST(page), &tcd->tcd_pages); -- tcd->tcd_cur_pages++; -- -- if (tcd->tcd_cur_pages > 8 && thread_running) { -- struct tracefiled_ctl *tctl = &trace_tctl; -- wake_up(&tctl->tctl_waitq); -- } -- return page; -- } -- -- ring_buffer: -- if (thread_running) { -- int pgcount = tcd->tcd_cur_pages / 10; -- struct page_collection pc; -- struct list_head *pos, *tmp; -- printk(KERN_WARNING "debug daemon buffer overflowed; discarding" -- " 10%% of pages (%d)\n", pgcount + 1); -- -- INIT_LIST_HEAD(&pc.pc_pages); -- spin_lock_init(&pc.pc_lock); -- -- list_for_each_safe(pos, tmp, &tcd->tcd_pages) { -- struct page *page; -- -- if (pgcount-- == 0) -- break; -- -- page = list_entry(pos, struct page, PAGE_LIST_ENTRY); -- list_del(&PAGE_LIST(page)); -- list_add_tail(&PAGE_LIST(page), &pc.pc_pages); -- tcd->tcd_cur_pages--; -- } -- put_pages_on_daemon_list_on_cpu(&pc); -- } -- LASSERT(!list_empty(&tcd->tcd_pages)); -- -- page = list_entry(tcd->tcd_pages.next, struct page, PAGE_LIST_ENTRY); -- page->index = 0; -- -- list_del(&PAGE_LIST(page)); -- list_add_tail(&PAGE_LIST(page), &tcd->tcd_pages); -- return page; --} -- --static void print_to_console(struct ptldebug_header *hdr, int mask, char *buf, -- int len, char *file, const char *fn) --{ -- char *prefix = NULL, *ptype = NULL; -- -- if ((mask & D_EMERG) != 0) { -- prefix = "LustreError"; -- ptype = KERN_EMERG; -- } else if ((mask & D_ERROR) != 0) { -- 
prefix = "LustreError"; -- ptype = KERN_ERR; -- } else if ((mask & D_WARNING) != 0) { -- prefix = "Lustre"; -- ptype = KERN_WARNING; -- } else if (portal_printk) { -- prefix = "Lustre"; -- ptype = KERN_INFO; -- } - - -- printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, hdr->ph_pid, -- hdr->ph_extern_pid, file, hdr->ph_line_num, fn, len, buf); --} -- --void portals_debug_msg(int subsys, int mask, char *file, const char *fn, -- const int line, unsigned long stack, char *format, ...) --{ -- struct trace_cpu_data *tcd; -- struct ptldebug_header header; -- struct page *page; - char *debug_buf = format; - int known_size, needed = 85 /* average message length */, max_nob; - char *debug_buf; - int known_size, needed, max_nob; -- va_list ap; -- unsigned long flags; -- struct timeval tv; -- -- if (*(format + strlen(format) - 1) != '\n') -- printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n", -- file, line, fn); -- -- tcd = trace_get_tcd(flags); -- if (tcd->tcd_shutting_down) -- goto out; -- -- do_gettimeofday(&tv); -- -- header.ph_subsys = subsys; -- header.ph_mask = mask; -- header.ph_cpu_id = smp_processor_id(); -- header.ph_sec = (__u32)tv.tv_sec; -- header.ph_usec = tv.tv_usec; -- header.ph_stack = stack; -- header.ph_pid = current->pid; -- header.ph_line_num = line; -- --#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)) -- header.ph_extern_pid = current->thread.extern_pid; --#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -- header.ph_extern_pid = current->thread.mode.tt.extern_pid; --#else -- header.ph_extern_pid = 0; --#endif -- -- known_size = sizeof(header) + strlen(file) + strlen(fn) + 2; // nulls -- - page = trace_get_page(tcd, known_size + 40); /* slop */ -- retry: - page = trace_get_page(tcd, needed + known_size); - if (page == NULL) { - debug_buf = format; - if (needed + known_size > PAGE_SIZE) - mask |= D_ERROR; - needed = strlen(format); - if (page == NULL) -- goto out; - } -- -- debug_buf = 
page_address(page) + page->index + known_size; -- - va_start(ap, format); -- max_nob = PAGE_SIZE - page->index - known_size; -- LASSERT(max_nob > 0); - va_start(ap, format); -- needed = vsnprintf(debug_buf, max_nob, format, ap); -- va_end(ap); -- - if (needed > max_nob) /* overflow. oh poop. */ - if (needed > max_nob) { - /* overflow. oh poop. */ - page = trace_get_page(tcd, needed + known_size); -- goto retry; - } -- -- header.ph_len = known_size + needed; -- debug_buf = page_address(page) + page->index; -- -- memcpy(debug_buf, &header, sizeof(header)); -- page->index += sizeof(header); -- debug_buf += sizeof(header); -- -- strcpy(debug_buf, file); -- page->index += strlen(file) + 1; -- debug_buf += strlen(file) + 1; -- -- strcpy(debug_buf, fn); -- page->index += strlen(fn) + 1; -- debug_buf += strlen(fn) + 1; -- -- page->index += needed; -- if (page->index > PAGE_SIZE) -- printk(KERN_EMERG "page->index == %lu in portals_debug_msg\n", -- page->index); -- - out: -- if ((mask & (D_EMERG | D_ERROR | D_WARNING)) || portal_printk) -- print_to_console(&header, mask, debug_buf, needed, file, fn); -- - out: -- trace_put_tcd(tcd, flags); --} --EXPORT_SYMBOL(portals_debug_msg); -- --static void collect_pages_on_cpu(void *info) --{ -- struct trace_cpu_data *tcd; -- unsigned long flags; -- struct page_collection *pc = info; -- -- tcd = trace_get_tcd(flags); -- -- spin_lock(&pc->pc_lock); -- list_splice(&tcd->tcd_pages, &pc->pc_pages); -- INIT_LIST_HEAD(&tcd->tcd_pages); -- tcd->tcd_cur_pages = 0; -- if (pc->pc_want_daemon_pages) { -- list_splice(&tcd->tcd_daemon_pages, &pc->pc_pages); -- INIT_LIST_HEAD(&tcd->tcd_daemon_pages); -- tcd->tcd_cur_daemon_pages = 0; -- } -- spin_unlock(&pc->pc_lock); -- -- trace_put_tcd(tcd, flags); --} -- --static void collect_pages(struct page_collection *pc) --{ -- /* needs to be fixed up for preempt */ -- INIT_LIST_HEAD(&pc->pc_pages); -- collect_pages_on_cpu(pc); -- smp_call_function(collect_pages_on_cpu, pc, 0, 1); --} -- --static void 
put_pages_back_on_cpu(void *info) --{ -- struct page_collection *pc = info; -- struct trace_cpu_data *tcd; -- struct list_head *pos, *tmp, *cur_head; -- unsigned long flags; -- -- tcd = trace_get_tcd(flags); -- -- cur_head = tcd->tcd_pages.next; -- -- spin_lock(&pc->pc_lock); -- list_for_each_safe(pos, tmp, &pc->pc_pages) { -- struct page *page; -- -- page = list_entry(pos, struct page, PAGE_LIST_ENTRY); -- LASSERT(page->index <= PAGE_SIZE); -- LASSERT(page_count(page) > 0); -- -- if ((unsigned long)page->mapping != smp_processor_id()) -- continue; -- -- list_del(&PAGE_LIST(page)); -- list_add_tail(&PAGE_LIST(page), cur_head); -- tcd->tcd_cur_pages++; -- } -- spin_unlock(&pc->pc_lock); -- -- trace_put_tcd(tcd, flags); --} -- --static void put_pages_back(struct page_collection *pc) --{ -- /* needs to be fixed up for preempt */ -- put_pages_back_on_cpu(pc); -- smp_call_function(put_pages_back_on_cpu, pc, 0, 1); --} -- --/* Add pages to a per-cpu debug daemon ringbuffer. This buffer makes sure that -- * we have a good amount of data at all times for dumping during an LBUG, even -- * if we have been steadily writing (and otherwise discarding) pages via the -- * debug daemon. 
*/ --static void put_pages_on_daemon_list_on_cpu(void *info) --{ -- struct page_collection *pc = info; -- struct trace_cpu_data *tcd; -- struct list_head *pos, *tmp; -- unsigned long flags; -- -- tcd = trace_get_tcd(flags); -- -- spin_lock(&pc->pc_lock); -- list_for_each_safe(pos, tmp, &pc->pc_pages) { -- struct page *page; -- -- page = list_entry(pos, struct page, PAGE_LIST_ENTRY); -- LASSERT(page->index <= PAGE_SIZE); -- LASSERT(page_count(page) > 0); -- if ((unsigned long)page->mapping != smp_processor_id()) -- continue; -- -- list_del(&PAGE_LIST(page)); -- list_add_tail(&PAGE_LIST(page), &tcd->tcd_daemon_pages); -- tcd->tcd_cur_daemon_pages++; -- -- if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) { -- LASSERT(!list_empty(&tcd->tcd_daemon_pages)); -- page = list_entry(tcd->tcd_daemon_pages.next, -- struct page, PAGE_LIST_ENTRY); -- -- LASSERT(page->index <= PAGE_SIZE); -- LASSERT(page_count(page) > 0); -- -- page->index = 0; -- list_del(&PAGE_LIST(page)); -- page->mapping = NULL; -- __free_page(page); -- tcd->tcd_cur_daemon_pages--; -- } -- } -- spin_unlock(&pc->pc_lock); -- -- trace_put_tcd(tcd, flags); --} -- --static void put_pages_on_daemon_list(struct page_collection *pc) --{ -- put_pages_on_daemon_list_on_cpu(pc); -- smp_call_function(put_pages_on_daemon_list_on_cpu, pc, 0, 1); --} -- --void trace_debug_print(void) --{ -- struct page_collection pc; -- struct list_head *pos, *tmp; -- -- spin_lock_init(&pc.pc_lock); -- -- collect_pages(&pc); -- list_for_each_safe(pos, tmp, &pc.pc_pages) { -- struct page *page; -- char *p, *file, *fn; -- -- page = list_entry(pos, struct page, PAGE_LIST_ENTRY); -- LASSERT(page->index <= PAGE_SIZE); -- LASSERT(page_count(page) > 0); -- -- p = page_address(page); -- while (p < ((char *)page_address(page) + PAGE_SIZE)) { -- struct ptldebug_header *hdr; -- int len; -- hdr = (void *)p; -- p += sizeof(*hdr); -- file = p; -- p += strlen(file) + 1; -- fn = p; -- p += strlen(fn) + 1; -- len = hdr->ph_len - (p - (char *)hdr); -- -- 
print_to_console(hdr, D_EMERG, p, len, file, fn); -- } -- -- list_del(&PAGE_LIST(page)); -- page->mapping = NULL; -- __free_page(page); -- } --} -- --int tracefile_dump_all_pages(char *filename) --{ -- struct page_collection pc; -- struct file *filp; -- struct list_head *pos, *tmp; -- mm_segment_t oldfs; -- int rc; -- -- down_write(&tracefile_sem); -- - filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600); - filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY, 0600); -- if (IS_ERR(filp)) { -- rc = PTR_ERR(filp); -- printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n", - filename, rc); - filename, rc); -- goto out; -- } -- -- spin_lock_init(&pc.pc_lock); -- pc.pc_want_daemon_pages = 1; -- collect_pages(&pc); -- if (list_empty(&pc.pc_pages)) { -- rc = 0; -- goto close; -- } -- -- /* ok, for now, just write the pages. in the future we'll be building -- * iobufs with the pages and calling generic_direct_IO */ -- oldfs = get_fs(); -- set_fs(get_ds()); -- list_for_each_safe(pos, tmp, &pc.pc_pages) { -- struct page *page; -- -- page = list_entry(pos, struct page, PAGE_LIST_ENTRY); -- LASSERT(page->index <= PAGE_SIZE); -- LASSERT(page_count(page) > 0); -- -- rc = filp->f_op->write(filp, page_address(page), page->index, -- &filp->f_pos); -- if (rc != page->index) { -- printk(KERN_WARNING "wanted to write %lu but wrote " -- "%d\n", page->index, rc); -- put_pages_back(&pc); -- break; -- } -- list_del(&PAGE_LIST(page)); -- page->mapping = NULL; -- __free_page(page); -- } -- set_fs(oldfs); -- rc = filp->f_op->fsync(filp, filp->f_dentry, 1); -- if (rc) -- printk(KERN_ERR "sync returns %d\n", rc); -- close: -- filp_close(filp, 0); -- out: -- up_write(&tracefile_sem); -- return rc; --} -- --void trace_flush_pages(void) --{ -- struct page_collection pc; -- struct list_head *pos, *tmp; -- -- spin_lock_init(&pc.pc_lock); -- -- collect_pages(&pc); -- list_for_each_safe(pos, tmp, &pc.pc_pages) { -- struct page *page; -- -- page = list_entry(pos, struct page, 
PAGE_LIST_ENTRY); -- LASSERT(page->index <= PAGE_SIZE); -- LASSERT(page_count(page) > 0); -- -- list_del(&PAGE_LIST(page)); -- page->mapping = NULL; -- __free_page(page); -- } --} -- --int trace_dk(struct file *file, const char *buffer, unsigned long count, -- void *data) --{ -- char *name; -- unsigned long off; -- int rc; -- -- name = kmalloc(count + 1, GFP_KERNEL); -- if (name == NULL) -- return -ENOMEM; -- -- if (copy_from_user(name, buffer, count)) { -- rc = -EFAULT; -- goto out; -- } -- -- if (name[0] != '/') { -- rc = -EINVAL; -- goto out; -- } -- -- /* be nice and strip out trailing '\n' */ -- for (off = count ; off > 2 && isspace(name[off - 1]); off--) -- ; -- -- name[off] = '\0'; -- rc = tracefile_dump_all_pages(name); --out: -- if (name) -- kfree(name); -- return count; --} --EXPORT_SYMBOL(trace_dk); -- --static int tracefiled(void *arg) --{ -- struct page_collection pc; -- struct tracefiled_ctl *tctl = arg; -- struct list_head *pos, *tmp; -- struct ptldebug_header *hdr; -- struct file *filp; -- struct page *page; -- mm_segment_t oldfs; -- int rc; -- -- /* we're started late enough that we pick up init's fs context */ -- /* this is so broken in uml? what on earth is going on? 
*/ -- kportal_daemonize("ktracefiled"); -- reparent_to_init(); -- -- spin_lock_init(&pc.pc_lock); -- complete(&tctl->tctl_start); -- -- while (1) { -- wait_queue_t __wait; -- -- init_waitqueue_entry(&__wait, current); -- add_wait_queue(&tctl->tctl_waitq, &__wait); -- set_current_state(TASK_INTERRUPTIBLE); -- schedule_timeout(HZ); -- remove_wait_queue(&tctl->tctl_waitq, &__wait); -- -- if (atomic_read(&tctl->tctl_shutdown)) -- break; -- -- pc.pc_want_daemon_pages = 0; -- collect_pages(&pc); -- if (list_empty(&pc.pc_pages)) -- continue; -- -- filp = NULL; -- down_read(&tracefile_sem); -- if (tracefile != NULL) { - filp = filp_open(tracefile, O_CREAT|O_RDWR|O_LARGEFILE, - 0600); - filp = filp_open(tracefile, O_CREAT|O_RDWR|O_APPEND|O_LARGEFILE, - 0600); -- if (IS_ERR(filp)) { -- printk("couldn't open %s: %ld\n", tracefile, -- PTR_ERR(filp)); -- filp = NULL; -- } -- } -- up_read(&tracefile_sem); -- if (filp == NULL) { -- put_pages_on_daemon_list(&pc); -- continue; -- } -- -- oldfs = get_fs(); -- set_fs(get_ds()); -- -- /* mark the first header, so we can sort in chunks */ -- page = list_entry(pc.pc_pages.next, struct page, -- PAGE_LIST_ENTRY); -- LASSERT(page->index <= PAGE_SIZE); -- LASSERT(page_count(page) > 0); -- -- hdr = page_address(page); -- hdr->ph_flags |= PH_FLAG_FIRST_RECORD; -- -- list_for_each_safe(pos, tmp, &pc.pc_pages) { - static loff_t f_pos; -- page = list_entry(pos, struct page, PAGE_LIST_ENTRY); -- LASSERT(page->index <= PAGE_SIZE); -- LASSERT(page_count(page) > 0); - - if (f_pos >= tracefile_size) - f_pos = 0; - else if (f_pos > filp->f_dentry->d_inode->i_size) - f_pos = filp->f_dentry->d_inode->i_size; -- -- rc = filp->f_op->write(filp, page_address(page), - page->index, &f_pos); - page->index, &filp->f_pos); -- if (rc != page->index) { -- printk(KERN_WARNING "wanted to write %lu but " -- "wrote %d\n", page->index, rc); -- put_pages_back(&pc); -- } -- } -- set_fs(oldfs); -- filp_close(filp, 0); -- -- put_pages_on_daemon_list(&pc); -- } -- 
complete(&tctl->tctl_stop); -- return 0; --} -- --int trace_start_thread(void) --{ -- struct tracefiled_ctl *tctl = &trace_tctl; -- int rc = 0; -- -- down(&trace_thread_sem); -- if (thread_running) -- goto out; -- -- init_completion(&tctl->tctl_start); -- init_completion(&tctl->tctl_stop); -- init_waitqueue_head(&tctl->tctl_waitq); -- atomic_set(&tctl->tctl_shutdown, 0); -- -- if (kernel_thread(tracefiled, tctl, 0) < 0) { -- rc = -ECHILD; -- goto out; -- } -- -- wait_for_completion(&tctl->tctl_start); -- thread_running = 1; --out: -- up(&trace_thread_sem); -- return rc; --} -- --void trace_stop_thread(void) --{ -- struct tracefiled_ctl *tctl = &trace_tctl; -- -- down(&trace_thread_sem); -- if (thread_running) { -- printk(KERN_INFO "Shutting down debug daemon thread...\n"); -- atomic_set(&tctl->tctl_shutdown, 1); -- wait_for_completion(&tctl->tctl_stop); -- thread_running = 0; -- } -- up(&trace_thread_sem); --} -- --int trace_write_daemon_file(struct file *file, const char *buffer, -- unsigned long count, void *data) --{ -- char *name; -- unsigned long off; -- int rc; -- -- name = kmalloc(count + 1, GFP_KERNEL); -- if (name == NULL) -- return -ENOMEM; -- -- if (copy_from_user(name, buffer, count)) { -- rc = -EFAULT; -- goto out; -- } -- -- /* be nice and strip out trailing '\n' */ -- for (off = count ; off > 2 && isspace(name[off - 1]); off--) -- ; -- -- name[off] = '\0'; -- -- down_write(&tracefile_sem); -- if (strcmp(name, "stop") == 0) { -- tracefile = NULL; -- trace_stop_thread(); - goto out_sem; - } else if (strncmp(name, "size=", 5) == 0) { - tracefile_size = simple_strtoul(name + 5, NULL, 0); - if (tracefile_size < 10 || tracefile_size > 20480) - tracefile_size = TRACEFILE_SIZE; - else - tracefile_size <<= 20; -- goto out_sem; -- } -- -- if (name[0] != '/') { -- rc = -EINVAL; -- goto out_sem; -- } -- -- if (tracefile != NULL) -- kfree(tracefile); -- -- tracefile = name; -- name = NULL; - - printk(KERN_INFO "Lustre: debug daemon will attempt to start writing " 
- "to %s (%lukB max)\n", tracefile, (long)(tracefile_size >> 10)); - -- trace_start_thread(); -- -- out_sem: -- up_write(&tracefile_sem); -- -- out: - kfree(name); - if (name) - kfree(name); -- return count; --} -- --int trace_read_daemon_file(char *page, char **start, off_t off, int count, -- int *eof, void *data) --{ -- int rc; -- -- down_read(&tracefile_sem); -- rc = snprintf(page, count, "%s", tracefile); -- up_read(&tracefile_sem); -- -- return rc; --} -- - int trace_write_debug_mb(struct file *file, const char *buffer, - unsigned long count, void *data) -int trace_write_debug_size(struct file *file, const char *buffer, - unsigned long count, void *data) --{ - char string[32]; - int i; - unsigned max; - char *string; - int rc, i, max; -- - if (count >= sizeof(string)) { - printk(KERN_ERR "Lustre: value too large (length %lu bytes)\n", - count); - return -EOVERFLOW; - } - string = kmalloc(count + 1, GFP_KERNEL); - if (string == NULL) - return -ENOMEM; -- - if (copy_from_user(string, buffer, count)) - return -EFAULT; - if (copy_from_user(string, buffer, count)) { - rc = -EFAULT; - goto out; - } -- -- max = simple_strtoul(string, NULL, 0); - if (max == 0) - return -EINVAL; - if (max == 0) { - rc = -EINVAL; - goto out; - } -- max /= smp_num_cpus; -- - if (max * smp_num_cpus > (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5) { - if (max > num_physpages / 5 * 4) { -- printk(KERN_ERR "Lustre: Refusing to set debug buffer size to " - "%d MB, which is more than 80%% of physical RAM " - "(%lu).\n", max * smp_num_cpus, - (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5); - "%d pages, which is more than 80%% of physical pages " - "(%lu).\n", max * smp_num_cpus, num_physpages / 5 * 4); -- return count; -- } - -- for (i = 0; i < NR_CPUS; i++) { -- struct trace_cpu_data *tcd; -- tcd = &trace_data[i].tcd; - tcd->tcd_max_pages = max << (20 - PAGE_SHIFT); - tcd->tcd_max_pages = max; -- } - out: - kfree(string); -- return count; --} -- - int trace_read_debug_mb(char *page, char 
**start, off_t off, int count, - int *eof, void *data) -int trace_read_debug_size(char *page, char **start, off_t off, int count, - int *eof, void *data) --{ -- struct trace_cpu_data *tcd; -- unsigned long flags; -- int rc; -- -- tcd = trace_get_tcd(flags); - rc = snprintf(page, count, "%lu\n", - tcd->tcd_max_pages * smp_num_cpus << (20 - PAGE_SHIFT)); - rc = snprintf(page, count, "%lu", tcd->tcd_max_pages); -- trace_put_tcd(tcd, flags); -- -- return rc; --} -- --int tracefile_init(void) --{ -- struct trace_cpu_data *tcd; -- int i; -- -- for (i = 0; i < NR_CPUS; i++) { -- tcd = &trace_data[i].tcd; -- INIT_LIST_HEAD(&tcd->tcd_pages); -- INIT_LIST_HEAD(&tcd->tcd_daemon_pages); -- tcd->tcd_cur_pages = 0; -- tcd->tcd_cur_daemon_pages = 0; -- tcd->tcd_max_pages = TCD_MAX_PAGES; -- tcd->tcd_shutting_down = 0; -- } -- return 0; --} -- --static void trace_cleanup_on_cpu(void *info) --{ -- struct trace_cpu_data *tcd; -- struct list_head *pos, *tmp; -- unsigned long flags; -- -- tcd = trace_get_tcd(flags); -- -- tcd->tcd_shutting_down = 1; -- -- list_for_each_safe(pos, tmp, &tcd->tcd_pages) { -- struct page *page; -- -- page = list_entry(pos, struct page, PAGE_LIST_ENTRY); -- LASSERT(page->index <= PAGE_SIZE); -- LASSERT(page_count(page) > 0); -- -- list_del(&PAGE_LIST(page)); -- page->mapping = NULL; -- __free_page(page); -- } -- tcd->tcd_cur_pages = 0; -- -- trace_put_tcd(tcd, flags); --} -- --static void trace_cleanup(void) --{ -- struct page_collection pc; -- -- INIT_LIST_HEAD(&pc.pc_pages); -- spin_lock_init(&pc.pc_lock); -- -- trace_cleanup_on_cpu(&pc); -- smp_call_function(trace_cleanup_on_cpu, &pc, 0, 1); --} -- --void tracefile_exit(void) --{ -- trace_stop_thread(); -- trace_cleanup(); --} diff --cc lnet/libcfs/tracefile.h index f581257,1b6e7a0..0000000 deleted file mode 100644,100644 --- a/lnet/libcfs/tracefile.h +++ /dev/null @@@ -1,22 -1,22 +1,0 @@@ --#ifndef __PORTALS_TRACEFILE_H --#define __PORTALS_TRACEFILE_H -- --int tracefile_dump_all_pages(char *filename); 
--void trace_debug_print(void); --void trace_flush_pages(void); --int trace_start_thread(void); --void trace_stop_thread(void); --int tracefile_init(void); --void tracefile_exit(void); --int trace_write_daemon_file(struct file *file, const char *buffer, -- unsigned long count, void *data); --int trace_read_daemon_file(char *page, char **start, off_t off, int count, -- int *eof, void *data); - int trace_write_debug_mb(struct file *file, const char *buffer, - unsigned long count, void *data); - int trace_read_debug_mb(char *page, char **start, off_t off, int count, - int *eof, void *data); -int trace_write_debug_size(struct file *file, const char *buffer, - unsigned long count, void *data); -int trace_read_debug_size(char *page, char **start, off_t off, int count, - int *eof, void *data); --int trace_dk(struct file *file, const char *buffer, unsigned long count, -- void *data); -- --#endif /* __PORTALS_TRACEFILE_H */ diff --cc lnet/lnet/.cvsignore index 5ed596b,5ed596b..0000000 deleted file mode 100644,100644 --- a/lnet/lnet/.cvsignore +++ /dev/null @@@ -1,10 -1,10 +1,0 @@@ --.deps --Makefile --.*.cmd --autoMakefile.in --autoMakefile --*.ko --*.mod.c --.*.flags --.tmp_versions --.depend diff --cc lnet/lnet/Makefile.in index 71067ac,c0f2e71..0000000 deleted file mode 100644,100644 --- a/lnet/lnet/Makefile.in +++ /dev/null @@@ -1,6 -1,6 +1,0 @@@ - #MODULES := portals - #portals-objs := api-eq.o api-init.o api-me.o api-errno.o api-ni.o api-wrap.o - #portals-objs += lib-dispatch.o lib-init.o lib-me.o lib-msg.o lib-eq.o lib-md.o - #portals-objs += lib-move.o lib-ni.o lib-pid.o -MODULES := portals -portals-objs := api-errno.o api-ni.o api-wrap.o -portals-objs += lib-init.o lib-me.o lib-msg.o lib-eq.o lib-md.o -portals-objs += lib-move.o lib-ni.o lib-pid.o module.o -- --@INCLUDE_RULES@ diff --cc lnet/lnet/Makefile.mk index de01765,088902a..0000000 deleted file mode 100644,100644 --- a/lnet/lnet/Makefile.mk +++ /dev/null @@@ -1,12 -1,12 +1,0 @@@ --# Copyright (C) 2001 
Cluster File Systems, Inc. --# --# This code is issued under the GNU General Public License. --# See the file COPYING in this distribution -- --include $(src)/../Kernelenv -- --obj-y += portals.o - portals-objs := lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o \ -portals-objs := lib-eq.o lib-init.o lib-md.o lib-me.o \ -- lib-move.o lib-msg.o lib-ni.o lib-pid.o \ - api-eq.o api-errno.o api-init.o api-me.o api-ni.o \ - api-wrap.o module.o - api-errno.o api-ni.o api-wrap.o \ - module.o diff --cc lnet/lnet/api-errno.c index b5e7aa1,9a4e5ac..0000000 deleted file mode 100644,100644 --- a/lnet/lnet/api-errno.c +++ /dev/null @@@ -1,54 -1,48 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * api/api-errno.c -- * Instantiate the string table of errors -- * -- * This file is part of Lustre, http://www.sf.net/projects/lustre/ -- */ -- --/* If you change these, you must update the number table in portals/errno.h */ --const char *ptl_err_str[] = { -- "PTL_OK", -- "PTL_SEGV", -- - "PTL_NOSPACE", - "PTL_INUSE", - "PTL_NO_SPACE", - "PTL_ME_IN_USE", -- "PTL_VAL_FAILED", -- -- "PTL_NAL_FAILED", - "PTL_NOINIT", - "PTL_INIT_DUP", - "PTL_INIT_INV", - "PTL_AC_INV_INDEX", - "PTL_NO_INIT", - "PTL_IFACE_DUP", - "PTL_IFACE_INVALID", -- - "PTL_INV_ASIZE", - "PTL_INV_HANDLE", - "PTL_INV_MD", - "PTL_INV_ME", - "PTL_INV_NI", - "PTL_HANDLE_INVALID", - "PTL_MD_INVALID", - "PTL_ME_INVALID", --/* If you change these, you must update the number table in portals/errno.h */ - "PTL_ILL_MD", - "PTL_INV_PROC", - "PTL_INV_PSIZE", - "PTL_INV_PTINDEX", - "PTL_INV_REG", - "PTL_PROCESS_INVALID", - "PTL_PT_INDEX_INVALID", -- - "PTL_INV_SR_INDX", - "PTL_ML_TOOLONG", - "PTL_ADDR_UNKNOWN", - "PTL_INV_EQ", - "PTL_SR_INDEX_INVALID", - "PTL_EQ_INVALID", -- "PTL_EQ_DROPPED", -- -- "PTL_EQ_EMPTY", - "PTL_NOUPDATE", - "PTL_MD_NO_UPDATE", -- "PTL_FAIL", - "PTL_NOT_IMPLEMENTED", - "PTL_NO_ACK", -- - "PTL_IOV_TOO_MANY", - "PTL_IOV_TOO_SMALL", 
- "PTL_IOV_INVALID", -- - "PTL_EQ_INUSE", - "PTL_EQ_IN_USE", - - "PTL_NI_INVALID", - "PTL_MD_ILLEGAL", - - "PTL_MAX_ERRNO" --}; --/* If you change these, you must update the number table in portals/errno.h */ diff --cc lnet/lnet/api-ni.c index 18eea91,72d3b41..0000000 deleted file mode 100644,100644 --- a/lnet/lnet/api-ni.c +++ /dev/null @@@ -1,197 -1,265 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * api/api-ni.c -- * Network Interface code -- * -- * Copyright (c) 2001-2003 Cluster File Systems, Inc. -- * Copyright (c) 2001-2002 Sandia National Laboratories -- * -- * This file is part of Lustre, http://www.sf.net/projects/lustre/ -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-- */ -- -#define DEBUG_SUBSYSTEM S_PORTALS --#include - -int ptl_init; -- --/* Put some magic in the NI handle so uninitialised/zeroed handles are easy -- * to spot */ --#define NI_HANDLE_MAGIC 0xebc0de00 --#define NI_HANDLE_MASK 0x000000ff - #define MAX_NIS 8 - static nal_t *ptl_interfaces[MAX_NIS]; - int ptl_num_interfaces = 0; - -static struct nal_t *ptl_nal_table[NAL_MAX_NR + 1]; - -#ifdef __KERNEL__ -DECLARE_MUTEX(ptl_mutex); - -static void ptl_mutex_enter (void) -{ - down (&ptl_mutex); -} - -static void ptl_mutex_exit (void) -{ - up (&ptl_mutex); -} -#else -static void ptl_mutex_enter (void) -{ -} - -static void ptl_mutex_exit (void) -{ -} -#endif -- --nal_t *ptl_hndl2nal(ptl_handle_any_t *handle) --{ -- unsigned int idx = handle->nal_idx; -- -- /* XXX we really rely on the caller NOT racing with interface -- * setup/teardown. That ensures her NI handle can't get -- * invalidated out from under her (or worse, swapped for a -- * completely different interface!) */ - - LASSERT (ptl_init); -- -- if (((idx ^ NI_HANDLE_MAGIC) & ~NI_HANDLE_MASK) != 0) -- return NULL; -- -- idx &= NI_HANDLE_MASK; - if (idx < MAX_NIS) - return ptl_interfaces[idx]; - - if (idx > NAL_MAX_NR || - ptl_nal_table[idx] == NULL || - ptl_nal_table[idx]->nal_refct == 0) - return NULL; -- - return NULL; - return ptl_nal_table[idx]; --} -- - int ptl_ni_init(void) -int ptl_register_nal (ptl_interface_t interface, nal_t *nal) --{ - int i; - int rc; - - ptl_mutex_enter(); - - if (interface < 0 || interface > NAL_MAX_NR) - rc = PTL_IFACE_INVALID; - else if (ptl_nal_table[interface] != NULL) - rc = PTL_IFACE_DUP; - else { - rc = PTL_OK; - ptl_nal_table[interface] = nal; - LASSERT(nal->nal_refct == 0); - } -- - LASSERT (MAX_NIS <= (NI_HANDLE_MASK + 1)); - ptl_mutex_exit(); - return (rc); -} - -void ptl_unregister_nal (ptl_interface_t interface) -{ - LASSERT(interface >= 0 && interface <= NAL_MAX_NR); - LASSERT(ptl_nal_table[interface] != NULL); - LASSERT(ptl_nal_table[interface]->nal_refct == 0); -- 
- for (i = 0; i < MAX_NIS; i++) - ptl_interfaces[i] = NULL; - ptl_mutex_enter(); - - ptl_nal_table[interface] = NULL; -- - return PTL_OK; - ptl_mutex_exit(); --} -- - void ptl_ni_fini(void) -int PtlInit(int *max_interfaces) --{ - int i; - LASSERT(!strcmp(ptl_err_str[PTL_MAX_ERRNO], "PTL_MAX_ERRNO")); -- - for (i = 0; i < MAX_NIS; i++) { - nal_t *nal = ptl_interfaces[i]; - if (!nal) - continue; - /* If this assertion fails, we need more bits in NI_HANDLE_MASK and - * to shift NI_HANDLE_MAGIC left appropriately */ - LASSERT (NAL_MAX_NR < (NI_HANDLE_MASK + 1)); - - if (max_interfaces != NULL) - *max_interfaces = NAL_MAX_NR + 1; -- - if (nal->shutdown) - nal->shutdown(nal, i); - } - } - ptl_mutex_enter(); -- - #ifdef __KERNEL__ - DECLARE_MUTEX(ptl_ni_init_mutex); - if (!ptl_init) { - /* NULL pointers, clear flags */ - memset(ptl_nal_table, 0, sizeof(ptl_nal_table)); -#ifndef __KERNEL__ - /* Kernel NALs register themselves when their module loads, - * and unregister themselves when their module is unloaded. - * Userspace NALs, are plugged in explicitly here... */ - { - extern nal_t procapi_nal; -- - static void ptl_ni_init_mutex_enter (void) - { - down (&ptl_ni_init_mutex); - } - /* XXX pretend it's socknal to keep liblustre happy... 
*/ - ptl_nal_table[SOCKNAL] = &procapi_nal; - LASSERT (procapi_nal.nal_refct == 0); - } -#endif - ptl_init = 1; - } -- - static void ptl_ni_init_mutex_exit (void) - { - up (&ptl_ni_init_mutex); - ptl_mutex_exit(); - - return PTL_OK; --} -- - #else - static void ptl_ni_init_mutex_enter (void) -void PtlFini(void) --{ - } - nal_t *nal; - int i; -- - static void ptl_ni_init_mutex_exit (void) - { - } - ptl_mutex_enter(); -- - #endif - if (ptl_init) { - for (i = 0; i <= NAL_MAX_NR; i++) { -- - int PtlNIInit(ptl_interface_t interface, ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, ptl_pid_t requested_pid, - ptl_handle_ni_t * handle) - nal = ptl_nal_table[i]; - if (nal == NULL) - continue; - - if (nal->nal_refct != 0) { - CWARN("NAL %d has outstanding refcount %d\n", - i, nal->nal_refct); - nal->nal_ni_fini(nal); - } - - ptl_nal_table[i] = NULL; - } - - ptl_init = 0; - } - - ptl_mutex_exit(); -} - -int PtlNIInit(ptl_interface_t interface, ptl_pid_t requested_pid, - ptl_ni_limits_t *desired_limits, ptl_ni_limits_t *actual_limits, - ptl_handle_ni_t *handle) --{ -- nal_t *nal; - int i; - int i; - int rc; -- -- if (!ptl_init) - return PTL_NOINIT; - - ptl_ni_init_mutex_enter (); - return PTL_NO_INIT; -- - nal = interface(ptl_num_interfaces, ptl_size, acl_size, requested_pid); - ptl_mutex_enter (); -- - if (!nal) { - ptl_ni_init_mutex_exit (); - return PTL_NAL_FAILED; - if (interface == PTL_IFACE_DEFAULT) { - for (i = 0; i <= NAL_MAX_NR; i++) - if (ptl_nal_table[i] != NULL) { - interface = i; - break; - } - /* NB if no interfaces are registered, 'interface' will - * fail the valid test below */ -- } - - for (i = 0; i < ptl_num_interfaces; i++) { - if (ptl_interfaces[i] == nal) { - nal->refct++; - handle->nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | i; - CDEBUG(D_OTHER, "Returning existing NAL (%d)\n", i); - ptl_ni_init_mutex_exit (); - return PTL_OK; - } - - if (interface < 0 || - interface > NAL_MAX_NR || - ptl_nal_table[interface] == NULL) { - GOTO(out, rc = 
PTL_IFACE_INVALID); -- } - nal->refct = 1; -- - if (ptl_num_interfaces >= MAX_NIS) { - if (nal->shutdown) - nal->shutdown (nal, ptl_num_interfaces); - ptl_ni_init_mutex_exit (); - return PTL_NOSPACE; - } - nal = ptl_nal_table[interface]; - nal->nal_handle.nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | interface; - nal->nal_handle.cookie = 0; - - CDEBUG(D_OTHER, "Starting up NAL (%d) refs %d\n", interface, nal->nal_refct); - rc = nal->nal_ni_init(nal, requested_pid, desired_limits, actual_limits); -- - handle->nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | ptl_num_interfaces; - ptl_interfaces[ptl_num_interfaces++] = nal; - if (rc != PTL_OK) { - CERROR("Error %d starting up NAL %d, refs %d\n", rc, - interface, nal->nal_refct); - GOTO(out, rc); - } - - if (nal->nal_refct != 0) { - /* Caller gets to know if this was the first ref or not */ - rc = PTL_IFACE_DUP; - } - - nal->nal_refct++; - *handle = nal->nal_handle; -- - ptl_eq_ni_init(nal); - ptl_me_ni_init(nal); - out: - ptl_mutex_exit (); -- - ptl_ni_init_mutex_exit (); - return PTL_OK; - return rc; --} - -- --int PtlNIFini(ptl_handle_ni_t ni) --{ -- nal_t *nal; - int idx; - int rc; - int idx; -- -- if (!ptl_init) - return PTL_NOINIT; - return PTL_NO_INIT; -- - ptl_ni_init_mutex_enter (); - ptl_mutex_enter (); -- -- nal = ptl_hndl2nal (&ni); -- if (nal == NULL) { - ptl_ni_init_mutex_exit (); - return PTL_INV_HANDLE; - ptl_mutex_exit (); - return PTL_HANDLE_INVALID; -- } -- -- idx = ni.nal_idx & NI_HANDLE_MASK; - - nal->refct--; - if (nal->refct > 0) { - ptl_ni_init_mutex_exit (); - return PTL_OK; - } - - ptl_me_ni_fini(nal); - ptl_eq_ni_fini(nal); - - rc = PTL_OK; - if (nal->shutdown) - rc = nal->shutdown(nal, idx); -- - ptl_interfaces[idx] = NULL; - ptl_num_interfaces--; - LASSERT(nal->nal_refct > 0); -- - ptl_ni_init_mutex_exit (); - return rc; - } - nal->nal_refct--; -- - int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * ni_out) - { - *ni_out = handle_in; - /* nal_refct == 0 tells nal->shutdown to 
really shut down */ - nal->nal_ni_fini(nal); -- - ptl_mutex_exit (); -- return PTL_OK; --} diff --cc lnet/lnet/api-wrap.c index d23a6aa,37f6c0b..0000000 deleted file mode 100644,100644 --- a/lnet/lnet/api-wrap.c +++ /dev/null @@@ -1,599 -1,366 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * api/api-wrap.c -- * User-level wrappers that dispatch across the protection boundaries -- * -- * Copyright (c) 2001-2003 Cluster File Systems, Inc. -- * Copyright (c) 2001-2002 Sandia National Laboratories -- * -- * This file is part of Lustre, http://www.sf.net/projects/lustre/ -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-- */ -- --# define DEBUG_SUBSYSTEM S_PORTALS --#include -- - static int do_forward(ptl_handle_any_t any_h, int cmd, void *argbuf, - int argsize, void *retbuf, int retsize) -void PtlSnprintHandle(char *str, int len, ptl_handle_any_t h) --{ - nal_t *nal; - - if (!ptl_init) { - CERROR("Not initialized\n"); - return PTL_NOINIT; - } - - nal = ptl_hndl2nal(&any_h); - if (!nal) - return PTL_INV_HANDLE; - - nal->forward(nal, cmd, argbuf, argsize, retbuf, retsize); - snprintf(str, len, "0x%lx."LPX64, h.nal_idx, h.cookie); -} -- -int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t *ni_out) -{ - if (!ptl_init) - return PTL_NO_INIT; - - if (ptl_hndl2nal(&handle_in) == NULL) - return PTL_HANDLE_INVALID; - - *ni_out = handle_in; -- return PTL_OK; --} -- --int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id) --{ - PtlGetId_in args; - PtlGetId_out ret; - int rc; - - args.handle_in = ni_handle; - nal_t *nal; -- - rc = do_forward(ni_handle, PTL_GETID, &args, sizeof(args), &ret, - sizeof(ret)); - if (rc != PTL_OK) - return rc; - if (!ptl_init) - return PTL_NO_INIT; -- - if (id) - *id = ret.id_out; - nal = ptl_hndl2nal(&ni_handle); - if (nal == NULL) - return PTL_NI_INVALID; -- - return ret.rc; - return nal->nal_get_id(nal, id); --} -- - int PtlFailNid (ptl_handle_ni_t interface, ptl_nid_t nid, unsigned int threshold) -int PtlGetUid(ptl_handle_ni_t ni_handle, ptl_uid_t *uid) --{ - PtlFailNid_in args; - PtlFailNid_out ret; - int rc; - - args.interface = interface; - args.nid = nid; - args.threshold = threshold; - nal_t *nal; - - if (!ptl_init) - return PTL_NO_INIT; -- - rc = do_forward (interface, PTL_FAILNID, - &args, sizeof(args), &ret, sizeof (ret)); - nal = ptl_hndl2nal(&ni_handle); - if (nal == NULL) - return PTL_NI_INVALID; -- - return ((rc != PTL_OK) ? 
rc : ret.rc); - /* We don't support different uids yet */ - *uid = 0; - return PTL_OK; --} -- - int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in, - ptl_sr_value_t * status_out) -int PtlFailNid (ptl_handle_ni_t interface, ptl_nid_t nid, unsigned int threshold) --{ - PtlNIStatus_in args; - PtlNIStatus_out ret; - int rc; - - args.interface_in = interface_in; - args.register_in = register_in; - - rc = do_forward(interface_in, PTL_NISTATUS, &args, sizeof(args), &ret, - sizeof(ret)); - - if (rc != PTL_OK) - return rc; - nal_t *nal; -- - if (status_out) - *status_out = ret.status_out; - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(&interface); - if (nal == NULL) - return PTL_NI_INVALID; -- - return ret.rc; - return nal->nal_fail_nid(nal, nid, threshold); --} -- - int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in, - unsigned long *distance_out) -int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in, - ptl_sr_value_t *status_out) --{ - PtlNIDist_in args; - PtlNIDist_out ret; - int rc; - - args.interface_in = interface_in; - args.process_in = process_in; - - rc = do_forward(interface_in, PTL_NIDIST, &args, sizeof(args), &ret, - sizeof(ret)); - - if (rc != PTL_OK) - return rc; - nal_t *nal; -- - if (distance_out) - *distance_out = ret.distance_out; - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(&interface_in); - if (nal == NULL) - return PTL_NI_INVALID; -- - return ret.rc; - return nal->nal_ni_status(nal, register_in, status_out); --} - - -- - unsigned int PtlNIDebug(ptl_handle_ni_t ni, unsigned int mask_in) -int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in, - unsigned long *distance_out) --{ - PtlNIDebug_in args; - PtlNIDebug_out ret; - int rc; - - args.mask_in = mask_in; - - rc = do_forward(ni, PTL_NIDEBUG, &args, sizeof(args), &ret, - sizeof(ret)); - nal_t *nal; -- - if (rc != PTL_OK) - return rc; - if (!ptl_init) - return PTL_NO_INIT; - - nal = 
ptl_hndl2nal(&interface_in); - if (nal == NULL) - return PTL_NI_INVALID; -- - return ret.rc; - return nal->nal_ni_dist(nal, &process_in, distance_out); --} -- --int PtlMEAttach(ptl_handle_ni_t interface_in, ptl_pt_index_t index_in, -- ptl_process_id_t match_id_in, ptl_match_bits_t match_bits_in, -- ptl_match_bits_t ignore_bits_in, ptl_unlink_t unlink_in, - ptl_ins_pos_t pos_in, ptl_handle_me_t * handle_out) - ptl_ins_pos_t pos_in, ptl_handle_me_t *handle_out) --{ - PtlMEAttach_in args; - PtlMEAttach_out ret; - int rc; - - args.interface_in = interface_in; - args.index_in = index_in; - args.match_id_in = match_id_in; - args.match_bits_in = match_bits_in; - args.ignore_bits_in = ignore_bits_in; - args.unlink_in = unlink_in; - args.position_in = pos_in; - - rc = do_forward(interface_in, PTL_MEATTACH, &args, sizeof(args), &ret, - sizeof(ret)); - - if (rc != PTL_OK) - return rc; - nal_t *nal; -- - if (handle_out) { - handle_out->nal_idx = interface_in.nal_idx; - handle_out->cookie = ret.handle_out.cookie; - } - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(&interface_in); - if (nal == NULL) - return PTL_NI_INVALID; -- - return ret.rc; - return nal->nal_me_attach(nal, index_in, match_id_in, - match_bits_in, ignore_bits_in, - unlink_in, pos_in, handle_out); --} -- --int PtlMEInsert(ptl_handle_me_t current_in, ptl_process_id_t match_id_in, -- ptl_match_bits_t match_bits_in, ptl_match_bits_t ignore_bits_in, -- ptl_unlink_t unlink_in, ptl_ins_pos_t position_in, -- ptl_handle_me_t * handle_out) --{ - PtlMEInsert_in args; - PtlMEInsert_out ret; - int rc; - - args.current_in = current_in; - args.match_id_in = match_id_in; - args.match_bits_in = match_bits_in; - args.ignore_bits_in = ignore_bits_in; - args.unlink_in = unlink_in; - args.position_in = position_in; - - rc = do_forward(current_in, PTL_MEINSERT, &args, sizeof(args), &ret, - sizeof(ret)); - nal_t *nal; -- - if (rc != PTL_OK) - return (rc == PTL_INV_HANDLE) ? 
PTL_INV_ME : rc; - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(¤t_in); - if (nal == NULL) - return PTL_ME_INVALID; -- - if (handle_out) { - handle_out->nal_idx = current_in.nal_idx; - handle_out->cookie = ret.handle_out.cookie; - } - return ret.rc; - return nal->nal_me_insert(nal, ¤t_in, match_id_in, - match_bits_in, ignore_bits_in, - unlink_in, position_in, handle_out); --} -- --int PtlMEUnlink(ptl_handle_me_t current_in) - { - PtlMEUnlink_in args; - PtlMEUnlink_out ret; - int rc; - - args.current_in = current_in; - args.unlink_in = PTL_RETAIN; - - rc = do_forward(current_in, PTL_MEUNLINK, &args, sizeof(args), &ret, - sizeof(ret)); - - if (rc != PTL_OK) - return (rc == PTL_INV_HANDLE) ? PTL_INV_ME : rc; - - return ret.rc; - } - - int PtlTblDump(ptl_handle_ni_t ni, int index_in) - { - PtlTblDump_in args; - PtlTblDump_out ret; - int rc; - - args.index_in = index_in; - - rc = do_forward(ni, PTL_TBLDUMP, &args, sizeof(args), &ret, - sizeof(ret)); - - if (rc != PTL_OK) - return rc; - - return ret.rc; - } - - int PtlMEDump(ptl_handle_me_t current_in) - { - PtlMEDump_in args; - PtlMEDump_out ret; - int rc; - - args.current_in = current_in; - - rc = do_forward(current_in, PTL_MEDUMP, &args, sizeof(args), &ret, - sizeof(ret)); - - if (rc != PTL_OK) - return (rc == PTL_INV_HANDLE) ? 
PTL_INV_ME : rc; - - return ret.rc; - } - - static int validate_md(ptl_handle_any_t current_in, ptl_md_t md_in) --{ - nal_t *nal; - int rc; - int i; - - if (!ptl_init) { - CERROR("PtlMDAttach/Bind/Update: Not initialized\n"); - return PTL_NOINIT; - } - nal_t *nal; -- - if (!ptl_init) - return PTL_NO_INIT; - -- nal = ptl_hndl2nal(¤t_in); - if (!nal) - return PTL_INV_HANDLE; - - if (nal->validate != NULL) /* nal->validate not a NOOP */ - { - if ((md_in.options & PTL_MD_IOV) == 0) /* contiguous */ - { - rc = nal->validate (nal, md_in.start, md_in.length); - if (rc) - return (PTL_SEGV); - } - else - { - struct iovec *iov = (struct iovec *)md_in.start; - - for (i = 0; i < md_in.niov; i++, iov++) - { - rc = nal->validate (nal, iov->iov_base, iov->iov_len); - if (rc) - return (PTL_SEGV); - } - } - } - - return 0; - } - if (nal == NULL) - return PTL_ME_INVALID; -- - static ptl_handle_eq_t md2eq (ptl_md_t *md) - { - if (PtlHandleEqual (md->eventq, PTL_EQ_NONE)) - return (PTL_EQ_NONE); - - return (ptl_handle2usereq (&md->eventq)->cb_eq_handle); - return nal->nal_me_unlink(nal, ¤t_in); --} - -- --int PtlMDAttach(ptl_handle_me_t me_in, ptl_md_t md_in, -- ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out) --{ - PtlMDAttach_in args; - PtlMDAttach_out ret; - int rc; - nal_t *nal; -- - rc = validate_md(me_in, md_in); - if (rc == PTL_OK) { - args.eq_in = md2eq(&md_in); - args.me_in = me_in; - args.md_in = md_in; - args.unlink_in = unlink_in; - - rc = do_forward(me_in, PTL_MDATTACH, - &args, sizeof(args), &ret, sizeof(ret)); - } - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(&me_in); - if (nal == NULL) - return PTL_ME_INVALID; -- - if (rc != PTL_OK) - return (rc == PTL_INV_HANDLE) ? 
PTL_INV_ME : rc; - if (!PtlHandleIsEqual(md_in.eq_handle, PTL_EQ_NONE) && - ptl_hndl2nal(&md_in.eq_handle) != nal) - return PTL_MD_ILLEGAL; -- - if (handle_out) { - handle_out->nal_idx = me_in.nal_idx; - handle_out->cookie = ret.handle_out.cookie; - } - return ret.rc; - return (nal->nal_md_attach)(nal, &me_in, &md_in, - unlink_in, handle_out); --} - - -- --int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in, - ptl_handle_md_t * handle_out) - ptl_unlink_t unlink_in, ptl_handle_md_t *handle_out) --{ - PtlMDBind_in args; - PtlMDBind_out ret; - int rc; - - rc = validate_md(ni_in, md_in); - if (rc != PTL_OK) - return rc; - - args.eq_in = md2eq(&md_in); - args.ni_in = ni_in; - args.md_in = md_in; - nal_t *nal; -- - rc = do_forward(ni_in, PTL_MDBIND, - &args, sizeof(args), &ret, sizeof(ret)); - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(&ni_in); - if (nal == NULL) - return PTL_NI_INVALID; -- - if (rc != PTL_OK) - return rc; - if (!PtlHandleIsEqual(md_in.eq_handle, PTL_EQ_NONE) && - ptl_hndl2nal(&md_in.eq_handle) != nal) - return PTL_MD_ILLEGAL; -- - if (handle_out) { - handle_out->nal_idx = ni_in.nal_idx; - handle_out->cookie = ret.handle_out.cookie; - } - return ret.rc; - return (nal->nal_md_bind)(nal, &md_in, unlink_in, handle_out); --} -- --int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t *old_inout, -- ptl_md_t *new_inout, ptl_handle_eq_t testq_in) --{ - PtlMDUpdate_internal_in args; - PtlMDUpdate_internal_out ret; - int rc; - - args.md_in = md_in; - - if (old_inout) { - args.old_inout = *old_inout; - args.old_inout_valid = 1; - } else - args.old_inout_valid = 0; - - if (new_inout) { - rc = validate_md (md_in, *new_inout); - if (rc != PTL_OK) - return (rc == PTL_INV_HANDLE) ? 
PTL_INV_MD : rc; - args.new_inout = *new_inout; - args.new_inout_valid = 1; - } else - args.new_inout_valid = 0; - - if (PtlHandleEqual (testq_in, PTL_EQ_NONE)) { - args.testq_in = PTL_EQ_NONE; - args.sequence_in = -1; - } else { - ptl_eq_t *eq = ptl_handle2usereq (&testq_in); - - args.testq_in = eq->cb_eq_handle; - args.sequence_in = eq->sequence; - } - - rc = do_forward(md_in, PTL_MDUPDATE, &args, sizeof(args), &ret, - sizeof(ret)); - if (rc != PTL_OK) - return (rc == PTL_INV_HANDLE) ? PTL_INV_MD : rc; - nal_t *nal; - - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(&md_in); - if (nal == NULL) - return PTL_MD_INVALID; -- - if (old_inout) - *old_inout = ret.old_inout; - if (!PtlHandleIsEqual(testq_in, PTL_EQ_NONE) && - ptl_hndl2nal(&testq_in) != nal) - return PTL_EQ_INVALID; -- - return ret.rc; - return (nal->nal_md_update)(nal, &md_in, - old_inout, new_inout, &testq_in); --} -- --int PtlMDUnlink(ptl_handle_md_t md_in) --{ - PtlMDUnlink_in args; - PtlMDUnlink_out ret; - int rc; - - args.md_in = md_in; - rc = do_forward(md_in, PTL_MDUNLINK, &args, sizeof(args), &ret, - sizeof(ret)); - if (rc != PTL_OK) - return (rc == PTL_INV_HANDLE) ? 
PTL_INV_MD : rc; - - return ret.rc; - nal_t *nal; - - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(&md_in); - if (nal == NULL) - return PTL_MD_INVALID; - - return (nal->nal_md_unlink)(nal, &md_in); --} -- --int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count, - int (*callback) (ptl_event_t * event), - ptl_handle_eq_t * handle_out) - ptl_eq_handler_t callback, - ptl_handle_eq_t *handle_out) --{ - ptl_eq_t *eq = NULL; - ptl_event_t *ev = NULL; - PtlEQAlloc_in args; - PtlEQAlloc_out ret; - int rc, i; - nal_t *nal; - - nal_t *nal; - -- if (!ptl_init) - return PTL_NOINIT; - return PTL_NO_INIT; -- - nal = ptl_hndl2nal (&interface); - nal = ptl_hndl2nal(&interface); -- if (nal == NULL) - return PTL_INV_HANDLE; - - if (count != LOWEST_BIT_SET(count)) { /* not a power of 2 already */ - do { /* knock off all but the top bit... */ - count &= ~LOWEST_BIT_SET (count); - } while (count != LOWEST_BIT_SET(count)); - - count <<= 1; /* ...and round up */ - } - - if (count == 0) /* catch bad parameter / overflow on roundup */ - return (PTL_VAL_FAILED); - - PORTAL_ALLOC(ev, count * sizeof(ptl_event_t)); - if (!ev) - return PTL_NOSPACE; - - for (i = 0; i < count; i++) - ev[i].sequence = 0; - - if (nal->validate != NULL) { - rc = nal->validate(nal, ev, count * sizeof(ptl_event_t)); - if (rc != PTL_OK) - goto fail; - } - - args.ni_in = interface; - args.count_in = count; - args.base_in = ev; - args.len_in = count * sizeof(*ev); - args.callback_in = callback; - - rc = do_forward(interface, PTL_EQALLOC, &args, sizeof(args), &ret, - sizeof(ret)); - if (rc != PTL_OK) - goto fail; - if (ret.rc) - GOTO(fail, rc = ret.rc); - return PTL_NI_INVALID; -- - PORTAL_ALLOC(eq, sizeof(*eq)); - if (!eq) { - rc = PTL_NOSPACE; - goto fail; - } - return (nal->nal_eq_alloc)(nal, count, callback, handle_out); -} -- - eq->sequence = 1; - eq->size = count; - eq->base = ev; -int PtlEQFree(ptl_handle_eq_t eventq) -{ - nal_t *nal; -- - /* EQ handles are a little wierd. 
PtlEQGet() just looks at the - * queued events in shared memory. It doesn't want to do_forward() - * at all, so the cookie in the EQ handle we pass out of here is - * simply a pointer to the event queue we just set up. We stash - * the handle returned by do_forward(), so we can pass it back via - * do_forward() when we need to. */ - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(&eventq); - if (nal == NULL) - return PTL_EQ_INVALID; -- - eq->cb_eq_handle.nal_idx = interface.nal_idx; - eq->cb_eq_handle.cookie = ret.handle_out.cookie; - return (nal->nal_eq_free)(nal, &eventq); -} -- - handle_out->nal_idx = interface.nal_idx; - handle_out->cookie = (__u64)((unsigned long)eq); - return PTL_OK; -int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t *ev) -{ - int which; - - return (PtlEQPoll (&eventq, 1, 0, ev, &which)); -} -- - fail: - PORTAL_FREE(ev, count * sizeof(ptl_event_t)); - return rc; -int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out) -{ - int which; - - return (PtlEQPoll (&eventq_in, 1, PTL_TIME_FOREVER, - event_out, &which)); --} -- - int PtlEQFree(ptl_handle_eq_t eventq) -int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout, - ptl_event_t *event_out, int *which_out) --{ - PtlEQFree_in args; - PtlEQFree_out ret; - ptl_eq_t *eq; - int rc; - int i; - nal_t *nal; -- - eq = ptl_handle2usereq (&eventq); - args.eventq_in = eq->cb_eq_handle; - if (!ptl_init) - return PTL_NO_INIT; -- - rc = do_forward(eq->cb_eq_handle, PTL_EQFREE, &args, - sizeof(args), &ret, sizeof(ret)); - if (neq_in < 1) - return PTL_EQ_INVALID; -- - /* XXX we're betting rc == PTL_OK here */ - PORTAL_FREE(eq->base, eq->size * sizeof(ptl_event_t)); - PORTAL_FREE(eq, sizeof(*eq)); - nal = ptl_hndl2nal(&eventqs_in[0]); - if (nal == NULL) - return PTL_EQ_INVALID; -- - return rc; - for (i = 1; i < neq_in; i++) - if (ptl_hndl2nal(&eventqs_in[i]) != nal) - return PTL_EQ_INVALID; - - return (nal->nal_eq_poll)(nal, eventqs_in, neq_in, timeout, - event_out, which_out); 
--} - -- --int PtlACEntry(ptl_handle_ni_t ni_in, ptl_ac_index_t index_in, -- ptl_process_id_t match_id_in, ptl_pt_index_t portal_in) --{ - PtlACEntry_in args; - PtlACEntry_out ret; - int rc; - - /* - * Copy arguments into the argument block to - * hand to the forwarding object - */ - args.ni_in = ni_in; - args.index_in = index_in; - args.match_id_in = match_id_in; - args.portal_in = portal_in; - - rc = do_forward(ni_in, PTL_ACENTRY, &args, sizeof(args), &ret, - sizeof(ret)); - nal_t *nal; -- - return (rc != PTL_OK) ? rc : ret.rc; - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(&ni_in); - if (nal == NULL) - return PTL_NI_INVALID; - - return (nal->nal_ace_entry)(nal, index_in, match_id_in, portal_in); --} -- --int PtlPut(ptl_handle_md_t md_in, ptl_ack_req_t ack_req_in, -- ptl_process_id_t target_in, ptl_pt_index_t portal_in, - ptl_ac_index_t cookie_in, ptl_match_bits_t match_bits_in, - ptl_ac_index_t ac_in, ptl_match_bits_t match_bits_in, -- ptl_size_t offset_in, ptl_hdr_data_t hdr_data_in) --{ - PtlPut_in args; - PtlPut_out ret; - int rc; - - /* - * Copy arguments into the argument block to - * hand to the forwarding object - */ - args.md_in = md_in; - args.ack_req_in = ack_req_in; - args.target_in = target_in; - args.portal_in = portal_in; - args.cookie_in = cookie_in; - args.match_bits_in = match_bits_in; - args.offset_in = offset_in; - args.hdr_data_in = hdr_data_in; - nal_t *nal; -- - rc = do_forward(md_in, PTL_PUT, &args, sizeof(args), &ret, sizeof(ret)); - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(&md_in); - if (nal == NULL) - return PTL_MD_INVALID; -- - return (rc != PTL_OK) ? 
rc : ret.rc; - return (nal->nal_put)(nal, &md_in, ack_req_in, - &target_in, portal_in, ac_in, - match_bits_in, offset_in, hdr_data_in); --} -- --int PtlGet(ptl_handle_md_t md_in, ptl_process_id_t target_in, - ptl_pt_index_t portal_in, ptl_ac_index_t cookie_in, - ptl_pt_index_t portal_in, ptl_ac_index_t ac_in, -- ptl_match_bits_t match_bits_in, ptl_size_t offset_in) --{ - PtlGet_in args; - PtlGet_out ret; - int rc; - nal_t *nal; -- - /* - * Copy arguments into the argument block to - * hand to the forwarding object - */ - args.md_in = md_in; - args.target_in = target_in; - args.portal_in = portal_in; - args.cookie_in = cookie_in; - args.match_bits_in = match_bits_in; - args.offset_in = offset_in; - if (!ptl_init) - return PTL_NO_INIT; -- - rc = do_forward(md_in, PTL_GET, &args, sizeof(args), &ret, sizeof(ret)); - nal = ptl_hndl2nal(&md_in); - if (nal == NULL) - return PTL_MD_INVALID; -- - return (rc != PTL_OK) ? rc : ret.rc; - return (nal->nal_get)(nal, &md_in, - &target_in, portal_in, ac_in, - match_bits_in, offset_in); --} - diff --cc lnet/lnet/autoMakefile.am index 22565dd,285f8fe..0000000 deleted file mode 100644,100644 --- a/lnet/lnet/autoMakefile.am +++ /dev/null @@@ -1,26 -1,26 +1,0 @@@ --# Copyright (C) 2002 Cluster File Systems, Inc. --# --# This code is issued under the GNU General Public License. 
--# See the file COPYING in this distribution -- - my_sources = api-eq.c api-init.c api-me.c api-errno.c api-ni.c api-wrap.c \ - lib-dispatch.c lib-init.c lib-me.c lib-msg.c lib-eq.c \ -my_sources = api-errno.c api-ni.c api-wrap.c \ - lib-init.c lib-me.c lib-msg.c lib-eq.c \ -- lib-md.c lib-move.c lib-ni.c lib-pid.c -- --if !CRAY_PORTALS -- --if LIBLUSTRE --noinst_LIBRARIES= libportals.a --libportals_a_SOURCES= $(my_sources) --libportals_a_CPPFLAGS = $(LLCPPFLAGS) --libportals_a_CFLAGS = $(LLCFLAGS) --endif -- - #if MODULES - #modulenet_DATA = portals$(KMODEXT) - #endif # MODULES -if MODULES -modulenet_DATA = portals$(KMODEXT) -endif # MODULES -- --endif # CRAY_PORTALS -- --MOSTLYCLEANFILES = *.o *.ko *.mod.c - #DIST_SOURCES = $(portals-objs:%.o=%.c) -DIST_SOURCES = $(portals-objs:%.o=%.c) diff --cc lnet/lnet/lib-eq.c index ce343c1,8ea6fdd..0000000 deleted file mode 100644,100644 --- a/lnet/lnet/lib-eq.c +++ /dev/null @@@ -1,128 -1,265 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * lib/lib-eq.c -- * Library level Event queue management routines -- * -- * Copyright (c) 2001-2003 Cluster File Systems, Inc. -- * Copyright (c) 2001-2002 Sandia National Laboratories -- * -- * This file is part of Lustre, http://www.sf.net/projects/lustre/ -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-- */ -- --#define DEBUG_SUBSYSTEM S_PORTALS --#include - #include -- - int do_PtlEQAlloc_internal(nal_cb_t * nal, void *private, void *v_args, - void *v_ret) -int -lib_api_eq_alloc (nal_t *apinal, ptl_size_t count, - ptl_eq_handler_t callback, - ptl_handle_eq_t *handle) --{ - /* - * Incoming: - * ptl_handle_ni_t ni_in - * ptl_size_t count_in - * void * base_in - * - * Outgoing: - * ptl_handle_eq_t * handle_out - */ - lib_nal_t *nal = apinal->nal_data; - lib_eq_t *eq; - unsigned long flags; - int rc; -- - PtlEQAlloc_in *args = v_args; - PtlEQAlloc_out *ret = v_ret; - /* We need count to be a power of 2 so that when eq_{enq,deq}_seq - * overflow, they don't skip entries, so the queue has the same - * apparant capacity at all times */ -- - lib_eq_t *eq; - unsigned long flags; - if (count != LOWEST_BIT_SET(count)) { /* not a power of 2 already */ - do { /* knock off all but the top bit... */ - count &= ~LOWEST_BIT_SET (count); - } while (count != LOWEST_BIT_SET(count)); -- - /* api should have rounded up */ - if (args->count_in != LOWEST_BIT_SET (args->count_in)) - return ret->rc = PTL_VAL_FAILED; - count <<= 1; /* ...and round up */ - } -- - if (count == 0) /* catch bad parameter / overflow on roundup */ - return (PTL_VAL_FAILED); - -- eq = lib_eq_alloc (nal); -- if (eq == NULL) - return (ret->rc = PTL_NOSPACE); - return (PTL_NO_SPACE); -- - state_lock(nal, &flags); - PORTAL_ALLOC(eq->eq_events, count * sizeof(ptl_event_t)); - if (eq->eq_events == NULL) { - LIB_LOCK(nal, flags); - lib_eq_free (nal, eq); - LIB_UNLOCK(nal, flags); - } -- - if (nal->cb_map != NULL) { - if (nal->libnal_map != NULL) { -- struct iovec iov = { - .iov_base = args->base_in, - .iov_len = args->count_in * sizeof (ptl_event_t) }; - .iov_base = eq->eq_events, - .iov_len = count * sizeof(ptl_event_t)}; -- - ret->rc = nal->cb_map (nal, 1, &iov, &eq->eq_addrkey); - if (ret->rc != PTL_OK) { - rc = nal->libnal_map(nal, 1, &iov, &eq->eq_addrkey); - if (rc != PTL_OK) { - LIB_LOCK(nal, flags); -- 
lib_eq_free (nal, eq); - - state_unlock (nal, &flags); - return (ret->rc); - LIB_UNLOCK(nal, flags); - return (rc); -- } -- } -- - eq->sequence = 1; - eq->base = args->base_in; - eq->size = args->count_in; - /* NB this resets all event sequence numbers to 0, to be earlier - * than eq_deq_seq */ - memset(eq->eq_events, 0, count * sizeof(ptl_event_t)); - - eq->eq_deq_seq = 1; - eq->eq_enq_seq = 1; - eq->eq_size = count; -- eq->eq_refcount = 0; - eq->event_callback = args->callback_in; - eq->eq_callback = callback; - - LIB_LOCK(nal, flags); -- -- lib_initialise_handle (nal, &eq->eq_lh, PTL_COOKIE_TYPE_EQ); - list_add (&eq->eq_list, &nal->ni.ni_active_eqs); - list_add (&eq->eq_list, &nal->libnal_ni.ni_active_eqs); -- - state_unlock(nal, &flags); - LIB_UNLOCK(nal, flags); -- - ptl_eq2handle(&ret->handle_out, eq); - return (ret->rc = PTL_OK); - ptl_eq2handle(handle, nal, eq); - return (PTL_OK); --} -- - int do_PtlEQFree_internal(nal_cb_t * nal, void *private, void *v_args, - void *v_ret) -int -lib_api_eq_free(nal_t *apinal, ptl_handle_eq_t *eqh) --{ - /* - * Incoming: - * ptl_handle_eq_t eventq_in - * - * Outgoing: - */ - - PtlEQFree_in *args = v_args; - PtlEQFree_out *ret = v_ret; - lib_eq_t *eq; - long flags; - lib_nal_t *nal = apinal->nal_data; - lib_eq_t *eq; - int size; - ptl_event_t *events; - void *addrkey; - unsigned long flags; -- - state_lock (nal, &flags); - LIB_LOCK(nal, flags); -- - eq = ptl_handle2eq(&args->eventq_in, nal); - eq = ptl_handle2eq(eqh, nal); -- if (eq == NULL) { - ret->rc = PTL_INV_EQ; - } else if (eq->eq_refcount != 0) { - ret->rc = PTL_EQ_INUSE; - LIB_UNLOCK(nal, flags); - return (PTL_EQ_INVALID); - } - - if (eq->eq_refcount != 0) { - LIB_UNLOCK(nal, flags); - return (PTL_EQ_IN_USE); - } - - /* stash for free after lock dropped */ - events = eq->eq_events; - size = eq->eq_size; - addrkey = eq->eq_addrkey; - - lib_invalidate_handle (nal, &eq->eq_lh); - list_del (&eq->eq_list); - lib_eq_free (nal, eq); - - LIB_UNLOCK(nal, flags); - - if 
(nal->libnal_unmap != NULL) { - struct iovec iov = { - .iov_base = events, - .iov_len = size * sizeof(ptl_event_t)}; - - nal->libnal_unmap(nal, 1, &iov, &addrkey); - } - - PORTAL_FREE(events, size * sizeof (ptl_event_t)); - - return (PTL_OK); -} - -int -lib_get_event (lib_eq_t *eq, ptl_event_t *ev) -{ - int new_index = eq->eq_deq_seq & (eq->eq_size - 1); - ptl_event_t *new_event = &eq->eq_events[new_index]; - int rc; - ENTRY; - - CDEBUG(D_INFO, "event: %p, sequence: %lu, eq->size: %u\n", - new_event, eq->eq_deq_seq, eq->eq_size); - - if (PTL_SEQ_GT (eq->eq_deq_seq, new_event->sequence)) { - RETURN(PTL_EQ_EMPTY); - } - - /* We've got a new event... */ - *ev = *new_event; - - /* ...but did it overwrite an event we've not seen yet? */ - if (eq->eq_deq_seq == new_event->sequence) { - rc = PTL_OK; -- } else { - if (nal->cb_unmap != NULL) { - struct iovec iov = { - .iov_base = eq->base, - .iov_len = eq->size * sizeof (ptl_event_t) }; - - nal->cb_unmap(nal, 1, &iov, &eq->eq_addrkey); - CERROR("Event Queue Overflow: eq seq %lu ev seq %lu\n", - eq->eq_deq_seq, new_event->sequence); - rc = PTL_EQ_DROPPED; - } - - eq->eq_deq_seq = new_event->sequence + 1; - RETURN(rc); -} - - -int -lib_api_eq_poll (nal_t *apinal, - ptl_handle_eq_t *eventqs, int neq, int timeout_ms, - ptl_event_t *event, int *which) -{ - lib_nal_t *nal = apinal->nal_data; - lib_ni_t *ni = &nal->libnal_ni; - unsigned long flags; - int i; - int rc; -#ifdef __KERNEL__ - wait_queue_t wq; - unsigned long now; -#else - struct timeval then; - struct timeval now; - struct timespec ts; -#endif - ENTRY; - - LIB_LOCK(nal, flags); - - for (;;) { - for (i = 0; i < neq; i++) { - lib_eq_t *eq = ptl_handle2eq(&eventqs[i], nal); - - rc = lib_get_event (eq, event); - if (rc != PTL_EQ_EMPTY) { - LIB_UNLOCK(nal, flags); - *which = i; - RETURN(rc); - } - } - - if (timeout_ms == 0) { - LIB_UNLOCK (nal, flags); - RETURN (PTL_EQ_EMPTY); -- } -- - lib_invalidate_handle (nal, &eq->eq_lh); - list_del (&eq->eq_list); - lib_eq_free (nal, 
eq); - ret->rc = PTL_OK; - } - /* Some architectures force us to do spin locking/unlocking - * in the same stack frame, means we can abstract the - * locking here */ -#ifdef __KERNEL__ - init_waitqueue_entry(&wq, current); - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&ni->ni_waitq, &wq); -- - state_unlock (nal, &flags); - LIB_UNLOCK(nal, flags); -- - return (ret->rc); - if (timeout_ms < 0) { - schedule (); - } else { - now = jiffies; - schedule_timeout((timeout_ms * HZ)/1000); - timeout_ms -= ((jiffies - now) * 1000)/HZ; - if (timeout_ms < 0) - timeout_ms = 0; - } - - LIB_LOCK(nal, flags); -#else - if (timeout_ms < 0) { - pthread_cond_wait(&ni->ni_cond, &ni->ni_mutex); - } else { - gettimeofday(&then, NULL); - - ts.tv_sec = then.tv_sec + timeout_ms/1000; - ts.tv_nsec = then.tv_usec * 1000 + - (timeout_ms%1000) * 1000000; - if (ts.tv_nsec >= 1000000000) { - ts.tv_sec++; - ts.tv_nsec -= 1000000000; - } - - pthread_cond_timedwait(&ni->ni_cond, - &ni->ni_mutex, &ts); - - gettimeofday(&now, NULL); - timeout_ms -= (now.tv_sec - then.tv_sec) * 1000 + - (now.tv_usec - then.tv_usec) / 1000; - - if (timeout_ms < 0) - timeout_ms = 0; - } -#endif - } --} diff --cc lnet/lnet/lib-init.c index d4d8860,9d97bc1..0000000 deleted file mode 100644,100644 --- a/lnet/lnet/lib-init.c +++ /dev/null @@@ -1,380 -1,434 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * lib/lib-init.c -- * Start up the internal library and clear all structures -- * Called by the NAL when it initializes. Safe to call multiple times. -- * -- * Copyright (c) 2001-2003 Cluster File Systems, Inc. -- * Copyright (c) 2001-2002 Sandia National Laboratories -- * -- * This file is part of Lustre, http://www.sf.net/projects/lustre/ -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. 
-- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --# define DEBUG_SUBSYSTEM S_PORTALS --#include -- --#ifdef __KERNEL__ --# include /* for memset() */ --# include --# ifdef KERNEL_ADDR_CACHE --# include --# endif --#else --# include --# include --#endif -- --#ifndef PTL_USE_LIB_FREELIST -- --int - kportal_descriptor_setup (nal_cb_t *nal) -kportal_descriptor_setup (lib_nal_t *nal, - ptl_ni_limits_t *requested_limits, - ptl_ni_limits_t *actual_limits) --{ - /* Ignore requested limits! */ - actual_limits->max_mes = INT_MAX; - actual_limits->max_mds = INT_MAX; - actual_limits->max_eqs = INT_MAX; - -- return PTL_OK; --} -- --void - kportal_descriptor_cleanup (nal_cb_t *nal) -kportal_descriptor_cleanup (lib_nal_t *nal) --{ --} --#else -- --int - lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int n, int size) -lib_freelist_init (lib_nal_t *nal, lib_freelist_t *fl, int n, int size) --{ -- char *space; -- -- LASSERT (n > 0); -- -- size += offsetof (lib_freeobj_t, fo_contents); -- - space = nal->cb_malloc (nal, n * size); - PORTAL_ALLOC(space, n * size); -- if (space == NULL) - return (PTL_NOSPACE); - return (PTL_NO_SPACE); -- -- INIT_LIST_HEAD (&fl->fl_list); -- fl->fl_objs = space; -- fl->fl_nobjs = n; -- fl->fl_objsize = size; -- -- do -- { -- memset (space, 0, size); -- list_add ((struct list_head *)space, &fl->fl_list); -- space += size; -- } while (--n != 0); -- -- return (PTL_OK); --} -- --void - lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl) -lib_freelist_fini (lib_nal_t *nal, lib_freelist_t *fl) --{ -- struct list_head *el; -- int count; -- -- if 
(fl->fl_nobjs == 0) -- return; -- -- count = 0; -- for (el = fl->fl_list.next; el != &fl->fl_list; el = el->next) -- count++; -- -- LASSERT (count == fl->fl_nobjs); -- - nal->cb_free (nal, fl->fl_objs, fl->fl_nobjs * fl->fl_objsize); - PORTAL_FREE(fl->fl_objs, fl->fl_nobjs * fl->fl_objsize); -- memset (fl, 0, sizeof (fl)); --} -- --int - kportal_descriptor_setup (nal_cb_t *nal) -kportal_descriptor_setup (lib_nal_t *nal, - ptl_ni_limits_t *requested_limits, - ptl_ni_limits_t *actual_limits) --{ -- /* NB on failure caller must still call kportal_descriptor_cleanup */ -- /* ****** */ - int rc; - lib_ni_t *ni = &nal->libnal_ni; - int rc; -- - memset (&nal->ni.ni_free_mes, 0, sizeof (nal->ni.ni_free_mes)); - memset (&nal->ni.ni_free_msgs, 0, sizeof (nal->ni.ni_free_msgs)); - memset (&nal->ni.ni_free_mds, 0, sizeof (nal->ni.ni_free_mds)); - memset (&nal->ni.ni_free_eqs, 0, sizeof (nal->ni.ni_free_eqs)); - memset (&ni->ni_free_mes, 0, sizeof (ni->ni_free_mes)); - memset (&ni->ni_free_msgs, 0, sizeof (ni->ni_free_msgs)); - memset (&ni->ni_free_mds, 0, sizeof (ni->ni_free_mds)); - memset (&ni->ni_free_eqs, 0, sizeof (ni->ni_free_eqs)); -- - rc = lib_freelist_init (nal, &nal->ni.ni_free_mes, - /* Ignore requested limits! */ - actual_limits->max_mes = MAX_MES; - actual_limits->max_mds = MAX_MDS; - actual_limits->max_eqs = MAX_EQS; - /* Hahahah what a load of bollocks. 
There's nowhere to - * specify the max # messages in-flight */ - - rc = lib_freelist_init (nal, &ni->ni_free_mes, -- MAX_MES, sizeof (lib_me_t)); -- if (rc != PTL_OK) -- return (rc); -- - rc = lib_freelist_init (nal, &nal->ni.ni_free_msgs, - rc = lib_freelist_init (nal, &ni->ni_free_msgs, -- MAX_MSGS, sizeof (lib_msg_t)); -- if (rc != PTL_OK) -- return (rc); -- - rc = lib_freelist_init (nal, &nal->ni.ni_free_mds, - rc = lib_freelist_init (nal, &ni->ni_free_mds, -- MAX_MDS, sizeof (lib_md_t)); -- if (rc != PTL_OK) -- return (rc); -- - rc = lib_freelist_init (nal, &nal->ni.ni_free_eqs, - rc = lib_freelist_init (nal, &ni->ni_free_eqs, -- MAX_EQS, sizeof (lib_eq_t)); -- return (rc); --} -- --void - kportal_descriptor_cleanup (nal_cb_t *nal) -kportal_descriptor_cleanup (lib_nal_t *nal) --{ - lib_freelist_fini (nal, &nal->ni.ni_free_mes); - lib_freelist_fini (nal, &nal->ni.ni_free_msgs); - lib_freelist_fini (nal, &nal->ni.ni_free_mds); - lib_freelist_fini (nal, &nal->ni.ni_free_eqs); - lib_ni_t *ni = &nal->libnal_ni; - - lib_freelist_fini (nal, &ni->ni_free_mes); - lib_freelist_fini (nal, &ni->ni_free_msgs); - lib_freelist_fini (nal, &ni->ni_free_mds); - lib_freelist_fini (nal, &ni->ni_free_eqs); --} -- --#endif -- --__u64 - lib_create_interface_cookie (nal_cb_t *nal) -lib_create_interface_cookie (lib_nal_t *nal) --{ -- /* NB the interface cookie in wire handles guards against delayed -- * replies and ACKs appearing valid in a new instance of the same -- * interface. Initialisation time, even if it's only implemented -- * to millisecond resolution is probably easily good enough. 
*/ -- struct timeval tv; -- __u64 cookie; --#ifndef __KERNEL__ -- int rc = gettimeofday (&tv, NULL); -- LASSERT (rc == 0); --#else -- do_gettimeofday(&tv); --#endif -- cookie = tv.tv_sec; -- cookie *= 1000000; -- cookie += tv.tv_usec; -- return (cookie); --} -- --int - lib_setup_handle_hash (nal_cb_t *nal) -lib_setup_handle_hash (lib_nal_t *nal) --{ - lib_ni_t *ni = &nal->ni; - lib_ni_t *ni = &nal->libnal_ni; -- int i; -- -- /* Arbitrary choice of hash table size */ --#ifdef __KERNEL__ -- ni->ni_lh_hash_size = PAGE_SIZE / sizeof (struct list_head); --#else -- ni->ni_lh_hash_size = (MAX_MES + MAX_MDS + MAX_EQS)/4; --#endif - ni->ni_lh_hash_table = - (struct list_head *)nal->cb_malloc (nal, ni->ni_lh_hash_size - * sizeof (struct list_head)); - PORTAL_ALLOC(ni->ni_lh_hash_table, - ni->ni_lh_hash_size * sizeof (struct list_head)); -- if (ni->ni_lh_hash_table == NULL) - return (PTL_NOSPACE); - return (PTL_NO_SPACE); -- -- for (i = 0; i < ni->ni_lh_hash_size; i++) -- INIT_LIST_HEAD (&ni->ni_lh_hash_table[i]); -- -- ni->ni_next_object_cookie = PTL_COOKIE_TYPES; -- -- return (PTL_OK); --} -- --void - lib_cleanup_handle_hash (nal_cb_t *nal) -lib_cleanup_handle_hash (lib_nal_t *nal) --{ - lib_ni_t *ni = &nal->ni; - lib_ni_t *ni = &nal->libnal_ni; -- -- if (ni->ni_lh_hash_table == NULL) -- return; -- - nal->cb_free (nal, ni->ni_lh_hash_table, - ni->ni_lh_hash_size * sizeof (struct list_head)); - PORTAL_FREE(ni->ni_lh_hash_table, - ni->ni_lh_hash_size * sizeof (struct list_head)); --} -- --lib_handle_t * - lib_lookup_cookie (nal_cb_t *nal, __u64 cookie, int type) -lib_lookup_cookie (lib_nal_t *nal, __u64 cookie, int type) --{ -- /* ALWAYS called with statelock held */ - lib_ni_t *ni = &nal->ni; - lib_ni_t *ni = &nal->libnal_ni; -- struct list_head *list; -- struct list_head *el; -- unsigned int hash; -- -- if ((cookie & (PTL_COOKIE_TYPES - 1)) != type) -- return (NULL); -- -- hash = ((unsigned int)cookie) % ni->ni_lh_hash_size; -- list = &ni->ni_lh_hash_table[hash]; -- -- 
list_for_each (el, list) { -- lib_handle_t *lh = list_entry (el, lib_handle_t, lh_hash_chain); -- -- if (lh->lh_cookie == cookie) -- return (lh); -- } -- -- return (NULL); --} -- --void - lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh, int type) -lib_initialise_handle (lib_nal_t *nal, lib_handle_t *lh, int type) --{ -- /* ALWAYS called with statelock held */ - lib_ni_t *ni = &nal->ni; - lib_ni_t *ni = &nal->libnal_ni; -- unsigned int hash; -- -- LASSERT (type >= 0 && type < PTL_COOKIE_TYPES); -- lh->lh_cookie = ni->ni_next_object_cookie | type; -- ni->ni_next_object_cookie += PTL_COOKIE_TYPES; -- -- hash = ((unsigned int)lh->lh_cookie) % ni->ni_lh_hash_size; -- list_add (&lh->lh_hash_chain, &ni->ni_lh_hash_table[hash]); --} -- --void - lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh) -lib_invalidate_handle (lib_nal_t *nal, lib_handle_t *lh) --{ -- list_del (&lh->lh_hash_chain); --} -- --int - lib_init(nal_cb_t * nal, ptl_nid_t nid, ptl_pid_t pid, int gsize, - ptl_pt_index_t ptl_size, ptl_ac_index_t acl_size) -lib_init(lib_nal_t *libnal, nal_t *apinal, - ptl_process_id_t process_id, - ptl_ni_limits_t *requested_limits, - ptl_ni_limits_t *actual_limits) --{ -- int rc = PTL_OK; - lib_ni_t *ni = &nal->ni; - int i; - lib_ni_t *ni = &libnal->libnal_ni; - int ptl_size; - int i; -- ENTRY; -- -- /* NB serialised in PtlNIInit() */ - - if (ni->refcnt != 0) { /* already initialised */ - ni->refcnt++; - goto out; - } -- -- lib_assert_wire_constants (); - - /* - * Allocate the portal table for this interface - * and all per-interface objects. 
- */ - memset(&ni->counters, 0, sizeof(lib_counters_t)); -- - rc = kportal_descriptor_setup (nal); - /* Setup the API nal with the lib API handling functions */ - apinal->nal_get_id = lib_api_get_id; - apinal->nal_ni_status = lib_api_ni_status; - apinal->nal_ni_dist = lib_api_ni_dist; - apinal->nal_fail_nid = lib_api_fail_nid; - apinal->nal_me_attach = lib_api_me_attach; - apinal->nal_me_insert = lib_api_me_insert; - apinal->nal_me_unlink = lib_api_me_unlink; - apinal->nal_md_attach = lib_api_md_attach; - apinal->nal_md_bind = lib_api_md_bind; - apinal->nal_md_unlink = lib_api_md_unlink; - apinal->nal_md_update = lib_api_md_update; - apinal->nal_eq_alloc = lib_api_eq_alloc; - apinal->nal_eq_free = lib_api_eq_free; - apinal->nal_eq_poll = lib_api_eq_poll; - apinal->nal_put = lib_api_put; - apinal->nal_get = lib_api_get; - - apinal->nal_data = libnal; - ni->ni_api = apinal; - - rc = kportal_descriptor_setup (libnal, requested_limits, - &ni->ni_actual_limits); -- if (rc != PTL_OK) -- goto out; - - memset(&ni->ni_counters, 0, sizeof(lib_counters_t)); -- -- INIT_LIST_HEAD (&ni->ni_active_msgs); -- INIT_LIST_HEAD (&ni->ni_active_mds); -- INIT_LIST_HEAD (&ni->ni_active_eqs); - -- INIT_LIST_HEAD (&ni->ni_test_peers); -- - ni->ni_interface_cookie = lib_create_interface_cookie (nal); -#ifdef __KERNEL__ - spin_lock_init (&ni->ni_lock); - init_waitqueue_head (&ni->ni_waitq); -#else - pthread_mutex_init(&ni->ni_mutex, NULL); - pthread_cond_init(&ni->ni_cond, NULL); -#endif - - ni->ni_interface_cookie = lib_create_interface_cookie (libnal); -- ni->ni_next_object_cookie = 0; - rc = lib_setup_handle_hash (nal); - rc = lib_setup_handle_hash (libnal); -- if (rc != PTL_OK) -- goto out; -- - ni->nid = nid; - ni->pid = pid; - ni->ni_pid = process_id; -- - ni->num_nodes = gsize; - ni->tbl.size = ptl_size; - if (requested_limits != NULL) - ptl_size = requested_limits->max_pt_index + 1; - else - ptl_size = 64; -- - ni->tbl.tbl = nal->cb_malloc(nal, sizeof(struct list_head) * ptl_size); - 
if (ni->tbl.tbl == NULL) { - rc = PTL_NOSPACE; - ni->ni_portals.size = ptl_size; - PORTAL_ALLOC(ni->ni_portals.tbl, - ptl_size * sizeof(struct list_head)); - if (ni->ni_portals.tbl == NULL) { - rc = PTL_NO_SPACE; -- goto out; -- } -- -- for (i = 0; i < ptl_size; i++) - INIT_LIST_HEAD(&(ni->tbl.tbl[i])); - INIT_LIST_HEAD(&(ni->ni_portals.tbl[i])); -- - ni->debug = PTL_DEBUG_NONE; - ni->up = 1; - ni->refcnt++; - /* max_{mes,mds,eqs} set in kportal_descriptor_setup */ - - /* We don't have an access control table! */ - ni->ni_actual_limits.max_ac_index = -1; - - ni->ni_actual_limits.max_pt_index = ptl_size - 1; - ni->ni_actual_limits.max_md_iovecs = PTL_MD_MAX_IOV; - ni->ni_actual_limits.max_me_list = INT_MAX; - - /* We don't support PtlGetPut! */ - ni->ni_actual_limits.max_getput_md = 0; - - if (actual_limits != NULL) - *actual_limits = ni->ni_actual_limits; -- -- out: -- if (rc != PTL_OK) { - lib_cleanup_handle_hash (nal); - kportal_descriptor_cleanup (nal); - lib_cleanup_handle_hash (libnal); - kportal_descriptor_cleanup (libnal); -- } -- -- RETURN (rc); --} -- --int - lib_fini(nal_cb_t * nal) -lib_fini(lib_nal_t *nal) --{ - lib_ni_t *ni = &nal->ni; - lib_ni_t *ni = &nal->libnal_ni; -- int idx; - - ni->refcnt--; - - if (ni->refcnt != 0) - goto out; -- - /* NB no stat_lock() since this is the last reference. The NAL - /* NB no state_lock() since this is the last reference. 
The NAL -- * should have shut down already, so it should be safe to unlink -- * and free all descriptors, even those that appear committed to a -- * network op (eg MD with non-zero pending count) -- */ -- - for (idx = 0; idx < ni->tbl.size; idx++) - while (!list_empty (&ni->tbl.tbl[idx])) { - lib_me_t *me = list_entry (ni->tbl.tbl[idx].next, - for (idx = 0; idx < ni->ni_portals.size; idx++) - while (!list_empty (&ni->ni_portals.tbl[idx])) { - lib_me_t *me = list_entry (ni->ni_portals.tbl[idx].next, -- lib_me_t, me_list); -- -- CERROR ("Active me %p on exit\n", me); -- list_del (&me->me_list); -- lib_me_free (nal, me); -- } -- -- while (!list_empty (&ni->ni_active_mds)) { -- lib_md_t *md = list_entry (ni->ni_active_mds.next, -- lib_md_t, md_list); -- -- CERROR ("Active md %p on exit\n", md); -- list_del (&md->md_list); -- lib_md_free (nal, md); -- } -- -- while (!list_empty (&ni->ni_active_eqs)) { -- lib_eq_t *eq = list_entry (ni->ni_active_eqs.next, -- lib_eq_t, eq_list); -- -- CERROR ("Active eq %p on exit\n", eq); -- list_del (&eq->eq_list); -- lib_eq_free (nal, eq); -- } -- -- while (!list_empty (&ni->ni_active_msgs)) { -- lib_msg_t *msg = list_entry (ni->ni_active_msgs.next, -- lib_msg_t, msg_list); -- -- CERROR ("Active msg %p on exit\n", msg); -- list_del (&msg->msg_list); -- lib_msg_free (nal, msg); -- } -- - nal->cb_free(nal, ni->tbl.tbl, sizeof(struct list_head) * ni->tbl.size); - ni->up = 0; - PORTAL_FREE(ni->ni_portals.tbl, - ni->ni_portals.size * sizeof(struct list_head)); -- -- lib_cleanup_handle_hash (nal); -- kportal_descriptor_cleanup (nal); -- - out: -#ifndef __KERNEL__ - pthread_mutex_destroy(&ni->ni_mutex); - pthread_cond_destroy(&ni->ni_cond); -#endif - -- return (PTL_OK); --} diff --cc lnet/lnet/lib-md.c index a1ed583,6deadb8..0000000 deleted file mode 100644,100644 --- a/lnet/lnet/lib-md.c +++ /dev/null @@@ -1,446 -1,426 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- 
* lib/lib-md.c -- * Memory Descriptor management routines -- * -- * Copyright (c) 2001-2003 Cluster File Systems, Inc. -- * Copyright (c) 2001-2002 Sandia National Laboratories -- * -- * This file is part of Lustre, http://www.sf.net/projects/lustre/ -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --#ifndef __KERNEL__ --# include --#else --# define DEBUG_SUBSYSTEM S_PORTALS --# include --#endif -- --#include - #include -- - /* - * must be called with state lock held - */ - void lib_md_unlink(nal_cb_t * nal, lib_md_t * md) -/* must be called with state lock held */ -void -lib_md_unlink(lib_nal_t *nal, lib_md_t *md) --{ - lib_me_t *me = md->me; - if ((md->md_flags & PTL_MD_FLAG_ZOMBIE) == 0) { - /* first unlink attempt... 
*/ - lib_me_t *me = md->me; - - md->md_flags |= PTL_MD_FLAG_ZOMBIE; - - /* Disassociate from ME (if any), and unlink it if it was created - * with PTL_UNLINK */ - if (me != NULL) { - me->md = NULL; - if (me->unlink == PTL_UNLINK) - lib_me_unlink(nal, me); - } - - /* emsure all future handle lookups fail */ - lib_invalidate_handle(nal, &md->md_lh); - } -- -- if (md->pending != 0) { -- CDEBUG(D_NET, "Queueing unlink of md %p\n", md); - md->md_flags |= PTL_MD_FLAG_UNLINK; -- return; -- } -- -- CDEBUG(D_NET, "Unlinking md %p\n", md); -- -- if ((md->options & PTL_MD_KIOV) != 0) { - if (nal->cb_unmap_pages != NULL) - nal->cb_unmap_pages (nal, md->md_niov, md->md_iov.kiov, - &md->md_addrkey); - } else if (nal->cb_unmap != NULL) - nal->cb_unmap (nal, md->md_niov, md->md_iov.iov, - &md->md_addrkey); - - if (me) { - me->md = NULL; - if (me->unlink == PTL_UNLINK) - lib_me_unlink(nal, me); - if (nal->libnal_unmap_pages != NULL) - nal->libnal_unmap_pages (nal, - md->md_niov, - md->md_iov.kiov, - &md->md_addrkey); - } else if (nal->libnal_unmap != NULL) { - nal->libnal_unmap (nal, - md->md_niov, md->md_iov.iov, - &md->md_addrkey); -- } -- - if (md->eq != NULL) - { - if (md->eq != NULL) { -- md->eq->eq_refcount--; -- LASSERT (md->eq->eq_refcount >= 0); -- } -- - lib_invalidate_handle (nal, &md->md_lh); -- list_del (&md->md_list); -- lib_md_free(nal, md); --} -- --/* must be called with state lock held */ - static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, - ptl_md_t *md, ptl_handle_eq_t *eqh, int unlink) -static int -lib_md_build(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd, int unlink) --{ - const int max_size_opts = PTL_MD_AUTO_UNLINK | - PTL_MD_MAX_SIZE; -- lib_eq_t *eq = NULL; -- int rc; -- int i; - int niov; - int total_length = 0; -- -- /* NB we are passed an allocated, but uninitialised/active md. -- * if we return success, caller may lib_md_unlink() it. -- * otherwise caller may only lib_md_free() it. 
-- */ -- - if (!PtlHandleEqual (*eqh, PTL_EQ_NONE)) { - eq = ptl_handle2eq(eqh, nal); - if (!PtlHandleIsEqual (umd->eq_handle, PTL_EQ_NONE)) { - eq = ptl_handle2eq(&umd->eq_handle, nal); -- if (eq == NULL) - return PTL_INV_EQ; - return PTL_EQ_INVALID; -- } - - /* Must check this _before_ allocation. Also, note that non-iov - * MDs must set md_niov to 0. */ - LASSERT((md->options & (PTL_MD_IOV | PTL_MD_KIOV)) == 0 || - md->niov <= PTL_MD_MAX_IOV); -- - if ((md->options & max_size_opts) != 0 && /* max size used */ - (md->max_size < 0 || md->max_size > md->length)) // illegal max_size - return PTL_INV_MD; - /* This implementation doesn't know how to create START events or - * disable END events. Best to LASSERT our caller is compliant so - * we find out quickly... */ - LASSERT (eq == NULL || - ((umd->options & PTL_MD_EVENT_START_DISABLE) != 0 && - (umd->options & PTL_MD_EVENT_END_DISABLE) == 0)); -- - new->me = NULL; - new->start = md->start; - new->length = md->length; - new->offset = 0; - new->max_size = md->max_size; - new->unlink = unlink; - new->options = md->options; - new->user_ptr = md->user_ptr; - new->eq = eq; - new->threshold = md->threshold; - new->pending = 0; - new->md_flags = 0; - lmd->me = NULL; - lmd->start = umd->start; - lmd->offset = 0; - lmd->max_size = umd->max_size; - lmd->options = umd->options; - lmd->user_ptr = umd->user_ptr; - lmd->eq = eq; - lmd->threshold = umd->threshold; - lmd->pending = 0; - lmd->md_flags = (unlink == PTL_UNLINK) ? 
PTL_MD_FLAG_AUTO_UNLINK : 0; -- - if ((md->options & PTL_MD_IOV) != 0) { - int total_length = 0; - if ((umd->options & PTL_MD_IOVEC) != 0) { -- - if ((md->options & PTL_MD_KIOV) != 0) /* Can't specify both */ - return PTL_INV_MD; - if ((umd->options & PTL_MD_KIOV) != 0) /* Can't specify both */ - return PTL_MD_ILLEGAL; -- - new->md_niov = md->niov; - - if (nal->cb_read (nal, private, new->md_iov.iov, md->start, - md->niov * sizeof (new->md_iov.iov[0]))) - return PTL_SEGV; - lmd->md_niov = niov = umd->length; - memcpy(lmd->md_iov.iov, umd->start, - niov * sizeof (lmd->md_iov.iov[0])); -- - for (i = 0; i < new->md_niov; i++) { - for (i = 0; i < niov; i++) { -- /* We take the base address on trust */ - if (new->md_iov.iov[i].iov_len <= 0) /* invalid length */ - return PTL_VAL_FAILED; - if (lmd->md_iov.iov[i].iov_len <= 0) /* invalid length */ - return PTL_MD_ILLEGAL; -- - total_length += new->md_iov.iov[i].iov_len; - total_length += lmd->md_iov.iov[i].iov_len; -- } -- - if (md->length > total_length) - return PTL_IOV_TOO_SMALL; - - if (nal->cb_map != NULL) { - rc = nal->cb_map (nal, new->md_niov, new->md_iov.iov, - &new->md_addrkey); - lmd->length = total_length; - - if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */ - (umd->max_size < 0 || - umd->max_size > total_length)) // illegal max_size - return PTL_MD_ILLEGAL; - - if (nal->libnal_map != NULL) { - rc = nal->libnal_map (nal, niov, lmd->md_iov.iov, - &lmd->md_addrkey); -- if (rc != PTL_OK) -- return (rc); -- } - } else if ((md->options & PTL_MD_KIOV) != 0) { - } else if ((umd->options & PTL_MD_KIOV) != 0) { --#ifndef __KERNEL__ - return PTL_INV_MD; - #else - int total_length = 0; - - return PTL_MD_ILLEGAL; -#else -- /* Trap attempt to use paged I/O if unsupported early. 
*/ - if (nal->cb_send_pages == NULL || - nal->cb_recv_pages == NULL) - return PTL_INV_MD; - if (nal->libnal_send_pages == NULL || - nal->libnal_recv_pages == NULL) - return PTL_MD_INVALID; -- - new->md_niov = md->niov; - lmd->md_niov = niov = umd->length; - memcpy(lmd->md_iov.kiov, umd->start, - niov * sizeof (lmd->md_iov.kiov[0])); -- - if (nal->cb_read (nal, private, new->md_iov.kiov, md->start, - md->niov * sizeof (new->md_iov.kiov[0]))) - return PTL_SEGV; - - for (i = 0; i < new->md_niov; i++) { - for (i = 0; i < niov; i++) { -- /* We take the page pointer on trust */ - if (new->md_iov.kiov[i].kiov_offset + - new->md_iov.kiov[i].kiov_len > PAGE_SIZE ) - if (lmd->md_iov.kiov[i].kiov_offset + - lmd->md_iov.kiov[i].kiov_len > PAGE_SIZE ) -- return PTL_VAL_FAILED; /* invalid length */ -- - total_length += new->md_iov.kiov[i].kiov_len; - total_length += lmd->md_iov.kiov[i].kiov_len; -- } -- - if (md->length > total_length) - return PTL_IOV_TOO_SMALL; - lmd->length = total_length; -- - if (nal->cb_map_pages != NULL) { - rc = nal->cb_map_pages (nal, new->md_niov, new->md_iov.kiov, - &new->md_addrkey); - if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */ - (umd->max_size < 0 || - umd->max_size > total_length)) // illegal max_size - return PTL_MD_ILLEGAL; - - if (nal->libnal_map_pages != NULL) { - rc = nal->libnal_map_pages (nal, niov, lmd->md_iov.kiov, - &lmd->md_addrkey); -- if (rc != PTL_OK) -- return (rc); -- } --#endif -- } else { /* contiguous */ - new->md_niov = 1; - new->md_iov.iov[0].iov_base = md->start; - new->md_iov.iov[0].iov_len = md->length; - lmd->length = umd->length; - lmd->md_niov = niov = 1; - lmd->md_iov.iov[0].iov_base = umd->start; - lmd->md_iov.iov[0].iov_len = umd->length; -- - if (nal->cb_map != NULL) { - rc = nal->cb_map (nal, new->md_niov, new->md_iov.iov, - &new->md_addrkey); - if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */ - (umd->max_size < 0 || - umd->max_size > umd->length)) // illegal max_size - return 
PTL_MD_ILLEGAL; - - if (nal->libnal_map != NULL) { - rc = nal->libnal_map (nal, niov, lmd->md_iov.iov, - &lmd->md_addrkey); -- if (rc != PTL_OK) -- return (rc); -- } -- } -- -- if (eq != NULL) -- eq->eq_refcount++; -- -- /* It's good; let handle2md succeed and add to active mds */ - lib_initialise_handle (nal, &new->md_lh, PTL_COOKIE_TYPE_MD); - list_add (&new->md_list, &nal->ni.ni_active_mds); - lib_initialise_handle (nal, &lmd->md_lh, PTL_COOKIE_TYPE_MD); - list_add (&lmd->md_list, &nal->libnal_ni.ni_active_mds); -- -- return PTL_OK; --} -- --/* must be called with state lock held */ - void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md, ptl_md_t * new) -void -lib_md_deconstruct(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd) --{ -- /* NB this doesn't copy out all the iov entries so when a -- * discontiguous MD is copied out, the target gets to know the -- * original iov pointer (in start) and the number of entries it had -- * and that's all. -- */ - new->start = md->start; - new->length = md->length; - new->threshold = md->threshold; - new->max_size = md->max_size; - new->options = md->options; - new->user_ptr = md->user_ptr; - ptl_eq2handle(&new->eventq, md->eq); - new->niov = ((md->options & (PTL_MD_IOV | PTL_MD_KIOV)) == 0) ? 0 : md->md_niov; - umd->start = lmd->start; - umd->length = ((lmd->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0) ? 
- lmd->length : lmd->md_niov; - umd->threshold = lmd->threshold; - umd->max_size = lmd->max_size; - umd->options = lmd->options; - umd->user_ptr = lmd->user_ptr; - ptl_eq2handle(&umd->eq_handle, nal, lmd->eq); --} -- - int do_PtlMDAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -int -lib_api_md_attach(nal_t *apinal, ptl_handle_me_t *meh, - ptl_md_t *umd, ptl_unlink_t unlink, - ptl_handle_md_t *handle) --{ - /* - * Incoming: - * ptl_handle_me_t current_in - * ptl_md_t md_in - * ptl_unlink_t unlink_in - * - * Outgoing: - * ptl_handle_md_t * handle_out - */ - - PtlMDAttach_in *args = v_args; - PtlMDAttach_out *ret = v_ret; - lib_me_t *me; - lib_md_t *md; - lib_nal_t *nal = apinal->nal_data; - lib_me_t *me; - lib_md_t *md; -- unsigned long flags; - int rc; -- - if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOV)) != 0 && - args->md_in.niov > PTL_MD_MAX_IOV) /* too many fragments */ - return (ret->rc = PTL_IOV_TOO_MANY); - if ((umd->options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 && - umd->length > PTL_MD_MAX_IOV) /* too many fragments */ - return PTL_IOV_INVALID; -- - md = lib_md_alloc(nal, &args->md_in); - md = lib_md_alloc(nal, umd); -- if (md == NULL) - return (ret->rc = PTL_NOSPACE); - return PTL_NO_SPACE; -- - state_lock(nal, &flags); - LIB_LOCK(nal, flags); -- - me = ptl_handle2me(&args->me_in, nal); - me = ptl_handle2me(meh, nal); -- if (me == NULL) { - ret->rc = PTL_INV_ME; - rc = PTL_ME_INVALID; -- } else if (me->md != NULL) { - ret->rc = PTL_INUSE; - rc = PTL_ME_IN_USE; -- } else { - ret->rc = lib_md_build(nal, md, private, &args->md_in, - &args->eq_in, args->unlink_in); - - if (ret->rc == PTL_OK) { - rc = lib_md_build(nal, md, umd, unlink); - if (rc == PTL_OK) { -- me->md = md; -- md->me = me; -- - ptl_md2handle(&ret->handle_out, md); - ptl_md2handle(handle, nal, md); -- - state_unlock (nal, &flags); - LIB_UNLOCK(nal, flags); -- return (PTL_OK); -- } -- } -- -- lib_md_free (nal, md); -- - state_unlock (nal, &flags); - return (ret->rc); - 
LIB_UNLOCK(nal, flags); - return (rc); --} -- - int do_PtlMDBind(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -int -lib_api_md_bind(nal_t *apinal, - ptl_md_t *umd, ptl_unlink_t unlink, - ptl_handle_md_t *handle) --{ - /* - * Incoming: - * ptl_handle_ni_t ni_in - * ptl_md_t md_in - * - * Outgoing: - * ptl_handle_md_t * handle_out - */ - - PtlMDBind_in *args = v_args; - PtlMDBind_out *ret = v_ret; - lib_md_t *md; - lib_nal_t *nal = apinal->nal_data; - lib_md_t *md; -- unsigned long flags; - int rc; -- - if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOV)) != 0 && - args->md_in.niov > PTL_MD_MAX_IOV) /* too many fragments */ - return (ret->rc = PTL_IOV_TOO_MANY); - if ((umd->options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 && - umd->length > PTL_MD_MAX_IOV) /* too many fragments */ - return PTL_IOV_INVALID; -- - md = lib_md_alloc(nal, &args->md_in); - md = lib_md_alloc(nal, umd); -- if (md == NULL) - return (ret->rc = PTL_NOSPACE); - return PTL_NO_SPACE; -- - state_lock(nal, &flags); - LIB_LOCK(nal, flags); -- - ret->rc = lib_md_build(nal, md, private, - &args->md_in, &args->eq_in, PTL_UNLINK); - rc = lib_md_build(nal, md, umd, unlink); -- - if (ret->rc == PTL_OK) { - ptl_md2handle(&ret->handle_out, md); - if (rc == PTL_OK) { - ptl_md2handle(handle, nal, md); -- - state_unlock(nal, &flags); - LIB_UNLOCK(nal, flags); -- return (PTL_OK); -- } -- -- lib_md_free (nal, md); -- - state_unlock(nal, &flags); - return (ret->rc); - LIB_UNLOCK(nal, flags); - return (rc); --} -- - int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -int -lib_api_md_unlink (nal_t *apinal, ptl_handle_md_t *mdh) --{ - PtlMDUnlink_in *args = v_args; - PtlMDUnlink_out *ret = v_ret; - lib_nal_t *nal = apinal->nal_data; -- ptl_event_t ev; -- lib_md_t *md; -- unsigned long flags; -- - state_lock(nal, &flags); - LIB_LOCK(nal, flags); -- - md = ptl_handle2md(&args->md_in, nal); - md = ptl_handle2md(mdh, nal); -- if (md == NULL) { - state_unlock(nal, &flags); - return 
(ret->rc = PTL_INV_MD); - LIB_UNLOCK(nal, flags); - return PTL_MD_INVALID; -- } -- -- /* If the MD is busy, lib_md_unlink just marks it for deletion, and -- * when the NAL is done, the completion event flags that the MD was -- * unlinked. Otherwise, we enqueue an event now... */ -- -- if (md->eq != NULL && -- md->pending == 0) { -- memset(&ev, 0, sizeof(ev)); -- -- ev.type = PTL_EVENT_UNLINK; - ev.status = PTL_OK; - ev.ni_fail_type = PTL_OK; -- ev.unlinked = 1; - lib_md_deconstruct(nal, md, &ev.mem_desc); - lib_md_deconstruct(nal, md, &ev.md); - ptl_md2handle(&ev.md_handle, nal, md); -- - lib_enq_event_locked(nal, private, md->eq, &ev); - lib_enq_event_locked(nal, NULL, md->eq, &ev); -- } -- - lib_md_deconstruct(nal, md, &ret->status_out); -- lib_md_unlink(nal, md); - ret->rc = PTL_OK; - - state_unlock(nal, &flags); -- - return (PTL_OK); - LIB_UNLOCK(nal, flags); - return PTL_OK; --} -- - int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *v_args, - void *v_ret) -int -lib_api_md_update (nal_t *apinal, - ptl_handle_md_t *mdh, - ptl_md_t *oldumd, ptl_md_t *newumd, - ptl_handle_eq_t *testqh) --{ - /* - * Incoming: - * ptl_handle_md_t md_in - * ptl_md_t * old_inout - * ptl_md_t * new_inout - * ptl_handle_eq_t testq_in - * ptl_seq_t sequence_in - * - * Outgoing: - * ptl_md_t * old_inout - * ptl_md_t * new_inout - */ - PtlMDUpdate_internal_in *args = v_args; - PtlMDUpdate_internal_out *ret = v_ret; - lib_md_t *md; - lib_eq_t *test_eq = NULL; - ptl_md_t *new = &args->new_inout; - lib_nal_t *nal = apinal->nal_data; - lib_md_t *md; - lib_eq_t *test_eq = NULL; -- unsigned long flags; - int rc; -- - state_lock(nal, &flags); - LIB_LOCK(nal, flags); -- - md = ptl_handle2md(&args->md_in, nal); - md = ptl_handle2md(mdh, nal); -- if (md == NULL) { - ret->rc = PTL_INV_MD; - rc = PTL_MD_INVALID; -- goto out; -- } - - if (args->old_inout_valid) - lib_md_deconstruct(nal, md, &ret->old_inout); -- - if (!args->new_inout_valid) { - ret->rc = PTL_OK; - goto out; - } - if 
(oldumd != NULL) - lib_md_deconstruct(nal, md, oldumd); -- - /* XXX fttb, the new MD must be the same type wrt fragmentation */ - if (((new->options ^ md->options) & - (PTL_MD_IOV | PTL_MD_KIOV)) != 0) { - ret->rc = PTL_INV_MD; - if (newumd == NULL) { - rc = PTL_OK; -- goto out; -- } -- - if (new->niov > md->md_niov) { - ret->rc = PTL_IOV_TOO_MANY; - /* XXX fttb, the new MD must be the same "shape" wrt fragmentation, - * since we simply overwrite the old lib-md */ - if ((((newumd->options ^ md->options) & - (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0) || - ((newumd->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0 && - newumd->length != md->md_niov)) { - rc = PTL_IOV_INVALID; -- goto out; -- } - - if (new->niov < md->md_niov) { - ret->rc = PTL_IOV_TOO_SMALL; - goto out; - } -- - if (!PtlHandleEqual (args->testq_in, PTL_EQ_NONE)) { - test_eq = ptl_handle2eq(&args->testq_in, nal); - if (!PtlHandleIsEqual (*testqh, PTL_EQ_NONE)) { - test_eq = ptl_handle2eq(testqh, nal); -- if (test_eq == NULL) { - ret->rc = PTL_INV_EQ; - rc = PTL_EQ_INVALID; -- goto out; -- } -- } -- -- if (md->pending != 0) { - ret->rc = PTL_NOUPDATE; - goto out; - rc = PTL_MD_NO_UPDATE; - goto out; -- } -- -- if (test_eq == NULL || - test_eq->sequence == args->sequence_in) { - test_eq->eq_deq_seq == test_eq->eq_enq_seq) { -- lib_me_t *me = md->me; - int unlink = (md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) ? 
- PTL_UNLINK : PTL_RETAIN; -- -- // #warning this does not track eq refcounts properly - ret->rc = lib_md_build(nal, md, private, - new, &new->eventq, md->unlink); - rc = lib_md_build(nal, md, newumd, unlink); -- -- md->me = me; -- } else { - ret->rc = PTL_NOUPDATE; - rc = PTL_MD_NO_UPDATE; -- } -- -- out: - state_unlock(nal, &flags); - return (ret->rc); - LIB_UNLOCK(nal, flags); - - return rc; --} diff --cc lnet/lnet/lib-me.c index 31ac214,9665b4f..0000000 deleted file mode 100644,100644 --- a/lnet/lnet/lib-me.c +++ /dev/null @@@ -1,227 -1,185 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * lib/lib-me.c -- * Match Entry management routines -- * -- * Copyright (c) 2001-2003 Cluster File Systems, Inc. -- * Copyright (c) 2001-2002 Sandia National Laboratories -- * -- * This file is part of Lustre, http://www.sf.net/projects/lustre/ -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-- */ -- --#ifndef __KERNEL__ --# include --#else --# define DEBUG_SUBSYSTEM S_PORTALS --# include --#endif -- --#include - #include - - static void lib_me_dump(nal_cb_t * nal, lib_me_t * me); -- - int do_PtlMEAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -int -lib_api_me_attach(nal_t *apinal, - ptl_pt_index_t portal, - ptl_process_id_t match_id, - ptl_match_bits_t match_bits, - ptl_match_bits_t ignore_bits, - ptl_unlink_t unlink, ptl_ins_pos_t pos, - ptl_handle_me_t *handle) --{ - PtlMEAttach_in *args = v_args; - PtlMEAttach_out *ret = v_ret; - lib_ni_t *ni = &nal->ni; - lib_ptl_t *tbl = &ni->tbl; - lib_nal_t *nal = apinal->nal_data; - lib_ni_t *ni = &nal->libnal_ni; - lib_ptl_t *tbl = &ni->ni_portals; - lib_me_t *me; -- unsigned long flags; - lib_me_t *me; -- - if (args->index_in >= tbl->size) - return ret->rc = PTL_INV_PTINDEX; - if (portal >= tbl->size) - return PTL_PT_INDEX_INVALID; -- -- /* Should check for valid matchid, but not yet */ - if (0) - return ret->rc = PTL_INV_PROC; -- -- me = lib_me_alloc (nal); -- if (me == NULL) - return (ret->rc = PTL_NOSPACE); - return PTL_NO_SPACE; -- - state_lock(nal, &flags); - LIB_LOCK(nal, flags); -- - me->match_id = args->match_id_in; - me->match_bits = args->match_bits_in; - me->ignore_bits = args->ignore_bits_in; - me->unlink = args->unlink_in; - me->match_id = match_id; - me->match_bits = match_bits; - me->ignore_bits = ignore_bits; - me->unlink = unlink; -- me->md = NULL; -- -- lib_initialise_handle (nal, &me->me_lh, PTL_COOKIE_TYPE_ME); -- - if (args->position_in == PTL_INS_AFTER) - list_add_tail(&me->me_list, &(tbl->tbl[args->index_in])); - if (pos == PTL_INS_AFTER) - list_add_tail(&me->me_list, &(tbl->tbl[portal])); -- else - list_add(&me->me_list, &(tbl->tbl[args->index_in])); - list_add(&me->me_list, &(tbl->tbl[portal])); -- - ptl_me2handle(&ret->handle_out, me); - ptl_me2handle(handle, nal, me); -- - state_unlock(nal, &flags); - LIB_UNLOCK(nal, flags); -- - return ret->rc = PTL_OK; - return 
PTL_OK; --} -- - int do_PtlMEInsert(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -int -lib_api_me_insert(nal_t *apinal, - ptl_handle_me_t *current_meh, - ptl_process_id_t match_id, - ptl_match_bits_t match_bits, - ptl_match_bits_t ignore_bits, - ptl_unlink_t unlink, ptl_ins_pos_t pos, - ptl_handle_me_t *handle) --{ - PtlMEInsert_in *args = v_args; - PtlMEInsert_out *ret = v_ret; - lib_nal_t *nal = apinal->nal_data; - lib_me_t *current_me; - lib_me_t *new_me; -- unsigned long flags; - lib_me_t *me; - lib_me_t *new; -- - new = lib_me_alloc (nal); - if (new == NULL) - return (ret->rc = PTL_NOSPACE); - new_me = lib_me_alloc (nal); - if (new_me == NULL) - return PTL_NO_SPACE; -- -- /* Should check for valid matchid, but not yet */ -- - state_lock(nal, &flags); - LIB_LOCK(nal, flags); -- - me = ptl_handle2me(&args->current_in, nal); - if (me == NULL) { - lib_me_free (nal, new); - current_me = ptl_handle2me(current_meh, nal); - if (current_me == NULL) { - lib_me_free (nal, new_me); -- - state_unlock (nal, &flags); - return (ret->rc = PTL_INV_ME); - LIB_UNLOCK(nal, flags); - return PTL_ME_INVALID; -- } -- - new->match_id = args->match_id_in; - new->match_bits = args->match_bits_in; - new->ignore_bits = args->ignore_bits_in; - new->unlink = args->unlink_in; - new->md = NULL; - new_me->match_id = match_id; - new_me->match_bits = match_bits; - new_me->ignore_bits = ignore_bits; - new_me->unlink = unlink; - new_me->md = NULL; -- - lib_initialise_handle (nal, &new->me_lh, PTL_COOKIE_TYPE_ME); - lib_initialise_handle (nal, &new_me->me_lh, PTL_COOKIE_TYPE_ME); -- - if (args->position_in == PTL_INS_AFTER) - list_add_tail(&new->me_list, &me->me_list); - if (pos == PTL_INS_AFTER) - list_add_tail(&new_me->me_list, ¤t_me->me_list); -- else - list_add(&new->me_list, &me->me_list); - list_add(&new_me->me_list, ¤t_me->me_list); -- - ptl_me2handle(&ret->handle_out, new); - ptl_me2handle(handle, nal, new_me); -- - state_unlock(nal, &flags); - LIB_UNLOCK(nal, flags); -- - 
return ret->rc = PTL_OK; - return PTL_OK; --} -- - int do_PtlMEUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -int -lib_api_me_unlink (nal_t *apinal, ptl_handle_me_t *meh) --{ - PtlMEUnlink_in *args = v_args; - PtlMEUnlink_out *ret = v_ret; - lib_nal_t *nal = apinal->nal_data; -- unsigned long flags; - lib_me_t *me; - lib_me_t *me; - int rc; -- - state_lock(nal, &flags); - LIB_LOCK(nal, flags); -- - me = ptl_handle2me(&args->current_in, nal); - me = ptl_handle2me(meh, nal); -- if (me == NULL) { - ret->rc = PTL_INV_ME; - rc = PTL_ME_INVALID; -- } else { -- lib_me_unlink(nal, me); - ret->rc = PTL_OK; - rc = PTL_OK; -- } -- - state_unlock(nal, &flags); - LIB_UNLOCK(nal, flags); -- - return (ret->rc); - return (rc); --} -- --/* call with state_lock please */ - void lib_me_unlink(nal_cb_t *nal, lib_me_t *me) -void -lib_me_unlink(lib_nal_t *nal, lib_me_t *me) --{ - lib_ni_t *ni = &nal->ni; - - if (ni->debug & PTL_DEBUG_UNLINK) { - ptl_handle_any_t handle; - ptl_me2handle(&handle, me); - } - -- list_del (&me->me_list); -- -- if (me->md) { -- me->md->me = NULL; -- lib_md_unlink(nal, me->md); -- } -- -- lib_invalidate_handle (nal, &me->me_lh); -- lib_me_free(nal, me); - } - - int do_PtlTblDump(nal_cb_t * nal, void *private, void *v_args, void *v_ret) - { - PtlTblDump_in *args = v_args; - PtlTblDump_out *ret = v_ret; - lib_ptl_t *tbl = &nal->ni.tbl; - ptl_handle_any_t handle; - struct list_head *tmp; - unsigned long flags; - - if (args->index_in < 0 || args->index_in >= tbl->size) - return ret->rc = PTL_INV_PTINDEX; - - nal->cb_printf(nal, "Portal table index %d\n", args->index_in); - - state_lock(nal, &flags); - list_for_each(tmp, &(tbl->tbl[args->index_in])) { - lib_me_t *me = list_entry(tmp, lib_me_t, me_list); - ptl_me2handle(&handle, me); - lib_me_dump(nal, me); - } - state_unlock(nal, &flags); - - return ret->rc = PTL_OK; - } - - int do_PtlMEDump(nal_cb_t * nal, void *private, void *v_args, void *v_ret) - { - PtlMEDump_in *args = v_args; - 
PtlMEDump_out *ret = v_ret; - lib_me_t *me; - unsigned long flags; - - state_lock(nal, &flags); - - me = ptl_handle2me(&args->current_in, nal); - if (me == NULL) { - ret->rc = PTL_INV_ME; - } else { - lib_me_dump(nal, me); - ret->rc = PTL_OK; - } - - state_unlock(nal, &flags); - - return ret->rc; --} -- - static void lib_me_dump(nal_cb_t * nal, lib_me_t * me) -#if 0 -static void -lib_me_dump(lib_nal_t *nal, lib_me_t * me) --{ - nal->cb_printf(nal, "Match Entry %p ("LPX64")\n", me, - me->me_lh.lh_cookie); - CWARN("Match Entry %p ("LPX64")\n", me, - me->me_lh.lh_cookie); -- - nal->cb_printf(nal, "\tMatch/Ignore\t= %016lx / %016lx\n", - me->match_bits, me->ignore_bits); - CWARN("\tMatch/Ignore\t= %016lx / %016lx\n", - me->match_bits, me->ignore_bits); -- - nal->cb_printf(nal, "\tMD\t= %p\n", me->md); - nal->cb_printf(nal, "\tprev\t= %p\n", - list_entry(me->me_list.prev, lib_me_t, me_list)); - nal->cb_printf(nal, "\tnext\t= %p\n", - list_entry(me->me_list.next, lib_me_t, me_list)); - CWARN("\tMD\t= %p\n", me->md); - CWARN("\tprev\t= %p\n", - list_entry(me->me_list.prev, lib_me_t, me_list)); - CWARN("\tnext\t= %p\n", - list_entry(me->me_list.next, lib_me_t, me_list)); --} -#endif diff --cc lnet/lnet/lib-move.c index ecd543c,13451d9..0000000 deleted file mode 100644,100644 --- a/lnet/lnet/lib-move.c +++ /dev/null @@@ -1,1445 -1,1426 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * lib/lib-move.c -- * Data movement routines -- * -- * Copyright (c) 2001-2003 Cluster File Systems, Inc. -- * Copyright (c) 2001-2002 Sandia National Laboratories -- * -- * This file is part of Lustre, http://www.sf.net/projects/lustre/ -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. 
-- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --#ifndef __KERNEL__ --# include --#else --# define DEBUG_SUBSYSTEM S_PORTALS --# include --#endif --#include --#include - #include -- - /* - * Right now it does not check access control lists. - * - * We only support one MD per ME, which is how the Portals 3.1 spec is written. - * All previous complication is removed. - */ -/* forward ref */ -static void lib_commit_md (lib_nal_t *nal, lib_md_t *md, lib_msg_t *msg); -- - static lib_me_t * - lib_find_me(nal_cb_t *nal, int index, int op_mask, ptl_nid_t src_nid, - ptl_pid_t src_pid, ptl_size_t rlength, ptl_size_t roffset, - ptl_match_bits_t match_bits, ptl_size_t *mlength_out, - ptl_size_t *offset_out, int *unlink_out) -static lib_md_t * -lib_match_md(lib_nal_t *nal, int index, int op_mask, - ptl_nid_t src_nid, ptl_pid_t src_pid, - ptl_size_t rlength, ptl_size_t roffset, - ptl_match_bits_t match_bits, lib_msg_t *msg, - ptl_size_t *mlength_out, ptl_size_t *offset_out) --{ - lib_ni_t *ni = &nal->ni; - struct list_head *match_list = &ni->tbl.tbl[index]; - lib_ni_t *ni = &nal->libnal_ni; - struct list_head *match_list = &ni->ni_portals.tbl[index]; -- struct list_head *tmp; -- lib_me_t *me; -- lib_md_t *md; -- ptl_size_t mlength; -- ptl_size_t offset; - -- ENTRY; -- -- CDEBUG (D_NET, "Request from "LPU64".%d of length %d into portal %d " -- "MB="LPX64"\n", src_nid, src_pid, rlength, index, match_bits); -- - if (index < 0 || index >= ni->tbl.size) { - if (index < 0 || index >= ni->ni_portals.size) { -- CERROR("Invalid portal %d not in [0-%d]\n", - index, ni->tbl.size); - 
index, ni->ni_portals.size); -- goto failed; -- } -- -- list_for_each (tmp, match_list) { -- me = list_entry(tmp, lib_me_t, me_list); -- md = me->md; -- -- /* ME attached but MD not attached yet */ -- if (md == NULL) -- continue; -- -- LASSERT (me == md->me); - - /* MD deactivated */ - if (md->threshold == 0) - continue; -- -- /* mismatched MD op */ -- if ((md->options & op_mask) == 0) - continue; - - /* MD exhausted */ - if (lib_md_exhausted(md)) -- continue; -- -- /* mismatched ME nid/pid? */ -- if (me->match_id.nid != PTL_NID_ANY && -- me->match_id.nid != src_nid) -- continue; - - CDEBUG(D_NET,"match_id.pid [%x], src_pid [%x]\n", me->match_id.pid, src_pid); -- -- if (me->match_id.pid != PTL_PID_ANY && -- me->match_id.pid != src_pid) -- continue; -- -- /* mismatched ME matchbits? */ -- if (((me->match_bits ^ match_bits) & ~me->ignore_bits) != 0) -- continue; -- -- /* Hurrah! This _is_ a match; check it out... */ -- -- if ((md->options & PTL_MD_MANAGE_REMOTE) == 0) -- offset = md->offset; -- else -- offset = roffset; -- - mlength = md->length - offset; - if ((md->options & PTL_MD_MAX_SIZE) != 0 && - mlength > md->max_size) - if ((md->options & PTL_MD_MAX_SIZE) != 0) { -- mlength = md->max_size; - LASSERT (md->offset + mlength <= md->length); - } else { - mlength = md->length - offset; - } -- -- if (rlength <= mlength) { /* fits in allowed space */ -- mlength = rlength; -- } else if ((md->options & PTL_MD_TRUNCATE) == 0) { -- /* this packet _really_ is too big */ -- CERROR("Matching packet %d too big: %d left, " -- "%d allowed\n", rlength, md->length - offset, -- mlength); -- goto failed; -- } - - /* Commit to this ME/MD */ - CDEBUG(D_NET, "Incoming %s index %x from "LPU64"/%u of " - "length %d/%d into md "LPX64" [%d] + %d\n", - (op_mask == PTL_MD_OP_PUT) ? 
"put" : "get", - index, src_nid, src_pid, mlength, rlength, - md->md_lh.lh_cookie, md->md_niov, offset); -- - lib_commit_md(nal, md, msg); -- md->offset = offset + mlength; - - /* NB Caller sets ev.type and ev.hdr_data */ - msg->ev.initiator.nid = src_nid; - msg->ev.initiator.pid = src_pid; - msg->ev.pt_index = index; - msg->ev.match_bits = match_bits; - msg->ev.rlength = rlength; - msg->ev.mlength = mlength; - msg->ev.offset = offset; - - lib_md_deconstruct(nal, md, &msg->ev.md); - ptl_md2handle(&msg->ev.md_handle, nal, md); -- -- *offset_out = offset; -- *mlength_out = mlength; - *unlink_out = ((md->options & PTL_MD_AUTO_UNLINK) != 0 && - md->offset >= (md->length - md->max_size)); - RETURN (me); - - /* Auto-unlink NOW, so the ME gets unlinked if required. - * We bumped md->pending above so the MD just gets flagged - * for unlink when it is finalized. */ - if ((md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) != 0 && - lib_md_exhausted(md)) - lib_md_unlink(nal, md); - - RETURN (md); -- } -- -- failed: -- CERROR (LPU64": Dropping %s from "LPU64".%d portal %d match "LPX64 -- " offset %d length %d: no match\n", - ni->nid, (op_mask == PTL_MD_OP_GET) ? "GET" : "PUT", - ni->ni_pid.nid, (op_mask == PTL_MD_OP_GET) ? 
"GET" : "PUT", -- src_nid, src_pid, index, match_bits, roffset, rlength); -- RETURN(NULL); --} -- - int do_PtlFailNid (nal_cb_t *nal, void *private, void *v_args, void *v_ret) -int lib_api_fail_nid (nal_t *apinal, ptl_nid_t nid, unsigned int threshold) --{ - PtlFailNid_in *args = v_args; - PtlFailNid_out *ret = v_ret; - lib_nal_t *nal = apinal->nal_data; -- lib_test_peer_t *tp; -- unsigned long flags; -- struct list_head *el; -- struct list_head *next; -- struct list_head cull; -- - if (args->threshold != 0) { - if (threshold != 0) { -- /* Adding a new entry */ - tp = (lib_test_peer_t *)nal->cb_malloc (nal, sizeof (*tp)); - PORTAL_ALLOC(tp, sizeof(*tp)); -- if (tp == NULL) - return (ret->rc = PTL_FAIL); - return PTL_NO_SPACE; -- - tp->tp_nid = args->nid; - tp->tp_threshold = args->threshold; - tp->tp_nid = nid; - tp->tp_threshold = threshold; -- - state_lock (nal, &flags); - list_add (&tp->tp_list, &nal->ni.ni_test_peers); - state_unlock (nal, &flags); - return (ret->rc = PTL_OK); - LIB_LOCK(nal, flags); - list_add_tail (&tp->tp_list, &nal->libnal_ni.ni_test_peers); - LIB_UNLOCK(nal, flags); - return PTL_OK; -- } -- -- /* removing entries */ -- INIT_LIST_HEAD (&cull); -- - state_lock (nal, &flags); - LIB_LOCK(nal, flags); -- - list_for_each_safe (el, next, &nal->ni.ni_test_peers) { - list_for_each_safe (el, next, &nal->libnal_ni.ni_test_peers) { -- tp = list_entry (el, lib_test_peer_t, tp_list); -- -- if (tp->tp_threshold == 0 || /* needs culling anyway */ - args->nid == PTL_NID_ANY || /* removing all entries */ - tp->tp_nid == args->nid) /* matched this one */ - nid == PTL_NID_ANY || /* removing all entries */ - tp->tp_nid == nid) /* matched this one */ -- { -- list_del (&tp->tp_list); -- list_add (&tp->tp_list, &cull); -- } -- } -- - state_unlock (nal, &flags); - LIB_UNLOCK(nal, flags); -- -- while (!list_empty (&cull)) { -- tp = list_entry (cull.next, lib_test_peer_t, tp_list); -- -- list_del (&tp->tp_list); - nal->cb_free (nal, tp, sizeof (*tp)); - 
PORTAL_FREE(tp, sizeof (*tp)); -- } - return (ret->rc = PTL_OK); - return PTL_OK; --} -- --static int - fail_peer (nal_cb_t *nal, ptl_nid_t nid, int outgoing) -fail_peer (lib_nal_t *nal, ptl_nid_t nid, int outgoing) --{ -- lib_test_peer_t *tp; -- struct list_head *el; -- struct list_head *next; -- unsigned long flags; -- struct list_head cull; -- int fail = 0; -- -- INIT_LIST_HEAD (&cull); -- - state_lock (nal, &flags); - LIB_LOCK (nal, flags); -- - list_for_each_safe (el, next, &nal->ni.ni_test_peers) { - list_for_each_safe (el, next, &nal->libnal_ni.ni_test_peers) { -- tp = list_entry (el, lib_test_peer_t, tp_list); -- -- if (tp->tp_threshold == 0) { -- /* zombie entry */ -- if (outgoing) { -- /* only cull zombies on outgoing tests, -- * since we may be at interrupt priority on -- * incoming messages. */ -- list_del (&tp->tp_list); -- list_add (&tp->tp_list, &cull); -- } -- continue; -- } -- -- if (tp->tp_nid == PTL_NID_ANY || /* fail every peer */ -- nid == tp->tp_nid) { /* fail this peer */ -- fail = 1; -- -- if (tp->tp_threshold != PTL_MD_THRESH_INF) { -- tp->tp_threshold--; -- if (outgoing && -- tp->tp_threshold == 0) { -- /* see above */ -- list_del (&tp->tp_list); -- list_add (&tp->tp_list, &cull); -- } -- } -- break; -- } -- } -- - state_unlock (nal, &flags); - LIB_UNLOCK (nal, flags); -- -- while (!list_empty (&cull)) { -- tp = list_entry (cull.next, lib_test_peer_t, tp_list); -- list_del (&tp->tp_list); -- - nal->cb_free (nal, tp, sizeof (*tp)); - PORTAL_FREE(tp, sizeof (*tp)); -- } -- -- return (fail); --} -- --ptl_size_t --lib_iov_nob (int niov, struct iovec *iov) --{ -- ptl_size_t nob = 0; -- -- while (niov-- > 0) -- nob += (iov++)->iov_len; -- -- return (nob); --} -- --void --lib_copy_iov2buf (char *dest, int niov, struct iovec *iov, -- ptl_size_t offset, ptl_size_t len) --{ -- ptl_size_t nob; -- -- if (len == 0) -- return; -- -- /* skip complete frags before 'offset' */ -- LASSERT (niov > 0); -- while (offset >= iov->iov_len) { -- offset -= 
iov->iov_len; -- iov++; -- niov--; -- LASSERT (niov > 0); -- } -- -- do { -- LASSERT (niov > 0); -- nob = MIN (iov->iov_len - offset, len); -- memcpy (dest, iov->iov_base + offset, nob); -- -- len -= nob; -- dest += nob; -- niov--; -- iov++; -- offset = 0; -- } while (len > 0); --} -- --void --lib_copy_buf2iov (int niov, struct iovec *iov, ptl_size_t offset, -- char *src, ptl_size_t len) --{ -- ptl_size_t nob; -- -- if (len == 0) -- return; -- -- /* skip complete frags before 'offset' */ -- LASSERT (niov > 0); -- while (offset >= iov->iov_len) { -- offset -= iov->iov_len; -- iov++; -- niov--; -- LASSERT (niov > 0); -- } -- -- do { -- LASSERT (niov > 0); -- nob = MIN (iov->iov_len - offset, len); -- memcpy (iov->iov_base + offset, src, nob); -- -- len -= nob; -- src += nob; -- niov--; -- iov++; -- offset = 0; -- } while (len > 0); --} -- --int --lib_extract_iov (int dst_niov, struct iovec *dst, -- int src_niov, struct iovec *src, -- ptl_size_t offset, ptl_size_t len) --{ -- /* Initialise 'dst' to the subset of 'src' starting at 'offset', -- * for exactly 'len' bytes, and return the number of entries. 
-- * NB not destructive to 'src' */ -- ptl_size_t frag_len; -- int niov; -- -- if (len == 0) /* no data => */ -- return (0); /* no frags */ -- -- LASSERT (src_niov > 0); -- while (offset >= src->iov_len) { /* skip initial frags */ -- offset -= src->iov_len; -- src_niov--; -- src++; -- LASSERT (src_niov > 0); -- } -- -- niov = 1; -- for (;;) { -- LASSERT (src_niov > 0); -- LASSERT (niov <= dst_niov); -- -- frag_len = src->iov_len - offset; -- dst->iov_base = ((char *)src->iov_base) + offset; -- -- if (len <= frag_len) { -- dst->iov_len = len; -- return (niov); -- } -- -- dst->iov_len = frag_len; -- -- len -= frag_len; -- dst++; -- src++; -- niov++; -- src_niov--; -- offset = 0; -- } --} -- --#ifndef __KERNEL__ --ptl_size_t --lib_kiov_nob (int niov, ptl_kiov_t *kiov) --{ -- LASSERT (0); -- return (0); --} -- --void --lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, -- ptl_size_t offset, ptl_size_t len) --{ -- LASSERT (0); --} -- --void --lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, ptl_size_t offset, -- char *src, ptl_size_t len) --{ -- LASSERT (0); --} -- --int --lib_extract_kiov (int dst_niov, ptl_kiov_t *dst, -- int src_niov, ptl_kiov_t *src, -- ptl_size_t offset, ptl_size_t len) --{ -- LASSERT (0); --} -- --#else -- --ptl_size_t --lib_kiov_nob (int niov, ptl_kiov_t *kiov) --{ -- ptl_size_t nob = 0; -- -- while (niov-- > 0) -- nob += (kiov++)->kiov_len; -- -- return (nob); --} -- --void --lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, -- ptl_size_t offset, ptl_size_t len) --{ -- ptl_size_t nob; -- char *addr; -- -- if (len == 0) -- return; -- -- LASSERT (!in_interrupt ()); -- -- LASSERT (niov > 0); -- while (offset > kiov->kiov_len) { -- offset -= kiov->kiov_len; -- kiov++; -- niov--; -- LASSERT (niov > 0); -- } -- -- do{ -- LASSERT (niov > 0); -- nob = MIN (kiov->kiov_len - offset, len); -- -- addr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset + offset; -- memcpy (dest, addr, nob); -- kunmap (kiov->kiov_page); -- -- len -= nob; -- 
dest += nob; -- niov--; -- kiov++; -- offset = 0; -- } while (len > 0); --} -- --void --lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, ptl_size_t offset, -- char *src, ptl_size_t len) --{ -- ptl_size_t nob; -- char *addr; -- -- if (len == 0) -- return; -- -- LASSERT (!in_interrupt ()); -- -- LASSERT (niov > 0); -- while (offset >= kiov->kiov_len) { -- offset -= kiov->kiov_len; -- kiov++; -- niov--; -- LASSERT (niov > 0); -- } -- -- do { -- LASSERT (niov > 0); -- nob = MIN (kiov->kiov_len - offset, len); -- -- addr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset + offset; -- memcpy (addr, src, nob); -- kunmap (kiov->kiov_page); -- -- len -= nob; -- src += nob; -- niov--; -- kiov++; -- offset = 0; -- } while (len > 0); --} -- --int --lib_extract_kiov (int dst_niov, ptl_kiov_t *dst, -- int src_niov, ptl_kiov_t *src, -- ptl_size_t offset, ptl_size_t len) --{ -- /* Initialise 'dst' to the subset of 'src' starting at 'offset', -- * for exactly 'len' bytes, and return the number of entries. 
-- * NB not destructive to 'src' */ -- ptl_size_t frag_len; -- int niov; -- -- if (len == 0) /* no data => */ -- return (0); /* no frags */ -- -- LASSERT (src_niov > 0); -- while (offset >= src->kiov_len) { /* skip initial frags */ -- offset -= src->kiov_len; -- src_niov--; -- src++; -- LASSERT (src_niov > 0); -- } -- -- niov = 1; -- for (;;) { -- LASSERT (src_niov > 0); -- LASSERT (niov <= dst_niov); -- -- frag_len = src->kiov_len - offset; -- dst->kiov_page = src->kiov_page; -- dst->kiov_offset = src->kiov_offset + offset; -- -- if (len <= frag_len) { -- dst->kiov_len = len; -- LASSERT (dst->kiov_offset + dst->kiov_len <= PAGE_SIZE); -- return (niov); -- } -- -- dst->kiov_len = frag_len; -- LASSERT (dst->kiov_offset + dst->kiov_len <= PAGE_SIZE); -- -- len -= frag_len; -- dst++; -- src++; -- niov++; -- src_niov--; -- offset = 0; -- } --} --#endif -- --ptl_err_t - lib_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, lib_md_t *md, -lib_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, lib_md_t *md, -- ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen) --{ -- if (mlen == 0) - return (nal->cb_recv(nal, private, msg, - 0, NULL, - offset, mlen, rlen)); - return (nal->libnal_recv(nal, private, msg, - 0, NULL, - offset, mlen, rlen)); -- -- if ((md->options & PTL_MD_KIOV) == 0) - return (nal->cb_recv(nal, private, msg, - md->md_niov, md->md_iov.iov, - offset, mlen, rlen)); - return (nal->libnal_recv(nal, private, msg, - md->md_niov, md->md_iov.iov, - offset, mlen, rlen)); -- - return (nal->cb_recv_pages(nal, private, msg, - md->md_niov, md->md_iov.kiov, - offset, mlen, rlen)); - return (nal->libnal_recv_pages(nal, private, msg, - md->md_niov, md->md_iov.kiov, - offset, mlen, rlen)); --} -- --ptl_err_t - lib_send (nal_cb_t *nal, void *private, lib_msg_t *msg, -lib_send (lib_nal_t *nal, void *private, lib_msg_t *msg, -- ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, -- lib_md_t *md, ptl_size_t offset, ptl_size_t len) --{ -- if (len == 0) - return 
(nal->cb_send(nal, private, msg, - hdr, type, nid, pid, - 0, NULL, - offset, len)); - return (nal->libnal_send(nal, private, msg, - hdr, type, nid, pid, - 0, NULL, - offset, len)); -- -- if ((md->options & PTL_MD_KIOV) == 0) - return (nal->cb_send(nal, private, msg, - hdr, type, nid, pid, - md->md_niov, md->md_iov.iov, - offset, len)); - return (nal->libnal_send(nal, private, msg, - hdr, type, nid, pid, - md->md_niov, md->md_iov.iov, - offset, len)); -- - return (nal->cb_send_pages(nal, private, msg, - hdr, type, nid, pid, - md->md_niov, md->md_iov.kiov, - offset, len)); - return (nal->libnal_send_pages(nal, private, msg, - hdr, type, nid, pid, - md->md_niov, md->md_iov.kiov, - offset, len)); --} -- --static void - lib_commit_md (nal_cb_t *nal, lib_md_t *md, lib_msg_t *msg) -lib_commit_md (lib_nal_t *nal, lib_md_t *md, lib_msg_t *msg) --{ - /* ALWAYS called holding the state_lock */ - lib_counters_t *counters = &nal->ni.counters; - /* ALWAYS called holding the LIB_LOCK */ - lib_counters_t *counters = &nal->libnal_ni.ni_counters; -- -- /* Here, we commit the MD to a network OP by marking it busy and -- * decrementing its threshold. Come what may, the network "owns" -- * the MD until a call to lib_finalize() signals completion. */ -- msg->md = md; -- -- md->pending++; -- if (md->threshold != PTL_MD_THRESH_INF) { -- LASSERT (md->threshold > 0); -- md->threshold--; -- } -- -- counters->msgs_alloc++; -- if (counters->msgs_alloc > counters->msgs_max) -- counters->msgs_max = counters->msgs_alloc; -- - list_add (&msg->msg_list, &nal->ni.ni_active_msgs); - list_add (&msg->msg_list, &nal->libnal_ni.ni_active_msgs); --} -- --static void - lib_drop_message (nal_cb_t *nal, void *private, ptl_hdr_t *hdr) -lib_drop_message (lib_nal_t *nal, void *private, ptl_hdr_t *hdr) --{ -- unsigned long flags; -- -- /* CAVEAT EMPTOR: this only drops messages that we've not committed -- * to receive (init_msg() not called) and therefore can't cause an -- * event. 
*/ -- - state_lock(nal, &flags); - nal->ni.counters.drop_count++; - nal->ni.counters.drop_length += hdr->payload_length; - state_unlock(nal, &flags); - LIB_LOCK(nal, flags); - nal->libnal_ni.ni_counters.drop_count++; - nal->libnal_ni.ni_counters.drop_length += hdr->payload_length; - LIB_UNLOCK(nal, flags); -- -- /* NULL msg => if NAL calls lib_finalize it will be a noop */ -- (void) lib_recv(nal, private, NULL, NULL, 0, 0, hdr->payload_length); --} -- --/* -- * Incoming messages have a ptl_msg_t object associated with them -- * by the library. This object encapsulates the state of the -- * message and allows the NAL to do non-blocking receives or sends -- * of long messages. -- * -- */ --static ptl_err_t - parse_put(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg) -parse_put(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg) --{ - lib_ni_t *ni = &nal->ni; - lib_ni_t *ni = &nal->libnal_ni; -- ptl_size_t mlength = 0; -- ptl_size_t offset = 0; - int unlink = 0; -- ptl_err_t rc; - lib_me_t *me; -- lib_md_t *md; -- unsigned long flags; -- -- /* Convert put fields to host byte order */ - hdr->msg.put.match_bits = NTOH__u64 (hdr->msg.put.match_bits); - hdr->msg.put.ptl_index = NTOH__u32 (hdr->msg.put.ptl_index); - hdr->msg.put.offset = NTOH__u32 (hdr->msg.put.offset); - hdr->msg.put.match_bits = le64_to_cpu(hdr->msg.put.match_bits); - hdr->msg.put.ptl_index = le32_to_cpu(hdr->msg.put.ptl_index); - hdr->msg.put.offset = le32_to_cpu(hdr->msg.put.offset); -- - state_lock(nal, &flags); - LIB_LOCK(nal, flags); -- - me = lib_find_me(nal, hdr->msg.put.ptl_index, PTL_MD_OP_PUT, - hdr->src_nid, hdr->src_pid, - hdr->payload_length, hdr->msg.put.offset, - hdr->msg.put.match_bits, - &mlength, &offset, &unlink); - if (me == NULL) { - state_unlock(nal, &flags); - md = lib_match_md(nal, hdr->msg.put.ptl_index, PTL_MD_OP_PUT, - hdr->src_nid, hdr->src_pid, - hdr->payload_length, hdr->msg.put.offset, - hdr->msg.put.match_bits, msg, - &mlength, &offset); - if (md == 
NULL) { - LIB_UNLOCK(nal, flags); -- return (PTL_FAIL); -- } - - md = me->md; - CDEBUG(D_NET, "Incoming put index %x from "LPU64"/%u of length %d/%d " - "into md "LPX64" [%d] + %d\n", hdr->msg.put.ptl_index, - hdr->src_nid, hdr->src_pid, mlength, hdr->payload_length, - md->md_lh.lh_cookie, md->md_niov, offset); - - lib_commit_md(nal, md, msg); -- - msg->ev.type = PTL_EVENT_PUT; - msg->ev.initiator.nid = hdr->src_nid; - msg->ev.initiator.pid = hdr->src_pid; - msg->ev.portal = hdr->msg.put.ptl_index; - msg->ev.match_bits = hdr->msg.put.match_bits; - msg->ev.rlength = hdr->payload_length; - msg->ev.mlength = mlength; - msg->ev.offset = offset; - msg->ev.type = PTL_EVENT_PUT_END; -- msg->ev.hdr_data = hdr->msg.put.hdr_data; - - lib_md_deconstruct(nal, md, &msg->ev.mem_desc); -- -- if (!ptl_is_wire_handle_none(&hdr->msg.put.ack_wmd) && -- !(md->options & PTL_MD_ACK_DISABLE)) { -- msg->ack_wmd = hdr->msg.put.ack_wmd; -- } - - ni->counters.recv_count++; - ni->counters.recv_length += mlength; -- - /* only unlink after MD's pending count has been bumped in - * lib_commit_md() otherwise lib_me_unlink() will nuke it */ - if (unlink) - lib_me_unlink (nal, me); - ni->ni_counters.recv_count++; - ni->ni_counters.recv_length += mlength; -- - state_unlock(nal, &flags); - LIB_UNLOCK(nal, flags); -- -- rc = lib_recv(nal, private, msg, md, offset, mlength, -- hdr->payload_length); -- if (rc != PTL_OK) -- CERROR(LPU64": error on receiving PUT from "LPU64": %d\n", - ni->nid, hdr->src_nid, rc); - ni->ni_pid.nid, hdr->src_nid, rc); -- -- return (rc); --} -- --static ptl_err_t - parse_get(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg) -parse_get(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg) --{ - lib_ni_t *ni = &nal->ni; - lib_ni_t *ni = &nal->libnal_ni; -- ptl_size_t mlength = 0; -- ptl_size_t offset = 0; - int unlink = 0; - lib_me_t *me; -- lib_md_t *md; -- ptl_hdr_t reply; -- unsigned long flags; -- int rc; -- -- /* Convert get fields to host byte order 
*/ - hdr->msg.get.match_bits = NTOH__u64 (hdr->msg.get.match_bits); - hdr->msg.get.ptl_index = NTOH__u32 (hdr->msg.get.ptl_index); - hdr->msg.get.sink_length = NTOH__u32 (hdr->msg.get.sink_length); - hdr->msg.get.src_offset = NTOH__u32 (hdr->msg.get.src_offset); - hdr->msg.get.match_bits = le64_to_cpu(hdr->msg.get.match_bits); - hdr->msg.get.ptl_index = le32_to_cpu(hdr->msg.get.ptl_index); - hdr->msg.get.sink_length = le32_to_cpu(hdr->msg.get.sink_length); - hdr->msg.get.src_offset = le32_to_cpu(hdr->msg.get.src_offset); -- - state_lock(nal, &flags); - LIB_LOCK(nal, flags); -- - me = lib_find_me(nal, hdr->msg.get.ptl_index, PTL_MD_OP_GET, - hdr->src_nid, hdr->src_pid, - hdr->msg.get.sink_length, hdr->msg.get.src_offset, - hdr->msg.get.match_bits, - &mlength, &offset, &unlink); - if (me == NULL) { - state_unlock(nal, &flags); - md = lib_match_md(nal, hdr->msg.get.ptl_index, PTL_MD_OP_GET, - hdr->src_nid, hdr->src_pid, - hdr->msg.get.sink_length, hdr->msg.get.src_offset, - hdr->msg.get.match_bits, msg, - &mlength, &offset); - if (md == NULL) { - LIB_UNLOCK(nal, flags); -- return (PTL_FAIL); -- } - - md = me->md; - CDEBUG(D_NET, "Incoming get index %d from "LPU64".%u of length %d/%d " - "from md "LPX64" [%d] + %d\n", hdr->msg.get.ptl_index, - hdr->src_nid, hdr->src_pid, mlength, hdr->payload_length, - md->md_lh.lh_cookie, md->md_niov, offset); - - lib_commit_md(nal, md, msg); -- - msg->ev.type = PTL_EVENT_GET; - msg->ev.initiator.nid = hdr->src_nid; - msg->ev.initiator.pid = hdr->src_pid; - msg->ev.portal = hdr->msg.get.ptl_index; - msg->ev.match_bits = hdr->msg.get.match_bits; - msg->ev.rlength = hdr->payload_length; - msg->ev.mlength = mlength; - msg->ev.offset = offset; - msg->ev.type = PTL_EVENT_GET_END; -- msg->ev.hdr_data = 0; - - lib_md_deconstruct(nal, md, &msg->ev.mem_desc); - - ni->counters.send_count++; - ni->counters.send_length += mlength; -- - /* only unlink after MD's refcount has been bumped in - * lib_commit_md() otherwise lib_me_unlink() will nuke it 
*/ - if (unlink) - lib_me_unlink (nal, me); - ni->ni_counters.send_count++; - ni->ni_counters.send_length += mlength; -- - state_unlock(nal, &flags); - LIB_UNLOCK(nal, flags); -- -- memset (&reply, 0, sizeof (reply)); - reply.type = HTON__u32 (PTL_MSG_REPLY); - reply.dest_nid = HTON__u64 (hdr->src_nid); - reply.src_nid = HTON__u64 (ni->nid); - reply.dest_pid = HTON__u32 (hdr->src_pid); - reply.src_pid = HTON__u32 (ni->pid); - reply.payload_length = HTON__u32 (mlength); - reply.type = cpu_to_le32(PTL_MSG_REPLY); - reply.dest_nid = cpu_to_le64(hdr->src_nid); - reply.dest_pid = cpu_to_le32(hdr->src_pid); - reply.src_nid = cpu_to_le64(ni->ni_pid.nid); - reply.src_pid = cpu_to_le32(ni->ni_pid.pid); - reply.payload_length = cpu_to_le32(mlength); -- -- reply.msg.reply.dst_wmd = hdr->msg.get.return_wmd; -- -- /* NB call lib_send() _BEFORE_ lib_recv() completes the incoming -- * message. Some NALs _require_ this to implement optimized GET */ -- -- rc = lib_send (nal, private, msg, &reply, PTL_MSG_REPLY, -- hdr->src_nid, hdr->src_pid, md, offset, mlength); -- if (rc != PTL_OK) -- CERROR(LPU64": Unable to send REPLY for GET from "LPU64": %d\n", - ni->nid, hdr->src_nid, rc); - ni->ni_pid.nid, hdr->src_nid, rc); -- -- /* Discard any junk after the hdr */ -- (void) lib_recv(nal, private, NULL, NULL, 0, 0, hdr->payload_length); -- -- return (rc); --} -- --static ptl_err_t - parse_reply(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg) -parse_reply(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg) --{ - lib_ni_t *ni = &nal->ni; - lib_ni_t *ni = &nal->libnal_ni; -- lib_md_t *md; -- int rlength; -- int length; -- unsigned long flags; -- ptl_err_t rc; -- - state_lock(nal, &flags); - LIB_LOCK(nal, flags); -- -- /* NB handles only looked up by creator (no flips) */ -- md = ptl_wire_handle2md(&hdr->msg.reply.dst_wmd, nal); -- if (md == NULL || md->threshold == 0) { -- CERROR (LPU64": Dropping REPLY from "LPU64" for %s MD "LPX64"."LPX64"\n", - ni->nid, 
hdr->src_nid, - ni->ni_pid.nid, hdr->src_nid, -- md == NULL ? "invalid" : "inactive", -- hdr->msg.reply.dst_wmd.wh_interface_cookie, -- hdr->msg.reply.dst_wmd.wh_object_cookie); -- - state_unlock(nal, &flags); - LIB_UNLOCK(nal, flags); -- return (PTL_FAIL); -- } -- -- LASSERT (md->offset == 0); -- -- length = rlength = hdr->payload_length; -- -- if (length > md->length) { -- if ((md->options & PTL_MD_TRUNCATE) == 0) { -- CERROR (LPU64": Dropping REPLY from "LPU64 -- " length %d for MD "LPX64" would overflow (%d)\n", - ni->nid, hdr->src_nid, length, - ni->ni_pid.nid, hdr->src_nid, length, -- hdr->msg.reply.dst_wmd.wh_object_cookie, -- md->length); - state_unlock(nal, &flags); - LIB_UNLOCK(nal, flags); -- return (PTL_FAIL); -- } -- length = md->length; -- } -- -- CDEBUG(D_NET, "Reply from "LPU64" of length %d/%d into md "LPX64"\n", -- hdr->src_nid, length, rlength, -- hdr->msg.reply.dst_wmd.wh_object_cookie); -- -- lib_commit_md(nal, md, msg); -- - msg->ev.type = PTL_EVENT_REPLY; - msg->ev.type = PTL_EVENT_REPLY_END; -- msg->ev.initiator.nid = hdr->src_nid; -- msg->ev.initiator.pid = hdr->src_pid; -- msg->ev.rlength = rlength; -- msg->ev.mlength = length; -- msg->ev.offset = 0; -- - lib_md_deconstruct(nal, md, &msg->ev.mem_desc); - lib_md_deconstruct(nal, md, &msg->ev.md); - ptl_md2handle(&msg->ev.md_handle, nal, md); -- - ni->counters.recv_count++; - ni->counters.recv_length += length; - ni->ni_counters.recv_count++; - ni->ni_counters.recv_length += length; -- - state_unlock(nal, &flags); - LIB_UNLOCK(nal, flags); -- -- rc = lib_recv(nal, private, msg, md, 0, length, rlength); -- if (rc != PTL_OK) -- CERROR(LPU64": error on receiving REPLY from "LPU64": %d\n", - ni->nid, hdr->src_nid, rc); - ni->ni_pid.nid, hdr->src_nid, rc); -- -- return (rc); --} -- --static ptl_err_t - parse_ack(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg) -parse_ack(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg) --{ - lib_ni_t *ni = &nal->ni; - lib_ni_t *ni = 
&nal->libnal_ni; -- lib_md_t *md; -- unsigned long flags; -- -- /* Convert ack fields to host byte order */ - hdr->msg.ack.match_bits = NTOH__u64 (hdr->msg.ack.match_bits); - hdr->msg.ack.mlength = NTOH__u32 (hdr->msg.ack.mlength); - hdr->msg.ack.match_bits = le64_to_cpu(hdr->msg.ack.match_bits); - hdr->msg.ack.mlength = le32_to_cpu(hdr->msg.ack.mlength); -- - state_lock(nal, &flags); - LIB_LOCK(nal, flags); -- -- /* NB handles only looked up by creator (no flips) */ -- md = ptl_wire_handle2md(&hdr->msg.ack.dst_wmd, nal); -- if (md == NULL || md->threshold == 0) { -- CDEBUG(D_INFO, LPU64": Dropping ACK from "LPU64" to %s MD " - LPX64"."LPX64"\n", ni->nid, hdr->src_nid, - LPX64"."LPX64"\n", ni->ni_pid.nid, hdr->src_nid, -- (md == NULL) ? "invalid" : "inactive", -- hdr->msg.ack.dst_wmd.wh_interface_cookie, -- hdr->msg.ack.dst_wmd.wh_object_cookie); -- - state_unlock(nal, &flags); - LIB_UNLOCK(nal, flags); -- return (PTL_FAIL); -- } -- -- CDEBUG(D_NET, LPU64": ACK from "LPU64" into md "LPX64"\n", - ni->nid, hdr->src_nid, - ni->ni_pid.nid, hdr->src_nid, -- hdr->msg.ack.dst_wmd.wh_object_cookie); -- -- lib_commit_md(nal, md, msg); -- -- msg->ev.type = PTL_EVENT_ACK; -- msg->ev.initiator.nid = hdr->src_nid; -- msg->ev.initiator.pid = hdr->src_pid; -- msg->ev.mlength = hdr->msg.ack.mlength; -- msg->ev.match_bits = hdr->msg.ack.match_bits; -- - lib_md_deconstruct(nal, md, &msg->ev.mem_desc); - lib_md_deconstruct(nal, md, &msg->ev.md); - ptl_md2handle(&msg->ev.md_handle, nal, md); -- - ni->counters.recv_count++; - ni->ni_counters.recv_count++; -- - state_unlock(nal, &flags); - LIB_UNLOCK(nal, flags); -- -- /* We have received and matched up the ack OK, create the -- * completion event now... 
*/ -- lib_finalize(nal, private, msg, PTL_OK); -- -- /* ...and now discard any junk after the hdr */ -- (void) lib_recv(nal, private, NULL, NULL, 0, 0, hdr->payload_length); -- -- return (PTL_OK); --} -- --static char * --hdr_type_string (ptl_hdr_t *hdr) --{ -- switch (hdr->type) { -- case PTL_MSG_ACK: -- return ("ACK"); -- case PTL_MSG_PUT: -- return ("PUT"); -- case PTL_MSG_GET: -- return ("GET"); -- case PTL_MSG_REPLY: -- return ("REPLY"); -- case PTL_MSG_HELLO: -- return ("HELLO"); -- default: -- return (""); -- } --} -- - void print_hdr(nal_cb_t * nal, ptl_hdr_t * hdr) -void print_hdr(lib_nal_t *nal, ptl_hdr_t * hdr) --{ -- char *type_str = hdr_type_string (hdr); -- - nal->cb_printf(nal, "P3 Header at %p of type %s\n", hdr, type_str); - nal->cb_printf(nal, " From nid/pid %Lu/%Lu", hdr->src_nid, - hdr->src_pid); - nal->cb_printf(nal, " To nid/pid %Lu/%Lu\n", hdr->dest_nid, - hdr->dest_pid); - CWARN("P3 Header at %p of type %s\n", hdr, type_str); - CWARN(" From nid/pid "LPX64"/%u", hdr->src_nid, hdr->src_pid); - CWARN(" To nid/pid "LPX64"/%u\n", hdr->dest_nid, hdr->dest_pid); -- -- switch (hdr->type) { -- default: -- break; -- -- case PTL_MSG_PUT: - nal->cb_printf(nal, - " Ptl index %d, ack md "LPX64"."LPX64", " - "match bits "LPX64"\n", - hdr->msg.put.ptl_index, - hdr->msg.put.ack_wmd.wh_interface_cookie, - hdr->msg.put.ack_wmd.wh_object_cookie, - hdr->msg.put.match_bits); - nal->cb_printf(nal, - " Length %d, offset %d, hdr data "LPX64"\n", - hdr->payload_length, hdr->msg.put.offset, - hdr->msg.put.hdr_data); - CWARN(" Ptl index %d, ack md "LPX64"."LPX64", " - "match bits "LPX64"\n", - hdr->msg.put.ptl_index, - hdr->msg.put.ack_wmd.wh_interface_cookie, - hdr->msg.put.ack_wmd.wh_object_cookie, - hdr->msg.put.match_bits); - CWARN(" Length %d, offset %d, hdr data "LPX64"\n", - hdr->payload_length, hdr->msg.put.offset, - hdr->msg.put.hdr_data); -- break; -- -- case PTL_MSG_GET: - nal->cb_printf(nal, - " Ptl index %d, return md "LPX64"."LPX64", " - "match bits 
"LPX64"\n", hdr->msg.get.ptl_index, - hdr->msg.get.return_wmd.wh_interface_cookie, - hdr->msg.get.return_wmd.wh_object_cookie, - hdr->msg.get.match_bits); - nal->cb_printf(nal, - " Length %d, src offset %d\n", - hdr->msg.get.sink_length, - hdr->msg.get.src_offset); - CWARN(" Ptl index %d, return md "LPX64"."LPX64", " - "match bits "LPX64"\n", hdr->msg.get.ptl_index, - hdr->msg.get.return_wmd.wh_interface_cookie, - hdr->msg.get.return_wmd.wh_object_cookie, - hdr->msg.get.match_bits); - CWARN(" Length %d, src offset %d\n", - hdr->msg.get.sink_length, - hdr->msg.get.src_offset); -- break; -- -- case PTL_MSG_ACK: - nal->cb_printf(nal, " dst md "LPX64"."LPX64", " - "manipulated length %d\n", - hdr->msg.ack.dst_wmd.wh_interface_cookie, - hdr->msg.ack.dst_wmd.wh_object_cookie, - hdr->msg.ack.mlength); - CWARN(" dst md "LPX64"."LPX64", " - "manipulated length %d\n", - hdr->msg.ack.dst_wmd.wh_interface_cookie, - hdr->msg.ack.dst_wmd.wh_object_cookie, - hdr->msg.ack.mlength); -- break; -- -- case PTL_MSG_REPLY: - nal->cb_printf(nal, " dst md "LPX64"."LPX64", " - "length %d\n", - hdr->msg.reply.dst_wmd.wh_interface_cookie, - hdr->msg.reply.dst_wmd.wh_object_cookie, - hdr->payload_length); - CWARN(" dst md "LPX64"."LPX64", " - "length %d\n", - hdr->msg.reply.dst_wmd.wh_interface_cookie, - hdr->msg.reply.dst_wmd.wh_object_cookie, - hdr->payload_length); -- } -- --} /* end of print_hdr() */ -- -- - void - lib_parse(nal_cb_t *nal, ptl_hdr_t *hdr, void *private) -ptl_err_t -lib_parse(lib_nal_t *nal, ptl_hdr_t *hdr, void *private) --{ -- unsigned long flags; -- ptl_err_t rc; -- lib_msg_t *msg; - - /* NB we return PTL_OK if we manage to parse the header and believe - * it looks OK. 
Anything that goes wrong with receiving the - * message after that point is the responsibility of the NAL */ -- -- /* convert common fields to host byte order */ - hdr->dest_nid = NTOH__u64 (hdr->dest_nid); - hdr->src_nid = NTOH__u64 (hdr->src_nid); - hdr->dest_pid = NTOH__u32 (hdr->dest_pid); - hdr->src_pid = NTOH__u32 (hdr->src_pid); - hdr->type = NTOH__u32 (hdr->type); - hdr->payload_length = NTOH__u32(hdr->payload_length); - #if 0 - nal->cb_printf(nal, "%d: lib_parse: nal=%p hdr=%p type=%d\n", - nal->ni.nid, nal, hdr, hdr->type); - print_hdr(nal, hdr); - #endif - if (hdr->type == PTL_MSG_HELLO) { - hdr->type = le32_to_cpu(hdr->type); - hdr->src_nid = le64_to_cpu(hdr->src_nid); - hdr->src_pid = le32_to_cpu(hdr->src_pid); - hdr->dest_pid = le32_to_cpu(hdr->dest_pid); - hdr->payload_length = le32_to_cpu(hdr->payload_length); - - switch (hdr->type) { - case PTL_MSG_HELLO: { -- /* dest_nid is really ptl_magicversion_t */ -- ptl_magicversion_t *mv = (ptl_magicversion_t *)&hdr->dest_nid; -- - CERROR (LPU64": Dropping unexpected HELLO message: " - mv->magic = le32_to_cpu(mv->magic); - mv->version_major = le16_to_cpu(mv->version_major); - mv->version_minor = le16_to_cpu(mv->version_minor); - - if (mv->magic == PORTALS_PROTO_MAGIC && - mv->version_major == PORTALS_PROTO_VERSION_MAJOR && - mv->version_minor == PORTALS_PROTO_VERSION_MINOR) { - CWARN (LPU64": Dropping unexpected HELLO message: " - "magic %d, version %d.%d from "LPD64"\n", - nal->libnal_ni.ni_pid.nid, mv->magic, - mv->version_major, mv->version_minor, - hdr->src_nid); - - /* it's good but we don't want it */ - lib_drop_message(nal, private, hdr); - return PTL_OK; - } - - /* we got garbage */ - CERROR (LPU64": Bad HELLO message: " -- "magic %d, version %d.%d from "LPD64"\n", - nal->ni.nid, mv->magic, - nal->libnal_ni.ni_pid.nid, mv->magic, -- mv->version_major, mv->version_minor, -- hdr->src_nid); - lib_drop_message(nal, private, hdr); - return; - return PTL_FAIL; -- } - - if (hdr->dest_nid != nal->ni.nid) { 
- CERROR(LPU64": Dropping %s message from "LPU64" to "LPU64 - " (not me)\n", nal->ni.nid, hdr_type_string (hdr), - hdr->src_nid, hdr->dest_nid); - lib_drop_message(nal, private, hdr); - return; - - case PTL_MSG_ACK: - case PTL_MSG_PUT: - case PTL_MSG_GET: - case PTL_MSG_REPLY: - hdr->dest_nid = le64_to_cpu(hdr->dest_nid); - if (hdr->dest_nid != nal->libnal_ni.ni_pid.nid) { - CERROR(LPU64": BAD dest NID in %s message from" - LPU64" to "LPU64" (not me)\n", - nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr), - hdr->src_nid, hdr->dest_nid); - return PTL_FAIL; - } - break; - - default: - CERROR(LPU64": Bad message type 0x%x from "LPU64"\n", - nal->libnal_ni.ni_pid.nid, hdr->type, hdr->src_nid); - return PTL_FAIL; -- } -- - if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */ - /* We've decided we're not receiving garbage since we can parse the - * header. We will return PTL_OK come what may... */ - - if (!list_empty (&nal->libnal_ni.ni_test_peers) && /* normally we don't */ -- fail_peer (nal, hdr->src_nid, 0)) /* shall we now? 
*/ -- { -- CERROR(LPU64": Dropping incoming %s from "LPU64 -- ": simulated failure\n", - nal->ni.nid, hdr_type_string (hdr), - nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr), -- hdr->src_nid); -- lib_drop_message(nal, private, hdr); - return; - return PTL_OK; -- } -- -- msg = lib_msg_alloc(nal); -- if (msg == NULL) { -- CERROR(LPU64": Dropping incoming %s from "LPU64 -- ": can't allocate a lib_msg_t\n", - nal->ni.nid, hdr_type_string (hdr), - nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr), -- hdr->src_nid); -- lib_drop_message(nal, private, hdr); - return; - return PTL_OK; -- } - - do_gettimeofday(&msg->ev.arrival_time); -- -- switch (hdr->type) { -- case PTL_MSG_ACK: -- rc = parse_ack(nal, hdr, private, msg); -- break; -- case PTL_MSG_PUT: -- rc = parse_put(nal, hdr, private, msg); -- break; -- case PTL_MSG_GET: -- rc = parse_get(nal, hdr, private, msg); -- break; -- case PTL_MSG_REPLY: -- rc = parse_reply(nal, hdr, private, msg); -- break; -- default: - CERROR(LPU64": Dropping message from "LPU64 - ": Bad type=0x%x\n", nal->ni.nid, hdr->src_nid, - hdr->type); - rc = PTL_FAIL; - LASSERT(0); - rc = PTL_FAIL; /* no compiler warning please */ -- break; -- } -- -- if (rc != PTL_OK) { -- if (msg->md != NULL) { -- /* committed... 
*/ -- lib_finalize(nal, private, msg, rc); -- } else { - state_lock(nal, &flags); - lib_msg_free(nal, msg); /* expects state_lock held */ - state_unlock(nal, &flags); - LIB_LOCK(nal, flags); - lib_msg_free(nal, msg); /* expects LIB_LOCK held */ - LIB_UNLOCK(nal, flags); -- -- lib_drop_message(nal, private, hdr); -- } -- } - - return PTL_OK; - /* That's "OK I can parse it", not "OK I like it" :) */ --} -- --int - do_PtlPut(nal_cb_t *nal, void *private, void *v_args, void *v_ret) -lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh, - ptl_ack_req_t ack, ptl_process_id_t *id, - ptl_pt_index_t portal, ptl_ac_index_t ac, - ptl_match_bits_t match_bits, - ptl_size_t offset, ptl_hdr_data_t hdr_data) --{ - /* - * Incoming: - * ptl_handle_md_t md_in - * ptl_ack_req_t ack_req_in - * ptl_process_id_t target_in - * ptl_pt_index_t portal_in - * ptl_ac_index_t cookie_in - * ptl_match_bits_t match_bits_in - * ptl_size_t offset_in - * - * Outgoing: - */ - - PtlPut_in *args = v_args; - ptl_process_id_t *id = &args->target_in; - PtlPut_out *ret = v_ret; - lib_ni_t *ni = &nal->ni; - lib_nal_t *nal = apinal->nal_data; - lib_ni_t *ni = &nal->libnal_ni; -- lib_msg_t *msg; -- ptl_hdr_t hdr; -- lib_md_t *md; -- unsigned long flags; -- int rc; -- - if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */ - if (!list_empty (&ni->ni_test_peers) && /* normally we don't */ -- fail_peer (nal, id->nid, 1)) /* shall we now? 
*/ -- { - CERROR(LPU64": Dropping PUT to "LPU64": simulated failure\n", - nal->ni.nid, id->nid); - return (ret->rc = PTL_INV_PROC); - CERROR("Dropping PUT to "LPU64": simulated failure\n", - id->nid); - return PTL_PROCESS_INVALID; -- } -- -- msg = lib_msg_alloc(nal); -- if (msg == NULL) { -- CERROR(LPU64": Dropping PUT to "LPU64": ENOMEM on lib_msg_t\n", - ni->nid, id->nid); - return (ret->rc = PTL_NOSPACE); - ni->ni_pid.nid, id->nid); - return PTL_NO_SPACE; -- } -- - state_lock(nal, &flags); - LIB_LOCK(nal, flags); -- - md = ptl_handle2md(&args->md_in, nal); - md = ptl_handle2md(mdh, nal); -- if (md == NULL || md->threshold == 0) { -- lib_msg_free(nal, msg); - state_unlock(nal, &flags); - LIB_UNLOCK(nal, flags); -- - return (ret->rc = PTL_INV_MD); - return PTL_MD_INVALID; -- } -- - CDEBUG(D_NET, "PtlPut -> %Lu: %lu\n", (unsigned long long)id->nid, - (unsigned long)id->pid); - CDEBUG(D_NET, "PtlPut -> "LPX64"\n", id->nid); -- -- memset (&hdr, 0, sizeof (hdr)); - hdr.type = HTON__u32 (PTL_MSG_PUT); - hdr.dest_nid = HTON__u64 (id->nid); - hdr.src_nid = HTON__u64 (ni->nid); - hdr.dest_pid = HTON__u32 (id->pid); - hdr.src_pid = HTON__u32 (ni->pid); - hdr.payload_length = HTON__u32 (md->length); - hdr.type = cpu_to_le32(PTL_MSG_PUT); - hdr.dest_nid = cpu_to_le64(id->nid); - hdr.dest_pid = cpu_to_le32(id->pid); - hdr.src_nid = cpu_to_le64(ni->ni_pid.nid); - hdr.src_pid = cpu_to_le32(ni->ni_pid.pid); - hdr.payload_length = cpu_to_le32(md->length); -- -- /* NB handles only looked up by creator (no flips) */ - if (args->ack_req_in == PTL_ACK_REQ) { - if (ack == PTL_ACK_REQ) { -- hdr.msg.put.ack_wmd.wh_interface_cookie = ni->ni_interface_cookie; -- hdr.msg.put.ack_wmd.wh_object_cookie = md->md_lh.lh_cookie; -- } else { -- hdr.msg.put.ack_wmd = PTL_WIRE_HANDLE_NONE; -- } -- - hdr.msg.put.match_bits = HTON__u64 (args->match_bits_in); - hdr.msg.put.ptl_index = HTON__u32 (args->portal_in); - hdr.msg.put.offset = HTON__u32 (args->offset_in); - hdr.msg.put.hdr_data = 
args->hdr_data_in; - hdr.msg.put.match_bits = cpu_to_le64(match_bits); - hdr.msg.put.ptl_index = cpu_to_le32(portal); - hdr.msg.put.offset = cpu_to_le32(offset); - hdr.msg.put.hdr_data = hdr_data; -- -- lib_commit_md(nal, md, msg); -- - msg->ev.type = PTL_EVENT_SENT; - msg->ev.initiator.nid = ni->nid; - msg->ev.initiator.pid = ni->pid; - msg->ev.portal = args->portal_in; - msg->ev.match_bits = args->match_bits_in; - msg->ev.type = PTL_EVENT_SEND_END; - msg->ev.initiator.nid = ni->ni_pid.nid; - msg->ev.initiator.pid = ni->ni_pid.pid; - msg->ev.pt_index = portal; - msg->ev.match_bits = match_bits; -- msg->ev.rlength = md->length; -- msg->ev.mlength = md->length; - msg->ev.offset = args->offset_in; - msg->ev.hdr_data = args->hdr_data_in; - msg->ev.offset = offset; - msg->ev.hdr_data = hdr_data; -- - lib_md_deconstruct(nal, md, &msg->ev.mem_desc); - lib_md_deconstruct(nal, md, &msg->ev.md); - ptl_md2handle(&msg->ev.md_handle, nal, md); -- - ni->counters.send_count++; - ni->counters.send_length += md->length; - ni->ni_counters.send_count++; - ni->ni_counters.send_length += md->length; -- - state_unlock(nal, &flags); - LIB_UNLOCK(nal, flags); -- - rc = lib_send (nal, private, msg, &hdr, PTL_MSG_PUT, - rc = lib_send (nal, NULL, msg, &hdr, PTL_MSG_PUT, -- id->nid, id->pid, md, 0, md->length); -- if (rc != PTL_OK) { - CERROR(LPU64": error sending PUT to "LPU64": %d\n", - ni->nid, id->nid, rc); - lib_finalize (nal, private, msg, rc); - CERROR("Error sending PUT to "LPX64": %d\n", - id->nid, rc); - lib_finalize (nal, NULL, msg, rc); -- } -- -- /* completion will be signalled by an event */ - return ret->rc = PTL_OK; - return PTL_OK; --} -- --lib_msg_t * - lib_fake_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, lib_md_t *getmd) -lib_create_reply_msg (lib_nal_t *nal, ptl_nid_t peer_nid, lib_msg_t *getmsg) --{ -- /* The NAL can DMA direct to the GET md (i.e. no REPLY msg). This - * returns a msg the NAL can pass to lib_finalize() so that a REPLY - * event still occurs. 
- * returns a msg for the NAL to pass to lib_finalize() when the sink - * data has been received. -- * - * CAVEAT EMPTOR: 'getmd' is passed by pointer so it MUST be valid. - * This can only be guaranteed while a lib_msg_t holds a reference - * on it (ie. pending > 0), so best call this before the - * lib_finalize() of the original GET. */ - * CAVEAT EMPTOR: 'getmsg' is the original GET, which is freed when - * lib_finalize() is called on it, so the NAL must call this first */ -- - lib_ni_t *ni = &nal->ni; - lib_ni_t *ni = &nal->libnal_ni; -- lib_msg_t *msg = lib_msg_alloc(nal); - lib_md_t *getmd = getmsg->md; -- unsigned long flags; -- - state_lock(nal, &flags); - LIB_LOCK(nal, flags); -- -- LASSERT (getmd->pending > 0); -- -- if (msg == NULL) { -- CERROR ("Dropping REPLY from "LPU64": can't allocate msg\n", -- peer_nid); -- goto drop; -- } -- -- if (getmd->threshold == 0) { -- CERROR ("Dropping REPLY from "LPU64" for inactive MD %p\n", -- peer_nid, getmd); -- goto drop_msg; -- } -- -- LASSERT (getmd->offset == 0); -- -- CDEBUG(D_NET, "Reply from "LPU64" md %p\n", peer_nid, getmd); -- -- lib_commit_md (nal, getmd, msg); -- - msg->ev.type = PTL_EVENT_REPLY; - msg->ev.type = PTL_EVENT_REPLY_END; -- msg->ev.initiator.nid = peer_nid; -- msg->ev.initiator.pid = 0; /* XXX FIXME!!! 
*/ -- msg->ev.rlength = msg->ev.mlength = getmd->length; -- msg->ev.offset = 0; -- - lib_md_deconstruct(nal, getmd, &msg->ev.mem_desc); - lib_md_deconstruct(nal, getmd, &msg->ev.md); - ptl_md2handle(&msg->ev.md_handle, nal, getmd); -- - ni->counters.recv_count++; - ni->counters.recv_length += getmd->length; - ni->ni_counters.recv_count++; - ni->ni_counters.recv_length += getmd->length; -- - state_unlock(nal, &flags); - LIB_UNLOCK(nal, flags); -- -- return msg; -- -- drop_msg: -- lib_msg_free(nal, msg); -- drop: - nal->ni.counters.drop_count++; - nal->ni.counters.drop_length += getmd->length; - nal->libnal_ni.ni_counters.drop_count++; - nal->libnal_ni.ni_counters.drop_length += getmd->length; -- - state_unlock (nal, &flags); - LIB_UNLOCK (nal, flags); -- -- return NULL; --} -- --int - do_PtlGet(nal_cb_t *nal, void *private, void *v_args, void *v_ret) -lib_api_get(nal_t *apinal, ptl_handle_md_t *mdh, ptl_process_id_t *id, - ptl_pt_index_t portal, ptl_ac_index_t ac, - ptl_match_bits_t match_bits, ptl_size_t offset) --{ - /* - * Incoming: - * ptl_handle_md_t md_in - * ptl_process_id_t target_in - * ptl_pt_index_t portal_in - * ptl_ac_index_t cookie_in - * ptl_match_bits_t match_bits_in - * ptl_size_t offset_in - * - * Outgoing: - */ - - PtlGet_in *args = v_args; - ptl_process_id_t *id = &args->target_in; - PtlGet_out *ret = v_ret; - lib_ni_t *ni = &nal->ni; - lib_nal_t *nal = apinal->nal_data; - lib_ni_t *ni = &nal->libnal_ni; -- lib_msg_t *msg; -- ptl_hdr_t hdr; -- lib_md_t *md; -- unsigned long flags; -- int rc; -- - if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */ - if (!list_empty (&ni->ni_test_peers) && /* normally we don't */ -- fail_peer (nal, id->nid, 1)) /* shall we now? 
*/ -- { - CERROR(LPU64": Dropping PUT to "LPU64": simulated failure\n", - nal->ni.nid, id->nid); - return (ret->rc = PTL_INV_PROC); - CERROR("Dropping PUT to "LPX64": simulated failure\n", - id->nid); - return PTL_PROCESS_INVALID; -- } -- -- msg = lib_msg_alloc(nal); -- if (msg == NULL) { - CERROR(LPU64": Dropping GET to "LPU64": ENOMEM on lib_msg_t\n", - ni->nid, id->nid); - return (ret->rc = PTL_NOSPACE); - CERROR("Dropping GET to "LPU64": ENOMEM on lib_msg_t\n", - id->nid); - return PTL_NO_SPACE; -- } -- - state_lock(nal, &flags); - LIB_LOCK(nal, flags); -- - md = ptl_handle2md(&args->md_in, nal); - md = ptl_handle2md(mdh, nal); -- if (md == NULL || !md->threshold) { -- lib_msg_free(nal, msg); - state_unlock(nal, &flags); - LIB_UNLOCK(nal, flags); -- - return ret->rc = PTL_INV_MD; - return PTL_MD_INVALID; -- } -- -- CDEBUG(D_NET, "PtlGet -> %Lu: %lu\n", (unsigned long long)id->nid, -- (unsigned long)id->pid); -- -- memset (&hdr, 0, sizeof (hdr)); - hdr.type = HTON__u32 (PTL_MSG_GET); - hdr.dest_nid = HTON__u64 (id->nid); - hdr.src_nid = HTON__u64 (ni->nid); - hdr.dest_pid = HTON__u32 (id->pid); - hdr.src_pid = HTON__u32 (ni->pid); - hdr.type = cpu_to_le32(PTL_MSG_GET); - hdr.dest_nid = cpu_to_le64(id->nid); - hdr.dest_pid = cpu_to_le32(id->pid); - hdr.src_nid = cpu_to_le64(ni->ni_pid.nid); - hdr.src_pid = cpu_to_le32(ni->ni_pid.pid); -- hdr.payload_length = 0; -- -- /* NB handles only looked up by creator (no flips) */ -- hdr.msg.get.return_wmd.wh_interface_cookie = ni->ni_interface_cookie; -- hdr.msg.get.return_wmd.wh_object_cookie = md->md_lh.lh_cookie; -- - hdr.msg.get.match_bits = HTON__u64 (args->match_bits_in); - hdr.msg.get.ptl_index = HTON__u32 (args->portal_in); - hdr.msg.get.src_offset = HTON__u32 (args->offset_in); - hdr.msg.get.sink_length = HTON__u32 (md->length); - hdr.msg.get.match_bits = cpu_to_le64(match_bits); - hdr.msg.get.ptl_index = cpu_to_le32(portal); - hdr.msg.get.src_offset = cpu_to_le32(offset); - hdr.msg.get.sink_length = 
cpu_to_le32(md->length); -- -- lib_commit_md(nal, md, msg); -- - msg->ev.type = PTL_EVENT_SENT; - msg->ev.initiator.nid = ni->nid; - msg->ev.initiator.pid = ni->pid; - msg->ev.portal = args->portal_in; - msg->ev.match_bits = args->match_bits_in; - msg->ev.type = PTL_EVENT_SEND_END; - msg->ev.initiator = ni->ni_pid; - msg->ev.pt_index = portal; - msg->ev.match_bits = match_bits; -- msg->ev.rlength = md->length; -- msg->ev.mlength = md->length; - msg->ev.offset = args->offset_in; - msg->ev.offset = offset; -- msg->ev.hdr_data = 0; -- - lib_md_deconstruct(nal, md, &msg->ev.mem_desc); - lib_md_deconstruct(nal, md, &msg->ev.md); - ptl_md2handle(&msg->ev.md_handle, nal, md); -- - ni->counters.send_count++; - ni->ni_counters.send_count++; -- - state_unlock(nal, &flags); - LIB_UNLOCK(nal, flags); -- - rc = lib_send (nal, private, msg, &hdr, PTL_MSG_GET, - rc = lib_send (nal, NULL, msg, &hdr, PTL_MSG_GET, -- id->nid, id->pid, NULL, 0, 0); -- if (rc != PTL_OK) { -- CERROR(LPU64": error sending GET to "LPU64": %d\n", - ni->nid, id->nid, rc); - lib_finalize (nal, private, msg, rc); - ni->ni_pid.nid, id->nid, rc); - lib_finalize (nal, NULL, msg, rc); -- } -- -- /* completion will be signalled by an event */ - return ret->rc = PTL_OK; - return PTL_OK; --} -- --void lib_assert_wire_constants (void) --{ -- /* Wire protocol assertions generated by 'wirecheck' - * running on Linux robert.bartonsoftware.com 2.4.20-18.9 #1 Thu May 29 06:54:41 EDT 2003 i68 - * with gcc version 3.2.2 20030222 (Red Hat Linux 3.2.2-5) */ - * running on Linux mdevi 2.4.21-p4smp-55chaos #1 SMP Tue Jun 8 14:38:44 PDT 2004 i686 i686 i - * with gcc version 3.2.3 20030502 (Red Hat Linux 3.2.3-34) */ -- -- -- /* Constants... 
*/ -- LASSERT (PORTALS_PROTO_MAGIC == 0xeebc0ded); - LASSERT (PORTALS_PROTO_VERSION_MAJOR == 0); - LASSERT (PORTALS_PROTO_VERSION_MINOR == 3); - LASSERT (PORTALS_PROTO_VERSION_MAJOR == 1); - LASSERT (PORTALS_PROTO_VERSION_MINOR == 0); -- LASSERT (PTL_MSG_ACK == 0); -- LASSERT (PTL_MSG_PUT == 1); -- LASSERT (PTL_MSG_GET == 2); -- LASSERT (PTL_MSG_REPLY == 3); -- LASSERT (PTL_MSG_HELLO == 4); -- -- /* Checks for struct ptl_handle_wire_t */ -- LASSERT ((int)sizeof(ptl_handle_wire_t) == 16); - LASSERT (offsetof(ptl_handle_wire_t, wh_interface_cookie) == 0); - LASSERT ((int)offsetof(ptl_handle_wire_t, wh_interface_cookie) == 0); -- LASSERT ((int)sizeof(((ptl_handle_wire_t *)0)->wh_interface_cookie) == 8); - LASSERT (offsetof(ptl_handle_wire_t, wh_object_cookie) == 8); - LASSERT ((int)offsetof(ptl_handle_wire_t, wh_object_cookie) == 8); -- LASSERT ((int)sizeof(((ptl_handle_wire_t *)0)->wh_object_cookie) == 8); -- -- /* Checks for struct ptl_magicversion_t */ -- LASSERT ((int)sizeof(ptl_magicversion_t) == 8); - LASSERT (offsetof(ptl_magicversion_t, magic) == 0); - LASSERT ((int)offsetof(ptl_magicversion_t, magic) == 0); -- LASSERT ((int)sizeof(((ptl_magicversion_t *)0)->magic) == 4); - LASSERT (offsetof(ptl_magicversion_t, version_major) == 4); - LASSERT ((int)offsetof(ptl_magicversion_t, version_major) == 4); -- LASSERT ((int)sizeof(((ptl_magicversion_t *)0)->version_major) == 2); - LASSERT (offsetof(ptl_magicversion_t, version_minor) == 6); - LASSERT ((int)offsetof(ptl_magicversion_t, version_minor) == 6); -- LASSERT ((int)sizeof(((ptl_magicversion_t *)0)->version_minor) == 2); -- -- /* Checks for struct ptl_hdr_t */ -- LASSERT ((int)sizeof(ptl_hdr_t) == 72); - LASSERT (offsetof(ptl_hdr_t, dest_nid) == 0); - LASSERT ((int)offsetof(ptl_hdr_t, dest_nid) == 0); -- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->dest_nid) == 8); - LASSERT (offsetof(ptl_hdr_t, src_nid) == 8); - LASSERT ((int)offsetof(ptl_hdr_t, src_nid) == 8); -- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->src_nid) == 8); 
- LASSERT (offsetof(ptl_hdr_t, dest_pid) == 16); - LASSERT ((int)offsetof(ptl_hdr_t, dest_pid) == 16); -- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->dest_pid) == 4); - LASSERT (offsetof(ptl_hdr_t, src_pid) == 20); - LASSERT ((int)offsetof(ptl_hdr_t, src_pid) == 20); -- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->src_pid) == 4); - LASSERT (offsetof(ptl_hdr_t, type) == 24); - LASSERT ((int)offsetof(ptl_hdr_t, type) == 24); -- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->type) == 4); - LASSERT (offsetof(ptl_hdr_t, payload_length) == 28); - LASSERT ((int)offsetof(ptl_hdr_t, payload_length) == 28); -- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->payload_length) == 4); - LASSERT (offsetof(ptl_hdr_t, msg) == 32); - LASSERT ((int)offsetof(ptl_hdr_t, msg) == 32); -- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg) == 40); -- -- /* Ack */ - LASSERT (offsetof(ptl_hdr_t, msg.ack.dst_wmd) == 32); - LASSERT ((int)offsetof(ptl_hdr_t, msg.ack.dst_wmd) == 32); -- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.ack.dst_wmd) == 16); - LASSERT (offsetof(ptl_hdr_t, msg.ack.match_bits) == 48); - LASSERT ((int)offsetof(ptl_hdr_t, msg.ack.match_bits) == 48); -- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.ack.match_bits) == 8); - LASSERT (offsetof(ptl_hdr_t, msg.ack.mlength) == 56); - LASSERT ((int)offsetof(ptl_hdr_t, msg.ack.mlength) == 56); -- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.ack.mlength) == 4); -- -- /* Put */ - LASSERT (offsetof(ptl_hdr_t, msg.put.ack_wmd) == 32); - LASSERT ((int)offsetof(ptl_hdr_t, msg.put.ack_wmd) == 32); -- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.ack_wmd) == 16); - LASSERT (offsetof(ptl_hdr_t, msg.put.match_bits) == 48); - LASSERT ((int)offsetof(ptl_hdr_t, msg.put.match_bits) == 48); -- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.match_bits) == 8); - LASSERT (offsetof(ptl_hdr_t, msg.put.hdr_data) == 56); - LASSERT ((int)offsetof(ptl_hdr_t, msg.put.hdr_data) == 56); -- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.hdr_data) == 8); - LASSERT (offsetof(ptl_hdr_t, 
msg.put.ptl_index) == 64); - LASSERT ((int)offsetof(ptl_hdr_t, msg.put.ptl_index) == 64); -- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.ptl_index) == 4); - LASSERT (offsetof(ptl_hdr_t, msg.put.offset) == 68); - LASSERT ((int)offsetof(ptl_hdr_t, msg.put.offset) == 68); -- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.offset) == 4); -- -- /* Get */ - LASSERT (offsetof(ptl_hdr_t, msg.get.return_wmd) == 32); - LASSERT ((int)offsetof(ptl_hdr_t, msg.get.return_wmd) == 32); -- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.return_wmd) == 16); - LASSERT (offsetof(ptl_hdr_t, msg.get.match_bits) == 48); - LASSERT ((int)offsetof(ptl_hdr_t, msg.get.match_bits) == 48); -- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.match_bits) == 8); - LASSERT (offsetof(ptl_hdr_t, msg.get.ptl_index) == 56); - LASSERT ((int)offsetof(ptl_hdr_t, msg.get.ptl_index) == 56); -- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.ptl_index) == 4); - LASSERT (offsetof(ptl_hdr_t, msg.get.src_offset) == 60); - LASSERT ((int)offsetof(ptl_hdr_t, msg.get.src_offset) == 60); -- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.src_offset) == 4); - LASSERT (offsetof(ptl_hdr_t, msg.get.sink_length) == 64); - LASSERT ((int)offsetof(ptl_hdr_t, msg.get.sink_length) == 64); -- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.sink_length) == 4); -- -- /* Reply */ - LASSERT (offsetof(ptl_hdr_t, msg.reply.dst_wmd) == 32); - LASSERT ((int)offsetof(ptl_hdr_t, msg.reply.dst_wmd) == 32); -- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.reply.dst_wmd) == 16); -- -- /* Hello */ - LASSERT (offsetof(ptl_hdr_t, msg.hello.incarnation) == 32); - LASSERT ((int)offsetof(ptl_hdr_t, msg.hello.incarnation) == 32); -- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.hello.incarnation) == 8); - LASSERT (offsetof(ptl_hdr_t, msg.hello.type) == 40); - LASSERT ((int)offsetof(ptl_hdr_t, msg.hello.type) == 40); -- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.hello.type) == 4); --} diff --cc lnet/lnet/lib-msg.c index 04c69b1,54e89bc..0000000 deleted file 
mode 100644,100644 --- a/lnet/lnet/lib-msg.c +++ /dev/null @@@ -1,154 -1,147 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * lib/lib-msg.c -- * Message decoding, parsing and finalizing routines -- * -- * Copyright (c) 2001-2003 Cluster File Systems, Inc. -- * Copyright (c) 2001-2002 Sandia National Laboratories -- * -- * This file is part of Lustre, http://www.sf.net/projects/lustre/ -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --#ifndef __KERNEL__ --# include --#else --# define DEBUG_SUBSYSTEM S_PORTALS --# include --#endif -- --#include -- --void - lib_enq_event_locked (nal_cb_t *nal, void *private, -lib_enq_event_locked (lib_nal_t *nal, void *private, -- lib_eq_t *eq, ptl_event_t *ev) --{ -- ptl_event_t *eq_slot; - int rc; - - ev->sequence = eq->sequence++; /* Allocate the next queue slot */ - - /* size must be a power of 2 to handle a wrapped sequence # */ - LASSERT (eq->size != 0 && - eq->size == LOWEST_BIT_SET (eq->size)); - eq_slot = eq->base + (ev->sequence & (eq->size - 1)); -- - /* Copy the event into the allocated slot, ensuring all the rest of - * the event's contents have been copied _before_ the sequence - * number gets updated. A processes 'getting' an event waits on - * the next queue slot's sequence to be 'new'. 
When it is, _all_ - * other event fields had better be consistent. I assert - * 'sequence' is the last member, so I only need a 2 stage copy. */ - /* Allocate the next queue slot */ - ev->link = ev->sequence = eq->eq_enq_seq++; - /* NB we don't support START events yet and we don't create a separate - * UNLINK event unless an explicit unlink succeeds, so the link - * sequence is pretty useless */ -- - LASSERT(sizeof (ptl_event_t) == - offsetof(ptl_event_t, sequence) + sizeof(ev->sequence)); - /* We don't support different uid/jids yet */ - ev->uid = 0; - ev->jid = 0; - - /* size must be a power of 2 to handle sequence # overflow */ - LASSERT (eq->eq_size != 0 && - eq->eq_size == LOWEST_BIT_SET (eq->eq_size)); - eq_slot = eq->eq_events + (ev->sequence & (eq->eq_size - 1)); -- - rc = nal->cb_write (nal, private, (user_ptr)eq_slot, ev, - offsetof (ptl_event_t, sequence)); - LASSERT (rc == PTL_OK); - /* There is no race since both event consumers and event producers - * take the LIB_LOCK(), so we don't screw around with memory - * barriers, setting the sequence number last or wierd structure - * layout assertions. 
*/ - *eq_slot = *ev; -- - #ifdef __KERNEL__ - barrier(); - #endif - /* Updating the sequence number is what makes the event 'new' NB if - * the cb_write below isn't atomic, this could cause a race with - * PtlEQGet */ - rc = nal->cb_write(nal, private, (user_ptr)&eq_slot->sequence, - (void *)&ev->sequence,sizeof (ev->sequence)); - LASSERT (rc == PTL_OK); - /* Call the callback handler (if any) */ - if (eq->eq_callback != NULL) - eq->eq_callback (eq_slot); -- - /* Wake anyone sleeping for an event (see lib-eq.c) */ --#ifdef __KERNEL__ - barrier(); - if (waitqueue_active(&nal->libnal_ni.ni_waitq)) - wake_up_all(&nal->libnal_ni.ni_waitq); -#else - pthread_cond_broadcast(&nal->libnal_ni.ni_cond); --#endif - - if (nal->cb_callback != NULL) - nal->cb_callback(nal, private, eq, ev); - else if (eq->event_callback != NULL) - eq->event_callback(ev); --} -- --void - lib_finalize(nal_cb_t *nal, void *private, lib_msg_t *msg, ptl_err_t status) -lib_finalize (lib_nal_t *nal, void *private, lib_msg_t *msg, ptl_err_t status) --{ -- lib_md_t *md; -- int unlink; -- unsigned long flags; -- int rc; -- ptl_hdr_t ack; - - /* ni went down while processing this message */ - if (nal->ni.up == 0) - return; -- -- if (msg == NULL) -- return; -- -- /* Only send an ACK if the PUT completed successfully */ -- if (status == PTL_OK && -- !ptl_is_wire_handle_none(&msg->ack_wmd)) { -- - LASSERT(msg->ev.type == PTL_EVENT_PUT); - LASSERT(msg->ev.type == PTL_EVENT_PUT_END); -- -- memset (&ack, 0, sizeof (ack)); - ack.type = HTON__u32 (PTL_MSG_ACK); - ack.dest_nid = HTON__u64 (msg->ev.initiator.nid); - ack.src_nid = HTON__u64 (nal->ni.nid); - ack.dest_pid = HTON__u32 (msg->ev.initiator.pid); - ack.src_pid = HTON__u32 (nal->ni.pid); - ack.type = cpu_to_le32(PTL_MSG_ACK); - ack.dest_nid = cpu_to_le64(msg->ev.initiator.nid); - ack.dest_pid = cpu_to_le32(msg->ev.initiator.pid); - ack.src_nid = cpu_to_le64(nal->libnal_ni.ni_pid.nid); - ack.src_pid = cpu_to_le32(nal->libnal_ni.ni_pid.pid); -- 
ack.payload_length = 0; -- -- ack.msg.ack.dst_wmd = msg->ack_wmd; -- ack.msg.ack.match_bits = msg->ev.match_bits; - ack.msg.ack.mlength = HTON__u32 (msg->ev.mlength); - ack.msg.ack.mlength = cpu_to_le32(msg->ev.mlength); -- -- rc = lib_send (nal, private, NULL, &ack, PTL_MSG_ACK, -- msg->ev.initiator.nid, msg->ev.initiator.pid, -- NULL, 0, 0); -- if (rc != PTL_OK) { -- /* send failed: there's nothing else to clean up. */ -- CERROR("Error %d sending ACK to "LPX64"\n", -- rc, msg->ev.initiator.nid); -- } -- } -- -- md = msg->md; -- - state_lock(nal, &flags); - LIB_LOCK(nal, flags); -- -- /* Now it's safe to drop my caller's ref */ -- md->pending--; -- LASSERT (md->pending >= 0); -- -- /* Should I unlink this MD? */ - unlink = (md->pending == 0 && /* No other refs */ - (md->threshold == 0 || /* All ops done */ - md->md_flags & PTL_MD_FLAG_UNLINK) != 0); /* black spot */ - if (md->pending != 0) /* other refs */ - unlink = 0; - else if ((md->md_flags & PTL_MD_FLAG_ZOMBIE) != 0) - unlink = 1; - else if ((md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) == 0) - unlink = 0; - else - unlink = lib_md_exhausted(md); -- - msg->ev.status = status; - msg->ev.ni_fail_type = status; -- msg->ev.unlinked = unlink; -- -- if (md->eq != NULL) -- lib_enq_event_locked(nal, private, md->eq, &msg->ev); -- -- if (unlink) -- lib_md_unlink(nal, md); -- -- list_del (&msg->msg_list); - nal->ni.counters.msgs_alloc--; - nal->libnal_ni.ni_counters.msgs_alloc--; -- lib_msg_free(nal, msg); -- - state_unlock(nal, &flags); - LIB_UNLOCK(nal, flags); --} diff --cc lnet/lnet/lib-ni.c index 9e90576,0f298a0..0000000 deleted file mode 100644,100644 --- a/lnet/lnet/lib-ni.c +++ /dev/null @@@ -1,128 -1,72 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * lib/lib-ni.c -- * Network status registers and distance functions. -- * -- * Copyright (c) 2001-2003 Cluster File Systems, Inc. 
-- * Copyright (c) 2001-2002 Sandia National Laboratories -- * -- * This file is part of Lustre, http://www.sf.net/projects/lustre/ -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --#define DEBUG_SUBSYSTEM S_PORTALS --#include - #include -- --#define MAX_DIST 18446744073709551615ULL - - int do_PtlNIDebug(nal_cb_t * nal, void *private, void *v_args, void *v_ret) - { - PtlNIDebug_in *args = v_args; - PtlNIDebug_out *ret = v_ret; - lib_ni_t *ni = &nal->ni; - - ret->rc = ni->debug; - ni->debug = args->mask_in; - - return 0; - } -- - int do_PtlNIStatus(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -int lib_api_ni_status (nal_t *apinal, ptl_sr_index_t sr_idx, - ptl_sr_value_t *status) --{ - /* - * Incoming: - * ptl_handle_ni_t interface_in - * ptl_sr_index_t register_in - * - * Outgoing: - * ptl_sr_value_t * status_out - */ - - PtlNIStatus_in *args = v_args; - PtlNIStatus_out *ret = v_ret; - lib_ni_t *ni = &nal->ni; - lib_counters_t *count = &ni->counters; - - if (!args) - return ret->rc = PTL_SEGV; - - ret->rc = PTL_OK; - ret->status_out = 0; - - /* - * I hate this sort of code.... Hash tables, offset lists? - * Treat the counters as an array of ints? 
- */ - if (args->register_in == PTL_SR_DROP_COUNT) - ret->status_out = count->drop_count; - - else if (args->register_in == PTL_SR_DROP_LENGTH) - ret->status_out = count->drop_length; - - else if (args->register_in == PTL_SR_RECV_COUNT) - ret->status_out = count->recv_count; - - else if (args->register_in == PTL_SR_RECV_LENGTH) - ret->status_out = count->recv_length; - - else if (args->register_in == PTL_SR_SEND_COUNT) - ret->status_out = count->send_count; - - else if (args->register_in == PTL_SR_SEND_LENGTH) - ret->status_out = count->send_length; - - else if (args->register_in == PTL_SR_MSGS_MAX) - ret->status_out = count->msgs_max; - else - ret->rc = PTL_INV_SR_INDX; - lib_nal_t *nal = apinal->nal_data; - lib_ni_t *ni = &nal->libnal_ni; - lib_counters_t *count = &ni->ni_counters; -- - return ret->rc; - switch (sr_idx) { - case PTL_SR_DROP_COUNT: - *status = count->drop_count; - return PTL_OK; - case PTL_SR_DROP_LENGTH: - *status = count->drop_length; - return PTL_OK; - case PTL_SR_RECV_COUNT: - *status = count->recv_count; - return PTL_OK; - case PTL_SR_RECV_LENGTH: - *status = count->recv_length; - return PTL_OK; - case PTL_SR_SEND_COUNT: - *status = count->send_count; - return PTL_OK; - case PTL_SR_SEND_LENGTH: - *status = count->send_length; - return PTL_OK; - case PTL_SR_MSGS_MAX: - *status = count->msgs_max; - return PTL_OK; - default: - *status = 0; - return PTL_SR_INDEX_INVALID; - } --} -- -- - int do_PtlNIDist(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -int lib_api_ni_dist (nal_t *apinal, ptl_process_id_t *pid, unsigned long *dist) --{ - /* - * Incoming: - * ptl_handle_ni_t interface_in - * ptl_process_id_t process_in - - * - * Outgoing: - * unsigned long * distance_out - - */ - - PtlNIDist_in *args = v_args; - PtlNIDist_out *ret = v_ret; - - unsigned long dist; - ptl_process_id_t id_in = args->process_in; - ptl_nid_t nid; - int rc; - - nid = id_in.nid; - - if ((rc = nal->cb_dist(nal, nid, &dist)) != 0) { - ret->distance_out = (unsigned 
long) MAX_DIST; - return PTL_INV_PROC; - } - - ret->distance_out = dist; - lib_nal_t *nal = apinal->nal_data; -- - return ret->rc = PTL_OK; - return (nal->libnal_dist(nal, pid->nid, dist)); --} diff --cc lnet/packaging/.cvsignore index fd1d56a,fd1d56a..0000000 deleted file mode 100644,100644 --- a/lnet/packaging/.cvsignore +++ /dev/null @@@ -1,8 -1,8 +1,0 @@@ --Makefile --Makefile.in --aclocal.m4 --config.log --config.status --config.cache --configure --portals.spec diff --cc lnet/packaging/Makefile.am index 126bc69,126bc69..0000000 deleted file mode 100644,100644 --- a/lnet/packaging/Makefile.am +++ /dev/null @@@ -1,6 -1,6 +1,0 @@@ --# Copyright (C) 2002 Cluster File Systems, Inc. --# --# This code is issued under the GNU General Public License. --# See the file COPYING in this distribution -- --EXTRA_DIST = portals.spec diff --cc lnet/packaging/portals.spec.in index e196b3f,e196b3f..0000000 deleted file mode 100644,100644 --- a/lnet/packaging/portals.spec.in +++ /dev/null @@@ -1,116 -1,116 +1,0 @@@ --%define kversion @RELEASE@ --%define linuxdir @LINUX@ --%define version HEAD -- --Summary: Sandia Portals Message Passing - utilities --Name: portals --Version: %{version} --Release: 0210101748uml --Copyright: LGPL --Group: Utilities/System --BuildRoot: /var/tmp/portals-%{version}-root --Source: http://sandiaportals.org/portals-%{version}.tar.gz -- --%description --Sandia Portals message passing package. Contains kernel modules, libraries and utilities. -- --%package -n portals-modules --Summary: Kernel modules and NAL's for portals --Group: Development/Kernel -- --%description -n portals-modules --Object-Based Disk storage drivers for Linux %{kversion}. 
-- --%package -n portals-source --Summary: Portals kernel source for rebuilding with other kernels --Group: Development/Kernel -- --%description -n portals-source --Portals kernel source for rebuilding with other kernels -- --%prep --%setup -n portals-%{version} -- --%build --rm -rf $RPM_BUILD_ROOT -- --# Create the pristine source directory. --srcdir=$RPM_BUILD_ROOT/usr/src/portals-%{version} --mkdir -p $srcdir --find . -name CVS -prune -o -print | cpio -ap $srcdir -- --# Set an explicit path to our Linux tree, if we can. --conf_flag= --linuxdir=%{linuxdir} --test -d $linuxdir && conf_flag=--with-linux=$linuxdir --./configure $conf_flag --make -- --%install --make install prefix=$RPM_BUILD_ROOT -- --%ifarch alpha --# this hurts me -- conf_flag= -- linuxdir=%{linuxdir} -- test -d $linuxdir && conf_flag=--with-linux=$linuxdir -- make clean -- ./configure --enable-rtscts-myrinet $conf_flag -- make -- cp linux/rtscts/rtscts.o $RPM_BUILD_ROOT/lib/modules/%{kversion}/kernel/net/portals/rtscts_myrinet.o -- cp user/myrinet_utils/mcpload $RPM_BUILD_ROOT/usr/sbin/mcpload --%endif -- -- --%files --%attr(-, root, root) %doc COPYING --%attr(-, root, root) /usr/sbin/acceptor --%attr(-, root, root) /usr/sbin/ptlctl --%attr(-, root, root) /usr/sbin/debugctl --%ifarch alpha --%attr(-, root, root) /usr/sbin/mcpload --%endif --%attr(-, root, root) /lib/libmyrnal.a --%attr(-, root, root) /lib/libptlapi.a --%attr(-, root, root) /lib/libptlctl.a --%attr(-, root, root) /lib/libprocbridge.a --%attr(-, root, root) /lib/libptllib.a --%attr(-, root, root) /lib/libtcpnal.a --%attr(-, root, root) /lib/libtcpnalutil.a --%attr(-, root, root) /usr/include/portals/*.h --%attr(-, root, root) /usr/include/portals/base/*.h --%attr(-, root, root) /usr/include/linux/*.h -- --%files -n portals-modules --%attr(-, root, root) %doc COPYING --%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/portals.o --%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/kptlrouter.o --%attr(-, 
root, root) /lib/modules/%{kversion}/kernel/net/portals/kptrxtx.o --%ifarch alpha --%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/p3mod.o --%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/rtscts.o --%endif --%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/*nal.o -- --%files -n portals-source --%attr(-, root, root) /usr/src/portals-%{version} -- --%post --if [ ! -e /dev/portals ]; then -- mknod /dev/portals c 10 240 --fi --depmod -ae || exit 0 -- --grep -q portals /etc/modules.conf || \ -- echo 'alias char-major-10-240 portals' >> /etc/modules.conf -- --grep -q '/dev/portals' /etc/modules.conf || \ -- echo 'alias /dev/portals portals' >> /etc/modules.conf -- --%postun --depmod -ae || exit 0 -- --%clean --#rm -rf $RPM_BUILD_ROOT -- --# end of file diff --cc lnet/router/.cvsignore index 5ed596b,5ed596b..0000000 deleted file mode 100644,100644 --- a/lnet/router/.cvsignore +++ /dev/null @@@ -1,10 -1,10 +1,0 @@@ --.deps --Makefile --.*.cmd --autoMakefile.in --autoMakefile --*.ko --*.mod.c --.*.flags --.tmp_versions --.depend diff --cc lnet/router/Makefile.in index 3bb6cf7,3bb6cf7..0000000 deleted file mode 100644,100644 --- a/lnet/router/Makefile.in +++ /dev/null @@@ -1,4 -1,4 +1,0 @@@ --MODULES := kptlrouter --kptlrouter-objs := router.o proc.o -- --@INCLUDE_RULES@ diff --cc lnet/router/Makefile.mk index 9b02c03,9b02c03..0000000 deleted file mode 100644,100644 --- a/lnet/router/Makefile.mk +++ /dev/null @@@ -1,9 -1,9 +1,0 @@@ --# Copyright (C) 2001 Cluster File Systems, Inc. --# --# This code is issued under the GNU General Public License. --# See the file COPYING in this distribution -- --include $(src)/../Kernelenv -- --obj-y += kptlrouter.o --kptlrouter-objs := router.o proc.o diff --cc lnet/router/autoMakefile.am index fa11e8c,fa11e8c..0000000 deleted file mode 100644,100644 --- a/lnet/router/autoMakefile.am +++ /dev/null @@@ -1,13 -1,13 +1,0 @@@ --# Copyright (C) 2001 Cluster File Systems, Inc. 
--# --# This code is issued under the GNU General Public License. --# See the file COPYING in this distribution -- --if MODULES --if !CRAY_PORTALS --modulenet_DATA = kptlrouter$(KMODEXT) --endif --endif -- --MOSTLYCLEANFILES = *.o *.ko *.mod.c --DIST_SOURCES = $(kptlrouter-objs:%.o=%.c) router.h diff --cc lnet/router/proc.c index ad4dd87,0fe3b90..0000000 deleted file mode 100644,100644 --- a/lnet/router/proc.c +++ /dev/null @@@ -1,228 -1,219 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (C) 2002 Cluster File Systems, Inc. -- * -- * This file is part of Portals -- * http://sourceforge.net/projects/sandiaportals/ -- * -- * Portals is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Portals is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Portals; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-- * -- */ -- --#include "router.h" -- --#define KPR_PROC_ROUTER "sys/portals/router" --#define KPR_PROC_ROUTES "sys/portals/routes" -- --/* Used for multi-page route list book keeping */ --struct proc_route_data { -- struct list_head *curr; -- unsigned int generation; -- off_t skip; --} kpr_read_routes_data; -- --/* nal2name support re-used from utils/portals.c */ --struct name2num { -- char *name; -- int num; --} nalnames[] = { -- { "any", 0}, -- { "elan", QSWNAL}, -- { "tcp", SOCKNAL}, -- { "gm", GMNAL}, - { "ib", IBNAL}, - { "ib", OPENIBNAL}, -- { NULL, -1} --}; -- --static struct name2num *name2num_lookup_num(struct name2num *table, int num) --{ -- while (table->name != NULL) -- if (num == table->num) -- return (table); -- else -- table++; -- return (NULL); --} -- --static char *nal2name(int nal) --{ -- struct name2num *e = name2num_lookup_num(nalnames, nal); -- return ((e == NULL) ? "???" : e->name); --} -- -- --static int kpr_proc_router_read(char *page, char **start, off_t off, -- int count, int *eof, void *data) --{ -- unsigned long long bytes = kpr_fwd_bytes; -- unsigned long packets = kpr_fwd_packets; -- unsigned long errors = kpr_fwd_errors; -- unsigned int qdepth = atomic_read (&kpr_queue_depth); -- int len; -- -- *eof = 1; -- if (off != 0) -- return (0); -- -- len = sprintf(page, "%Ld %ld %ld %d\n", bytes, packets, errors, qdepth); -- -- *start = page; -- return (len); --} -- --static int kpr_proc_router_write(struct file *file, const char *ubuffer, -- unsigned long count, void *data) --{ -- /* Ignore what we've been asked to write, and just zero the stats */ -- kpr_fwd_bytes = 0; -- kpr_fwd_packets = 0; -- kpr_fwd_errors = 0; -- -- return (count); --} -- --static int kpr_proc_routes_read(char *page, char **start, off_t off, -- int count, int *eof, void *data) --{ -- struct proc_route_data *prd = data; -- kpr_route_entry_t *re; -- kpr_gateway_entry_t *ge; -- int chunk_len = 0; -- int line_len = 0; -- int user_len = 0; -- -- *eof = 1; -- *start = page; 
-- -- if (prd->curr == NULL) { -- if (off != 0) -- return 0; -- -- /* First pass, initialize our private data */ -- prd->curr = kpr_routes.next; -- prd->generation = kpr_routes_generation; -- prd->skip = 0; -- } else { -- /* Abort route list generation change */ -- if (prd->generation != kpr_routes_generation) { -- prd->curr = NULL; -- return sprintf(page, "\nError: Routes Changed\n"); -- } -- -- /* All the routes have been walked */ -- if (prd->curr == &kpr_routes) { -- prd->curr = NULL; -- return 0; -- } -- } -- -- read_lock(&kpr_rwlock); -- *start = page + prd->skip; -- user_len = -prd->skip; -- - while ((prd->curr != NULL) && (prd->curr != &kpr_routes)) { - for (; prd->curr != &kpr_routes; prd->curr = prd->curr->next) { -- re = list_entry(prd->curr, kpr_route_entry_t, kpre_list); -- ge = re->kpre_gateway; -- -- line_len = sprintf(page + chunk_len, -- "%12s "LPX64" : "LPX64" - "LPX64", %s\n", -- nal2name(ge->kpge_nalid), ge->kpge_nid, -- re->kpre_lo_nid, re->kpre_hi_nid, -- ge->kpge_alive ? "up" : "down"); -- chunk_len += line_len; -- user_len += line_len; - - /* Abort the route list changed */ - if (prd->curr->next == NULL) { - prd->curr = NULL; - read_unlock(&kpr_rwlock); - return sprintf(page, "\nError: Routes Changed\n"); - } - - prd->curr = prd->curr->next; -- - /* The route table will exceed one page, break the while loop - * so the function can be re-called with a new page. - */ - if ((chunk_len > (PAGE_SIZE - 80)) || (user_len > count)) - /* The route table will exceed one page */ - if ((chunk_len > (PAGE_SIZE - 80)) || (user_len > count)) { - prd->curr = prd->curr->next; -- break; - } -- } -- -- *eof = 0; -- -- /* Caller received only a portion of the last entry, the -- * remaining will be delivered in the next page if asked for. 
-- */ -- if (user_len > count) { -- prd->curr = prd->curr->prev; -- prd->skip = line_len - (user_len - count); -- read_unlock(&kpr_rwlock); -- return count; -- } -- -- /* Not enough data to entirely satify callers request */ -- prd->skip = 0; -- read_unlock(&kpr_rwlock); -- return user_len; --} -- --static int kpr_proc_routes_write(struct file *file, const char *ubuffer, -- unsigned long count, void *data) --{ -- /* no-op; lctl should be used to adjust the routes */ -- return (count); --} -- --void kpr_proc_init(void) --{ -- struct proc_dir_entry *router_entry; -- struct proc_dir_entry *routes_entry; -- -- /* Initialize KPR_PROC_ROUTER */ -- router_entry = create_proc_entry (KPR_PROC_ROUTER, -- S_IFREG | S_IRUGO | S_IWUSR, NULL); -- -- if (router_entry == NULL) { -- CERROR("couldn't create proc entry %s\n", KPR_PROC_ROUTER); -- return; -- } -- -- router_entry->data = NULL; -- router_entry->read_proc = kpr_proc_router_read; -- router_entry->write_proc = kpr_proc_router_write; -- -- /* Initialize KPR_PROC_ROUTES */ -- routes_entry = create_proc_entry (KPR_PROC_ROUTES, -- S_IFREG | S_IRUGO | S_IWUSR, NULL); -- -- if (routes_entry == NULL) { -- CERROR("couldn't create proc entry %s\n", KPR_PROC_ROUTES); -- return; -- } -- -- kpr_read_routes_data.curr = NULL; -- kpr_read_routes_data.generation = 0; -- kpr_read_routes_data.skip = 0; -- -- routes_entry->data = &kpr_read_routes_data; -- routes_entry->read_proc = kpr_proc_routes_read; -- routes_entry->write_proc = kpr_proc_routes_write; --} -- --void kpr_proc_fini(void) --{ -- remove_proc_entry(KPR_PROC_ROUTER, 0); -- remove_proc_entry(KPR_PROC_ROUTES, 0); --} diff --cc lnet/router/router.c index 6fcd83a,448ab1f..0000000 deleted file mode 100644,100644 --- a/lnet/router/router.c +++ /dev/null @@@ -1,771 -1,824 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (C) 2002 Cluster File Systems, Inc. 
-- * -- * This file is part of Portals -- * http://sourceforge.net/projects/sandiaportals/ -- * -- * Portals is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Portals is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Portals; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- * -- */ -- --#include "router.h" -- --LIST_HEAD(kpr_routes); --LIST_HEAD(kpr_gateways); --LIST_HEAD(kpr_nals); -- --unsigned int kpr_routes_generation; --unsigned long long kpr_fwd_bytes; --unsigned long kpr_fwd_packets; --unsigned long kpr_fwd_errors; --atomic_t kpr_queue_depth; -- --/* Mostly the tables are read-only (thread and interrupt context) -- * -- * Once in a blue moon we register/deregister NALs and add/remove routing -- * entries (thread context only)... 
*/ --rwlock_t kpr_rwlock = RW_LOCK_UNLOCKED; -- --kpr_router_interface_t kpr_router_interface = { -- kprri_register: kpr_register_nal, -- kprri_lookup: kpr_lookup_target, -- kprri_fwd_start: kpr_forward_packet, -- kprri_fwd_done: kpr_complete_packet, -- kprri_notify: kpr_nal_notify, -- kprri_shutdown: kpr_shutdown_nal, -- kprri_deregister: kpr_deregister_nal, - }; - - kpr_control_interface_t kpr_control_interface = { - kprci_add_route: kpr_add_route, - kprci_del_route: kpr_del_route, - kprci_get_route: kpr_get_route, - kprci_notify: kpr_sys_notify, --}; -- --int --kpr_register_nal (kpr_nal_interface_t *nalif, void **argp) --{ -- unsigned long flags; -- struct list_head *e; -- kpr_nal_entry_t *ne; -- -- CDEBUG (D_NET, "Registering NAL %d\n", nalif->kprni_nalid); -- -- PORTAL_ALLOC (ne, sizeof (*ne)); -- if (ne == NULL) -- return (-ENOMEM); -- -- memset (ne, 0, sizeof (*ne)); -- memcpy ((void *)&ne->kpne_interface, (void *)nalif, sizeof (*nalif)); -- -- LASSERT (!in_interrupt()); -- write_lock_irqsave (&kpr_rwlock, flags); -- -- for (e = kpr_nals.next; e != &kpr_nals; e = e->next) -- { -- kpr_nal_entry_t *ne2 = list_entry (e, kpr_nal_entry_t, kpne_list); -- -- if (ne2->kpne_interface.kprni_nalid == ne->kpne_interface.kprni_nalid) -- { -- write_unlock_irqrestore (&kpr_rwlock, flags); -- -- CERROR ("Attempt to register same NAL %d twice\n", ne->kpne_interface.kprni_nalid); -- -- PORTAL_FREE (ne, sizeof (*ne)); -- return (-EEXIST); -- } -- } -- -- list_add (&ne->kpne_list, &kpr_nals); -- -- write_unlock_irqrestore (&kpr_rwlock, flags); -- -- *argp = ne; -- PORTAL_MODULE_USE; -- return (0); --} -- --void --kpr_do_upcall (void *arg) --{ -- kpr_upcall_t *u = (kpr_upcall_t *)arg; -- char nalstr[10]; -- char nidstr[36]; -- char whenstr[36]; -- char *argv[] = { -- NULL, -- "ROUTER_NOTIFY", -- nalstr, -- nidstr, -- u->kpru_alive ? 
"up" : "down", -- whenstr, -- NULL}; -- -- snprintf (nalstr, sizeof(nalstr), "%d", u->kpru_nal_id); -- snprintf (nidstr, sizeof(nidstr), LPX64, u->kpru_nid); -- snprintf (whenstr, sizeof(whenstr), "%ld", u->kpru_when); -- -- portals_run_upcall (argv); -- -- kfree (u); --} -- --void --kpr_upcall (int gw_nalid, ptl_nid_t gw_nid, int alive, time_t when) --{ -- char str[PTL_NALFMT_SIZE]; -- -- /* May be in arbitrary context */ -- kpr_upcall_t *u = kmalloc (sizeof (kpr_upcall_t), GFP_ATOMIC); -- -- if (u == NULL) { -- CERROR ("Upcall out of memory: nal %d nid "LPX64" (%s) %s\n", -- gw_nalid, gw_nid, -- portals_nid2str(gw_nalid, gw_nid, str), -- alive ? "up" : "down"); -- return; -- } -- -- u->kpru_nal_id = gw_nalid; -- u->kpru_nid = gw_nid; -- u->kpru_alive = alive; -- u->kpru_when = when; -- -- prepare_work (&u->kpru_tq, kpr_do_upcall, u); -- schedule_work (&u->kpru_tq); --} -- --int --kpr_do_notify (int byNal, int gateway_nalid, ptl_nid_t gateway_nid, -- int alive, time_t when) --{ -- unsigned long flags; -- int found; -- kpr_nal_entry_t *ne = NULL; -- kpr_gateway_entry_t *ge = NULL; -- struct timeval now; -- struct list_head *e; -- struct list_head *n; -- char str[PTL_NALFMT_SIZE]; -- -- CDEBUG (D_NET, "%s notifying [%d] "LPX64": %s\n", -- byNal ? "NAL" : "userspace", -- gateway_nalid, gateway_nid, alive ? "up" : "down"); -- -- /* can't do predictions... */ -- do_gettimeofday (&now); -- if (when > now.tv_sec) { -- CWARN ("Ignoring prediction from %s of [%d] "LPX64" %s " -- "%ld seconds in the future\n", -- byNal ? "NAL" : "userspace", -- gateway_nalid, gateway_nid, -- alive ? "up" : "down", -- when - now.tv_sec); -- return (EINVAL); -- } -- -- LASSERT (when <= now.tv_sec); -- -- /* Serialise with lookups (i.e. 
write lock) */ -- write_lock_irqsave(&kpr_rwlock, flags); -- -- found = 0; -- list_for_each_safe (e, n, &kpr_gateways) { -- -- ge = list_entry(e, kpr_gateway_entry_t, kpge_list); -- if ((gateway_nalid != 0 && -- ge->kpge_nalid != gateway_nalid) || -- ge->kpge_nid != gateway_nid) -- continue; -- -- found = 1; -- break; -- } -- -- if (!found) { -- /* gateway not found */ -- write_unlock_irqrestore(&kpr_rwlock, flags); -- CDEBUG (D_NET, "Gateway not found\n"); -- return (0); -- } -- -- if (when < ge->kpge_timestamp) { -- /* out of date information */ -- write_unlock_irqrestore (&kpr_rwlock, flags); -- CDEBUG (D_NET, "Out of date\n"); -- return (0); -- } -- -- /* update timestamp */ -- ge->kpge_timestamp = when; -- -- if ((!ge->kpge_alive) == (!alive)) { -- /* new date for old news */ -- write_unlock_irqrestore (&kpr_rwlock, flags); -- CDEBUG (D_NET, "Old news\n"); -- return (0); -- } -- -- ge->kpge_alive = alive; -- CDEBUG(D_NET, "set "LPX64" [%p] %d\n", gateway_nid, ge, alive); -- -- if (alive) { -- /* Reset all gateway weights so the newly-enabled gateway -- * doesn't have to play catch-up */ -- list_for_each_safe (e, n, &kpr_gateways) { -- kpr_gateway_entry_t *ge = list_entry(e, kpr_gateway_entry_t, -- kpge_list); -- atomic_set (&ge->kpge_weight, 0); -- } -- } -- -- found = 0; -- if (!byNal) { -- /* userland notified me: notify NAL? */ -- ne = kpr_find_nal_entry_locked (ge->kpge_nalid); -- if (ne != NULL) { -- if (!ne->kpne_shutdown && -- ne->kpne_interface.kprni_notify != NULL) { -- /* take a ref on this NAL until notifying -- * it has completed... */ -- atomic_inc (&ne->kpne_refcount); -- found = 1; -- } -- } -- } -- -- write_unlock_irqrestore(&kpr_rwlock, flags); -- -- if (found) { -- ne->kpne_interface.kprni_notify (ne->kpne_interface.kprni_arg, -- gateway_nid, alive); -- /* 'ne' can disappear now... */ -- atomic_dec (&ne->kpne_refcount); -- } -- -- if (byNal) { -- /* It wasn't userland that notified me... 
*/ -- CWARN ("Upcall: NAL %d NID "LPX64" (%s) is %s\n", -- gateway_nalid, gateway_nid, -- portals_nid2str(gateway_nalid, gateway_nid, str), -- alive ? "alive" : "dead"); -- kpr_upcall (gateway_nalid, gateway_nid, alive, when); -- } else { -- CDEBUG (D_NET, " NOT Doing upcall\n"); -- } -- -- return (0); --} -- --void --kpr_nal_notify (void *arg, ptl_nid_t peer, int alive, time_t when) --{ -- kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg; -- -- kpr_do_notify (1, ne->kpne_interface.kprni_nalid, peer, alive, when); --} -- --void --kpr_shutdown_nal (void *arg) --{ -- unsigned long flags; -- kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg; -- -- CDEBUG (D_NET, "Shutting down NAL %d\n", ne->kpne_interface.kprni_nalid); -- -- LASSERT (!ne->kpne_shutdown); -- LASSERT (!in_interrupt()); -- - write_lock_irqsave (&kpr_rwlock, flags); /* locking a bit spurious... */ - write_lock_irqsave (&kpr_rwlock, flags); -- ne->kpne_shutdown = 1; - write_unlock_irqrestore (&kpr_rwlock, flags); /* except it's a memory barrier */ - - while (atomic_read (&ne->kpne_refcount) != 0) - { - CDEBUG (D_NET, "Waiting for refcount on NAL %d to reach zero (%d)\n", - ne->kpne_interface.kprni_nalid, atomic_read (&ne->kpne_refcount)); - - set_current_state (TASK_UNINTERRUPTIBLE); - schedule_timeout (HZ); - } - write_unlock_irqrestore (&kpr_rwlock, flags); --} -- --void --kpr_deregister_nal (void *arg) --{ -- unsigned long flags; -- kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg; -- -- CDEBUG (D_NET, "Deregister NAL %d\n", ne->kpne_interface.kprni_nalid); -- -- LASSERT (ne->kpne_shutdown); /* caller must have issued shutdown already */ - LASSERT (atomic_read (&ne->kpne_refcount) == 0); /* can't be busy */ -- LASSERT (!in_interrupt()); -- -- write_lock_irqsave (&kpr_rwlock, flags); - -- list_del (&ne->kpne_list); - -- write_unlock_irqrestore (&kpr_rwlock, flags); - - /* Wait until all outstanding messages/notifications have completed */ - while (atomic_read (&ne->kpne_refcount) != 0) - { - CDEBUG (D_NET, "Waiting 
for refcount on NAL %d to reach zero (%d)\n", - ne->kpne_interface.kprni_nalid, atomic_read (&ne->kpne_refcount)); - - set_current_state (TASK_UNINTERRUPTIBLE); - schedule_timeout (HZ); - } -- -- PORTAL_FREE (ne, sizeof (*ne)); -- PORTAL_MODULE_UNUSE; --} -- --int --kpr_ge_isbetter (kpr_gateway_entry_t *ge1, kpr_gateway_entry_t *ge2) --{ -- const int significant_bits = 0x00ffffff; -- /* We use atomic_t to record/compare route weights for -- * load-balancing. Here we limit ourselves to only using -- * 'significant_bits' when we do an 'after' comparison */ -- -- int diff = (atomic_read (&ge1->kpge_weight) - -- atomic_read (&ge2->kpge_weight)) & significant_bits; -- int rc = (diff > (significant_bits >> 1)); -- -- CDEBUG(D_NET, "[%p]"LPX64"=%d %s [%p]"LPX64"=%d\n", -- ge1, ge1->kpge_nid, atomic_read (&ge1->kpge_weight), -- rc ? ">" : "<", -- ge2, ge2->kpge_nid, atomic_read (&ge2->kpge_weight)); -- -- return (rc); --} -- --void --kpr_update_weight (kpr_gateway_entry_t *ge, int nob) --{ -- int weight = 1 + (nob + sizeof (ptl_hdr_t)/2)/sizeof (ptl_hdr_t); -- -- /* We've chosen this route entry (i.e. gateway) to forward payload -- * of length 'nob'; update the route's weight to make it less -- * favoured. Note that the weight is 1 plus the payload size -- * rounded and scaled to the portals header size, so we get better -- * use of the significant bits in kpge_weight. 
*/ -- -- CDEBUG(D_NET, "gateway [%p]"LPX64" += %d\n", ge, -- ge->kpge_nid, weight); -- -- atomic_add (weight, &ge->kpge_weight); --} -- --int --kpr_lookup_target (void *arg, ptl_nid_t target_nid, int nob, -- ptl_nid_t *gateway_nidp) --{ -- kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg; -- struct list_head *e; -- kpr_route_entry_t *re; -- kpr_gateway_entry_t *ge = NULL; -- int rc = -ENOENT; -- -- /* Caller wants to know if 'target_nid' can be reached via a gateway -- * ON HER OWN NETWORK */ -- -- CDEBUG (D_NET, "lookup "LPX64" from NAL %d\n", target_nid, -- ne->kpne_interface.kprni_nalid); - - if (ne->kpne_shutdown) /* caller is shutting down */ - return (-ENOENT); - LASSERT (!in_interrupt()); -- -- read_lock (&kpr_rwlock); - - if (ne->kpne_shutdown) { /* caller is shutting down */ - read_unlock (&kpr_rwlock); - return (-ENOENT); - } -- -- /* Search routes for one that has a gateway to target_nid on the callers network */ -- -- list_for_each (e, &kpr_routes) { -- re = list_entry (e, kpr_route_entry_t, kpre_list); -- -- if (re->kpre_lo_nid > target_nid || -- re->kpre_hi_nid < target_nid) -- continue; -- -- /* found table entry */ -- -- if (re->kpre_gateway->kpge_nalid != ne->kpne_interface.kprni_nalid || -- !re->kpre_gateway->kpge_alive) { -- /* different NAL or gateway down */ -- rc = -EHOSTUNREACH; -- continue; -- } -- -- if (ge == NULL || -- kpr_ge_isbetter (re->kpre_gateway, ge)) -- ge = re->kpre_gateway; -- } -- -- if (ge != NULL) { -- kpr_update_weight (ge, nob); -- *gateway_nidp = ge->kpge_nid; -- rc = 0; -- } -- -- read_unlock (&kpr_rwlock); -- -- /* NB can't deref 're' now; it might have been removed! */ -- -- CDEBUG (D_NET, "lookup "LPX64" from NAL %d: %d ("LPX64")\n", -- target_nid, ne->kpne_interface.kprni_nalid, rc, -- (rc == 0) ? 
*gateway_nidp : (ptl_nid_t)0); -- return (rc); --} -- --kpr_nal_entry_t * --kpr_find_nal_entry_locked (int nal_id) --{ -- struct list_head *e; -- -- /* Called with kpr_rwlock held */ -- -- list_for_each (e, &kpr_nals) { -- kpr_nal_entry_t *ne = list_entry (e, kpr_nal_entry_t, kpne_list); -- -- if (nal_id != ne->kpne_interface.kprni_nalid) /* no match */ -- continue; -- -- return (ne); -- } -- -- return (NULL); --} -- --void --kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd) --{ -- kpr_nal_entry_t *src_ne = (kpr_nal_entry_t *)arg; -- ptl_nid_t target_nid = fwd->kprfd_target_nid; -- int nob = fwd->kprfd_nob; -- kpr_gateway_entry_t *ge = NULL; -- kpr_nal_entry_t *dst_ne = NULL; -- struct list_head *e; -- kpr_route_entry_t *re; -- kpr_nal_entry_t *tmp_ne; - int rc; -- -- CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %d\n", fwd, -- target_nid, src_ne->kpne_interface.kprni_nalid); -- -- LASSERT (nob == lib_kiov_nob (fwd->kprfd_niov, fwd->kprfd_kiov)); - - atomic_inc (&kpr_queue_depth); - atomic_inc (&src_ne->kpne_refcount); /* source nal is busy until fwd completes */ - LASSERT (!in_interrupt()); - - read_lock (&kpr_rwlock); -- -- kpr_fwd_packets++; /* (loose) stats accounting */ -- kpr_fwd_bytes += nob + sizeof(ptl_hdr_t); -- - if (src_ne->kpne_shutdown) /* caller is shutting down */ - if (src_ne->kpne_shutdown) { /* caller is shutting down */ - rc = -ESHUTDOWN; -- goto out; - } -- -- fwd->kprfd_router_arg = src_ne; /* stash caller's nal entry */ - - read_lock (&kpr_rwlock); -- -- /* Search routes for one that has a gateway to target_nid NOT on the caller's network */ -- -- list_for_each (e, &kpr_routes) { -- re = list_entry (e, kpr_route_entry_t, kpre_list); -- -- if (re->kpre_lo_nid > target_nid || /* no match */ -- re->kpre_hi_nid < target_nid) -- continue; -- -- if (re->kpre_gateway->kpge_nalid == src_ne->kpne_interface.kprni_nalid) -- continue; /* don't route to same NAL */ -- -- if (!re->kpre_gateway->kpge_alive) -- continue; /* gateway is dead */ -- -- tmp_ne 
= kpr_find_nal_entry_locked (re->kpre_gateway->kpge_nalid); -- -- if (tmp_ne == NULL || -- tmp_ne->kpne_shutdown) { -- /* NAL must be registered and not shutting down */ -- continue; -- } -- -- if (ge == NULL || -- kpr_ge_isbetter (re->kpre_gateway, ge)) { -- ge = re->kpre_gateway; -- dst_ne = tmp_ne; -- } -- } -- -- if (ge != NULL) { -- LASSERT (dst_ne != NULL); -- -- kpr_update_weight (ge, nob); -- -- fwd->kprfd_gateway_nid = ge->kpge_nid; - atomic_inc (&dst_ne->kpne_refcount); /* dest nal is busy until fwd completes */ - atomic_inc (&src_ne->kpne_refcount); /* source and dest nals are */ - atomic_inc (&dst_ne->kpne_refcount); /* busy until fwd completes */ - atomic_inc (&kpr_queue_depth); -- -- read_unlock (&kpr_rwlock); -- -- CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %d: " -- "to "LPX64" on NAL %d\n", -- fwd, target_nid, src_ne->kpne_interface.kprni_nalid, -- fwd->kprfd_gateway_nid, dst_ne->kpne_interface.kprni_nalid); -- -- dst_ne->kpne_interface.kprni_fwd (dst_ne->kpne_interface.kprni_arg, fwd); -- return; -- } -- - read_unlock (&kpr_rwlock); - rc = -EHOSTUNREACH; -- out: -- kpr_fwd_errors++; -- - CDEBUG (D_NET, "Failed to forward [%p] "LPX64" from NAL %d\n", fwd, - target_nid, src_ne->kpne_interface.kprni_nalid); - CDEBUG (D_NET, "Failed to forward [%p] "LPX64" from NAL %d: %d\n", - fwd, target_nid, src_ne->kpne_interface.kprni_nalid, rc); -- - /* Can't find anywhere to forward to */ - (fwd->kprfd_callback)(fwd->kprfd_callback_arg, -EHOSTUNREACH); - (fwd->kprfd_callback)(fwd->kprfd_callback_arg, rc); -- - atomic_dec (&kpr_queue_depth); - atomic_dec (&src_ne->kpne_refcount); - read_unlock (&kpr_rwlock); --} -- --void --kpr_complete_packet (void *arg, kpr_fwd_desc_t *fwd, int error) --{ -- kpr_nal_entry_t *dst_ne = (kpr_nal_entry_t *)arg; -- kpr_nal_entry_t *src_ne = (kpr_nal_entry_t *)fwd->kprfd_router_arg; -- -- CDEBUG (D_NET, "complete(1) [%p] from NAL %d to NAL %d: %d\n", fwd, -- src_ne->kpne_interface.kprni_nalid, dst_ne->kpne_interface.kprni_nalid, 
error); -- -- atomic_dec (&dst_ne->kpne_refcount); /* CAVEAT EMPTOR dst_ne can disappear now!!! */ -- -- (fwd->kprfd_callback)(fwd->kprfd_callback_arg, error); -- -- CDEBUG (D_NET, "complete(2) [%p] from NAL %d: %d\n", fwd, -- src_ne->kpne_interface.kprni_nalid, error); -- -- atomic_dec (&kpr_queue_depth); -- atomic_dec (&src_ne->kpne_refcount); /* CAVEAT EMPTOR src_ne can disappear now!!! */ --} -- --int --kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, -- ptl_nid_t lo_nid, ptl_nid_t hi_nid) --{ -- unsigned long flags; -- struct list_head *e; -- kpr_route_entry_t *re; -- kpr_gateway_entry_t *ge; -- int dup = 0; -- -- CDEBUG(D_NET, "Add route: %d "LPX64" : "LPX64" - "LPX64"\n", -- gateway_nalid, gateway_nid, lo_nid, hi_nid); -- -- if (gateway_nalid == PTL_NID_ANY || -- lo_nid == PTL_NID_ANY || -- hi_nid == PTL_NID_ANY || -- lo_nid > hi_nid) -- return (-EINVAL); -- -- PORTAL_ALLOC (ge, sizeof (*ge)); -- if (ge == NULL) -- return (-ENOMEM); -- -- ge->kpge_nalid = gateway_nalid; -- ge->kpge_nid = gateway_nid; -- ge->kpge_alive = 1; -- ge->kpge_timestamp = 0; -- ge->kpge_refcount = 0; -- atomic_set (&ge->kpge_weight, 0); -- -- PORTAL_ALLOC (re, sizeof (*re)); -- if (re == NULL) { -- PORTAL_FREE (ge, sizeof (*ge)); -- return (-ENOMEM); -- } -- -- re->kpre_lo_nid = lo_nid; -- re->kpre_hi_nid = hi_nid; -- -- LASSERT(!in_interrupt()); -- write_lock_irqsave (&kpr_rwlock, flags); -- -- list_for_each (e, &kpr_gateways) { -- kpr_gateway_entry_t *ge2 = list_entry(e, kpr_gateway_entry_t, -- kpge_list); -- -- if (ge2->kpge_nalid == gateway_nalid && -- ge2->kpge_nid == gateway_nid) { -- PORTAL_FREE (ge, sizeof (*ge)); -- ge = ge2; -- dup = 1; -- break; -- } -- } -- -- if (!dup) { -- /* Adding a new gateway... 
*/ -- list_add (&ge->kpge_list, &kpr_gateways); -- -- /* ...zero all gateway weights so this one doesn't have to -- * play catch-up */ -- -- list_for_each (e, &kpr_gateways) { -- kpr_gateway_entry_t *ge2 = list_entry(e, kpr_gateway_entry_t, -- kpge_list); -- atomic_set (&ge2->kpge_weight, 0); -- } -- } -- -- re->kpre_gateway = ge; -- ge->kpge_refcount++; -- list_add (&re->kpre_list, &kpr_routes); -- kpr_routes_generation++; -- -- write_unlock_irqrestore (&kpr_rwlock, flags); -- return (0); --} -- --int --kpr_sys_notify (int gateway_nalid, ptl_nid_t gateway_nid, - int alive, time_t when) - int alive, time_t when) --{ -- return (kpr_do_notify (0, gateway_nalid, gateway_nid, alive, when)); --} -- --int --kpr_del_route (int gw_nalid, ptl_nid_t gw_nid, -- ptl_nid_t lo, ptl_nid_t hi) --{ -- int specific = (lo != PTL_NID_ANY); -- unsigned long flags; -- int rc = -ENOENT; -- struct list_head *e; -- struct list_head *n; -- -- CDEBUG(D_NET, "Del route [%d] "LPX64" : "LPX64" - "LPX64"\n", -- gw_nalid, gw_nid, lo, hi); -- -- LASSERT(!in_interrupt()); -- -- /* NB Caller may specify either all routes via the given gateway -- * (lo/hi == PTL_NID_ANY) or a specific route entry (lo/hi are -- * actual NIDs) */ -- if (specific ? 
(hi == PTL_NID_ANY || hi < lo) : (hi != PTL_NID_ANY)) -- return (-EINVAL); -- -- write_lock_irqsave(&kpr_rwlock, flags); -- -- list_for_each_safe (e, n, &kpr_routes) { -- kpr_route_entry_t *re = list_entry(e, kpr_route_entry_t, -- kpre_list); -- kpr_gateway_entry_t *ge = re->kpre_gateway; -- -- if (ge->kpge_nalid != gw_nalid || -- ge->kpge_nid != gw_nid || -- (specific && -- (lo != re->kpre_lo_nid || hi != re->kpre_hi_nid))) -- continue; -- -- rc = 0; -- -- if (--ge->kpge_refcount == 0) { -- list_del (&ge->kpge_list); -- PORTAL_FREE (ge, sizeof (*ge)); -- } -- -- list_del (&re->kpre_list); -- PORTAL_FREE(re, sizeof (*re)); -- -- if (specific) -- break; -- } -- -- kpr_routes_generation++; -- write_unlock_irqrestore(&kpr_rwlock, flags); -- -- return (rc); --} -- --int - kpr_get_route (int idx, int *gateway_nalid, ptl_nid_t *gateway_nid, - ptl_nid_t *lo_nid, ptl_nid_t *hi_nid, int *alive) -kpr_get_route (int idx, __u32 *gateway_nalid, ptl_nid_t *gateway_nid, - ptl_nid_t *lo_nid, ptl_nid_t *hi_nid, __u32 *alive) --{ -- struct list_head *e; -- - LASSERT (!in_interrupt()); -- read_lock(&kpr_rwlock); -- -- for (e = kpr_routes.next; e != &kpr_routes; e = e->next) { -- kpr_route_entry_t *re = list_entry(e, kpr_route_entry_t, -- kpre_list); -- kpr_gateway_entry_t *ge = re->kpre_gateway; -- -- if (idx-- == 0) { -- *gateway_nalid = ge->kpge_nalid; -- *gateway_nid = ge->kpge_nid; -- *alive = ge->kpge_alive; -- *lo_nid = re->kpre_lo_nid; -- *hi_nid = re->kpre_hi_nid; -- -- read_unlock(&kpr_rwlock); -- return (0); -- } -- } -- -- read_unlock (&kpr_rwlock); -- return (-ENOENT); -} - -static int -kpr_nal_cmd(struct portals_cfg *pcfg, void * private) -{ - int err = -EINVAL; - ENTRY; - - switch(pcfg->pcfg_command) { - default: - CDEBUG(D_IOCTL, "Inappropriate cmd: %d\n", pcfg->pcfg_command); - break; - - case NAL_CMD_ADD_ROUTE: - CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n", - pcfg->pcfg_nal, pcfg->pcfg_nid, - pcfg->pcfg_nid2, pcfg->pcfg_nid3); - err = 
kpr_add_route(pcfg->pcfg_gw_nal, pcfg->pcfg_nid, - pcfg->pcfg_nid2, pcfg->pcfg_nid3); - break; - - case NAL_CMD_DEL_ROUTE: - CDEBUG (D_IOCTL, "Removing routes via [%d] "LPU64" : "LPU64" - "LPU64"\n", - pcfg->pcfg_gw_nal, pcfg->pcfg_nid, - pcfg->pcfg_nid2, pcfg->pcfg_nid3); - err = kpr_del_route (pcfg->pcfg_gw_nal, pcfg->pcfg_nid, - pcfg->pcfg_nid2, pcfg->pcfg_nid3); - break; - - case NAL_CMD_NOTIFY_ROUTER: { - CDEBUG (D_IOCTL, "Notifying peer [%d] "LPU64" %s @ %ld\n", - pcfg->pcfg_gw_nal, pcfg->pcfg_nid, - pcfg->pcfg_flags ? "Enabling" : "Disabling", - (time_t)pcfg->pcfg_nid3); - - err = kpr_sys_notify (pcfg->pcfg_gw_nal, pcfg->pcfg_nid, - pcfg->pcfg_flags, (time_t)pcfg->pcfg_nid3); - break; - } - - case NAL_CMD_GET_ROUTE: - CDEBUG (D_IOCTL, "Getting route [%d]\n", pcfg->pcfg_count); - err = kpr_get_route(pcfg->pcfg_count, &pcfg->pcfg_gw_nal, - &pcfg->pcfg_nid, - &pcfg->pcfg_nid2, &pcfg->pcfg_nid3, - &pcfg->pcfg_flags); - break; - } - RETURN(err); --} - -- --static void /*__exit*/ --kpr_finalise (void) --{ -- LASSERT (list_empty (&kpr_nals)); - - libcfs_nal_cmd_unregister(ROUTER); - - PORTAL_SYMBOL_UNREGISTER(kpr_router_interface); - - kpr_proc_fini(); -- -- while (!list_empty (&kpr_routes)) { -- kpr_route_entry_t *re = list_entry(kpr_routes.next, -- kpr_route_entry_t, -- kpre_list); -- -- list_del(&re->kpre_list); -- PORTAL_FREE(re, sizeof (*re)); -- } - - kpr_proc_fini(); - - PORTAL_SYMBOL_UNREGISTER(kpr_router_interface); - PORTAL_SYMBOL_UNREGISTER(kpr_control_interface); -- -- CDEBUG(D_MALLOC, "kpr_finalise: kmem back to %d\n", -- atomic_read(&portal_kmemory)); --} -- --static int __init --kpr_initialise (void) --{ - int rc; - -- CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n", -- atomic_read(&portal_kmemory)); -- -- kpr_routes_generation = 0; -- kpr_proc_init(); -- - rc = libcfs_nal_cmd_register(ROUTER, kpr_nal_cmd, NULL); - if (rc != 0) { - CERROR("Can't register nal cmd handler\n"); - return (rc); - } - -- PORTAL_SYMBOL_REGISTER(kpr_router_interface); - 
PORTAL_SYMBOL_REGISTER(kpr_control_interface); -- return (0); --} -- --MODULE_AUTHOR("Eric Barton"); --MODULE_DESCRIPTION("Kernel Portals Router v0.01"); --MODULE_LICENSE("GPL"); -- --module_init (kpr_initialise); --module_exit (kpr_finalise); -- - EXPORT_SYMBOL (kpr_control_interface); --EXPORT_SYMBOL (kpr_router_interface); diff --cc lnet/router/router.h index 611d808,27e4983..0000000 deleted file mode 100644,100644 --- a/lnet/router/router.h +++ /dev/null @@@ -1,113 -1,105 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (C) 2002 Cluster File Systems, Inc. -- * -- * This file is part of Portals -- * http://sourceforge.net/projects/sandiaportals/ -- * -- * Portals is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Portals is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Portals; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-- * -- */ -- --#ifndef _KPTLROUTER_H --#define _KPTLROUTER_H --#ifndef EXPORT_SYMTAB --# define EXPORT_SYMTAB --#endif -- --#include --#include --#include --#include --#include --#include --#include --#include -- --#define DEBUG_SUBSYSTEM S_PTLROUTER -- --#include --#include --#include --#include -- --typedef struct --{ -- struct list_head kpne_list; -- kpr_nal_interface_t kpne_interface; -- atomic_t kpne_refcount; -- int kpne_shutdown; --} kpr_nal_entry_t; -- --typedef struct --{ -- struct list_head kpge_list; -- atomic_t kpge_weight; -- time_t kpge_timestamp; -- int kpge_alive; -- int kpge_nalid; -- int kpge_refcount; -- ptl_nid_t kpge_nid; --} kpr_gateway_entry_t; -- --typedef struct --{ -- struct list_head kpre_list; -- kpr_gateway_entry_t *kpre_gateway; -- ptl_nid_t kpre_lo_nid; -- ptl_nid_t kpre_hi_nid; --} kpr_route_entry_t; -- --typedef struct --{ -- work_struct_t kpru_tq; -- int kpru_nal_id; -- ptl_nid_t kpru_nid; -- int kpru_alive; -- time_t kpru_when; --} kpr_upcall_t; -- --extern int kpr_register_nal (kpr_nal_interface_t *nalif, void **argp); --extern int kpr_lookup_target (void *arg, ptl_nid_t target_nid, int nob, -- ptl_nid_t *gateway_nidp); --extern kpr_nal_entry_t *kpr_find_nal_entry_locked (int nal_id); --extern void kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd); --extern void kpr_complete_packet (void *arg, kpr_fwd_desc_t *fwd, int error); --extern void kpr_nal_notify (void *arg, ptl_nid_t peer, -- int alive, time_t when); --extern void kpr_shutdown_nal (void *arg); --extern void kpr_deregister_nal (void *arg); -- --extern void kpr_proc_init (void); --extern void kpr_proc_fini (void); - - extern int kpr_add_route (int gateway_nal, ptl_nid_t gateway_nid, - ptl_nid_t lo_nid, ptl_nid_t hi_nid); - extern int kpr_del_route (int gw_nal, ptl_nid_t gw_nid, - ptl_nid_t lo, ptl_nid_t hi); - extern int kpr_get_route (int idx, int *gateway_nal, ptl_nid_t *gateway_nid, - ptl_nid_t *lo_nid, ptl_nid_t *hi_nid, int *alive); - extern int kpr_sys_notify (int 
gw_nalid, ptl_nid_t gw_nid, - int alive, time_t when); -- --extern unsigned int kpr_routes_generation; --extern unsigned long long kpr_fwd_bytes; --extern unsigned long kpr_fwd_packets; --extern unsigned long kpr_fwd_errors; --extern atomic_t kpr_queue_depth; - --extern struct list_head kpr_routes; --extern rwlock_t kpr_rwlock; -- --#endif /* _KPLROUTER_H */ diff --cc lnet/tests/.cvsignore index e034130,e034130..0000000 deleted file mode 100644,100644 --- a/lnet/tests/.cvsignore +++ /dev/null @@@ -1,10 -1,10 +1,0 @@@ --Makefile --.deps --.*.cmd --autoMakefile.in --autoMakefile --*.ko --*.mod.c --.*.flags --.tmp_versions --.depend diff --cc lnet/tests/Makefile.in index c309db0,c309db0..0000000 deleted file mode 100644,100644 --- a/lnet/tests/Makefile.in +++ /dev/null @@@ -1,16 -1,16 +1,0 @@@ --MODULES := pingsrv pingcli spingsrv spingcli --pingsrv-objs := ping_srv.o -- --ifeq ($(PATCHLEVEL),6) --pingcli-objs := ping_cli.o --spingsrv-objs := sping_srv.o --spingcli-objs := sping_cli.o --else --ping%.c: ping_%.c -- ln -sf $< $@ -- --sping%.c: sping_%.c -- ln -sf $< $@ --endif -- --@INCLUDE_RULES@ diff --cc lnet/tests/Makefile.mk index 751c0a0,751c0a0..0000000 deleted file mode 100644,100644 --- a/lnet/tests/Makefile.mk +++ /dev/null @@@ -1,9 -1,9 +1,0 @@@ --# Copyright (C) 2001 Cluster File Systems, Inc. --# --# This code is issued under the GNU General Public License. --# See the file COPYING in this distribution -- --include $(src)/../Kernelenv -- --obj-y += ping_cli.o --obj-y += ping_srv.o diff --cc lnet/tests/autoMakefile.am index 5f81b93,5f81b93..0000000 deleted file mode 100644,100644 --- a/lnet/tests/autoMakefile.am +++ /dev/null @@@ -1,16 -1,16 +1,0 @@@ --# Copyright (C) 2001 Cluster File Systems, Inc. --# --# This code is issued under the GNU General Public License. 
--# See the file COPYING in this distribution -- --if MODULES --if !CRAY_PORTALS --if TESTS --noinst_DATA := pingsrv$(KMODEXT) pingcli$(KMODEXT) --noinst_DATA += spingsrv$(KMODEXT) spingcli$(KMODEXT) --endif --endif --endif -- --MOSTLYCLEANFILES = *.o *.ko *.mod.c pingsrv.c pingcli.c spingsrv.c spingcli.c --DIST_SOURCES = ping_srv.c ping_cli.c sping_srv.c sping_cli.c ping.h diff --cc lnet/tests/ping.h index f07444b,f07444b..0000000 deleted file mode 100644,100644 --- a/lnet/tests/ping.h +++ /dev/null @@@ -1,80 -1,80 +1,0 @@@ --#ifndef _KPING_INCLUDED --#define _KPING_INCLUDED -- --#include -- -- --#define PTL_PING_IN_SIZE 256 // n packets per buffer --#define PTL_PING_IN_BUFFERS 2 // n fallback buffers -- --#define PTL_PING_CLIENT 4 --#define PTL_PING_SERVER 5 -- --#define PING_HEADER_MAGIC 0xDEADBEEF --#define PING_BULK_MAGIC 0xCAFEBABE -- --#define PING_HEAD_BITS 0x00000001 --#define PING_BULK_BITS 0x00000002 --#define PING_IGNORE_BITS 0xFFFFFFFC -- --#define PTL_PING_ACK 0x01 --#define PTL_PING_VERBOSE 0x02 --#define PTL_PING_VERIFY 0x04 --#define PTL_PING_PREALLOC 0x08 -- -- --#define NEXT_PRIMARY_BUFFER(index) \ -- (((index + 1) >= PTL_PING_IN_BUFFERS) ? 
0 : (index + 1)) -- --#define PDEBUG(str, err) \ -- CERROR ("%s: error=%s (%d)\n", str, ptl_err_str[err], err) -- -- --/* Ping data to be passed via the ioctl to kernel space */ -- --#if __KERNEL__ -- -- --#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) --#include --#else --#include --#endif --struct pingsrv_data { -- -- ptl_handle_ni_t ni; -- ptl_handle_me_t me; -- ptl_handle_eq_t eq; -- void *in_buf; -- ptl_process_id_t my_id; -- ptl_process_id_t id_local; -- ptl_md_t mdin; -- ptl_md_t mdout; -- ptl_handle_md_t mdin_h; -- ptl_handle_md_t mdout_h; -- ptl_event_t evnt; -- struct task_struct *tsk; --}; /* struct pingsrv_data */ -- --struct pingcli_data { -- -- struct portal_ioctl_data *args; -- ptl_handle_me_t me; -- ptl_handle_eq_t eq; -- char *inbuf; -- char *outbuf; -- ptl_process_id_t myid; -- ptl_process_id_t id_local; -- ptl_process_id_t id_remote; -- ptl_md_t md_in_head; -- ptl_md_t md_out_head; -- ptl_handle_md_t md_in_head_h; -- ptl_handle_md_t md_out_head_h; -- ptl_event_t ev; -- struct task_struct *tsk; --}; /* struct pingcli_data */ -- -- --#endif /* __KERNEL__ */ -- --#endif /* _KPING_INCLUDED */ diff --cc lnet/tests/ping_cli.c index 85c0d71,7a3f8a0..0000000 deleted file mode 100644,100644 --- a/lnet/tests/ping_cli.c +++ /dev/null @@@ -1,304 -1,303 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL) -- * Author: Brian Behlendorf -- * Kedar Sovani (kedar@calsoftinc.com) -- * Amey Inamdar (amey@calsoftinc.com) -- * -- * This file is part of Portals, http://www.sf.net/projects/lustre/ -- * -- * Portals is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. 
-- * -- * Portals is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Portals; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- * -- */ -- --#define DEBUG_SUBSYSTEM S_PINGER -- --#include --#include --#include --#include --#include --#include --#include "ping.h" --/* int portal_debug = D_PING_CLI; */ -- -- --#define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval)) -- --#define MAX_TIME 100000 -- --/* This should be enclosed in a structure */ -- --static struct pingcli_data *client = NULL; -- --static int count = 0; -- --static void - pingcli_shutdown(int err) -pingcli_shutdown(ptl_handle_ni_t nih, int err) --{ -- int rc; -- -- /* Yes, we are intentionally allowing us to fall through each -- * case in to the next. This allows us to pass an error -- * code to just clean up the right stuff. 
-- */ -- switch (err) { -- case 1: -- /* Unlink any memory descriptors we may have used */ -- if ((rc = PtlMDUnlink (client->md_out_head_h))) -- PDEBUG ("PtlMDUnlink", rc); -- case 2: -- if ((rc = PtlMDUnlink (client->md_in_head_h))) -- PDEBUG ("PtlMDUnlink", rc); -- -- /* Free the event queue */ -- if ((rc = PtlEQFree (client->eq))) -- PDEBUG ("PtlEQFree", rc); -- -- if ((rc = PtlMEUnlink (client->me))) -- PDEBUG ("PtlMEUnlink", rc); -- case 3: - kportal_put_ni (client->args->ioc_nal); - PtlNIFini(nih); -- -- case 4: -- /* Free our buffers */ -- -- if (client != NULL) -- PORTAL_FREE (client, -- sizeof(struct pingcli_data)); -- } -- -- -- CDEBUG (D_OTHER, "ping client released resources\n"); --} /* pingcli_shutdown() */ -- - static int pingcli_callback(ptl_event_t *ev) -static void pingcli_callback(ptl_event_t *ev) --{ -- int i, magic; - i = *(int *)(ev->mem_desc.start + ev->offset + sizeof(unsigned)); - magic = *(int *)(ev->mem_desc.start + ev->offset); - i = *(int *)(ev->md.start + ev->offset + sizeof(unsigned)); - magic = *(int *)(ev->md.start + ev->offset); -- -- if(magic != 0xcafebabe) { - printk ("LustreError: Unexpected response \n"); - return 1; - CERROR("Unexpected response %x\n", magic); -- } -- -- if((i == count) || !count) -- wake_up_process (client->tsk); -- else - printk ("LustreError: Received response after timeout for %d\n",i); - return 1; - CERROR("Received response after timeout for %d\n",i); --} -- -- --static struct pingcli_data * --pingcli_start(struct portal_ioctl_data *args) --{ - ptl_handle_ni_t *nip; - ptl_handle_ni_t nih = PTL_INVALID_HANDLE; -- unsigned ping_head_magic = PING_HEADER_MAGIC; -- unsigned ping_bulk_magic = PING_BULK_MAGIC; -- int rc; -- struct timeval tv1, tv2; -- char str[PTL_NALFMT_SIZE]; -- -- client->tsk = current; -- client->args = args; -- CDEBUG (D_OTHER, "pingcli_setup args: nid "LPX64" (%s), \ -- nal %d, size %u, count: %u, timeout: %u\n", -- args->ioc_nid, -- portals_nid2str(args->ioc_nal, args->ioc_nid, str), -- 
args->ioc_nal, args->ioc_size, -- args->ioc_count, args->ioc_timeout); -- -- -- PORTAL_ALLOC (client->outbuf, STDSIZE + args->ioc_size) ; -- if (client->outbuf == NULL) -- { -- CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE); - pingcli_shutdown (4); - pingcli_shutdown (nih, 4); -- return (NULL); -- } -- -- PORTAL_ALLOC (client->inbuf, -- (args->ioc_size + STDSIZE) * args->ioc_count); -- if (client->inbuf == NULL) -- { -- CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE); - pingcli_shutdown (4); - pingcli_shutdown (nih, 4); -- return (NULL); -- } -- -- /* Aquire and initialize the proper nal for portals. */ - if ((nip = kportal_get_ni (args->ioc_nal)) == NULL) - rc = PtlNIInit(args->ioc_nal, 0, NULL, NULL, &nih); - if (rc != PTL_OK || rc != PTL_IFACE_DUP) -- { -- CERROR ("NAL %d not loaded\n", args->ioc_nal); - pingcli_shutdown (4); - pingcli_shutdown (nih, 4); -- return (NULL); -- } -- -- /* Based on the initialization aquire our unique portal ID. */ - if ((rc = PtlGetId (*nip, &client->myid))) - if ((rc = PtlGetId (nih, &client->myid))) -- { -- CERROR ("PtlGetId error %d\n", rc); - pingcli_shutdown (2); - pingcli_shutdown (nih, 2); -- return (NULL); -- } -- -- /* Setup the local match entries */ -- client->id_local.nid = PTL_NID_ANY; -- client->id_local.pid = PTL_PID_ANY; -- -- /* Setup the remote match entries */ -- client->id_remote.nid = args->ioc_nid; -- client->id_remote.pid = 0; -- - if ((rc = PtlMEAttach (*nip, PTL_PING_CLIENT, - if ((rc = PtlMEAttach (nih, PTL_PING_CLIENT, -- client->id_local, 0, ~0, PTL_RETAIN, -- PTL_INS_AFTER, &client->me))) -- { -- CERROR ("PtlMEAttach error %d\n", rc); - pingcli_shutdown (2); - pingcli_shutdown (nih, 2); -- return (NULL); -- } -- -- /* Allocate the event queue for this network interface */ - if ((rc = PtlEQAlloc (*nip, 64, pingcli_callback, &client->eq))) - if ((rc = PtlEQAlloc (nih, 64, pingcli_callback, &client->eq))) -- { -- CERROR ("PtlEQAlloc error %d\n", rc); - pingcli_shutdown (2); 
- pingcli_shutdown (nih, 2); -- return (NULL); -- } -- -- count = args->ioc_count; -- -- client->md_in_head.start = client->inbuf; -- client->md_in_head.length = (args->ioc_size + STDSIZE) -- * count; -- client->md_in_head.threshold = PTL_MD_THRESH_INF; - client->md_in_head.options = PTL_MD_OP_PUT; - client->md_in_head.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT; -- client->md_in_head.user_ptr = NULL; - client->md_in_head.eventq = client->eq; - client->md_in_head.eq_handle = client->eq; -- memset (client->inbuf, 0, (args->ioc_size + STDSIZE) * count); -- -- /* Attach the incoming buffer */ -- if ((rc = PtlMDAttach (client->me, client->md_in_head, -- PTL_UNLINK, &client->md_in_head_h))) { -- CERROR ("PtlMDAttach error %d\n", rc); - pingcli_shutdown (1); - pingcli_shutdown (nih, 1); -- return (NULL); -- } -- /* Setup the outgoing ping header */ -- client->md_out_head.start = client->outbuf; -- client->md_out_head.length = STDSIZE + args->ioc_size; -- client->md_out_head.threshold = args->ioc_count; - client->md_out_head.options = PTL_MD_OP_PUT; - client->md_out_head.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT; -- client->md_out_head.user_ptr = NULL; - client->md_out_head.eventq = PTL_EQ_NONE; - client->md_out_head.eq_handle = PTL_EQ_NONE; -- -- memcpy (client->outbuf, &ping_head_magic, sizeof(ping_bulk_magic)); -- -- count = 0; -- -- /* Bind the outgoing ping header */ - if ((rc=PtlMDBind (*nip, client->md_out_head, - &client->md_out_head_h))) { - if ((rc=PtlMDBind (nih, client->md_out_head, - PTL_UNLINK, &client->md_out_head_h))) { -- CERROR ("PtlMDBind error %d\n", rc); - pingcli_shutdown (1); - pingcli_shutdown (nih, 1); -- return NULL; -- } -- while ((args->ioc_count - count)) { -- memcpy (client->outbuf + sizeof(unsigned), -- &(count), sizeof(unsigned)); -- /* Put the ping packet */ -- do_gettimeofday (&tv1); -- -- memcpy(client->outbuf+sizeof(unsigned)+sizeof(unsigned),&tv1, -- sizeof(struct timeval)); -- -- if((rc = PtlPut 
(client->md_out_head_h, PTL_NOACK_REQ, -- client->id_remote, PTL_PING_SERVER, 0, 0, 0, 0))) { -- PDEBUG ("PtlPut (header)", rc); - pingcli_shutdown (1); - pingcli_shutdown (nih, 1); -- return NULL; -- } - printk ("Lustre: sent msg no %d", count); - CWARN ("Lustre: sent msg no %d", count); -- -- set_current_state (TASK_INTERRUPTIBLE); -- rc = schedule_timeout (20 * args->ioc_timeout); -- if (rc == 0) { - printk ("LustreError: :: timeout .....\n"); - CERROR ("timeout .....\n"); -- } else { -- do_gettimeofday (&tv2); - printk("Lustre: :: Reply in %u usec\n", - (unsigned)((tv2.tv_sec - tv1.tv_sec) - * 1000000 + (tv2.tv_usec - tv1.tv_usec))); - CWARN("Reply in %u usec\n", - (unsigned)((tv2.tv_sec - tv1.tv_sec) - * 1000000 + (tv2.tv_usec - tv1.tv_usec))); -- } -- count++; -- } -- -- if (client->outbuf != NULL) -- PORTAL_FREE (client->outbuf, STDSIZE + args->ioc_size); -- -- if (client->inbuf != NULL) -- PORTAL_FREE (client->inbuf, -- (args->ioc_size + STDSIZE) * args->ioc_count); -- - pingcli_shutdown (2); - pingcli_shutdown (nih, 2); -- -- /* Success! 
*/ -- return NULL; --} /* pingcli_setup() */ -- -- -- --/* called by the portals_ioctl for ping requests */ --int kping_client(struct portal_ioctl_data *args) --{ -- PORTAL_ALLOC (client, sizeof(struct pingcli_data)); -- if (client == NULL) -- { -- CERROR ("Unable to allocate client structure\n"); -- return (0); -- } -- memset (client, 0, sizeof(struct pingcli_data)); -- pingcli_start (args); -- -- return 0; --} /* kping_client() */ -- -- --static int __init pingcli_init(void) --{ -- PORTAL_SYMBOL_REGISTER(kping_client); -- return 0; --} /* pingcli_init() */ -- -- --static void /*__exit*/ pingcli_cleanup(void) --{ -- PORTAL_SYMBOL_UNREGISTER (kping_client); --} /* pingcli_cleanup() */ -- -- --MODULE_AUTHOR("Brian Behlendorf (LLNL)"); --MODULE_DESCRIPTION("A simple kernel space ping client for portals testing"); --MODULE_LICENSE("GPL"); -- --module_init(pingcli_init); --module_exit(pingcli_cleanup); -- --#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) --EXPORT_SYMBOL (kping_client); --#endif diff --cc lnet/tests/ping_srv.c index 1e40ed8,dec806a..0000000 deleted file mode 100644,100644 --- a/lnet/tests/ping_srv.c +++ /dev/null @@@ -1,308 -1,308 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL) -- * Author: Brian Behlendorf -- * Amey Inamdar -- * Kedar Sovani -- * -- * -- * This file is part of Portals, http://www.sf.net/projects/lustre/ -- * -- * Portals is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Portals is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. 
-- * -- * You should have received a copy of the GNU General Public License -- * along with Portals; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --#define DEBUG_SUBSYSTEM S_PINGER -- --#include --#include --#include "ping.h" -- --#include --#include --#include --#include --#include --#include --#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) --#include --#else --#include --#endif --#include --#include -- --#include --#include -- --#define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval)) --#define MAXSIZE (16*1024) -- --static unsigned ping_head_magic; --static unsigned ping_bulk_magic; --static int nal = SOCKNAL; // Your NAL, --static unsigned long packets_valid = 0; // Valid packets --static int running = 1; --atomic_t pkt; -- --static struct pingsrv_data *server=NULL; // Our ping server -- --static void *pingsrv_shutdown(int err) --{ -- int rc; -- -- /* Yes, we are intentionally allowing us to fall through each -- * case in to the next. This allows us to pass an error -- * code to just clean up the right stuff. 
-- */ -- switch (err) { -- case 1: -- /* Unlink any memory descriptors we may have used */ -- if ((rc = PtlMDUnlink (server->mdin_h))) -- PDEBUG ("PtlMDUnlink (out head buffer)", rc); -- case 2: -- /* Free the event queue */ -- if ((rc = PtlEQFree (server->eq))) -- PDEBUG ("PtlEQFree", rc); -- -- /* Unlink the client portal from the ME list */ -- if ((rc = PtlMEUnlink (server->me))) -- PDEBUG ("PtlMEUnlink", rc); -- -- case 3: - kportal_put_ni (nal); - PtlNIFini (server->ni); -- -- case 4: -- -- case 5: -- if (server->in_buf != NULL) -- PORTAL_FREE (server->in_buf, MAXSIZE); -- -- if (server != NULL) -- PORTAL_FREE (server, -- sizeof (struct pingsrv_data)); -- -- } -- -- CDEBUG (D_OTHER, "ping sever resources released\n"); -- return NULL; --} /* pingsrv_shutdown() */ -- -- --int pingsrv_thread(void *arg) --{ -- int rc; -- unsigned long magic; -- unsigned long ping_bulk_magic = 0xcafebabe; -- -- kportal_daemonize ("pingsrv"); -- server->tsk = current; -- -- while (running) { -- set_current_state (TASK_INTERRUPTIBLE); -- if (atomic_read (&pkt) == 0) { -- schedule_timeout (MAX_SCHEDULE_TIMEOUT); -- continue; -- } -- - magic = *((int *)(server->evnt.mem_desc.start - magic = *((int *)(server->evnt.md.start -- + server->evnt.offset)); -- -- -- if(magic != 0xdeadbeef) { - printk("LustreError: Unexpected Packet to the server\n"); - CERROR("Unexpected Packet to the server\n"); -- -- } -- memcpy (server->in_buf, &ping_bulk_magic, sizeof(ping_bulk_magic)); -- -- server->mdout.length = server->evnt.rlength; -- server->mdout.start = server->in_buf; -- server->mdout.threshold = 1; - server->mdout.options = PTL_MD_OP_PUT; - server->mdout.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT; -- server->mdout.user_ptr = NULL; - server->mdout.eventq = PTL_EQ_NONE; - server->mdout.eq_handle = PTL_EQ_NONE; -- -- /* Bind the outgoing buffer */ -- if ((rc = PtlMDBind (server->ni, server->mdout, - &server->mdout_h))) { - PTL_UNLINK, &server->mdout_h))) { -- PDEBUG ("PtlMDBind", rc); -- 
pingsrv_shutdown (1); -- return 1; -- } -- -- -- server->mdin.start = server->in_buf; -- server->mdin.length = MAXSIZE; -- server->mdin.threshold = 1; - server->mdin.options = PTL_MD_OP_PUT; - server->mdin.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT; -- server->mdin.user_ptr = NULL; - server->mdin.eventq = server->eq; - server->mdin.eq_handle = server->eq; -- -- if ((rc = PtlMDAttach (server->me, server->mdin, -- PTL_UNLINK, &server->mdin_h))) { -- PDEBUG ("PtlMDAttach (bulk)", rc); -- CDEBUG (D_OTHER, "ping server resources allocated\n"); -- } -- -- if ((rc = PtlPut (server->mdout_h, PTL_NOACK_REQ, -- server->evnt.initiator, PTL_PING_CLIENT, 0, 0, 0, 0))) -- PDEBUG ("PtlPut", rc); -- -- atomic_dec (&pkt); -- -- } -- pingsrv_shutdown (1); -- running = 1; -- return 0; --} -- - static int pingsrv_packet(ptl_event_t *ev) -static void pingsrv_packet(ptl_event_t *ev) --{ -- atomic_inc (&pkt); -- wake_up_process (server->tsk); - return 1; --} /* pingsrv_head() */ -- - static int pingsrv_callback(ptl_event_t *ev) -static void pingsrv_callback(ptl_event_t *ev) --{ -- -- if (ev == NULL) { -- CERROR ("null in callback, ev=%p\n", ev); - return 0; - return; -- } -- server->evnt = *ev; -- - printk ("Lustre: received ping from nid "LPX64" " - CWARN ("received ping from nid "LPX64" " -- "(off=%u rlen=%u mlen=%u head=%x seq=%d size=%d)\n", -- ev->initiator.nid, ev->offset, ev->rlength, ev->mlength, - *((int *)(ev->mem_desc.start + ev->offset)), - *((int *)(ev->mem_desc.start + ev->offset + sizeof(unsigned))), - *((int *)(ev->mem_desc.start + ev->offset + 2 * - *((int *)(ev->md.start + ev->offset)), - *((int *)(ev->md.start + ev->offset + sizeof(unsigned))), - *((int *)(ev->md.start + ev->offset + 2 * -- sizeof(unsigned)))); -- -- packets_valid++; -- - return pingsrv_packet(ev); - pingsrv_packet(ev); -- --} /* pingsrv_callback() */ -- -- --static struct pingsrv_data *pingsrv_setup(void) --{ - ptl_handle_ni_t *nip; -- int rc; - - server->ni = PTL_INVALID_HANDLE; -- -- /* 
Aquire and initialize the proper nal for portals. */ - if ((nip = kportal_get_ni (nal)) == NULL) { - rc = PtlNIInit(nal, 0, NULL, NULL, &server->ni); - if (!(rc == PTL_OK || rc == PTL_IFACE_DUP)) { -- CDEBUG (D_OTHER, "NAL %d not loaded\n", nal); -- return pingsrv_shutdown (4); -- } -- - server->ni= *nip; -- -- /* Based on the initialization aquire our unique portal ID. */ -- if ((rc = PtlGetId (server->ni, &server->my_id))) { -- PDEBUG ("PtlGetId", rc); -- return pingsrv_shutdown (2); -- } -- -- server->id_local.nid = PTL_NID_ANY; -- server->id_local.pid = PTL_PID_ANY; -- -- /* Attach a match entries for header packets */ -- if ((rc = PtlMEAttach (server->ni, PTL_PING_SERVER, -- server->id_local,0, ~0, -- PTL_RETAIN, PTL_INS_AFTER, &server->me))) { -- PDEBUG ("PtlMEAttach", rc); -- return pingsrv_shutdown (2); -- } -- -- - if ((rc = PtlEQAlloc (server->ni, 1024, pingsrv_callback, - if ((rc = PtlEQAlloc (server->ni, 1024, &pingsrv_callback, -- &server->eq))) { -- PDEBUG ("PtlEQAlloc (callback)", rc); -- return pingsrv_shutdown (2); -- } -- -- PORTAL_ALLOC (server->in_buf, MAXSIZE); -- if(!server->in_buf){ -- CDEBUG (D_OTHER,"Allocation error\n"); -- return pingsrv_shutdown(2); -- } -- -- /* Setup the incoming buffer */ -- server->mdin.start = server->in_buf; -- server->mdin.length = MAXSIZE; -- server->mdin.threshold = 1; - server->mdin.options = PTL_MD_OP_PUT; - server->mdin.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT; -- server->mdin.user_ptr = NULL; - server->mdin.eventq = server->eq; - server->mdin.eq_handle = server->eq; -- memset (server->in_buf, 0, STDSIZE); -- -- if ((rc = PtlMDAttach (server->me, server->mdin, -- PTL_UNLINK, &server->mdin_h))) { -- PDEBUG ("PtlMDAttach (bulk)", rc); -- CDEBUG (D_OTHER, "ping server resources allocated\n"); -- } -- -- /* Success! 
*/ -- return server; --} /* pingsrv_setup() */ -- --static int pingsrv_start(void) --{ -- /* Setup our server */ -- if (!pingsrv_setup()) { -- CDEBUG (D_OTHER, "pingsrv_setup() failed, server stopped\n"); -- return -ENOMEM; -- } -- kernel_thread (pingsrv_thread,NULL,0); -- return 0; --} /* pingsrv_start() */ -- -- -- --static int __init pingsrv_init(void) --{ -- ping_head_magic = PING_HEADER_MAGIC; -- ping_bulk_magic = PING_BULK_MAGIC; -- PORTAL_ALLOC (server, sizeof(struct pingsrv_data)); -- return pingsrv_start (); --} /* pingsrv_init() */ -- -- --static void /*__exit*/ pingsrv_cleanup(void) --{ -- remove_proc_entry ("net/pingsrv", NULL); -- -- running = 0; -- wake_up_process (server->tsk); -- while (running != 1) { -- set_current_state (TASK_UNINTERRUPTIBLE); -- schedule_timeout (HZ); -- } -- --} /* pingsrv_cleanup() */ -- -- --MODULE_PARM(nal, "i"); --MODULE_PARM_DESC(nal, "Use the specified NAL " - "(6-kscimacnal, 2-ksocknal, 1-kqswnal)"); - "(2-ksocknal, 1-kqswnal)"); -- --MODULE_AUTHOR("Brian Behlendorf (LLNL)"); --MODULE_DESCRIPTION("A kernel space ping server for portals testing"); --MODULE_LICENSE("GPL"); -- --module_init(pingsrv_init); --module_exit(pingsrv_cleanup); diff --cc lnet/tests/sping_cli.c index 64a1dd2,730ba00..0000000 deleted file mode 100644,100644 --- a/lnet/tests/sping_cli.c +++ /dev/null @@@ -1,279 -1,279 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL) -- * Author: Brian Behlendorf -- * Kedar Sovani (kedar@calsoftinc.com) -- * Amey Inamdar (amey@calsoftinc.com) -- * -- * This file is part of Portals, http://www.sf.net/projects/lustre/ -- * -- * Portals is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. 
-- * -- * Portals is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Portals; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- * -- */ -- --/* This is a striped down version of pinger. It follows a single -- * request-response protocol. Doesn't do Bulk data pinging. Also doesn't -- * send multiple packets in a single ioctl. -- */ -- -- --#define DEBUG_SUBSYSTEM S_PINGER -- --#include --#include --#include --#include --#include --#include --#include "ping.h" --/* int portal_debug = D_PING_CLI; */ -- -- --#define STDSIZE (sizeof(int) + sizeof(int) + 4) /* The data is 4 bytes -- assumed */ -- --/* This should be enclosed in a structure */ -- --static struct pingcli_data *client = NULL; -- --static int count = 0; -- --static void - pingcli_shutdown(int err) -pingcli_shutdown(ptl_handle_ni_t nih, int err) --{ -- int rc; -- -- /* Yes, we are intentionally allowing us to fall through each -- * case in to the next. This allows us to pass an error -- * code to just clean up the right stuff. 
-- */ -- switch (err) { -- case 1: -- /* Unlink any memory descriptors we may have used */ -- if ((rc = PtlMDUnlink (client->md_out_head_h))) -- PDEBUG ("PtlMDUnlink", rc); -- case 2: -- /* Free the event queue */ -- if ((rc = PtlEQFree (client->eq))) -- PDEBUG ("PtlEQFree", rc); -- -- if ((rc = PtlMEUnlink (client->me))) -- PDEBUG ("PtlMEUnlink", rc); -- case 3: - kportal_put_ni (client->args->ioc_nal); - PtlNIFini (nih); -- -- case 4: -- /* Free our buffers */ -- if (client->outbuf != NULL) -- PORTAL_FREE (client->outbuf, STDSIZE); -- -- if (client->inbuf != NULL) -- PORTAL_FREE (client->inbuf, STDSIZE); -- -- -- if (client != NULL) -- PORTAL_FREE (client, -- sizeof(struct pingcli_data)); -- } -- -- -- CDEBUG (D_OTHER, "ping client released resources\n"); --} /* pingcli_shutdown() */ -- - static int pingcli_callback(ptl_event_t *ev) -static void pingcli_callback(ptl_event_t *ev) --{ - wake_up_process (client->tsk); - return 1; - wake_up_process (client->tsk); --} -- -- --static struct pingcli_data * --pingcli_start(struct portal_ioctl_data *args) --{ - const ptl_handle_ni_t *nip; - ptl_handle_ni_t nih = PTL_INVALID_HANDLE; -- unsigned ping_head_magic = PING_HEADER_MAGIC; -- char str[PTL_NALFMT_SIZE]; -- int rc; -- -- client->tsk = current; -- client->args = args; -- -- CDEBUG (D_OTHER, "pingcli_setup args: nid "LPX64" (%s), \ -- nal %d, size %u, count: %u, timeout: %u\n", -- args->ioc_nid, -- portals_nid2str(args->ioc_nid, args->ioc_nal, str), -- args->ioc_nal, args->ioc_size, -- args->ioc_count, args->ioc_timeout); -- -- -- PORTAL_ALLOC (client->outbuf, STDSIZE) ; -- if (client->outbuf == NULL) -- { -- CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE); - pingcli_shutdown (4); - pingcli_shutdown (nih, 4); -- return (NULL); -- } -- -- PORTAL_ALLOC (client->inbuf, STDSIZE); -- -- if (client->inbuf == NULL) -- { -- CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE); - pingcli_shutdown (4); - pingcli_shutdown (nih, 4); -- return (NULL); -- 
} -- -- /* Aquire and initialize the proper nal for portals. */ - if ((nip = kportal_get_ni (args->ioc_nal)) == NULL) - rc = PtlNIInit(args->ioc_nal, 0, NULL, NULL, &nih); - if (rc != PTL_OK && rc != PTL_IFACE_DUP) -- { -- CERROR ("NAL %d not loaded.\n", args->ioc_nal); - pingcli_shutdown (4); - pingcli_shutdown (nih, 4); -- return (NULL); -- } -- -- /* Based on the initialization aquire our unique portal ID. */ - if ((rc = PtlGetId (*nip, &client->myid))) - if ((rc = PtlGetId (nih, &client->myid))) -- { -- CERROR ("PtlGetId error %d\n", rc); - pingcli_shutdown (2); - pingcli_shutdown (nih, 2); -- return (NULL); -- } -- -- /* Setup the local match entries */ -- client->id_local.nid = PTL_NID_ANY; -- client->id_local.pid = PTL_PID_ANY; -- -- /* Setup the remote match entries */ -- client->id_remote.nid = args->ioc_nid; -- client->id_remote.pid = 0; -- - if ((rc = PtlMEAttach (*nip, PTL_PING_CLIENT, - if ((rc = PtlMEAttach (nih, PTL_PING_CLIENT, -- client->id_local, 0, ~0, PTL_RETAIN, -- PTL_INS_AFTER, &client->me))) -- { -- CERROR ("PtlMEAttach error %d\n", rc); - pingcli_shutdown (2); - pingcli_shutdown (nih, 2); -- return (NULL); -- } -- -- /* Allocate the event queue for this network interface */ - if ((rc = PtlEQAlloc (*nip, 64, pingcli_callback, &client->eq))) - if ((rc = PtlEQAlloc (nih, 64, pingcli_callback, &client->eq))) -- { -- CERROR ("PtlEQAlloc error %d\n", rc); - pingcli_shutdown (2); - pingcli_shutdown (nih, 2); -- return (NULL); -- } -- -- -- client->md_in_head.start = client->inbuf; -- client->md_in_head.length = STDSIZE; -- client->md_in_head.threshold = 1; - client->md_in_head.options = PTL_MD_OP_PUT; - client->md_in_head.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT; -- client->md_in_head.user_ptr = NULL; - client->md_in_head.eventq = client->eq; - client->md_in_head.eq_handle = client->eq; -- memset (client->inbuf, 0, STDSIZE); -- -- /* Attach the incoming buffer */ -- if ((rc = PtlMDAttach (client->me, client->md_in_head, -- PTL_UNLINK, 
&client->md_in_head_h))) { -- CERROR ("PtlMDAttach error %d\n", rc); - pingcli_shutdown (1); - pingcli_shutdown (nih, 1); -- return (NULL); -- } -- -- /* Setup the outgoing ping header */ -- client->md_out_head.start = client->outbuf; -- client->md_out_head.length = STDSIZE; -- client->md_out_head.threshold = 1; - client->md_out_head.options = PTL_MD_OP_PUT; - client->md_out_head.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT; -- client->md_out_head.user_ptr = NULL; - client->md_out_head.eventq = PTL_EQ_NONE; - client->md_out_head.eq_handle = PTL_EQ_NONE; -- -- memcpy (client->outbuf, &ping_head_magic, sizeof(ping_head_magic)); -- -- /* Bind the outgoing ping header */ - if ((rc=PtlMDBind (*nip, client->md_out_head, - &client->md_out_head_h))) { - if ((rc=PtlMDBind (nih, client->md_out_head, - PTL_UNLINK, &client->md_out_head_h))) { -- CERROR ("PtlMDBind error %d\n", rc); - pingcli_shutdown (1); - pingcli_shutdown (nih, 1); -- return (NULL); -- } -- /* Put the ping packet */ -- if((rc = PtlPut (client->md_out_head_h, PTL_NOACK_REQ, -- client->id_remote, PTL_PING_SERVER, 0, 0, 0, 0))) { -- PDEBUG ("PtlPut (header)", rc); - pingcli_shutdown (1); - pingcli_shutdown (nih, 1); -- return NULL; -- } -- -- count = 0; -- set_current_state (TASK_INTERRUPTIBLE); -- rc = schedule_timeout (20 * args->ioc_timeout); -- if (rc == 0) { - printk ("LustreError: Time out on the server\n"); - pingcli_shutdown (2); - CERROR ("Time out on the server\n"); - pingcli_shutdown (nih, 2); -- return NULL; - } else - printk("Lustre: Received respose from the server \n"); - - } else { - CWARN("Received respose from the server \n"); - } -- - pingcli_shutdown (2); - pingcli_shutdown (nih, 2); -- -- /* Success! 
*/ -- return NULL; --} /* pingcli_setup() */ -- -- -- --/* called by the portals_ioctl for ping requests */ --int kping_client(struct portal_ioctl_data *args) --{ -- -- PORTAL_ALLOC (client, sizeof(struct pingcli_data)); -- memset (client, 0, sizeof(struct pingcli_data)); -- if (client == NULL) -- { -- CERROR ("Unable to allocate client structure\n"); -- return (0); -- } -- pingcli_start (args); -- -- return 0; --} /* kping_client() */ -- -- --static int __init pingcli_init(void) --{ -- PORTAL_SYMBOL_REGISTER(kping_client); -- return 0; --} /* pingcli_init() */ -- -- --static void /*__exit*/ pingcli_cleanup(void) --{ -- PORTAL_SYMBOL_UNREGISTER (kping_client); --} /* pingcli_cleanup() */ -- -- --MODULE_AUTHOR("Brian Behlendorf (LLNL)"); --MODULE_DESCRIPTION("A simple kernel space ping client for portals testing"); --MODULE_LICENSE("GPL"); -- --module_init(pingcli_init); --module_exit(pingcli_cleanup); -- --#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) --EXPORT_SYMBOL (kping_client); --#endif diff --cc lnet/tests/sping_srv.c index b8bda29,f2382d1..0000000 deleted file mode 100644,100644 --- a/lnet/tests/sping_srv.c +++ /dev/null @@@ -1,295 -1,294 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL) -- * Author: Brian Behlendorf -- * Amey Inamdar -- * Kedar Sovani -- * -- * -- * This file is part of Portals, http://www.sf.net/projects/lustre/ -- * -- * Portals is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Portals is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. 
-- * -- * You should have received a copy of the GNU General Public License -- * along with Portals; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --/* This is a striped down version of pinger. It follows a single -- * request-response protocol. Doesn't do Bulk data pinging. Also doesn't -- * send multiple packets in a single ioctl. -- */ -- --#define DEBUG_SUBSYSTEM S_PINGER -- --#include --#include --#include "ping.h" -- --#include --#include --#include --#include --#include --#include --#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) --#include --#else --#include --#endif --#include --#include -- --#include --#include -- --#define STDSIZE (sizeof(int) + sizeof(int) + 4) -- - static int nal = 0; // Your NAL, -static int nal = PTL_IFACE_DEFAULT; // Your NAL, --static unsigned long packets_valid = 0; // Valid packets --static int running = 1; --atomic_t pkt; -- --static struct pingsrv_data *server=NULL; // Our ping server -- --#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) --#endif -- --static void *pingsrv_shutdown(int err) --{ -- int rc; -- -- /* Yes, we are intentionally allowing us to fall through each -- * case in to the next. This allows us to pass an error -- * code to just clean up the right stuff. 
-- */ -- switch (err) { -- case 1: -- /* Unlink any memory descriptors we may have used */ -- if ((rc = PtlMDUnlink (server->mdin_h))) -- PDEBUG ("PtlMDUnlink (out head buffer)", rc); -- case 2: -- /* Free the event queue */ -- if ((rc = PtlEQFree (server->eq))) -- PDEBUG ("PtlEQFree", rc); -- -- /* Unlink the client portal from the ME list */ -- if ((rc = PtlMEUnlink (server->me))) -- PDEBUG ("PtlMEUnlink", rc); -- -- case 3: - kportal_put_ni (nal); - PtlNIFini(server->ni); -- -- case 4: -- -- if (server->in_buf != NULL) -- PORTAL_FREE (server->in_buf, STDSIZE); -- -- if (server != NULL) -- PORTAL_FREE (server, -- sizeof (struct pingsrv_data)); -- -- } -- -- CDEBUG (D_OTHER, "ping sever resources released\n"); -- return NULL; --} /* pingsrv_shutdown() */ -- -- --int pingsrv_thread(void *arg) --{ -- int rc; -- -- kportal_daemonize ("pingsrv"); -- server->tsk = current; -- -- while (running) { -- set_current_state (TASK_INTERRUPTIBLE); -- if (atomic_read (&pkt) == 0) { -- schedule_timeout (MAX_SCHEDULE_TIMEOUT); -- continue; -- } -- -- server->mdout.start = server->in_buf; -- server->mdout.length = STDSIZE; -- server->mdout.threshold = 1; - server->mdout.options = PTL_MD_OP_PUT; - server->mdout.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT; -- server->mdout.user_ptr = NULL; - server->mdout.eventq = PTL_EQ_NONE; - server->mdout.eq_handle = PTL_EQ_NONE; -- -- /* Bind the outgoing buffer */ -- if ((rc = PtlMDBind (server->ni, server->mdout, - &server->mdout_h))) { - PTL_UNLINK, &server->mdout_h))) { -- PDEBUG ("PtlMDBind", rc); -- pingsrv_shutdown (1); -- return 1; -- } -- -- -- server->mdin.start = server->in_buf; -- server->mdin.length = STDSIZE; -- server->mdin.threshold = 1; - server->mdin.options = PTL_MD_OP_PUT; - server->mdin.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT; -- server->mdin.user_ptr = NULL; - server->mdin.eventq = server->eq; - server->mdin.eq_handle = server->eq; -- -- if ((rc = PtlMDAttach (server->me, server->mdin, -- PTL_UNLINK, 
&server->mdin_h))) { -- PDEBUG ("PtlMDAttach (bulk)", rc); -- CDEBUG (D_OTHER, "ping server resources allocated\n"); -- } -- -- if ((rc = PtlPut (server->mdout_h, PTL_NOACK_REQ, -- server->evnt.initiator, PTL_PING_CLIENT, 0, 0, 0, 0))) -- PDEBUG ("PtlPut", rc); -- -- atomic_dec (&pkt); -- -- } -- pingsrv_shutdown (1); -- running = 1; -- return 0; --} -- - static int pingsrv_packet(ptl_event_t *ev) -static void pingsrv_packet(ptl_event_t *ev) --{ -- atomic_inc (&pkt); -- wake_up_process (server->tsk); - return 1; --} /* pingsrv_head() */ -- - static int pingsrv_callback(ptl_event_t *ev) -static void pingsrv_callback(ptl_event_t *ev) --{ -- -- if (ev == NULL) { -- CERROR ("null in callback, ev=%p\n", ev); - return 0; - return; -- } -- server->evnt = *ev; -- - printk ("Lustre: received ping from nid "LPX64" " - "(off=%u rlen=%u mlen=%u head=%x)\n", - ev->initiator.nid, ev->offset, ev->rlength, ev->mlength, - *((int *)(ev->mem_desc.start + ev->offset))); - CWARN("Lustre: received ping from nid "LPX64" " - "(off=%u rlen=%u mlen=%u head=%x)\n", - ev->initiator.nid, ev->offset, ev->rlength, ev->mlength, - *((int *)(ev->md.start + ev->offset))); -- -- packets_valid++; -- - return pingsrv_packet(ev); - pingsrv_packet(ev); -- --} /* pingsrv_callback() */ -- -- --static struct pingsrv_data *pingsrv_setup(void) --{ - ptl_handle_ni_t *nip; -- int rc; -- -- /* Aquire and initialize the proper nal for portals. */ - if ((nip = kportal_get_ni (nal)) == NULL) { - server->ni = PTL_INVALID_HANDLE; - - rc = PtlNIInit(nal, 0, NULL, NULL, &server->ni); - if (rc != PTL_OK && rc != PTL_IFACE_DUP) { -- CDEBUG (D_OTHER, "Nal %d not loaded.\n", nal); -- return pingsrv_shutdown (4); -- } - - server->ni= *nip; -- -- /* Based on the initialization aquire our unique portal ID. 
*/ -- if ((rc = PtlGetId (server->ni, &server->my_id))) { -- PDEBUG ("PtlGetId", rc); -- return pingsrv_shutdown (2); -- } -- -- server->id_local.nid = PTL_NID_ANY; -- server->id_local.pid = PTL_PID_ANY; -- -- /* Attach a match entries for header packets */ -- if ((rc = PtlMEAttach (server->ni, PTL_PING_SERVER, -- server->id_local,0, ~0, -- PTL_RETAIN, PTL_INS_AFTER, &server->me))) { -- PDEBUG ("PtlMEAttach", rc); -- return pingsrv_shutdown (2); -- } -- -- -- if ((rc = PtlEQAlloc (server->ni, 64, pingsrv_callback, -- &server->eq))) { -- PDEBUG ("PtlEQAlloc (callback)", rc); -- return pingsrv_shutdown (2); -- } -- -- PORTAL_ALLOC (server->in_buf, STDSIZE); -- if(!server->in_buf){ -- CDEBUG (D_OTHER,"Allocation error\n"); -- return pingsrv_shutdown(2); -- } -- -- /* Setup the incoming buffer */ -- server->mdin.start = server->in_buf; -- server->mdin.length = STDSIZE; -- server->mdin.threshold = 1; - server->mdin.options = PTL_MD_OP_PUT; - server->mdin.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT; -- server->mdin.user_ptr = NULL; - server->mdin.eventq = server->eq; - server->mdin.eq_handle = server->eq; -- memset (server->in_buf, 0, STDSIZE); -- -- if ((rc = PtlMDAttach (server->me, server->mdin, -- PTL_UNLINK, &server->mdin_h))) { -- PDEBUG ("PtlMDAttach (bulk)", rc); -- CDEBUG (D_OTHER, "ping server resources allocated\n"); -- } -- -- /* Success! 
*/ -- return server; --} /* pingsrv_setup() */ -- --static int pingsrv_start(void) --{ -- /* Setup our server */ -- if (!pingsrv_setup()) { -- CDEBUG (D_OTHER, "pingsrv_setup() failed, server stopped\n"); -- return -ENOMEM; -- } -- kernel_thread (pingsrv_thread,NULL,0); -- return 0; --} /* pingsrv_start() */ -- -- -- --static int __init pingsrv_init(void) --{ -- PORTAL_ALLOC (server, sizeof(struct pingsrv_data)); -- return pingsrv_start (); --} /* pingsrv_init() */ -- -- --static void /*__exit*/ pingsrv_cleanup(void) --{ -- remove_proc_entry ("net/pingsrv", NULL); -- -- running = 0; -- wake_up_process (server->tsk); -- while (running != 1) { -- set_current_state (TASK_UNINTERRUPTIBLE); -- schedule_timeout (HZ); -- } -- --} /* pingsrv_cleanup() */ -- -- --MODULE_PARM(nal, "i"); --MODULE_PARM_DESC(nal, "Use the specified NAL " - "(6-kscimacnal, 2-ksocknal, 1-kqswnal)"); - "(2-ksocknal, 1-kqswnal)"); -- --MODULE_AUTHOR("Brian Behlendorf (LLNL)"); --MODULE_DESCRIPTION("A kernel space ping server for portals testing"); --MODULE_LICENSE("GPL"); -- --module_init(pingsrv_init); --module_exit(pingsrv_cleanup); diff --cc lnet/tests/startclient.sh index be60509,be60509..0000000 deleted file mode 100644,100644 --- a/lnet/tests/startclient.sh +++ /dev/null @@@ -1,37 -1,37 +1,0 @@@ --#!/bin/sh -- --SIMPLE=${SIMPLE:-0} -- --if [ $SIMPLE -eq 0 ]; then -- PING=pingcli.o --else -- PING=spingcli.o --fi -- --case "$1" in -- tcp) -- /sbin/insmod ../oslib/portals.o -- /sbin/insmod ../socknal/ksocknal.o -- /sbin/insmod ./$PING -- echo ksocknal > /tmp/nal -- ;; -- -- elan) -- /sbin/insmod ../oslib/portals.o -- /sbin/insmod ../qswnal/kqswnal.o -- /sbin/insmod ./$PING -- echo kqswnal > /tmp/nal -- ;; -- -- gm) -- /sbin/insmod portals -- /sbin/insmod kgmnal -- /sbin/insmod ./$PING -- echo kgmnal > /tmp/nal -- ;; -- -- *) -- echo "Usage : ${0} < tcp | elan | gm>" -- exit 1; --esac --exit 0; diff --cc lnet/tests/startserver.sh index 9b5ccf6,9b5ccf6..0000000 deleted file mode 100644,100644 --- 
a/lnet/tests/startserver.sh +++ /dev/null @@@ -1,38 -1,38 +1,0 @@@ --#!/bin/sh -- --SIMPLE=${SIMPLE:-0} -- --if [ $SIMPLE -eq 0 ]; then -- PING=pingsrv.o --else -- PING=spingsrv.o --fi -- --case "$1" in -- tcp) -- /sbin/insmod ../oslib/portals.o -- /sbin/insmod ../socknal/ksocknal.o -- /sbin/insmod ./$PING nal=2 -- echo ksocknal > /tmp/nal -- ;; -- -- elan) -- /sbin/insmod ../oslib/portals.o -- /sbin/insmod ../qswnal/kqswnal.o -- /sbin/insmod ./$PING nal=4 -- echo kqswnal > /tmp/nal -- ;; -- -- gm) -- /sbin/insmod portals -- /sbin/insmod kgmnal -- /sbin/insmod ./$PING nal=3 -- echo kgmnal > /tmp/nal -- ;; -- -- *) -- echo "Usage : ${0} < tcp | elan | gm>" -- exit 1; --esac --../utils/acceptor 9999& --exit 0; diff --cc lnet/tests/stopclient.sh index f7e3aa1,f7e3aa1..0000000 deleted file mode 100644,100644 --- a/lnet/tests/stopclient.sh +++ /dev/null @@@ -1,14 -1,14 +1,0 @@@ --#!/bin/sh -- --SIMPLE=${SIMPLE:-1} -- --if [ $SIMPLE -eq 0 ]; then -- PING=spingcli --else -- PING=pingcli --fi -- --rmmod $PING --NAL=`cat /tmp/nal`; --rmmod $NAL --rmmod portals diff --cc lnet/tests/stopserver.sh index 3e81831,3e81831..0000000 deleted file mode 100644,100644 --- a/lnet/tests/stopserver.sh +++ /dev/null @@@ -1,16 -1,16 +1,0 @@@ --#!/bin/sh -- --SIMPLE=${SIMPLE:-1} -- --if [ $SIMPLE -eq 0 ]; then -- PING=spingsrv --else -- PING=pingsrv --fi -- --rmmod $PING --NAL=`cat /tmp/nal`; --rmmod $NAL --killall -9 acceptor --rm -f /var/run/acceptor-9999.pid --rmmod portals diff --cc lnet/ulnds/Makefile.am index 15080b0,3437d39..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/Makefile.am +++ /dev/null @@@ -1,13 -1,10 +1,0 @@@ --if LIBLUSTRE -if !CRAY_PORTALS --noinst_LIBRARIES = libtcpnal.a --endif - - noinst_HEADERS = pqtimer.h dispatch.h table.h timer.h connection.h \ - ipmap.h bridge.h procbridge.h - - libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h \ - dispatch.h table.h timer.h address.c procapi.c proclib.c \ - connection.c tcpnal.c connection.h -endif -- 
-noinst_HEADERS = pqtimer.h dispatch.h table.h timer.h connection.h ipmap.h bridge.h procbridge.h -libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h dispatch.h table.h timer.h address.c procapi.c proclib.c connection.c tcpnal.c connection.h --libtcpnal_a_CPPFLAGS = $(LLCPPFLAGS) --libtcpnal_a_CFLAGS = $(LLCFLAGS) diff --cc lnet/ulnds/README index 6cb93d9,6cb93d9..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/README +++ /dev/null @@@ -1,53 -1,53 +1,0 @@@ --This library implements two NAL interfaces, both running over IP. --The first, tcpnal, creates TCP connections between participating --processes in order to transport the portals requests. The second, --ernal, provides a simple transport protocol which runs over --UDP datagrams. -- --The interface functions return both of these values in host order for --convenience and readability. However this means that addresses --exchanged in messages between hosts of different orderings will not --function properly. -- --Both NALs use the same support functions in order to schedule events --and communicate with the generic portals implementation. -- -- ------------------------- -- | api | -- |_______________________| -- | lib | -- |_______________________| -- | ernal | |tcpnal | -- |--------| |----------| -- | udpsock| |connection| -- |-----------------------| -- | timer/select | -- ------------------------- -- -- -- These NALs uses the framework from fdnal of a pipe between the api --and library sides. This is wrapped up in the select on the library --side, and blocks on the api side. Performance could be severely --enhanced by collapsing this aritificial barrier, by using shared --memory queues, or by wiring the api layer directly to the library. -- -- --nid is defined as the low order 24-bits of the IP address of the --physical node left shifted by 8 plus a virtual node number of 0 --through 255 (really only 239). 
The virtual node number of a tcpnal --application should be specified using the environment variable --PTL_VIRTNODE. pid is now a completely arbitrary number in the --range of 0 to 255. The IP interface used can be overridden by --specifying the appropriate hostid by setting the PTL_HOSTID --environment variable. The value can be either dotted decimal --(n.n.n.n) or hex starting with "0x". --TCPNAL: -- As the NAL needs to try to send to a particular nid/pid pair, it -- will open up connections on demand. Because the port associated with -- the connecting socket is different from the bound port, two -- connections will normally be established between a pair of peers, with -- data flowing from the anonymous connect (active) port to the advertised -- or well-known bound (passive) port of each peer. -- -- Should the connection fail to open, an error is reported to the -- library component, which causes the api request to fail. diff --cc lnet/ulnds/address.c index 6507924,f329e2a..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/address.c +++ /dev/null @@@ -1,146 -1,145 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. -- * -- * This file is part of Lustre, http://www.lustre.org. -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-- */ -- --/* address.c: -- * this file provides functions to aquire the IP address of the node -- * and translate them into a NID/PID pair which supports a static -- * mapping of virtual nodes into the port range of an IP socket. --*/ -- --#include --#include --#include --#include --#include --#include --#include -- -- --/* Function: get_node_id -- * Returns: a 32 bit id for this node, actually a big-endian IP address -- * -- * get_node_id() determines the host name and uses the resolver to -- * find out its ip address. This is fairly fragile and inflexible, but -- * explicitly asking about interfaces and their addresses is very -- * complicated and nonportable. -- */ --static unsigned int get_node_id(void) --{ -- char buffer[255]; -- unsigned int x; -- struct hostent *he; -- char * host_envp; -- -- if (!(host_envp = getenv("PTL_HOSTID"))) -- { -- gethostname(buffer,sizeof(buffer)); -- he=gethostbyname(buffer); -- if (he) -- x=*(unsigned int *)he->h_addr_list[0]; -- else -- x = 0; -- return(ntohl(x)); -- } -- else -- { -- if (host_envp[1] != 'x') -- { -- int a, b, c, d; -- sscanf(host_envp, "%d.%d.%d.%d", &a, &b, &c, &d); -- return ((a<<24) | (b<<16) | (c<<8) | d); -- } -- else -- { -- long long hostid = strtoll(host_envp, 0, 0); -- return((unsigned int) hostid); -- } -- } --} -- -- --/* Function: set_address -- * Arugments: t: a procnal structure to populate with the request -- * -- * set_address performs the bit manipulations to set the nid, pid, and -- * iptop8 fields of the procnal structures. 
-- * -- * TODO: fix pidrequest to try to do dynamic binding if PTL_ID_ANY -- */ -- --#ifdef DIRECT_IP_MODE --void set_address(bridge t,ptl_pid_t pidrequest) --{ -- int port; -- if (pidrequest==(unsigned short)PTL_PID_ANY) port = 0; -- else port=pidrequest; - t->nal_cb->ni.nid=get_node_id(); - t->nal_cb->ni.pid=port; - t->lib_nal->libnal_ni.ni_pid.nid=get_node_id(); - t->lib_nal->libnal_ni.ni_pid.pid=port; --} --#else -- --void set_address(bridge t,ptl_pid_t pidrequest) --{ -- int virtnode, in_addr, port; -- ptl_pid_t pid; -- -- /* get and remember my node id*/ -- if (!getenv("PTL_VIRTNODE")) -- virtnode = 0; -- else -- { -- int maxvnode = PNAL_VNODE_MASK - (PNAL_BASE_PORT -- >> PNAL_VNODE_SHIFT); -- virtnode = atoi(getenv("PTL_VIRTNODE")); -- if (virtnode > maxvnode) -- { -- fprintf(stderr, "PTL_VIRTNODE of %d is too large - max %d\n", -- virtnode, maxvnode); -- return; -- } -- } -- -- in_addr = get_node_id(); -- -- t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */ - t->nal_cb->ni.nid = ((in_addr & PNAL_HOSTID_MASK) - << PNAL_VNODE_SHIFT) - + virtnode; - - t->lib_nal->libnal_ni.ni_pid.nid = ((in_addr & PNAL_HOSTID_MASK) - << PNAL_VNODE_SHIFT) - + virtnode; -- pid=pidrequest; -- /* TODO: Support of pid PTL_ID_ANY with virtual nodes needs more work. 
*/ --#ifdef notyet -- if (pid==(unsigned short)PTL_PID_ANY) port = 0; --#endif -- if (pid==(unsigned short)PTL_PID_ANY) -- { -- fprintf(stderr, "portal pid PTL_ID_ANY is not currently supported\n"); -- return; -- } -- else if (pid > PNAL_PID_MASK) -- { -- fprintf(stderr, "portal pid of %d is too large - max %d\n", -- pid, PNAL_PID_MASK); -- return; -- } -- else port = ((virtnode << PNAL_VNODE_SHIFT) + pid) + PNAL_BASE_PORT; - t->nal_cb->ni.pid=pid; - t->lib_nal->libnal_ni.ni_pid.pid=pid; --} --#endif diff --cc lnet/ulnds/bridge.h index 9a90ab8,d2f0f2c..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/bridge.h +++ /dev/null @@@ -1,34 -1,34 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. -- * -- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ -- */ -- --#ifndef TCPNAL_PROCBRIDGE_H --#define TCPNAL_PROCBRIDGE_H -- --#include -#include - -#define PTL_IFACE_TCP 1 -#define PTL_IFACE_ER 2 -#define PTL_IFACE_SS 3 -#define PTL_IFACE_MAX 4 -- --typedef struct bridge { -- int alive; - nal_cb_t *nal_cb; - lib_nal_t *lib_nal; -- void *lower; -- void *local; -- void (*shutdown)(struct bridge *); -- /* this doesn't really belong here */ -- unsigned char iptop8; --} *bridge; - -- - nal_t *bridge_init(ptl_interface_t nal, - ptl_pid_t pid_request, - ptl_ni_limits_t *desired, - ptl_ni_limits_t *actual, - int *rc); -- --typedef int (*nal_initialize)(bridge); --extern nal_initialize nal_table[PTL_IFACE_MAX]; -- --#endif diff --cc lnet/ulnds/connection.c index 7b4cecd,ed8dc08..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/connection.c +++ /dev/null @@@ -1,488 -1,468 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. -- * -- * This file is part of Lustre, http://www.lustre.org. 
-- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --/* connection.c: -- This file provides a simple stateful connection manager which -- builds tcp connections on demand and leaves them open for -- future use. It also provides the machinery to allow peers -- to connect to it --*/ -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#ifndef __CYGWIN__ --#include --#endif -- --/* global variable: acceptor port */ --unsigned short tcpnal_acceptor_port = 988; -- -- --/* Function: compare_connection -- * Arguments: connection c: a connection in the hash table -- * ptl_process_id_t: an id to verify agains -- * Returns: 1 if the connection is the one requested, 0 otherwise -- * -- * compare_connection() tests for collisions in the hash table -- */ --static int compare_connection(void *arg1, void *arg2) --{ -- connection c = arg1; -- unsigned int * id = arg2; --#if 0 -- return((c->ip==id[0]) && (c->port==id[1])); --#else -- /* CFS specific hacking */ -- return (c->ip == id[0]); --#endif --} -- -- --/* Function: connection_key -- * Arguments: ptl_process_id_t id: an id to hash -- * Returns: a not-particularily-well-distributed hash -- * of the id -- */ --static unsigned int connection_key(unsigned int *id) --{ 
--#if 0 -- return(id[0]^id[1]); --#else -- /* CFS specific hacking */ -- return (unsigned int) id[0]; --#endif --} -- -- --/* Function: remove_connection -- * Arguments: c: the connection to remove -- */ --void remove_connection(void *arg) --{ -- connection c = arg; -- unsigned int id[2]; -- -- id[0]=c->ip; -- id[1]=c->port; -- hash_table_remove(c->m->connections,id); -- close(c->fd); -- free(c); --} -- -- --/* Function: read_connection: -- * Arguments: c: the connection to read from -- * dest: the buffer to read into -- * len: the number of bytes to read -- * Returns: success as 1, or failure as 0 -- * -- * read_connection() reads data from the connection, continuing -- * to read partial results until the request is satisfied or -- * it errors. TODO: this read should be covered by signal protection. -- */ --int read_connection(connection c, -- unsigned char *dest, -- int len) --{ -- int offset = 0,rc; -- -- if (len) { -- do { --#ifndef __CYGWIN__ -- rc = syscall(SYS_read, c->fd, dest+offset, len-offset); --#else -- rc = recv(c->fd, dest+offset, len-offset, 0); --#endif -- if (rc <= 0) { -- if (errno == EINTR) { -- rc = 0; -- } else { -- remove_connection(c); -- return (0); -- } -- } -- offset += rc; -- } while (offset < len); -- } -- return (1); --} -- --static int connection_input(void *d) --{ -- connection c = d; -- return((*c->m->handler)(c->m->handler_arg,c)); --} -- -- --/* Function: allocate_connection -- * Arguments: t: tcpnal the allocation is occuring in the context of -- * dest: portal endpoint address for this connection -- * fd: open file descriptor for the socket -- * Returns: an allocated connection structure -- * -- * just encompasses the action common to active and passive -- * connections of allocation and placement in the global table -- */ --static connection allocate_connection(manager m, -- unsigned int ip, -- unsigned short port, -- int fd) --{ -- connection c=malloc(sizeof(struct connection)); -- unsigned int id[2]; -- c->m=m; -- c->fd=fd; 
-- c->ip=ip; -- c->port=port; -- id[0]=ip; -- id[1]=port; -- register_io_handler(fd,READ_HANDLER,connection_input,c); -- hash_table_insert(m->connections,c,id); -- return(c); --} -- -- --/* Function: new_connection -- * Arguments: t: opaque argument holding the tcpname -- * Returns: 1 in order to reregister for new connection requests -- * -- * called when the bound service socket recieves -- * a new connection request, it always accepts and -- * installs a new connection -- */ --static int new_connection(void *z) --{ -- manager m=z; -- struct sockaddr_in s; -- int len=sizeof(struct sockaddr_in); -- int fd=accept(m->bound,(struct sockaddr *)&s,&len); -- unsigned int nid=*((unsigned int *)&s.sin_addr); -- /* cfs specific hack */ -- //unsigned short pid=s.sin_port; -- pthread_mutex_lock(&m->conn_lock); -- allocate_connection(m,htonl(nid),0/*pid*/,fd); -- pthread_mutex_unlock(&m->conn_lock); -- return(1); --} - - /* FIXME assuming little endian, cleanup!! */ - #define __cpu_to_le64(x) ((__u64)(x)) - #define __le64_to_cpu(x) ((__u64)(x)) - #define __cpu_to_le32(x) ((__u32)(x)) - #define __le32_to_cpu(x) ((__u32)(x)) - #define __cpu_to_le16(x) ((__u16)(x)) - #define __le16_to_cpu(x) ((__u16)(x)) -- --extern ptl_nid_t tcpnal_mynid; -- --int --tcpnal_hello (int sockfd, ptl_nid_t *nid, int type, __u64 incarnation) --{ -- int rc; - int nob; -- ptl_hdr_t hdr; -- ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid; -- -- LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid)); -- -- memset (&hdr, 0, sizeof (hdr)); - hmv->magic = __cpu_to_le32 (PORTALS_PROTO_MAGIC); - hmv->version_major = __cpu_to_le32 (PORTALS_PROTO_VERSION_MAJOR); - hmv->version_minor = __cpu_to_le32 (PORTALS_PROTO_VERSION_MINOR); - hmv->magic = cpu_to_le32(PORTALS_PROTO_MAGIC); - hmv->version_major = cpu_to_le32(PORTALS_PROTO_VERSION_MAJOR); - hmv->version_minor = cpu_to_le32(PORTALS_PROTO_VERSION_MINOR); -- - hdr.src_nid = __cpu_to_le64 (tcpnal_mynid); - hdr.type = __cpu_to_le32 (PTL_MSG_HELLO); - 
hdr.src_nid = cpu_to_le64(tcpnal_mynid); - hdr.type = cpu_to_le32(PTL_MSG_HELLO); -- - hdr.msg.hello.type = __cpu_to_le32 (type); - hdr.msg.hello.incarnation = 0; - hdr.msg.hello.type = cpu_to_le32(type); - hdr.msg.hello.incarnation = cpu_to_le64(incarnation); - - /* I don't send any interface info */ -- -- /* Assume sufficient socket buffering for this message */ -- rc = syscall(SYS_write, sockfd, &hdr, sizeof(hdr)); -- if (rc <= 0) { -- CERROR ("Error %d sending HELLO to "LPX64"\n", rc, *nid); -- return (rc); -- } -- -- rc = syscall(SYS_read, sockfd, hmv, sizeof(*hmv)); -- if (rc <= 0) { -- CERROR ("Error %d reading HELLO from "LPX64"\n", rc, *nid); -- return (rc); -- } -- - if (hmv->magic != __le32_to_cpu (PORTALS_PROTO_MAGIC)) { - if (hmv->magic != le32_to_cpu(PORTALS_PROTO_MAGIC)) { -- CERROR ("Bad magic %#08x (%#08x expected) from "LPX64"\n", - __cpu_to_le32 (hmv->magic), PORTALS_PROTO_MAGIC, *nid); - cpu_to_le32(hmv->magic), PORTALS_PROTO_MAGIC, *nid); -- return (-EPROTO); -- } -- - if (hmv->version_major != __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR) || - hmv->version_minor != __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR)) { - if (hmv->version_major != cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR) || - hmv->version_minor != cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR)) { -- CERROR ("Incompatible protocol version %d.%d (%d.%d expected)" -- " from "LPX64"\n", - __le16_to_cpu (hmv->version_major), - __le16_to_cpu (hmv->version_minor), - le16_to_cpu (hmv->version_major), - le16_to_cpu (hmv->version_minor), -- PORTALS_PROTO_VERSION_MAJOR, -- PORTALS_PROTO_VERSION_MINOR, -- *nid); -- return (-EPROTO); -- } -- - #if (PORTALS_PROTO_VERSION_MAJOR != 0) - # error "This code only understands protocol version 0.x" -#if (PORTALS_PROTO_VERSION_MAJOR != 1) -# error "This code only understands protocol version 1.x" --#endif - /* version 0 sends magic/version as the dest_nid of a 'hello' header, - /* version 1 sends magic/version as the dest_nid of a 'hello' header, -- * so read the 
rest of it in now... */ -- -- rc = syscall(SYS_read, sockfd, hmv + 1, sizeof(hdr) - sizeof(*hmv)); -- if (rc <= 0) { -- CERROR ("Error %d reading rest of HELLO hdr from "LPX64"\n", -- rc, *nid); -- return (rc); -- } -- -- /* ...and check we got what we expected */ - if (hdr.type != __cpu_to_le32 (PTL_MSG_HELLO) || - hdr.payload_length != __cpu_to_le32 (0)) { - CERROR ("Expecting a HELLO hdr with 0 payload," - if (hdr.type != cpu_to_le32 (PTL_MSG_HELLO)) { - CERROR ("Expecting a HELLO hdr " -- " but got type %d with %d payload from "LPX64"\n", - __le32_to_cpu (hdr.type), - __le32_to_cpu (hdr.payload_length), *nid); - le32_to_cpu (hdr.type), - le32_to_cpu (hdr.payload_length), *nid); -- return (-EPROTO); -- } -- - if (__le64_to_cpu(hdr.src_nid) == PTL_NID_ANY) { - if (le64_to_cpu(hdr.src_nid) == PTL_NID_ANY) { -- CERROR("Expecting a HELLO hdr with a NID, but got PTL_NID_ANY\n"); -- return (-EPROTO); -- } -- -- if (*nid == PTL_NID_ANY) { /* don't know peer's nid yet */ - *nid = __le64_to_cpu(hdr.src_nid); - } else if (*nid != __le64_to_cpu (hdr.src_nid)) { - *nid = le64_to_cpu(hdr.src_nid); - } else if (*nid != le64_to_cpu (hdr.src_nid)) { -- CERROR ("Connected to nid "LPX64", but expecting "LPX64"\n", - __le64_to_cpu (hdr.src_nid), *nid); - le64_to_cpu (hdr.src_nid), *nid); - return (-EPROTO); - } - - /* Ignore any interface info in the payload */ - nob = le32_to_cpu(hdr.payload_length); - if (nob > getpagesize()) { - CERROR("Unexpected HELLO payload %d from "LPX64"\n", - nob, *nid); -- return (-EPROTO); - } - if (nob > 0) { - char *space = (char *)malloc(nob); - - if (space == NULL) { - CERROR("Can't allocate scratch buffer %d\n", nob); - return (-ENOMEM); - } - - rc = syscall(SYS_read, sockfd, space, nob); - if (rc <= 0) { - CERROR("Error %d skipping HELLO payload from " - LPX64"\n", rc, *nid); - return (rc); - } -- } -- -- return (0); --} -- --/* Function: force_tcp_connection -- * Arguments: t: tcpnal -- * dest: portals endpoint for the connection -- * Returns: 
an allocated connection structure, either -- * a pre-existing one, or a new connection -- */ --connection force_tcp_connection(manager m, -- unsigned int ip, -- unsigned short port, -- procbridge pb) --{ -- connection conn; -- struct sockaddr_in addr; - struct sockaddr_in locaddr; -- unsigned int id[2]; -- struct timeval tv; -- __u64 incarnation; -- - int fd; - int option; - int rc; - int rport; - ptl_nid_t peernid = PTL_NID_ANY; - port = tcpnal_acceptor_port; -- -- id[0] = ip; -- id[1] = port; -- -- pthread_mutex_lock(&m->conn_lock); -- -- conn = hash_table_find(m->connections, id); - if (conn) - goto out; - - memset(&addr, 0, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl(ip); - addr.sin_port = htons(port); - if (!conn) { - int fd; - int option; - ptl_nid_t peernid = PTL_NID_ANY; -- - memset(&locaddr, 0, sizeof(locaddr)); - locaddr.sin_family = AF_INET; - locaddr.sin_addr.s_addr = INADDR_ANY; - bzero((char *) &addr, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl(ip); - addr.sin_port = htons(port); -- - for (rport = IPPORT_RESERVED - 1; rport > IPPORT_RESERVED / 2; --rport) { - fd = socket(AF_INET, SOCK_STREAM, 0); - if (fd < 0) { - perror("tcpnal socket failed"); - goto out; - } - - option = 1; - rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, - &option, sizeof(option)); - if (rc != 0) { - perror ("Can't set SO_REUSEADDR for socket"); - close(fd); - goto out; - } - if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) { - perror("tcpnal socket failed"); - exit(-1); - } - if (connect(fd, (struct sockaddr *)&addr, - sizeof(struct sockaddr_in))) { - perror("tcpnal connect"); - return(0); - } -- - locaddr.sin_port = htons(rport); - rc = bind(fd, (struct sockaddr *)&locaddr, sizeof(locaddr)); - if (rc == 0 || errno == EACCES) { - rc = connect(fd, (struct sockaddr *)&addr, - sizeof(struct sockaddr_in)); - if (rc == 0) { - break; - } else if (errno != EADDRINUSE) { - perror("Error connecting to remote host"); - close(fd); 
- goto out; - } - } else if (errno != EADDRINUSE) { - perror("Error binding to privileged port"); - close(fd); - goto out; - } - close(fd); - } - - if (rport == IPPORT_RESERVED / 2) { - fprintf(stderr, "Out of ports trying to bind to a reserved port\n"); - goto out; - } - --#if 1 - option = 1; - setsockopt(fd, SOL_TCP, TCP_NODELAY, &option, sizeof(option)); - option = 1<<20; - setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &option, sizeof(option)); - option = 1<<20; - setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &option, sizeof(option)); - option = 1; - setsockopt(fd, SOL_TCP, TCP_NODELAY, &option, sizeof(option)); - option = 1<<20; - setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &option, sizeof(option)); - option = 1<<20; - setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &option, sizeof(option)); --#endif -- - gettimeofday(&tv, NULL); - incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; - gettimeofday(&tv, NULL); - incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; -- - /* say hello */ - if (tcpnal_hello(fd, &peernid, SOCKNAL_CONN_ANY, incarnation)) - /* say hello */ - if (tcpnal_hello(fd, &peernid, SOCKNAL_CONN_ANY, incarnation)) -- exit(-1); - - conn = allocate_connection(m, ip, port, fd); - - /* let nal thread know this event right away */ - if (conn) - procbridge_wakeup_nal(pb); -- - out: - conn = allocate_connection(m, ip, port, fd); - - /* let nal thread know this event right away */ - if (conn) - procbridge_wakeup_nal(pb); - } - -- pthread_mutex_unlock(&m->conn_lock); -- return (conn); --} - -- --/* Function: bind_socket -- * Arguments: t: the nal state for this interface -- * port: the port to attempt to bind to -- * Returns: 1 on success, or 0 on error -- * -- * bind_socket() attempts to allocate and bind a socket to the requested -- * port, or dynamically assign one from the kernel should the port be -- * zero. Sets the bound and bound_handler elements of m. -- * -- * TODO: The port should be an explicitly sized type. 
-- */ --static int bind_socket(manager m,unsigned short port) --{ -- struct sockaddr_in addr; -- int alen=sizeof(struct sockaddr_in); -- -- if ((m->bound = socket(AF_INET, SOCK_STREAM, 0)) < 0) -- return(0); -- -- bzero((char *) &addr, sizeof(addr)); -- addr.sin_family = AF_INET; -- addr.sin_addr.s_addr = 0; -- addr.sin_port = htons(port); -- -- if (bind(m->bound,(struct sockaddr *)&addr,alen)<0){ -- perror ("tcpnal bind"); -- return(0); -- } -- -- getsockname(m->bound,(struct sockaddr *)&addr, &alen); -- -- m->bound_handler=register_io_handler(m->bound,READ_HANDLER, -- new_connection,m); -- listen(m->bound,5); -- m->port=addr.sin_port; -- return(1); --} -- -- --/* Function: shutdown_connections -- * Arguments: m: the manager structure -- * -- * close all connections and reclaim resources -- */ --void shutdown_connections(manager m) --{ -- close(m->bound); -- remove_io_handler(m->bound_handler); -- hash_destroy_table(m->connections,remove_connection); -- free(m); --} -- -- --/* Function: init_connections -- * Arguments: t: the nal state for this interface -- * port: the port to attempt to bind to -- * Returns: a newly allocated manager structure, or -- * zero if the fixed port could not be bound -- */ --manager init_connections(unsigned short pid, -- int (*input)(void *, void *), -- void *a) --{ -- manager m = (manager)malloc(sizeof(struct manager)); -- m->connections = hash_create_table(compare_connection,connection_key); -- m->handler = input; -- m->handler_arg = a; -- pthread_mutex_init(&m->conn_lock, 0); -- -- if (bind_socket(m,pid)) -- return(m); -- -- free(m); -- return(0); --} diff --cc lnet/ulnds/connection.h index 343ffa6,343ffa6..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/connection.h +++ /dev/null @@@ -1,35 -1,35 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. 
-- * -- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ -- */ -- --#include --#include -- --typedef struct manager { -- table connections; -- pthread_mutex_t conn_lock; /* protect connections table */ -- int bound; -- io_handler bound_handler; -- int (*handler)(void *, void *); -- void *handler_arg; -- unsigned short port; --} *manager; -- -- --typedef struct connection { -- unsigned int ip; -- unsigned short port; -- int fd; -- manager m; --} *connection; -- --connection force_tcp_connection(manager m, unsigned int ip, unsigned int short, -- procbridge pb); --manager init_connections(unsigned short, int (*f)(void *, void *), void *); --void remove_connection(void *arg); --void shutdown_connections(manager m); --int read_connection(connection c, unsigned char *dest, int len); diff --cc lnet/ulnds/debug.c index b82bb2f,b82bb2f..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/debug.c +++ /dev/null @@@ -1,119 -1,119 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (C) 2002 Cluster File Systems, Inc. -- * Author: Phil Schwan -- * -- * This file is part of Lustre, http://www.lustre.org. -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-- */ -- --#include --#include --#include --#include --#include -- --int smp_processor_id = 1; --char debug_file_path[1024] = "/tmp/lustre-log"; --char debug_file_name[1024]; --FILE *debug_file_fd; -- --int portals_do_debug_dumplog(void *arg) --{ -- printf("Look in %s\n", debug_file_name); -- return 0; --} -- -- --void portals_debug_print(void) --{ -- return; --} -- -- --void portals_debug_dumplog(void) --{ -- printf("Look in %s\n", debug_file_name); -- return; --} -- -- --int portals_debug_init(unsigned long bufsize) --{ -- debug_file_fd = stdout; -- return 0; --} -- --int portals_debug_cleanup(void) --{ -- return 0; //close(portals_debug_fd); --} -- --int portals_debug_clear_buffer(void) --{ -- return 0; --} -- --int portals_debug_mark_buffer(char *text) --{ -- -- fprintf(debug_file_fd, "*******************************************************************************\n"); -- fprintf(debug_file_fd, "DEBUG MARKER: %s\n", text); -- fprintf(debug_file_fd, "*******************************************************************************\n"); -- -- return 0; --} -- --int portals_debug_copy_to_user(char *buf, unsigned long len) --{ -- return 0; --} -- --/* FIXME: I'm not very smart; someone smarter should make this better. */ --void --portals_debug_msg (int subsys, int mask, char *file, const char *fn, -- const int line, const char *format, ...) --{ -- va_list ap; -- unsigned long flags; -- struct timeval tv; -- int nob; -- -- -- /* NB since we pass a non-zero sized buffer (at least) on the first -- * print, we can be assured that by the end of all the snprinting, -- * we _do_ have a terminated buffer, even if our message got truncated. 
-- */ -- -- gettimeofday(&tv, NULL); -- -- nob += fprintf(debug_file_fd, -- "%02x:%06x:%d:%lu.%06lu ", -- subsys >> 24, mask, smp_processor_id, -- tv.tv_sec, tv.tv_usec); -- -- nob += fprintf(debug_file_fd, -- "(%s:%d:%s() %d+%ld): ", -- file, line, fn, 0, -- 8192 - ((unsigned long)&flags & 8191UL)); -- -- va_start (ap, format); -- nob += fprintf(debug_file_fd, format, ap); -- va_end (ap); -- -- --} -- diff --cc lnet/ulnds/dispatch.h index 34dd070,34dd070..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/dispatch.h +++ /dev/null @@@ -1,39 -1,39 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. -- * Copyright (c) 2002 Eric Hoffman -- * -- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ -- */ -- --/* this file is only called dispatch.h to prevent it -- from colliding with /usr/include/sys/select.h */ -- --typedef struct io_handler *io_handler; -- --struct io_handler{ -- io_handler *last; -- io_handler next; -- int fd; -- int type; -- int (*function)(void *); -- void *argument; -- int disabled; --}; -- -- --#define READ_HANDLER 1 --#define WRITE_HANDLER 2 --#define EXCEPTION_HANDLER 4 --#define ALL_HANDLER (READ_HANDLER | WRITE_HANDLER | EXCEPTION_HANDLER) -- --io_handler register_io_handler(int fd, -- int type, -- int (*function)(void *), -- void *arg); -- --void remove_io_handler (io_handler i); --void init_unix_timer(void); --void select_timer_block(when until); --when now(void); diff --cc lnet/ulnds/ipmap.h index 85b1e18,85b1e18..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/ipmap.h +++ /dev/null @@@ -1,38 -1,38 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. 
-- * -- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ -- */ -- --#define DIRECT_IP_MODE --#ifdef DIRECT_IP_MODE --#define PNAL_NID(in_addr, port) (in_addr) --#define PNAL_PID(pid) (pid) --#define PNAL_IP(in_addr, port) (in_addr) --#define PNAL_PORT(nid, pid) (pid) --#else -- --#define PNAL_BASE_PORT 4096 --#define PNAL_HOSTID_SHIFT 24 --#define PNAL_HOSTID_MASK ((1 << PNAL_HOSTID_SHIFT) - 1) --#define PNAL_VNODE_SHIFT 8 --#define PNAL_VNODE_MASK ((1 << PNAL_VNODE_SHIFT) - 1) --#define PNAL_PID_SHIFT 8 --#define PNAL_PID_MASK ((1 << PNAL_PID_SHIFT) - 1) -- --#define PNAL_NID(in_addr, port) (((ntohl(in_addr) & PNAL_HOSTID_MASK) \ -- << PNAL_VNODE_SHIFT) \ -- | (((ntohs(port)-PNAL_BASE_PORT) >>\ -- PNAL_PID_SHIFT))) --#define PNAL_PID(port) ((ntohs(port) - PNAL_BASE_PORT) & PNAL_PID_MASK) -- --#define PNAL_IP(nid,t) (htonl((((unsigned)(nid))\ -- >> PNAL_VNODE_SHIFT)\ -- | (t->iptop8 << PNAL_HOSTID_SHIFT))) --#define PNAL_PORT(nid, pid) (htons(((((nid) & PNAL_VNODE_MASK) \ -- << PNAL_VNODE_SHIFT) \ -- | ((pid) & PNAL_PID_MASK)) \ -- + PNAL_BASE_PORT)) --#endif diff --cc lnet/ulnds/pqtimer.c index 98c48eb,98c48eb..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/pqtimer.c +++ /dev/null @@@ -1,226 -1,226 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. -- * Copyright (c) 2002 Eric Hoffman -- * -- * This file is part of Lustre, http://www.lustre.org. -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. 
-- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --/* timer.c: -- * this file implements a simple priority-queue based timer system. when -- * combined with a file which implements now() and block(), it can -- * be used to provide course-grained time-based callbacks. -- */ -- --#include --#include --#include -- --struct timer { -- void (*function)(void *); -- void *arg; -- when w; -- int interval; -- int disable; --}; -- --typedef struct thunk *thunk; --struct thunk { -- void (*f)(void *); -- void *a; -- thunk next; --}; -- --extern when now(void); -- --static thunk thunks; --static int internal; --static void (*block_function)(when); --static int number_of_timers; --static int size_of_pqueue; --static timer *timers; -- -- --static void heal(int where) --{ -- int left=(where<<1); -- int right=(where<<1)+1; -- int min=where; -- timer temp; -- -- if (left <= number_of_timers) -- if (timers[left]->w < timers[min]->w) min=left; -- if (right <= number_of_timers) -- if (timers[right]->w < timers[min]->w) min=right; -- if (min != where){ -- temp=timers[where]; -- timers[where]=timers[min]; -- timers[min]=temp; -- heal(min); -- } --} -- --static void add_pqueue(int i) --{ -- timer temp; -- int parent=(i>>1); -- if ((i>1) && (timers[i]->w< timers[parent]->w)){ -- temp=timers[i]; -- timers[i]=timers[parent]; -- timers[parent]=temp; -- add_pqueue(parent); -- } --} -- --static void add_timer(timer t) --{ -- if (size_of_pqueue<(number_of_timers+2)){ -- int oldsize=size_of_pqueue; -- timer *new=(void *)malloc(sizeof(struct timer)*(size_of_pqueue+=10)); -- memcpy(new,timers,sizeof(timer)*oldsize); -- timers=new; -- } -- timers[++number_of_timers]=t; -- add_pqueue(number_of_timers); --} -- --/* Function: register_timer -- * Arguments: interval: the time interval from the current time when -- * the timer function should be 
called -- * function: the function to call when the time has expired -- * argument: the argument to call it with. -- * Returns: a pointer to a timer structure -- */ --timer register_timer(when interval, -- void (*function)(void *), -- void *argument) --{ -- timer t=(timer)malloc(sizeof(struct timer)); -- -- t->arg=argument; -- t->function=function; -- t->interval=interval; -- t->disable=0; -- t->w=now()+interval; -- add_timer(t); -- if (!internal && (number_of_timers==1)) -- block_function(t->w); -- return(t); --} -- --/* Function: remove_timer -- * Arguments: t: -- * Returns: nothing -- * -- * remove_timer removes a timer from the system, insuring -- * that it will never be called. It does not actually -- * free the timer due to reentrancy issues. -- */ -- --void remove_timer(timer t) --{ -- t->disable=1; --} -- -- -- --void timer_fire() --{ -- timer current; -- -- current=timers[1]; -- timers[1]=timers[number_of_timers--]; -- heal(1); -- if (!current->disable) { -- (*current->function)(current->arg); -- } -- free(current); --} -- --when next_timer(void) --{ -- when here=now(); -- -- while (number_of_timers && (timers[1]->w <= here)) timer_fire(); -- if (number_of_timers) return(timers[1]->w); -- return(0); --} -- --/* Function: timer_loop -- * Arguments: none -- * Returns: never -- * -- * timer_loop() is the blocking dispatch function for the timer. -- * Is calls the block() function registered with init_timer, -- * and handles associated with timers that have been registered. 
-- */ --void timer_loop() --{ -- when here; -- -- while (1){ -- thunk z; -- here=now(); -- -- for (z=thunks;z;z=z->next) (*z->f)(z->a); -- -- if (number_of_timers){ -- if (timers[1]->w > here){ -- (*block_function)(timers[1]->w); -- } else { -- timer_fire(); -- } -- } else { -- thunk z; -- for (z=thunks;z;z=z->next) (*z->f)(z->a); -- (*block_function)(0); -- } -- } --} -- -- --/* Function: register_thunk -- * Arguments: f: the function to call -- * a: the single argument to call it with -- * -- * Thunk functions get called at irregular intervals, they -- * should not assume when, or take a particularily long -- * amount of time. Thunks are for background cleanup tasks. -- */ --void register_thunk(void (*f)(void *),void *a) --{ -- thunk t=(void *)malloc(sizeof(struct thunk)); -- t->f=f; -- t->a=a; -- t->next=thunks; -- thunks=t; --} -- --/* Function: initialize_timer -- * Arguments: block: the function to call to block for the specified interval -- * -- * initialize_timer() must be called before any other timer function, -- * including timer_loop. -- */ --void initialize_timer(void (*block)(when)) --{ -- block_function=block; -- number_of_timers=0; -- size_of_pqueue=10; -- timers=(timer *)malloc(sizeof(timer)*size_of_pqueue); -- thunks=0; --} diff --cc lnet/ulnds/pqtimer.h index 11efb0e,11efb0e..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/pqtimer.h +++ /dev/null @@@ -1,25 -1,25 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. 
-- * Copyright (c) 2002 Eric Hoffman -- * -- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ -- */ -- --typedef unsigned long long when; --when now(void); --typedef struct timer *timer; --timer register_timer(when interval, -- void (*function)(void *), -- void *argument); --timer register_timer_wait(void); --void remove_timer(timer); --void timer_loop(void); --void initialize_timer(void (*block)(when)); --void timer_fire(void); -- -- --#define HZ 0x100000000ull -- -- diff --cc lnet/ulnds/procapi.c index c27f555,f3843d7..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/procapi.c +++ /dev/null @@@ -1,273 -1,188 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. -- * Copyright (c) 2003 Cluster File Systems, Inc. -- * -- * This file is part of Lustre, http://www.lustre.org. -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --/* api.c: -- * This file provides the 'api' side for the process-based nals. -- * it is responsible for creating the 'library' side thread, -- * and passing wrapped portals transactions to it. -- * -- * Along with initialization, shutdown, and transport to the library -- * side, this file contains some stubs to satisfy the nal definition. 
-- */ --#include --#include --#include --#include --#ifndef __CYGWIN__ --#include --#endif --#include --#include --#include --#include --#include -- -- --/* XXX CFS workaround, to give a chance to let nal thread wake up -- * from waiting in select -- */ --static int procbridge_notifier_handler(void *arg) --{ -- static char buf[8]; -- procbridge p = (procbridge) arg; -- -- syscall(SYS_read, p->notifier[1], buf, sizeof(buf)); -- return 1; --} -- --void procbridge_wakeup_nal(procbridge p) --{ -- static char buf[8]; -- syscall(SYS_write, p->notifier[0], buf, sizeof(buf)); - } - - /* Function: forward - * Arguments: nal_t *nal: pointer to my top-side nal structure - * id: the command to pass to the lower layer - * args, args_len:pointer to and length of the request - * ret, ret_len: pointer to and size of the result - * Returns: a portals status code - * - * forwards a packaged api call from the 'api' side to the 'library' - * side, and collects the result - */ - static int procbridge_forward(nal_t *n, int id, void *args, size_t args_len, - void *ret, size_t ret_len) - { - bridge b = (bridge) n->nal_data; - - if (id == PTL_FINI) { - lib_fini(b->nal_cb); - - if (b->shutdown) - (*b->shutdown)(b); - } - - lib_dispatch(b->nal_cb, NULL, id, args, ret); - - return (PTL_OK); --} - -- --/* Function: shutdown -- * Arguments: nal: a pointer to my top side nal structure -- * ni: my network interface index -- * -- * cleanup nal state, reclaim the lower side thread and -- * its state using PTL_FINI codepoint -- */ - static int procbridge_shutdown(nal_t *n, int ni) -static void procbridge_shutdown(nal_t *n) --{ - bridge b=(bridge)n->nal_data; - lib_nal_t *nal = n->nal_data; - bridge b=(bridge)nal->libnal_data; -- procbridge p=(procbridge)b->local; -- -- p->nal_flags |= NAL_FLAG_STOPPING; -- procbridge_wakeup_nal(p); -- -- do { -- pthread_mutex_lock(&p->mutex); -- if (p->nal_flags & NAL_FLAG_STOPPED) { -- pthread_mutex_unlock(&p->mutex); -- break; -- } -- pthread_cond_wait(&p->cond, 
&p->mutex); -- pthread_mutex_unlock(&p->mutex); -- } while (1); -- -- free(p); - return(0); - } - - - /* Function: validate - * useless stub - */ - static int procbridge_validate(nal_t *nal, void *base, size_t extent) - { - return(0); --} - - - /* FIXME cfs temporary workaround! FIXME - * global time out value - */ - int __tcpnal_eqwait_timeout_value = 0; - int __tcpnal_eqwait_timedout = 0; - - /* Function: yield - * Arguments: pid: - * - * this function was originally intended to allow the - * lower half thread to be scheduled to allow progress. we - * overload it to explicitly block until signalled by the - * lower half. - */ - static void procbridge_yield(nal_t *n) - { - bridge b=(bridge)n->nal_data; - procbridge p=(procbridge)b->local; - - pthread_mutex_lock(&p->mutex); - if (!__tcpnal_eqwait_timeout_value) { - pthread_cond_wait(&p->cond,&p->mutex); - } else { - struct timeval now; - struct timespec timeout; - - gettimeofday(&now, NULL); - timeout.tv_sec = now.tv_sec + __tcpnal_eqwait_timeout_value; - timeout.tv_nsec = now.tv_usec * 1000; -- - __tcpnal_eqwait_timedout = - pthread_cond_timedwait(&p->cond, &p->mutex, &timeout); - } - pthread_mutex_unlock(&p->mutex); - } -- -/* forward decl */ -extern int procbridge_startup (nal_t *, ptl_pid_t, - ptl_ni_limits_t *, ptl_ni_limits_t *); -- - static void procbridge_lock(nal_t * nal, unsigned long *flags){} - static void procbridge_unlock(nal_t * nal, unsigned long *flags){} --/* api_nal -- * the interface vector to allow the generic code to access - * this nal. this is seperate from the library side nal_cb. - * this nal. this is seperate from the library side lib_nal. 
-- * TODO: should be dyanmically allocated -- */ - static nal_t api_nal = { - ni: {0}, -nal_t procapi_nal = { -- nal_data: NULL, - forward: procbridge_forward, - shutdown: procbridge_shutdown, - validate: procbridge_validate, - yield: procbridge_yield, - lock: procbridge_lock, - unlock: procbridge_unlock - nal_ni_init: procbridge_startup, - nal_ni_fini: procbridge_shutdown, --}; -- --ptl_nid_t tcpnal_mynid; -- - /* Function: procbridge_interface -/* Function: procbridge_startup -- * -- * Arguments: pid: requested process id (port offset) -- * PTL_ID_ANY not supported. -- * desired: limits passed from the application -- * and effectively ignored -- * actual: limits actually allocated and returned -- * - * Returns: a pointer to my statically allocated top side NAL - * structure - * Returns: portals rc -- * -- * initializes the tcp nal. we define unix_failure as an -- * error wrapper to cut down clutter. -- */ - nal_t *procbridge_interface(int num_interface, - ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, - ptl_pid_t requested_pid) -int procbridge_startup (nal_t *nal, ptl_pid_t requested_pid, - ptl_ni_limits_t *requested_limits, - ptl_ni_limits_t *actual_limits) --{ -- nal_init_args_t args; - -- procbridge p; -- bridge b; - static int initialized=0; - ptl_ni_limits_t limits = {-1,-1,-1,-1,-1}; - /* XXX nal_type is purely private to tcpnal here */ -- int nal_type = PTL_IFACE_TCP;/* PTL_IFACE_DEFAULT FIXME hack */ -- - if(initialized) return (&api_nal); - LASSERT(nal == &procapi_nal); -- -- init_unix_timer(); -- -- b=(bridge)malloc(sizeof(struct bridge)); -- p=(procbridge)malloc(sizeof(struct procbridge)); - api_nal.nal_data=b; -- b->local=p; - - if (ptl_size) - limits.max_ptable_index = ptl_size; - if (acl_size) - limits.max_atable_index = acl_size; -- -- args.nia_requested_pid = requested_pid; - args.nia_limits = &limits; - args.nia_requested_limits = requested_limits; - args.nia_actual_limits = actual_limits; -- args.nia_nal_type = nal_type; -- args.nia_bridge 
= b; - args.nia_apinal = nal; -- -- /* init procbridge */ -- pthread_mutex_init(&p->mutex,0); -- pthread_cond_init(&p->cond, 0); -- p->nal_flags = 0; - pthread_mutex_init(&p->nal_cb_lock, 0); -- -- /* initialize notifier */ -- if (socketpair(AF_UNIX, SOCK_STREAM, 0, p->notifier)) { -- perror("socketpair failed"); - return NULL; - return PTL_FAIL; -- } -- -- if (!register_io_handler(p->notifier[1], READ_HANDLER, -- procbridge_notifier_handler, p)) { -- perror("fail to register notifier handler"); - return NULL; - return PTL_FAIL; -- } -- -- /* create nal thread */ -- if (pthread_create(&p->t, NULL, nal_thread, &args)) { -- perror("nal_init: pthread_create"); - return(NULL); - return PTL_FAIL; -- } -- -- do { -- pthread_mutex_lock(&p->mutex); -- if (p->nal_flags & (NAL_FLAG_RUNNING | NAL_FLAG_STOPPED)) { -- pthread_mutex_unlock(&p->mutex); -- break; -- } -- pthread_cond_wait(&p->cond, &p->mutex); -- pthread_mutex_unlock(&p->mutex); -- } while (1); -- -- if (p->nal_flags & NAL_FLAG_STOPPED) - return (NULL); - return PTL_FAIL; -- - b->nal_cb->ni.nid = tcpnal_mynid; - initialized = 1; - b->lib_nal->libnal_ni.ni_pid.nid = tcpnal_mynid; -- - return (&api_nal); - return PTL_OK; --} diff --cc lnet/ulnds/procbridge.h index 965f83d,1f91ced..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/procbridge.h +++ /dev/null @@@ -1,59 -1,56 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. -- * Copyright (c) 2003 Cluster File Systems, Inc. 
-- * -- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ -- */ -- --#ifndef _PROCBRIDGE_H_ --#define _PROCBRIDGE_H_ -- --#include --#include --#include -- -- --#define NAL_FLAG_RUNNING 1 --#define NAL_FLAG_STOPPING 2 --#define NAL_FLAG_STOPPED 4 -- --typedef struct procbridge { -- /* sync between user threads and nal thread */ -- pthread_t t; -- pthread_cond_t cond; -- pthread_mutex_t mutex; -- -- /* socket pair used to notify nal thread */ -- int notifier[2]; -- -- int nal_flags; -- - pthread_mutex_t nal_cb_lock; --} *procbridge; -- --typedef struct nal_init_args { -- ptl_pid_t nia_requested_pid; - ptl_ni_limits_t *nia_limits; - ptl_ni_limits_t *nia_requested_limits; - ptl_ni_limits_t *nia_actual_limits; -- int nia_nal_type; -- bridge nia_bridge; - nal_t *nia_apinal; --} nal_init_args_t; -- --extern void *nal_thread(void *); -- -- --#define PTL_INIT (LIB_MAX_DISPATCH+1) --#define PTL_FINI (LIB_MAX_DISPATCH+2) -- --#define MAX_ACLS 1 --#define MAX_PTLS 128 -- --extern void set_address(bridge t,ptl_pid_t pidrequest); - extern nal_t *procbridge_interface(int num_interface, - ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, - ptl_pid_t requested_pid); --extern void procbridge_wakeup_nal(procbridge p); -- --#endif diff --cc lnet/ulnds/proclib.c index 2a5ba0d,7ee7c71..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/proclib.c +++ /dev/null @@@ -1,224 -1,137 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. -- * Copyright (c) 2003 Cluster File Systems, Inc. -- * -- * This file is part of Lustre, http://www.lustre.org. -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. 
-- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --/* lib.c: -- * This file provides the 'library' side for the process-based nals. -- * it is responsible for communication with the 'api' side and -- * providing service to the generic portals 'library' -- * implementation. 'library' might be better termed 'communication' -- * or 'kernel'. -- */ -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include -- --/* the following functions are stubs to satisfy the nal definition -- without doing anything particularily useful*/ - - static ptl_err_t nal_write(nal_cb_t *nal, - void *private, - user_ptr dst_addr, - void *src_addr, - size_t len) - { - memcpy(dst_addr, src_addr, len); - return PTL_OK; - } - - static ptl_err_t nal_read(nal_cb_t * nal, - void *private, - void *dst_addr, - user_ptr src_addr, - size_t len) - { - memcpy(dst_addr, src_addr, len); - return PTL_OK; - } - - static void *nal_malloc(nal_cb_t *nal, - size_t len) - { - void *buf = malloc(len); - return buf; - } - - static void nal_free(nal_cb_t *nal, - void *buf, - size_t len) - { - free(buf); - } - - static void nal_printf(nal_cb_t *nal, - const char *fmt, - ...) 
- { - va_list ap; - - va_start(ap, fmt); - vprintf(fmt, ap); - va_end(ap); - } - - - static void nal_cli(nal_cb_t *nal, - unsigned long *flags) - { - bridge b = (bridge) nal->nal_data; - procbridge p = (procbridge) b->local; - - pthread_mutex_lock(&p->nal_cb_lock); - } - - - static void nal_sti(nal_cb_t *nal, - unsigned long *flags) - { - bridge b = (bridge)nal->nal_data; - procbridge p = (procbridge) b->local; - - pthread_mutex_unlock(&p->nal_cb_lock); - } - -- - static int nal_dist(nal_cb_t *nal, -static int nal_dist(lib_nal_t *nal, -- ptl_nid_t nid, -- unsigned long *dist) --{ -- return 0; --} -- - static void wakeup_topside(void *z) -static void check_stopping(void *z) --{ -- bridge b = z; -- procbridge p = b->local; - int stop; -- - if ((p->nal_flags & NAL_FLAG_STOPPING) == 0) - return; - -- pthread_mutex_lock(&p->mutex); - stop = p->nal_flags & NAL_FLAG_STOPPING; - if (stop) - p->nal_flags |= NAL_FLAG_STOPPED; - p->nal_flags |= NAL_FLAG_STOPPED; -- pthread_cond_broadcast(&p->cond); -- pthread_mutex_unlock(&p->mutex); -- - if (stop) - pthread_exit(0); - pthread_exit(0); --} -- -- --/* Function: nal_thread -- * Arguments: z: an opaque reference to a nal control structure -- * allocated and partially populated by the api level code -- * Returns: nothing, and only on error or explicit shutdown -- * -- * This function is the entry point of the pthread initiated on -- * the api side of the interface. This thread is used to handle -- * asynchronous delivery to the application. 
-- * -- * We define a limit macro to place a ceiling on limits -- * for syntactic convenience -- */ - #define LIMIT(x,y,max)\ - if ((unsigned int)x > max) y = max; - --extern int tcpnal_init(bridge); -- --nal_initialize nal_table[PTL_IFACE_MAX]={0,tcpnal_init,0}; -- --void *nal_thread(void *z) --{ -- nal_init_args_t *args = (nal_init_args_t *) z; -- bridge b = args->nia_bridge; -- procbridge p=b->local; -- int rc; - ptl_pid_t pid_request; - ptl_process_id_t process_id; -- int nal_type; - ptl_ni_limits_t desired; - ptl_ni_limits_t actual; -- - b->nal_cb=(nal_cb_t *)malloc(sizeof(nal_cb_t)); - b->nal_cb->nal_data=b; - b->nal_cb->cb_read=nal_read; - b->nal_cb->cb_write=nal_write; - b->nal_cb->cb_malloc=nal_malloc; - b->nal_cb->cb_free=nal_free; - b->nal_cb->cb_map=NULL; - b->nal_cb->cb_unmap=NULL; - b->nal_cb->cb_printf=nal_printf; - b->nal_cb->cb_cli=nal_cli; - b->nal_cb->cb_sti=nal_sti; - b->nal_cb->cb_dist=nal_dist; - b->lib_nal=(lib_nal_t *)malloc(sizeof(lib_nal_t)); - b->lib_nal->libnal_data=b; - b->lib_nal->libnal_map=NULL; - b->lib_nal->libnal_unmap=NULL; - b->lib_nal->libnal_dist=nal_dist; -- - pid_request = args->nia_requested_pid; - desired = *args->nia_limits; -- nal_type = args->nia_nal_type; - - actual = desired; - LIMIT(desired.max_match_entries,actual.max_match_entries,MAX_MES); - LIMIT(desired.max_mem_descriptors,actual.max_mem_descriptors,MAX_MDS); - LIMIT(desired.max_event_queues,actual.max_event_queues,MAX_EQS); - LIMIT(desired.max_atable_index,actual.max_atable_index,MAX_ACLS); - LIMIT(desired.max_ptable_index,actual.max_ptable_index,MAX_PTLS); -- - set_address(b,pid_request); - /* Wierd, but this sets b->lib_nal->libnal_ni.ni_pid.{nid,pid}, which - * lib_init() is about to do from the process_id passed to it...*/ - set_address(b,args->nia_requested_pid); -- - process_id = b->lib_nal->libnal_ni.ni_pid; - -- if (nal_table[nal_type]) rc=(*nal_table[nal_type])(b); -- /* initialize the generic 'library' level code */ -- - rc = lib_init(b->nal_cb, - 
b->nal_cb->ni.nid, - b->nal_cb->ni.pid, - 10, - actual.max_ptable_index, - actual.max_atable_index); - rc = lib_init(b->lib_nal, args->nia_apinal, - process_id, - args->nia_requested_limits, - args->nia_actual_limits); -- -- /* -- * Whatever the initialization returned is passed back to the -- * user level code for further interpretation. We just exit if -- * it is non-zero since something went wrong. -- */ -- /* this should perform error checking */ -- pthread_mutex_lock(&p->mutex); - p->nal_flags |= rc ? NAL_FLAG_STOPPED : NAL_FLAG_RUNNING; - p->nal_flags |= (rc != PTL_OK) ? NAL_FLAG_STOPPED : NAL_FLAG_RUNNING; -- pthread_cond_broadcast(&p->cond); -- pthread_mutex_unlock(&p->mutex); -- - if (!rc) { - if (rc == PTL_OK) { -- /* the thunk function is called each time the timer loop -- performs an operation and returns to blocking mode. we -- overload this function to inform the api side that -- it may be interested in looking at the event queue */ - register_thunk(wakeup_topside,b); - register_thunk(check_stopping,b); -- timer_loop(); -- } -- return(0); --} - #undef LIMIT diff --cc lnet/ulnds/select.c index c4ccae1,c4ccae1..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/select.c +++ /dev/null @@@ -1,166 -1,166 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. -- * Copyright (c) 2002 Eric Hoffman -- * -- * This file is part of Lustre, http://www.lustre.org. -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. 
-- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --/* select.c: -- * Provides a general mechanism for registering and dispatching -- * io events through the select system call. -- */ -- --#ifdef sun --#include --#else --#include --#endif -- --#include --#include --#include --#include --#include -- -- --static struct timeval beginning_of_epoch; --static io_handler io_handlers; -- --/* Function: now -- * -- * Return: the current time in canonical units: a 64 bit number -- * where the most significant 32 bits contains the number -- * of seconds, and the least signficant a count of (1/(2^32))ths -- * of a second. -- */ --when now() --{ -- struct timeval result; -- -- gettimeofday(&result,0); -- return((((unsigned long long)result.tv_sec)<<32)| -- (((unsigned long long)result.tv_usec)<<32)/1000000); --} -- -- --/* Function: register_io_handler -- * Arguments: fd: the file descriptor of interest -- * type: a mask of READ_HANDLER, WRITE_HANDLER, EXCEPTION_HANDLER -- * function: a function to call when io is available on fd -- * arg: an opaque correlator to return to the handler -- * Returns: a pointer to the io_handler structure -- */ --io_handler register_io_handler(int fd, -- int type, -- int (*function)(void *), -- void *arg) --{ -- io_handler i=(io_handler)malloc(sizeof(struct io_handler)); -- if ((i->fd=fd)>=0){ -- i->type=type; -- i->function=function; -- i->argument=arg; -- i->disabled=0; -- i->last=&io_handlers; -- if ((i->next=io_handlers)) i->next->last=&i->next; -- io_handlers=i; -- } -- return(i); --} -- --/* Function: remove_io_handler -- * Arguments: i: a pointer to the handler to stop servicing -- * -- * remove_io_handler() doesn't actually free the handler, due -- * to reentrancy problems. it just marks the handler for -- * later cleanup by the blocking function. 
-- */ --void remove_io_handler (io_handler i) --{ -- i->disabled=1; --} -- --static void set_flag(io_handler n,fd_set *fds) --{ -- if (n->type & READ_HANDLER) FD_SET(n->fd, &fds[0]); -- if (n->type & WRITE_HANDLER) FD_SET(n->fd,&fds[1]); -- if (n->type & EXCEPTION_HANDLER) FD_SET(n->fd, &fds[2]); --} -- -- --/* Function: select_timer_block -- * Arguments: until: an absolute time when the select should return -- * -- * This function dispatches the various file descriptors' handler -- * functions, if the kernel indicates there is io available. -- */ --void select_timer_block(when until) --{ -- fd_set fds[3]; -- struct timeval timeout; -- struct timeval *timeout_pointer; -- int result; -- io_handler j; -- io_handler *k; -- -- /* TODO: loop until the entire interval is expired*/ -- if (until){ -- when interval=until-now(); -- timeout.tv_sec=(interval>>32); -- timeout.tv_usec=((interval<<32)/1000000)>>32; -- timeout_pointer=&timeout; -- } else timeout_pointer=0; -- -- FD_ZERO(&fds[0]); -- FD_ZERO(&fds[1]); -- FD_ZERO(&fds[2]); -- for (k=&io_handlers;*k;){ -- if ((*k)->disabled){ -- j=*k; -- *k=(*k)->next; -- free(j); -- } -- if (*k) { -- set_flag(*k,fds); -- k=&(*k)->next; -- } -- } -- -- result=select(FD_SETSIZE, &fds[0], &fds[1], &fds[2], timeout_pointer); -- -- if (result > 0) -- for (j=io_handlers;j;j=j->next){ -- if (!(j->disabled) && -- ((FD_ISSET(j->fd, &fds[0]) && (j->type & READ_HANDLER)) || -- (FD_ISSET(j->fd, &fds[1]) && (j->type & WRITE_HANDLER)) || -- (FD_ISSET(j->fd, &fds[2]) && (j->type & EXCEPTION_HANDLER)))){ -- if (!(*j->function)(j->argument)) -- j->disabled=1; -- } -- } --} -- --/* Function: init_unix_timer() -- * is called to initialize the library -- */ --void init_unix_timer() --{ -- io_handlers=0; -- gettimeofday(&beginning_of_epoch, 0); -- initialize_timer(select_timer_block); --} diff --cc lnet/ulnds/socklnd/Makefile.am index 15080b0,3437d39..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/socklnd/Makefile.am +++ /dev/null @@@ -1,13 
-1,10 +1,0 @@@ --if LIBLUSTRE -if !CRAY_PORTALS --noinst_LIBRARIES = libtcpnal.a --endif - - noinst_HEADERS = pqtimer.h dispatch.h table.h timer.h connection.h \ - ipmap.h bridge.h procbridge.h - - libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h \ - dispatch.h table.h timer.h address.c procapi.c proclib.c \ - connection.c tcpnal.c connection.h -endif -- -noinst_HEADERS = pqtimer.h dispatch.h table.h timer.h connection.h ipmap.h bridge.h procbridge.h -libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h dispatch.h table.h timer.h address.c procapi.c proclib.c connection.c tcpnal.c connection.h --libtcpnal_a_CPPFLAGS = $(LLCPPFLAGS) --libtcpnal_a_CFLAGS = $(LLCFLAGS) diff --cc lnet/ulnds/socklnd/README index 6cb93d9,6cb93d9..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/socklnd/README +++ /dev/null @@@ -1,53 -1,53 +1,0 @@@ --This library implements two NAL interfaces, both running over IP. --The first, tcpnal, creates TCP connections between participating --processes in order to transport the portals requests. The second, --ernal, provides a simple transport protocol which runs over --UDP datagrams. -- --The interface functions return both of these values in host order for --convenience and readability. However this means that addresses --exchanged in messages between hosts of different orderings will not --function properly. -- --Both NALs use the same support functions in order to schedule events --and communicate with the generic portals implementation. -- -- ------------------------- -- | api | -- |_______________________| -- | lib | -- |_______________________| -- | ernal | |tcpnal | -- |--------| |----------| -- | udpsock| |connection| -- |-----------------------| -- | timer/select | -- ------------------------- -- -- -- These NALs uses the framework from fdnal of a pipe between the api --and library sides. This is wrapped up in the select on the library --side, and blocks on the api side. 
Performance could be severely --enhanced by collapsing this aritificial barrier, by using shared --memory queues, or by wiring the api layer directly to the library. -- -- --nid is defined as the low order 24-bits of the IP address of the --physical node left shifted by 8 plus a virtual node number of 0 --through 255 (really only 239). The virtual node number of a tcpnal --application should be specified using the environment variable --PTL_VIRTNODE. pid is now a completely arbitrary number in the --range of 0 to 255. The IP interface used can be overridden by --specifying the appropriate hostid by setting the PTL_HOSTID --environment variable. The value can be either dotted decimal --(n.n.n.n) or hex starting with "0x". --TCPNAL: -- As the NAL needs to try to send to a particular nid/pid pair, it -- will open up connections on demand. Because the port associated with -- the connecting socket is different from the bound port, two -- connections will normally be established between a pair of peers, with -- data flowing from the anonymous connect (active) port to the advertised -- or well-known bound (passive) port of each peer. -- -- Should the connection fail to open, an error is reported to the -- library component, which causes the api request to fail. diff --cc lnet/ulnds/socklnd/address.c index 6507924,f329e2a..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/socklnd/address.c +++ /dev/null @@@ -1,146 -1,145 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. -- * -- * This file is part of Lustre, http://www.lustre.org. -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. 
-- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --/* address.c: -- * this file provides functions to aquire the IP address of the node -- * and translate them into a NID/PID pair which supports a static -- * mapping of virtual nodes into the port range of an IP socket. --*/ -- --#include --#include --#include --#include --#include --#include --#include -- -- --/* Function: get_node_id -- * Returns: a 32 bit id for this node, actually a big-endian IP address -- * -- * get_node_id() determines the host name and uses the resolver to -- * find out its ip address. This is fairly fragile and inflexible, but -- * explicitly asking about interfaces and their addresses is very -- * complicated and nonportable. -- */ --static unsigned int get_node_id(void) --{ -- char buffer[255]; -- unsigned int x; -- struct hostent *he; -- char * host_envp; -- -- if (!(host_envp = getenv("PTL_HOSTID"))) -- { -- gethostname(buffer,sizeof(buffer)); -- he=gethostbyname(buffer); -- if (he) -- x=*(unsigned int *)he->h_addr_list[0]; -- else -- x = 0; -- return(ntohl(x)); -- } -- else -- { -- if (host_envp[1] != 'x') -- { -- int a, b, c, d; -- sscanf(host_envp, "%d.%d.%d.%d", &a, &b, &c, &d); -- return ((a<<24) | (b<<16) | (c<<8) | d); -- } -- else -- { -- long long hostid = strtoll(host_envp, 0, 0); -- return((unsigned int) hostid); -- } -- } --} -- -- --/* Function: set_address -- * Arugments: t: a procnal structure to populate with the request -- * -- * set_address performs the bit manipulations to set the nid, pid, and -- * iptop8 fields of the procnal structures. 
-- * -- * TODO: fix pidrequest to try to do dynamic binding if PTL_ID_ANY -- */ -- --#ifdef DIRECT_IP_MODE --void set_address(bridge t,ptl_pid_t pidrequest) --{ -- int port; -- if (pidrequest==(unsigned short)PTL_PID_ANY) port = 0; -- else port=pidrequest; - t->nal_cb->ni.nid=get_node_id(); - t->nal_cb->ni.pid=port; - t->lib_nal->libnal_ni.ni_pid.nid=get_node_id(); - t->lib_nal->libnal_ni.ni_pid.pid=port; --} --#else -- --void set_address(bridge t,ptl_pid_t pidrequest) --{ -- int virtnode, in_addr, port; -- ptl_pid_t pid; -- -- /* get and remember my node id*/ -- if (!getenv("PTL_VIRTNODE")) -- virtnode = 0; -- else -- { -- int maxvnode = PNAL_VNODE_MASK - (PNAL_BASE_PORT -- >> PNAL_VNODE_SHIFT); -- virtnode = atoi(getenv("PTL_VIRTNODE")); -- if (virtnode > maxvnode) -- { -- fprintf(stderr, "PTL_VIRTNODE of %d is too large - max %d\n", -- virtnode, maxvnode); -- return; -- } -- } -- -- in_addr = get_node_id(); -- -- t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */ - t->nal_cb->ni.nid = ((in_addr & PNAL_HOSTID_MASK) - << PNAL_VNODE_SHIFT) - + virtnode; - - t->lib_nal->libnal_ni.ni_pid.nid = ((in_addr & PNAL_HOSTID_MASK) - << PNAL_VNODE_SHIFT) - + virtnode; -- pid=pidrequest; -- /* TODO: Support of pid PTL_ID_ANY with virtual nodes needs more work. 
*/ --#ifdef notyet -- if (pid==(unsigned short)PTL_PID_ANY) port = 0; --#endif -- if (pid==(unsigned short)PTL_PID_ANY) -- { -- fprintf(stderr, "portal pid PTL_ID_ANY is not currently supported\n"); -- return; -- } -- else if (pid > PNAL_PID_MASK) -- { -- fprintf(stderr, "portal pid of %d is too large - max %d\n", -- pid, PNAL_PID_MASK); -- return; -- } -- else port = ((virtnode << PNAL_VNODE_SHIFT) + pid) + PNAL_BASE_PORT; - t->nal_cb->ni.pid=pid; - t->lib_nal->libnal_ni.ni_pid.pid=pid; --} --#endif diff --cc lnet/ulnds/socklnd/bridge.h index 9a90ab8,d2f0f2c..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/socklnd/bridge.h +++ /dev/null @@@ -1,34 -1,34 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. -- * -- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ -- */ -- --#ifndef TCPNAL_PROCBRIDGE_H --#define TCPNAL_PROCBRIDGE_H -- --#include -#include - -#define PTL_IFACE_TCP 1 -#define PTL_IFACE_ER 2 -#define PTL_IFACE_SS 3 -#define PTL_IFACE_MAX 4 -- --typedef struct bridge { -- int alive; - nal_cb_t *nal_cb; - lib_nal_t *lib_nal; -- void *lower; -- void *local; -- void (*shutdown)(struct bridge *); -- /* this doesn't really belong here */ -- unsigned char iptop8; --} *bridge; - -- - nal_t *bridge_init(ptl_interface_t nal, - ptl_pid_t pid_request, - ptl_ni_limits_t *desired, - ptl_ni_limits_t *actual, - int *rc); -- --typedef int (*nal_initialize)(bridge); --extern nal_initialize nal_table[PTL_IFACE_MAX]; -- --#endif diff --cc lnet/ulnds/socklnd/connection.c index 7b4cecd,ed8dc08..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/socklnd/connection.c +++ /dev/null @@@ -1,488 -1,468 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. -- * -- * This file is part of Lustre, http://www.lustre.org. 
-- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --/* connection.c: -- This file provides a simple stateful connection manager which -- builds tcp connections on demand and leaves them open for -- future use. It also provides the machinery to allow peers -- to connect to it --*/ -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#ifndef __CYGWIN__ --#include --#endif -- --/* global variable: acceptor port */ --unsigned short tcpnal_acceptor_port = 988; -- -- --/* Function: compare_connection -- * Arguments: connection c: a connection in the hash table -- * ptl_process_id_t: an id to verify agains -- * Returns: 1 if the connection is the one requested, 0 otherwise -- * -- * compare_connection() tests for collisions in the hash table -- */ --static int compare_connection(void *arg1, void *arg2) --{ -- connection c = arg1; -- unsigned int * id = arg2; --#if 0 -- return((c->ip==id[0]) && (c->port==id[1])); --#else -- /* CFS specific hacking */ -- return (c->ip == id[0]); --#endif --} -- -- --/* Function: connection_key -- * Arguments: ptl_process_id_t id: an id to hash -- * Returns: a not-particularily-well-distributed hash -- * of the id -- */ --static unsigned int connection_key(unsigned int *id) --{ 
--#if 0 -- return(id[0]^id[1]); --#else -- /* CFS specific hacking */ -- return (unsigned int) id[0]; --#endif --} -- -- --/* Function: remove_connection -- * Arguments: c: the connection to remove -- */ --void remove_connection(void *arg) --{ -- connection c = arg; -- unsigned int id[2]; -- -- id[0]=c->ip; -- id[1]=c->port; -- hash_table_remove(c->m->connections,id); -- close(c->fd); -- free(c); --} -- -- --/* Function: read_connection: -- * Arguments: c: the connection to read from -- * dest: the buffer to read into -- * len: the number of bytes to read -- * Returns: success as 1, or failure as 0 -- * -- * read_connection() reads data from the connection, continuing -- * to read partial results until the request is satisfied or -- * it errors. TODO: this read should be covered by signal protection. -- */ --int read_connection(connection c, -- unsigned char *dest, -- int len) --{ -- int offset = 0,rc; -- -- if (len) { -- do { --#ifndef __CYGWIN__ -- rc = syscall(SYS_read, c->fd, dest+offset, len-offset); --#else -- rc = recv(c->fd, dest+offset, len-offset, 0); --#endif -- if (rc <= 0) { -- if (errno == EINTR) { -- rc = 0; -- } else { -- remove_connection(c); -- return (0); -- } -- } -- offset += rc; -- } while (offset < len); -- } -- return (1); --} -- --static int connection_input(void *d) --{ -- connection c = d; -- return((*c->m->handler)(c->m->handler_arg,c)); --} -- -- --/* Function: allocate_connection -- * Arguments: t: tcpnal the allocation is occuring in the context of -- * dest: portal endpoint address for this connection -- * fd: open file descriptor for the socket -- * Returns: an allocated connection structure -- * -- * just encompasses the action common to active and passive -- * connections of allocation and placement in the global table -- */ --static connection allocate_connection(manager m, -- unsigned int ip, -- unsigned short port, -- int fd) --{ -- connection c=malloc(sizeof(struct connection)); -- unsigned int id[2]; -- c->m=m; -- c->fd=fd; 
-- c->ip=ip; -- c->port=port; -- id[0]=ip; -- id[1]=port; -- register_io_handler(fd,READ_HANDLER,connection_input,c); -- hash_table_insert(m->connections,c,id); -- return(c); --} -- -- --/* Function: new_connection -- * Arguments: t: opaque argument holding the tcpname -- * Returns: 1 in order to reregister for new connection requests -- * -- * called when the bound service socket recieves -- * a new connection request, it always accepts and -- * installs a new connection -- */ --static int new_connection(void *z) --{ -- manager m=z; -- struct sockaddr_in s; -- int len=sizeof(struct sockaddr_in); -- int fd=accept(m->bound,(struct sockaddr *)&s,&len); -- unsigned int nid=*((unsigned int *)&s.sin_addr); -- /* cfs specific hack */ -- //unsigned short pid=s.sin_port; -- pthread_mutex_lock(&m->conn_lock); -- allocate_connection(m,htonl(nid),0/*pid*/,fd); -- pthread_mutex_unlock(&m->conn_lock); -- return(1); --} - - /* FIXME assuming little endian, cleanup!! */ - #define __cpu_to_le64(x) ((__u64)(x)) - #define __le64_to_cpu(x) ((__u64)(x)) - #define __cpu_to_le32(x) ((__u32)(x)) - #define __le32_to_cpu(x) ((__u32)(x)) - #define __cpu_to_le16(x) ((__u16)(x)) - #define __le16_to_cpu(x) ((__u16)(x)) -- --extern ptl_nid_t tcpnal_mynid; -- --int --tcpnal_hello (int sockfd, ptl_nid_t *nid, int type, __u64 incarnation) --{ -- int rc; - int nob; -- ptl_hdr_t hdr; -- ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid; -- -- LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid)); -- -- memset (&hdr, 0, sizeof (hdr)); - hmv->magic = __cpu_to_le32 (PORTALS_PROTO_MAGIC); - hmv->version_major = __cpu_to_le32 (PORTALS_PROTO_VERSION_MAJOR); - hmv->version_minor = __cpu_to_le32 (PORTALS_PROTO_VERSION_MINOR); - hmv->magic = cpu_to_le32(PORTALS_PROTO_MAGIC); - hmv->version_major = cpu_to_le32(PORTALS_PROTO_VERSION_MAJOR); - hmv->version_minor = cpu_to_le32(PORTALS_PROTO_VERSION_MINOR); -- - hdr.src_nid = __cpu_to_le64 (tcpnal_mynid); - hdr.type = __cpu_to_le32 (PTL_MSG_HELLO); - 
hdr.src_nid = cpu_to_le64(tcpnal_mynid); - hdr.type = cpu_to_le32(PTL_MSG_HELLO); -- - hdr.msg.hello.type = __cpu_to_le32 (type); - hdr.msg.hello.incarnation = 0; - hdr.msg.hello.type = cpu_to_le32(type); - hdr.msg.hello.incarnation = cpu_to_le64(incarnation); - - /* I don't send any interface info */ -- -- /* Assume sufficient socket buffering for this message */ -- rc = syscall(SYS_write, sockfd, &hdr, sizeof(hdr)); -- if (rc <= 0) { -- CERROR ("Error %d sending HELLO to "LPX64"\n", rc, *nid); -- return (rc); -- } -- -- rc = syscall(SYS_read, sockfd, hmv, sizeof(*hmv)); -- if (rc <= 0) { -- CERROR ("Error %d reading HELLO from "LPX64"\n", rc, *nid); -- return (rc); -- } -- - if (hmv->magic != __le32_to_cpu (PORTALS_PROTO_MAGIC)) { - if (hmv->magic != le32_to_cpu(PORTALS_PROTO_MAGIC)) { -- CERROR ("Bad magic %#08x (%#08x expected) from "LPX64"\n", - __cpu_to_le32 (hmv->magic), PORTALS_PROTO_MAGIC, *nid); - cpu_to_le32(hmv->magic), PORTALS_PROTO_MAGIC, *nid); -- return (-EPROTO); -- } -- - if (hmv->version_major != __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR) || - hmv->version_minor != __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR)) { - if (hmv->version_major != cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR) || - hmv->version_minor != cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR)) { -- CERROR ("Incompatible protocol version %d.%d (%d.%d expected)" -- " from "LPX64"\n", - __le16_to_cpu (hmv->version_major), - __le16_to_cpu (hmv->version_minor), - le16_to_cpu (hmv->version_major), - le16_to_cpu (hmv->version_minor), -- PORTALS_PROTO_VERSION_MAJOR, -- PORTALS_PROTO_VERSION_MINOR, -- *nid); -- return (-EPROTO); -- } -- - #if (PORTALS_PROTO_VERSION_MAJOR != 0) - # error "This code only understands protocol version 0.x" -#if (PORTALS_PROTO_VERSION_MAJOR != 1) -# error "This code only understands protocol version 1.x" --#endif - /* version 0 sends magic/version as the dest_nid of a 'hello' header, - /* version 1 sends magic/version as the dest_nid of a 'hello' header, -- * so read the 
rest of it in now... */ -- -- rc = syscall(SYS_read, sockfd, hmv + 1, sizeof(hdr) - sizeof(*hmv)); -- if (rc <= 0) { -- CERROR ("Error %d reading rest of HELLO hdr from "LPX64"\n", -- rc, *nid); -- return (rc); -- } -- -- /* ...and check we got what we expected */ - if (hdr.type != __cpu_to_le32 (PTL_MSG_HELLO) || - hdr.payload_length != __cpu_to_le32 (0)) { - CERROR ("Expecting a HELLO hdr with 0 payload," - if (hdr.type != cpu_to_le32 (PTL_MSG_HELLO)) { - CERROR ("Expecting a HELLO hdr " -- " but got type %d with %d payload from "LPX64"\n", - __le32_to_cpu (hdr.type), - __le32_to_cpu (hdr.payload_length), *nid); - le32_to_cpu (hdr.type), - le32_to_cpu (hdr.payload_length), *nid); -- return (-EPROTO); -- } -- - if (__le64_to_cpu(hdr.src_nid) == PTL_NID_ANY) { - if (le64_to_cpu(hdr.src_nid) == PTL_NID_ANY) { -- CERROR("Expecting a HELLO hdr with a NID, but got PTL_NID_ANY\n"); -- return (-EPROTO); -- } -- -- if (*nid == PTL_NID_ANY) { /* don't know peer's nid yet */ - *nid = __le64_to_cpu(hdr.src_nid); - } else if (*nid != __le64_to_cpu (hdr.src_nid)) { - *nid = le64_to_cpu(hdr.src_nid); - } else if (*nid != le64_to_cpu (hdr.src_nid)) { -- CERROR ("Connected to nid "LPX64", but expecting "LPX64"\n", - __le64_to_cpu (hdr.src_nid), *nid); - le64_to_cpu (hdr.src_nid), *nid); - return (-EPROTO); - } - - /* Ignore any interface info in the payload */ - nob = le32_to_cpu(hdr.payload_length); - if (nob > getpagesize()) { - CERROR("Unexpected HELLO payload %d from "LPX64"\n", - nob, *nid); -- return (-EPROTO); - } - if (nob > 0) { - char *space = (char *)malloc(nob); - - if (space == NULL) { - CERROR("Can't allocate scratch buffer %d\n", nob); - return (-ENOMEM); - } - - rc = syscall(SYS_read, sockfd, space, nob); - if (rc <= 0) { - CERROR("Error %d skipping HELLO payload from " - LPX64"\n", rc, *nid); - return (rc); - } -- } -- -- return (0); --} -- --/* Function: force_tcp_connection -- * Arguments: t: tcpnal -- * dest: portals endpoint for the connection -- * Returns: 
an allocated connection structure, either -- * a pre-existing one, or a new connection -- */ --connection force_tcp_connection(manager m, -- unsigned int ip, -- unsigned short port, -- procbridge pb) --{ -- connection conn; -- struct sockaddr_in addr; - struct sockaddr_in locaddr; -- unsigned int id[2]; -- struct timeval tv; -- __u64 incarnation; -- - int fd; - int option; - int rc; - int rport; - ptl_nid_t peernid = PTL_NID_ANY; - port = tcpnal_acceptor_port; -- -- id[0] = ip; -- id[1] = port; -- -- pthread_mutex_lock(&m->conn_lock); -- -- conn = hash_table_find(m->connections, id); - if (conn) - goto out; - - memset(&addr, 0, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl(ip); - addr.sin_port = htons(port); - if (!conn) { - int fd; - int option; - ptl_nid_t peernid = PTL_NID_ANY; -- - memset(&locaddr, 0, sizeof(locaddr)); - locaddr.sin_family = AF_INET; - locaddr.sin_addr.s_addr = INADDR_ANY; - bzero((char *) &addr, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl(ip); - addr.sin_port = htons(port); -- - for (rport = IPPORT_RESERVED - 1; rport > IPPORT_RESERVED / 2; --rport) { - fd = socket(AF_INET, SOCK_STREAM, 0); - if (fd < 0) { - perror("tcpnal socket failed"); - goto out; - } - - option = 1; - rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, - &option, sizeof(option)); - if (rc != 0) { - perror ("Can't set SO_REUSEADDR for socket"); - close(fd); - goto out; - } - if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) { - perror("tcpnal socket failed"); - exit(-1); - } - if (connect(fd, (struct sockaddr *)&addr, - sizeof(struct sockaddr_in))) { - perror("tcpnal connect"); - return(0); - } -- - locaddr.sin_port = htons(rport); - rc = bind(fd, (struct sockaddr *)&locaddr, sizeof(locaddr)); - if (rc == 0 || errno == EACCES) { - rc = connect(fd, (struct sockaddr *)&addr, - sizeof(struct sockaddr_in)); - if (rc == 0) { - break; - } else if (errno != EADDRINUSE) { - perror("Error connecting to remote host"); - close(fd); 
- goto out; - } - } else if (errno != EADDRINUSE) { - perror("Error binding to privileged port"); - close(fd); - goto out; - } - close(fd); - } - - if (rport == IPPORT_RESERVED / 2) { - fprintf(stderr, "Out of ports trying to bind to a reserved port\n"); - goto out; - } - --#if 1 - option = 1; - setsockopt(fd, SOL_TCP, TCP_NODELAY, &option, sizeof(option)); - option = 1<<20; - setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &option, sizeof(option)); - option = 1<<20; - setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &option, sizeof(option)); - option = 1; - setsockopt(fd, SOL_TCP, TCP_NODELAY, &option, sizeof(option)); - option = 1<<20; - setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &option, sizeof(option)); - option = 1<<20; - setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &option, sizeof(option)); --#endif -- - gettimeofday(&tv, NULL); - incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; - gettimeofday(&tv, NULL); - incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; -- - /* say hello */ - if (tcpnal_hello(fd, &peernid, SOCKNAL_CONN_ANY, incarnation)) - /* say hello */ - if (tcpnal_hello(fd, &peernid, SOCKNAL_CONN_ANY, incarnation)) -- exit(-1); - - conn = allocate_connection(m, ip, port, fd); - - /* let nal thread know this event right away */ - if (conn) - procbridge_wakeup_nal(pb); -- - out: - conn = allocate_connection(m, ip, port, fd); - - /* let nal thread know this event right away */ - if (conn) - procbridge_wakeup_nal(pb); - } - -- pthread_mutex_unlock(&m->conn_lock); -- return (conn); --} - -- --/* Function: bind_socket -- * Arguments: t: the nal state for this interface -- * port: the port to attempt to bind to -- * Returns: 1 on success, or 0 on error -- * -- * bind_socket() attempts to allocate and bind a socket to the requested -- * port, or dynamically assign one from the kernel should the port be -- * zero. Sets the bound and bound_handler elements of m. -- * -- * TODO: The port should be an explicitly sized type. 
-- */ --static int bind_socket(manager m,unsigned short port) --{ -- struct sockaddr_in addr; -- int alen=sizeof(struct sockaddr_in); -- -- if ((m->bound = socket(AF_INET, SOCK_STREAM, 0)) < 0) -- return(0); -- -- bzero((char *) &addr, sizeof(addr)); -- addr.sin_family = AF_INET; -- addr.sin_addr.s_addr = 0; -- addr.sin_port = htons(port); -- -- if (bind(m->bound,(struct sockaddr *)&addr,alen)<0){ -- perror ("tcpnal bind"); -- return(0); -- } -- -- getsockname(m->bound,(struct sockaddr *)&addr, &alen); -- -- m->bound_handler=register_io_handler(m->bound,READ_HANDLER, -- new_connection,m); -- listen(m->bound,5); -- m->port=addr.sin_port; -- return(1); --} -- -- --/* Function: shutdown_connections -- * Arguments: m: the manager structure -- * -- * close all connections and reclaim resources -- */ --void shutdown_connections(manager m) --{ -- close(m->bound); -- remove_io_handler(m->bound_handler); -- hash_destroy_table(m->connections,remove_connection); -- free(m); --} -- -- --/* Function: init_connections -- * Arguments: t: the nal state for this interface -- * port: the port to attempt to bind to -- * Returns: a newly allocated manager structure, or -- * zero if the fixed port could not be bound -- */ --manager init_connections(unsigned short pid, -- int (*input)(void *, void *), -- void *a) --{ -- manager m = (manager)malloc(sizeof(struct manager)); -- m->connections = hash_create_table(compare_connection,connection_key); -- m->handler = input; -- m->handler_arg = a; -- pthread_mutex_init(&m->conn_lock, 0); -- -- if (bind_socket(m,pid)) -- return(m); -- -- free(m); -- return(0); --} diff --cc lnet/ulnds/socklnd/connection.h index 343ffa6,343ffa6..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/socklnd/connection.h +++ /dev/null @@@ -1,35 -1,35 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. 
-- * -- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ -- */ -- --#include --#include -- --typedef struct manager { -- table connections; -- pthread_mutex_t conn_lock; /* protect connections table */ -- int bound; -- io_handler bound_handler; -- int (*handler)(void *, void *); -- void *handler_arg; -- unsigned short port; --} *manager; -- -- --typedef struct connection { -- unsigned int ip; -- unsigned short port; -- int fd; -- manager m; --} *connection; -- --connection force_tcp_connection(manager m, unsigned int ip, unsigned int short, -- procbridge pb); --manager init_connections(unsigned short, int (*f)(void *, void *), void *); --void remove_connection(void *arg); --void shutdown_connections(manager m); --int read_connection(connection c, unsigned char *dest, int len); diff --cc lnet/ulnds/socklnd/debug.c index b82bb2f,b82bb2f..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/socklnd/debug.c +++ /dev/null @@@ -1,119 -1,119 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (C) 2002 Cluster File Systems, Inc. -- * Author: Phil Schwan -- * -- * This file is part of Lustre, http://www.lustre.org. -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-- */ -- --#include --#include --#include --#include --#include -- --int smp_processor_id = 1; --char debug_file_path[1024] = "/tmp/lustre-log"; --char debug_file_name[1024]; --FILE *debug_file_fd; -- --int portals_do_debug_dumplog(void *arg) --{ -- printf("Look in %s\n", debug_file_name); -- return 0; --} -- -- --void portals_debug_print(void) --{ -- return; --} -- -- --void portals_debug_dumplog(void) --{ -- printf("Look in %s\n", debug_file_name); -- return; --} -- -- --int portals_debug_init(unsigned long bufsize) --{ -- debug_file_fd = stdout; -- return 0; --} -- --int portals_debug_cleanup(void) --{ -- return 0; //close(portals_debug_fd); --} -- --int portals_debug_clear_buffer(void) --{ -- return 0; --} -- --int portals_debug_mark_buffer(char *text) --{ -- -- fprintf(debug_file_fd, "*******************************************************************************\n"); -- fprintf(debug_file_fd, "DEBUG MARKER: %s\n", text); -- fprintf(debug_file_fd, "*******************************************************************************\n"); -- -- return 0; --} -- --int portals_debug_copy_to_user(char *buf, unsigned long len) --{ -- return 0; --} -- --/* FIXME: I'm not very smart; someone smarter should make this better. */ --void --portals_debug_msg (int subsys, int mask, char *file, const char *fn, -- const int line, const char *format, ...) --{ -- va_list ap; -- unsigned long flags; -- struct timeval tv; -- int nob; -- -- -- /* NB since we pass a non-zero sized buffer (at least) on the first -- * print, we can be assured that by the end of all the snprinting, -- * we _do_ have a terminated buffer, even if our message got truncated. 
-- */ -- -- gettimeofday(&tv, NULL); -- -- nob += fprintf(debug_file_fd, -- "%02x:%06x:%d:%lu.%06lu ", -- subsys >> 24, mask, smp_processor_id, -- tv.tv_sec, tv.tv_usec); -- -- nob += fprintf(debug_file_fd, -- "(%s:%d:%s() %d+%ld): ", -- file, line, fn, 0, -- 8192 - ((unsigned long)&flags & 8191UL)); -- -- va_start (ap, format); -- nob += fprintf(debug_file_fd, format, ap); -- va_end (ap); -- -- --} -- diff --cc lnet/ulnds/socklnd/dispatch.h index 34dd070,34dd070..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/socklnd/dispatch.h +++ /dev/null @@@ -1,39 -1,39 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. -- * Copyright (c) 2002 Eric Hoffman -- * -- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ -- */ -- --/* this file is only called dispatch.h to prevent it -- from colliding with /usr/include/sys/select.h */ -- --typedef struct io_handler *io_handler; -- --struct io_handler{ -- io_handler *last; -- io_handler next; -- int fd; -- int type; -- int (*function)(void *); -- void *argument; -- int disabled; --}; -- -- --#define READ_HANDLER 1 --#define WRITE_HANDLER 2 --#define EXCEPTION_HANDLER 4 --#define ALL_HANDLER (READ_HANDLER | WRITE_HANDLER | EXCEPTION_HANDLER) -- --io_handler register_io_handler(int fd, -- int type, -- int (*function)(void *), -- void *arg); -- --void remove_io_handler (io_handler i); --void init_unix_timer(void); --void select_timer_block(when until); --when now(void); diff --cc lnet/ulnds/socklnd/ipmap.h index 85b1e18,85b1e18..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/socklnd/ipmap.h +++ /dev/null @@@ -1,38 -1,38 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. 
-- * -- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ -- */ -- --#define DIRECT_IP_MODE --#ifdef DIRECT_IP_MODE --#define PNAL_NID(in_addr, port) (in_addr) --#define PNAL_PID(pid) (pid) --#define PNAL_IP(in_addr, port) (in_addr) --#define PNAL_PORT(nid, pid) (pid) --#else -- --#define PNAL_BASE_PORT 4096 --#define PNAL_HOSTID_SHIFT 24 --#define PNAL_HOSTID_MASK ((1 << PNAL_HOSTID_SHIFT) - 1) --#define PNAL_VNODE_SHIFT 8 --#define PNAL_VNODE_MASK ((1 << PNAL_VNODE_SHIFT) - 1) --#define PNAL_PID_SHIFT 8 --#define PNAL_PID_MASK ((1 << PNAL_PID_SHIFT) - 1) -- --#define PNAL_NID(in_addr, port) (((ntohl(in_addr) & PNAL_HOSTID_MASK) \ -- << PNAL_VNODE_SHIFT) \ -- | (((ntohs(port)-PNAL_BASE_PORT) >>\ -- PNAL_PID_SHIFT))) --#define PNAL_PID(port) ((ntohs(port) - PNAL_BASE_PORT) & PNAL_PID_MASK) -- --#define PNAL_IP(nid,t) (htonl((((unsigned)(nid))\ -- >> PNAL_VNODE_SHIFT)\ -- | (t->iptop8 << PNAL_HOSTID_SHIFT))) --#define PNAL_PORT(nid, pid) (htons(((((nid) & PNAL_VNODE_MASK) \ -- << PNAL_VNODE_SHIFT) \ -- | ((pid) & PNAL_PID_MASK)) \ -- + PNAL_BASE_PORT)) --#endif diff --cc lnet/ulnds/socklnd/pqtimer.c index 98c48eb,98c48eb..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/socklnd/pqtimer.c +++ /dev/null @@@ -1,226 -1,226 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. -- * Copyright (c) 2002 Eric Hoffman -- * -- * This file is part of Lustre, http://www.lustre.org. -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. 
-- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --/* timer.c: -- * this file implements a simple priority-queue based timer system. when -- * combined with a file which implements now() and block(), it can -- * be used to provide course-grained time-based callbacks. -- */ -- --#include --#include --#include -- --struct timer { -- void (*function)(void *); -- void *arg; -- when w; -- int interval; -- int disable; --}; -- --typedef struct thunk *thunk; --struct thunk { -- void (*f)(void *); -- void *a; -- thunk next; --}; -- --extern when now(void); -- --static thunk thunks; --static int internal; --static void (*block_function)(when); --static int number_of_timers; --static int size_of_pqueue; --static timer *timers; -- -- --static void heal(int where) --{ -- int left=(where<<1); -- int right=(where<<1)+1; -- int min=where; -- timer temp; -- -- if (left <= number_of_timers) -- if (timers[left]->w < timers[min]->w) min=left; -- if (right <= number_of_timers) -- if (timers[right]->w < timers[min]->w) min=right; -- if (min != where){ -- temp=timers[where]; -- timers[where]=timers[min]; -- timers[min]=temp; -- heal(min); -- } --} -- --static void add_pqueue(int i) --{ -- timer temp; -- int parent=(i>>1); -- if ((i>1) && (timers[i]->w< timers[parent]->w)){ -- temp=timers[i]; -- timers[i]=timers[parent]; -- timers[parent]=temp; -- add_pqueue(parent); -- } --} -- --static void add_timer(timer t) --{ -- if (size_of_pqueue<(number_of_timers+2)){ -- int oldsize=size_of_pqueue; -- timer *new=(void *)malloc(sizeof(struct timer)*(size_of_pqueue+=10)); -- memcpy(new,timers,sizeof(timer)*oldsize); -- timers=new; -- } -- timers[++number_of_timers]=t; -- add_pqueue(number_of_timers); --} -- --/* Function: register_timer -- * Arguments: interval: the time interval from the current time when -- * the timer function should be 
called -- * function: the function to call when the time has expired -- * argument: the argument to call it with. -- * Returns: a pointer to a timer structure -- */ --timer register_timer(when interval, -- void (*function)(void *), -- void *argument) --{ -- timer t=(timer)malloc(sizeof(struct timer)); -- -- t->arg=argument; -- t->function=function; -- t->interval=interval; -- t->disable=0; -- t->w=now()+interval; -- add_timer(t); -- if (!internal && (number_of_timers==1)) -- block_function(t->w); -- return(t); --} -- --/* Function: remove_timer -- * Arguments: t: -- * Returns: nothing -- * -- * remove_timer removes a timer from the system, insuring -- * that it will never be called. It does not actually -- * free the timer due to reentrancy issues. -- */ -- --void remove_timer(timer t) --{ -- t->disable=1; --} -- -- -- --void timer_fire() --{ -- timer current; -- -- current=timers[1]; -- timers[1]=timers[number_of_timers--]; -- heal(1); -- if (!current->disable) { -- (*current->function)(current->arg); -- } -- free(current); --} -- --when next_timer(void) --{ -- when here=now(); -- -- while (number_of_timers && (timers[1]->w <= here)) timer_fire(); -- if (number_of_timers) return(timers[1]->w); -- return(0); --} -- --/* Function: timer_loop -- * Arguments: none -- * Returns: never -- * -- * timer_loop() is the blocking dispatch function for the timer. -- * Is calls the block() function registered with init_timer, -- * and handles associated with timers that have been registered. 
-- */ --void timer_loop() --{ -- when here; -- -- while (1){ -- thunk z; -- here=now(); -- -- for (z=thunks;z;z=z->next) (*z->f)(z->a); -- -- if (number_of_timers){ -- if (timers[1]->w > here){ -- (*block_function)(timers[1]->w); -- } else { -- timer_fire(); -- } -- } else { -- thunk z; -- for (z=thunks;z;z=z->next) (*z->f)(z->a); -- (*block_function)(0); -- } -- } --} -- -- --/* Function: register_thunk -- * Arguments: f: the function to call -- * a: the single argument to call it with -- * -- * Thunk functions get called at irregular intervals, they -- * should not assume when, or take a particularily long -- * amount of time. Thunks are for background cleanup tasks. -- */ --void register_thunk(void (*f)(void *),void *a) --{ -- thunk t=(void *)malloc(sizeof(struct thunk)); -- t->f=f; -- t->a=a; -- t->next=thunks; -- thunks=t; --} -- --/* Function: initialize_timer -- * Arguments: block: the function to call to block for the specified interval -- * -- * initialize_timer() must be called before any other timer function, -- * including timer_loop. -- */ --void initialize_timer(void (*block)(when)) --{ -- block_function=block; -- number_of_timers=0; -- size_of_pqueue=10; -- timers=(timer *)malloc(sizeof(timer)*size_of_pqueue); -- thunks=0; --} diff --cc lnet/ulnds/socklnd/pqtimer.h index 11efb0e,11efb0e..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/socklnd/pqtimer.h +++ /dev/null @@@ -1,25 -1,25 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. 
-- * Copyright (c) 2002 Eric Hoffman -- * -- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ -- */ -- --typedef unsigned long long when; --when now(void); --typedef struct timer *timer; --timer register_timer(when interval, -- void (*function)(void *), -- void *argument); --timer register_timer_wait(void); --void remove_timer(timer); --void timer_loop(void); --void initialize_timer(void (*block)(when)); --void timer_fire(void); -- -- --#define HZ 0x100000000ull -- -- diff --cc lnet/ulnds/socklnd/procapi.c index c27f555,f3843d7..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/socklnd/procapi.c +++ /dev/null @@@ -1,273 -1,188 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. -- * Copyright (c) 2003 Cluster File Systems, Inc. -- * -- * This file is part of Lustre, http://www.lustre.org. -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --/* api.c: -- * This file provides the 'api' side for the process-based nals. -- * it is responsible for creating the 'library' side thread, -- * and passing wrapped portals transactions to it. -- * -- * Along with initialization, shutdown, and transport to the library -- * side, this file contains some stubs to satisfy the nal definition. 
-- */ --#include --#include --#include --#include --#ifndef __CYGWIN__ --#include --#endif --#include --#include --#include --#include --#include -- -- --/* XXX CFS workaround, to give a chance to let nal thread wake up -- * from waiting in select -- */ --static int procbridge_notifier_handler(void *arg) --{ -- static char buf[8]; -- procbridge p = (procbridge) arg; -- -- syscall(SYS_read, p->notifier[1], buf, sizeof(buf)); -- return 1; --} -- --void procbridge_wakeup_nal(procbridge p) --{ -- static char buf[8]; -- syscall(SYS_write, p->notifier[0], buf, sizeof(buf)); - } - - /* Function: forward - * Arguments: nal_t *nal: pointer to my top-side nal structure - * id: the command to pass to the lower layer - * args, args_len:pointer to and length of the request - * ret, ret_len: pointer to and size of the result - * Returns: a portals status code - * - * forwards a packaged api call from the 'api' side to the 'library' - * side, and collects the result - */ - static int procbridge_forward(nal_t *n, int id, void *args, size_t args_len, - void *ret, size_t ret_len) - { - bridge b = (bridge) n->nal_data; - - if (id == PTL_FINI) { - lib_fini(b->nal_cb); - - if (b->shutdown) - (*b->shutdown)(b); - } - - lib_dispatch(b->nal_cb, NULL, id, args, ret); - - return (PTL_OK); --} - -- --/* Function: shutdown -- * Arguments: nal: a pointer to my top side nal structure -- * ni: my network interface index -- * -- * cleanup nal state, reclaim the lower side thread and -- * its state using PTL_FINI codepoint -- */ - static int procbridge_shutdown(nal_t *n, int ni) -static void procbridge_shutdown(nal_t *n) --{ - bridge b=(bridge)n->nal_data; - lib_nal_t *nal = n->nal_data; - bridge b=(bridge)nal->libnal_data; -- procbridge p=(procbridge)b->local; -- -- p->nal_flags |= NAL_FLAG_STOPPING; -- procbridge_wakeup_nal(p); -- -- do { -- pthread_mutex_lock(&p->mutex); -- if (p->nal_flags & NAL_FLAG_STOPPED) { -- pthread_mutex_unlock(&p->mutex); -- break; -- } -- pthread_cond_wait(&p->cond, 
&p->mutex); -- pthread_mutex_unlock(&p->mutex); -- } while (1); -- -- free(p); - return(0); - } - - - /* Function: validate - * useless stub - */ - static int procbridge_validate(nal_t *nal, void *base, size_t extent) - { - return(0); --} - - - /* FIXME cfs temporary workaround! FIXME - * global time out value - */ - int __tcpnal_eqwait_timeout_value = 0; - int __tcpnal_eqwait_timedout = 0; - - /* Function: yield - * Arguments: pid: - * - * this function was originally intended to allow the - * lower half thread to be scheduled to allow progress. we - * overload it to explicitly block until signalled by the - * lower half. - */ - static void procbridge_yield(nal_t *n) - { - bridge b=(bridge)n->nal_data; - procbridge p=(procbridge)b->local; - - pthread_mutex_lock(&p->mutex); - if (!__tcpnal_eqwait_timeout_value) { - pthread_cond_wait(&p->cond,&p->mutex); - } else { - struct timeval now; - struct timespec timeout; - - gettimeofday(&now, NULL); - timeout.tv_sec = now.tv_sec + __tcpnal_eqwait_timeout_value; - timeout.tv_nsec = now.tv_usec * 1000; -- - __tcpnal_eqwait_timedout = - pthread_cond_timedwait(&p->cond, &p->mutex, &timeout); - } - pthread_mutex_unlock(&p->mutex); - } -- -/* forward decl */ -extern int procbridge_startup (nal_t *, ptl_pid_t, - ptl_ni_limits_t *, ptl_ni_limits_t *); -- - static void procbridge_lock(nal_t * nal, unsigned long *flags){} - static void procbridge_unlock(nal_t * nal, unsigned long *flags){} --/* api_nal -- * the interface vector to allow the generic code to access - * this nal. this is seperate from the library side nal_cb. - * this nal. this is seperate from the library side lib_nal. 
-- * TODO: should be dyanmically allocated -- */ - static nal_t api_nal = { - ni: {0}, -nal_t procapi_nal = { -- nal_data: NULL, - forward: procbridge_forward, - shutdown: procbridge_shutdown, - validate: procbridge_validate, - yield: procbridge_yield, - lock: procbridge_lock, - unlock: procbridge_unlock - nal_ni_init: procbridge_startup, - nal_ni_fini: procbridge_shutdown, --}; -- --ptl_nid_t tcpnal_mynid; -- - /* Function: procbridge_interface -/* Function: procbridge_startup -- * -- * Arguments: pid: requested process id (port offset) -- * PTL_ID_ANY not supported. -- * desired: limits passed from the application -- * and effectively ignored -- * actual: limits actually allocated and returned -- * - * Returns: a pointer to my statically allocated top side NAL - * structure - * Returns: portals rc -- * -- * initializes the tcp nal. we define unix_failure as an -- * error wrapper to cut down clutter. -- */ - nal_t *procbridge_interface(int num_interface, - ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, - ptl_pid_t requested_pid) -int procbridge_startup (nal_t *nal, ptl_pid_t requested_pid, - ptl_ni_limits_t *requested_limits, - ptl_ni_limits_t *actual_limits) --{ -- nal_init_args_t args; - -- procbridge p; -- bridge b; - static int initialized=0; - ptl_ni_limits_t limits = {-1,-1,-1,-1,-1}; - /* XXX nal_type is purely private to tcpnal here */ -- int nal_type = PTL_IFACE_TCP;/* PTL_IFACE_DEFAULT FIXME hack */ -- - if(initialized) return (&api_nal); - LASSERT(nal == &procapi_nal); -- -- init_unix_timer(); -- -- b=(bridge)malloc(sizeof(struct bridge)); -- p=(procbridge)malloc(sizeof(struct procbridge)); - api_nal.nal_data=b; -- b->local=p; - - if (ptl_size) - limits.max_ptable_index = ptl_size; - if (acl_size) - limits.max_atable_index = acl_size; -- -- args.nia_requested_pid = requested_pid; - args.nia_limits = &limits; - args.nia_requested_limits = requested_limits; - args.nia_actual_limits = actual_limits; -- args.nia_nal_type = nal_type; -- args.nia_bridge 
= b; - args.nia_apinal = nal; -- -- /* init procbridge */ -- pthread_mutex_init(&p->mutex,0); -- pthread_cond_init(&p->cond, 0); -- p->nal_flags = 0; - pthread_mutex_init(&p->nal_cb_lock, 0); -- -- /* initialize notifier */ -- if (socketpair(AF_UNIX, SOCK_STREAM, 0, p->notifier)) { -- perror("socketpair failed"); - return NULL; - return PTL_FAIL; -- } -- -- if (!register_io_handler(p->notifier[1], READ_HANDLER, -- procbridge_notifier_handler, p)) { -- perror("fail to register notifier handler"); - return NULL; - return PTL_FAIL; -- } -- -- /* create nal thread */ -- if (pthread_create(&p->t, NULL, nal_thread, &args)) { -- perror("nal_init: pthread_create"); - return(NULL); - return PTL_FAIL; -- } -- -- do { -- pthread_mutex_lock(&p->mutex); -- if (p->nal_flags & (NAL_FLAG_RUNNING | NAL_FLAG_STOPPED)) { -- pthread_mutex_unlock(&p->mutex); -- break; -- } -- pthread_cond_wait(&p->cond, &p->mutex); -- pthread_mutex_unlock(&p->mutex); -- } while (1); -- -- if (p->nal_flags & NAL_FLAG_STOPPED) - return (NULL); - return PTL_FAIL; -- - b->nal_cb->ni.nid = tcpnal_mynid; - initialized = 1; - b->lib_nal->libnal_ni.ni_pid.nid = tcpnal_mynid; -- - return (&api_nal); - return PTL_OK; --} diff --cc lnet/ulnds/socklnd/procbridge.h index 965f83d,1f91ced..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/socklnd/procbridge.h +++ /dev/null @@@ -1,59 -1,56 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. -- * Copyright (c) 2003 Cluster File Systems, Inc. 
-- * -- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ -- */ -- --#ifndef _PROCBRIDGE_H_ --#define _PROCBRIDGE_H_ -- --#include --#include --#include -- -- --#define NAL_FLAG_RUNNING 1 --#define NAL_FLAG_STOPPING 2 --#define NAL_FLAG_STOPPED 4 -- --typedef struct procbridge { -- /* sync between user threads and nal thread */ -- pthread_t t; -- pthread_cond_t cond; -- pthread_mutex_t mutex; -- -- /* socket pair used to notify nal thread */ -- int notifier[2]; -- -- int nal_flags; -- - pthread_mutex_t nal_cb_lock; --} *procbridge; -- --typedef struct nal_init_args { -- ptl_pid_t nia_requested_pid; - ptl_ni_limits_t *nia_limits; - ptl_ni_limits_t *nia_requested_limits; - ptl_ni_limits_t *nia_actual_limits; -- int nia_nal_type; -- bridge nia_bridge; - nal_t *nia_apinal; --} nal_init_args_t; -- --extern void *nal_thread(void *); -- -- --#define PTL_INIT (LIB_MAX_DISPATCH+1) --#define PTL_FINI (LIB_MAX_DISPATCH+2) -- --#define MAX_ACLS 1 --#define MAX_PTLS 128 -- --extern void set_address(bridge t,ptl_pid_t pidrequest); - extern nal_t *procbridge_interface(int num_interface, - ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, - ptl_pid_t requested_pid); --extern void procbridge_wakeup_nal(procbridge p); -- --#endif diff --cc lnet/ulnds/socklnd/proclib.c index 2a5ba0d,7ee7c71..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/socklnd/proclib.c +++ /dev/null @@@ -1,224 -1,137 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. -- * Copyright (c) 2003 Cluster File Systems, Inc. -- * -- * This file is part of Lustre, http://www.lustre.org. -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. 
-- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --/* lib.c: -- * This file provides the 'library' side for the process-based nals. -- * it is responsible for communication with the 'api' side and -- * providing service to the generic portals 'library' -- * implementation. 'library' might be better termed 'communication' -- * or 'kernel'. -- */ -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include -- --/* the following functions are stubs to satisfy the nal definition -- without doing anything particularily useful*/ - - static ptl_err_t nal_write(nal_cb_t *nal, - void *private, - user_ptr dst_addr, - void *src_addr, - size_t len) - { - memcpy(dst_addr, src_addr, len); - return PTL_OK; - } - - static ptl_err_t nal_read(nal_cb_t * nal, - void *private, - void *dst_addr, - user_ptr src_addr, - size_t len) - { - memcpy(dst_addr, src_addr, len); - return PTL_OK; - } - - static void *nal_malloc(nal_cb_t *nal, - size_t len) - { - void *buf = malloc(len); - return buf; - } - - static void nal_free(nal_cb_t *nal, - void *buf, - size_t len) - { - free(buf); - } - - static void nal_printf(nal_cb_t *nal, - const char *fmt, - ...) 
- { - va_list ap; - - va_start(ap, fmt); - vprintf(fmt, ap); - va_end(ap); - } - - - static void nal_cli(nal_cb_t *nal, - unsigned long *flags) - { - bridge b = (bridge) nal->nal_data; - procbridge p = (procbridge) b->local; - - pthread_mutex_lock(&p->nal_cb_lock); - } - - - static void nal_sti(nal_cb_t *nal, - unsigned long *flags) - { - bridge b = (bridge)nal->nal_data; - procbridge p = (procbridge) b->local; - - pthread_mutex_unlock(&p->nal_cb_lock); - } - -- - static int nal_dist(nal_cb_t *nal, -static int nal_dist(lib_nal_t *nal, -- ptl_nid_t nid, -- unsigned long *dist) --{ -- return 0; --} -- - static void wakeup_topside(void *z) -static void check_stopping(void *z) --{ -- bridge b = z; -- procbridge p = b->local; - int stop; -- - if ((p->nal_flags & NAL_FLAG_STOPPING) == 0) - return; - -- pthread_mutex_lock(&p->mutex); - stop = p->nal_flags & NAL_FLAG_STOPPING; - if (stop) - p->nal_flags |= NAL_FLAG_STOPPED; - p->nal_flags |= NAL_FLAG_STOPPED; -- pthread_cond_broadcast(&p->cond); -- pthread_mutex_unlock(&p->mutex); -- - if (stop) - pthread_exit(0); - pthread_exit(0); --} -- -- --/* Function: nal_thread -- * Arguments: z: an opaque reference to a nal control structure -- * allocated and partially populated by the api level code -- * Returns: nothing, and only on error or explicit shutdown -- * -- * This function is the entry point of the pthread initiated on -- * the api side of the interface. This thread is used to handle -- * asynchronous delivery to the application. 
-- * -- * We define a limit macro to place a ceiling on limits -- * for syntactic convenience -- */ - #define LIMIT(x,y,max)\ - if ((unsigned int)x > max) y = max; - --extern int tcpnal_init(bridge); -- --nal_initialize nal_table[PTL_IFACE_MAX]={0,tcpnal_init,0}; -- --void *nal_thread(void *z) --{ -- nal_init_args_t *args = (nal_init_args_t *) z; -- bridge b = args->nia_bridge; -- procbridge p=b->local; -- int rc; - ptl_pid_t pid_request; - ptl_process_id_t process_id; -- int nal_type; - ptl_ni_limits_t desired; - ptl_ni_limits_t actual; -- - b->nal_cb=(nal_cb_t *)malloc(sizeof(nal_cb_t)); - b->nal_cb->nal_data=b; - b->nal_cb->cb_read=nal_read; - b->nal_cb->cb_write=nal_write; - b->nal_cb->cb_malloc=nal_malloc; - b->nal_cb->cb_free=nal_free; - b->nal_cb->cb_map=NULL; - b->nal_cb->cb_unmap=NULL; - b->nal_cb->cb_printf=nal_printf; - b->nal_cb->cb_cli=nal_cli; - b->nal_cb->cb_sti=nal_sti; - b->nal_cb->cb_dist=nal_dist; - b->lib_nal=(lib_nal_t *)malloc(sizeof(lib_nal_t)); - b->lib_nal->libnal_data=b; - b->lib_nal->libnal_map=NULL; - b->lib_nal->libnal_unmap=NULL; - b->lib_nal->libnal_dist=nal_dist; -- - pid_request = args->nia_requested_pid; - desired = *args->nia_limits; -- nal_type = args->nia_nal_type; - - actual = desired; - LIMIT(desired.max_match_entries,actual.max_match_entries,MAX_MES); - LIMIT(desired.max_mem_descriptors,actual.max_mem_descriptors,MAX_MDS); - LIMIT(desired.max_event_queues,actual.max_event_queues,MAX_EQS); - LIMIT(desired.max_atable_index,actual.max_atable_index,MAX_ACLS); - LIMIT(desired.max_ptable_index,actual.max_ptable_index,MAX_PTLS); -- - set_address(b,pid_request); - /* Wierd, but this sets b->lib_nal->libnal_ni.ni_pid.{nid,pid}, which - * lib_init() is about to do from the process_id passed to it...*/ - set_address(b,args->nia_requested_pid); -- - process_id = b->lib_nal->libnal_ni.ni_pid; - -- if (nal_table[nal_type]) rc=(*nal_table[nal_type])(b); -- /* initialize the generic 'library' level code */ -- - rc = lib_init(b->nal_cb, - 
b->nal_cb->ni.nid, - b->nal_cb->ni.pid, - 10, - actual.max_ptable_index, - actual.max_atable_index); - rc = lib_init(b->lib_nal, args->nia_apinal, - process_id, - args->nia_requested_limits, - args->nia_actual_limits); -- -- /* -- * Whatever the initialization returned is passed back to the -- * user level code for further interpretation. We just exit if -- * it is non-zero since something went wrong. -- */ -- /* this should perform error checking */ -- pthread_mutex_lock(&p->mutex); - p->nal_flags |= rc ? NAL_FLAG_STOPPED : NAL_FLAG_RUNNING; - p->nal_flags |= (rc != PTL_OK) ? NAL_FLAG_STOPPED : NAL_FLAG_RUNNING; -- pthread_cond_broadcast(&p->cond); -- pthread_mutex_unlock(&p->mutex); -- - if (!rc) { - if (rc == PTL_OK) { -- /* the thunk function is called each time the timer loop -- performs an operation and returns to blocking mode. we -- overload this function to inform the api side that -- it may be interested in looking at the event queue */ - register_thunk(wakeup_topside,b); - register_thunk(check_stopping,b); -- timer_loop(); -- } -- return(0); --} - #undef LIMIT diff --cc lnet/ulnds/socklnd/select.c index c4ccae1,c4ccae1..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/socklnd/select.c +++ /dev/null @@@ -1,166 -1,166 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. -- * Copyright (c) 2002 Eric Hoffman -- * -- * This file is part of Lustre, http://www.lustre.org. -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. 
-- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --/* select.c: -- * Provides a general mechanism for registering and dispatching -- * io events through the select system call. -- */ -- --#ifdef sun --#include --#else --#include --#endif -- --#include --#include --#include --#include --#include -- -- --static struct timeval beginning_of_epoch; --static io_handler io_handlers; -- --/* Function: now -- * -- * Return: the current time in canonical units: a 64 bit number -- * where the most significant 32 bits contains the number -- * of seconds, and the least signficant a count of (1/(2^32))ths -- * of a second. -- */ --when now() --{ -- struct timeval result; -- -- gettimeofday(&result,0); -- return((((unsigned long long)result.tv_sec)<<32)| -- (((unsigned long long)result.tv_usec)<<32)/1000000); --} -- -- --/* Function: register_io_handler -- * Arguments: fd: the file descriptor of interest -- * type: a mask of READ_HANDLER, WRITE_HANDLER, EXCEPTION_HANDLER -- * function: a function to call when io is available on fd -- * arg: an opaque correlator to return to the handler -- * Returns: a pointer to the io_handler structure -- */ --io_handler register_io_handler(int fd, -- int type, -- int (*function)(void *), -- void *arg) --{ -- io_handler i=(io_handler)malloc(sizeof(struct io_handler)); -- if ((i->fd=fd)>=0){ -- i->type=type; -- i->function=function; -- i->argument=arg; -- i->disabled=0; -- i->last=&io_handlers; -- if ((i->next=io_handlers)) i->next->last=&i->next; -- io_handlers=i; -- } -- return(i); --} -- --/* Function: remove_io_handler -- * Arguments: i: a pointer to the handler to stop servicing -- * -- * remove_io_handler() doesn't actually free the handler, due -- * to reentrancy problems. it just marks the handler for -- * later cleanup by the blocking function. 
-- */ --void remove_io_handler (io_handler i) --{ -- i->disabled=1; --} -- --static void set_flag(io_handler n,fd_set *fds) --{ -- if (n->type & READ_HANDLER) FD_SET(n->fd, &fds[0]); -- if (n->type & WRITE_HANDLER) FD_SET(n->fd,&fds[1]); -- if (n->type & EXCEPTION_HANDLER) FD_SET(n->fd, &fds[2]); --} -- -- --/* Function: select_timer_block -- * Arguments: until: an absolute time when the select should return -- * -- * This function dispatches the various file descriptors' handler -- * functions, if the kernel indicates there is io available. -- */ --void select_timer_block(when until) --{ -- fd_set fds[3]; -- struct timeval timeout; -- struct timeval *timeout_pointer; -- int result; -- io_handler j; -- io_handler *k; -- -- /* TODO: loop until the entire interval is expired*/ -- if (until){ -- when interval=until-now(); -- timeout.tv_sec=(interval>>32); -- timeout.tv_usec=((interval<<32)/1000000)>>32; -- timeout_pointer=&timeout; -- } else timeout_pointer=0; -- -- FD_ZERO(&fds[0]); -- FD_ZERO(&fds[1]); -- FD_ZERO(&fds[2]); -- for (k=&io_handlers;*k;){ -- if ((*k)->disabled){ -- j=*k; -- *k=(*k)->next; -- free(j); -- } -- if (*k) { -- set_flag(*k,fds); -- k=&(*k)->next; -- } -- } -- -- result=select(FD_SETSIZE, &fds[0], &fds[1], &fds[2], timeout_pointer); -- -- if (result > 0) -- for (j=io_handlers;j;j=j->next){ -- if (!(j->disabled) && -- ((FD_ISSET(j->fd, &fds[0]) && (j->type & READ_HANDLER)) || -- (FD_ISSET(j->fd, &fds[1]) && (j->type & WRITE_HANDLER)) || -- (FD_ISSET(j->fd, &fds[2]) && (j->type & EXCEPTION_HANDLER)))){ -- if (!(*j->function)(j->argument)) -- j->disabled=1; -- } -- } --} -- --/* Function: init_unix_timer() -- * is called to initialize the library -- */ --void init_unix_timer() --{ -- io_handlers=0; -- gettimeofday(&beginning_of_epoch, 0); -- initialize_timer(select_timer_block); --} diff --cc lnet/ulnds/socklnd/table.c index 662775a,662775a..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/socklnd/table.c +++ /dev/null @@@ -1,264 -1,264 
+1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. -- * Copyright (c) 2002 Eric Hoffman -- * -- * This file is part of Lustre, http://www.lustre.org. -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --#include --#include --#include -- -- --/* table.c: -- * a very simple hash table implementation with paramerterizable -- * comparison and key generation functions. 
it does resize -- * in order to accomidate more entries, but never collapses -- * the table -- */ -- --static table_entry *table_lookup (table t,void *comparator, -- unsigned int k, -- int (*compare_function)(void *, void *), -- int *success) --{ -- unsigned int key=k%t->size; -- table_entry *i; -- -- for (i=&(t->entries[key]);*i;i=&((*i)->next)){ -- if (compare_function && ((*i)->key==k)) -- if ((*t->compare_function)((*i)->value,comparator)){ -- *success=1; -- return(i); -- } -- } -- *success=0; -- return(&(t->entries[key])); --} -- -- --static void resize_table(table t, int size) --{ -- int old_size=t->size; -- table_entry *old_entries=t->entries; -- int i; -- table_entry j,n; -- table_entry *position; -- int success; -- -- t->size=size; -- t->entries=(table_entry *)malloc(sizeof(table_entry)*t->size); -- memset(t->entries,0,sizeof(table_entry)*t->size); -- -- for (i=0;inext; -- position=table_lookup(t,0,j->key,0,&success); -- j->next= *position; -- *position=j; -- } -- free(old_entries); --} -- -- --/* Function: key_from_int -- * Arguments: int i: value to compute the key of -- * Returns: the key -- */ --unsigned int key_from_int(int i) --{ -- return(i); --} -- -- --/* Function: key_from_string -- * Arguments: char *s: the null terminated string -- * to compute the key of -- * Returns: the key -- */ --unsigned int key_from_string(char *s) --{ -- unsigned int result=0; -- unsigned char *n; -- int i; -- if (!s) return(1); -- for (n=s,i=0;*n;n++,i++) result^=(*n*57)^*n*i; -- return(result); --} -- -- --/* Function: hash_create_table -- * Arguments: compare_function: a function to compare -- * a table instance with a correlator -- * key_function: a function to generate a 32 bit -- * hash key from a correlator -- * Returns: a pointer to the new table -- */ --table hash_create_table (int (*compare_function)(void *, void *), -- unsigned int (*key_function)(unsigned int *)) --{ -- table new=(table)malloc(sizeof(struct table)); -- memset(new, 0, sizeof(struct table)); 
-- -- new->compare_function=compare_function; -- new->key_function=key_function; -- new->number_of_entries=0; -- new->size=4; -- new->entries=(table_entry *)malloc(sizeof(table_entry)*new->size); -- memset(new->entries,0,sizeof(table_entry)*new->size); -- return(new); --} -- -- --/* Function: hash_table_find -- * Arguments: t: a table to look in -- * comparator: a value to access the table entry -- * Returns: the element references to by comparator, or null -- */ --void *hash_table_find (table t, void *comparator) --{ -- int success; -- table_entry* entry=table_lookup(t,comparator, -- (*t->key_function)(comparator), -- t->compare_function, -- &success); -- if (success) return((*entry)->value); -- return(0); --} -- -- --/* Function: hash_table_insert -- * Arguments: t: a table to insert the object -- * value: the object to put in the table -- * comparator: the value by which the object -- * will be addressed -- * Returns: nothing -- */ --void hash_table_insert (table t, void *value, void *comparator) --{ -- int success; -- unsigned int k=(*t->key_function)(comparator); -- table_entry *position=table_lookup(t,comparator,k, -- t->compare_function,&success); -- table_entry entry; -- -- if (success) { -- entry = *position; -- } else { -- entry = (table_entry)malloc(sizeof(struct table_entry)); -- memset(entry, 0, sizeof(struct table_entry)); -- entry->next= *position; -- *position=entry; -- t->number_of_entries++; -- } -- entry->value=value; -- entry->key=k; -- if (t->number_of_entries > t->size) resize_table(t,t->size*2); --} -- --/* Function: hash_table_remove -- * Arguments: t: the table to remove the object from -- * comparator: the index value of the object to remove -- * Returns: -- */ --void hash_table_remove (table t, void *comparator) --{ -- int success; -- table_entry temp; -- table_entry *position=table_lookup(t,comparator, -- (*t->key_function)(comparator), -- t->compare_function,&success); -- if(success) { -- temp=*position; -- *position=(*position)->next; 
-- free(temp); /* the value? */ -- t->number_of_entries--; -- } --} -- --/* Function: hash_iterate_table_entries -- * Arguments: t: the table to iterate over -- * handler: a function to call with each element -- * of the table, along with arg -- * arg: the opaque object to pass to handler -- * Returns: nothing -- */ --void hash_iterate_table_entries(table t, -- void (*handler)(void *,void *), -- void *arg) --{ -- int i; -- table_entry *j,*next; -- -- for (i=0;isize;i++) -- for (j=t->entries+i;*j;j=next){ -- next=&((*j)->next); -- (*handler)(arg,(*j)->value); -- } --} -- --/* Function: hash_filter_table_entries -- * Arguments: t: the table to iterate over -- * handler: a function to call with each element -- * of the table, along with arg -- * arg: the opaque object to pass to handler -- * Returns: nothing -- * Notes: operations on the table inside handler are not safe -- * -- * filter_table_entires() calls the handler function for each -- * item in the table, passing it and arg. The handler function -- * returns 1 if it is to be retained in the table, and 0 -- * if it is to be removed. 
-- */ --void hash_filter_table_entries(table t, int (*handler)(void *, void *), void *arg) --{ -- int i; -- table_entry *j,*next,v; -- -- for (i=0;isize;i++) -- for (j=t->entries+i;*j;j=next){ -- next=&((*j)->next); -- if (!(*handler)(arg,(*j)->value)){ -- next=j; -- v=*j; -- *j=(*j)->next; -- free(v); -- t->number_of_entries--; -- } -- } --} -- --/* Function: destroy_table -- * Arguments: t: the table to free -- * thunk: a function to call with each element, -- * most likely free() -- * Returns: nothing -- */ --void hash_destroy_table(table t,void (*thunk)(void *)) --{ -- table_entry j,next; -- int i; -- for (i=0;isize;i++) -- for (j=t->entries[i];j;j=next){ -- next=j->next; -- if (thunk) (*thunk)(j->value); -- free(j); -- } -- free(t->entries); -- free(t); --} diff --cc lnet/ulnds/socklnd/table.h index 7fab586,7fab586..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/socklnd/table.h +++ /dev/null @@@ -1,39 -1,39 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. 
-- * Copyright (c) 2002 Eric Hoffman -- * -- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ -- */ -- --#ifndef E_TABLE --#define E_TABLE -- --typedef struct table_entry { -- unsigned int key; -- void *value; -- struct table_entry *next; --} *table_entry; -- -- --typedef struct table { -- unsigned int size; -- int number_of_entries; -- table_entry *entries; -- int (*compare_function)(void *, void *); -- unsigned int (*key_function)(unsigned int *); --} *table; -- --/* table.c */ --unsigned int key_from_int(int i); --unsigned int key_from_string(char *s); --table hash_create_table(int (*compare_function)(void *, void *), unsigned int (*key_function)(unsigned int *)); --void *hash_table_find(table t, void *comparator); --void hash_table_insert(table t, void *value, void *comparator); --void hash_table_remove(table t, void *comparator); --void hash_iterate_table_entries(table t, void (*handler)(void *, void *), void *arg); --void hash_filter_table_entries(table t, int (*handler)(void *, void *), void *arg); --void hash_destroy_table(table t, void (*thunk)(void *)); -- --#endif diff --cc lnet/ulnds/socklnd/tcplnd.c index 0c47f42,6e9cca9..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/socklnd/tcplnd.c +++ /dev/null @@@ -1,257 -1,258 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. -- * Copyright (c) 2003 Cluster File Systems, Inc. -- * -- * This file is part of Lustre, http://www.lustre.org. -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. 
-- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --/* tcpnal.c: -- This file implements the TCP-based nal by providing glue -- between the connection service and the generic NAL implementation */ -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#ifndef __CYGWIN__ --#include --#endif -- --/* Function: tcpnal_send -- * Arguments: nal: pointer to my nal control block -- * private: unused -- * cookie: passed back to the portals library -- * hdr: pointer to the portals header -- * nid: destination node -- * pid: destination process -- * data: body of the message -- * len: length of the body -- * Returns: zero on success -- * -- * sends a packet to the peer, after insuring that a connection exists -- */ - ptl_err_t tcpnal_send(nal_cb_t *n, -ptl_err_t tcpnal_send(lib_nal_t *n, -- void *private, -- lib_msg_t *cookie, -- ptl_hdr_t *hdr, -- int type, -- ptl_nid_t nid, -- ptl_pid_t pid, -- unsigned int niov, -- struct iovec *iov, -- size_t offset, -- size_t len) --{ -- connection c; - bridge b=(bridge)n->nal_data; - bridge b=(bridge)n->libnal_data; -- struct iovec tiov[257]; -- static pthread_mutex_t send_lock = PTHREAD_MUTEX_INITIALIZER; -- ptl_err_t rc = PTL_OK; -- int sysrc; -- int total; -- int ntiov; -- int i; -- -- if (!(c=force_tcp_connection((manager)b->lower, -- PNAL_IP(nid,b), -- PNAL_PORT(nid,pid), -- b->local))) -- return(PTL_FAIL); -- -- /* TODO: these results should be checked. furthermore, provision -- must be made for the SIGPIPE which is delivered when -- writing on a tcp socket which has closed underneath -- the application. 
there is a linux flag in the sendmsg -- call which turns off the signally behaviour, but its -- nonstandard */ -- -- LASSERT (niov <= 256); -- -- tiov[0].iov_base = hdr; -- tiov[0].iov_len = sizeof(ptl_hdr_t); -- ntiov = 1 + lib_extract_iov(256, &tiov[1], niov, iov, offset, len); -- -- pthread_mutex_lock(&send_lock); --#if 1 -- for (i = total = 0; i < ntiov; i++) -- total += tiov[i].iov_len; -- -- sysrc = syscall(SYS_writev, c->fd, tiov, ntiov); -- if (sysrc != total) { -- fprintf (stderr, "BAD SEND rc %d != %d, errno %d\n", -- rc, total, errno); -- rc = PTL_FAIL; -- } --#else -- for (i = total = 0; i <= ntiov; i++) { -- rc = send(c->fd, tiov[i].iov_base, tiov[i].iov_len, 0); -- -- if (rc != tiov[i].iov_len) { -- fprintf (stderr, "BAD SEND rc %d != %d, errno %d\n", -- rc, tiov[i].iov_len, errno); -- rc = PTL_FAIL; -- break; -- } -- total += rc; -- } --#endif --#if 0 -- fprintf (stderr, "sent %s total %d in %d frags\n", -- hdr->type == PTL_MSG_ACK ? "ACK" : -- hdr->type == PTL_MSG_PUT ? "PUT" : -- hdr->type == PTL_MSG_GET ? "GET" : -- hdr->type == PTL_MSG_REPLY ? "REPLY" : -- hdr->type == PTL_MSG_HELLO ? "HELLO" : "UNKNOWN", -- total, niov + 1); --#endif -- pthread_mutex_unlock(&send_lock); -- -- if (rc == PTL_OK) { -- /* NB the NAL only calls lib_finalize() if it returns PTL_OK -- * from cb_send() */ -- lib_finalize(n, private, cookie, PTL_OK); -- } -- -- return(rc); --} -- -- --/* Function: tcpnal_recv - * Arguments: nal_cb_t *nal: pointer to my nal control block - * Arguments: lib_nal_t *nal: pointer to my nal control block -- * void *private: connection pointer passed through -- * lib_parse() -- * lib_msg_t *cookie: passed back to portals library -- * user_ptr data: pointer to the destination buffer -- * size_t mlen: length of the body -- * size_t rlen: length of data in the network -- * Returns: zero on success -- * -- * blocking read of the requested data. 
must drain out the -- * difference of mainpulated and requested lengths from the network -- */ - ptl_err_t tcpnal_recv(nal_cb_t *n, -ptl_err_t tcpnal_recv(lib_nal_t *n, -- void *private, -- lib_msg_t *cookie, -- unsigned int niov, -- struct iovec *iov, -- size_t offset, -- size_t mlen, -- size_t rlen) -- --{ -- struct iovec tiov[256]; -- int ntiov; -- int i; -- -- if (!niov) -- goto finalize; -- -- LASSERT(mlen); -- LASSERT(rlen); -- LASSERT(rlen >= mlen); -- -- ntiov = lib_extract_iov(256, tiov, niov, iov, offset, mlen); -- -- /* FIXME -- * 1. Is this effecient enough? change to use readv() directly? -- * 2. need check return from read_connection() -- * - MeiJia -- */ -- for (i = 0; i < ntiov; i++) -- read_connection(private, tiov[i].iov_base, tiov[i].iov_len); -- --finalize: -- /* FIXME; we always assume success here... */ -- lib_finalize(n, private, cookie, PTL_OK); -- -- if (mlen!=rlen){ -- char *trash=malloc(rlen-mlen); -- -- /*TODO: check error status*/ -- read_connection(private,trash,rlen-mlen); -- free(trash); -- } -- -- return(PTL_OK); --} -- -- --/* Function: from_connection: -- * Arguments: c: the connection to read from -- * Returns: whether or not to continue reading from this connection, -- * expressed as a 1 to continue, and a 0 to not -- * -- * from_connection() is called from the select loop when i/o is -- * available. It attempts to read the portals header and -- * pass it to the generic library for processing. 
-- */ --static int from_connection(void *a, void *d) --{ -- connection c = d; -- bridge b = a; -- ptl_hdr_t hdr; -- -- if (read_connection(c, (unsigned char *)&hdr, sizeof(hdr))){ - lib_parse(b->nal_cb, &hdr, c); - lib_parse(b->lib_nal, &hdr, c); - /*TODO: check error status*/ -- return(1); -- } -- return(0); --} -- -- --static void tcpnal_shutdown(bridge b) --{ -- shutdown_connections(b->lower); --} -- --/* Function: PTL_IFACE_TCP -- * Arguments: pid_request: desired port number to bind to -- * desired: passed NAL limits structure -- * actual: returned NAL limits structure -- * Returns: a nal structure on success, or null on failure -- */ --int tcpnal_init(bridge b) --{ -- manager m; -- - b->nal_cb->cb_send=tcpnal_send; - b->nal_cb->cb_recv=tcpnal_recv; - b->lib_nal->libnal_send=tcpnal_send; - b->lib_nal->libnal_recv=tcpnal_recv; -- b->shutdown=tcpnal_shutdown; -- - if (!(m=init_connections(PNAL_PORT(b->nal_cb->ni.nid, - b->nal_cb->ni.pid), - if (!(m=init_connections(PNAL_PORT(b->lib_nal->libnal_ni.ni_pid.nid, - b->lib_nal->libnal_ni.ni_pid.pid), -- from_connection,b))){ -- /* TODO: this needs to shut down the -- newly created junk */ -- return(PTL_NAL_FAILED); -- } -- /* XXX cfs hack */ - b->nal_cb->ni.pid=0; -// b->lib_nal->libnal_ni.ni_pid.pid=0; -- b->lower=m; -- return(PTL_OK); --} diff --cc lnet/ulnds/socklnd/timer.h index aaf39d2,aaf39d2..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/socklnd/timer.h +++ /dev/null @@@ -1,30 -1,30 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. 
-- * Copyright (c) 2002 Eric Hoffman -- * -- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ -- */ -- --/* TODO: make this an explicit type when they become available */ --typedef unsigned long long when; -- --typedef struct timer { -- void (*function)(void *); -- void *arg; -- when w; -- int interval; -- int disable; --} *timer; -- --timer register_timer(when, void (*f)(void *), void *a); --void remove_timer(timer t); --void timer_loop(void); --void initialize_timer(void); --void register_thunk(void (*f)(void *),void *a); -- -- --#define HZ 0x100000000ull -- -- diff --cc lnet/ulnds/socklnd/utypes.h index 7eca959,7eca959..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/socklnd/utypes.h +++ /dev/null @@@ -1,12 -1,12 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. -- * -- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ -- */ -- --typedef unsigned short uint16; --typedef unsigned long uint32; --typedef unsigned long long uint64; --typedef unsigned char uint8; diff --cc lnet/ulnds/table.c index 662775a,662775a..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/table.c +++ /dev/null @@@ -1,264 -1,264 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. -- * Copyright (c) 2002 Eric Hoffman -- * -- * This file is part of Lustre, http://www.lustre.org. -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. 
-- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --#include --#include --#include -- -- --/* table.c: -- * a very simple hash table implementation with paramerterizable -- * comparison and key generation functions. it does resize -- * in order to accomidate more entries, but never collapses -- * the table -- */ -- --static table_entry *table_lookup (table t,void *comparator, -- unsigned int k, -- int (*compare_function)(void *, void *), -- int *success) --{ -- unsigned int key=k%t->size; -- table_entry *i; -- -- for (i=&(t->entries[key]);*i;i=&((*i)->next)){ -- if (compare_function && ((*i)->key==k)) -- if ((*t->compare_function)((*i)->value,comparator)){ -- *success=1; -- return(i); -- } -- } -- *success=0; -- return(&(t->entries[key])); --} -- -- --static void resize_table(table t, int size) --{ -- int old_size=t->size; -- table_entry *old_entries=t->entries; -- int i; -- table_entry j,n; -- table_entry *position; -- int success; -- -- t->size=size; -- t->entries=(table_entry *)malloc(sizeof(table_entry)*t->size); -- memset(t->entries,0,sizeof(table_entry)*t->size); -- -- for (i=0;inext; -- position=table_lookup(t,0,j->key,0,&success); -- j->next= *position; -- *position=j; -- } -- free(old_entries); --} -- -- --/* Function: key_from_int -- * Arguments: int i: value to compute the key of -- * Returns: the key -- */ --unsigned int key_from_int(int i) --{ -- return(i); --} -- -- --/* Function: key_from_string -- * Arguments: char *s: the null terminated string -- * to compute the key of -- * Returns: the key -- */ --unsigned int key_from_string(char *s) --{ -- unsigned int result=0; -- unsigned char *n; -- int i; -- if (!s) return(1); -- for (n=s,i=0;*n;n++,i++) result^=(*n*57)^*n*i; -- return(result); --} -- -- --/* Function: hash_create_table -- * Arguments: compare_function: a function to compare -- * a 
table instance with a correlator -- * key_function: a function to generate a 32 bit -- * hash key from a correlator -- * Returns: a pointer to the new table -- */ --table hash_create_table (int (*compare_function)(void *, void *), -- unsigned int (*key_function)(unsigned int *)) --{ -- table new=(table)malloc(sizeof(struct table)); -- memset(new, 0, sizeof(struct table)); -- -- new->compare_function=compare_function; -- new->key_function=key_function; -- new->number_of_entries=0; -- new->size=4; -- new->entries=(table_entry *)malloc(sizeof(table_entry)*new->size); -- memset(new->entries,0,sizeof(table_entry)*new->size); -- return(new); --} -- -- --/* Function: hash_table_find -- * Arguments: t: a table to look in -- * comparator: a value to access the table entry -- * Returns: the element references to by comparator, or null -- */ --void *hash_table_find (table t, void *comparator) --{ -- int success; -- table_entry* entry=table_lookup(t,comparator, -- (*t->key_function)(comparator), -- t->compare_function, -- &success); -- if (success) return((*entry)->value); -- return(0); --} -- -- --/* Function: hash_table_insert -- * Arguments: t: a table to insert the object -- * value: the object to put in the table -- * comparator: the value by which the object -- * will be addressed -- * Returns: nothing -- */ --void hash_table_insert (table t, void *value, void *comparator) --{ -- int success; -- unsigned int k=(*t->key_function)(comparator); -- table_entry *position=table_lookup(t,comparator,k, -- t->compare_function,&success); -- table_entry entry; -- -- if (success) { -- entry = *position; -- } else { -- entry = (table_entry)malloc(sizeof(struct table_entry)); -- memset(entry, 0, sizeof(struct table_entry)); -- entry->next= *position; -- *position=entry; -- t->number_of_entries++; -- } -- entry->value=value; -- entry->key=k; -- if (t->number_of_entries > t->size) resize_table(t,t->size*2); --} -- --/* Function: hash_table_remove -- * Arguments: t: the table to remove 
the object from -- * comparator: the index value of the object to remove -- * Returns: -- */ --void hash_table_remove (table t, void *comparator) --{ -- int success; -- table_entry temp; -- table_entry *position=table_lookup(t,comparator, -- (*t->key_function)(comparator), -- t->compare_function,&success); -- if(success) { -- temp=*position; -- *position=(*position)->next; -- free(temp); /* the value? */ -- t->number_of_entries--; -- } --} -- --/* Function: hash_iterate_table_entries -- * Arguments: t: the table to iterate over -- * handler: a function to call with each element -- * of the table, along with arg -- * arg: the opaque object to pass to handler -- * Returns: nothing -- */ --void hash_iterate_table_entries(table t, -- void (*handler)(void *,void *), -- void *arg) --{ -- int i; -- table_entry *j,*next; -- -- for (i=0;isize;i++) -- for (j=t->entries+i;*j;j=next){ -- next=&((*j)->next); -- (*handler)(arg,(*j)->value); -- } --} -- --/* Function: hash_filter_table_entries -- * Arguments: t: the table to iterate over -- * handler: a function to call with each element -- * of the table, along with arg -- * arg: the opaque object to pass to handler -- * Returns: nothing -- * Notes: operations on the table inside handler are not safe -- * -- * filter_table_entires() calls the handler function for each -- * item in the table, passing it and arg. The handler function -- * returns 1 if it is to be retained in the table, and 0 -- * if it is to be removed. 
-- */ --void hash_filter_table_entries(table t, int (*handler)(void *, void *), void *arg) --{ -- int i; -- table_entry *j,*next,v; -- -- for (i=0;isize;i++) -- for (j=t->entries+i;*j;j=next){ -- next=&((*j)->next); -- if (!(*handler)(arg,(*j)->value)){ -- next=j; -- v=*j; -- *j=(*j)->next; -- free(v); -- t->number_of_entries--; -- } -- } --} -- --/* Function: destroy_table -- * Arguments: t: the table to free -- * thunk: a function to call with each element, -- * most likely free() -- * Returns: nothing -- */ --void hash_destroy_table(table t,void (*thunk)(void *)) --{ -- table_entry j,next; -- int i; -- for (i=0;isize;i++) -- for (j=t->entries[i];j;j=next){ -- next=j->next; -- if (thunk) (*thunk)(j->value); -- free(j); -- } -- free(t->entries); -- free(t); --} diff --cc lnet/ulnds/table.h index 7fab586,7fab586..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/table.h +++ /dev/null @@@ -1,39 -1,39 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. 
-- * Copyright (c) 2002 Eric Hoffman -- * -- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ -- */ -- --#ifndef E_TABLE --#define E_TABLE -- --typedef struct table_entry { -- unsigned int key; -- void *value; -- struct table_entry *next; --} *table_entry; -- -- --typedef struct table { -- unsigned int size; -- int number_of_entries; -- table_entry *entries; -- int (*compare_function)(void *, void *); -- unsigned int (*key_function)(unsigned int *); --} *table; -- --/* table.c */ --unsigned int key_from_int(int i); --unsigned int key_from_string(char *s); --table hash_create_table(int (*compare_function)(void *, void *), unsigned int (*key_function)(unsigned int *)); --void *hash_table_find(table t, void *comparator); --void hash_table_insert(table t, void *value, void *comparator); --void hash_table_remove(table t, void *comparator); --void hash_iterate_table_entries(table t, void (*handler)(void *, void *), void *arg); --void hash_filter_table_entries(table t, int (*handler)(void *, void *), void *arg); --void hash_destroy_table(table t, void (*thunk)(void *)); -- --#endif diff --cc lnet/ulnds/tcplnd.c index 0c47f42,6e9cca9..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/tcplnd.c +++ /dev/null @@@ -1,257 -1,258 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. -- * Copyright (c) 2003 Cluster File Systems, Inc. -- * -- * This file is part of Lustre, http://www.lustre.org. -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. 
-- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --/* tcpnal.c: -- This file implements the TCP-based nal by providing glue -- between the connection service and the generic NAL implementation */ -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#ifndef __CYGWIN__ --#include --#endif -- --/* Function: tcpnal_send -- * Arguments: nal: pointer to my nal control block -- * private: unused -- * cookie: passed back to the portals library -- * hdr: pointer to the portals header -- * nid: destination node -- * pid: destination process -- * data: body of the message -- * len: length of the body -- * Returns: zero on success -- * -- * sends a packet to the peer, after insuring that a connection exists -- */ - ptl_err_t tcpnal_send(nal_cb_t *n, -ptl_err_t tcpnal_send(lib_nal_t *n, -- void *private, -- lib_msg_t *cookie, -- ptl_hdr_t *hdr, -- int type, -- ptl_nid_t nid, -- ptl_pid_t pid, -- unsigned int niov, -- struct iovec *iov, -- size_t offset, -- size_t len) --{ -- connection c; - bridge b=(bridge)n->nal_data; - bridge b=(bridge)n->libnal_data; -- struct iovec tiov[257]; -- static pthread_mutex_t send_lock = PTHREAD_MUTEX_INITIALIZER; -- ptl_err_t rc = PTL_OK; -- int sysrc; -- int total; -- int ntiov; -- int i; -- -- if (!(c=force_tcp_connection((manager)b->lower, -- PNAL_IP(nid,b), -- PNAL_PORT(nid,pid), -- b->local))) -- return(PTL_FAIL); -- -- /* TODO: these results should be checked. furthermore, provision -- must be made for the SIGPIPE which is delivered when -- writing on a tcp socket which has closed underneath -- the application. 
there is a linux flag in the sendmsg -- call which turns off the signally behaviour, but its -- nonstandard */ -- -- LASSERT (niov <= 256); -- -- tiov[0].iov_base = hdr; -- tiov[0].iov_len = sizeof(ptl_hdr_t); -- ntiov = 1 + lib_extract_iov(256, &tiov[1], niov, iov, offset, len); -- -- pthread_mutex_lock(&send_lock); --#if 1 -- for (i = total = 0; i < ntiov; i++) -- total += tiov[i].iov_len; -- -- sysrc = syscall(SYS_writev, c->fd, tiov, ntiov); -- if (sysrc != total) { -- fprintf (stderr, "BAD SEND rc %d != %d, errno %d\n", -- rc, total, errno); -- rc = PTL_FAIL; -- } --#else -- for (i = total = 0; i <= ntiov; i++) { -- rc = send(c->fd, tiov[i].iov_base, tiov[i].iov_len, 0); -- -- if (rc != tiov[i].iov_len) { -- fprintf (stderr, "BAD SEND rc %d != %d, errno %d\n", -- rc, tiov[i].iov_len, errno); -- rc = PTL_FAIL; -- break; -- } -- total += rc; -- } --#endif --#if 0 -- fprintf (stderr, "sent %s total %d in %d frags\n", -- hdr->type == PTL_MSG_ACK ? "ACK" : -- hdr->type == PTL_MSG_PUT ? "PUT" : -- hdr->type == PTL_MSG_GET ? "GET" : -- hdr->type == PTL_MSG_REPLY ? "REPLY" : -- hdr->type == PTL_MSG_HELLO ? "HELLO" : "UNKNOWN", -- total, niov + 1); --#endif -- pthread_mutex_unlock(&send_lock); -- -- if (rc == PTL_OK) { -- /* NB the NAL only calls lib_finalize() if it returns PTL_OK -- * from cb_send() */ -- lib_finalize(n, private, cookie, PTL_OK); -- } -- -- return(rc); --} -- -- --/* Function: tcpnal_recv - * Arguments: nal_cb_t *nal: pointer to my nal control block - * Arguments: lib_nal_t *nal: pointer to my nal control block -- * void *private: connection pointer passed through -- * lib_parse() -- * lib_msg_t *cookie: passed back to portals library -- * user_ptr data: pointer to the destination buffer -- * size_t mlen: length of the body -- * size_t rlen: length of data in the network -- * Returns: zero on success -- * -- * blocking read of the requested data. 
must drain out the -- * difference of mainpulated and requested lengths from the network -- */ - ptl_err_t tcpnal_recv(nal_cb_t *n, -ptl_err_t tcpnal_recv(lib_nal_t *n, -- void *private, -- lib_msg_t *cookie, -- unsigned int niov, -- struct iovec *iov, -- size_t offset, -- size_t mlen, -- size_t rlen) -- --{ -- struct iovec tiov[256]; -- int ntiov; -- int i; -- -- if (!niov) -- goto finalize; -- -- LASSERT(mlen); -- LASSERT(rlen); -- LASSERT(rlen >= mlen); -- -- ntiov = lib_extract_iov(256, tiov, niov, iov, offset, mlen); -- -- /* FIXME -- * 1. Is this effecient enough? change to use readv() directly? -- * 2. need check return from read_connection() -- * - MeiJia -- */ -- for (i = 0; i < ntiov; i++) -- read_connection(private, tiov[i].iov_base, tiov[i].iov_len); -- --finalize: -- /* FIXME; we always assume success here... */ -- lib_finalize(n, private, cookie, PTL_OK); -- -- if (mlen!=rlen){ -- char *trash=malloc(rlen-mlen); -- -- /*TODO: check error status*/ -- read_connection(private,trash,rlen-mlen); -- free(trash); -- } -- -- return(PTL_OK); --} -- -- --/* Function: from_connection: -- * Arguments: c: the connection to read from -- * Returns: whether or not to continue reading from this connection, -- * expressed as a 1 to continue, and a 0 to not -- * -- * from_connection() is called from the select loop when i/o is -- * available. It attempts to read the portals header and -- * pass it to the generic library for processing. 
-- */ --static int from_connection(void *a, void *d) --{ -- connection c = d; -- bridge b = a; -- ptl_hdr_t hdr; -- -- if (read_connection(c, (unsigned char *)&hdr, sizeof(hdr))){ - lib_parse(b->nal_cb, &hdr, c); - lib_parse(b->lib_nal, &hdr, c); - /*TODO: check error status*/ -- return(1); -- } -- return(0); --} -- -- --static void tcpnal_shutdown(bridge b) --{ -- shutdown_connections(b->lower); --} -- --/* Function: PTL_IFACE_TCP -- * Arguments: pid_request: desired port number to bind to -- * desired: passed NAL limits structure -- * actual: returned NAL limits structure -- * Returns: a nal structure on success, or null on failure -- */ --int tcpnal_init(bridge b) --{ -- manager m; -- - b->nal_cb->cb_send=tcpnal_send; - b->nal_cb->cb_recv=tcpnal_recv; - b->lib_nal->libnal_send=tcpnal_send; - b->lib_nal->libnal_recv=tcpnal_recv; -- b->shutdown=tcpnal_shutdown; -- - if (!(m=init_connections(PNAL_PORT(b->nal_cb->ni.nid, - b->nal_cb->ni.pid), - if (!(m=init_connections(PNAL_PORT(b->lib_nal->libnal_ni.ni_pid.nid, - b->lib_nal->libnal_ni.ni_pid.pid), -- from_connection,b))){ -- /* TODO: this needs to shut down the -- newly created junk */ -- return(PTL_NAL_FAILED); -- } -- /* XXX cfs hack */ - b->nal_cb->ni.pid=0; -// b->lib_nal->libnal_ni.ni_pid.pid=0; -- b->lower=m; -- return(PTL_OK); --} diff --cc lnet/ulnds/timer.h index aaf39d2,aaf39d2..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/timer.h +++ /dev/null @@@ -1,30 -1,30 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. 
-- * Copyright (c) 2002 Eric Hoffman -- * -- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ -- */ -- --/* TODO: make this an explicit type when they become available */ --typedef unsigned long long when; -- --typedef struct timer { -- void (*function)(void *); -- void *arg; -- when w; -- int interval; -- int disable; --} *timer; -- --timer register_timer(when, void (*f)(void *), void *a); --void remove_timer(timer t); --void timer_loop(void); --void initialize_timer(void); --void register_thunk(void (*f)(void *),void *a); -- -- --#define HZ 0x100000000ull -- -- diff --cc lnet/ulnds/utypes.h index 7eca959,7eca959..0000000 deleted file mode 100644,100644 --- a/lnet/ulnds/utypes.h +++ /dev/null @@@ -1,12 -1,12 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2002 Cray Inc. -- * -- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ -- */ -- --typedef unsigned short uint16; --typedef unsigned long uint32; --typedef unsigned long long uint64; --typedef unsigned char uint8; diff --cc lnet/utils/.cvsignore index e2a0d44,e2a0d44..0000000 deleted file mode 100644,100644 --- a/lnet/utils/.cvsignore +++ /dev/null @@@ -1,10 -1,10 +1,0 @@@ --Makefile --Makefile.in --acceptor --debugctl --ptlctl --.deps --routerstat --wirecheck --gmnalnid --.*.cmd diff --cc lnet/utils/Makefile.am index 051bcd9,1d9f905..0000000 deleted file mode 100644,100644 --- a/lnet/utils/Makefile.am +++ /dev/null @@@ -1,39 -1,46 +1,0 @@@ --# Copyright (C) 2001 Cluster File Systems, Inc. --# --# This code is issued under the GNU General Public License. 
--# See the file COPYING in this distribution -- --## $(srcdir)/../ for , ../../ for generated --#COMPILE = $(CC) -Wall -g -I$(srcdir)/../include -I../../include --#LINK = $(CC) -o $@ -- --if LIBLUSTRE --noinst_LIBRARIES = libuptlctl.a - libuptlctl_a_SOURCES = portals.c debug.c l_ioctl.c parser.c parser.h -endif - -libuptlctl_a_SOURCES = portals.c debug.c l_ioctl.c --libuptlctl_a_CPPFLAGS = $(LLCPPFLAGS) --libuptlctl_a_CFLAGS = $(LLCFLAGS) - endif -- - if UTILS - sbin_PROGRAMS = acceptor ptlctl debugctl routerstat wirecheck gmnalnid -sbin_PROGRAMS = debugctl - --lib_LIBRARIES = libptlctl.a - -libptlctl_a_SOURCES = portals.c debug.c l_ioctl.c parser.c parser.h - -if UTILS -if !CRAY_PORTALS -sbin_PROGRAMS += acceptor ptlctl routerstat wirecheck gmnalnid -endif --endif -- --acceptor_SOURCES = acceptor.c --acceptor_LDADD = $(LIBWRAP) -- --wirecheck_SOURCES = wirecheck.c - - libptlctl_a_SOURCES = portals.c debug.c l_ioctl.c parser.c parser.h -- --gmnalnid_SOURCES = gmnalnid.c -- --ptlctl_SOURCES = ptlctl.c --ptlctl_LDADD = -L. -lptlctl $(LIBREADLINE) $(LIBEFENCE) --ptlctl_DEPENDENCIES = libptlctl.a - -routerstat_SOURCES = routerstat.c -- --debugctl_SOURCES = debugctl.c --debugctl_LDADD = -L. 
-lptlctl $(LIBREADLINE) $(LIBEFENCE) --debugctl_DEPENDENCIES = libptlctl.a -- - routerstat_SOURCES = routerstat.c diff --cc lnet/utils/Makefile.mk index cbbe6d5,cbbe6d5..0000000 deleted file mode 100644,100644 --- a/lnet/utils/Makefile.mk +++ /dev/null @@@ -1,6 -1,6 +1,0 @@@ --include $(src)/../Kernelenv -- --host-progs := acceptor ptlctl --always := $(host-progs) -- --ptlctl-objs := ptlctl.o $(PTLCTLOBJS) diff --cc lnet/utils/acceptor.c index cff2235,8aea457..0000000 deleted file mode 100644,100644 --- a/lnet/utils/acceptor.c +++ /dev/null @@@ -1,356 -1,230 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- */ --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#ifdef HAVE_LIBWRAP --#include --#include --#include --#endif -- --#include --#include --#include --#include -- --/* should get this from autoconf somehow */ --#ifndef PIDFILE_DIR --#define PIDFILE_DIR "/var/run" --#endif -- --#define PROGNAME "acceptor" -- --#ifdef HAVE_LIBWRAP --/* needed because libwrap declares these as externs */ --int allow_severity = LOG_INFO; --int deny_severity = LOG_WARNING; --#endif -- --void create_pidfile(char *name, int port) --{ -- char pidfile[1024]; -- FILE *fp; -- -- snprintf(pidfile, sizeof(pidfile), "%s/%s-%d.pid", -- PIDFILE_DIR, name, port); -- -- if ((fp = fopen(pidfile, "w"))) { -- fprintf(fp, "%d\n", getpid()); -- fclose(fp); -- } else { -- syslog(LOG_ERR, "%s: %s\n", pidfile, -- strerror(errno)); -- } --} -- --int pidfile_exists(char *name, int port) --{ -- char pidfile[1024]; -- -- snprintf(pidfile, sizeof(pidfile), "%s/%s-%d.pid", -- PIDFILE_DIR, name, port); -- -- if (!access(pidfile, F_OK)) { -- fprintf(stderr, "%s: exists, acceptor already running.\n", -- pidfile); -- return (1); -- } -- return (0); - } - - int - parse_size (int *sizep, char *str) - { - int size; - char mod[32]; - - switch (sscanf (str, 
"%d%1[gGmMkK]", &size, mod)) - { - default: - return (-1); - - case 1: - *sizep = size; - return (0); - - case 2: - switch (*mod) - { - case 'g': - case 'G': - *sizep = size << 30; - return (0); - - case 'm': - case 'M': - *sizep = size << 20; - return (0); - - case 'k': - case 'K': - *sizep = size << 10; - return (0); - - default: - *sizep = size; - return (0); - } - } --} -- --void --show_connection (int fd, __u32 net_ip) --{ -- struct hostent *h = gethostbyaddr ((char *)&net_ip, sizeof net_ip, AF_INET); -- __u32 host_ip = ntohl (net_ip); - int rxmem = 0; - int txmem = 0; - int nonagle = 0; -- int len; -- char host[1024]; - - len = sizeof (txmem); - if (getsockopt (fd, SOL_SOCKET, SO_SNDBUF, &txmem, &len) != 0) - perror ("Cannot get write buffer size"); - - len = sizeof (rxmem); - if (getsockopt (fd, SOL_SOCKET, SO_RCVBUF, &rxmem, &len) != 0) - perror ("Cannot get read buffer size"); -- - len = sizeof (nonagle); - if (getsockopt (fd, IPPROTO_TCP, TCP_NODELAY, &nonagle, &len) != 0) - perror ("Cannot get nagle"); - -- if (h == NULL) -- snprintf (host, sizeof(host), "%d.%d.%d.%d", (host_ip >> 24) & 0xff, -- (host_ip >> 16) & 0xff, (host_ip >> 8) & 0xff, host_ip & 0xff); -- else -- snprintf (host, sizeof(host), "%s", h->h_name); -- - syslog (LOG_INFO, "Accepted host: %s snd: %d rcv %d nagle: %s\n", - host, txmem, rxmem, nonagle ? 
"disabled" : "enabled"); - syslog (LOG_INFO, "Accepted host: %s\n", host); --} -- --void --usage (char *myname) --{ - fprintf (stderr, "Usage: %s [-r recv_mem] [-s send_mem] [-n] [-p] [-N nal_id] port\n", myname); - fprintf (stderr, "Usage: %s [-N nal_id] port\n", myname); -- exit (1); --} -- --int main(int argc, char **argv) --{ -- int o, fd, rc, port, pfd; -- struct sockaddr_in srvaddr; -- int c; - int rxmem = 0; - int txmem = 0; -- int noclose = 0; - int nonagle = 1; -- int nal = SOCKNAL; - int bind_irq = 0; - int rport; - int require_privports = 1; -- - while ((c = getopt (argc, argv, "N:pr:s:nli")) != -1) - while ((c = getopt (argc, argv, "N:l")) != -1) -- switch (c) -- { - case 'r': - if (parse_size (&rxmem, optarg) != 0 || rxmem < 0) - usage (argv[0]); - break; - - case 's': - if (parse_size (&txmem, optarg) != 0 || txmem < 0) - usage (argv[0]); - break; - - case 'n': - nonagle = 0; - break; - -- case 'l': -- noclose = 1; -- break; -- - case 'i': - bind_irq = 1; - break; - case 'p': - require_privports = 0; - break; -- case 'N': - if (parse_size(&nal, optarg) != 0 || - if (sscanf(optarg, "%d", &nal) != 1 || -- nal < 0 || nal > NAL_MAX_NR) -- usage(argv[0]); -- break; -- -- default: -- usage (argv[0]); -- break; -- } -- -- if (optind >= argc) -- usage (argv[0]); -- -- port = atol(argv[optind++]); -- -- if (pidfile_exists(PROGNAME, port)) -- exit(1); -- -- memset(&srvaddr, 0, sizeof(srvaddr)); -- srvaddr.sin_family = AF_INET; -- srvaddr.sin_port = htons(port); -- srvaddr.sin_addr.s_addr = INADDR_ANY; -- -- fd = socket(PF_INET, SOCK_STREAM, 0); -- if (fd < 0) { -- perror("opening socket"); -- exit(1); -- } -- -- o = 1; -- if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &o, sizeof(o))) { -- perror("Cannot set REUSEADDR socket opt"); -- exit(1); - } - - if (nonagle) - { - o = 1; - rc = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &o, sizeof (o)); - if (rc != 0) - { - perror ("Cannot disable nagle"); - exit (1); - } -- } -- - if (txmem != 0) - { - rc = setsockopt (fd, 
SOL_SOCKET, SO_SNDBUF, &txmem, sizeof (txmem)); - if (rc != 0) - { - perror ("Cannot set write buffer size"); - exit (1); - } - } - - if (rxmem != 0) - { - rc = setsockopt (fd, SOL_SOCKET, SO_RCVBUF, &rxmem, sizeof (rxmem)); - if (rc != 0) - { - perror ("Cannot set read buffer size"); - exit (1); - } - } - -- rc = bind(fd, (struct sockaddr *)&srvaddr, sizeof(srvaddr)); -- if ( rc == -1 ) { -- perror("bind: "); -- exit(1); -- } -- -- if (listen(fd, 127)) { -- perror("listen: "); -- exit(1); -- } -- fprintf(stderr, "listening on port %d\n", port); -- -- pfd = open("/dev/portals", O_RDWR); -- if ( pfd < 0 ) { -- perror("opening portals device"); -- exit(1); -- } -- - rc = daemon(0, noclose); - rc = daemon(1, noclose); -- if (rc < 0) { -- perror("daemon(): "); -- exit(1); -- } -- -- openlog(PROGNAME, LOG_PID, LOG_DAEMON); -- syslog(LOG_INFO, "started, listening on port %d\n", port); -- create_pidfile(PROGNAME, port); -- -- while (1) { -- struct sockaddr_in clntaddr; -- int len = sizeof(clntaddr); -- int cfd; -- struct portal_ioctl_data data; -- struct portals_cfg pcfg; - int privileged = 0; - char addrstr[INET_ADDRSTRLEN]; --#ifdef HAVE_LIBWRAP -- struct request_info request; - char addrstr[INET_ADDRSTRLEN]; --#endif - - -- cfd = accept(fd, (struct sockaddr *)&clntaddr, &len); -- if ( cfd < 0 ) { -- perror("accept"); -- exit(0); -- continue; -- } -- - rport = ntohs(clntaddr.sin_port); --#ifdef HAVE_LIBWRAP -- /* libwrap access control */ -- request_init(&request, RQ_DAEMON, "lustre", RQ_FILE, cfd, 0); -- sock_host(&request); -- if (!hosts_access(&request)) { -- inet_ntop(AF_INET, &clntaddr.sin_addr, -- addrstr, INET_ADDRSTRLEN); -- syslog(LOG_WARNING, "Unauthorized access from %s:%hd\n", - addrstr, rport); - addrstr, ntohs(clntaddr.sin_port)); -- close (cfd); -- continue; -- } --#endif - - if (require_privports && rport >= IPPORT_RESERVED) { - inet_ntop(AF_INET, &clntaddr.sin_addr, - addrstr, INET_ADDRSTRLEN); - syslog(LOG_ERR, "Closing non-privileged connection from 
%s:%d\n", - addrstr, rport); - close(cfd); - continue; - } - -- show_connection (cfd, clntaddr.sin_addr.s_addr); -- -- PCFG_INIT(pcfg, NAL_CMD_REGISTER_PEER_FD); -- pcfg.pcfg_nal = nal; -- pcfg.pcfg_fd = cfd; - pcfg.pcfg_flags = bind_irq; -- pcfg.pcfg_misc = SOCKNAL_CONN_NONE; /* == incoming connection */ - - -- PORTAL_IOC_INIT(data); -- data.ioc_pbuf1 = (char*)&pcfg; -- data.ioc_plen1 = sizeof(pcfg); - - -- if (ioctl(pfd, IOC_PORTAL_NAL_CMD, &data) < 0) { -- perror("ioctl failed"); -- } else { -- printf("client registered\n"); -- } -- rc = close(cfd); -- if (rc) -- perror ("close failed"); -- } -- -- closelog(); -- exit(0); -- --} diff --cc lnet/utils/debug.c index 538af44,36d8a04..0000000 deleted file mode 100644,100644 --- a/lnet/utils/debug.c +++ /dev/null @@@ -1,727 -1,757 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (C) 2001, 2002 Cluster File Systems, Inc. -- * -- * This file is part of Portals, http://www.sf.net/projects/lustre/ -- * -- * Portals is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Portals is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Portals; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- * -- * Some day I'll split all of this functionality into a cfs_debug module -- * of its own. That day is not today. 
-- * -- */ -- --#define __USE_FILE_OFFSET64 -#define _GNU_SOURCE -- --#include -- --#include --#include --#include --#include --#include --#include --#include - #include --#ifndef __CYGWIN__ --# include --#endif -- --#include --#include --#include --#include --#include -- --#include -- --#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) --#define BUG() /* workaround for module.h includes */ --#include --#endif -#include -- --#include --#include --#include "parser.h" - -#include -- --static char rawbuf[8192]; --static char *buf = rawbuf; --static int max = 8192; --//static int g_pfd = -1; --static int subsystem_mask = ~0; --static int debug_mask = ~0; -- --#define MAX_MARK_SIZE 100 -- --static const char *portal_debug_subsystems[] = -- {"undefined", "mdc", "mds", "osc", "ost", "class", "log", "llite", - "rpc", "mgmt", "portals", "socknal", "qswnal", "pinger", "filter", - "ptlbd", "echo", "ldlm", "lov", "gmnal", "router", "cobd", "ibnal", - NULL}; - "rpc", "mgmt", "portals", "libcfs", "socknal", "qswnal", "pinger", - "filter", "ptlbd", "echo", "ldlm", "lov", "gmnal", "router", "cobd", - "openibnal", "lmv", "smfs", "cmobd", NULL}; --static const char *portal_debug_masks[] = -- {"trace", "inode", "super", "ext2", "malloc", "cache", "info", "ioctl", -- "blocks", "net", "warning", "buffs", "other", "dentry", "portals", -- "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace", - "reada", NULL}; - "reada", "mmap", NULL}; -- --struct debug_daemon_cmd { -- char *cmd; -- unsigned int cmdv; --}; -- --static const struct debug_daemon_cmd portal_debug_daemon_cmd[] = { -- {"start", DEBUG_DAEMON_START}, -- {"stop", DEBUG_DAEMON_STOP}, -- {0, 0} --}; -- --static int do_debug_mask(char *name, int enable) --{ -- int found = 0, i; -- -- for (i = 0; portal_debug_subsystems[i] != NULL; i++) { -- if (strcasecmp(name, portal_debug_subsystems[i]) == 0 || -- strcasecmp(name, "all_subs") == 0) { -- printf("%s output from subsystem \"%s\"\n", -- enable ? 
"Enabling" : "Disabling", -- portal_debug_subsystems[i]); -- if (enable) -- subsystem_mask |= (1 << i); -- else -- subsystem_mask &= ~(1 << i); -- found = 1; -- } -- } -- for (i = 0; portal_debug_masks[i] != NULL; i++) { -- if (strcasecmp(name, portal_debug_masks[i]) == 0 || -- strcasecmp(name, "all_types") == 0) { -- printf("%s output of type \"%s\"\n", -- enable ? "Enabling" : "Disabling", -- portal_debug_masks[i]); -- if (enable) -- debug_mask |= (1 << i); -- else -- debug_mask &= ~(1 << i); -- found = 1; -- } -- } -- -- return found; --} -- --int dbg_initialize(int argc, char **argv) --{ -- return 0; --} -- --int jt_dbg_filter(int argc, char **argv) --{ -- int i; -- -- if (argc < 2) { -- fprintf(stderr, "usage: %s \n", -- argv[0]); -- return 0; -- } -- -- for (i = 1; i < argc; i++) -- if (!do_debug_mask(argv[i], 0)) -- fprintf(stderr, "Unknown subsystem or debug type: %s\n", -- argv[i]); -- return 0; --} -- --int jt_dbg_show(int argc, char **argv) --{ -- int i; -- -- if (argc < 2) { -- fprintf(stderr, "usage: %s \n", -- argv[0]); -- return 0; -- } -- -- for (i = 1; i < argc; i++) -- if (!do_debug_mask(argv[i], 1)) -- fprintf(stderr, "Unknown subsystem or debug type: %s\n", -- argv[i]); -- -- return 0; --} -- --static int applymask(char* procpath, int value) --{ -- int rc; -- char buf[64]; -- int len = snprintf(buf, 64, "%d", value); -- -- int fd = open(procpath, O_WRONLY); -- if (fd == -1) { -- fprintf(stderr, "Unable to open %s: %s\n", -- procpath, strerror(errno)); -- return fd; -- } -- rc = write(fd, buf, len+1); -- if (rc<0) { -- fprintf(stderr, "Write to %s failed: %s\n", -- procpath, strerror(errno)); -- return rc; -- } -- close(fd); -- return 0; --} - - extern char *dump_filename; - extern int dump(int dev_id, int opc, void *buf); -- --static void applymask_all(unsigned int subs_mask, unsigned int debug_mask) --{ -- if (!dump_filename) { -- applymask("/proc/sys/portals/subsystem_debug", subs_mask); -- applymask("/proc/sys/portals/debug", debug_mask); -- 
} else { -- struct portals_debug_ioctl_data data; -- -- data.hdr.ioc_len = sizeof(data); -- data.hdr.ioc_version = 0; -- data.subs = subs_mask; -- data.debug = debug_mask; -- -- dump(OBD_DEV_ID, PTL_IOC_DEBUG_MASK, &data); -- } -- printf("Applied subsystem_debug=%d, debug=%d to /proc/sys/portals\n", -- subs_mask, debug_mask); --} -- --int jt_dbg_list(int argc, char **argv) --{ -- int i; -- -- if (argc != 2) { -- fprintf(stderr, "usage: %s \n", argv[0]); -- return 0; -- } -- -- if (strcasecmp(argv[1], "subs") == 0) { -- printf("Subsystems: all_subs"); -- for (i = 0; portal_debug_subsystems[i] != NULL; i++) -- printf(", %s", portal_debug_subsystems[i]); -- printf("\n"); -- } else if (strcasecmp(argv[1], "types") == 0) { -- printf("Types: all_types"); -- for (i = 0; portal_debug_masks[i] != NULL; i++) -- printf(", %s", portal_debug_masks[i]); -- printf("\n"); -- } else if (strcasecmp(argv[1], "applymasks") == 0) { -- applymask_all(subsystem_mask, debug_mask); -- } -- return 0; --} -- --/* all strings nul-terminated; only the struct and hdr need to be freed */ --struct dbg_line { -- struct ptldebug_header *hdr; -- char *file; -- char *fn; -- char *text; -- struct list_head chain; --}; -- --/* nurr. 
*/ --static void list_add_ordered(struct dbg_line *new, struct list_head *head) --{ -- struct list_head *pos; - struct dbg_line *curr, *next; - struct dbg_line *curr; -- -- list_for_each(pos, head) { -- curr = list_entry(pos, struct dbg_line, chain); -- -- if (curr->hdr->ph_sec < new->hdr->ph_sec) -- continue; -- if (curr->hdr->ph_sec == new->hdr->ph_sec && -- curr->hdr->ph_usec < new->hdr->ph_usec) -- continue; -- -- list_add(&new->chain, pos->prev); -- return; -- } -- list_add_tail(&new->chain, head); --} -- --static void print_saved_records(struct list_head *list, FILE *out) --{ -- struct list_head *pos, *tmp; -- -- list_for_each_safe(pos, tmp, list) { -- struct dbg_line *line; -- struct ptldebug_header *hdr; -- -- line = list_entry(pos, struct dbg_line, chain); -- list_del(&line->chain); -- -- hdr = line->hdr; -- fprintf(out, "%06x:%06x:%u:%u.%06Lu:%u:%u:%u:(%s:%u:%s()) %s", -- hdr->ph_subsys, hdr->ph_mask, hdr->ph_cpu_id, -- hdr->ph_sec, (unsigned long long)hdr->ph_usec, -- hdr->ph_stack, hdr->ph_pid, hdr->ph_extern_pid, -- line->file, hdr->ph_line_num, line->fn, line->text); -- free(line->hdr); -- free(line); -- } --} -- --static int parse_buffer(FILE *in, FILE *out) --{ -- struct dbg_line *line; -- struct ptldebug_header *hdr; -- char buf[4097], *p; -- int rc; -- unsigned long dropped = 0, kept = 0; - struct list_head chunk_list, *pos; - struct list_head chunk_list; -- -- INIT_LIST_HEAD(&chunk_list); -- -- while (1) { -- rc = fread(buf, sizeof(hdr->ph_len), 1, in); -- if (rc <= 0) -- break; -- -- hdr = (void *)buf; -- if (hdr->ph_len == 0) -- break; -- if (hdr->ph_len > 4094) { -- fprintf(stderr, "unexpected large record: %d bytes. 
" -- "aborting.\n", -- hdr->ph_len); -- break; -- } -- -- if (hdr->ph_flags & PH_FLAG_FIRST_RECORD) { -- print_saved_records(&chunk_list, out); -- assert(list_empty(&chunk_list)); -- } -- -- rc = fread(buf + sizeof(hdr->ph_len), 1, -- hdr->ph_len - sizeof(hdr->ph_len), in); -- if (rc <= 0) -- break; -- -- if (hdr->ph_mask && -- (!(subsystem_mask & hdr->ph_subsys) || -- (!(debug_mask & hdr->ph_mask)))) { -- dropped++; -- continue; -- } -- -- line = malloc(sizeof(*line)); -- if (line == NULL) { -- fprintf(stderr, "malloc failed; printing accumulated " -- "records and exiting.\n"); -- break; -- } -- -- line->hdr = malloc(hdr->ph_len + 1); -- if (line->hdr == NULL) { -- fprintf(stderr, "malloc failed; printing accumulated " -- "records and exiting.\n"); -- break; -- } -- -- p = (void *)line->hdr; -- memcpy(line->hdr, buf, hdr->ph_len); -- p[hdr->ph_len] = '\0'; -- -- p += sizeof(*hdr); -- line->file = p; -- p += strlen(line->file) + 1; -- line->fn = p; -- p += strlen(line->fn) + 1; -- line->text = p; -- -- list_add_ordered(line, &chunk_list); -- kept++; -- } -- -- print_saved_records(&chunk_list, out); -- -- printf("Debug log: %lu lines, %lu kept, %lu dropped.\n", -- dropped + kept, kept, dropped); -- return 0; --} -- --int jt_dbg_debug_kernel(int argc, char **argv) --{ -- char filename[4096]; -- int rc, raw = 0, fd; -- FILE *in, *out = stdout; -- -- if (argc > 3) { -- fprintf(stderr, "usage: %s [file] [raw]\n", argv[0]); -- return 0; -- } -- sprintf(filename, "%s.%lu.%u", argc > 1 ? 
argv[1] : "/tmp/lustre-log", -- time(NULL), getpid()); -- -- if (argc > 2) -- raw = atoi(argv[2]); -- unlink(filename); -- -- fd = open("/proc/sys/portals/dump_kernel", O_WRONLY); -- if (fd < 0) { -- fprintf(stderr, "open(dump_kernel) failed: %s\n", -- strerror(errno)); -- return 1; -- } -- -- rc = write(fd, filename, strlen(filename)); -- if (rc != strlen(filename)) { -- fprintf(stderr, "write(%s) failed: %s\n", filename, -- strerror(errno)); -- close(fd); -- return 1; -- } -- close(fd); -- -- if (raw) -- return 0; -- -- in = fopen(filename, "r"); -- if (in == NULL) { - if (errno == ENOENT) /* no dump file created */ - return 0; - -- fprintf(stderr, "fopen(%s) failed: %s\n", filename, -- strerror(errno)); -- return 1; -- } -- if (argc > 1) { -- out = fopen(argv[1], "w"); -- if (out == NULL) { -- fprintf(stderr, "fopen(%s) failed: %s\n", argv[1], -- strerror(errno)); - fclose(in); -- return 1; -- } -- } -- -- rc = parse_buffer(in, out); - fclose(in); - if (argc > 1) - fclose(out); -- if (rc) { -- fprintf(stderr, "parse_buffer failed; leaving tmp file %s " -- "behind.\n", filename); -- } else { -- rc = unlink(filename); -- if (rc) -- fprintf(stderr, "dumped successfully, but couldn't " -- "unlink tmp file %s: %s\n", filename, -- strerror(errno)); -- } -- return rc; --} -- --int jt_dbg_debug_file(int argc, char **argv) --{ - int fdin,fdout; -- FILE *in, *out = stdout; -- if (argc > 3 || argc < 2) { -- fprintf(stderr, "usage: %s [output]\n", argv[0]); -- return 0; -- } -- - in = fopen(argv[1], "r"); - fdin = open(argv[1], O_RDONLY | O_LARGEFILE); - if (fdin == -1) { - fprintf(stderr, "open(%s) failed: %s\n", argv[1], - strerror(errno)); - return 1; - } - in = fdopen(fdin, "r"); -- if (in == NULL) { -- fprintf(stderr, "fopen(%s) failed: %s\n", argv[1], -- strerror(errno)); - close(fdin); -- return 1; -- } -- if (argc > 2) { - out = fopen(argv[2], "w"); - fdout = open(argv[2], O_CREAT | O_WRONLY | O_LARGEFILE); - if (fdout == -1) { - fprintf(stderr, "open(%s) failed: 
%s\n", argv[2], - strerror(errno)); - fclose(in); - return 1; - } - out = fdopen(fdout, "w"); -- if (out == NULL) { -- fprintf(stderr, "fopen(%s) failed: %s\n", argv[2], -- strerror(errno)); - fclose(in); - close(fdout); -- return 1; -- } -- } -- -- return parse_buffer(in, out); --} -- - const char debug_daemon_usage[] = "usage: %s {start file [MB]|stop}\n"; - #define DAEMON_FILE "/proc/sys/portals/daemon_file" -const char debug_daemon_usage[]="usage: debug_daemon {start file [MB]|stop}\n"; --int jt_dbg_debug_daemon(int argc, char **argv) --{ - int rc = 1, fd; - - int rc, fd; - -- if (argc <= 1) { - fprintf(stderr, debug_daemon_usage, argv[0]); - return 1; - fprintf(stderr, debug_daemon_usage); - return 0; -- } - - fd = open(DAEMON_FILE, O_WRONLY); - - fd = open("/proc/sys/portals/daemon_file", O_WRONLY); -- if (fd < 0) { - fprintf(stderr, "open %s failed: %s\n", DAEMON_FILE, - fprintf(stderr, "open(daemon_file) failed: %s\n", -- strerror(errno)); - } else if (strcasecmp(argv[1], "start") == 0) { - if (argc < 3 || argc > 4 || - (argc == 4 && strlen(argv[3]) > 5)) { - fprintf(stderr, debug_daemon_usage, argv[0]); - goto out; - } - - if (argc == 4) { - char size[12] = "size="; - long sizecheck; - - sizecheck = strtoul(argv[3], NULL, 0); - if (sizecheck < 10 || sizecheck > 20480) { - fprintf(stderr, "size %s invalid, must be in " - "the range 20-20480 MB\n", argv[3]); - } else { - strncat(size, argv[3], sizeof(size) - 6); - rc = write(fd, size, strlen(size)); - if (rc != strlen(size)) { - fprintf(stderr, "set %s failed: %s\n", size, strerror(errno)); - } - } - return 1; - } - - if (strcasecmp(argv[1], "start") == 0) { - if (argc != 3) { - fprintf(stderr, debug_daemon_usage); - return 1; -- } - -- rc = write(fd, argv[2], strlen(argv[2])); -- if (rc != strlen(argv[2])) { - fprintf(stderr, "start debug_daemon on %s failed: %s\n", - argv[2], strerror(errno)); - goto out; - fprintf(stderr, "write(%s) failed: %s\n", argv[2], - strerror(errno)); - close(fd); - return 1; -- } 
- - rc = 0; -- } else if (strcasecmp(argv[1], "stop") == 0) { -- rc = write(fd, "stop", 4); -- if (rc != 4) { - fprintf(stderr, "stopping debug_daemon failed: %s\n", - fprintf(stderr, "write(stop) failed: %s\n", -- strerror(errno)); - goto out; - close(fd); - return 1; -- } - rc = 0; -- } else { - fprintf(stderr, debug_daemon_usage, argv[0]); - rc = 1; - fprintf(stderr, debug_daemon_usage); - return 1; -- } - - out: - -- close(fd); -- return 0; --} -- --int jt_dbg_clear_debug_buf(int argc, char **argv) --{ -- int rc; -- struct portal_ioctl_data data; -- -- if (argc != 1) { -- fprintf(stderr, "usage: %s\n", argv[0]); -- return 0; -- } -- -- memset(&data, 0, sizeof(data)); -- if (portal_ioctl_pack(&data, &buf, max) != 0) { -- fprintf(stderr, "portal_ioctl_pack failed.\n"); -- return -1; -- } -- -- rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_CLEAR_DEBUG, buf); -- if (rc) { -- fprintf(stderr, "IOC_PORTAL_CLEAR_DEBUG failed: %s\n", -- strerror(errno)); -- return -1; -- } -- return 0; --} -- --int jt_dbg_mark_debug_buf(int argc, char **argv) --{ -- int rc, max_size = MAX_MARK_SIZE-1; -- struct portal_ioctl_data data; -- char *text; -- time_t now = time(NULL); -- -- if (argc > 1) { -- int counter; -- text = malloc(MAX_MARK_SIZE); -- strncpy(text, argv[1], max_size); -- max_size-=strlen(argv[1]); -- for(counter = 2; (counter < argc) && (max_size > 0) ; counter++){ -- strncat(text, " ", 1); -- max_size-=1; -- strncat(text, argv[counter], max_size); -- max_size-=strlen(argv[counter]); -- } -- } else { -- text = ctime(&now); -- text[strlen(text) - 1] = '\0'; /* stupid \n */ -- } -- if (!max_size) { -- text[MAX_MARK_SIZE - 1] = '\0'; -- } -- -- memset(&data, 0, sizeof(data)); -- data.ioc_inllen1 = strlen(text) + 1; -- data.ioc_inlbuf1 = text; -- if (portal_ioctl_pack(&data, &buf, max) != 0) { -- fprintf(stderr, "portal_ioctl_pack failed.\n"); -- return -1; -- } -- -- rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_MARK_DEBUG, buf); -- if (rc) { -- fprintf(stderr, "IOC_PORTAL_MARK_DEBUG 
failed: %s\n", -- strerror(errno)); -- return -1; -- } -- return 0; --} -- --static struct mod_paths { -- char *name, *path; --} mod_paths[] = { - {"portals", "lustre/portals/libcfs"}, - {"libcfs", "lustre/portals/libcfs"}, - {"portals", "lustre/portals/portals"}, -- {"ksocknal", "lustre/portals/knals/socknal"}, -- {"kptlrouter", "lustre/portals/router"}, -- {"lvfs", "lustre/lvfs"}, -- {"obdclass", "lustre/obdclass"}, -- {"llog_test", "lustre/obdclass"}, -- {"ptlrpc", "lustre/ptlrpc"}, -- {"obdext2", "lustre/obdext2"}, -- {"ost", "lustre/ost"}, -- {"osc", "lustre/osc"}, -- {"mds", "lustre/mds"}, -- {"mdc", "lustre/mdc"}, -- {"llite", "lustre/llite"}, - {"smfs", "lustre/smfs"}, -- {"obdecho", "lustre/obdecho"}, -- {"ldlm", "lustre/ldlm"}, -- {"obdfilter", "lustre/obdfilter"}, -- {"extN", "lustre/extN"}, -- {"lov", "lustre/lov"}, - {"lmv", "lustre/lmv"}, -- {"fsfilt_ext3", "lustre/lvfs"}, -- {"fsfilt_extN", "lustre/lvfs"}, -- {"fsfilt_reiserfs", "lustre/lvfs"}, - {"fsfilt_smfs", "lustre/lvfs"}, - {"fsfilt_ldiskfs", "lustre/lvfs"}, -- {"mds_ext2", "lustre/mds"}, -- {"mds_ext3", "lustre/mds"}, -- {"mds_extN", "lustre/mds"}, -- {"ptlbd", "lustre/ptlbd"}, -- {"mgmt_svc", "lustre/mgmt"}, -- {"mgmt_cli", "lustre/mgmt"}, - {"cobd", "lustre/cobd"}, - {"cmobd", "lustre/cmobd"}, -- {NULL, NULL} --}; -- - #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - int jt_dbg_modules(int argc, char **argv) -static int jt_dbg_modules_2_4(int argc, char **argv) --{ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -- struct mod_paths *mp; -- char *path = ".."; -- char *kernel = "linux"; -- -- if (argc >= 2) -- path = argv[1]; -- if (argc == 3) -- kernel = argv[2]; -- if (argc > 3) { -- printf("%s [path] [kernel]\n", argv[0]); -- return 0; -- } -- -- for (mp = mod_paths; mp->name != NULL; mp++) { -- struct module_info info; -- int rc; -- size_t crap; -- int query_module(const char *name, int which, void *buf, -- size_t bufsize, size_t *ret); -- -- rc = query_module(mp->name, QM_INFO, &info, 
sizeof(info), -- &crap); -- if (rc < 0) { -- if (errno != ENOENT) -- printf("query_module(%s) failed: %s\n", -- mp->name, strerror(errno)); -- } else { -- printf("add-symbol-file %s/%s/%s.o 0x%0lx\n", path, -- mp->path, mp->name, -- info.addr + sizeof(struct module)); -- } -- } -- -- return 0; -#else /* Headers are 2.6-only */ - return -EINVAL; -#endif --} - #else - int jt_dbg_modules(int argc, char **argv) - -static int jt_dbg_modules_2_5(int argc, char **argv) --{ -- struct mod_paths *mp; -- char *path = ".."; -- char *kernel = "linux"; -- const char *proc = "/proc/modules"; -- char modname[128], others[128]; -- long modaddr; -- int rc; -- FILE *file; -- -- if (argc >= 2) -- path = argv[1]; -- if (argc == 3) -- kernel = argv[2]; -- if (argc > 3) { -- printf("%s [path] [kernel]\n", argv[0]); -- return 0; -- } -- -- file = fopen(proc, "r"); -- if (!file) { -- printf("failed open %s: %s\n", proc, strerror(errno)); -- return 0; -- } -- -- while ((rc = fscanf(file, "%s %s %s %s %s %lx\n", -- modname, others, others, others, others, &modaddr)) == 6) { -- for (mp = mod_paths; mp->name != NULL; mp++) { -- if (!strcmp(mp->name, modname)) -- break; -- } -- if (mp->name) { -- printf("add-symbol-file %s/%s/%s.o 0x%0lx\n", path, -- mp->path, mp->name, modaddr); -- } -- } -- -- return 0; --} - #endif /* linux 2.5 */ - -int jt_dbg_modules(int argc, char **argv) -{ - int rc = 0; - struct utsname sysinfo; - - rc = uname(&sysinfo); - if (rc) { - printf("uname() failed: %s\n", strerror(errno)); - return 0; - } - - if (sysinfo.release[2] > '4') { - return jt_dbg_modules_2_5(argc, argv); - } else { - return jt_dbg_modules_2_4(argc, argv); - } - - return 0; -} -- --int jt_dbg_panic(int argc, char **argv) --{ -- int rc; -- struct portal_ioctl_data data; -- -- if (argc != 1) { -- fprintf(stderr, "usage: %s\n", argv[0]); -- return 0; -- } -- -- memset(&data, 0, sizeof(data)); -- if (portal_ioctl_pack(&data, &buf, max) != 0) { -- fprintf(stderr, "portal_ioctl_pack failed.\n"); -- return 
-1; -- } -- -- rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_PANIC, buf); -- if (rc) { -- fprintf(stderr, "IOC_PORTAL_PANIC failed: %s\n", -- strerror(errno)); -- return -1; -- } -- return 0; --} diff --cc lnet/utils/debugctl.c index 1b6cd96,1b6cd96..0000000 deleted file mode 100644,100644 --- a/lnet/utils/debugctl.c +++ /dev/null @@@ -1,66 -1,66 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (C) 2001, 2002 Cluster File Systems, Inc. -- * -- * This file is part of Portals, http://www.sf.net/projects/lustre/ -- * -- * Portals is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Portals is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Portals; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- * -- * Some day I'll split all of this functionality into a cfs_debug module -- * of its own. That day is not today. 
-- * -- */ -- --#include --#include --#include --#include --#include "parser.h" -- -- --command_t list[] = { -- {"debug_kernel", jt_dbg_debug_kernel, 0, "usage: debug_kernel [file] [raw], get debug buffer and print it [to a file]"}, -- {"debug_daemon", jt_dbg_debug_daemon, 0, "usage: debug_daemon [start file|stop], control debug daemon to dump debug buffer to a file"}, -- {"debug_file", jt_dbg_debug_file, 0, "usage: debug_file [output] [raw], read debug buffer from input and print it [to output]"}, -- {"clear", jt_dbg_clear_debug_buf, 0, "clear kernel debug buffer"}, -- {"mark", jt_dbg_mark_debug_buf, 0, "insert a marker into the kernel debug buffer (args: [marker text])"}, -- {"filter", jt_dbg_filter, 0, "filter certain messages (args: subsystem/debug ID)\n"}, -- {"show", jt_dbg_show, 0, "enable certain messages (args: subsystem/debug ID)\n"}, -- {"list", jt_dbg_list, 0, "list subsystem and debug types (args: subs or types)\n"}, -- {"modules", jt_dbg_modules, 0, "provide gdb-friendly module info (arg: )"}, -- {"panic", jt_dbg_panic, 0, "cause the kernel to panic"}, -- {"dump", jt_ioc_dump, 0, "usage: dump file, save ioctl buffer to file"}, -- {"help", Parser_help, 0, "help"}, -- {"exit", Parser_quit, 0, "quit"}, -- {"quit", Parser_quit, 0, "quit"}, -- { 0, 0, 0, NULL } --}; -- --int main(int argc, char **argv) --{ -- if (dbg_initialize(argc, argv) < 0) -- exit(2); -- -- register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH); -- -- Parser_init("debugctl > ", list); -- if (argc > 1) -- return Parser_execarg(argc - 1, &argv[1], list); -- -- Parser_commands(); -- -- unregister_ioc_dev(PORTALS_DEV_ID); -- return 0; --} diff --cc lnet/utils/gmlndnid.c index e45fae4,e45fae4..0000000 deleted file mode 100644,100644 --- a/lnet/utils/gmlndnid.c +++ /dev/null @@@ -1,117 -1,117 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (c) 2003 Los Alamos National Laboratory (LANL) -- * -- * This file is 
part of Lustre, http://www.lustre.org/ -- * -- * This file is free software; you can redistribute it and/or -- * modify it under the terms of version 2.1 of the GNU Lesser General -- * Public License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU Lesser General Public License for more details. -- * -- * You should have received a copy of the GNU Lesser General Public -- * License along with Portals; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -- */ -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include -- --#include --#include --#include -- --#define GMNAL_IOC_GET_GNID 1 -- --int --roundup(int len) --{ -- return((len+7) & (~0x7)); --} -- --int main(int argc, char **argv) --{ -- int rc, pfd; -- struct portal_ioctl_data data; -- struct portals_cfg pcfg; -- unsigned int nid = 0, len; -- char *name = NULL; -- int c; -- -- -- -- while ((c = getopt(argc, argv, "n:l")) != -1) { -- switch(c) { -- case('n'): -- name = optarg; -- break; -- case('l'): -- printf("Get local id not implemented yet!\n"); -- exit(-1); -- default: -- printf("usage %s -n nodename [-p]\n", argv[0]); -- } -- } -- -- if (!name) { -- printf("usage %s -n nodename [-p]\n", argv[0]); -- exit(-1); -- } -- -- -- -- -- PCFG_INIT(pcfg, GMNAL_IOC_GET_GNID); -- pcfg.pcfg_nal = GMNAL; -- -- /* -- * set up the inputs -- */ -- len = strlen(name) + 1; -- pcfg.pcfg_pbuf1 = malloc(len); -- strcpy(pcfg.pcfg_pbuf1, name); -- pcfg.pcfg_plen1 = len; -- -- /* -- * set up the outputs -- */ -- pcfg.pcfg_pbuf2 = (void*)&nid; -- pcfg.pcfg_plen2 = sizeof(unsigned int*); -- -- pfd = open("/dev/portals", O_RDWR); -- if ( pfd < 0 ) { -- perror("opening portals device"); -- 
free(pcfg.pcfg_pbuf1); -- exit(-1); -- } -- -- PORTAL_IOC_INIT(data); -- data.ioc_pbuf1 = (char*)&pcfg; -- data.ioc_plen1 = sizeof(pcfg); -- -- rc = ioctl (pfd, IOC_PORTAL_NAL_CMD, &data); -- if (rc < 0) -- { -- perror ("Can't get my NID"); -- } -- -- free(pcfg.pcfg_pbuf1); -- close(pfd); -- printf("%u\n", nid); -- exit(0); --} diff --cc lnet/utils/l_ioctl.c index 1adcc8e,0671c24..0000000 deleted file mode 100644,100644 --- a/lnet/utils/l_ioctl.c +++ /dev/null @@@ -1,339 -1,339 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (C) 2001, 2002 Cluster File Systems, Inc. -- * -- * This file is part of Portals, http://www.sf.net/projects/lustre/ -- * -- * Portals is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Portals is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Portals; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-- * -- */ -- --#define __USE_FILE_OFFSET64 -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include -- --#include --#include -- --#ifndef __CYGWIN__ -- #include --#else -- #include -- #include --#endif -- --static ioc_handler_t do_ioctl; /* forward ref */ --static ioc_handler_t *current_ioc_handler = &do_ioctl; -- --struct ioc_dev { -- const char * dev_name; -- int dev_fd; --}; -- --static struct ioc_dev ioc_dev_list[10]; -- --struct dump_hdr { -- int magic; -- int dev_id; - int opc; - unsigned int opc; --}; -- --char *dump_filename; -- --void --set_ioc_handler (ioc_handler_t *handler) --{ -- if (handler == NULL) -- current_ioc_handler = do_ioctl; -- else -- current_ioc_handler = handler; --} -- --static int --open_ioc_dev(int dev_id) --{ -- const char * dev_name; -- -- if (dev_id < 0 || dev_id >= sizeof(ioc_dev_list)) -- return -EINVAL; -- -- dev_name = ioc_dev_list[dev_id].dev_name; -- if (dev_name == NULL) { -- fprintf(stderr, "unknown device id: %d\n", dev_id); -- return -EINVAL; -- } -- -- if (ioc_dev_list[dev_id].dev_fd < 0) { -- int fd = open(dev_name, O_RDWR); -- -- if (fd < 0) { -- fprintf(stderr, "opening %s failed: %s\n" -- "hint: the kernel modules may not be loaded\n", -- dev_name, strerror(errno)); -- return fd; -- } -- ioc_dev_list[dev_id].dev_fd = fd; -- } -- -- return ioc_dev_list[dev_id].dev_fd; --} -- -- --static int - do_ioctl(int dev_id, int opc, void *buf) -do_ioctl(int dev_id, unsigned int opc, void *buf) --{ -- int fd, rc; -- -- fd = open_ioc_dev(dev_id); -- if (fd < 0) -- return fd; -- -- rc = ioctl(fd, opc, buf); -- return rc; -- --} -- --static FILE * --get_dump_file() --{ -- FILE *fp = NULL; -- -- if (!dump_filename) { -- fprintf(stderr, "no dump filename\n"); -- } else -- fp = fopen(dump_filename, "a"); -- return fp; --} -- --/* -- * The dump file should start with a description of which devices are -- * used, but for now it will assumed whatever app reads the file will -- * know 
what to do. */ --int - dump(int dev_id, int opc, void *buf) -dump(int dev_id, unsigned int opc, void *buf) --{ -- FILE *fp; -- struct dump_hdr dump_hdr; -- struct portal_ioctl_hdr * ioc_hdr = (struct portal_ioctl_hdr *) buf; -- int rc; -- -- printf("dumping opc %x to %s\n", opc, dump_filename); -- -- -- dump_hdr.magic = 0xdeadbeef; -- dump_hdr.dev_id = dev_id; -- dump_hdr.opc = opc; -- -- fp = get_dump_file(); -- if (fp == NULL) { -- fprintf(stderr, "%s: %s\n", dump_filename, -- strerror(errno)); -- return -EINVAL; -- } -- -- rc = fwrite(&dump_hdr, sizeof(dump_hdr), 1, fp); -- if (rc == 1) -- rc = fwrite(buf, ioc_hdr->ioc_len, 1, fp); -- fclose(fp); -- if (rc != 1) { -- fprintf(stderr, "%s: %s\n", dump_filename, -- strerror(errno)); -- return -EINVAL; -- } -- -- return 0; --} -- --/* register a device to send ioctls to. */ --int --register_ioc_dev(int dev_id, const char * dev_name) --{ -- -- if (dev_id < 0 || dev_id >= sizeof(ioc_dev_list)) -- return -EINVAL; -- -- unregister_ioc_dev(dev_id); -- -- ioc_dev_list[dev_id].dev_name = dev_name; -- ioc_dev_list[dev_id].dev_fd = -1; -- -- return dev_id; --} -- --void --unregister_ioc_dev(int dev_id) --{ -- -- if (dev_id < 0 || dev_id >= sizeof(ioc_dev_list)) -- return; -- if (ioc_dev_list[dev_id].dev_name != NULL && -- ioc_dev_list[dev_id].dev_fd >= 0) -- close(ioc_dev_list[dev_id].dev_fd); -- -- ioc_dev_list[dev_id].dev_name = NULL; -- ioc_dev_list[dev_id].dev_fd = -1; --} -- --/* If this file is set, then all ioctl buffers will be -- appended to the file. */ --int --set_ioctl_dump(char * file) --{ -- if (dump_filename) -- free(dump_filename); -- -- dump_filename = strdup(file); -- if (dump_filename == NULL) -- abort(); -- -- set_ioc_handler(&dump); -- return 0; --} -- --int - l_ioctl(int dev_id, int opc, void *buf) -l_ioctl(int dev_id, unsigned int opc, void *buf) --{ -- return current_ioc_handler(dev_id, opc, buf); --} -- --/* Read an ioctl dump file, and call the ioc_func for each ioctl buffer -- * in the file. 
For example: -- * -- * parse_dump("lctl.dump", l_ioctl); -- * -- * Note: if using l_ioctl, then you also need to register_ioc_dev() for -- * each device used in the dump. -- */ --int - parse_dump(char * dump_file, int (*ioc_func)(int dev_id, int opc, void *)) -parse_dump(char * dump_file, ioc_handler_t ioc_func) --{ -- int line =0; -- struct stat st; -- char *start, *buf, *end; --#ifndef __CYGWIN__ -- int fd; --#else -- HANDLE fd, hmap; -- DWORD size; --#endif -- --#ifndef __CYGWIN__ -- fd = syscall(SYS_open, dump_file, O_RDONLY); -- if (fd < 0) { -- fprintf(stderr, "couldn't open %s: %s\n", dump_file, -- strerror(errno)); -- exit(1); -- } -- -- if (fstat(fd, &st)) { -- perror("stat fails"); -- exit(1); -- } -- -- if (st.st_size < 1) { -- fprintf(stderr, "KML is empty\n"); -- exit(1); -- } -- -- start = buf = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE , fd, 0); -- end = start + st.st_size; -- close(fd); -- if (start == MAP_FAILED) { -- fprintf(stderr, "can't create file mapping\n"); -- exit(1); -- } --#else -- fd = CreateFile(dump_file, GENERIC_READ, FILE_SHARE_READ, NULL, -- OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); -- size = GetFileSize(fd, NULL); -- if (size < 1) { -- fprintf(stderr, "KML is empty\n"); -- exit(1); -- } -- -- hmap = CreateFileMapping(fd, NULL, PAGE_READONLY, 0,0, NULL); -- start = buf = MapViewOfFile(hmap, FILE_MAP_READ, 0, 0, 0); -- end = buf + size; -- CloseHandle(fd); -- if (start == NULL) { -- fprintf(stderr, "can't create file mapping\n"); -- exit(1); -- } --#endif /* __CYGWIN__ */ -- -- while (buf < end) { -- struct dump_hdr *dump_hdr = (struct dump_hdr *) buf; -- struct portal_ioctl_hdr * data; -- char tmp[8096]; -- int rc; -- -- line++; -- -- data = (struct portal_ioctl_hdr *) (buf + sizeof(*dump_hdr)); -- if (buf + data->ioc_len > end ) { -- fprintf(stderr, "dump file overflow, %p + %d > %p\n", buf, -- data->ioc_len, end); -- return -1; -- } --#if 0 -- printf ("dump_hdr: %lx data: %lx\n", -- (unsigned long)dump_hdr - (unsigned 
long)buf, (unsigned long)data - (unsigned long)buf); -- -- printf("%d: opcode %x len: %d ver: %x ", line, dump_hdr->opc, -- data->ioc_len, data->ioc_version); --#endif -- -- memcpy(tmp, data, data->ioc_len); -- -- rc = ioc_func(dump_hdr->dev_id, dump_hdr->opc, tmp); -- if (rc) { -- printf("failed: %d\n", rc); -- exit(1); -- } -- -- buf += data->ioc_len + sizeof(*dump_hdr); -- } -- --#ifndef __CYGWIN__ -- munmap(start, end - start); --#else -- UnmapViewOfFile(start); -- CloseHandle(hmap); --#endif -- -- return 0; --} -- --int --jt_ioc_dump(int argc, char **argv) --{ -- if (argc > 2) { -- fprintf(stderr, "usage: %s [hostname]\n", argv[0]); -- return 0; -- } -- printf("setting dumpfile to: %s\n", argv[1]); -- -- set_ioctl_dump(argv[1]); -- return 0; --} diff --cc lnet/utils/parser.c index 82b4022,b91295b..0000000 deleted file mode 100644,100644 --- a/lnet/utils/parser.c +++ /dev/null @@@ -1,713 -1,651 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (C) 2001 Cluster File Systems, Inc. -- * -- * This file is part of Lustre, http://www.sf.net/projects/lustre/ -- * -- * Lustre is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Lustre is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Lustre; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-- * -- */ --#include --#include --#include --#include --#include --#include --#include --#include -- --#ifdef HAVE_LIBREADLINE --#define READLINE_LIBRARY --#include -- --/* completion_matches() is #if 0-ed out in modern glibc */ --#ifndef completion_matches --# define completion_matches rl_completion_matches --#endif --#endif -- --extern void using_history(void); --extern void stifle_history(int); --extern void add_history(char *); -- --#include "parser.h" -- --static command_t * top_level; /* Top level of commands, initialized by -- * InitParser */ --static char * parser_prompt = NULL;/* Parser prompt, set by InitParser */ --static int done; /* Set to 1 if user types exit or quit */ -- -- --/* static functions */ --static char *skipwhitespace(char *s); --static char *skiptowhitespace(char *s); --static command_t *find_cmd(char *name, command_t cmds[], char **next); --static int process(char *s, char **next, command_t *lookup, command_t **result, -- char **prev); --static void print_commands(char *str, command_t *table); -- --static char * skipwhitespace(char * s) --{ -- char * t; -- int len; -- -- len = (int)strlen(s); -- for (t = s; t <= s + len && isspace(*t); t++); -- return(t); --} -- -- --static char * skiptowhitespace(char * s) --{ -- char * t; -- -- for (t = s; *t && !isspace(*t); t++); -- return(t); --} -- --static int line2args(char *line, char **argv, int maxargs) --{ -- char *arg; -- int i = 0; -- -- arg = strtok(line, " \t"); -- if ( arg ) { -- argv[i] = arg; -- i++; -- } else -- return 0; -- -- while( (arg = strtok(NULL, " \t")) && (i <= maxargs)) { -- argv[i] = arg; -- i++; -- } -- return i; --} -- --/* find a command -- return it if unique otherwise print alternatives */ --static command_t *Parser_findargcmd(char *name, command_t cmds[]) --{ -- command_t *cmd; -- -- for (cmd = cmds; cmd->pc_name; cmd++) { -- if (strcmp(name, cmd->pc_name) == 0) -- return cmd; -- } -- return NULL; --} -- --int Parser_execarg(int argc, char **argv, command_t cmds[]) 
--{ -- command_t *cmd; -- -- cmd = Parser_findargcmd(argv[0], cmds); -- if ( cmd ) { -- int rc = (cmd->pc_func)(argc, argv); -- if (rc == CMD_HELP) -- fprintf(stderr, "%s\n", cmd->pc_help); -- return rc; -- } else { -- printf("Try interactive use without arguments or use one of:\n"); -- for (cmd = cmds; cmd->pc_name; cmd++) -- printf("\"%s\" ", cmd->pc_name); -- printf("\nas argument.\n"); -- } -- return -1; --} -- --/* returns the command_t * (NULL if not found) corresponding to a -- _partial_ match with the first token in name. It sets *next to -- point to the following token. Does not modify *name. */ --static command_t * find_cmd(char * name, command_t cmds[], char ** next) --{ -- int i, len; -- -- if (!cmds || !name ) -- return NULL; -- -- /* This sets name to point to the first non-white space character, -- and next to the first whitespace after name, len to the length: do -- this with strtok*/ -- name = skipwhitespace(name); -- *next = skiptowhitespace(name); -- len = *next - name; -- if (len == 0) -- return NULL; -- -- for (i = 0; cmds[i].pc_name; i++) { -- if (strncasecmp(name, cmds[i].pc_name, len) == 0) { -- *next = skipwhitespace(*next); -- return(&cmds[i]); -- } -- } -- return NULL; --} -- --/* Recursively process a command line string s and find the command -- corresponding to it. This can be ambiguous, full, incomplete, -- non-existent. */ --static int process(char *s, char ** next, command_t *lookup, -- command_t **result, char **prev) --{ -- *result = find_cmd(s, lookup, next); -- *prev = s; -- -- /* non existent */ -- if ( ! *result ) -- return CMD_NONE; -- -- /* found entry: is it ambigous, i.e. not exact command name and -- more than one command in the list matches. Note that find_cmd -- points to the first ambiguous entry */ -- if ( strncasecmp(s, (*result)->pc_name, strlen((*result)->pc_name)) && -- find_cmd(s, (*result) + 1, next)) -- return CMD_AMBIG; -- -- /* found a unique command: component or full? 
*/ -- if ( (*result)->pc_func ) { -- return CMD_COMPLETE; -- } else { -- if ( *next == '\0' ) { -- return CMD_INCOMPLETE; -- } else { -- return process(*next, next, (*result)->pc_sub_cmd, result, prev); -- } -- } --} -- --#ifdef HAVE_LIBREADLINE --static command_t * match_tbl; /* Command completion against this table */ --static char * command_generator(const char * text, int state) --{ -- static int index, -- len; -- char *name; -- -- /* Do we have a match table? */ -- if (!match_tbl) -- return NULL; -- -- /* If this is the first time called on this word, state is 0 */ -- if (!state) { -- index = 0; -- len = (int)strlen(text); -- } -- -- /* Return next name in the command list that paritally matches test */ -- while ( (name = (match_tbl + index)->pc_name) ) { -- index++; -- -- if (strncasecmp(name, text, len) == 0) { -- return(strdup(name)); -- } -- } -- -- /* No more matches */ -- return NULL; --} -- --/* probably called by readline */ --static char **command_completion(char * text, int start, int end) --{ -- command_t * table; -- char * pos; -- -- match_tbl = top_level; -- -- for (table = find_cmd(rl_line_buffer, match_tbl, &pos); -- table; table = find_cmd(pos, match_tbl, &pos)) -- { -- -- if (*(pos - 1) == ' ') match_tbl = table->pc_sub_cmd; -- } -- -- return completion_matches(text, command_generator); --} --#endif -- --/* take a string and execute the function or print help */ --int execute_line(char * line) --{ -- command_t *cmd, *ambig; -- char *prev; -- char *next, *tmp; -- char *argv[MAXARGS]; -- int i; -- int rc = 0; -- -- switch( process(line, &next, top_level, &cmd, &prev) ) { -- case CMD_AMBIG: -- fprintf(stderr, "Ambiguous command \'%s\'\nOptions: ", line); -- while( (ambig = find_cmd(prev, cmd, &tmp)) ) { -- fprintf(stderr, "%s ", ambig->pc_name); -- cmd = ambig + 1; -- } -- fprintf(stderr, "\n"); -- break; -- case CMD_NONE: -- fprintf(stderr, "No such command, type help\n"); -- break; -- case CMD_INCOMPLETE: -- fprintf(stderr, -- "'%s' incomplete 
command. Use '%s x' where x is one of:\n", -- line, line); -- fprintf(stderr, "\t"); -- for (i = 0; cmd->pc_sub_cmd[i].pc_name; i++) { -- fprintf(stderr, "%s ", cmd->pc_sub_cmd[i].pc_name); -- } -- fprintf(stderr, "\n"); -- break; -- case CMD_COMPLETE: -- i = line2args(line, argv, MAXARGS); -- rc = (cmd->pc_func)(i, argv); -- -- if (rc == CMD_HELP) -- fprintf(stderr, "%s\n", cmd->pc_help); -- -- break; -- } -- -- return rc; --} -- --int --noop_fn () --{ -- return (0); --} -- --/* just in case you're ever in an airplane and discover you -- forgot to install readline-dev. :) */ --int init_input() --{ -- int interactive = isatty (fileno (stdin)); -- --#ifdef HAVE_LIBREADLINE -- using_history(); -- stifle_history(HISTORY); -- -- if (!interactive) -- { -- rl_prep_term_function = (rl_vintfunc_t *)noop_fn; -- rl_deprep_term_function = (rl_voidfunc_t *)noop_fn; -- } -- -- rl_attempted_completion_function = (CPPFunction *)command_completion; -- rl_completion_entry_function = (void *)command_generator; --#endif -- return interactive; --} -- --#ifndef HAVE_LIBREADLINE --#define add_history(s) --char * readline(char * prompt) --{ -- char line[2048]; -- int n = 0; -- if (prompt) -- printf ("%s", prompt); -- if (fgets(line, sizeof(line), stdin) == NULL) -- return (NULL); -- n = strlen(line); -- if (n && line[n-1] == '\n') -- line[n-1] = '\0'; -- return strdup(line); --} --#endif -- --/* this is the command execution machine */ --int Parser_commands(void) --{ -- char *line, *s; -- int rc = 0; -- int interactive; -- -- interactive = init_input(); -- -- while(!done) { -- line = readline(interactive ? 
parser_prompt : NULL); -- -- if (!line) break; -- -- s = skipwhitespace(line); -- -- if (*s) { -- add_history(s); -- rc = execute_line(s); -- } -- -- free(line); -- } -- return rc; --} -- -- --/* sets the parser prompt */ --void Parser_init(char * prompt, command_t * cmds) --{ -- done = 0; -- top_level = cmds; -- if (parser_prompt) free(parser_prompt); -- parser_prompt = strdup(prompt); --} -- --/* frees the parser prompt */ --void Parser_exit(int argc, char *argv[]) --{ -- done = 1; -- free(parser_prompt); -- parser_prompt = NULL; --} -- --/* convert a string to an integer */ --int Parser_int(char *s, int *val) --{ -- int ret; -- -- if (*s != '0') -- ret = sscanf(s, "%d", val); -- else if (*(s+1) != 'x') -- ret = sscanf(s, "%o", val); -- else { -- s++; -- ret = sscanf(++s, "%x", val); -- } -- -- return(ret); --} -- -- --void Parser_qhelp(int argc, char *argv[]) { -- -- printf("Available commands are:\n"); -- -- print_commands(NULL, top_level); -- printf("For more help type: help command-name\n"); --} -- --int Parser_help(int argc, char **argv) --{ -- char line[1024]; -- char *next, *prev, *tmp; -- command_t *result, *ambig; -- int i; -- -- if ( argc == 1 ) { -- Parser_qhelp(argc, argv); -- return 0; -- } -- -- line[0]='\0'; -- for ( i = 1 ; i < argc ; i++ ) { -- strcat(line, argv[i]); -- } -- -- switch ( process(line, &next, top_level, &result, &prev) ) { -- case CMD_COMPLETE: -- fprintf(stderr, "%s: %s\n",line, result->pc_help); -- break; -- case CMD_NONE: -- fprintf(stderr, "%s: Unknown command.\n", line); -- break; -- case CMD_INCOMPLETE: -- fprintf(stderr, -- "'%s' incomplete command. 
Use '%s x' where x is one of:\n", -- line, line); -- fprintf(stderr, "\t"); -- for (i = 0; result->pc_sub_cmd[i].pc_name; i++) { -- fprintf(stderr, "%s ", result->pc_sub_cmd[i].pc_name); -- } -- fprintf(stderr, "\n"); -- break; -- case CMD_AMBIG: -- fprintf(stderr, "Ambiguous command \'%s\'\nOptions: ", line); -- while( (ambig = find_cmd(prev, result, &tmp)) ) { -- fprintf(stderr, "%s ", ambig->pc_name); -- result = ambig + 1; -- } -- fprintf(stderr, "\n"); -- break; -- } -- return 0; --} -- -- --void Parser_printhelp(char *cmd) --{ -- char *argv[] = { "help", cmd }; -- Parser_help(2, argv); --} -- --/************************************************************************* -- * COMMANDS * -- *************************************************************************/ -- -- --static void print_commands(char * str, command_t * table) { -- command_t * cmds; -- char buf[80]; -- -- for (cmds = table; cmds->pc_name; cmds++) { -- if (cmds->pc_func) { -- if (str) printf("\t%s %s\n", str, cmds->pc_name); -- else printf("\t%s\n", cmds->pc_name); -- } -- if (cmds->pc_sub_cmd) { -- if (str) { -- sprintf(buf, "%s %s", str, cmds->pc_name); -- print_commands(buf, cmds->pc_sub_cmd); -- } else { -- print_commands(cmds->pc_name, cmds->pc_sub_cmd); -- } -- } -- } --} -- --char *Parser_getstr(const char *prompt, const char *deft, char *res, -- size_t len) --{ -- char *line = NULL; -- int size = strlen(prompt) + strlen(deft) + 8; -- char *theprompt; -- theprompt = malloc(size); -- assert(theprompt); -- -- sprintf(theprompt, "%s [%s]: ", prompt, deft); -- -- line = readline(theprompt); -- free(theprompt); -- -- if ( line == NULL || *line == '\0' ) { -- strncpy(res, deft, len); -- } else { -- strncpy(res, line, len); -- } -- -- if ( line ) { -- free(line); -- return res; -- } else { -- return NULL; -- } --} -- --/* get integer from prompt, loop forever to get it */ --int Parser_getint(const char *prompt, long min, long max, long deft, int base) --{ -- int rc; -- long result; -- char 
*line; -- int size = strlen(prompt) + 40; -- char *theprompt = malloc(size); -- assert(theprompt); -- sprintf(theprompt,"%s [%ld, (0x%lx)]: ", prompt, deft, deft); -- -- fflush(stdout); -- -- do { -- line = NULL; -- line = readline(theprompt); -- if ( !line ) { -- fprintf(stdout, "Please enter an integer.\n"); -- fflush(stdout); -- continue; -- } -- if ( *line == '\0' ) { -- free(line); -- result = deft; -- break; -- } -- rc = Parser_arg2int(line, &result, base); -- free(line); -- if ( rc != 0 ) { -- fprintf(stdout, "Invalid string.\n"); -- fflush(stdout); -- } else if ( result > max || result < min ) { -- fprintf(stdout, "Error: response must lie between %ld and %ld.\n", -- min, max); -- fflush(stdout); -- } else { -- break; -- } -- } while ( 1 ) ; -- -- if (theprompt) -- free(theprompt); -- return result; -- --} -- --/* get boolean (starting with YyNn; loop forever */ --int Parser_getbool(const char *prompt, int deft) --{ -- int result = 0; -- char *line; -- int size = strlen(prompt) + 8; -- char *theprompt = malloc(size); -- assert(theprompt); -- -- fflush(stdout); -- -- if ( deft != 0 && deft != 1 ) { -- fprintf(stderr, "Error: Parser_getbool given bad default (%d).\n", -- deft); -- assert ( 0 ); -- } -- sprintf(theprompt, "%s [%s]: ", prompt, (deft==0)? "N" : "Y"); -- -- do { -- line = NULL; -- line = readline(theprompt); -- if ( line == NULL ) { -- result = deft; -- break; -- } -- if ( *line == '\0' ) { -- result = deft; -- break; -- } -- if ( *line == 'y' || *line == 'Y' ) { -- result = 1; -- break; -- } -- if ( *line == 'n' || *line == 'N' ) { -- result = 0; -- break; -- } -- if ( line ) -- free(line); -- fprintf(stdout, "Invalid string. 
Must start with yY or nN\n"); -- fflush(stdout); -- } while ( 1 ); -- -- if ( line ) -- free(line); -- if ( theprompt ) -- free(theprompt); -- return result; --} -- --/* parse int out of a string or prompt for it */ --long Parser_intarg(const char *inp, const char *prompt, int deft, -- int min, int max, int base) --{ -- long result; -- int rc; -- -- rc = Parser_arg2int(inp, &result, base); -- -- if ( rc == 0 ) { -- return result; -- } else { -- return Parser_getint(prompt, deft, min, max, base); -- } --} -- --/* parse int out of a string or prompt for it */ --char *Parser_strarg(char *inp, const char *prompt, const char *deft, -- char *answer, int len) --{ -- if ( inp == NULL || *inp == '\0' ) { -- return Parser_getstr(prompt, deft, answer, len); -- } else -- return inp; --} -- --/* change a string into a number: return 0 on success. No invalid characters -- allowed. The processing of base and validity follows strtol(3)*/ --int Parser_arg2int(const char *inp, long *result, int base) --{ -- char *endptr; -- -- if ( (base !=0) && (base < 2 || base > 36) ) -- return 1; -- -- *result = strtol(inp, &endptr, base); -- -- if ( *inp != '\0' && *endptr == '\0' ) -- return 0; -- else -- return 1; - } - - /* Convert human readable size string to and int; "1k" -> 1000 */ - int Parser_size (int *sizep, char *str) { - int size; - char mod[32]; - - switch (sscanf (str, "%d%1[gGmMkK]", &size, mod)) { - default: - return (-1); - - case 1: - *sizep = size; - return (0); - - case 2: - switch (*mod) { - case 'g': - case 'G': - *sizep = size << 30; - return (0); - - case 'm': - case 'M': - *sizep = size << 20; - return (0); - - case 'k': - case 'K': - *sizep = size << 10; - return (0); - - default: - *sizep = size; - return (0); - } - } - } - - /* Convert a string boolean to an int; "enable" -> 1 */ - int Parser_bool (int *b, char *str) { - if (!strcasecmp (str, "no") || - !strcasecmp (str, "n") || - !strcasecmp (str, "off") || - !strcasecmp (str, "down") || - !strcasecmp (str, 
"disable")) - { - *b = 0; - return (0); - } - - if (!strcasecmp (str, "yes") || - !strcasecmp (str, "y") || - !strcasecmp (str, "on") || - !strcasecmp (str, "up") || - !strcasecmp (str, "enable")) - { - *b = 1; - return (0); - } - - return (-1); --} -- --int Parser_quit(int argc, char **argv) --{ -- argc = argc; -- argv = argv; -- done = 1; -- return 0; --} diff --cc lnet/utils/parser.h index 44e8f2a,9e7e95a..0000000 deleted file mode 100644,100644 --- a/lnet/utils/parser.h +++ /dev/null @@@ -1,73 -1,67 +1,0 @@@ --#ifndef _PARSER_H_ --#define _PARSER_H_ -- --#define HISTORY 100 /* Don't let history grow unbounded */ --#define MAXARGS 512 -- --#define CMD_COMPLETE 0 --#define CMD_INCOMPLETE 1 --#define CMD_NONE 2 --#define CMD_AMBIG 3 --#define CMD_HELP 4 -- --typedef struct parser_cmd { -- char *pc_name; -- int (* pc_func)(int, char **); -- struct parser_cmd * pc_sub_cmd; -- char *pc_help; --} command_t; -- --typedef struct argcmd { -- char *ac_name; -- int (*ac_func)(int, char **); -- char *ac_help; --} argcmd_t; -- --typedef struct network { -- char *type; -- char *server; -- int port; --} network_t; -- --int Parser_quit(int argc, char **argv); --void Parser_init(char *, command_t *); /* Set prompt and load command list */ --int Parser_commands(void); /* Start the command parser */ --void Parser_qhelp(int, char **); /* Quick help routine */ --int Parser_help(int, char **); /* Detailed help routine */ --void Parser_printhelp(char *); /* Detailed help routine */ --void Parser_exit(int, char **); /* Shuts down command parser */ --int Parser_execarg(int argc, char **argv, command_t cmds[]); --int execute_line(char * line); -- --/* Converts a string to an integer */ --int Parser_int(char *, int *); -- --/* Prompts for a string, with default values and a maximum length */ --char *Parser_getstr(const char *prompt, const char *deft, char *res, -- size_t len); -- --/* Prompts for an integer, with minimum, maximum and default values and base */ --int Parser_getint(const 
char *prompt, long min, long max, long deft, -- int base); -- --/* Prompts for a yes/no, with default */ --int Parser_getbool(const char *prompt, int deft); -- --/* Extracts an integer from a string, or prompts if it cannot get one */ --long Parser_intarg(const char *inp, const char *prompt, int deft, -- int min, int max, int base); -- --/* Extracts a word from the input, or propmts if it cannot get one */ --char *Parser_strarg(char *inp, const char *prompt, const char *deft, -- char *answer, int len); -- --/* Extracts an integer from a string with a base */ --int Parser_arg2int(const char *inp, long *result, int base); - - /* Convert human readable size string to and int; "1k" -> 1000 */ - int Parser_size(int *sizep, char *str); - - /* Convert a string boolean to an int; "enable" -> 1 */ - int Parser_bool(int *b, char *str); -- --#endif diff --cc lnet/utils/portals.c index 6025ee6,1bde59f..0000000 deleted file mode 100644,100644 --- a/lnet/utils/portals.c +++ /dev/null @@@ -1,1788 -1,1853 +1,0 @@@ --/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -- * vim:expandtab:shiftwidth=8:tabstop=8: -- * -- * Copyright (C) 2001, 2002 Cluster File Systems, Inc. -- * -- * This file is part of Portals, http://www.sf.net/projects/lustre/ -- * -- * Portals is free software; you can redistribute it and/or -- * modify it under the terms of version 2 of the GNU General Public -- * License as published by the Free Software Foundation. -- * -- * Portals is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with Portals; if not, write to the Free Software -- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-- * -- */ -- --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include --#include -- --#ifdef __CYGWIN__ -- --#include - - #warning assuming little endian - - #define __cpu_to_le64(x) ((__u64)(x)) - #define __le64_to_cpu(x) ((__u64)(x)) - #define __cpu_to_le32(x) ((__u32)(x)) - #define __le32_to_cpu(x) ((__u32)(x)) - #define __cpu_to_le16(x) ((__u16)(x)) - #define __le16_to_cpu(x) ((__u16)(x)) -- --#endif /* __CYGWIN__ */ -- --#include --#include --#include --#include --#include --#include "parser.h" -- --unsigned int portal_debug; --unsigned int portal_printk; - unsigned int portal_stack; -- --static unsigned int g_nal = 0; -- --static int g_socket_txmem = 0; --static int g_socket_rxmem = 0; --static int g_socket_nonagle = 1; -- --typedef struct --{ -- char *name; -- int num; --} name2num_t; -- --static name2num_t nalnames[] = { -- {"any", 0}, -- {"tcp", SOCKNAL}, -- {"elan", QSWNAL}, -- {"gm", GMNAL}, - {"ib", IBNAL}, - {"scimac", SCIMACNAL}, - {"openib", OPENIBNAL}, -- {NULL, -1} --}; -- --static cfg_record_cb_t g_record_cb; - -/* Convert a string boolean to an int; "enable" -> 1 */ -int ptl_parse_bool (int *b, char *str) { - if (!strcasecmp (str, "no") || - !strcasecmp (str, "n") || - !strcasecmp (str, "off") || - !strcasecmp (str, "down") || - !strcasecmp (str, "disable")) - { - *b = 0; - return (0); - } - - if (!strcasecmp (str, "yes") || - !strcasecmp (str, "y") || - !strcasecmp (str, "on") || - !strcasecmp (str, "up") || - !strcasecmp (str, "enable")) - { - *b = 1; - return (0); - } - - return (-1); -} - -/* Convert human readable size string to and int; "1k" -> 1000 */ -int ptl_parse_size (int *sizep, char *str) { - int size; - char mod[32]; - - switch (sscanf (str, "%d%1[gGmMkK]", &size, mod)) { - default: - return (-1); - - case 1: - *sizep = size; - return (0); - - case 2: - switch (*mod) { - case 'g': - case 'G': - *sizep = size << 30; - return (0); - - 
case 'm': - case 'M': - *sizep = size << 20; - return (0); - - case 'k': - case 'K': - *sizep = size << 10; - return (0); - - default: - *sizep = size; - return (0); - } - } -} -- --int --ptl_set_cfg_record_cb(cfg_record_cb_t cb) --{ -- g_record_cb = cb; -- return 0; --} -- --int --pcfg_ioctl(struct portals_cfg *pcfg) --{ -- int rc; -- -- if (pcfg->pcfg_nal ==0) -- pcfg->pcfg_nal = g_nal; -- -- if (g_record_cb) { -- rc = g_record_cb(PORTALS_CFG_TYPE, sizeof(*pcfg), pcfg); -- } else { -- struct portal_ioctl_data data; -- PORTAL_IOC_INIT (data); -- data.ioc_pbuf1 = (char*)pcfg; -- data.ioc_plen1 = sizeof(*pcfg); -- /* XXX liblustre hack XXX */ -- data.ioc_nal_cmd = pcfg->pcfg_command; -- data.ioc_nid = pcfg->pcfg_nid; -- -- rc = l_ioctl (PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data); -- } -- -- return (rc); --} -- -- -- --static name2num_t * --name2num_lookup_name (name2num_t *table, char *str) --{ -- while (table->name != NULL) -- if (!strcmp (str, table->name)) -- return (table); -- else -- table++; -- return (NULL); --} -- --static name2num_t * --name2num_lookup_num (name2num_t *table, int num) --{ -- while (table->name != NULL) -- if (num == table->num) -- return (table); -- else -- table++; -- return (NULL); --} -- --int --ptl_name2nal (char *str) --{ -- name2num_t *e = name2num_lookup_name (nalnames, str); -- -- return ((e == NULL) ? -1 : e->num); --} -- --static char * --nal2name (int nal) --{ -- name2num_t *e = name2num_lookup_num (nalnames, nal); -- -- return ((e == NULL) ? "???" 
: e->name); --} -- --static struct hostent * --ptl_gethostbyname(char * hname) { -- struct hostent *he; -- he = gethostbyname(hname); -- if (!he) { -- switch(h_errno) { -- case HOST_NOT_FOUND: -- case NO_ADDRESS: -- fprintf(stderr, "Unable to resolve hostname: %s\n", -- hname); -- break; -- default: -- fprintf(stderr, "gethostbyname error: %s\n", -- strerror(errno)); -- break; -- } -- return NULL; -- } -- return he; --} -- --int --ptl_parse_port (int *port, char *str) --{ -- char *end; -- -- *port = strtol (str, &end, 0); -- -- if (*end == 0 && /* parsed whole string */ -- *port > 0 && *port < 65536) /* minimal sanity check */ -- return (0); -- -- return (-1); --} -- --int --ptl_parse_time (time_t *t, char *str) --{ -- char *end; -- int n; -- struct tm tm; -- -- *t = strtol (str, &end, 0); -- if (*end == 0) /* parsed whole string */ -- return (0); -- -- memset (&tm, 0, sizeof (tm)); -- n = sscanf (str, "%d-%d-%d-%d:%d:%d", -- &tm.tm_year, &tm.tm_mon, &tm.tm_mday, -- &tm.tm_hour, &tm.tm_min, &tm.tm_sec); -- if (n != 6) -- return (-1); -- -- tm.tm_mon--; /* convert to 0 == Jan */ -- tm.tm_year -= 1900; /* y2k quirk */ -- tm.tm_isdst = -1; /* dunno if it's daylight savings... 
*/ -- -- *t = mktime (&tm); -- if (*t == (time_t)-1) -- return (-1); -- -- return (0); --} -- --int - ptl_parse_ipaddr (__u32 *ipaddrp, char *str) -ptl_parse_ipquad (__u32 *ipaddrp, char *str) --{ - struct hostent *he; -- int a; -- int b; -- int c; -- int d; - - if (!strcmp (str, "_all_")) - { - *ipaddrp = 0; - return (0); - } -- -- if (sscanf (str, "%d.%d.%d.%d", &a, &b, &c, &d) == 4 && -- (a & ~0xff) == 0 && (b & ~0xff) == 0 && -- (c & ~0xff) == 0 && (d & ~0xff) == 0) -- { -- *ipaddrp = (a<<24)|(b<<16)|(c<<8)|d; - return (0); - } - - return (-1); -} - -int -ptl_parse_ipaddr (__u32 *ipaddrp, char *str) -{ - struct hostent *he; - - if (!strcmp (str, "_all_")) - { - *ipaddrp = 0; -- return (0); -- } - - if (ptl_parse_ipquad(ipaddrp, str) == 0) - return (0); -- -- if ((('a' <= str[0] && str[0] <= 'z') || -- ('A' <= str[0] && str[0] <= 'Z')) && -- (he = ptl_gethostbyname (str)) != NULL) -- { -- __u32 addr = *(__u32 *)he->h_addr; -- -- *ipaddrp = ntohl(addr); /* HOST byte order */ -- return (0); -- } -- -- return (-1); --} -- --char * - ptl_ipaddr_2_str (__u32 ipaddr, char *str) -ptl_ipaddr_2_str (__u32 ipaddr, char *str, int lookup) --{ -- __u32 net_ip; -- struct hostent *he; - - net_ip = htonl (ipaddr); - he = gethostbyaddr (&net_ip, sizeof (net_ip), AF_INET); - if (he != NULL) - return (he->h_name); - - if (lookup) { - net_ip = htonl (ipaddr); - he = gethostbyaddr (&net_ip, sizeof (net_ip), AF_INET); - if (he != NULL) { - strcpy(str, he->h_name); - return (str); - } - } -- -- sprintf (str, "%d.%d.%d.%d", -- (ipaddr >> 24) & 0xff, (ipaddr >> 16) & 0xff, -- (ipaddr >> 8) & 0xff, ipaddr & 0xff); -- return (str); --} -- --int --ptl_parse_nid (ptl_nid_t *nidp, char *str) --{ -- __u32 ipaddr; -- char *end; -- unsigned long long ullval; -- -- if (!strcmp (str, "_all_")) { -- *nidp = PTL_NID_ANY; -- return (0); -- } -- -- if (ptl_parse_ipaddr (&ipaddr, str) == 0) { -- *nidp = (ptl_nid_t)ipaddr; -- return (0); -- } -- -- ullval = strtoull(str, &end, 0); -- if (*end == 0) { 
-- /* parsed whole string */ -- *nidp = (ptl_nid_t)ullval; -- return (0); -- } -- -- return (-1); -} - -__u64 ptl_nid2u64(ptl_nid_t nid) -{ - switch (sizeof (nid)) { - case 8: - return (nid); - case 4: - return ((__u32)nid); - default: - fprintf(stderr, "Unexpected sizeof(ptl_nid_t) == %u\n", sizeof(nid)); - abort(); - /* notreached */ - return (-1); - } --} -- --char * --ptl_nid2str (char *buffer, ptl_nid_t nid) --{ - struct hostent *he = NULL; - __u64 nid64 = ptl_nid2u64(nid); - struct hostent *he = 0; -- -- /* Don't try to resolve NIDs that are e.g. Elan host IDs. Assume -- * TCP addresses in the 0.x.x.x subnet are not in use. This can -- * happen on routers and slows things down a _lot_. Bug 3442. */ -- if (nid & 0xff000000) { -- __u32 addr = htonl((__u32)nid); /* back to NETWORK byte order */ - he = gethostbyaddr((const char *)&addr, sizeof(addr), AF_INET); - - he = gethostbyaddr ((const char *)&addr, sizeof (addr), AF_INET); -- } - -- if (he != NULL) - sprintf(buffer, "%#x:%s", (int)(nid >> 32), he->h_name); - sprintf(buffer, "%#x:%s", (int)(nid64 >> 32), he->h_name); -- else - sprintf(buffer, LPX64, nid); - sprintf(buffer, LPX64, nid64); -- -- return (buffer); --} -- --int g_nal_is_set () --{ -- if (g_nal == 0) { -- fprintf (stderr, "Error: you must run the 'network' command first.\n"); -- return (0); -- } -- -- return (1); --} -- --int g_nal_is_compatible (char *cmd, ...) --{ -- va_list ap; -- int nal; -- -- if (!g_nal_is_set ()) -- return (0); -- -- va_start (ap, cmd); -- -- do { -- nal = va_arg (ap, int); -- } while (nal != 0 && nal != g_nal); -- -- va_end (ap); -- -- if (g_nal == nal) -- return (1); -- -- if (cmd != NULL) { -- /* Don't complain verbosely if we've not been passed a command -- * name to complain about! 
*/ -- fprintf (stderr, "Command %s not compatible with nal %s\n", -- cmd, nal2name (g_nal)); -- } -- return (0); --} -- --int --sock_write (int cfd, void *buffer, int nob) --{ -- while (nob > 0) -- { -- int rc = write (cfd, buffer, nob); -- -- if (rc < 0) -- { -- if (errno == EINTR) -- continue; -- -- return (rc); -- } -- -- if (rc == 0) -- { -- fprintf (stderr, "Unexpected zero sock_write\n"); -- abort(); -- } -- -- nob -= rc; -- buffer = (char *)buffer + nob; -- } -- -- return (0); --} -- --int --sock_read (int cfd, void *buffer, int nob) --{ -- while (nob > 0) -- { -- int rc = read (cfd, buffer, nob); -- -- if (rc < 0) -- { -- if (errno == EINTR) -- continue; -- -- return (rc); -- } -- -- if (rc == 0) /* EOF */ -- { -- errno = ECONNABORTED; -- return (-1); -- } -- -- nob -= rc; -- buffer = (char *)buffer + nob; -- } -- -- return (0); --} -- --int ptl_initialize(int argc, char **argv) --{ -- register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH); -- return 0; --} -- -- --int jt_ptl_network(int argc, char **argv) --{ -- name2num_t *entry; -- int nal; -- -- if (argc == 2 && -- (nal = ptl_name2nal (argv[1])) >= 0) { -- g_nal = nal; -- return (0); -- } -- -- fprintf(stderr, "usage: %s \n", argv[0]); -- for (entry = nalnames; entry->name != NULL; entry++) -- fprintf (stderr, "%s%s", entry == nalnames ? 
"<" : "|", entry->name); -- fprintf(stderr, ">\n"); -- return (-1); --} -- - int - jt_ptl_print_autoconnects (int argc, char **argv) - -int -jt_ptl_print_interfaces (int argc, char **argv) --{ - struct portals_cfg pcfg; - char buffer[64]; - struct portals_cfg pcfg; - char buffer[3][64]; -- int index; -- int rc; -- -- if (!g_nal_is_compatible (argv[0], SOCKNAL, 0)) -- return -1; -- -- for (index = 0;;index++) { - PCFG_INIT (pcfg, NAL_CMD_GET_AUTOCONN); - pcfg.pcfg_count = index; - PCFG_INIT (pcfg, NAL_CMD_GET_INTERFACE); - pcfg.pcfg_count = index; -- -- rc = pcfg_ioctl (&pcfg); -- if (rc != 0) -- break; -- - printf (LPX64"@%s:%d #%d buffer %d " - "nonagle %s affinity %s eager %s share %d\n", - pcfg.pcfg_nid, ptl_ipaddr_2_str (pcfg.pcfg_id, buffer), - pcfg.pcfg_misc, pcfg.pcfg_count, pcfg.pcfg_size, - (pcfg.pcfg_flags & 1) ? "on" : "off", - (pcfg.pcfg_flags & 2) ? "on" : "off", - (pcfg.pcfg_flags & 4) ? "on" : "off", - pcfg.pcfg_wait); - printf ("%s: (%s/%s) npeer %d nroute %d\n", - ptl_ipaddr_2_str(pcfg.pcfg_id, buffer[2], 1), - ptl_ipaddr_2_str(pcfg.pcfg_id, buffer[0], 0), - ptl_ipaddr_2_str(pcfg.pcfg_misc, buffer[1], 0), - pcfg.pcfg_fd, pcfg.pcfg_count); -- } -- -- if (index == 0) - printf ("\n"); - printf ("\n"); -- return 0; --} -- - int - jt_ptl_add_autoconnect (int argc, char **argv) -int -jt_ptl_add_interface (int argc, char **argv) --{ - struct portals_cfg pcfg; - ptl_nid_t nid; - __u32 ip; - int port; - int irq_affinity = 0; - int share = 0; - int eager = 0; - struct portals_cfg pcfg; - __u32 ipaddr; -- int rc; - __u32 netmask = 0xffffff00; -- - if (argc < 4 || argc > 5) { - fprintf (stderr, "usage: %s nid ipaddr port [ise]\n", argv[0]); - if (argc < 2 || argc > 3) { - fprintf (stderr, "usage: %s ipaddr [netmask]\n", argv[0]); -- return 0; -- } -- - if (!g_nal_is_compatible (argv[0], SOCKNAL, 0)) - if (!g_nal_is_compatible(argv[0], SOCKNAL, 0)) -- return -1; -- - if (ptl_parse_nid (&nid, argv[1]) != 0 || - nid == PTL_NID_ANY) { - fprintf (stderr, "Can't 
parse NID: %s\n", argv[1]); - if (ptl_parse_ipaddr(&ipaddr, argv[1]) != 0) { - fprintf (stderr, "Can't parse ip: %s\n", argv[1]); - return -1; - } - - if (argc > 2 && - ptl_parse_ipquad(&netmask, argv[2]) != 0) { - fprintf (stderr, "Can't parse netmask: %s\n", argv[2]); -- return -1; -- } - - PCFG_INIT(pcfg, NAL_CMD_ADD_INTERFACE); - pcfg.pcfg_id = ipaddr; - pcfg.pcfg_misc = netmask; -- - if (ptl_parse_ipaddr (&ip, argv[2]) != 0) { - fprintf (stderr, "Can't parse ip addr: %s\n", argv[2]); - rc = pcfg_ioctl (&pcfg); - if (rc != 0) { - fprintf (stderr, "failed to add interface: %s\n", - strerror (errno)); -- return -1; -- } - - return 0; -} -- - if (ptl_parse_port (&port, argv[3]) != 0) { - fprintf (stderr, "Can't parse port: %s\n", argv[3]); -int -jt_ptl_del_interface (int argc, char **argv) -{ - struct portals_cfg pcfg; - int rc; - __u32 ipaddr = 0; - - if (argc > 2) { - fprintf (stderr, "usage: %s [ipaddr]\n", argv[0]); - return 0; - } - - if (!g_nal_is_compatible(argv[0], SOCKNAL, 0)) - return -1; - - if (argc == 2 && - ptl_parse_ipaddr(&ipaddr, argv[1]) != 0) { - fprintf (stderr, "Can't parse ip: %s\n", argv[1]); -- return -1; -- } - - PCFG_INIT(pcfg, NAL_CMD_DEL_INTERFACE); - pcfg.pcfg_id = ipaddr; -- - if (argc > 4) { - char *opts = argv[4]; - - while (*opts != 0) - switch (*opts++) { - case 'i': - irq_affinity = 1; - break; - case 's': - share = 1; - break; - case 'e': - eager = 1; - break; - default: - fprintf (stderr, "Can't parse options: %s\n", - argv[4]); - return -1; - } - rc = pcfg_ioctl (&pcfg); - if (rc != 0) { - fprintf (stderr, "failed to delete interface: %s\n", - strerror (errno)); - return -1; -- } - - return 0; -} -- - PCFG_INIT(pcfg, NAL_CMD_ADD_AUTOCONN); -int -jt_ptl_print_peers (int argc, char **argv) -{ - struct portals_cfg pcfg; - char buffer[2][64]; - int index; - int rc; - - if (!g_nal_is_compatible (argv[0], SOCKNAL, OPENIBNAL, 0)) - return -1; - - for (index = 0;;index++) { - PCFG_INIT (pcfg, NAL_CMD_GET_PEER); - pcfg.pcfg_count = 
index; - - rc = pcfg_ioctl (&pcfg); - if (rc != 0) - break; - - if (g_nal_is_compatible(NULL, SOCKNAL, 0)) - printf (LPX64"[%d]%s@%s:%d #%d\n", - pcfg.pcfg_nid, pcfg.pcfg_wait, - ptl_ipaddr_2_str (pcfg.pcfg_size, buffer[0], 1), - ptl_ipaddr_2_str (pcfg.pcfg_id, buffer[1], 1), - pcfg.pcfg_misc, pcfg.pcfg_count); - else - printf (LPX64"[%d]\n", - pcfg.pcfg_nid, pcfg.pcfg_wait); - } - - if (index == 0) - printf ("\n"); - return 0; -} - -int -jt_ptl_add_peer (int argc, char **argv) -{ - struct portals_cfg pcfg; - ptl_nid_t nid; - __u32 ip = 0; - int port = 0; - int rc; - - if (!g_nal_is_compatible (argv[0], SOCKNAL, OPENIBNAL, 0)) - return -1; - - if (g_nal_is_compatible(NULL, SOCKNAL, 0)) { - if (argc != 4) { - fprintf (stderr, "usage(tcp): %s nid ipaddr port\n", - argv[0]); - return 0; - } - } else if (argc != 2) { - fprintf (stderr, "usage(openib): %s nid\n", argv[0]); - return 0; - } - - if (ptl_parse_nid (&nid, argv[1]) != 0 || - nid == PTL_NID_ANY) { - fprintf (stderr, "Can't parse NID: %s\n", argv[1]); - return -1; - } - - if (g_nal_is_compatible (NULL, SOCKNAL, 0)) { - if (ptl_parse_ipaddr (&ip, argv[2]) != 0) { - fprintf (stderr, "Can't parse ip addr: %s\n", argv[2]); - return -1; - } - - if (ptl_parse_port (&port, argv[3]) != 0) { - fprintf (stderr, "Can't parse port: %s\n", argv[3]); - return -1; - } - } - - PCFG_INIT(pcfg, NAL_CMD_ADD_PEER); -- pcfg.pcfg_nid = nid; -- pcfg.pcfg_id = ip; -- pcfg.pcfg_misc = port; - /* only passing one buffer size! */ - pcfg.pcfg_size = MAX (g_socket_rxmem, g_socket_txmem); - pcfg.pcfg_flags = (g_socket_nonagle ? 0x01 : 0) | - (irq_affinity ? 0x02 : 0) | - (share ? 0x04 : 0) | - (eager ? 
0x08 : 0); -- -- rc = pcfg_ioctl (&pcfg); -- if (rc != 0) { - fprintf (stderr, "failed to enable autoconnect: %s\n", - fprintf (stderr, "failed to add peer: %s\n", -- strerror (errno)); -- return -1; -- } -- -- return 0; --} -- --int - jt_ptl_del_autoconnect (int argc, char **argv) -jt_ptl_del_peer (int argc, char **argv) --{ -- struct portals_cfg pcfg; -- ptl_nid_t nid = PTL_NID_ANY; - __u32 ip = 0; - int share = 0; - int keep_conn = 0; - __u32 ip = 0; - int single_share = 0; - int argidx; -- int rc; - - if (argc > 4) { - fprintf (stderr, "usage: %s [nid] [ipaddr] [sk]\n", - argv[0]); - return 0; - } -- - if (!g_nal_is_compatible (argv[0], SOCKNAL, 0)) - if (!g_nal_is_compatible (argv[0], SOCKNAL, OPENIBNAL, 0)) -- return -1; -- - if (g_nal_is_compatible(NULL, SOCKNAL, 0)) { - if (argc > 4) { - fprintf (stderr, "usage: %s [nid] [ipaddr] [single_share]\n", - argv[0]); - return 0; - } - } else if (argc > 3) { - fprintf (stderr, "usage: %s [nid] [single_share]\n", argv[0]); - return 0; - } - -- if (argc > 1 && -- ptl_parse_nid (&nid, argv[1]) != 0) { -- fprintf (stderr, "Can't parse nid: %s\n", argv[1]); -- return -1; -- } -- - if (argc > 2 && - ptl_parse_ipaddr (&ip, argv[2]) != 0) { - fprintf (stderr, "Can't parse ip addr: %s\n", argv[2]); - return -1; - argidx = 2; - if (g_nal_is_compatible(NULL, SOCKNAL, 0)) { - if (argc > argidx && - ptl_parse_ipaddr (&ip, argv[argidx]) != 0) { - fprintf (stderr, "Can't parse ip addr: %s\n", - argv[argidx]); - return -1; - } - argidx++; -- } - - if (argc > 3) { - char *opts = argv[3]; - - while (*opts != 0) - switch (*opts++) { - case 's': - share = 1; - break; - case 'k': - keep_conn = 1; - break; - default: - fprintf (stderr, "Can't parse flags: %s\n", - argv[3]); - return -1; - } - - if (argc > argidx) { - if (!strcmp (argv[3], "single_share")) { - single_share = 1; - } else { - fprintf (stderr, "Unrecognised arg %s'\n", argv[3]); - return -1; - } -- } -- - PCFG_INIT(pcfg, NAL_CMD_DEL_AUTOCONN); - pcfg.pcfg_nid = nid; - 
pcfg.pcfg_id = ip; - pcfg.pcfg_flags = (share ? 1 : 0) | - (keep_conn ? 2 : 0); - PCFG_INIT(pcfg, NAL_CMD_DEL_PEER); - pcfg.pcfg_nid = nid; - pcfg.pcfg_id = ip; - pcfg.pcfg_flags = single_share; -- -- rc = pcfg_ioctl (&pcfg); -- if (rc != 0) { - fprintf (stderr, "failed to remove autoconnect route: %s\n", - fprintf (stderr, "failed to remove peer: %s\n", -- strerror (errno)); -- return -1; -- } -- -- return 0; --} -- --int --jt_ptl_print_connections (int argc, char **argv) --{ -- struct portals_cfg pcfg; - char buffer[64]; - char buffer[2][64]; -- int index; -- int rc; -- - if (!g_nal_is_compatible (argv[0], SOCKNAL, 0)) - if (!g_nal_is_compatible (argv[0], SOCKNAL, OPENIBNAL, 0)) -- return -1; -- -- for (index = 0;;index++) { -- PCFG_INIT (pcfg, NAL_CMD_GET_CONN); -- pcfg.pcfg_count = index; -- -- rc = pcfg_ioctl (&pcfg); -- if (rc != 0) -- break; -- - printf (LPX64"@%s:%d:%s\n", - pcfg.pcfg_nid, - ptl_ipaddr_2_str (pcfg.pcfg_id, buffer), - pcfg.pcfg_misc, - (pcfg.pcfg_flags == SOCKNAL_CONN_ANY) ? "A" : - (pcfg.pcfg_flags == SOCKNAL_CONN_CONTROL) ? "C" : - (pcfg.pcfg_flags == SOCKNAL_CONN_BULK_IN) ? "I" : - (pcfg.pcfg_flags == SOCKNAL_CONN_BULK_OUT) ? "O" : "?"); - if (g_nal_is_compatible (NULL, SOCKNAL, 0)) - printf ("[%d]%s:"LPX64"@%s:%d:%s %d/%d %s\n", - pcfg.pcfg_gw_nal, /* scheduler */ - ptl_ipaddr_2_str (pcfg.pcfg_fd, buffer[0], 1), /* local IP addr */ - pcfg.pcfg_nid, - ptl_ipaddr_2_str (pcfg.pcfg_id, buffer[1], 1), /* remote IP addr */ - pcfg.pcfg_misc, /* remote port */ - (pcfg.pcfg_flags == SOCKNAL_CONN_ANY) ? "A" : - (pcfg.pcfg_flags == SOCKNAL_CONN_CONTROL) ? "C" : - (pcfg.pcfg_flags == SOCKNAL_CONN_BULK_IN) ? "I" : - (pcfg.pcfg_flags == SOCKNAL_CONN_BULK_OUT) ? "O" : "?", - pcfg.pcfg_count, /* tx buffer size */ - pcfg.pcfg_size, /* rx buffer size */ - pcfg.pcfg_wait ? 
"nagle" : "nonagle"); - else - printf (LPX64"\n", - pcfg.pcfg_nid); -- } -- -- if (index == 0) -- printf ("\n"); -- return 0; --} -- --int jt_ptl_connect(int argc, char **argv) --{ -- struct portals_cfg pcfg; -- struct sockaddr_in srvaddr; - struct sockaddr_in locaddr; -- __u32 ipaddr; -- char *flag; -- int fd, rc; - int nonagle = 0; - int rxmem = 0; - int txmem = 0; - int bind_irq = 0; -- int type = SOCKNAL_CONN_ANY; - int port, rport; - int o; - int olen; - int port; -- -- if (argc < 3) { - fprintf(stderr, "usage: %s ip port [xibctr]\n", argv[0]); - fprintf(stderr, "usage: %s ip port [type]\n", argv[0]); -- return 0; -- } -- -- if (!g_nal_is_compatible (argv[0], SOCKNAL, 0)) -- return -1; -- -- rc = ptl_parse_ipaddr (&ipaddr, argv[1]); -- if (rc != 0) { -- fprintf(stderr, "Can't parse hostname: %s\n", argv[1]); -- return -1; -- } -- -- if (ptl_parse_port (&port, argv[2]) != 0) { -- fprintf (stderr, "Can't parse port: %s\n", argv[2]); -- return -1; -- } -- -- if (argc > 3) -- for (flag = argv[3]; *flag != 0; flag++) -- switch (*flag) -- { - case 'i': - bind_irq = 1; - break; - -- case 'I': -- if (type != SOCKNAL_CONN_ANY) { -- fprintf(stderr, "Can't flag type twice\n"); -- return -1; -- } -- type = SOCKNAL_CONN_BULK_IN; -- break; -- -- case 'O': -- if (type != SOCKNAL_CONN_ANY) { -- fprintf(stderr, "Can't flag type twice\n"); -- return -1; -- } -- type = SOCKNAL_CONN_BULK_OUT; -- break; -- -- case 'C': -- if (type != SOCKNAL_CONN_ANY) { -- fprintf(stderr, "Can't flag type twice\n"); -- return -1; -- } -- type = SOCKNAL_CONN_CONTROL; -- break; -- -- default: -- fprintf (stderr, "unrecognised flag '%c'\n", -- *flag); -- return (-1); -- } - - memset(&locaddr, 0, sizeof(locaddr)); - locaddr.sin_family = AF_INET; - locaddr.sin_addr.s_addr = INADDR_ANY; -- -- memset(&srvaddr, 0, sizeof(srvaddr)); -- srvaddr.sin_family = AF_INET; -- srvaddr.sin_port = htons(port); -- srvaddr.sin_addr.s_addr = htonl(ipaddr); - - for (rport = IPPORT_RESERVED - 1; rport > IPPORT_RESERVED / 
2; --rport) { - fd = socket(PF_INET, SOCK_STREAM, 0); - if ( fd < 0 ) { - fprintf(stderr, "socket() failed: %s\n", strerror(errno)); - return -1; - } - - o = 1; - rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, - &o, sizeof(o)); -- - if (g_socket_nonagle) { - o = 1; - rc = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &o, sizeof (o)); - if (rc != 0) { - fprintf(stderr, "cannot disable nagle: %s\n", - strerror(errno)); - return (-1); - } - } - - if (g_socket_rxmem != 0) { - o = g_socket_rxmem; - rc = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &o, sizeof (o)); - if (rc != 0) { - fprintf(stderr, "cannot set receive buffer size: %s\n", - strerror(errno)); - return (-1); - } - } - - if (g_socket_txmem != 0) { - o = g_socket_txmem; - rc = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &o, sizeof (o)); - if (rc != 0) { - fprintf(stderr, "cannot set send buffer size: %s\n", strerror(errno)); - return (-1); - } - } - - locaddr.sin_port = htons(rport); - rc = bind(fd, (struct sockaddr *)&locaddr, sizeof(locaddr)); - if (rc == 0 || errno == EACCES) { - rc = connect(fd, (struct sockaddr *)&srvaddr, sizeof(srvaddr)); - if (rc == 0) { - break; - } else if (errno != EADDRINUSE) { - fprintf(stderr, "Error connecting to host: %s\n", strerror(errno)); - close(fd); - return -1; - } - } else if (errno != EADDRINUSE) { - fprintf(stderr, "Error binding to port %d: %d: %s\n", port, errno, strerror(errno)); - close(fd); - return -1; - } - fd = socket(PF_INET, SOCK_STREAM, 0); - if ( fd < 0 ) { - fprintf(stderr, "socket() failed: %s\n", strerror(errno)); - return -1; -- } -- - if (rport == IPPORT_RESERVED / 2) { - fprintf(stderr, - "Warning: all privileged ports are in use.\n"); - rc = connect(fd, (struct sockaddr *)&srvaddr, sizeof(srvaddr)); - if ( rc == -1 ) { - fprintf(stderr, "connect() failed: %s\n", strerror(errno)); -- return -1; -- } - - olen = sizeof (txmem); - if (getsockopt (fd, SOL_SOCKET, SO_SNDBUF, &txmem, &olen) != 0) - fprintf (stderr, "Can't get send buffer size: %s\n", strerror (errno)); - 
olen = sizeof (rxmem); - if (getsockopt (fd, SOL_SOCKET, SO_RCVBUF, &rxmem, &olen) != 0) - fprintf (stderr, "Can't get receive buffer size: %s\n", strerror (errno)); - olen = sizeof (nonagle); - if (getsockopt (fd, IPPROTO_TCP, TCP_NODELAY, &nonagle, &olen) != 0) - fprintf (stderr, "Can't get nagle: %s\n", strerror (errno)); -- - printf("Connected host: %s snd: %d rcv: %d nagle: %s type: %s\n", - argv[1], txmem, rxmem, nonagle ? "Disabled" : "Enabled", - printf("Connected host: %s type: %s\n", - argv[1], -- (type == SOCKNAL_CONN_ANY) ? "A" : -- (type == SOCKNAL_CONN_CONTROL) ? "C" : -- (type == SOCKNAL_CONN_BULK_IN) ? "I" : -- (type == SOCKNAL_CONN_BULK_OUT) ? "O" : "?"); -- -- PCFG_INIT(pcfg, NAL_CMD_REGISTER_PEER_FD); -- pcfg.pcfg_nal = g_nal; -- pcfg.pcfg_fd = fd; - pcfg.pcfg_flags = bind_irq; -- pcfg.pcfg_misc = type; -- -- rc = pcfg_ioctl(&pcfg); -- if (rc) { -- fprintf(stderr, "failed to register fd with portals: %s\n", -- strerror(errno)); -- close (fd); -- return -1; -- } -- -- printf("Connection to %s registered with socknal\n", argv[1]); -- -- rc = close(fd); -- if (rc) -- fprintf(stderr, "close failed: %d\n", rc); -- -- return 0; --} -- --int jt_ptl_disconnect(int argc, char **argv) --{ - struct portals_cfg pcfg; - struct portals_cfg pcfg; -- ptl_nid_t nid = PTL_NID_ANY; -- __u32 ipaddr = 0; -- int rc; -- -- if (argc > 3) { -- fprintf(stderr, "usage: %s [nid] [ipaddr]\n", argv[0]); -- return 0; -- } -- - if (!g_nal_is_compatible (NULL, SOCKNAL, 0)) - if (!g_nal_is_compatible (NULL, SOCKNAL, OPENIBNAL, 0)) -- return 0; -- -- if (argc >= 2 && -- ptl_parse_nid (&nid, argv[1]) != 0) { -- fprintf (stderr, "Can't parse nid %s\n", argv[1]); -- return -1; -- } -- - if (argc >= 3 && - if (g_nal_is_compatible (NULL, SOCKNAL, 0) && - argc >= 3 && -- ptl_parse_ipaddr (&ipaddr, argv[2]) != 0) { -- fprintf (stderr, "Can't parse ip addr %s\n", argv[2]); -- return -1; -- } -- -- PCFG_INIT(pcfg, NAL_CMD_CLOSE_CONNECTION); -- pcfg.pcfg_nid = nid; -- pcfg.pcfg_id = ipaddr; 
-- -- rc = pcfg_ioctl(&pcfg); -- if (rc) { -- fprintf(stderr, "failed to remove connection: %s\n", -- strerror(errno)); -- return -1; -- } -- -- return 0; --} -- --int jt_ptl_push_connection (int argc, char **argv) --{ - struct portals_cfg pcfg; - struct portals_cfg pcfg; -- int rc; -- ptl_nid_t nid = PTL_NID_ANY; -- __u32 ipaddr = 0; -- -- if (argc > 3) { -- fprintf(stderr, "usage: %s [nid] [ip]\n", argv[0]); -- return 0; -- } -- -- if (!g_nal_is_compatible (argv[0], SOCKNAL, 0)) -- return -1; -- -- if (argc > 1 && -- ptl_parse_nid (&nid, argv[1]) != 0) { -- fprintf(stderr, "Can't parse nid: %s\n", argv[1]); -- return -1; -- } -- -- if (argc > 2 && -- ptl_parse_ipaddr (&ipaddr, argv[2]) != 0) { -- fprintf(stderr, "Can't parse ipaddr: %s\n", argv[2]); -- } -- -- PCFG_INIT(pcfg, NAL_CMD_PUSH_CONNECTION); -- pcfg.pcfg_nid = nid; -- pcfg.pcfg_id = ipaddr; -- -- rc = pcfg_ioctl(&pcfg); -- if (rc) { -- fprintf(stderr, "failed to push connection: %s\n", -- strerror(errno)); -- return -1; -- } -- -- return 0; --} -- --int --jt_ptl_print_active_txs (int argc, char **argv) --{ - struct portals_cfg pcfg; - struct portals_cfg pcfg; -- int index; -- int rc; -- -- if (!g_nal_is_compatible (argv[0], QSWNAL, 0)) -- return -1; -- -- for (index = 0;;index++) { -- PCFG_INIT(pcfg, NAL_CMD_GET_TXDESC); -- pcfg.pcfg_count = index; -- -- rc = pcfg_ioctl(&pcfg); -- if (rc != 0) -- break; -- -- printf ("%p: %5s payload %6d bytes to "LPX64" via "LPX64" by pid %6d: %s, %s, state %d\n", -- pcfg.pcfg_pbuf1, -- pcfg.pcfg_count == PTL_MSG_ACK ? "ACK" : -- pcfg.pcfg_count == PTL_MSG_PUT ? "PUT" : -- pcfg.pcfg_count == PTL_MSG_GET ? "GET" : -- pcfg.pcfg_count == PTL_MSG_REPLY ? "REPLY" : "", -- pcfg.pcfg_size, -- pcfg.pcfg_nid, -- pcfg.pcfg_nid2, -- pcfg.pcfg_misc, -- (pcfg.pcfg_flags & 1) ? "delayed" : "immediate", -- (pcfg.pcfg_flags & 2) ? 
"nblk" : "normal", -- pcfg.pcfg_flags >> 2); -- } -- -- if (index == 0) -- printf ("\n"); -- return 0; --} -- --int jt_ptl_ping(int argc, char **argv) --{ -- int rc; -- ptl_nid_t nid; -- long count = 1; -- long size = 4; -- long timeout = 1; -- struct portal_ioctl_data data; -- -- if (argc < 2) { -- fprintf(stderr, "usage: %s nid [count] [size] [timeout (secs)]\n", argv[0]); -- return 0; -- } -- -- if (!g_nal_is_set()) -- return -1; -- -- if (ptl_parse_nid (&nid, argv[1]) != 0) -- { -- fprintf (stderr, "Can't parse nid \"%s\"\n", argv[1]); -- return (-1); -- } -- -- if (argc > 2) -- { -- count = atol(argv[2]); -- -- if (count < 0 || count > 20000) -- { -- fprintf(stderr, "are you insane? %ld is a crazy count.\n", count); -- return -1; -- } -- } -- -- if (argc > 3) -- size= atol(argv[3]); -- -- if (argc > 4) -- timeout = atol (argv[4]); -- -- PORTAL_IOC_INIT (data); -- data.ioc_count = count; -- data.ioc_size = size; -- data.ioc_nid = nid; -- data.ioc_nal = g_nal; -- data.ioc_timeout = timeout; -- -- rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_PING, &data); -- if (rc) { -- fprintf(stderr, "failed to start pinger: %s\n", -- strerror(errno)); -- return -1; -- } -- return 0; --} -- --int jt_ptl_shownid(int argc, char **argv) --{ -- struct portal_ioctl_data data; -- int rc; -- -- if (argc > 1) { -- fprintf(stderr, "usage: %s\n", argv[0]); -- return 0; -- } -- -- if (!g_nal_is_set()) -- return -1; -- -- PORTAL_IOC_INIT (data); -- data.ioc_nal = g_nal; -- rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_NID, &data); -- if (rc < 0) -- fprintf(stderr, "getting my NID failed: %s\n", -- strerror (errno)); -- else -- printf(LPX64"\n", data.ioc_nid); -- return 0; --} -- --int jt_ptl_mynid(int argc, char **argv) --{ -- int rc; -- char hostname[1024]; -- char *nidstr; -- struct portals_cfg pcfg; -- ptl_nid_t mynid; - - -- if (argc > 2) { -- fprintf(stderr, "usage: %s [NID]\n", argv[0]); -- fprintf(stderr, "NID defaults to the primary IP address of the machine.\n"); -- return 0; -- } -- -- 
if (!g_nal_is_set()) -- return -1; -- -- if (argc >= 2) -- nidstr = argv[1]; -- else if (gethostname(hostname, sizeof(hostname)) != 0) { -- fprintf(stderr, "gethostname failed: %s\n", -- strerror(errno)); -- return -1; -- } -- else -- nidstr = hostname; -- -- rc = ptl_parse_nid (&mynid, nidstr); -- if (rc != 0) { -- fprintf (stderr, "Can't convert '%s' into a NID\n", nidstr); -- return -1; -- } -- -- PCFG_INIT(pcfg, NAL_CMD_REGISTER_MYNID); -- pcfg.pcfg_nid = mynid; -- -- rc = pcfg_ioctl(&pcfg); -- if (rc < 0) -- fprintf(stderr, "setting my NID failed: %s\n", -- strerror(errno)); -- else - printf("registered my nid "LPX64" (%s)\n", mynid, hostname); - printf("registered my nid "LPX64" (%s)\n", - ptl_nid2u64(mynid), hostname); -- return 0; --} -- --int --jt_ptl_fail_nid (int argc, char **argv) --{ -- int rc; -- ptl_nid_t nid; -- unsigned int threshold; -- struct portal_ioctl_data data; -- -- if (argc < 2 || argc > 3) -- { -- fprintf (stderr, "usage: %s nid|\"_all_\" [count (0 == mend)]\n", argv[0]); -- return (0); -- } -- -- if (!g_nal_is_set()) -- return (-1); -- -- if (!strcmp (argv[1], "_all_")) -- nid = PTL_NID_ANY; -- else if (ptl_parse_nid (&nid, argv[1]) != 0) -- { -- fprintf (stderr, "Can't parse nid \"%s\"\n", argv[1]); -- return (-1); -- } -- -- if (argc < 3) -- threshold = PTL_MD_THRESH_INF; -- else if (sscanf (argv[2], "%i", &threshold) != 1) { -- fprintf (stderr, "Can't parse count \"%s\"\n", argv[2]); -- return (-1); -- } -- -- PORTAL_IOC_INIT (data); -- data.ioc_nal = g_nal; -- data.ioc_nid = nid; -- data.ioc_count = threshold; -- -- rc = l_ioctl (PORTALS_DEV_ID, IOC_PORTAL_FAIL_NID, &data); -- if (rc < 0) -- fprintf (stderr, "IOC_PORTAL_FAIL_NID failed: %s\n", -- strerror (errno)); -- else -- printf ("%s %s\n", threshold == 0 ? 
"Unfailing" : "Failing", argv[1]); - - return (0); - } - - int - jt_ptl_rxmem (int argc, char **argv) - { - int size; - - if (argc > 1) - { - if (Parser_size (&size, argv[1]) != 0 || size < 0) - { - fprintf (stderr, "Can't parse size %s\n", argv[1]); - return (0); - } - - g_socket_rxmem = size; - } - printf ("Socket rmem = %d\n", g_socket_rxmem); - return (0); - } - - int - jt_ptl_txmem (int argc, char **argv) - { - int size; -- - if (argc > 1) - { - if (Parser_size (&size, argv[1]) != 0 || size < 0) - { - fprintf (stderr, "Can't parse size %s\n", argv[1]); - return (0); - } - g_socket_txmem = size; - } - printf ("Socket txmem = %d\n", g_socket_txmem); - return (0); - } - - int - jt_ptl_nagle (int argc, char **argv) - { - int enable; - - if (argc > 1) - { - if (Parser_bool (&enable, argv[1]) != 0) - { - fprintf (stderr, "Can't parse boolean %s\n", argv[1]); - return (-1); - } - g_socket_nonagle = !enable; - } - printf ("Nagle %s\n", g_socket_nonagle ? "disabled" : "enabled"); -- return (0); --} -- --int --jt_ptl_add_route (int argc, char **argv) --{ -- struct portals_cfg pcfg; -- ptl_nid_t nid1; -- ptl_nid_t nid2; -- ptl_nid_t gateway_nid; -- int rc; -- -- if (argc < 3) -- { -- fprintf (stderr, "usage: %s gateway target [target]\n", argv[0]); -- return (0); -- } -- -- if (!g_nal_is_set()) -- return (-1); -- -- if (ptl_parse_nid (&gateway_nid, argv[1]) != 0) -- { -- fprintf (stderr, "Can't parse gateway NID \"%s\"\n", argv[1]); -- return (-1); -- } -- -- if (ptl_parse_nid (&nid1, argv[2]) != 0) -- { -- fprintf (stderr, "Can't parse first target NID \"%s\"\n", argv[2]); -- return (-1); -- } -- -- if (argc < 4) -- nid2 = nid1; -- else if (ptl_parse_nid (&nid2, argv[3]) != 0) -- { -- fprintf (stderr, "Can't parse second target NID \"%s\"\n", argv[4]); -- return (-1); -- } -- -- PCFG_INIT(pcfg, NAL_CMD_ADD_ROUTE); -- pcfg.pcfg_nid = gateway_nid; -- pcfg.pcfg_nal = ROUTER; -- pcfg.pcfg_gw_nal = g_nal; -- pcfg.pcfg_nid2 = MIN (nid1, nid2); -- pcfg.pcfg_nid3 = MAX (nid1, 
nid2); -- -- rc = pcfg_ioctl(&pcfg); -- if (rc != 0) -- { -- fprintf (stderr, "NAL_CMD_ADD_ROUTE failed: %s\n", strerror (errno)); -- return (-1); -- } -- -- return (0); --} -- --int --jt_ptl_del_route (int argc, char **argv) --{ -- struct portals_cfg pcfg; -- ptl_nid_t nid; -- ptl_nid_t nid1 = PTL_NID_ANY; -- ptl_nid_t nid2 = PTL_NID_ANY; -- int rc; -- -- if (argc < 2) -- { -- fprintf (stderr, "usage: %s targetNID\n", argv[0]); -- return (0); -- } -- -- if (!g_nal_is_set()) -- return (-1); -- -- if (ptl_parse_nid (&nid, argv[1]) != 0) -- { -- fprintf (stderr, "Can't parse gateway NID \"%s\"\n", argv[1]); -- return (-1); -- } -- -- if (argc >= 3 && -- ptl_parse_nid (&nid1, argv[2]) != 0) -- { -- fprintf (stderr, "Can't parse target NID \"%s\"\n", argv[2]); -- return (-1); -- } -- -- if (argc < 4) { -- nid2 = nid1; -- } else { -- if (ptl_parse_nid (&nid2, argv[3]) != 0) { -- fprintf (stderr, "Can't parse target NID \"%s\"\n", argv[3]); -- return (-1); -- } -- -- if (nid1 > nid2) { -- ptl_nid_t tmp = nid1; -- -- nid1 = nid2; -- nid2 = tmp; -- } -- } -- -- PCFG_INIT(pcfg, NAL_CMD_DEL_ROUTE); -- pcfg.pcfg_nal = ROUTER; -- pcfg.pcfg_gw_nal = g_nal; -- pcfg.pcfg_nid = nid; -- pcfg.pcfg_nid2 = nid1; -- pcfg.pcfg_nid3 = nid2; -- -- rc = pcfg_ioctl(&pcfg); -- if (rc != 0) -- { - fprintf (stderr, "NAL_CMD_DEL_ROUTE ("LPX64") failed: %s\n", nid, strerror (errno)); - fprintf (stderr, "NAL_CMD_DEL_ROUTE ("LPX64") failed: %s\n", - ptl_nid2u64(nid), strerror (errno)); -- return (-1); -- } -- -- return (0); --} -- --int --jt_ptl_notify_router (int argc, char **argv) --{ -- struct portals_cfg pcfg; -- int enable; -- ptl_nid_t nid; -- int rc; -- struct timeval now; -- time_t when; -- -- if (argc < 3) -- { -- fprintf (stderr, "usage: %s targetNID [