Whamcloud - gitweb
b=2776
author zab <zab>
Fri, 19 Mar 2004 01:10:36 +0000 (01:10 +0000)
committer zab <zab>
Fri, 19 Mar 2004 01:10:36 +0000 (01:10 +0000)
r=phil

Land b_cray_portals_merge on HEAD.  This consists of four families of changes:

1) Eric's work to change some APIs in Lustre's portals to come into line with
   Cray's portals and the spec.

2) Add --with-cray-portals=<path> to the build and use it to cut down the
   build when we're building Lustre against external includes for Cray's portals.

3) Move some facilities from portals.o into libcfs.o so that Lustre can
   consume them when it is running against Cray's portals.o.

4) Fix up the liblustre build.  These changes have also made it to
   b_cray_delivery recently.

175 files changed:
lnet/Makefile.am
lnet/archdep.m4
lnet/configure.in [deleted file]
lnet/include/config.h.in [deleted file]
lnet/include/linux/kp30.h
lnet/include/linux/kpr.h [new file with mode: 0644]
lnet/include/linux/libcfs.h [new file with mode: 0644]
lnet/include/linux/lustre_list.h [new file with mode: 0644]
lnet/include/lnet/api-support.h
lnet/include/lnet/api.h
lnet/include/lnet/arg-blocks.h
lnet/include/lnet/build_check.h [new file with mode: 0644]
lnet/include/lnet/defines.h
lnet/include/lnet/errno.h
lnet/include/lnet/internal.h
lnet/include/lnet/lib-dispatch.h
lnet/include/lnet/lib-lnet.h
lnet/include/lnet/lib-nal.h
lnet/include/lnet/lib-p30.h
lnet/include/lnet/lib-types.h
lnet/include/lnet/list.h
lnet/include/lnet/lnet.h
lnet/include/lnet/nal.h
lnet/include/lnet/nalids.h
lnet/include/lnet/p30.h
lnet/include/lnet/types.h
lnet/klnds/gmlnd/gmlnd.h
lnet/klnds/gmlnd/gmlnd_api.c
lnet/klnds/gmlnd/gmlnd_cb.c
lnet/klnds/iblnd/ibnal.c
lnet/klnds/iblnd/ibnal.h
lnet/klnds/iblnd/ibnal_cb.c
lnet/klnds/qswlnd/qswlnd.c
lnet/klnds/qswlnd/qswlnd.h
lnet/klnds/qswlnd/qswlnd_cb.c
lnet/klnds/scimaclnd/scimacnal.c
lnet/klnds/scimaclnd/scimacnal.h
lnet/klnds/scimaclnd/scimacnal_cb.c
lnet/klnds/socklnd/socklnd.c
lnet/klnds/socklnd/socklnd.h
lnet/klnds/socklnd/socklnd_cb.c
lnet/libcfs/Makefile.am
lnet/libcfs/debug.c
lnet/libcfs/module.c
lnet/lnet/Makefile.am
lnet/lnet/Makefile.mk
lnet/lnet/api-eq.c
lnet/lnet/api-errno.c
lnet/lnet/api-init.c
lnet/lnet/api-ni.c
lnet/lnet/api-wrap.c
lnet/lnet/lib-eq.c
lnet/lnet/lib-init.c
lnet/lnet/lib-md.c
lnet/lnet/lib-me.c
lnet/lnet/lib-move.c
lnet/lnet/lib-msg.c
lnet/lnet/lib-ni.c
lnet/lnet/module.c [new file with mode: 0644]
lnet/router/router.h
lnet/tests/ping_cli.c
lnet/tests/ping_srv.c
lnet/tests/sping_cli.c
lnet/tests/sping_srv.c
lnet/ulnds/procapi.c
lnet/ulnds/proclib.c
lnet/ulnds/socklnd/procapi.c
lnet/ulnds/socklnd/proclib.c
lnet/utils/Makefile.am
lustre/Makefile.am
lustre/configure.in
lustre/include/config.h.in
lustre/include/liblustre.h
lustre/include/linux/lustre_dlm.h
lustre/include/linux/lustre_net.h
lustre/include/linux/obd_class.h
lustre/kernel_patches/patches/bproc-patch-2.4.20
lustre/ldlm/l_lock.c
lustre/ldlm/ldlm_lib.c
lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_lockd.c
lustre/ldlm/ldlm_request.c
lustre/liblustre/Makefile.am
lustre/liblustre/llite_lib.c
lustre/liblustre/namei.c
lustre/liblustre/super.c
lustre/liblustre/tests/.cvsignore
lustre/liblustre/tests/echo_test.c
lustre/llite/llite_lib.c
lustre/llite/rw.c
lustre/llite/super.c
lustre/llite/super25.c
lustre/lov/lov_pack.c
lustre/obdclass/class_obd.c
lustre/obdclass/llog_lvfs.c
lustre/obdclass/lustre_peer.c
lustre/osc/lproc_osc.c
lustre/osc/osc_create.c
lustre/osc/osc_internal.h
lustre/osc/osc_request.c
lustre/portals/Makefile.am
lustre/portals/archdep.m4
lustre/portals/configure.in [deleted file]
lustre/portals/include/config.h.in [deleted file]
lustre/portals/include/linux/kp30.h
lustre/portals/include/linux/kpr.h [new file with mode: 0644]
lustre/portals/include/linux/libcfs.h [new file with mode: 0644]
lustre/portals/include/linux/lustre_list.h [new file with mode: 0644]
lustre/portals/include/portals/api-support.h
lustre/portals/include/portals/api.h
lustre/portals/include/portals/arg-blocks.h
lustre/portals/include/portals/build_check.h [new file with mode: 0644]
lustre/portals/include/portals/defines.h
lustre/portals/include/portals/errno.h
lustre/portals/include/portals/lib-dispatch.h
lustre/portals/include/portals/lib-nal.h
lustre/portals/include/portals/lib-p30.h
lustre/portals/include/portals/lib-types.h
lustre/portals/include/portals/list.h
lustre/portals/include/portals/nal.h
lustre/portals/include/portals/nalids.h
lustre/portals/include/portals/p30.h
lustre/portals/include/portals/types.h
lustre/portals/knals/gmnal/gmnal.h
lustre/portals/knals/gmnal/gmnal_api.c
lustre/portals/knals/gmnal/gmnal_cb.c
lustre/portals/knals/ibnal/ibnal.c
lustre/portals/knals/ibnal/ibnal.h
lustre/portals/knals/ibnal/ibnal_cb.c
lustre/portals/knals/qswnal/qswnal.c
lustre/portals/knals/qswnal/qswnal.h
lustre/portals/knals/qswnal/qswnal_cb.c
lustre/portals/knals/scimacnal/scimacnal.c
lustre/portals/knals/scimacnal/scimacnal.h
lustre/portals/knals/scimacnal/scimacnal_cb.c
lustre/portals/knals/socknal/socknal.c
lustre/portals/knals/socknal/socknal.h
lustre/portals/knals/socknal/socknal_cb.c
lustre/portals/libcfs/Makefile.am
lustre/portals/libcfs/debug.c
lustre/portals/libcfs/module.c
lustre/portals/portals/Makefile.am
lustre/portals/portals/Makefile.mk
lustre/portals/portals/api-eq.c
lustre/portals/portals/api-errno.c
lustre/portals/portals/api-init.c
lustre/portals/portals/api-ni.c
lustre/portals/portals/api-wrap.c
lustre/portals/portals/lib-eq.c
lustre/portals/portals/lib-init.c
lustre/portals/portals/lib-md.c
lustre/portals/portals/lib-me.c
lustre/portals/portals/lib-move.c
lustre/portals/portals/lib-msg.c
lustre/portals/portals/lib-ni.c
lustre/portals/portals/module.c [new file with mode: 0644]
lustre/portals/router/router.h
lustre/portals/tests/ping_cli.c
lustre/portals/tests/ping_srv.c
lustre/portals/tests/sping_cli.c
lustre/portals/tests/sping_srv.c
lustre/portals/unals/procapi.c
lustre/portals/unals/proclib.c
lustre/portals/utils/Makefile.am
lustre/ptlbd/blk.c
lustre/ptlrpc/Makefile.am
lustre/ptlrpc/client.c
lustre/ptlrpc/events.c
lustre/ptlrpc/llog_net.c
lustre/ptlrpc/niobuf.c
lustre/ptlrpc/pers.c [new file with mode: 0644]
lustre/ptlrpc/pinger.c
lustre/ptlrpc/ptlrpc_internal.h
lustre/ptlrpc/service.c
lustre/utils/lconf

index 1a223f2..0083ac1 100644 (file)
@@ -5,8 +5,15 @@
 
 EXTRA_DIST = Rules.linux archdep.m4 include 
 DIST_SUBDIRS = libcfs portals knals unals utils tests doc router
+
 if LIBLUSTRE
 SUBDIRS = portals unals utils
 else
+
+if CRAY_PORTALS
+SUBDIRS = libcfs tests doc 
+else 
 SUBDIRS = libcfs portals knals unals utils tests doc router
 endif
+
+endif
index 65cfaff..b8b5c9d 100644 (file)
@@ -4,6 +4,16 @@ AC_ARG_ENABLE(inkernel, [  --enable-inkernel set up 2.5 kernel makefiles])
 AM_CONDITIONAL(INKERNEL, test x$enable_inkernel = xyes)
 echo "Makefile for in kernel build: $INKERNEL"
 
+# -------- are we building against an external portals? -------
+# haha, I wonder how one is really supposed to do this
+# automake seems to have a DEFS variable which looks good
+AC_ARG_WITH(cray-portals, [  --with-cray-portals=[path] path to cray portals],
+       CRAY_PORTALS_INCLUDE="-I$with_cray_portals"
+       CC="$CC -DCRAY_PORTALS=1"
+       )
+AC_SUBST(CRAY_PORTALS_INCLUDE)
+AM_CONDITIONAL(CRAY_PORTALS, test ! "x$with_cray_portals" = x)
+
 # -------- liblustre compilation --------------
 AC_ARG_WITH(lib, [  --with-lib compile lustre library], host_cpu="lib")
 
@@ -156,10 +166,11 @@ fi
 
 # ------------ include paths ------------------
 
+KINCFLAGS="$CRAY_PORTALS_INCLUDE $CRAY_PORTALS_COMMANDLINE \
+       -I\$(top_srcdir)/include \
+       -I\$(top_srcdir)/portals/include -I$LINUX/include"
 if test $host_cpu != "lib" ; then 
-    KINCFLAGS="-I\$(top_srcdir)/include -I\$(top_srcdir)/portals/include -I$LINUX/include"
-else
-    KINCFLAGS='-I$(top_srcdir)/include -I$(top_srcdir)/portals/include'
+    KINCFLAGS="$KINCFLAGS -I$LINUX/include"
 fi
 CPPFLAGS="$KINCFLAGS $ARCHCPPFLAGS"
 
diff --git a/lnet/configure.in b/lnet/configure.in
deleted file mode 100644 (file)
index bacf532..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-# This version is here to make autoconf happy; the name is a file which is
-# "unique" to this directory so that configure knows where it should run.
-AC_INIT(knals/Makefile.am, 3.0)
-AC_CANONICAL_SYSTEM
-# Copyright (C) 2001  Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-# Automake variables.  Steal the version number from packaging/intersync.spec
-AM_INIT_AUTOMAKE(portals, builtin([esyscmd], [sed -ne '/.*define IVERSION /{ s/.*IVERSION //; p; }' libcfs/module.c]))
-# AM_MAINTAINER_MODE
-
-sinclude(build.m4)
-sinclude(archdep.m4)
-
-if test x$enable_inkernel = xyes ; then
-cp Kernelenv.mk Kernelenv.in
-cp Makefile.mk Makefile.in
-cp libcfs/Makefile.mk libcfs/Makefile.in
-cp portals/Makefile.mk portals/Makefile.in
-cp knals/Makefile.mk knals/Makefile.in
-cp knals/socknal/Makefile.mk knals/socknal/Makefile.in
-cp router/Makefile.mk router/Makefile.in
-fi
-
-AM_CONFIG_HEADER(include/config.h)
-
-AC_OUTPUT([Makefile Kernelenv libcfs/Makefile portals/Makefile \
-          unals/Makefile knals/Makefile router/Makefile \
-         knals/socknal/Makefile knals/gmnal/Makefile knals/qswnal/Makefile \
-         knals/scimacnal/Makefile knals/ibnal/Makefile\
-          utils/Makefile tests/Makefile doc/Makefile ])
-
diff --git a/lnet/include/config.h.in b/lnet/include/config.h.in
deleted file mode 100644 (file)
index f295154..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-/* portals/include/config.h.in.  Generated from configure.in by autoheader.  */
-
-/* Use the Pinger */
-#undef ENABLE_PINGER
-
-/* Define to 1 if you have the <inttypes.h> header file. */
-#undef HAVE_INTTYPES_H
-
-/* Define to 1 if you have the <memory.h> header file. */
-#undef HAVE_MEMORY_H
-
-/* Define to 1 if you have the <stdint.h> header file. */
-#undef HAVE_STDINT_H
-
-/* Define to 1 if you have the <stdlib.h> header file. */
-#undef HAVE_STDLIB_H
-
-/* Define to 1 if you have the <strings.h> header file. */
-#undef HAVE_STRINGS_H
-
-/* Define to 1 if you have the <string.h> header file. */
-#undef HAVE_STRING_H
-
-/* Define to 1 if you have the <sys/stat.h> header file. */
-#undef HAVE_SYS_STAT_H
-
-/* Define to 1 if you have the <sys/types.h> header file. */
-#undef HAVE_SYS_TYPES_H
-
-/* Define to 1 if you have the <unistd.h> header file. */
-#undef HAVE_UNISTD_H
-
-/* IOCTL Buffer Size */
-#undef OBD_MAX_IOCTL_BUFFER
-
-/* Name of package */
-#undef PACKAGE
-
-/* Define to the address where bug reports for this package should be sent. */
-#undef PACKAGE_BUGREPORT
-
-/* Define to the full name of this package. */
-#undef PACKAGE_NAME
-
-/* Define to the full name and version of this package. */
-#undef PACKAGE_STRING
-
-/* Define to the one symbol short name of this package. */
-#undef PACKAGE_TARNAME
-
-/* Define to the version of this package. */
-#undef PACKAGE_VERSION
-
-/* The size of a `unsigned long long', as computed by sizeof. */
-#undef SIZEOF_UNSIGNED_LONG_LONG
-
-/* Define to 1 if you have the ANSI C header files. */
-#undef STDC_HEADERS
-
-/* Version number of package */
-#undef VERSION
index c080a57..9e7e7c2 100644 (file)
@@ -4,6 +4,7 @@
 #ifndef _KP30_INCLUDED
 #define _KP30_INCLUDED
 
+#include <linux/libcfs.h>
 #define PORTAL_DEBUG
 
 #ifndef offsetof
 
 #define LOWEST_BIT_SET(x)       ((x) & ~((x) - 1))
 
-/*
- *  Debugging
- */
-extern unsigned int portal_subsystem_debug;
-extern unsigned int portal_stack;
-extern unsigned int portal_debug;
-extern unsigned int portal_printk;
-extern unsigned int portal_cerror;
-/* Debugging subsystems (32 bits, non-overlapping) */
-#define S_UNDEFINED   0x00000001
-#define S_MDC         0x00000002
-#define S_MDS         0x00000004
-#define S_OSC         0x00000008
-#define S_OST         0x00000010
-#define S_CLASS       0x00000020
-#define S_LOG         0x00000040
-#define S_LLITE       0x00000080
-#define S_RPC         0x00000100
-#define S_MGMT        0x00000200
-#define S_PORTALS     0x00000400
-#define S_SOCKNAL     0x00000800
-#define S_QSWNAL      0x00001000
-#define S_PINGER      0x00002000
-#define S_FILTER      0x00004000
-#define S_PTLBD       0x00008000
-#define S_ECHO        0x00010000
-#define S_LDLM        0x00020000
-#define S_LOV         0x00040000
-#define S_GMNAL       0x00080000
-#define S_PTLROUTER   0x00100000
-#define S_COBD        0x00200000
-#define S_IBNAL       0x00400000
-
-/* If you change these values, please keep portals/utils/debug.c
- * up to date! */
-
-/* Debugging masks (32 bits, non-overlapping) */
-#define D_TRACE       0x00000001 /* ENTRY/EXIT markers */
-#define D_INODE       0x00000002
-#define D_SUPER       0x00000004
-#define D_EXT2        0x00000008 /* anything from ext2_debug */
-#define D_MALLOC      0x00000010 /* print malloc, free information */
-#define D_CACHE       0x00000020 /* cache-related items */
-#define D_INFO        0x00000040 /* general information */
-#define D_IOCTL       0x00000080 /* ioctl related information */
-#define D_BLOCKS      0x00000100 /* ext2 block allocation */
-#define D_NET         0x00000200 /* network communications */
-#define D_WARNING     0x00000400 /* CWARN(...) == CDEBUG (D_WARNING, ...) */
-#define D_BUFFS       0x00000800
-#define D_OTHER       0x00001000
-#define D_DENTRY      0x00002000
-#define D_PORTALS     0x00004000 /* ENTRY/EXIT markers */
-#define D_PAGE        0x00008000 /* bulk page handling */
-#define D_DLMTRACE    0x00010000
-#define D_ERROR       0x00020000 /* CERROR(...) == CDEBUG (D_ERROR, ...) */
-#define D_EMERG       0x00040000 /* CEMERG(...) == CDEBUG (D_EMERG, ...) */
-#define D_HA          0x00080000 /* recovery and failover */
-#define D_RPCTRACE    0x00100000 /* for distributed debugging */
-#define D_VFSTRACE    0x00200000
-#define D_READA       0x00400000 /* read-ahead */
-
-#ifdef __KERNEL__
-# include <linux/sched.h> /* THREAD_SIZE */
-#else
-# ifndef THREAD_SIZE /* x86_64 has THREAD_SIZE in userspace */
-#  define THREAD_SIZE 8192
-# endif
-#endif
-
-#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5)
-
-#ifdef __KERNEL__
-# ifdef  __ia64__
-#  define CDEBUG_STACK (THREAD_SIZE -                                      \
-                        ((unsigned long)__builtin_dwarf_cfa() &            \
-                         (THREAD_SIZE - 1)))
-# else
-#  define CDEBUG_STACK (THREAD_SIZE -                                      \
-                        ((unsigned long)__builtin_frame_address(0) &       \
-                         (THREAD_SIZE - 1)))
-# endif
-
-#define CHECK_STACK(stack)                                                    \
-        do {                                                                  \
-                if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) {    \
-                        portals_debug_msg(DEBUG_SUBSYSTEM, D_WARNING,         \
-                                          __FILE__, __FUNCTION__, __LINE__,   \
-                                          (stack),"maximum lustre stack %u\n",\
-                                          portal_stack = (stack));            \
-                      /*panic("LBUG");*/                                      \
-                }                                                             \
-        } while (0)
-#else /* __KERNEL__ */
-#define CHECK_STACK(stack) do { } while(0)
-#define CDEBUG_STACK (0L)
-#endif /* __KERNEL__ */
-
-#if 1
-#define CDEBUG(mask, format, a...)                                            \
-do {                                                                          \
-        if (portal_cerror == 0)                                               \
-                break;                                                        \
-        CHECK_STACK(CDEBUG_STACK);                                            \
-        if (((mask) & (D_ERROR | D_EMERG | D_WARNING)) ||                     \
-            (portal_debug & (mask) &&                                         \
-             portal_subsystem_debug & DEBUG_SUBSYSTEM))                       \
-                portals_debug_msg(DEBUG_SUBSYSTEM, mask,                      \
-                                  __FILE__, __FUNCTION__, __LINE__,           \
-                                  CDEBUG_STACK, format, ## a);                \
-} while (0)
-
-#define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a)
-#define CERROR(format, a...) CDEBUG(D_ERROR, format, ## a)
-#define CEMERG(format, a...) CDEBUG(D_EMERG, format, ## a)
-
-#define GOTO(label, rc)                                                 \
-do {                                                                    \
-        long GOTO__ret = (long)(rc);                                    \
-        CDEBUG(D_TRACE,"Process leaving via %s (rc=%lu : %ld : %lx)\n", \
-               #label, (unsigned long)GOTO__ret, (signed long)GOTO__ret,\
-               (signed long)GOTO__ret);                                 \
-        goto label;                                                     \
-} while (0)
-
-#define RETURN(rc)                                                      \
-do {                                                                    \
-        typeof(rc) RETURN__ret = (rc);                                  \
-        CDEBUG(D_TRACE, "Process leaving (rc=%lu : %ld : %lx)\n",       \
-               (long)RETURN__ret, (long)RETURN__ret, (long)RETURN__ret);\
-        return RETURN__ret;                                             \
-} while (0)
-
-#define ENTRY                                                           \
-do {                                                                    \
-        CDEBUG(D_TRACE, "Process entered\n");                           \
-} while (0)
-
-#define EXIT                                                            \
-do {                                                                    \
-        CDEBUG(D_TRACE, "Process leaving\n");                           \
-} while(0)
-#else
-#define CDEBUG(mask, format, a...)      do { } while (0)
-#define CWARN(format, a...)             do { } while (0)
-#define CERROR(format, a...)            printk("<3>" format, ## a)
-#define CEMERG(format, a...)            printk("<0>" format, ## a)
-#define GOTO(label, rc)                 do { (void)(rc); goto label; } while (0)
-#define RETURN(rc)                      return (rc)
-#define ENTRY                           do { } while (0)
-#define EXIT                            do { } while (0)
-#endif
-
 #ifdef __KERNEL__
 # include <linux/vmalloc.h>
 # include <linux/time.h>
@@ -172,7 +21,7 @@ do {                                                                    \
 # include <linux/highmem.h>
 # include <linux/module.h>
 # include <linux/version.h>
-# include <portals/lib-nal.h>
+# include <portals/p30.h>
 # include <linux/smp_lock.h>
 # include <asm/atomic.h>
 
@@ -353,188 +202,6 @@ do {                                                                    \
 #endif
 
 /******************************************************************************/
-/* Kernel Portals Router interface */
-
-typedef void (*kpr_fwd_callback_t)(void *arg, int error); // completion callback
-
-/* space for routing targets to stash "stuff" in a forwarded packet */
-typedef union {
-        long long        _alignment;
-        void            *_space[16];            /* scale with CPU arch */
-} kprfd_scratch_t;
-
-/* Kernel Portals Routing Forwarded message Descriptor */
-typedef struct {
-        struct list_head     kprfd_list;        /* stash in queues (routing target can use) */
-        ptl_nid_t            kprfd_target_nid;  /* final destination NID */
-        ptl_nid_t            kprfd_gateway_nid; /* gateway NID */
-        ptl_hdr_t           *kprfd_hdr;         /* header in wire byte order */
-        int                  kprfd_nob;         /* # payload bytes */
-        int                  kprfd_niov;        /* # payload frags */
-        ptl_kiov_t          *kprfd_kiov;        /* payload fragments */
-        void                *kprfd_router_arg;  /* originating NAL's router arg */
-        kpr_fwd_callback_t   kprfd_callback;    /* completion callback */
-        void                *kprfd_callback_arg; /* completion callback arg */
-        kprfd_scratch_t      kprfd_scratch;     /* scratchpad for routing targets */
-} kpr_fwd_desc_t;
-
-typedef void  (*kpr_fwd_t)(void *arg, kpr_fwd_desc_t *fwd);
-typedef void  (*kpr_notify_t)(void *arg, ptl_nid_t peer, int alive);
-
-/* NAL's routing interface (Kernel Portals Routing Nal Interface) */
-typedef const struct {
-        int             kprni_nalid;    /* NAL's id */
-        void           *kprni_arg;      /* Arg to pass when calling into NAL */
-        kpr_fwd_t       kprni_fwd;      /* NAL's forwarding entrypoint */
-        kpr_notify_t    kprni_notify;   /* NAL's notification entrypoint */
-} kpr_nal_interface_t;
-
-/* Router's routing interface (Kernel Portals Routing Router Interface) */
-typedef const struct {
-        /* register the calling NAL with the router and get back the handle for
-         * subsequent calls */
-        int     (*kprri_register) (kpr_nal_interface_t *nal_interface,
-                                   void **router_arg);
-
-        /* ask the router to find a gateway that forwards to 'nid' and is a
-         * peer of the calling NAL; assume caller will send 'nob' bytes of
-         * payload there */
-        int     (*kprri_lookup) (void *router_arg, ptl_nid_t nid, int nob,
-                                 ptl_nid_t *gateway_nid);
-
-        /* hand a packet over to the router for forwarding */
-        kpr_fwd_t kprri_fwd_start;
-
-        /* hand a packet back to the router for completion */
-        void    (*kprri_fwd_done) (void *router_arg, kpr_fwd_desc_t *fwd,
-                                   int error);
-
-        /* notify the router about peer state */
-        void    (*kprri_notify) (void *router_arg, ptl_nid_t peer,
-                                 int alive, time_t when);
-
-        /* the calling NAL is shutting down */
-        void    (*kprri_shutdown) (void *router_arg);
-
-        /* deregister the calling NAL with the router */
-        void    (*kprri_deregister) (void *router_arg);
-
-} kpr_router_interface_t;
-
-/* Convenient struct for NAL to stash router interface/args */
-typedef struct {
-        kpr_router_interface_t  *kpr_interface;
-        void                    *kpr_arg;
-} kpr_router_t;
-
-/* Router's control interface (Kernel Portals Routing Control Interface) */
-typedef const struct {
-        int     (*kprci_add_route)(int gateway_nal, ptl_nid_t gateway_nid,
-                                   ptl_nid_t lo_nid, ptl_nid_t hi_nid);
-        int     (*kprci_del_route)(int gateway_nal, ptl_nid_t gateway_nid,
-                                   ptl_nid_t lo_nid, ptl_nid_t hi_nid);
-        int     (*kprci_get_route)(int index, int *gateway_nal,
-                                   ptl_nid_t *gateway,
-                                   ptl_nid_t *lo_nid, ptl_nid_t *hi_nid,
-                                   int *alive);
-        int     (*kprci_notify)(int gateway_nal, ptl_nid_t gateway_nid,
-                                int alive, time_t when);
-} kpr_control_interface_t;
-
-extern kpr_control_interface_t  kpr_control_interface;
-extern kpr_router_interface_t   kpr_router_interface;
-
-static inline int
-kpr_register (kpr_router_t *router, kpr_nal_interface_t *nalif)
-{
-        int    rc;
-
-        router->kpr_interface = PORTAL_SYMBOL_GET (kpr_router_interface);
-        if (router->kpr_interface == NULL)
-                return (-ENOENT);
-
-        rc = (router->kpr_interface)->kprri_register (nalif, &router->kpr_arg);
-        if (rc != 0)
-                router->kpr_interface = NULL;
-
-        PORTAL_SYMBOL_PUT (kpr_router_interface);
-        return (rc);
-}
-
-static inline int
-kpr_routing (kpr_router_t *router)
-{
-        return (router->kpr_interface != NULL);
-}
-
-static inline int
-kpr_lookup (kpr_router_t *router, ptl_nid_t nid, int nob, ptl_nid_t *gateway_nid)
-{
-        if (!kpr_routing (router))
-                return (-ENETUNREACH);
-
-        return (router->kpr_interface->kprri_lookup(router->kpr_arg, nid, nob,
-                                                    gateway_nid));
-}
-
-static inline void
-kpr_fwd_init (kpr_fwd_desc_t *fwd, ptl_nid_t nid, ptl_hdr_t *hdr,
-              int nob, int niov, ptl_kiov_t *kiov,
-              kpr_fwd_callback_t callback, void *callback_arg)
-{
-        fwd->kprfd_target_nid   = nid;
-        fwd->kprfd_gateway_nid  = nid;
-        fwd->kprfd_hdr          = hdr;
-        fwd->kprfd_nob          = nob;
-        fwd->kprfd_niov         = niov;
-        fwd->kprfd_kiov         = kiov;
-        fwd->kprfd_callback     = callback;
-        fwd->kprfd_callback_arg = callback_arg;
-}
-
-static inline void
-kpr_fwd_start (kpr_router_t *router, kpr_fwd_desc_t *fwd)
-{
-        if (!kpr_routing (router))
-                fwd->kprfd_callback (fwd->kprfd_callback_arg, -ENETUNREACH);
-        else
-                router->kpr_interface->kprri_fwd_start (router->kpr_arg, fwd);
-}
-
-static inline void
-kpr_fwd_done (kpr_router_t *router, kpr_fwd_desc_t *fwd, int error)
-{
-        LASSERT (kpr_routing (router));
-        router->kpr_interface->kprri_fwd_done (router->kpr_arg, fwd, error);
-}
-
-static inline void
-kpr_notify (kpr_router_t *router,
-            ptl_nid_t peer, int alive, time_t when)
-{
-        if (!kpr_routing (router))
-                return;
-
-        router->kpr_interface->kprri_notify(router->kpr_arg, peer, alive, when);
-}
-
-static inline void
-kpr_shutdown (kpr_router_t *router)
-{
-        if (kpr_routing (router))
-                router->kpr_interface->kprri_shutdown (router->kpr_arg);
-}
-
-static inline void
-kpr_deregister (kpr_router_t *router)
-{
-        if (!kpr_routing (router))
-                return;
-        router->kpr_interface->kprri_deregister (router->kpr_arg);
-        router->kpr_interface = NULL;
-}
-
-/******************************************************************************/
 
 #ifdef PORTALS_PROFILING
 #define prof_enum(FOO) PROF__##FOO
@@ -804,38 +471,6 @@ do {                                                    \
 #define PING_SYNC       0
 #define PING_ASYNC      1
 
-struct portal_ioctl_data {
-        __u32 ioc_len;
-        __u32 ioc_version;
-        __u64 ioc_nid;
-        __u64 ioc_nid2;
-        __u64 ioc_nid3;
-        __u32 ioc_count;
-        __u32 ioc_nal;
-        __u32 ioc_nal_cmd;
-        __u32 ioc_fd;
-        __u32 ioc_id;
-
-        __u32 ioc_flags;
-        __u32 ioc_size;
-
-        __u32 ioc_wait;
-        __u32 ioc_timeout;
-        __u32 ioc_misc;
-
-        __u32 ioc_inllen1;
-        char *ioc_inlbuf1;
-        __u32 ioc_inllen2;
-        char *ioc_inlbuf2;
-
-        __u32 ioc_plen1; /* buffers in userspace */
-        char *ioc_pbuf1;
-        __u32 ioc_plen2; /* buffers in userspace */
-        char *ioc_pbuf2;
-
-        char ioc_bulk[0];
-};
-
 struct portal_ioctl_hdr {
         __u32 ioc_len;
         __u32 ioc_version;
@@ -1076,13 +711,6 @@ enum {
         DEBUG_DAEMON_CONTINUE    =  4,
 };
 
-/* XXX remove to lustre ASAP */
-struct lustre_peer {
-        ptl_nid_t       peer_nid;
-        ptl_handle_ni_t peer_ni;
-};
-
-
 /* module.c */
 typedef int (*nal_cmd_handler_t)(struct portals_cfg *, void * private);
 int kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private);
diff --git a/lnet/include/linux/kpr.h b/lnet/include/linux/kpr.h
new file mode 100644 (file)
index 0000000..45b58fe
--- /dev/null
@@ -0,0 +1,191 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _KPR_H
+#define _KPR_H
+
+# include <portals/lib-nal.h> /* for ptl_hdr_t */
+
+/******************************************************************************/
+/* Kernel Portals Router interface */
+
+typedef void (*kpr_fwd_callback_t)(void *arg, int error); /* completion callback: gets kprfd_callback_arg and an error code */
+
+/* space for routing targets to stash "stuff" in a forwarded packet */
+typedef union {
+        long long        _alignment;            /* presumably only here to force alignment -- confirm */
+        void            *_space[16];            /* scale with CPU arch */
+} kprfd_scratch_t;
+
+/* Kernel Portals Routing Forwarded message Descriptor.
+ * kpr_fwd_init() sets the NID, header, payload and callback fields; it does
+ * not touch kprfd_list, kprfd_router_arg or kprfd_scratch. */
+typedef struct {
+        struct list_head     kprfd_list;        /* stash in queues (routing target can use) */
+        ptl_nid_t            kprfd_target_nid;  /* final destination NID */
+        ptl_nid_t            kprfd_gateway_nid; /* gateway NID */
+        ptl_hdr_t           *kprfd_hdr;         /* header in wire byte order */
+        int                  kprfd_nob;         /* # payload bytes */
+        int                  kprfd_niov;        /* # payload frags */
+        ptl_kiov_t          *kprfd_kiov;        /* payload fragments */
+        void                *kprfd_router_arg;  /* originating NAL's router arg */
+        kpr_fwd_callback_t   kprfd_callback;    /* completion callback */
+        void                *kprfd_callback_arg; /* completion callback arg */
+        kprfd_scratch_t      kprfd_scratch;     /* scratchpad for routing targets */
+} kpr_fwd_desc_t;
+
+typedef void  (*kpr_fwd_t)(void *arg, kpr_fwd_desc_t *fwd);           /* forwarding entrypoint */
+typedef void  (*kpr_notify_t)(void *arg, ptl_nid_t peer, int alive);  /* peer state notification entrypoint */
+
+/* NAL's routing interface (Kernel Portals Routing Nal Interface).
+ * The typedef is const-qualified, so objects of this type are read-only. */
+typedef const struct {
+        int             kprni_nalid;    /* NAL's id */
+        void           *kprni_arg;      /* Arg to pass when calling into NAL */
+        kpr_fwd_t       kprni_fwd;      /* NAL's forwarding entrypoint */
+        kpr_notify_t    kprni_notify;   /* NAL's notification entrypoint */
+} kpr_nal_interface_t;
+
+/* Router's routing interface (Kernel Portals Routing Router Interface).
+ * NALs reach these entrypoints through the kpr_*() inline wrappers that
+ * follow in this header; the typedef is const-qualified. */
+typedef const struct {
+        /* register the calling NAL with the router and get back the handle for
+         * subsequent calls */
+        int     (*kprri_register) (kpr_nal_interface_t *nal_interface,
+                                   void **router_arg);
+
+        /* ask the router to find a gateway that forwards to 'nid' and is a
+         * peer of the calling NAL; assume caller will send 'nob' bytes of
+         * payload there */
+        int     (*kprri_lookup) (void *router_arg, ptl_nid_t nid, int nob,
+                                 ptl_nid_t *gateway_nid);
+
+        /* hand a packet over to the router for forwarding; invoked as
+         * (router_arg, fwd), see kpr_fwd_t */
+        kpr_fwd_t kprri_fwd_start;
+
+        /* hand a packet back to the router for completion */
+        void    (*kprri_fwd_done) (void *router_arg, kpr_fwd_desc_t *fwd,
+                                   int error);
+
+        /* notify the router about peer state */
+        void    (*kprri_notify) (void *router_arg, ptl_nid_t peer,
+                                 int alive, time_t when);
+
+        /* the calling NAL is shutting down */
+        void    (*kprri_shutdown) (void *router_arg);
+
+        /* deregister the calling NAL with the router */
+        void    (*kprri_deregister) (void *router_arg);
+
+} kpr_router_interface_t;
+
+/* Convenient struct for NAL to stash router interface/args */
+typedef struct {
+        kpr_router_interface_t  *kpr_interface; /* NULL when not routing, see kpr_routing() */
+        void                    *kpr_arg;       /* handle filled in through kprri_register() */
+} kpr_router_t;
+
+/* Router's control interface (Kernel Portals Routing Control Interface) */
+typedef const struct {
+        int     (*kprci_add_route)(int gateway_nal, ptl_nid_t gateway_nid,
+                                   ptl_nid_t lo_nid, ptl_nid_t hi_nid);
+        int     (*kprci_del_route)(int gateway_nal, ptl_nid_t gateway_nid,
+                                   ptl_nid_t lo_nid, ptl_nid_t hi_nid);
+        int     (*kprci_get_route)(int index, int *gateway_nal,
+                                   ptl_nid_t *gateway,
+                                   ptl_nid_t *lo_nid, ptl_nid_t *hi_nid,
+                                   int *alive);
+        int     (*kprci_notify)(int gateway_nal, ptl_nid_t gateway_nid,
+                                int alive, time_t when);
+} kpr_control_interface_t;
+
+extern kpr_control_interface_t  kpr_control_interface;
+extern kpr_router_interface_t   kpr_router_interface;  /* obtained via PORTAL_SYMBOL_GET() in kpr_register() */
+
+/*
+ * Look up the router's interface and register NAL interface 'nalif' with it.
+ * On success the interface pointer and the handle handed back by the router
+ * stay stashed in 'router'; on failure router->kpr_interface ends up NULL.
+ * Returns 0 on success, -ENOENT when the router symbol cannot be obtained,
+ * or the non-zero result of the router's kprri_register().
+ */
+static inline int
+kpr_register (kpr_router_t *router, kpr_nal_interface_t *nalif)
+{
+        kpr_router_interface_t *rif;
+        int                     status;
+
+        rif = PORTAL_SYMBOL_GET (kpr_router_interface);
+        router->kpr_interface = rif;
+        if (rif == NULL)
+                return -ENOENT;
+
+        status = rif->kprri_register (nalif, &router->kpr_arg);
+        if (status != 0)
+                router->kpr_interface = NULL;   /* registration refused */
+
+        PORTAL_SYMBOL_PUT (kpr_router_interface);
+        return status;
+}
+
+/* Returns 1 when 'router' holds a router interface, 0 otherwise. */
+static inline int
+kpr_routing (kpr_router_t *router)
+{
+        if (router->kpr_interface == NULL)
+                return 0;
+
+        return 1;
+}
+
+/*
+ * Ask the router for a gateway that forwards to 'nid', for a payload of
+ * 'nob' bytes; 'gateway_nid' is passed straight through to the router's
+ * kprri_lookup().  Returns -ENETUNREACH when no router interface is
+ * registered, otherwise whatever kprri_lookup() returns.
+ */
+static inline int
+kpr_lookup (kpr_router_t *router, ptl_nid_t nid, int nob, ptl_nid_t *gateway_nid)
+{
+        int rc = -ENETUNREACH;
+
+        if (kpr_routing (router))
+                rc = router->kpr_interface->kprri_lookup (router->kpr_arg,
+                                                          nid, nob,
+                                                          gateway_nid);
+        return rc;
+}
+
+/*
+ * Prepare 'fwd' for forwarding: record the destination NID, the header,
+ * the payload description (byte count, fragment count, fragments) and the
+ * completion callback with its argument.  The gateway NID starts out equal
+ * to the target NID.  kprfd_list, kprfd_router_arg and kprfd_scratch are
+ * left as they were.
+ */
+static inline void
+kpr_fwd_init (kpr_fwd_desc_t *fwd, ptl_nid_t nid, ptl_hdr_t *hdr,
+              int nob, int niov, ptl_kiov_t *kiov,
+              kpr_fwd_callback_t callback, void *callback_arg)
+{
+        /* addressing */
+        fwd->kprfd_gateway_nid = fwd->kprfd_target_nid = nid;
+
+        /* header and payload */
+        fwd->kprfd_hdr  = hdr;
+        fwd->kprfd_kiov = kiov;
+        fwd->kprfd_niov = niov;
+        fwd->kprfd_nob  = nob;
+
+        /* completion */
+        fwd->kprfd_callback_arg = callback_arg;
+        fwd->kprfd_callback     = callback;
+}
+
+/*
+ * Hand 'fwd' over to the router for forwarding.  When no router interface
+ * is registered the descriptor's completion callback is invoked right away
+ * with -ENETUNREACH instead.
+ */
+static inline void
+kpr_fwd_start (kpr_router_t *router, kpr_fwd_desc_t *fwd)
+{
+        if (kpr_routing (router)) {
+                router->kpr_interface->kprri_fwd_start (router->kpr_arg, fwd);
+                return;
+        }
+
+        /* no router: complete the forward immediately with an error */
+        fwd->kprfd_callback (fwd->kprfd_callback_arg, -ENETUNREACH);
+}
+
+/*
+ * Hand 'fwd' back to the router for completion with status 'error'.
+ * Asserts that a router interface is registered.
+ */
+static inline void
+kpr_fwd_done (kpr_router_t *router, kpr_fwd_desc_t *fwd, int error)
+{
+        kpr_router_interface_t *rif;
+
+        LASSERT (kpr_routing (router));
+
+        rif = router->kpr_interface;
+        rif->kprri_fwd_done (router->kpr_arg, fwd, error);
+}
+
+/*
+ * Pass a peer state report ('peer', 'alive', 'when') on to the router's
+ * kprri_notify().  Does nothing when no router interface is registered.
+ */
+static inline void
+kpr_notify (kpr_router_t *router,
+            ptl_nid_t peer, int alive, time_t when)
+{
+        if (kpr_routing (router))
+                router->kpr_interface->kprri_notify (router->kpr_arg,
+                                                     peer, alive, when);
+}
+
+/*
+ * Tell the router that the calling NAL is shutting down.  Does nothing
+ * when no router interface is registered.
+ */
+static inline void
+kpr_shutdown (kpr_router_t *router)
+{
+        if (!kpr_routing (router))
+                return;
+
+        router->kpr_interface->kprri_shutdown (router->kpr_arg);
+}
+
+/*
+ * Deregister the calling NAL from the router and forget the router
+ * interface, so that kpr_routing() reports 0 afterwards.  Does nothing
+ * when no router interface is registered.
+ */
+static inline void
+kpr_deregister (kpr_router_t *router)
+{
+        if (kpr_routing (router)) {
+                router->kpr_interface->kprri_deregister (router->kpr_arg);
+                router->kpr_interface = NULL;
+        }
+}
+
+#endif /* _KPR_H */
diff --git a/lnet/include/linux/libcfs.h b/lnet/include/linux/libcfs.h
new file mode 100644 (file)
index 0000000..ff51787
--- /dev/null
@@ -0,0 +1,222 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _LIBCFS_H
+
+
+#define PORTAL_DEBUG
+
+#ifndef offsetof
+/* Fallback for environments that do not provide offsetof().  The result is
+ * an unsigned long rather than an int: the offset is derived from a pointer,
+ * which does not fit in an int on 64-bit targets, and the standard
+ * offsetof() yields an unsigned type as well. */
+# define offsetof(typ,memb)     ((unsigned long)((char *)&(((typ *)0)->memb)))
+#endif
+
+/* Isolates the least significant set bit of x (0 when x is 0).
+ * Note that x is evaluated twice. */
+#define LOWEST_BIT_SET(x)       ((x) & ~((x) - 1))
+
+/*
+ *  Debugging
+ */
+extern unsigned int portal_subsystem_debug;
+extern unsigned int portal_stack;
+extern unsigned int portal_debug;
+extern unsigned int portal_printk;
+extern unsigned int portal_cerror;
+/* Debugging subsystems (32 bits, non-overlapping) */
+#define S_UNDEFINED   0x00000001
+#define S_MDC         0x00000002
+#define S_MDS         0x00000004
+#define S_OSC         0x00000008
+#define S_OST         0x00000010
+#define S_CLASS       0x00000020
+#define S_LOG         0x00000040
+#define S_LLITE       0x00000080
+#define S_RPC         0x00000100
+#define S_MGMT        0x00000200
+#define S_PORTALS     0x00000400
+#define S_SOCKNAL     0x00000800
+#define S_QSWNAL      0x00001000
+#define S_PINGER      0x00002000
+#define S_FILTER      0x00004000
+#define S_PTLBD       0x00008000
+#define S_ECHO        0x00010000
+#define S_LDLM        0x00020000
+#define S_LOV         0x00040000
+#define S_GMNAL       0x00080000
+#define S_PTLROUTER   0x00100000
+#define S_COBD        0x00200000
+#define S_IBNAL       0x00400000
+
+/* If you change these values, please keep portals/utils/debug.c
+ * up to date! */
+
+/* Debugging masks (32 bits, non-overlapping) */
+#define D_TRACE       0x00000001 /* ENTRY/EXIT markers */
+#define D_INODE       0x00000002
+#define D_SUPER       0x00000004
+#define D_EXT2        0x00000008 /* anything from ext2_debug */
+#define D_MALLOC      0x00000010 /* print malloc, free information */
+#define D_CACHE       0x00000020 /* cache-related items */
+#define D_INFO        0x00000040 /* general information */
+#define D_IOCTL       0x00000080 /* ioctl related information */
+#define D_BLOCKS      0x00000100 /* ext2 block allocation */
+#define D_NET         0x00000200 /* network communications */
+#define D_WARNING     0x00000400 /* CWARN(...) == CDEBUG (D_WARNING, ...) */
+#define D_BUFFS       0x00000800
+#define D_OTHER       0x00001000
+#define D_DENTRY      0x00002000
+#define D_PORTALS     0x00004000 /* ENTRY/EXIT markers -- NOTE(review): same text as D_TRACE; confirm intended meaning */
+#define D_PAGE        0x00008000 /* bulk page handling */
+#define D_DLMTRACE    0x00010000
+#define D_ERROR       0x00020000 /* CERROR(...) == CDEBUG (D_ERROR, ...) */
+#define D_EMERG       0x00040000 /* CEMERG(...) == CDEBUG (D_EMERG, ...) */
+#define D_HA          0x00080000 /* recovery and failover */
+#define D_RPCTRACE    0x00100000 /* for distributed debugging */
+#define D_VFSTRACE    0x00200000
+#define D_READA       0x00400000 /* read-ahead */
+
+#ifdef __KERNEL__
+# include <linux/sched.h> /* THREAD_SIZE */
+#else
+# ifndef THREAD_SIZE /* x86_64 has THREAD_SIZE in userspace */
+#  define THREAD_SIZE 8192 /* userspace fallback value */
+# endif
+#endif
+
+#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5) /* i.e. THREAD_SIZE / 32 */
+
+#ifdef __KERNEL__ /* CDEBUG_STACK is THREAD_SIZE minus the current frame
+                   * address taken modulo THREAD_SIZE (the CFA is used on
+                   * ia64).  CHECK_STACK logs a D_WARNING and raises
+                   * portal_stack whenever 'stack' exceeds both 3/4 of
+                   * THREAD_SIZE and the previous portal_stack. */
+# ifdef  __ia64__
+#  define CDEBUG_STACK (THREAD_SIZE -                                      \
+                        ((unsigned long)__builtin_dwarf_cfa() &            \
+                         (THREAD_SIZE - 1)))
+# else
+#  define CDEBUG_STACK (THREAD_SIZE -                                      \
+                        ((unsigned long)__builtin_frame_address(0) &       \
+                         (THREAD_SIZE - 1)))
+# endif
+
+#define CHECK_STACK(stack)                                                    \
+        do {                                                                  \
+                if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) {    \
+                        portals_debug_msg(DEBUG_SUBSYSTEM, D_WARNING,         \
+                                          __FILE__, __FUNCTION__, __LINE__,   \
+                                          (stack),"maximum lustre stack %u\n",\
+                                          portal_stack = (stack));            \
+                      /*panic("LBUG");*/                                      \
+                }                                                             \
+        } while (0)
+#else /* __KERNEL__: outside the kernel the stack check is a no-op */
+#define CHECK_STACK(stack) do { } while(0)
+#define CDEBUG_STACK (0L)
+#endif /* __KERNEL__ */
+
+#if 1 /* CDEBUG does nothing when portal_cerror is 0.  Otherwise it runs
+       * CHECK_STACK and then logs through portals_debug_msg() if 'mask'
+       * contains D_ERROR, D_EMERG or D_WARNING, or if 'mask' is enabled in
+       * portal_debug and DEBUG_SUBSYSTEM is enabled in
+       * portal_subsystem_debug.  DEBUG_SUBSYSTEM is not defined in this
+       * header. */
+#define CDEBUG(mask, format, a...)                                            \
+do {                                                                          \
+        if (portal_cerror == 0)                                               \
+                break;                                                        \
+        CHECK_STACK(CDEBUG_STACK);                                            \
+        if (((mask) & (D_ERROR | D_EMERG | D_WARNING)) ||                     \
+            (portal_debug & (mask) &&                                         \
+             portal_subsystem_debug & DEBUG_SUBSYSTEM))                       \
+                portals_debug_msg(DEBUG_SUBSYSTEM, mask,                      \
+                                  __FILE__, __FUNCTION__, __LINE__,           \
+                                  CDEBUG_STACK, format, ## a);                \
+} while (0)
+
+#define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a)
+#define CERROR(format, a...) CDEBUG(D_ERROR, format, ## a)
+#define CEMERG(format, a...) CDEBUG(D_EMERG, format, ## a)
+
+#define GOTO(label, rc)                                                 \
+do {                                                                    \
+        long GOTO__ret = (long)(rc);                                    \
+        CDEBUG(D_TRACE,"Process leaving via %s (rc=%lu : %ld : %lx)\n", \
+               #label, (unsigned long)GOTO__ret, (signed long)GOTO__ret,\
+               (signed long)GOTO__ret);                                 \
+        goto label;                                                     \
+} while (0)
+
+#define RETURN(rc)                                                      \
+do {                                                                    \
+        typeof(rc) RETURN__ret = (rc);                                  \
+        CDEBUG(D_TRACE, "Process leaving (rc=%lu : %ld : %lx)\n",       \
+               (long)RETURN__ret, (long)RETURN__ret, (long)RETURN__ret);\
+        return RETURN__ret;                                             \
+} while (0)
+
+/* Trace markers for function entry and exit; both log at D_TRACE.
+ * CDEBUG() already expands to a single do { } while (0) statement. */
+#define ENTRY   CDEBUG(D_TRACE, "Process entered\n")
+
+#define EXIT    CDEBUG(D_TRACE, "Process leaving\n")
+#else /* debugging compiled out: only CERROR/CEMERG print, via printk;
+       * GOTO still evaluates 'rc' and RETURN still returns it */
+#define CDEBUG(mask, format, a...)      do { } while (0)
+#define CWARN(format, a...)             do { } while (0)
+#define CERROR(format, a...)            printk("<3>" format, ## a)
+#define CEMERG(format, a...)            printk("<0>" format, ## a)
+#define GOTO(label, rc)                 do { (void)(rc); goto label; } while (0)
+#define RETURN(rc)                      return (rc)
+#define ENTRY                           do { } while (0)
+#define EXIT                            do { } while (0)
+#endif
+
+struct portal_ioctl_data {
+        __u32 ioc_len;
+        __u32 ioc_version;
+        __u64 ioc_nid;
+        __u64 ioc_nid2;
+        __u64 ioc_nid3;
+        __u32 ioc_count;
+        __u32 ioc_nal;
+        __u32 ioc_nal_cmd;
+        __u32 ioc_fd;
+        __u32 ioc_id;
+
+        __u32 ioc_flags;
+        __u32 ioc_size;
+
+        __u32 ioc_wait;
+        __u32 ioc_timeout;
+        __u32 ioc_misc;
+
+        __u32 ioc_inllen1; /* presumably the length of ioc_inlbuf1 -- confirm */
+        char *ioc_inlbuf1;
+        __u32 ioc_inllen2; /* presumably the length of ioc_inlbuf2 -- confirm */
+        char *ioc_inlbuf2;
+
+        __u32 ioc_plen1; /* buffers in userspace */
+        char *ioc_pbuf1;
+        __u32 ioc_plen2; /* buffers in userspace */
+        char *ioc_pbuf2;
+
+        char ioc_bulk[0]; /* zero-length array marking the end of the fixed part */
+};
+
+#ifdef __KERNEL__ /* ioctl handler registration is kernel-only */
+
+#include <linux/list.h>
+
+struct libcfs_ioctl_handler {
+        struct list_head item;  /* list linkage; self-linked by DECLARE_IOCTL_HANDLER */
+        int (*handle_ioctl)(struct portal_ioctl_data *data,
+                            unsigned int cmd, unsigned long args);
+};
+
+#define DECLARE_IOCTL_HANDLER(ident, func)              \
+        struct libcfs_ioctl_handler ident = {           \
+                .item = LIST_HEAD_INIT(ident.item),     \
+                .handle_ioctl = func                    \
+        }
+
+int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand);
+int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand);
+
+#endif
+
+#define _LIBCFS_H
+
+#endif /* _LIBCFS_H */
diff --git a/lnet/include/linux/lustre_list.h b/lnet/include/linux/lustre_list.h
new file mode 100644 (file)
index 0000000..a218f2c
--- /dev/null
@@ -0,0 +1,246 @@
+#ifndef _LUSTRE_LIST_H
+#define _LUSTRE_LIST_H
+
+#ifdef __KERNEL__
+#include <linux/list.h>
+#else
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+#define prefetch(a) ((void)(a)) /* no prefetching here: evaluate and discard; argument parenthesized so any expression is accepted */
+
+struct list_head {
+       struct list_head *next, *prev;
+};
+
+typedef struct list_head list_t;
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) } /* empty list: next and prev both point at the head itself */
+
+#define LIST_HEAD(name) \
+       struct list_head name = LIST_HEAD_INIT(name)
+
+#define INIT_LIST_HEAD(ptr) do { \
+       (ptr)->next = (ptr); (ptr)->prev = (ptr); \
+} while (0)
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ *
+ * All four links (next->prev, new->next, new->prev, prev->next)
+ * are rewritten; the arguments are not checked in any way.
+ */
+static inline void __list_add(struct list_head * new,
+                             struct list_head * prev,
+                             struct list_head * next)
+{
+       next->prev = new;
+       new->next = next;
+       new->prev = prev;
+       prev->next = new;
+}
+
+/**
+ * list_add - link a new entry in directly after a head
+ * @new: the entry to insert
+ * @head: the entry it will follow
+ *
+ * The entry lands at the front of the list, so repeated
+ * list_add() calls give stack (LIFO) ordering.
+ */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+       struct list_head *after = head->next;
+
+       __list_add(new, head, after);
+}
+
+/**
+ * list_add_tail - link a new entry in directly before a head
+ * @new: the entry to insert
+ * @head: the entry it will precede
+ *
+ * The entry lands at the back of the list, so repeated
+ * list_add_tail() calls give queue (FIFO) ordering.
+ */
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+       struct list_head *before = head->prev;
+
+       __list_add(new, before, head);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ *
+ * Only the two neighbours are updated; the links of the entry
+ * that sat between them are not modified here.
+ */
+static inline void __list_del(struct list_head * prev, struct list_head * next)
+{
+       next->prev = prev;
+       prev->next = next;
+}
+
+/**
+ * list_del - unlink an entry from its list
+ * @entry: the element to remove.
+ *
+ * The entry's own links are left pointing at its former neighbours,
+ * so list_empty() on the entry does not return true afterwards.
+ */
+static inline void list_del(struct list_head *entry)
+{
+       struct list_head *before = entry->prev;
+       struct list_head *after = entry->next;
+
+       __list_del(before, after);
+}
+
+/**
+ * list_del_init - unlink an entry and make it an empty list again
+ * @entry: the element to remove.
+ *
+ * After the call the entry points at itself in both directions.
+ */
+static inline void list_del_init(struct list_head *entry)
+{
+       struct list_head *before = entry->prev;
+       struct list_head *after = entry->next;
+
+       __list_del(before, after);
+
+       /* same as INIT_LIST_HEAD(entry) */
+       entry->next = entry;
+       entry->prev = entry;
+}
+
+/**
+ * list_move - unlink an entry and re-insert it right after a head
+ * @list: the entry to move
+ * @head: the head that will precede our entry
+ */
+static inline void list_move(struct list_head *list, struct list_head *head)
+{
+       struct list_head *before = list->prev;
+       struct list_head *after = list->next;
+
+       __list_del(before, after);
+
+       /* head->next is read only after the unlink, as list_add() would */
+       __list_add(list, head, head->next);
+}
+
+/**
+ * list_move_tail - unlink an entry and re-insert it right before a head
+ * @list: the entry to move
+ * @head: the head that will follow our entry
+ */
+static inline void list_move_tail(struct list_head *list,
+                                 struct list_head *head)
+{
+       struct list_head *before = list->prev;
+       struct list_head *after = list->next;
+
+       __list_del(before, after);
+
+       /* head->prev is read only after the unlink, as list_add_tail() would */
+       __list_add(list, head->prev, head);
+}
+
+/**
+ * list_empty - report whether a list has no entries
+ * @head: the list to test.
+ *
+ * Returns 1 when @head's next pointer refers back to @head, else 0.
+ */
+static inline int list_empty(struct list_head *head)
+{
+       if (head->next != head)
+               return 0;
+
+       return 1;
+}
+
+static inline void __list_splice(struct list_head *list,
+                                struct list_head *head)
+{
+       struct list_head *first = list->next;   /* first entry of @list */
+       struct list_head *last = list->prev;    /* last entry of @list */
+       struct list_head *at = head->next;      /* entry that used to follow @head */
+
+       first->prev = head;
+       head->next = first;
+
+       last->next = at;
+       at->prev = last;        /* @list itself is not modified here */
+}
+
+/**
+ * list_splice - insert the entries of one list into another
+ * @list: the list whose entries are added.
+ * @head: the entry in the receiving list they are inserted after.
+ *
+ * Nothing happens when @list is empty.  @list itself is not
+ * reinitialised; see list_splice_init() for that.
+ */
+static inline void list_splice(struct list_head *list, struct list_head *head)
+{
+       if (list_empty(list))
+               return;
+
+       __list_splice(list, head);
+}
+
+/**
+ * list_splice_init - insert the entries of one list into another and
+ *                    leave the donor list empty.
+ * @list: the list whose entries are added.
+ * @head: the entry in the receiving list they are inserted after.
+ *
+ * Nothing happens when @list is already empty; otherwise @list is
+ * reinitialised after its entries have been moved.
+ */
+static inline void list_splice_init(struct list_head *list,
+                                   struct list_head *head)
+{
+       if (list_empty(list))
+               return;
+
+       __list_splice(list, head);
+       INIT_LIST_HEAD(list);
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr:       the &struct list_head pointer.
+ * @type:      the type of the struct this is embedded in.
+ * @member:    the name of the list_struct within the struct.
+ *
+ * The result is @ptr moved back by the offset of @member within @type.
+ */
+#define list_entry(ptr, type, member) \
+       ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+
+/**
+ * list_for_each       -       iterate over a list
+ * @pos:       the &struct list_head to use as a loop counter.
+ * @head:      the head for your list.
+ *
+ * @pos and @head are evaluated more than once.  The next entry is read
+ * from @pos after the loop body has run.
+ */
+#define list_for_each(pos, head) \
+       for (pos = (head)->next, prefetch(pos->next); pos != (head); \
+               pos = pos->next, prefetch(pos->next))
+
+/**
+ * list_for_each_prev  -       iterate over a list in reverse order
+ * @pos:       the &struct list_head to use as a loop counter.
+ * @head:      the head for your list.
+ *
+ * @pos and @head are evaluated more than once.  The previous entry is
+ * read from @pos after the loop body has run.
+ */
+#define list_for_each_prev(pos, head) \
+       for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \
+               pos = pos->prev, prefetch(pos->prev))
+
+/**
+ * list_for_each_safe  -       iterate over a list safe against removal of list entry
+ * @pos:       the &struct list_head to use as a loop counter.
+ * @n:         another &struct list_head to use as temporary storage
+ * @head:      the head for your list.
+ *
+ * @n is loaded with the following entry before the loop body runs, and
+ * @pos advances to @n afterwards without reading @pos again.
+ */
+#define list_for_each_safe(pos, n, head) \
+       for (pos = (head)->next, n = pos->next; pos != (head); \
+               pos = n, n = pos->next)
+
+/**
+ * list_for_each_entry  -       iterate over list of given type
+ * @pos:        the type * to use as a loop counter.
+ * @head:       the head for your list.
+ * @member:     the name of the list_struct within the struct.
+ *
+ * The loop ends when @pos's embedded @member is @head itself.  The next
+ * entry is read from @pos after the loop body has run.
+ */
+#define list_for_each_entry(pos, head, member)                         \
+        for (pos = list_entry((head)->next, typeof(*pos), member),     \
+                    prefetch(pos->member.next);                        \
+            &pos->member != (head);                                    \
+            pos = list_entry(pos->member.next, typeof(*pos), member),  \
+            prefetch(pos->member.next))
+
+/**
+ * list_for_each_entry_safe  -       iterate over list of given type safe against removal of list entry
+ * @pos:        the type * to use as a loop counter.
+ * @n:          another type * to use as temporary storage
+ * @head:       the head for your list.
+ * @member:     the name of the list_struct within the struct.
+ *
+ * @n is loaded with the following entry before the loop body runs, and
+ * @pos advances to @n afterwards without reading @pos again.
+ */
+#define list_for_each_entry_safe(pos, n, head, member)                 \
+        for (pos = list_entry((head)->next, typeof(*pos), member),     \
+               n = list_entry(pos->member.next, typeof(*pos), member); \
+            &pos->member != (head);                                    \
+            pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+#endif /* if !__KERNEL__*/
+#endif /* if !_LUSTRE_LIST_H */
index af4a2dc..db83ae7 100644 (file)
@@ -1,6 +1,8 @@
 # define DEBUG_SUBSYSTEM S_PORTALS
 # define PORTAL_DEBUG
 
+#include "build_check.h"
+
 #ifndef __KERNEL__
 # include <stdio.h>
 # include <stdlib.h>
index a83749b..69fa339 100644 (file)
@@ -1,11 +1,12 @@
 #ifndef P30_API_H
 #define P30_API_H
 
+#include "build_check.h"
+
 #include <portals/types.h>
 
 #ifndef PTL_NO_WRAP
-int PtlInit(void);
-int PtlInitialized(void);
+int PtlInit(int *);
 void PtlFini(void);
 
 int PtlNIInit(ptl_interface_t interface, ptl_pt_index_t ptl_size_in,
@@ -25,10 +26,6 @@ int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id);
  * Network interfaces
  */
 
-#ifndef PTL_NO_WRAP
-int PtlNIBarrier(ptl_handle_ni_t interface_in);
-#endif
-
 int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in,
                 ptl_sr_value_t * status_out);
 
@@ -62,6 +59,13 @@ unsigned int PtlNIDebug(ptl_handle_ni_t ni, unsigned int mask_in);
  */
 int PtlFailNid (ptl_handle_ni_t ni, ptl_nid_t nid, unsigned int threshold);
 
+/*
+ * PtlSnprintHandle: 
+ *
+ * This is not an official Portals 3 API call.  It is provided
+ * so that an application can print an opaque handle.
+ */
+void PtlSnprintHandle (char *str, int str_len, ptl_handle_any_t handle);
 
 /*
  * Match entries
@@ -95,7 +99,7 @@ int PtlMDAttach(ptl_handle_me_t current_in, ptl_md_t md_in,
                 ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out);
 
 int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in,
-              ptl_handle_md_t * handle_out);
+             ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out);
 
 int PtlMDUnlink(ptl_handle_md_t md_in);
 
@@ -130,8 +134,8 @@ int PtlEQGet(ptl_handle_eq_t eventq_in, ptl_event_t * event_out);
 
 int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t * event_out);
 
-int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out,
-                      int timeout);
+int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout,
+             ptl_event_t *event_out, int *which_out);
 #endif
 
 /*
index 3c3b154..0be8a3d 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef PTL_BLOCKS_H
 #define PTL_BLOCKS_H
 
+#include "build_check.h"
+
 /*
  * blocks.h
  *
@@ -161,6 +163,7 @@ typedef struct PtlMDBind_in {
         ptl_handle_ni_t ni_in;
         ptl_handle_eq_t eq_in;
         ptl_md_t md_in;
+       ptl_unlink_t unlink_in;
 } PtlMDBind_in;
 
 typedef struct PtlMDBind_out {
diff --git a/lnet/include/lnet/build_check.h b/lnet/include/lnet/build_check.h
new file mode 100644 (file)
index 0000000..5db1352
--- /dev/null
@@ -0,0 +1,8 @@
+#ifndef _BUILD_CHECK_H
+#define _BUILD_CHECK_H
+
+#ifdef CRAY_PORTALS /* presumably set by a --with-cray-portals build; this header must not be reached then */
+#error "an application got to me instead of cray's includes"
+#endif
+
+#endif
index 785ce73..61aca3f 100644 (file)
@@ -1,3 +1,4 @@
+#include "build_check.h"
 /*
 **
 ** This files contains definitions that are used throughout the cplant code.
index 08f084a..499f32b 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef _P30_ERRNO_H_
 #define _P30_ERRNO_H_
 
+#include "build_check.h"
 /*
  * include/portals/errno.h
  *
 
 /* If you change these, you must update the string table in api-errno.c */
 typedef enum {
-        PTL_OK              = 0,
-        PTL_SEGV            = 1,
-
-        PTL_NOSPACE         = 2,
-        PTL_INUSE           = 3,
-        PTL_VAL_FAILED      = 4,
-
-        PTL_NAL_FAILED      = 5,
-        PTL_NOINIT          = 6,
-        PTL_INIT_DUP        = 7,
-        PTL_INIT_INV        = 8,
-        PTL_AC_INV_INDEX    = 9,
-
-        PTL_INV_ASIZE       = 10,
-        PTL_INV_HANDLE      = 11,
-        PTL_INV_MD          = 12,
-        PTL_INV_ME          = 13,
-        PTL_INV_NI          = 14,
+        PTL_OK                 = 0,
+        PTL_SEGV               = 1,
+
+        PTL_NO_SPACE           = 2,
+        PTL_ME_IN_USE          = 3,
+        PTL_VAL_FAILED         = 4,
+
+        PTL_NAL_FAILED         = 5,
+        PTL_NO_INIT            = 6,
+        PTL_IFACE_DUP          = 7,
+        PTL_IFACE_INVALID      = 8,
+
+        PTL_HANDLE_INVALID     = 9,
+        PTL_MD_INVALID         = 10,
+        PTL_ME_INVALID         = 11,
 /* If you change these, you must update the string table in api-errno.c */
-        PTL_ILL_MD          = 15,
-        PTL_INV_PROC        = 16,
-        PTL_INV_PSIZE       = 17,
-        PTL_INV_PTINDEX     = 18,
-        PTL_INV_REG         = 19,
-
-        PTL_INV_SR_INDX     = 20,
-        PTL_ML_TOOLONG      = 21,
-        PTL_ADDR_UNKNOWN    = 22,
-        PTL_INV_EQ          = 23,
-        PTL_EQ_DROPPED      = 24,
-
-        PTL_EQ_EMPTY        = 25,
-        PTL_NOUPDATE        = 26,
-        PTL_FAIL            = 27,
-        PTL_NOT_IMPLEMENTED = 28,
-        PTL_NO_ACK          = 29,
-
-        PTL_IOV_TOO_MANY    = 30,
-        PTL_IOV_TOO_SMALL   = 31,
-
-       PTL_EQ_INUSE        = 32,
-
-        PTL_MAX_ERRNO       = 32
+        PTL_PROCESS_INVALID    = 12,
+        PTL_PT_INDEX_INVALID   = 13,
+
+        PTL_SR_INDEX_INVALID   = 14,
+        PTL_EQ_INVALID         = 15,
+        PTL_EQ_DROPPED         = 16,
+
+        PTL_EQ_EMPTY           = 17,
+        PTL_MD_NO_UPDATE       = 18,
+        PTL_FAIL               = 19,
+
+        PTL_IOV_TOO_MANY       = 20,
+        PTL_IOV_TOO_SMALL      = 21,
+
+       PTL_EQ_IN_USE           = 22,
+
+        PTL_MAX_ERRNO          = 23
 } ptl_err_t;
 /* If you change these, you must update the string table in api-errno.c */
 
index a70b465..94f4f48 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef _P30_INTERNAL_H_
 #define _P30_INTERNAL_H_
 
+#include "build_check.h"
 /*
  * p30/internal.h
  *
index f87ff83..90ed4f5 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef PTL_DISPATCH_H
 #define PTL_DISPATCH_H
 
+#include "build_check.h"
 /*
  * include/dispatch.h
  *
index e9e4635..350447e 100644 (file)
@@ -9,6 +9,8 @@
 #ifndef _LIB_P30_H_
 #define _LIB_P30_H_
 
+#include "build_check.h"
+
 #ifdef __KERNEL__
 # include <asm/page.h>
 # include <linux/string.h>
@@ -195,7 +197,7 @@ lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd)
                 niov = umd->niov;
                 size = offsetof(lib_md_t, md_iov.kiov[niov]);
         } else {
-                niov = ((umd->options & PTL_MD_IOV) != 0) ?
+                niov = ((umd->options & PTL_MD_IOVEC) != 0) ?
                        umd->niov : 1;
                 size = offsetof(lib_md_t, md_iov.iov[niov]);
         }
@@ -245,10 +247,14 @@ lib_me_free(nal_cb_t *nal, lib_me_t *me)
 static inline lib_msg_t *
 lib_msg_alloc(nal_cb_t *nal)
 {
-        /* NEVER called with statelock held */
+        /* NEVER called with statelock held; may be in interrupt... */
         lib_msg_t *msg;
 
-        PORTAL_ALLOC(msg, sizeof(*msg));
+        if (in_interrupt())
+                PORTAL_ALLOC_ATOMIC(msg, sizeof(*msg));
+        else
+                PORTAL_ALLOC(msg, sizeof(*msg));
+
         if (msg != NULL) {
                 /* NULL pointers, clear flags etc */
                 memset (msg, 0, sizeof (*msg));
@@ -363,10 +369,10 @@ extern char *dispatch_name(int index);
 extern void lib_enq_event_locked (nal_cb_t *nal, void *private,
                                   lib_eq_t *eq, ptl_event_t *ev);
 extern void lib_finalize (nal_cb_t *nal, void *private, lib_msg_t *msg, 
-                          ptl_err_t status);
+                          ptl_ni_fail_t ni_fail_type);
 extern void lib_parse (nal_cb_t *nal, ptl_hdr_t *hdr, void *private);
-extern lib_msg_t *lib_fake_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, 
-                                      lib_md_t *getmd);
+extern lib_msg_t *lib_create_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, 
+                                        lib_msg_t *get_msg);
 extern void print_hdr (nal_cb_t * nal, ptl_hdr_t * hdr);
 
 
index 0bf557e..d1d0495 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef _LIB_NAL_H_
 #define _LIB_NAL_H_
 
+#include "build_check.h"
 /*
  * nal.h
  *
index e9e4635..350447e 100644 (file)
@@ -9,6 +9,8 @@
 #ifndef _LIB_P30_H_
 #define _LIB_P30_H_
 
+#include "build_check.h"
+
 #ifdef __KERNEL__
 # include <asm/page.h>
 # include <linux/string.h>
@@ -195,7 +197,7 @@ lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd)
                 niov = umd->niov;
                 size = offsetof(lib_md_t, md_iov.kiov[niov]);
         } else {
-                niov = ((umd->options & PTL_MD_IOV) != 0) ?
+                niov = ((umd->options & PTL_MD_IOVEC) != 0) ?
                        umd->niov : 1;
                 size = offsetof(lib_md_t, md_iov.iov[niov]);
         }
@@ -245,10 +247,14 @@ lib_me_free(nal_cb_t *nal, lib_me_t *me)
 static inline lib_msg_t *
 lib_msg_alloc(nal_cb_t *nal)
 {
-        /* NEVER called with statelock held */
+        /* NEVER called with statelock held; may be in interrupt... */
         lib_msg_t *msg;
 
-        PORTAL_ALLOC(msg, sizeof(*msg));
+        if (in_interrupt())
+                PORTAL_ALLOC_ATOMIC(msg, sizeof(*msg));
+        else
+                PORTAL_ALLOC(msg, sizeof(*msg));
+
         if (msg != NULL) {
                 /* NULL pointers, clear flags etc */
                 memset (msg, 0, sizeof (*msg));
@@ -363,10 +369,10 @@ extern char *dispatch_name(int index);
 extern void lib_enq_event_locked (nal_cb_t *nal, void *private,
                                   lib_eq_t *eq, ptl_event_t *ev);
 extern void lib_finalize (nal_cb_t *nal, void *private, lib_msg_t *msg, 
-                          ptl_err_t status);
+                          ptl_ni_fail_t ni_fail_type);
 extern void lib_parse (nal_cb_t *nal, ptl_hdr_t *hdr, void *private);
-extern lib_msg_t *lib_fake_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, 
-                                      lib_md_t *getmd);
+extern lib_msg_t *lib_create_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, 
+                                        lib_msg_t *get_msg);
 extern void print_hdr (nal_cb_t * nal, ptl_hdr_t * hdr);
 
 
index 904204b..40776a6 100644 (file)
@@ -10,6 +10,8 @@
 #ifndef _LIB_TYPES_H_
 #define _LIB_TYPES_H_
 
+#include "build_check.h"
+
 #include <portals/types.h>
 #ifdef __KERNEL__
 # include <linux/uio.h>
@@ -133,9 +135,8 @@ typedef struct {
 } lib_counters_t;
 
 /* temporary expedient: limit number of entries in discontiguous MDs */
-# define PTL_MTU        (512<<10)
-# define PTL_MD_MAX_IOV 128
-# define PTL_MD_MAX_PAGES min_t(int, PTL_MD_MAX_IOV, PTL_MTU / PAGE_SIZE)
+#define PTL_MTU        (512<<10)
+#define PTL_MD_MAX_IOV 128
 
 struct lib_msg_t {
         struct list_head  msg_list;
@@ -191,7 +192,6 @@ struct lib_md_t {
         ptl_size_t        max_size;
         int               threshold;
         int               pending;
-        ptl_unlink_t      unlink;
         unsigned int      options;
         unsigned int      md_flags;
         void             *user_ptr;
@@ -204,7 +204,15 @@ struct lib_md_t {
         } md_iov;
 };
 
-#define PTL_MD_FLAG_UNLINK            (1 << 0)
+#define PTL_MD_FLAG_ZOMBIE            (1 << 0)
+#define PTL_MD_FLAG_AUTO_UNLINK       (1 << 1)
+
+static inline int lib_md_exhausted (lib_md_t *md) 
+{
+        return (md->threshold == 0 ||                      /* non-zero if threshold has reached 0, or... */
+                ((md->options & PTL_MD_MAX_SIZE) != 0 &&   /* ...PTL_MD_MAX_SIZE is set and */
+                 md->offset + md->max_size > md->length)); /* offset + max_size would exceed length */
+}
 
 #ifdef PTL_USE_LIB_FREELIST
 typedef struct
index 9cab047..37d9952 100644 (file)
@@ -9,8 +9,6 @@
  * using the generic single-entry routines.
  */
 
-#define prefetch(a) ((void)a)
-
 struct list_head {
        struct list_head *next, *prev;
 };
@@ -194,8 +192,7 @@ static inline void list_splice_init(struct list_head *list,
  * @head:      the head for your list.
  */
 #define list_for_each(pos, head) \
-       for (pos = (head)->next, prefetch(pos->next); pos != (head); \
-               pos = pos->next, prefetch(pos->next))
+       for (pos = (head)->next ; pos != (head); pos = pos->next )
 
 /**
  * list_for_each_prev  -       iterate over a list in reverse order
@@ -203,8 +200,7 @@ static inline void list_splice_init(struct list_head *list,
  * @head:      the head for your list.
  */
 #define list_for_each_prev(pos, head) \
-       for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \
-               pos = pos->prev, prefetch(pos->prev))
+       for (pos = (head)->prev ; pos != (head); pos = pos->prev)
 
 /**
  * list_for_each_safe  -       iterate over a list safe against removal of list entry
@@ -226,11 +222,9 @@ static inline void list_splice_init(struct list_head *list,
  * @member:     the name of the list_struct within the struct.
  */
 #define list_for_each_entry(pos, head, member)                         \
-        for (pos = list_entry((head)->next, typeof(*pos), member),     \
-                    prefetch(pos->member.next);                        \
+        for (pos = list_entry((head)->next, typeof(*pos), member);     \
             &pos->member != (head);                                    \
-            pos = list_entry(pos->member.next, typeof(*pos), member),  \
-            prefetch(pos->member.next))
+            pos = list_entry(pos->member.next, typeof(*pos), member))
 #endif
 
 #ifndef list_for_each_entry_safe
index 8b1495e..577ffab 100644 (file)
@@ -4,6 +4,8 @@
 #ifndef _P30_H_
 #define _P30_H_
 
+#include "build_check.h"
+
 /*
  * p30.h
  *
 #include <portals/api.h>
 #include <portals/nalids.h>
 
-extern int __p30_initialized;  /* for libraries & test codes  */
-extern int __p30_myr_initialized;      /*   that don't know if p30    */
-extern int __p30_ip_initialized;       /*   had been initialized yet  */
-extern ptl_handle_ni_t __myr_ni_handle, __ip_ni_handle;
-
-extern int __p30_myr_timeout;  /* in seconds, for PtlNIBarrier,     */
-extern int __p30_ip_timeout;   /* PtlReduce_all, & PtlBroadcast_all */
-
 /*
  * Debugging flags reserved for the Portals reference library.
  * These are not part of the API as described in the SAND report
index 7cb3ab7..5b72046 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef _NAL_H_
 #define _NAL_H_
 
+#include "build_check.h"
+
 /*
  * p30/nal.h
  *
@@ -27,7 +29,7 @@ struct nal_t {
 
        int (*validate) (nal_t * nal, void *base, size_t extent);
 
-       void (*yield) (nal_t * nal);
+       int (*yield) (nal_t * nal, unsigned long *flags, int milliseconds);
 
        void (*lock) (nal_t * nal, unsigned long *flags);
 
index 1b837b4..1568593 100644 (file)
@@ -1,3 +1,5 @@
+#include "build_check.h"
+
 #define PTL_IFACE_TCP 1
 #define PTL_IFACE_ER 2
 #define PTL_IFACE_SS 3
index 8b1495e..577ffab 100644 (file)
@@ -4,6 +4,8 @@
 #ifndef _P30_H_
 #define _P30_H_
 
+#include "build_check.h"
+
 /*
  * p30.h
  *
 #include <portals/api.h>
 #include <portals/nalids.h>
 
-extern int __p30_initialized;  /* for libraries & test codes  */
-extern int __p30_myr_initialized;      /*   that don't know if p30    */
-extern int __p30_ip_initialized;       /*   had been initialized yet  */
-extern ptl_handle_ni_t __myr_ni_handle, __ip_ni_handle;
-
-extern int __p30_myr_timeout;  /* in seconds, for PtlNIBarrier,     */
-extern int __p30_ip_timeout;   /* PtlReduce_all, & PtlBroadcast_all */
-
 /*
  * Debugging flags reserved for the Portals reference library.
  * These are not part of the API as described in the SAND report
index 74ef493..902db76 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef _P30_TYPES_H_
 #define _P30_TYPES_H_
 
+#include "build_check.h"
+
 #ifdef __linux__
 # include <asm/types.h>
 # if defined(__powerpc__) && !defined(__KERNEL__)
@@ -25,6 +27,11 @@ typedef u_int64_t __u64;
 
 #include <portals/errno.h>
 
+/* This implementation uses the same type for API function return codes and
+ * the completion status in an event  */
+#define PTL_NI_OK  PTL_OK
+typedef ptl_err_t ptl_ni_fail_t;
+
 typedef __u64 ptl_nid_t;
 typedef __u32 ptl_pid_t;
 typedef __u32 ptl_pt_index_t;
@@ -33,6 +40,9 @@ typedef __u64 ptl_match_bits_t;
 typedef __u64 ptl_hdr_data_t;
 typedef __u32 ptl_size_t;
 
+#define PTL_TIME_FOREVER    (-1)
+#define PTL_EQ_HANDLER_NONE NULL
+
 typedef struct {
         unsigned long nal_idx;                 /* which network interface */
         __u64         cookie;                  /* which thing on that interface */
@@ -43,11 +53,11 @@ typedef ptl_handle_any_t ptl_handle_eq_t;
 typedef ptl_handle_any_t ptl_handle_md_t;
 typedef ptl_handle_any_t ptl_handle_me_t;
 
-#define PTL_HANDLE_NONE \
+#define PTL_INVALID_HANDLE \
     ((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1})
-#define PTL_EQ_NONE PTL_HANDLE_NONE
+#define PTL_EQ_NONE PTL_INVALID_HANDLE
 
-static inline int PtlHandleEqual (ptl_handle_any_t h1, ptl_handle_any_t h2)
+static inline int PtlHandleIsEqual (ptl_handle_any_t h1, ptl_handle_any_t h2)
 {
        return (h1.nal_idx == h2.nal_idx && h1.cookie == h2.cookie);
 }
@@ -88,24 +98,38 @@ typedef struct {
 } ptl_md_t;
 
 /* Options for the MD structure */
-#define PTL_MD_OP_PUT           (1 << 0)
-#define PTL_MD_OP_GET           (1 << 1)
-#define PTL_MD_MANAGE_REMOTE    (1 << 2)
-#define PTL_MD_AUTO_UNLINK      (1 << 3)
-#define PTL_MD_TRUNCATE         (1 << 4)
-#define PTL_MD_ACK_DISABLE      (1 << 5)
-#define PTL_MD_IOV             (1 << 6)
-#define PTL_MD_MAX_SIZE                (1 << 7)
-#define PTL_MD_KIOV             (1 << 8)
+#define PTL_MD_OP_PUT               (1 << 0)
+#define PTL_MD_OP_GET               (1 << 1)
+#define PTL_MD_MANAGE_REMOTE        (1 << 2)
+/* unused                           (1 << 3) */
+#define PTL_MD_TRUNCATE             (1 << 4)
+#define PTL_MD_ACK_DISABLE          (1 << 5)
+#define PTL_MD_IOVEC               (1 << 6)
+#define PTL_MD_MAX_SIZE                    (1 << 7)
+#define PTL_MD_KIOV                 (1 << 8)
+#define PTL_MD_EVENT_START_DISABLE  (1 << 9)
+#define PTL_MD_EVENT_END_DISABLE    (1 << 10)
+
+/* For compatibility with Cray Portals */
+#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS  0
 
 #define PTL_MD_THRESH_INF       (-1)
 
 typedef enum {
-        PTL_EVENT_GET,
-        PTL_EVENT_PUT,
-        PTL_EVENT_REPLY,
+        PTL_EVENT_GET_START,
+        PTL_EVENT_GET_END,
+
+        PTL_EVENT_PUT_START,
+        PTL_EVENT_PUT_END,
+
+        PTL_EVENT_REPLY_START,
+        PTL_EVENT_REPLY_END,
+
         PTL_EVENT_ACK,
-        PTL_EVENT_SENT,
+
+        PTL_EVENT_SEND_START,
+       PTL_EVENT_SEND_END,
+
        PTL_EVENT_UNLINK,
 } ptl_event_kind_t;
 
@@ -122,8 +146,6 @@ typedef unsigned PTL_SEQ_BASETYPE ptl_seq_t;
 #endif
 typedef struct {
         ptl_event_kind_t   type;
-       ptl_err_t          status;
-       int                unlinked;
         ptl_process_id_t   initiator;
         ptl_pt_index_t     portal;
         ptl_match_bits_t   match_bits;
@@ -132,7 +154,8 @@ typedef struct {
        ptl_size_t         offset;
         ptl_md_t           mem_desc;
         ptl_hdr_data_t     hdr_data;
-        struct timeval     arrival_time;
+       int                unlinked;
+       ptl_ni_fail_t      ni_fail_type;
 
         volatile ptl_seq_t sequence;
 } ptl_event_t;
index ad46b90..9955599 100644 (file)
@@ -315,7 +315,7 @@ int gmnal_api_shutdown(nal_t *, int);
 
 int gmnal_api_validate(nal_t *, void *, size_t);
 
-void gmnal_api_yield(nal_t *);
+void gmnal_api_yield(nal_t *, unsigned long *, int);
 
 void gmnal_api_lock(nal_t *, unsigned long *);
 
index 1442aa7..338d75c 100644 (file)
@@ -157,13 +157,16 @@ gmnal_api_validate(nal_t *nal, void *base, size_t extent)
  *     Give up the processor
  */
 void
-gmnal_api_yield(nal_t *nal)
+gmnal_api_yield(nal_t *nal, unsigned long *flags, int milliseconds)
 {
        CDEBUG(D_TRACE, "gmnal_api_yield : nal [%p]\n", nal);
 
-       set_current_state(TASK_INTERRUPTIBLE);
-       schedule();
+        if (milliseconds != 0) {
+                CERROR("Blocking yield not implemented yet\n");
+                LBUG();
+        }
 
+        our_cond_resched();
        return;
 }
 
index 1f28746..ece1380 100644 (file)
@@ -272,6 +272,17 @@ void gmnal_cb_sti(nal_cb_t *nal_cb, unsigned long *flags)
        return;
 }
 
+void gmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq, ptl_event_t *ev)
+{
+        /* holding cb_lock */
+
+        if (eq->event_callback != NULL)         /* run the EQ's event callback, if it has one */
+                eq->event_callback(ev);
+
+        /* We will wake threads sleeping in yield() here, AFTER the
+         * callback, when we implement blocking yield */
+}
+
 int gmnal_cb_dist(nal_cb_t *nal_cb, ptl_nid_t nid, unsigned long *dist)
 {
        CDEBUG(D_TRACE, "gmnal_cb_dist\n");
index 948badf..02beca7 100644 (file)
@@ -194,7 +194,7 @@ kibnal_shutdown(nal_t *nal, int ni)
 // when do we call this yield function 
 //
 void 
-kibnal_yield( nal_t *nal )
+kibnal_yield( nal_t *nal, unsigned long *flags, int milliseconds )
 {
         kibnal_data_t *k = nal->nal_data;
         nal_cb_t      *nal_cb = k->kib_cb;
@@ -204,6 +204,11 @@ kibnal_yield( nal_t *nal )
         LASSERT (k    == &kibnal_data);
         LASSERT (nal_cb == &kibnal_lib);
 
+        if (milliseconds != 0) {
+                CERROR("Blocking yeild not implemented yet\n");
+                LBUG();
+        }
+        
         // check under what condition that we need to 
         // call schedule()
         // who set this need_resched 
index ff5aeb3..4a1f0d7 100644 (file)
@@ -29,6 +29,7 @@
 #include <portals/p30.h>
 #include <portals/lib-p30.h>
 #include <linux/kp30.h>
+#include <linux/kpr.h>
 
 // Infiniband VAPI/EVAPI header files  
 // Mellanox MT23108 VAPI
index 0688062..f359441 100644 (file)
@@ -221,7 +221,19 @@ void kibnal_sti(nal_cb_t *nal, unsigned long *flags)
         spin_unlock_irqrestore(&data->kib_dispatch_lock,*flags);
 }
 
+//
+// A new event has just been created
+//
+void kibnal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev)
+{
+        /* holding kib_dispatch_lock */
 
+        if (eq->event_callback != NULL)         /* run the EQ's event callback, if it has one */
+                eq->event_callback(ev);
+
+        /* We will wake threads sleeping in yield() here, AFTER the
+         * callback, when we implement blocking yield */
+}
 
 //
 // nic distance 
@@ -1285,5 +1297,6 @@ nal_cb_t kibnal_lib = {
         cb_printf:      kibnal_printf,
         cb_cli:         kibnal_cli,
         cb_sti:         kibnal_sti,
+        cb_callback:    kibnal_callback,
         cb_dist:        kibnal_dist // no used at this moment 
 };
index 3b3b5d4..a386eef 100644 (file)
@@ -109,14 +109,43 @@ kqswnal_shutdown(nal_t *nal, int ni)
        return (0);
 }
 
-static void
-kqswnal_yield( nal_t *nal )
+static int
+kqswnal_yield(nal_t *nal, unsigned long *flags, int milliseconds)
 {
+       /* NB called holding statelock */
+        wait_queue_t       wait;
+       unsigned long      now = jiffies;
+
        CDEBUG (D_NET, "yield\n");
 
-       if (current->need_resched)
-               schedule();
-       return;
+       if (milliseconds == 0) {
+               if (current->need_resched)
+                       schedule();
+               return 0;
+       }
+
+       init_waitqueue_entry(&wait, current);
+       set_current_state(TASK_INTERRUPTIBLE);
+       add_wait_queue(&kqswnal_data.kqn_yield_waitq, &wait);
+
+       kqswnal_unlock(nal, flags);
+
+       if (milliseconds < 0)
+               schedule ();
+       else
+               schedule_timeout((milliseconds * HZ) / 1000);
+       
+       kqswnal_lock(nal, flags);
+
+       remove_wait_queue(&kqswnal_data.kqn_yield_waitq, &wait);
+
+       if (milliseconds > 0) {
+               milliseconds -= ((jiffies - now) * 1000) / HZ;
+               if (milliseconds < 0)
+                       milliseconds = 0;
+       }
+       
+       return (milliseconds);
 }
 
 static nal_t *
@@ -491,6 +520,7 @@ kqswnal_initialise (void)
        init_waitqueue_head (&kqswnal_data.kqn_sched_waitq);
 
        spin_lock_init (&kqswnal_data.kqn_statelock);
+       init_waitqueue_head (&kqswnal_data.kqn_yield_waitq);
 
        /* pointers/lists/locks initialised */
        kqswnal_data.kqn_init = KQN_INIT_DATA;
index 5ebf30a..5e32887 100644 (file)
@@ -71,6 +71,7 @@
 #define DEBUG_SUBSYSTEM S_QSWNAL
 
 #include <linux/kp30.h>
+#include <linux/kpr.h>
 #include <portals/p30.h>
 #include <portals/lib-p30.h>
 
@@ -222,6 +223,7 @@ typedef struct
         struct list_head   kqn_delayedtxds;     /* delayed transmits */
 
         spinlock_t         kqn_statelock;       /* cb_cli/cb_sti */
+        wait_queue_head_t  kqn_yield_waitq;     /* where yield waits */
         nal_cb_t          *kqn_cb;              /* -> kqswnal_lib */
 #if MULTIRAIL_EKC
         EP_SYS            *kqn_ep;              /* elan system */
index 157dc70..61c88f6 100644 (file)
@@ -85,6 +85,9 @@ kqswnal_printf (nal_cb_t * nal, const char *fmt, ...)
         CDEBUG (D_NET, "%s", msg);
 }
 
+#if (defined(CONFIG_SPARC32) || defined(CONFIG_SPARC64))
+# error "Can't save/restore irq contexts in different procedures"
+#endif
 
 static void
 kqswnal_cli(nal_cb_t *nal, unsigned long *flags)
@@ -103,6 +106,17 @@ kqswnal_sti(nal_cb_t *nal, unsigned long *flags)
         spin_unlock_irqrestore(&data->kqn_statelock, *flags);
 }
 
+static void
+kqswnal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev)
+{
+        /* holding kqn_statelock */
+
+        if (eq->event_callback != NULL)         /* run the EQ's event callback, if it has one */
+                eq->event_callback(ev);
+
+        if (waitqueue_active(&kqswnal_data.kqn_yield_waitq))    /* AFTER the callback: wake everything queued on the yield waitq */
+                wake_up_all(&kqswnal_data.kqn_yield_waitq);
+}
 
 static int
 kqswnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
@@ -513,15 +527,15 @@ kqswnal_tx_done (kqswnal_tx_t *ktx, int error)
                 lib_finalize (&kqswnal_lib, ktx->ktx_args[0],
                               (lib_msg_t *)ktx->ktx_args[1],
                               (error == 0) ? PTL_OK : 
-                              (error == -ENOMEM) ? PTL_NOSPACE : PTL_FAIL);
+                              (error == -ENOMEM) ? PTL_NO_SPACE : PTL_FAIL);
                 break;
 
         case KTX_GETTING:          /* Peer has DMA-ed direct? */
                 msg = (lib_msg_t *)ktx->ktx_args[1];
 
                 if (error == 0) {
-                        repmsg = lib_fake_reply_msg (&kqswnal_lib, 
-                                                     ktx->ktx_nid, msg->md);
+                        repmsg = lib_create_reply_msg (&kqswnal_lib, 
+                                                       ktx->ktx_nid, msg);
                         if (repmsg == NULL)
                                 error = -ENOMEM;
                 }
@@ -532,7 +546,7 @@ kqswnal_tx_done (kqswnal_tx_t *ktx, int error)
                         lib_finalize (&kqswnal_lib, NULL, repmsg, PTL_OK);
                 } else {
                         lib_finalize (&kqswnal_lib, ktx->ktx_args[0], msg,
-                                      (error == -ENOMEM) ? PTL_NOSPACE : PTL_FAIL);
+                                      (error == -ENOMEM) ? PTL_NO_SPACE : PTL_FAIL);
                 }
                 break;
 
@@ -937,7 +951,7 @@ kqswnal_sendmsg (nal_cb_t     *nal,
                                           in_interrupt()));
         if (ktx == NULL) {
                 kqswnal_cerror_hdr (hdr);
-                return (PTL_NOSPACE);
+                return (PTL_NO_SPACE);
         }
 
         ktx->ktx_nid     = targetnid;
@@ -1845,5 +1859,6 @@ nal_cb_t kqswnal_lib =
         cb_printf:      kqswnal_printf,
         cb_cli:         kqswnal_cli,
         cb_sti:         kqswnal_sti,
+        cb_callback:    kqswnal_callback,
         cb_dist:        kqswnal_dist
 };
index 5ffba31..35de6eb 100644 (file)
@@ -108,10 +108,15 @@ static int kscimacnal_shutdown(nal_t *nal, int ni)
 }
 
 
-static void kscimacnal_yield( nal_t *nal )
+static void kscimacnal_yield( nal_t *nal, unsigned long *flags, int milliseconds )
 {
         LASSERT (nal == &kscimacnal_api);
 
+        if (milliseconds != 0) {
+                CERROR ("Blocking yield not implemented yet\n");
+                LBUG();
+        }
+
         if (current->need_resched) 
                 schedule();
         return;
index 6949557..f132769 100644 (file)
@@ -34,6 +34,7 @@
 #define DEBUG_SUBSYSTEM S_UNDEFINED
 
 #include <linux/kp30.h>
+#include <linux/kpr.h>
 #include <portals/p30.h>
 #include <portals/lib-p30.h>
 
index 52afb98..f9562b2 100644 (file)
@@ -97,6 +97,18 @@ kscimacnal_sti(nal_cb_t *nal, unsigned long *flags)
 }
 
 
+static void 
+kscimacnal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev)
+{
+        /* holding ksci_dispatch_lock */
+
+        if (eq->event_callback != NULL)         /* run the EQ's event callback, if it has one */
+                eq->event_callback(ev);
+
+        /* We will wake threads sleeping in yield() here, AFTER the
+         * callback, when we implement blocking yield */
+}
+
 static int 
 kscimacnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
 {
@@ -233,7 +245,7 @@ kscimacnal_sendmsg(nal_cb_t        *nal,
         /* save transaction info for later finalize and cleanup */
         PORTAL_ALLOC(ktx, (sizeof(kscimacnal_tx_t)));
         if (!ktx) {
-                return PTL_NOSPACE;
+                return PTL_NO_SPACE;
         }
 
         ktx->ktx_nmapped = 0; /* Start with no mapped pages :) */
@@ -248,7 +260,7 @@ kscimacnal_sendmsg(nal_cb_t        *nal,
                         kscimacnal_txrelease, ktx);
         if (!msg) {
                 PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t)));
-                return PTL_NOSPACE;
+                return PTL_NO_SPACE;
         }
         mac_put_mblk(msg, sizeof(ptl_hdr_t));
         lastblk=msg;
@@ -285,7 +297,7 @@ kscimacnal_sendmsg(nal_cb_t        *nal,
                 if(!newblk) {
                         mac_free_msg(msg);
                         PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t)));
-                        return PTL_NOSPACE;
+                        return PTL_NO_SPACE;
                 }
                 mac_put_mblk(newblk, nob);
                 mac_link_mblk(lastblk, newblk);
@@ -597,5 +609,6 @@ nal_cb_t kscimacnal_lib = {
         cb_printf:       kscimacnal_printf,
         cb_cli:          kscimacnal_cli,
         cb_sti:          kscimacnal_sti,
+        cb_callback:     kscimacnal_callback,
         cb_dist:         kscimacnal_dist
 };
index 0dd5d11..d874a6c 100644 (file)
@@ -95,13 +95,6 @@ ksocknal_api_shutdown(nal_t *nal, int ni)
 }
 
 void
-ksocknal_api_yield(nal_t *nal)
-{
-        our_cond_resched();
-        return;
-}
-
-void
 ksocknal_api_lock(nal_t *nal, unsigned long *flags)
 {
         ksock_nal_data_t *k;
@@ -123,6 +116,44 @@ ksocknal_api_unlock(nal_t *nal, unsigned long *flags)
         nal_cb->cb_sti(nal_cb,flags);
 }
 
+int
+ksocknal_api_yield(nal_t *nal, unsigned long *flags, int milliseconds)
+{
+       /* NB called holding statelock; returns 0 for a zero timeout, else the timeout less the time spent here */
+        wait_queue_t       wait;
+       unsigned long      now = jiffies;       /* entry time, used for the elapsed-time deduction below */
+
+       CDEBUG (D_NET, "yield\n");
+
+       if (milliseconds == 0) {                /* zero timeout: never sleeps on the yield waitq */
+                our_cond_resched();
+               return 0;
+       }
+
+       init_waitqueue_entry(&wait, current);
+       set_current_state (TASK_INTERRUPTIBLE);
+       add_wait_queue (&ksocknal_data.ksnd_yield_waitq, &wait);
+
+       ksocknal_api_unlock(nal, flags);        /* unlocked for the duration of the sleep */
+
+       if (milliseconds < 0)                   /* negative timeout: sleep with no time limit */
+               schedule ();
+       else
+               schedule_timeout((milliseconds * HZ) / 1000);   /* milliseconds converted to jiffies */
+       
+       ksocknal_api_lock(nal, flags);          /* locked again before returning to the caller */
+
+       remove_wait_queue (&ksocknal_data.ksnd_yield_waitq, &wait);
+
+       if (milliseconds > 0) {                 /* deduct the elapsed time (in ms), clamping at 0 */
+               milliseconds -= ((jiffies - now) * 1000) / HZ;
+               if (milliseconds < 0)
+                       milliseconds = 0;
+       }
+       
+       return (milliseconds);                  /* a negative timeout is returned unchanged */
+}
+
 nal_t *
 ksocknal_init(int interface, ptl_pt_index_t ptl_size,
               ptl_ac_index_t ac_size, ptl_pid_t requested_pid)
@@ -745,6 +776,9 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock,
 
         ksocknal_get_peer_addr (conn);
 
+        CWARN("New conn nid:"LPX64" ip:%08x/%d incarnation:"LPX64"\n",
+              nid, conn->ksnc_ipaddr, conn->ksnc_port, incarnation);
+
         irq = ksocknal_conn_irq (conn);
 
         write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
@@ -1071,6 +1105,11 @@ ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation)
 
                 if (conn->ksnc_incarnation == incarnation)
                         continue;
+
+                CWARN("Closing stale conn nid:"LPX64" ip:%08x/%d "
+                      "incarnation:"LPX64"("LPX64")\n",
+                      peer->ksnp_nid, conn->ksnc_ipaddr, conn->ksnc_port,
+                      conn->ksnc_incarnation, incarnation);
                 
                 count++;
                 ksocknal_close_conn_locked (conn, -ESTALE);
@@ -1568,7 +1607,6 @@ ksocknal_module_init (void)
 
         ksocknal_api.forward  = ksocknal_api_forward;
         ksocknal_api.shutdown = ksocknal_api_shutdown;
-        ksocknal_api.yield    = ksocknal_api_yield;
         ksocknal_api.validate = NULL;           /* our api validate is a NOOP */
         ksocknal_api.lock     = ksocknal_api_lock;
         ksocknal_api.unlock   = ksocknal_api_unlock;
@@ -1600,7 +1638,8 @@ ksocknal_module_init (void)
 
         ksocknal_data.ksnd_nal_cb = &ksocknal_lib;
         spin_lock_init (&ksocknal_data.ksnd_nal_cb_lock);
-
+        init_waitqueue_head(&ksocknal_data.ksnd_yield_waitq);
+        
         spin_lock_init(&ksocknal_data.ksnd_small_fmp.fmp_lock);
         INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_idle_fmbs);
         INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns);
@@ -1743,9 +1782,9 @@ ksocknal_module_init (void)
         ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
 
         printk(KERN_INFO "Lustre: Routing socket NAL loaded "
-               "(Routing %s, initial mem %d)\n",
+               "(Routing %s, initial mem %d, incarnation "LPX64")\n",
                kpr_routing (&ksocknal_data.ksnd_router) ?
-               "enabled" : "disabled", pkmem);
+               "enabled" : "disabled", pkmem, ksocknal_data.ksnd_incarnation);
 
         return (0);
 }
index 17a7e49..bd3c1fb 100644 (file)
@@ -61,6 +61,7 @@
 
 #include <linux/kp30.h>
 #include <linux/portals_compat25.h>
+#include <linux/kpr.h>
 #include <portals/p30.h>
 #include <portals/lib-p30.h>
 #include <portals/socknal.h>
@@ -157,6 +158,7 @@ typedef struct {
 
         nal_cb_t         *ksnd_nal_cb;
         spinlock_t        ksnd_nal_cb_lock;     /* lib cli/sti lock */
+        wait_queue_head_t ksnd_yield_waitq;     /* where yield waits */
 
         atomic_t          ksnd_nthreads;        /* # live threads */
         int               ksnd_shuttingdown;    /* tell threads to exit */
index c89e20e..ebb32da 100644 (file)
@@ -90,6 +90,8 @@ ksocknal_cli(nal_cb_t *nal, unsigned long *flags)
 {
         ksock_nal_data_t *data = nal->nal_data;
 
+        /* OK to ignore 'flags'; we only ever serialise threads and
+         * never need to lock out interrupts */
         spin_lock(&data->ksnd_nal_cb_lock);
 }
 
@@ -99,9 +101,23 @@ ksocknal_sti(nal_cb_t *nal, unsigned long *flags)
         ksock_nal_data_t *data;
         data = nal->nal_data;
 
+        /* OK to ignore 'flags'; we only ever serialise threads and
+         * never need to lock out interrupts */
         spin_unlock(&data->ksnd_nal_cb_lock);
 }
 
+void
+ksocknal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev)
+{
+        /* holding ksnd_nal_cb_lock */
+
+        if (eq->event_callback != NULL)         /* run the EQ's event callback, if it has one */
+                eq->event_callback(ev);
+        
+        if (waitqueue_active(&ksocknal_data.ksnd_yield_waitq))  /* AFTER the callback: wake everything queued on the yield waitq */
+                wake_up_all(&ksocknal_data.ksnd_yield_waitq);
+}
+
 int
 ksocknal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
 {
@@ -1058,7 +1074,7 @@ ksocknal_sendmsg(nal_cb_t     *nal,
         if (ltx == NULL) {
                 CERROR("Can't allocate tx desc type %d size %d %s\n",
                        type, desc_size, in_interrupt() ? "(intr)" : "");
-                return (PTL_NOSPACE);
+                return (PTL_NO_SPACE);
         }
 
         atomic_inc(&ksocknal_data.ksnd_nactive_ltxs);
@@ -2659,8 +2675,8 @@ ksocknal_reaper (void *arg)
                 }
                 ksocknal_data.ksnd_reaper_waketime = jiffies + timeout;
 
-                add_wait_queue (&ksocknal_data.ksnd_reaper_waitq, &wait);
                 set_current_state (TASK_INTERRUPTIBLE);
+                add_wait_queue (&ksocknal_data.ksnd_reaper_waitq, &wait);
 
                 if (!ksocknal_data.ksnd_shuttingdown &&
                     list_empty (&ksocknal_data.ksnd_deathrow_conns) &&
@@ -2692,5 +2708,6 @@ nal_cb_t ksocknal_lib = {
         cb_printf:       ksocknal_printf,
         cb_cli:          ksocknal_cli,
         cb_sti:          ksocknal_sti,
+        cb_callback:     ksocknal_callback,
         cb_dist:         ksocknal_dist
 };
index cf9220b..4f0b303 100644 (file)
@@ -4,23 +4,14 @@
 # See the file COPYING in this distribution
 
 
-MODULE = portals
-modulenet_DATA = portals.o
-EXTRA_PROGRAMS = portals
+MODULE = libcfs
+modulenet_DATA = libcfs.o
+EXTRA_PROGRAMS = libcfs
 
-LIBLINKS := lib-dispatch.c lib-eq.c lib-init.c lib-md.c lib-me.c lib-move.c lib-msg.c lib-ni.c lib-pid.c
-APILINKS := api-eq.c api-errno.c api-init.c api-me.c api-ni.c api-wrap.c
-LINKS = $(APILINKS) $(LIBLINKS) 
-DISTCLEANFILES = $(LINKS) link-stamp *.orig *.rej
-
-$(LINKS): link-stamp
-link-stamp:
-       -list='$(LIBLINKS)'; for f in $$list; do echo $$f ; ln -sf $(srcdir)/../portals/$$f .; done
-       -list='$(APILINKS)'; for f in $$list; do echo $$f ; ln -sf $(srcdir)/../portals/$$f .; done
-       echo timestamp > link-stamp
+DISTCLEANFILES = *.orig *.rej
 
 DEFS =
-portals_SOURCES = $(LINKS) module.c proc.c debug.c lwt.c
+libcfs_SOURCES = module.c proc.c debug.c lwt.c
 
 # Don't distribute any patched files.
 dist-hook:
index e98779f..0f88a11 100644 (file)
 
 #include <linux/kp30.h>
 #include <linux/portals_compat25.h>
+#include <linux/libcfs.h>
+
+unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL |
+                                            S_GMNAL | S_IBNAL);
+EXPORT_SYMBOL(portal_subsystem_debug);
+
+unsigned int portal_debug = (D_WARNING | D_DLMTRACE | D_ERROR | D_EMERG | D_HA |
+                             D_RPCTRACE | D_VFSTRACE | D_MALLOC);
+EXPORT_SYMBOL(portal_debug);
+
+unsigned int portal_cerror = 1;
+EXPORT_SYMBOL(portal_cerror);
+
+unsigned int portal_printk;
+EXPORT_SYMBOL(portal_printk);
+
+unsigned int portal_stack;
+EXPORT_SYMBOL(portal_stack);
+
+#ifdef __KERNEL__
+atomic_t portal_kmemory = ATOMIC_INIT(0);
+EXPORT_SYMBOL(portal_kmemory);
+#endif
 
 #define DEBUG_OVERFLOW 1024
 static char *debug_buf = NULL;
@@ -926,6 +949,8 @@ void portals_run_lbug_upcall(char *file, const char *fn, const int line)
 char *portals_nid2str(int nal, ptl_nid_t nid, char *str)
 {
         switch(nal){
+/* XXX this should be a nal method of some sort */
+#ifndef CRAY_PORTALS 
         case TCPNAL:
                 /* userspace NAL */
         case SOCKNAL:
@@ -938,8 +963,9 @@ char *portals_nid2str(int nal, ptl_nid_t nid, char *str)
         case SCIMACNAL:
                 sprintf(str, "%u:%u", (__u32)(nid >> 32), (__u32)nid);
                 break;
+#endif
         default:
-                return NULL;
+                snprintf(str, PTL_NALFMT_SIZE-1, "(?%llx)", (long long)nid);
         }
         return str;
 }
index 2f5a852..9daa8e0 100644 (file)
 
 extern void (kping_client)(struct portal_ioctl_data *);
 
-struct nal_cmd_handler {
-        nal_cmd_handler_t nch_handler;
-        void * nch_private;
-};
-
-static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1];
-struct semaphore nal_cmd_sem;
-
 #ifdef PORTAL_DEBUG
 void kportal_assertion_failed(char *expr, char *file, const char *func,
                               const int line)
@@ -203,7 +195,7 @@ kportal_blockallsigs ()
 }
 
 /* called when opening /dev/device */
-static int kportal_psdev_open(struct inode * inode, struct file * file)
+static int libcfs_psdev_open(struct inode * inode, struct file * file)
 {
         struct portals_device_userstate *pdu;
         ENTRY;
@@ -224,7 +216,7 @@ static int kportal_psdev_open(struct inode * inode, struct file * file)
 }
 
 /* called when closing /dev/device */
-static int kportal_psdev_release(struct inode * inode, struct file * file)
+static int libcfs_psdev_release(struct inode * inode, struct file * file)
 {
         struct portals_device_userstate *pdu;
         ENTRY;
@@ -247,263 +239,49 @@ static inline void freedata(void *data, int len)
         PORTAL_FREE(data, len);
 }
 
-static int
-kportal_add_route(int gateway_nalid, ptl_nid_t gateway_nid, 
-                  ptl_nid_t lo_nid, ptl_nid_t hi_nid)
-{
-        int rc;
-        kpr_control_interface_t *ci;
-
-        ci = (kpr_control_interface_t *) PORTAL_SYMBOL_GET (kpr_control_interface);
-        if (ci == NULL)
-                return (-ENODEV);
-
-        rc = ci->kprci_add_route (gateway_nalid, gateway_nid, lo_nid, hi_nid);
-
-        PORTAL_SYMBOL_PUT(kpr_control_interface);
-        return (rc);
-}
-
-static int
-kportal_del_route(int gw_nalid, ptl_nid_t gw_nid, 
-                  ptl_nid_t lo, ptl_nid_t hi)
-{
-        int rc;
-        kpr_control_interface_t *ci;
-
-        ci = (kpr_control_interface_t *)PORTAL_SYMBOL_GET(kpr_control_interface);
-        if (ci == NULL)
-                return (-ENODEV);
-
-        rc = ci->kprci_del_route (gw_nalid, gw_nid, lo, hi);
-
-        PORTAL_SYMBOL_PUT(kpr_control_interface);
-        return (rc);
-}
-
-static int
-kportal_notify_router (int gw_nalid, ptl_nid_t gw_nid,
-                       int alive, time_t when)
-{
-        int rc;
-        kpr_control_interface_t *ci;
-
-        /* No error if router not preset.  Sysadmin is allowed to notify
-         * _everywhere_ when a NID boots or crashes, even if they know
-         * nothing of the peer. */
-        ci = (kpr_control_interface_t *)PORTAL_SYMBOL_GET(kpr_control_interface);
-        if (ci == NULL)
-                return (0);
-
-        rc = ci->kprci_notify (gw_nalid, gw_nid, alive, when);
-
-        PORTAL_SYMBOL_PUT(kpr_control_interface);
-        return (rc);
-}
-
-static int
-kportal_get_route(int index, __u32 *gateway_nalidp, ptl_nid_t *gateway_nidp,
-                  ptl_nid_t *lo_nidp, ptl_nid_t *hi_nidp, int *alivep)
-{
-        int       gateway_nalid;
-        ptl_nid_t gateway_nid;
-        ptl_nid_t lo_nid;
-        ptl_nid_t hi_nid;
-        int       alive;
-        int       rc;
-        kpr_control_interface_t *ci;
-
-        ci = (kpr_control_interface_t *) PORTAL_SYMBOL_GET(kpr_control_interface);
-        if (ci == NULL)
-                return (-ENODEV);
-
-        rc = ci->kprci_get_route(index, &gateway_nalid, &gateway_nid,
-                                 &lo_nid, &hi_nid, &alive);
-
-        if (rc == 0) {
-                CDEBUG(D_IOCTL, "got route [%d] %d "LPX64":"LPX64" - "LPX64", %s\n",
-                       index, gateway_nalid, gateway_nid, lo_nid, hi_nid,
-                       alive ? "up" : "down");
-
-                *gateway_nalidp = (__u32)gateway_nalid;
-                *gateway_nidp   = gateway_nid;
-                *lo_nidp        = lo_nid;
-                *hi_nidp        = hi_nid;
-                *alivep         = alive;
-        }
-
-        PORTAL_SYMBOL_PUT (kpr_control_interface);
-        return (rc);
-}
-
-static int 
-kportal_router_cmd(struct portals_cfg *pcfg, void * private)
-{
-        int err = -EINVAL;
-        ENTRY;
-
-        switch(pcfg->pcfg_command) {
-        default:
-                CDEBUG(D_IOCTL, "Inappropriate cmd: %d\n", pcfg->pcfg_command);
-                break;
-                
-        case NAL_CMD_ADD_ROUTE:
-                CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n",
-                       pcfg->pcfg_nal, pcfg->pcfg_nid, 
-                       pcfg->pcfg_nid2, pcfg->pcfg_nid3);
-                err = kportal_add_route(pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
-                                        pcfg->pcfg_nid2, pcfg->pcfg_nid3);
-                break;
-
-        case NAL_CMD_DEL_ROUTE:
-                CDEBUG (D_IOCTL, "Removing routes via [%d] "LPU64" : "LPU64" - "LPU64"\n",
-                        pcfg->pcfg_gw_nal, pcfg->pcfg_nid, 
-                        pcfg->pcfg_nid2, pcfg->pcfg_nid3);
-                err = kportal_del_route (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
-                                         pcfg->pcfg_nid2, pcfg->pcfg_nid3);
-                break;
-
-        case NAL_CMD_NOTIFY_ROUTER: {
-                CDEBUG (D_IOCTL, "Notifying peer [%d] "LPU64" %s @ %ld\n",
-                        pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
-                        pcfg->pcfg_flags ? "Enabling" : "Disabling",
-                        (time_t)pcfg->pcfg_nid3);
-                
-                err = kportal_notify_router (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
-                                             pcfg->pcfg_flags, 
-                                             (time_t)pcfg->pcfg_nid3);
-                break;
-        }
-                
-        case NAL_CMD_GET_ROUTE:
-                CDEBUG (D_IOCTL, "Getting route [%d]\n", pcfg->pcfg_count);
-                err = kportal_get_route(pcfg->pcfg_count, &pcfg->pcfg_gw_nal,
-                                        &pcfg->pcfg_nid, 
-                                        &pcfg->pcfg_nid2, &pcfg->pcfg_nid3,
-                                        &pcfg->pcfg_flags);
-                break;
-        }
-        RETURN(err);
-}
-
-int
-kportal_nal_cmd(struct portals_cfg *pcfg)
-{
-        __u32 nal = pcfg->pcfg_nal;
-        int rc = -EINVAL;
-
-        ENTRY;
-
-        down(&nal_cmd_sem);
-        if (nal > 0 && nal <= NAL_MAX_NR && nal_cmd[nal].nch_handler) {
-                CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal, 
-                       pcfg->pcfg_command);
-                rc = nal_cmd[nal].nch_handler(pcfg, nal_cmd[nal].nch_private);
-        }
-        up(&nal_cmd_sem);
-        RETURN(rc);
-}
-
-ptl_handle_ni_t *
-kportal_get_ni (int nal)
-{
-
-        switch (nal)
-        {
-        case QSWNAL:
-                return (PORTAL_SYMBOL_GET(kqswnal_ni));
-        case SOCKNAL:
-                return (PORTAL_SYMBOL_GET(ksocknal_ni));
-        case GMNAL:
-                return  (PORTAL_SYMBOL_GET(kgmnal_ni));
-        case IBNAL:
-                return  (PORTAL_SYMBOL_GET(kibnal_ni));
-        case TCPNAL:
-                /* userspace NAL */
-                return (NULL);
-        case SCIMACNAL:
-                return  (PORTAL_SYMBOL_GET(kscimacnal_ni));
-        default:
-                /* A warning to a naive caller */
-                CERROR ("unknown nal: %d\n", nal);
-                return (NULL);
-        }
-}
-
-void
-kportal_put_ni (int nal)
-{
-
-        switch (nal)
-        {
-        case QSWNAL:
-                PORTAL_SYMBOL_PUT(kqswnal_ni);
-                break;
-        case SOCKNAL:
-                PORTAL_SYMBOL_PUT(ksocknal_ni);
-                break;
-        case GMNAL:
-                PORTAL_SYMBOL_PUT(kgmnal_ni);
-                break;
-        case IBNAL:
-                PORTAL_SYMBOL_PUT(kibnal_ni);
-                break;
-        case TCPNAL:
-                /* A lesson to a malicious caller */
-                LBUG ();
-        case SCIMACNAL:
-                PORTAL_SYMBOL_PUT(kscimacnal_ni);
-                break;
-        default:
-                CERROR ("unknown nal: %d\n", nal);
-        }
-}
+/* Handlers registered through libcfs_register_ioctl().  libcfs_ioctl()
+ * walks ioctl_list, holding ioctl_list_sem for read, for every command it
+ * does not handle itself; insertion and removal take it for write. */
+static DECLARE_RWSEM(ioctl_list_sem);
+static LIST_HEAD(ioctl_list);
 
-int
-kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private)
+/*
+ * Append 'hand' to the list of handlers that libcfs_ioctl() tries for
+ * commands it does not recognise itself.
+ *
+ * The "already registered?" test and the insertion are done under one
+ * write lock, so two concurrent registrations of the same handler cannot
+ * both pass the test and link the item twice.
+ *
+ * NOTE: assumes the caller initialised hand->item as an empty list head
+ * (INIT_LIST_HEAD), so that list_empty() means "not registered".
+ *
+ * Returns 0 on success, or -EBUSY if 'hand' is already linked on a list.
+ */
+int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand)
 {
         int rc = 0;
 
-        CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler);
-
-        if (nal > 0  && nal <= NAL_MAX_NR) {
-                down(&nal_cmd_sem);
-                if (nal_cmd[nal].nch_handler != NULL)
-                        rc = -EBUSY;
-                else {
-                        nal_cmd[nal].nch_handler = handler;
-                        nal_cmd[nal].nch_private = private;
-                }
-                up(&nal_cmd_sem);
+        down_write(&ioctl_list_sem);
+        if (list_empty(&hand->item)) {
+                list_add_tail(&hand->item, &ioctl_list);
+        } else {
+                /* already linked somewhere: refuse rather than corrupt */
+                rc = -EBUSY;
         }
-        return rc;
+        up_write(&ioctl_list_sem);
+
+        /* propagate the result; it used to be dropped in favour of 0 */
+        RETURN(rc);
 }
+EXPORT_SYMBOL(libcfs_register_ioctl);
 
-int
-kportal_nal_unregister(int nal)
+/*
+ * Remove 'hand' from the list of handlers consulted by libcfs_ioctl().
+ *
+ * The membership test and the removal are done under one write lock, so
+ * the handler cannot be unlinked by someone else between the two steps.
+ * list_del_init() leaves hand->item as an empty list head, which is the
+ * state libcfs_register_ioctl() tests for.
+ *
+ * Returns 0 on success, or -ENOENT if 'hand' is not linked on any list.
+ */
+int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand)
 {
         int rc = 0;
 
-        CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal);
-
-        if (nal > 0  && nal <= NAL_MAX_NR) {
-                down(&nal_cmd_sem);
-                nal_cmd[nal].nch_handler = NULL;
-                nal_cmd[nal].nch_private = NULL;
-                up(&nal_cmd_sem);
+        down_write(&ioctl_list_sem);
+        if (list_empty(&hand->item)) {
+                /* never registered, or already removed */
+                rc = -ENOENT;
+        } else {
+                list_del_init(&hand->item);
         }
-        return rc;
+        up_write(&ioctl_list_sem);
+
+        /* propagate the result; it used to be dropped in favour of 0 */
+        RETURN(rc);
 }
+EXPORT_SYMBOL(libcfs_deregister_ioctl);
 
-
-static int kportal_ioctl(struct inode *inode, struct file *file,
-                         unsigned int cmd, unsigned long arg)
+static int libcfs_ioctl(struct inode *inode, struct file *file,
+                        unsigned int cmd, unsigned long arg)
 {
-        int err = 0;
+        int err = -EINVAL;
         char buf[1024];
         struct portal_ioctl_data *data;
-        char str[PTL_NALFMT_SIZE];
-
         ENTRY;
 
         if (current->fsuid != 0)
@@ -556,79 +334,6 @@ static int kportal_ioctl(struct inode *inode, struct file *file,
                         RETURN(-EINVAL);
                 portals_debug_mark_buffer(data->ioc_inlbuf1);
                 RETURN(0);
-        case IOC_PORTAL_PING: {
-                void (*ping)(struct portal_ioctl_data *);
-
-                CDEBUG(D_IOCTL, "doing %d pings to nid "LPX64" (%s)\n",
-                       data->ioc_count, data->ioc_nid,
-                       portals_nid2str(data->ioc_nal, data->ioc_nid, str));
-                ping = PORTAL_SYMBOL_GET(kping_client);
-                if (!ping)
-                        CERROR("PORTAL_SYMBOL_GET failed\n");
-                else {
-                        ping(data);
-                        PORTAL_SYMBOL_PUT(kping_client);
-                }
-                RETURN(0);
-        }
-
-        case IOC_PORTAL_GET_NID: {
-                const ptl_handle_ni_t *nip;
-                ptl_process_id_t       pid;
-
-                CDEBUG (D_IOCTL, "Getting nid for nal [%d]\n", data->ioc_nal);
-
-                nip = kportal_get_ni (data->ioc_nal);
-                if (nip == NULL)
-                        RETURN (-EINVAL);
-
-                err = PtlGetId (*nip, &pid);
-                LASSERT (err == PTL_OK);
-                kportal_put_ni (data->ioc_nal);
-
-                data->ioc_nid = pid.nid;
-                if (copy_to_user ((char *)arg, data, sizeof (*data)))
-                        err = -EFAULT;
-                break;
-        }
-
-        case IOC_PORTAL_NAL_CMD: {
-                struct portals_cfg pcfg;
-
-                LASSERT (data->ioc_plen1 == sizeof(pcfg));
-                err = copy_from_user(&pcfg, (void *)data->ioc_pbuf1, 
-                                     sizeof(pcfg));
-                if ( err ) {
-                        EXIT;
-                        return err;
-                }
-
-                CDEBUG (D_IOCTL, "nal command nal %d cmd %d\n", pcfg.pcfg_nal,
-                        pcfg.pcfg_command);
-                err = kportal_nal_cmd(&pcfg);
-                if (err == 0) {
-                        if (copy_to_user((char *)data->ioc_pbuf1, &pcfg, 
-                                         sizeof (pcfg)))
-                                err = -EFAULT;
-                        if (copy_to_user((char *)arg, data, sizeof (*data)))
-                                err = -EFAULT;
-                }
-                break;
-        }
-        case IOC_PORTAL_FAIL_NID: {
-                const ptl_handle_ni_t *nip;
-
-                CDEBUG (D_IOCTL, "fail nid: [%d] "LPU64" count %d\n",
-                        data->ioc_nal, data->ioc_nid, data->ioc_count);
-
-                nip = kportal_get_ni (data->ioc_nal);
-                if (nip == NULL)
-                        return (-EINVAL);
-
-                err = PtlFailNid (*nip, data->ioc_nid, data->ioc_count);
-                kportal_put_ni (data->ioc_nal);
-                break;
-        }
 #if LWT_SUPPORT
         case IOC_PORTAL_LWT_CONTROL: 
                 err = lwt_control (data->ioc_flags, data->ioc_misc);
@@ -666,26 +371,34 @@ static int kportal_ioctl(struct inode *inode, struct file *file,
                 }
                 break;
 
-        default:
+        default: {
+                struct libcfs_ioctl_handler *hand;
                 err = -EINVAL;
-                break;
+                down_read(&ioctl_list_sem);
+                list_for_each_entry(hand, &ioctl_list, item) {
+                        err = hand->handle_ioctl(data, cmd, arg);
+                        if (err != -EINVAL)
+                                break;
+                }
+                up_read(&ioctl_list_sem);
+                } break;
         }
 
         RETURN(err);
 }
 
 
-static struct file_operations portalsdev_fops = {
-        ioctl:   kportal_ioctl,
-        open:    kportal_psdev_open,
-        release: kportal_psdev_release
+/* File operations behind the libcfs misc device: open and release manage
+ * the per-open state, ioctl carries every control request. */
+static struct file_operations libcfs_fops = {
+        .open    = libcfs_psdev_open,
+        .release = libcfs_psdev_release,
+        .ioctl   = libcfs_ioctl,
 };
 
 
-static struct miscdevice portal_dev = {
+/* Misc device registered at module init and removed at module exit.
+ * Positional initialiser; NOTE(review): assumed to map to minor number,
+ * device name and file operations in that order - confirm against the
+ * struct miscdevice declaration.  The device name stays "portals". */
+static struct miscdevice libcfs_dev = {
         PORTAL_MINOR,
         "portals",
-        &portalsdev_fops
+        &libcfs_fops
 };
 
 extern int insert_proc(void);
@@ -694,7 +407,7 @@ MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
 MODULE_DESCRIPTION("Portals v3.1");
 MODULE_LICENSE("GPL");
 
-static int init_kportals_module(void)
+static int init_libcfs_module(void)
 {
         int rc;
 
@@ -711,41 +424,23 @@ static int init_kportals_module(void)
                 goto cleanup_debug;
         }
 #endif
-        sema_init(&nal_cmd_sem, 1);
-
-        rc = misc_register(&portal_dev);
+        rc = misc_register(&libcfs_dev);
         if (rc) {
                 CERROR("misc_register: error %d\n", rc);
                 goto cleanup_lwt;
         }
 
-        rc = PtlInit();
-        if (rc) {
-                CERROR("PtlInit: error %d\n", rc);
-                goto cleanup_deregister;
-        }
-
         rc = insert_proc();
         if (rc) {
                 CERROR("insert_proc: error %d\n", rc);
-                goto cleanup_fini;
-        }
-
-        rc = kportal_nal_register(ROUTER, kportal_router_cmd, NULL);
-        if (rc) {
-                CERROR("kportal_nal_registre: ROUTER error %d\n", rc);
-                goto cleanup_proc;
+                goto cleanup_deregister;
         }
 
         CDEBUG (D_OTHER, "portals setup OK\n");
         return (0);
 
- cleanup_proc:
-        remove_proc();
- cleanup_fini:
-        PtlFini();
  cleanup_deregister:
-        misc_deregister(&portal_dev);
+        misc_deregister(&libcfs_dev);
  cleanup_lwt:
 #if LWT_SUPPORT
         lwt_fini();
@@ -755,19 +450,17 @@ static int init_kportals_module(void)
         return rc;
 }
 
-static void exit_kportals_module(void)
+static void exit_libcfs_module(void)
 {
         int rc;
 
-        kportal_nal_unregister(ROUTER);
         remove_proc();
-        PtlFini();
 
         CDEBUG(D_MALLOC, "before Portals cleanup: kmem %d\n",
                atomic_read(&portal_kmemory));
 
 
-        rc = misc_deregister(&portal_dev);
+        rc = misc_deregister(&libcfs_dev);
         if (rc)
                 CERROR("misc_deregister error %d\n", rc);
 
@@ -784,54 +477,9 @@ static void exit_kportals_module(void)
                 printk(KERN_ERR "LustreError: portals_debug_cleanup: %d\n", rc);
 }
 
-EXPORT_SYMBOL(lib_dispatch);
-EXPORT_SYMBOL(PtlMEAttach);
-EXPORT_SYMBOL(PtlMEInsert);
-EXPORT_SYMBOL(PtlMEUnlink);
-EXPORT_SYMBOL(PtlEQAlloc);
-EXPORT_SYMBOL(PtlMDAttach);
-EXPORT_SYMBOL(PtlMDUnlink);
-EXPORT_SYMBOL(PtlNIInit);
-EXPORT_SYMBOL(PtlNIFini);
-EXPORT_SYMBOL(PtlNIDebug);
-EXPORT_SYMBOL(PtlInit);
-EXPORT_SYMBOL(PtlFini);
-EXPORT_SYMBOL(PtlPut);
-EXPORT_SYMBOL(PtlGet);
-EXPORT_SYMBOL(ptl_err_str);
-EXPORT_SYMBOL(portal_subsystem_debug);
-EXPORT_SYMBOL(portal_debug);
-EXPORT_SYMBOL(portal_stack);
-EXPORT_SYMBOL(portal_printk);
-EXPORT_SYMBOL(portal_cerror);
-EXPORT_SYMBOL(PtlEQWait);
-EXPORT_SYMBOL(PtlEQFree);
-EXPORT_SYMBOL(PtlEQGet);
-EXPORT_SYMBOL(PtlGetId);
-EXPORT_SYMBOL(PtlMDBind);
-EXPORT_SYMBOL(lib_iov_nob);
-EXPORT_SYMBOL(lib_copy_iov2buf);
-EXPORT_SYMBOL(lib_copy_buf2iov);
-EXPORT_SYMBOL(lib_extract_iov);
-EXPORT_SYMBOL(lib_kiov_nob);
-EXPORT_SYMBOL(lib_copy_kiov2buf);
-EXPORT_SYMBOL(lib_copy_buf2kiov);
-EXPORT_SYMBOL(lib_extract_kiov);
-EXPORT_SYMBOL(lib_finalize);
-EXPORT_SYMBOL(lib_parse);
-EXPORT_SYMBOL(lib_fake_reply_msg);
-EXPORT_SYMBOL(lib_init);
-EXPORT_SYMBOL(lib_fini);
-EXPORT_SYMBOL(portal_kmemory);
 EXPORT_SYMBOL(kportal_daemonize);
 EXPORT_SYMBOL(kportal_blockallsigs);
-EXPORT_SYMBOL(kportal_nal_register);
-EXPORT_SYMBOL(kportal_nal_unregister);
 EXPORT_SYMBOL(kportal_assertion_failed);
-EXPORT_SYMBOL(dispatch_name);
-EXPORT_SYMBOL(kportal_get_ni);
-EXPORT_SYMBOL(kportal_put_ni);
-EXPORT_SYMBOL(kportal_nal_cmd);
 
-module_init(init_kportals_module);
-module_exit (exit_kportals_module);
+module_init(init_libcfs_module);
+module_exit(exit_libcfs_module);
index d17db61..59f9dd9 100644 (file)
@@ -3,12 +3,30 @@
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
+DEFS =
+
+my_sources = api-eq.c api-init.c api-me.c api-errno.c api-ni.c api-wrap.c \
+               lib-dispatch.c lib-init.c lib-me.c lib-msg.c lib-eq.c \
+               lib-md.c lib-move.c lib-ni.c lib-pid.c
 
-CPPFLAGS=
-INCLUDES=-I$(top_srcdir)/portals/include -I$(top_srcdir)/include
 noinst_LIBRARIES= libportals.a
-libportals_a_SOURCES= api-eq.c api-init.c api-me.c api-errno.c api-ni.c api-wrap.c lib-dispatch.c lib-init.c lib-me.c lib-msg.c lib-eq.c lib-md.c lib-move.c lib-ni.c lib-pid.c
+libportals_a_SOURCES= $(my_sources)
 
 if LIBLUSTRE
 libportals_a_CFLAGS= -fPIC
+else
+
+MODULE = portals
+modulenet_DATA = portals.o
+EXTRA_PROGRAMS = portals
+
+DISTCLEANFILES = *.orig *.rej
+
+portals_SOURCES= module.c $(my_sources)
+
+# Don't distribute any patched files.
+dist-hook:
+       list='$(EXT2C)'; for f in $$list; do rm -f $(distdir)/$$f; done
+
+include ../Rules.linux
 endif
index 7822846..de01765 100644 (file)
@@ -9,4 +9,4 @@ obj-y += portals.o
 portals-objs    :=     lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o \
                        lib-move.o lib-msg.o lib-ni.o lib-pid.o \
                        api-eq.o api-errno.o api-init.o api-me.o api-ni.o \
-                       api-wrap.o
+                       api-wrap.o module.o
index 964b9d8..390156a 100644 (file)
@@ -47,35 +47,16 @@ void ptl_eq_ni_fini(nal_t * nal)
         /* Nothing to do anymore... */
 }
 
-int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t * ev)
+int ptl_get_event (ptl_eq_t *eq, ptl_event_t *ev)
 {
-        ptl_eq_t *eq;
-        int rc, new_index;
-        unsigned long flags;
-        ptl_event_t *new_event;
-        nal_t *nal;
+        int          new_index = eq->sequence & (eq->size - 1);
+        ptl_event_t *new_event = &eq->base[new_index];
         ENTRY;
 
-        if (!ptl_init)
-                RETURN(PTL_NOINIT);
-
-        nal = ptl_hndl2nal(&eventq);
-        if (!nal)
-                RETURN(PTL_INV_EQ);
-
-        eq = ptl_handle2usereq(&eventq);
-        nal->lock(nal, &flags);
-
-        /* size must be a power of 2 to handle a wrapped sequence # */
-        LASSERT (eq->size != 0 &&
-                 eq->size == LOWEST_BIT_SET (eq->size));
-
-        new_index = eq->sequence & (eq->size - 1);
-        new_event = &eq->base[new_index];
         CDEBUG(D_INFO, "new_event: %p, sequence: %lu, eq->size: %u\n",
                new_event, eq->sequence, eq->size);
+
         if (PTL_SEQ_GT (eq->sequence, new_event->sequence)) {
-                nal->unlock(nal, &flags);
                 RETURN(PTL_EQ_EMPTY);
         }
 
@@ -86,117 +67,75 @@ int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t * ev)
         if (eq->sequence != new_event->sequence) {
                 CERROR("DROPPING EVENT: eq seq %lu ev seq %lu\n",
                        eq->sequence, new_event->sequence);
-                rc = PTL_EQ_DROPPED;
-        } else {
-                rc = PTL_OK;
+                RETURN(PTL_EQ_DROPPED);
         }
 
         eq->sequence = new_event->sequence + 1;
-        nal->unlock(nal, &flags);
-        RETURN(rc);
+        RETURN(PTL_OK);
 }
 
-
-int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out)
+/*
+ * Fetch the next event from a single event queue without waiting.
+ *
+ * Implemented as PtlEQPoll() over a one-element queue array with a
+ * timeout of 0; whatever PtlEQPoll() returns is passed straight back.
+ * The queue index PtlEQPoll() reports is stored in a local and discarded.
+ */
+int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t * ev)
 {
-        int rc;
+        int which;
         
-        /* PtlEQGet does the handle checking */
-        while ((rc = PtlEQGet(eventq_in, event_out)) == PTL_EQ_EMPTY) {
-                nal_t *nal = ptl_hndl2nal(&eventq_in);
-                
-                if (nal->yield)
-                        nal->yield(nal);
-        }
-
-        return rc;
+        return (PtlEQPoll (&eventq, 1, 0, ev, &which));
 }
 
-#ifndef __KERNEL__
-#if 0
-static jmp_buf eq_jumpbuf;
-
-static void eq_timeout(int signal)
-{
-        sigset_t set;
-
-        /* signal will be automatically disabled in sig handler,
-         * must enable it before long jump
-         */
-        sigemptyset(&set);
-        sigaddset(&set, SIGALRM);
-        sigprocmask(SIG_UNBLOCK, &set, NULL);
-
-        longjmp(eq_jumpbuf, -1);
-}
-
-int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out,
-                      int timeout)
+/*
+ * Fetch the next event from a single event queue, waiting for one.
+ *
+ * Implemented as PtlEQPoll() over a one-element queue array with a
+ * timeout of PTL_TIME_FOREVER; whatever PtlEQPoll() returns is passed
+ * straight back.  The queue index PtlEQPoll() reports is stored in a
+ * local and discarded.
+ */
+int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out)
 {
-        static void (*prev) (int) = NULL;
-        static int left_over;
-        time_t time_at_start;
-        int rc;
-
-        if (setjmp(eq_jumpbuf)) {
-                signal(SIGALRM, prev);
-                alarm(left_over - timeout);
-                return PTL_EQ_EMPTY;
-        }
-
-        left_over = alarm(timeout);
-        prev = signal(SIGALRM, eq_timeout);
-        time_at_start = time(NULL);
-        if (left_over && left_over < timeout)
-                alarm(left_over);
-
-        rc = PtlEQWait(eventq_in, event_out);
-
-        signal(SIGALRM, prev);
-        alarm(left_over);       /* Should compute how long we waited */
-
-        return rc;
+        int which;
+        
+        return (PtlEQPoll (&eventq_in, 1, PTL_TIME_FOREVER, 
+                           event_out, &which));
 }
-#else
-#include <errno.h>
-
-/* FIXME
- * Here timeout need a trick with tcpnal, definitely unclean but OK for
- * this moment.
- */
 
-/* global variables defined by tcpnal */
-extern int __tcpnal_eqwait_timeout_value;
-extern int __tcpnal_eqwait_timedout;
-
-int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out,
-                      int timeout)
+/*
+ * Poll 'neq_in' event queues, in array order, for the first available
+ * event.
+ *
+ * eventqs_in  array of queue handles; all must resolve to the same NAL
+ * neq_in      number of handles in eventqs_in (must be >= 1)
+ * timeout     0 means make one pass and return; any other value is handed
+ *             to nal->yield() and replaced by what yield returns
+ * event_out   receives the event
+ * which_out   receives the index of the queue that produced the result;
+ *             only written when the result is not PTL_EQ_EMPTY
+ *
+ * Returns PTL_NO_INIT if ptl_init is not set; PTL_EQ_INVALID if neq_in < 1,
+ * the first handle has no NAL, or a later handle resolves to a different
+ * NAL; PTL_EQ_EMPTY if every queue was empty and timeout is 0; otherwise
+ * the first ptl_get_event() result that is not PTL_EQ_EMPTY.
+ *
+ * The NAL lock is taken once before the loop and released on every return
+ * path.  nal->yield() is called with the lock held and is given 'flags';
+ * NOTE(review): presumably it drops and re-takes the lock while waiting
+ * and returns the remaining timeout - confirm against the NAL code.
+ * NOTE(review): nal->yield is called unconditionally; confirm every NAL
+ * provides it.
+ */
+int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout,
+              ptl_event_t *event_out, int *which_out)
 {
-        int rc;
+        nal_t        *nal;
+        int           i;
+        int           rc;
+        unsigned long flags;
+        
+        if (!ptl_init)
+                RETURN(PTL_NO_INIT);
 
-        if (!timeout)
-                return PtlEQWait(eventq_in, event_out);
+        if (neq_in < 1)
+                RETURN(PTL_EQ_INVALID);
+        
+        /* the first handle decides which NAL's lock and yield are used */
+        nal = ptl_hndl2nal(&eventqs_in[0]);
+        if (nal == NULL)
+                RETURN(PTL_EQ_INVALID);
 
-        __tcpnal_eqwait_timeout_value = timeout;
+        nal->lock(nal, &flags);
 
-        while ((rc = PtlEQGet(eventq_in, event_out)) == PTL_EQ_EMPTY) {
-                nal_t *nal = ptl_hndl2nal(&eventq_in);
+        for (;;) {
+                for (i = 0; i < neq_in; i++) {
+                        ptl_eq_t *eq = ptl_handle2usereq(&eventqs_in[i]);
+
+                        /* 'eq' is not dereferenced until this check passes */
+                        if (i > 0 &&
+                            ptl_hndl2nal(&eventqs_in[i]) != nal) {
+                                nal->unlock(nal, &flags);
+                                RETURN (PTL_EQ_INVALID);
+                        }
+
+                        /* size must be a power of 2 to handle a wrapped sequence # */
+                        LASSERT (eq->size != 0 &&
+                                 eq->size == LOWEST_BIT_SET (eq->size));
+
+                        rc = ptl_get_event (eq, event_out);
+                        if (rc != PTL_EQ_EMPTY) {
+                                nal->unlock(nal, &flags);
+                                *which_out = i;
+                                RETURN(rc);
+                        }
+                }
                 
-                if (nal->yield)
-                        nal->yield(nal);
-
-                if (__tcpnal_eqwait_timedout) {
-                        if (__tcpnal_eqwait_timedout != ETIMEDOUT)
-                                printf("Warning: yield return error %d\n",
-                                        __tcpnal_eqwait_timedout);
-                        rc = PTL_EQ_EMPTY;
-                        break;
+                if (timeout == 0) {
+                        nal->unlock(nal, &flags);
+                        RETURN (PTL_EQ_EMPTY);
                 }
+                        
+                /* every queue was empty: wait, then rescan with the new timeout */
+                timeout = nal->yield(nal, &flags, timeout);
         }
-
-        __tcpnal_eqwait_timeout_value = 0;
-
-        return rc;
 }
-#endif
-#endif /* __KERNEL__ */
index b5e7aa1..0e155da 100644 (file)
@@ -12,43 +12,35 @@ const char *ptl_err_str[] = {
         "PTL_OK",
         "PTL_SEGV",
 
-        "PTL_NOSPACE",
-        "PTL_INUSE",
+        "PTL_NO_SPACE",
+        "PTL_ME_IN_USE",
         "PTL_VAL_FAILED",
 
         "PTL_NAL_FAILED",
-        "PTL_NOINIT",
-        "PTL_INIT_DUP",
-        "PTL_INIT_INV",
-        "PTL_AC_INV_INDEX",
-
-        "PTL_INV_ASIZE",
-        "PTL_INV_HANDLE",
-        "PTL_INV_MD",
-        "PTL_INV_ME",
-        "PTL_INV_NI",
+        "PTL_NO_INIT",
+        "PTL_IFACE_DUP",
+        "PTL_IFACE_INVALID",
+
+        "PTL_HANDLE_INVALID",
+        "PTL_MD_INVALID",
+        "PTL_ME_INVALID",
 /* If you change these, you must update the number table in portals/errno.h */
-        "PTL_ILL_MD",
-        "PTL_INV_PROC",
-        "PTL_INV_PSIZE",
-        "PTL_INV_PTINDEX",
-        "PTL_INV_REG",
-
-        "PTL_INV_SR_INDX",
-        "PTL_ML_TOOLONG",
-        "PTL_ADDR_UNKNOWN",
-        "PTL_INV_EQ",
+        "PTL_PROCESS_INVALID",
+        "PTL_PT_INDEX_INVALID",
+
+        "PTL_SR_INDEX_INVALID",
+        "PTL_EQ_INVALID",
         "PTL_EQ_DROPPED",
 
         "PTL_EQ_EMPTY",
-        "PTL_NOUPDATE",
+        "PTL_MD_NO_UPDATE",
         "PTL_FAIL",
-        "PTL_NOT_IMPLEMENTED",
-        "PTL_NO_ACK",
 
         "PTL_IOV_TOO_MANY",
         "PTL_IOV_TOO_SMALL",
 
-        "PTL_EQ_INUSE",
+        "PTL_EQ_IN_USE",
+
+        "PTL_MAX_ERRNO"
 };
 /* If you change these, you must update the number table in portals/errno.h */
index e2921ac..e41bad8 100644 (file)
 #include <portals/api-support.h>
 
 int ptl_init;
-unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL |
-                                            S_GMNAL | S_IBNAL);
-unsigned int portal_debug = (D_WARNING | D_DLMTRACE | D_ERROR | D_EMERG | D_HA |
-                             D_RPCTRACE | D_VFSTRACE | D_MALLOC);
-unsigned int portal_cerror = 1;
-unsigned int portal_printk;
-unsigned int portal_stack;
-
-#ifdef __KERNEL__
-atomic_t portal_kmemory = ATOMIC_INIT(0);
-#endif
 
 int __p30_initialized;
 int __p30_myr_initialized;
@@ -44,20 +33,20 @@ int __p30_ip_initialized;
 ptl_handle_ni_t __myr_ni_handle;
 ptl_handle_ni_t __ip_ni_handle;
 
-int __p30_myr_timeout = 10;
-int __p30_ip_timeout;
-
-int PtlInit(void)
+/*
+ * Initialise the portals API layer.
+ *
+ * max_interfaces  optional out-parameter; when non-NULL it is set to
+ *                 NAL_ENUM_END_MARKER on every call, including calls made
+ *                 after initialisation has already happened
+ *
+ * Safe to call repeatedly: once ptl_init is set, later calls return PTL_OK
+ * without re-running the ni/me/eq initialisers.  Always returns PTL_OK.
+ */
+int PtlInit(int *max_interfaces)
 {
+        if (max_interfaces != NULL)
+                *max_interfaces = NAL_ENUM_END_MARKER;
 
         if (ptl_init)
                 return PTL_OK;
 
+        /* the error-string table must end with the PTL_MAX_ERRNO entry,
+         * i.e. it has not drifted out of step with the errno numbering */
+        LASSERT(!strcmp(ptl_err_str[PTL_MAX_ERRNO], "PTL_MAX_ERRNO"));
+
         ptl_ni_init();
         ptl_me_init();
         ptl_eq_init();
         ptl_init = 1;
-        __p30_initialized = 1;
 
         return PTL_OK;
 }
@@ -72,3 +61,9 @@ void PtlFini(void)
         ptl_ni_fini();
         ptl_init = 0;
 }
+
+
+/*
+ * Format handle 'h' into 'str' as "0x<nal_idx>.<cookie>": nal_idx is
+ * printed with %lx and cookie with the LPX64 format macro.
+ * snprintf() writes at most 'len' bytes; its return value is not checked,
+ * so truncation is not reported to the caller.
+ */
+void PtlSnprintHandle(char *str, int len, ptl_handle_any_t h)
+{
+        snprintf(str, len, "0x%lx."LPX64, h.nal_idx, h.cookie);
+}
index 18eea91..02082c6 100644 (file)
@@ -110,7 +110,7 @@ int PtlNIInit(ptl_interface_t interface, ptl_pt_index_t ptl_size,
         int i;
 
         if (!ptl_init)
-                return PTL_NOINIT;
+                return PTL_NO_INIT;
 
         ptl_ni_init_mutex_enter ();
 
@@ -136,7 +136,7 @@ int PtlNIInit(ptl_interface_t interface, ptl_pt_index_t ptl_size,
                 if (nal->shutdown)
                         nal->shutdown (nal, ptl_num_interfaces);
                 ptl_ni_init_mutex_exit ();
-                return PTL_NOSPACE;
+                return PTL_NO_SPACE;
         }
 
         handle->nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | ptl_num_interfaces;
@@ -157,14 +157,14 @@ int PtlNIFini(ptl_handle_ni_t ni)
         int rc;
 
         if (!ptl_init)
-                return PTL_NOINIT;
+                return PTL_NO_INIT;
 
         ptl_ni_init_mutex_enter ();
 
         nal = ptl_hndl2nal (&ni);
         if (nal == NULL) {
                 ptl_ni_init_mutex_exit ();
-                return PTL_INV_HANDLE;
+                return PTL_HANDLE_INVALID;
         }
 
         idx = ni.nal_idx & NI_HANDLE_MASK;
index d23a6aa..9c82c30 100644 (file)
@@ -33,12 +33,12 @@ static int do_forward(ptl_handle_any_t any_h, int cmd, void *argbuf,
 
         if (!ptl_init) {
                 CERROR("Not initialized\n");
-                return PTL_NOINIT;
+                return PTL_NO_INIT;
         }
 
         nal = ptl_hndl2nal(&any_h);
         if (!nal)
-                return PTL_INV_HANDLE;
+                return PTL_HANDLE_INVALID;
 
         nal->forward(nal, cmd, argbuf, argsize, retbuf, retsize);
 
@@ -194,7 +194,7 @@ int PtlMEInsert(ptl_handle_me_t current_in, ptl_process_id_t match_id_in,
                         sizeof(ret));
 
         if (rc != PTL_OK)
-                return (rc == PTL_INV_HANDLE) ? PTL_INV_ME : rc;
+                return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
 
         if (handle_out) {
                 handle_out->nal_idx = current_in.nal_idx;
@@ -216,7 +216,7 @@ int PtlMEUnlink(ptl_handle_me_t current_in)
                         sizeof(ret));
 
         if (rc != PTL_OK)
-                return (rc == PTL_INV_HANDLE) ? PTL_INV_ME : rc;
+                return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
 
         return ret.rc;
 }
@@ -250,7 +250,7 @@ int PtlMEDump(ptl_handle_me_t current_in)
                         sizeof(ret));
 
         if (rc != PTL_OK)
-                return (rc == PTL_INV_HANDLE) ? PTL_INV_ME : rc;
+                return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
 
         return ret.rc;
 }
@@ -263,16 +263,16 @@ static int validate_md(ptl_handle_any_t current_in, ptl_md_t md_in)
 
         if (!ptl_init) {
                 CERROR("PtlMDAttach/Bind/Update: Not initialized\n");
-                return PTL_NOINIT;
+                return PTL_NO_INIT;
         }
 
         nal = ptl_hndl2nal(&current_in);
         if (!nal)
-                return PTL_INV_HANDLE;
+                return PTL_HANDLE_INVALID;
 
         if (nal->validate != NULL)                /* nal->validate not a NOOP */
         {
-                if ((md_in.options & PTL_MD_IOV) == 0)        /* contiguous */
+                if ((md_in.options & PTL_MD_IOVEC) == 0) /* contiguous */
                 {
                         rc = nal->validate (nal, md_in.start, md_in.length);
                         if (rc)
@@ -296,7 +296,7 @@ static int validate_md(ptl_handle_any_t current_in, ptl_md_t md_in)
 
 static ptl_handle_eq_t md2eq (ptl_md_t *md)
 {
-        if (PtlHandleEqual (md->eventq, PTL_EQ_NONE))
+        if (PtlHandleIsEqual (md->eventq, PTL_EQ_NONE))
                 return (PTL_EQ_NONE);
         
         return (ptl_handle2usereq (&md->eventq)->cb_eq_handle);
@@ -322,7 +322,7 @@ int PtlMDAttach(ptl_handle_me_t me_in, ptl_md_t md_in,
         }
 
         if (rc != PTL_OK)
-                return (rc == PTL_INV_HANDLE) ? PTL_INV_ME : rc;
+                return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
 
         if (handle_out) {
                 handle_out->nal_idx = me_in.nal_idx;
@@ -334,7 +334,7 @@ int PtlMDAttach(ptl_handle_me_t me_in, ptl_md_t md_in,
 
 
 int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in,
-                       ptl_handle_md_t * handle_out)
+              ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out)
 {
         PtlMDBind_in args;
         PtlMDBind_out ret;
@@ -347,6 +347,7 @@ int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in,
         args.eq_in = md2eq(&md_in);
         args.ni_in = ni_in;
         args.md_in = md_in;
+        args.unlink_in = unlink_in;
 
         rc = do_forward(ni_in, PTL_MDBIND, 
                         &args, sizeof(args), &ret, sizeof(ret));
@@ -379,13 +380,13 @@ int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t *old_inout,
         if (new_inout) {
                 rc = validate_md (md_in, *new_inout);
                 if (rc != PTL_OK)
-                        return (rc == PTL_INV_HANDLE) ? PTL_INV_MD : rc;
+                        return (rc == PTL_HANDLE_INVALID) ? PTL_MD_INVALID : rc;
                 args.new_inout = *new_inout;
                 args.new_inout_valid = 1;
         } else
                 args.new_inout_valid = 0;
 
-        if (PtlHandleEqual (testq_in, PTL_EQ_NONE)) {
+        if (PtlHandleIsEqual (testq_in, PTL_EQ_NONE)) {
                 args.testq_in = PTL_EQ_NONE;
                 args.sequence_in = -1;
         } else {
@@ -398,7 +399,7 @@ int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t *old_inout,
         rc = do_forward(md_in, PTL_MDUPDATE, &args, sizeof(args), &ret,
                         sizeof(ret));
         if (rc != PTL_OK)
-                return (rc == PTL_INV_HANDLE) ? PTL_INV_MD : rc;
+                return (rc == PTL_HANDLE_INVALID) ? PTL_MD_INVALID : rc;
 
         if (old_inout)
                 *old_inout = ret.old_inout;
@@ -416,7 +417,7 @@ int PtlMDUnlink(ptl_handle_md_t md_in)
         rc = do_forward(md_in, PTL_MDUNLINK, &args, sizeof(args), &ret,
                         sizeof(ret));
         if (rc != PTL_OK)
-                return (rc == PTL_INV_HANDLE) ? PTL_INV_MD : rc;
+                return (rc == PTL_HANDLE_INVALID) ? PTL_MD_INVALID : rc;
 
         return ret.rc;
 }
@@ -433,11 +434,11 @@ int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count,
         nal_t *nal;
 
         if (!ptl_init)
-                return PTL_NOINIT;
+                return PTL_NO_INIT;
         
         nal = ptl_hndl2nal (&interface);
         if (nal == NULL)
-                return PTL_INV_HANDLE;
+                return PTL_HANDLE_INVALID;
 
         if (count != LOWEST_BIT_SET(count)) {   /* not a power of 2 already */
                 do {                    /* knock off all but the top bit... */
@@ -452,7 +453,7 @@ int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count,
 
         PORTAL_ALLOC(ev, count * sizeof(ptl_event_t));
         if (!ev)
-                return PTL_NOSPACE;
+                return PTL_NO_SPACE;
 
         for (i = 0; i < count; i++)
                 ev[i].sequence = 0;
@@ -478,7 +479,7 @@ int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count,
 
         PORTAL_ALLOC(eq, sizeof(*eq));
         if (!eq) {
-                rc = PTL_NOSPACE;
+                rc = PTL_NO_SPACE;
                 goto fail;
         }
 
index ce343c1..8a91860 100644 (file)
@@ -52,7 +52,7 @@ int do_PtlEQAlloc_internal(nal_cb_t * nal, void *private, void *v_args,
 
         eq = lib_eq_alloc (nal);
         if (eq == NULL)
-                return (ret->rc = PTL_NOSPACE);
+                return (ret->rc = PTL_NO_SPACE);
 
         state_lock(nal, &flags);
 
@@ -104,9 +104,9 @@ int do_PtlEQFree_internal(nal_cb_t * nal, void *private, void *v_args,
 
         eq = ptl_handle2eq(&args->eventq_in, nal);
         if (eq == NULL) {
-                ret->rc = PTL_INV_EQ;
+                ret->rc = PTL_EQ_INVALID;
         } else if (eq->eq_refcount != 0) {
-                ret->rc = PTL_EQ_INUSE;
+                ret->rc = PTL_EQ_IN_USE;
         } else {
                 if (nal->cb_unmap != NULL) {
                         struct iovec iov = {
index d4d8860..61ef465 100644 (file)
@@ -63,7 +63,7 @@ lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int n, int size)
 
         space = nal->cb_malloc (nal, n * size);
         if (space == NULL)
-                return (PTL_NOSPACE);
+                return (PTL_NO_SPACE);
 
         INIT_LIST_HEAD (&fl->fl_list);
         fl->fl_objs = space;
@@ -179,7 +179,7 @@ lib_setup_handle_hash (nal_cb_t *nal)
                 (struct list_head *)nal->cb_malloc (nal, ni->ni_lh_hash_size
                                                     * sizeof (struct list_head));
         if (ni->ni_lh_hash_table == NULL)
-                return (PTL_NOSPACE);
+                return (PTL_NO_SPACE);
         
         for (i = 0; i < ni->ni_lh_hash_size; i++)
                 INIT_LIST_HEAD (&ni->ni_lh_hash_table[i]);
@@ -295,7 +295,7 @@ lib_init(nal_cb_t * nal, ptl_nid_t nid, ptl_pid_t pid, int gsize,
 
         ni->tbl.tbl = nal->cb_malloc(nal, sizeof(struct list_head) * ptl_size);
         if (ni->tbl.tbl == NULL) {
-                rc = PTL_NOSPACE;
+                rc = PTL_NO_SPACE;
                 goto out;
         }
 
index a1ed583..9a391cd 100644 (file)
 #include <portals/lib-p30.h>
 #include <portals/arg-blocks.h>
 
-/*
- * must be called with state lock held
- */
+/* must be called with state lock held */
 void lib_md_unlink(nal_cb_t * nal, lib_md_t * md)
 {
-        lib_me_t *me = md->me;
+        if ((md->md_flags & PTL_MD_FLAG_ZOMBIE) == 0) {
+                /* first unlink attempt... */
+                lib_me_t *me = md->me;
+
+                md->md_flags |= PTL_MD_FLAG_ZOMBIE;
+
+                /* Disassociate from ME (if any), and unlink it if it was created
+                 * with PTL_UNLINK */
+                if (me != NULL) {
+                        me->md = NULL;
+                        if (me->unlink == PTL_UNLINK)
+                                lib_me_unlink(nal, me);
+                }
+
+                /* ensure all future handle lookups fail */
+                lib_invalidate_handle(nal, &md->md_lh);
+        }
 
         if (md->pending != 0) {
                 CDEBUG(D_NET, "Queueing unlink of md %p\n", md);
-                md->md_flags |= PTL_MD_FLAG_UNLINK;
                 return;
         }
 
@@ -52,23 +65,16 @@ void lib_md_unlink(nal_cb_t * nal, lib_md_t * md)
                 if (nal->cb_unmap_pages != NULL)
                         nal->cb_unmap_pages (nal, md->md_niov, md->md_iov.kiov, 
                                              &md->md_addrkey);
-        } else if (nal->cb_unmap != NULL)
+        } else if (nal->cb_unmap != NULL) {
                 nal->cb_unmap (nal, md->md_niov, md->md_iov.iov, 
                                &md->md_addrkey);
-
-        if (me) {
-                me->md = NULL;
-                if (me->unlink == PTL_UNLINK)
-                        lib_me_unlink(nal, me);
         }
 
-        if (md->eq != NULL)
-        {
+        if (md->eq != NULL) {
                 md->eq->eq_refcount--;
                 LASSERT (md->eq->eq_refcount >= 0);
         }
 
-        lib_invalidate_handle (nal, &md->md_lh);
         list_del (&md->md_list);
         lib_md_free(nal, md);
 }
@@ -77,8 +83,6 @@ void lib_md_unlink(nal_cb_t * nal, lib_md_t * md)
 static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private,
                         ptl_md_t *md, ptl_handle_eq_t *eqh, int unlink)
 {
-        const int     max_size_opts = PTL_MD_AUTO_UNLINK |
-                                      PTL_MD_MAX_SIZE;
         lib_eq_t     *eq = NULL;
         int           rc;
         int           i;
@@ -88,39 +92,45 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private,
          * otherwise caller may only lib_md_free() it.
          */
 
-        if (!PtlHandleEqual (*eqh, PTL_EQ_NONE)) {
+        if (!PtlHandleIsEqual (*eqh, PTL_EQ_NONE)) {
                 eq = ptl_handle2eq(eqh, nal);
                 if (eq == NULL)
-                        return PTL_INV_EQ;
+                        return PTL_EQ_INVALID;
         }
 
         /* Must check this _before_ allocation.  Also, note that non-iov
          * MDs must set md_niov to 0. */
-        LASSERT((md->options & (PTL_MD_IOV | PTL_MD_KIOV)) == 0 ||
+        LASSERT((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0 ||
                 md->niov <= PTL_MD_MAX_IOV);
 
-        if ((md->options & max_size_opts) != 0 && /* max size used */
+        /* This implementation doesn't know how to create START events or
+         * disable END events.  Best to LASSERT our caller is compliant so
+         * we find out quickly...  */
+        LASSERT (PtlHandleIsEqual (*eqh, PTL_EQ_NONE) ||
+                 ((md->options & PTL_MD_EVENT_START_DISABLE) != 0 &&
+                  (md->options & PTL_MD_EVENT_END_DISABLE) == 0));
+
+        if ((md->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
             (md->max_size < 0 || md->max_size > md->length)) // illegal max_size
-                return PTL_INV_MD;
+                return PTL_MD_INVALID;
 
         new->me = NULL;
         new->start = md->start;
         new->length = md->length;
         new->offset = 0;
         new->max_size = md->max_size;
-        new->unlink = unlink;
         new->options = md->options;
         new->user_ptr = md->user_ptr;
         new->eq = eq;
         new->threshold = md->threshold;
         new->pending = 0;
-        new->md_flags = 0;
+        new->md_flags = (unlink == PTL_UNLINK) ? PTL_MD_FLAG_AUTO_UNLINK : 0;
 
-        if ((md->options & PTL_MD_IOV) != 0) {
+        if ((md->options & PTL_MD_IOVEC) != 0) {
                 int total_length = 0;
 
                 if ((md->options & PTL_MD_KIOV) != 0) /* Can't specify both */
-                        return PTL_INV_MD; 
+                        return PTL_MD_INVALID; 
 
                 new->md_niov = md->niov;
                 
@@ -147,14 +157,14 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private,
                 }
         } else if ((md->options & PTL_MD_KIOV) != 0) {
 #ifndef __KERNEL__
-                return PTL_INV_MD;
+                return PTL_MD_INVALID;
 #else
                 int total_length = 0;
                 
                 /* Trap attempt to use paged I/O if unsupported early. */
                 if (nal->cb_send_pages == NULL ||
                     nal->cb_recv_pages == NULL)
-                        return PTL_INV_MD;
+                        return PTL_MD_INVALID;
 
                 new->md_niov = md->niov;
 
@@ -219,7 +229,7 @@ void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md, ptl_md_t * new)
         new->options = md->options;
         new->user_ptr = md->user_ptr;
         ptl_eq2handle(&new->eventq, md->eq);
-        new->niov = ((md->options & (PTL_MD_IOV | PTL_MD_KIOV)) == 0) ? 0 : md->md_niov;
+        new->niov = ((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0) ? 0 : md->md_niov;
 }
 
 int do_PtlMDAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
@@ -240,21 +250,21 @@ int do_PtlMDAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
         lib_md_t *md;
         unsigned long flags;
 
-        if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOV)) != 0 &&
+        if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 &&
             args->md_in.niov > PTL_MD_MAX_IOV) /* too many fragments */
                 return (ret->rc = PTL_IOV_TOO_MANY);
 
         md = lib_md_alloc(nal, &args->md_in);
         if (md == NULL)
-                return (ret->rc = PTL_NOSPACE);
+                return (ret->rc = PTL_NO_SPACE);
 
         state_lock(nal, &flags);
 
         me = ptl_handle2me(&args->me_in, nal);
         if (me == NULL) {
-                ret->rc = PTL_INV_ME;
+                ret->rc = PTL_ME_INVALID;
         } else if (me->md != NULL) {
-                ret->rc = PTL_INUSE;
+                ret->rc = PTL_ME_IN_USE;
         } else {
                 ret->rc = lib_md_build(nal, md, private, &args->md_in,
                                        &args->eq_in, args->unlink_in);
@@ -292,18 +302,18 @@ int do_PtlMDBind(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
         lib_md_t *md;
         unsigned long flags;
 
-        if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOV)) != 0 &&
+        if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 &&
             args->md_in.niov > PTL_MD_MAX_IOV) /* too many fragments */
                 return (ret->rc = PTL_IOV_TOO_MANY);
 
         md = lib_md_alloc(nal, &args->md_in);
         if (md == NULL)
-                return (ret->rc = PTL_NOSPACE);
+                return (ret->rc = PTL_NO_SPACE);
 
         state_lock(nal, &flags);
 
-        ret->rc = lib_md_build(nal, md, private,
-                               &args->md_in, &args->eq_in, PTL_UNLINK);
+        ret->rc = lib_md_build(nal, md, private, &args->md_in, 
+                               &args->eq_in, args->unlink_in);
 
         if (ret->rc == PTL_OK) {
                 ptl_md2handle(&ret->handle_out, md);
@@ -331,7 +341,7 @@ int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
         md = ptl_handle2md(&args->md_in, nal);
         if (md == NULL) {
                 state_unlock(nal, &flags);
-                return (ret->rc = PTL_INV_MD);
+                return (ret->rc = PTL_MD_INVALID);
         }
 
         /* If the MD is busy, lib_md_unlink just marks it for deletion, and
@@ -343,7 +353,7 @@ int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
                 memset(&ev, 0, sizeof(ev));
 
                 ev.type = PTL_EVENT_UNLINK;
-                ev.status = PTL_OK;
+                ev.ni_fail_type = PTL_OK;
                 ev.unlinked = 1;
                 lib_md_deconstruct(nal, md, &ev.mem_desc);
                 
@@ -385,7 +395,7 @@ int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *v_args,
 
         md = ptl_handle2md(&args->md_in, nal);
         if (md == NULL) {
-                 ret->rc = PTL_INV_MD;
+                 ret->rc = PTL_MD_INVALID;
                  goto out;
         }
 
@@ -399,8 +409,8 @@ int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *v_args,
 
         /* XXX fttb, the new MD must be the same type wrt fragmentation */
         if (((new->options ^ md->options) & 
-             (PTL_MD_IOV | PTL_MD_KIOV)) != 0) {
-                ret->rc = PTL_INV_MD;
+             (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0) {
+                ret->rc = PTL_MD_INVALID;
                 goto out;
         }
 
@@ -414,30 +424,32 @@ int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *v_args,
                 goto out;
         }
 
-        if (!PtlHandleEqual (args->testq_in, PTL_EQ_NONE)) {
+        if (!PtlHandleIsEqual (args->testq_in, PTL_EQ_NONE)) {
                 test_eq = ptl_handle2eq(&args->testq_in, nal);
                 if (test_eq == NULL) {
-                        ret->rc = PTL_INV_EQ;
+                        ret->rc = PTL_EQ_INVALID;
                         goto out;
                 }
         }
 
         if (md->pending != 0) {
-                        ret->rc = PTL_NOUPDATE;
+                        ret->rc = PTL_MD_NO_UPDATE;
                         goto out;
         }
 
         if (test_eq == NULL ||
             test_eq->sequence == args->sequence_in) {
                 lib_me_t *me = md->me;
+                int       unlink = (md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) ?
+                                   PTL_UNLINK : PTL_RETAIN;
 
                 // #warning this does not track eq refcounts properly 
                 ret->rc = lib_md_build(nal, md, private,
-                                       new, &new->eventq, md->unlink);
+                                       new, &new->eventq, unlink);
 
                 md->me = me;
         } else {
-                ret->rc = PTL_NOUPDATE;
+                ret->rc = PTL_MD_NO_UPDATE;
         }
 
  out:
index 31ac214..e3c46ea 100644 (file)
@@ -45,15 +45,15 @@ int do_PtlMEAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
         lib_me_t *me;
 
         if (args->index_in >= tbl->size)
-                return ret->rc = PTL_INV_PTINDEX;
+                return ret->rc = PTL_PT_INDEX_INVALID;
 
         /* Should check for valid matchid, but not yet */
         if (0)
-                return ret->rc = PTL_INV_PROC;
+                return ret->rc = PTL_PROCESS_INVALID;
 
         me = lib_me_alloc (nal);
         if (me == NULL)
-                return (ret->rc = PTL_NOSPACE);
+                return (ret->rc = PTL_NO_SPACE);
 
         state_lock(nal, &flags);
 
@@ -87,7 +87,7 @@ int do_PtlMEInsert(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
 
         new = lib_me_alloc (nal);
         if (new == NULL)
-                return (ret->rc = PTL_NOSPACE);
+                return (ret->rc = PTL_NO_SPACE);
 
         /* Should check for valid matchid, but not yet */
 
@@ -98,7 +98,7 @@ int do_PtlMEInsert(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
                 lib_me_free (nal, new);
 
                 state_unlock (nal, &flags);
-                return (ret->rc = PTL_INV_ME);
+                return (ret->rc = PTL_ME_INVALID);
         }
 
         new->match_id = args->match_id_in;
@@ -132,7 +132,7 @@ int do_PtlMEUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
 
         me = ptl_handle2me(&args->current_in, nal);
         if (me == NULL) {
-                ret->rc = PTL_INV_ME;
+                ret->rc = PTL_ME_INVALID;
         } else {
                 lib_me_unlink(nal, me);
                 ret->rc = PTL_OK;
@@ -174,7 +174,7 @@ int do_PtlTblDump(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
         unsigned long flags;
 
         if (args->index_in < 0 || args->index_in >= tbl->size)
-                return ret->rc = PTL_INV_PTINDEX;
+                return ret->rc = PTL_PT_INDEX_INVALID;
 
         nal->cb_printf(nal, "Portal table index %d\n", args->index_in);
 
@@ -200,7 +200,7 @@ int do_PtlMEDump(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
 
         me = ptl_handle2me(&args->current_in, nal);
         if (me == NULL) {
-                ret->rc = PTL_INV_ME;
+                ret->rc = PTL_ME_INVALID;
         } else {
                 lib_me_dump(nal, me);
                 ret->rc = PTL_OK;
index ecd543c..477ddf8 100644 (file)
 #include <portals/lib-p30.h>
 #include <portals/arg-blocks.h>
 
-/*
- * Right now it does not check access control lists.
- *
- * We only support one MD per ME, which is how the Portals 3.1 spec is written.
- * All previous complication is removed.
- */
-
-static lib_me_t *
-lib_find_me(nal_cb_t *nal, int index, int op_mask, ptl_nid_t src_nid,
-            ptl_pid_t src_pid, ptl_size_t rlength, ptl_size_t roffset,
-            ptl_match_bits_t match_bits, ptl_size_t *mlength_out,
-            ptl_size_t *offset_out, int *unlink_out)
+/* forward ref */
+static void lib_commit_md (nal_cb_t *nal, lib_md_t *md, lib_msg_t *msg);
+
+static lib_md_t *
+lib_match_md(nal_cb_t *nal, int index, int op_mask, 
+             ptl_nid_t src_nid, ptl_pid_t src_pid, 
+             ptl_size_t rlength, ptl_size_t roffset,
+             ptl_match_bits_t match_bits, lib_msg_t *msg,
+             ptl_size_t *mlength_out, ptl_size_t *offset_out)
 {
         lib_ni_t         *ni = &nal->ni;
         struct list_head *match_list = &ni->tbl.tbl[index];
@@ -53,7 +50,6 @@ lib_find_me(nal_cb_t *nal, int index, int op_mask, ptl_nid_t src_nid,
         lib_md_t         *md;
         ptl_size_t        mlength;
         ptl_size_t        offset;
-
         ENTRY;
 
         CDEBUG (D_NET, "Request from "LPU64".%d of length %d into portal %d "
@@ -75,14 +71,14 @@ lib_find_me(nal_cb_t *nal, int index, int op_mask, ptl_nid_t src_nid,
 
                 LASSERT (me == md->me);
 
-                /* MD deactivated */
-                if (md->threshold == 0)
-                        continue;
-
                 /* mismatched MD op */
                 if ((md->options & op_mask) == 0)
                         continue;
 
+                /* MD exhausted */
+                if (lib_md_exhausted(md))
+                        continue;
+
                 /* mismatched ME nid/pid? */
                 if (me->match_id.nid != PTL_NID_ANY &&
                     me->match_id.nid != src_nid)
@@ -103,10 +99,12 @@ lib_find_me(nal_cb_t *nal, int index, int op_mask, ptl_nid_t src_nid,
                 else
                         offset = roffset;
 
-                mlength = md->length - offset;
-                if ((md->options & PTL_MD_MAX_SIZE) != 0 &&
-                    mlength > md->max_size)
+                if ((md->options & PTL_MD_MAX_SIZE) != 0) {
                         mlength = md->max_size;
+                        LASSERT (md->offset + mlength <= md->length);
+                } else {
+                        mlength = md->length - offset;
+                }
 
                 if (rlength <= mlength) {        /* fits in allowed space */
                         mlength = rlength;
@@ -118,13 +116,38 @@ lib_find_me(nal_cb_t *nal, int index, int op_mask, ptl_nid_t src_nid,
                         goto failed;
                 }
 
+                /* Commit to this ME/MD */
+                CDEBUG(D_NET, "Incoming %s index %x from "LPU64"/%u of "
+                       "length %d/%d into md "LPX64" [%d] + %d\n", 
+                       (op_mask == PTL_MD_OP_PUT) ? "put" : "get",
+                       index, src_nid, src_pid, mlength, rlength, 
+                       md->md_lh.lh_cookie, md->md_niov, offset);
+
+                lib_commit_md(nal, md, msg);
                 md->offset = offset + mlength;
 
+                /* NB Caller sets ev.type and ev.hdr_data */
+                msg->ev.initiator.nid = src_nid;
+                msg->ev.initiator.pid = src_pid;
+                msg->ev.portal = index;
+                msg->ev.match_bits = match_bits;
+                msg->ev.rlength = rlength;
+                msg->ev.mlength = mlength;
+                msg->ev.offset = offset;
+
+                lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
+
                 *offset_out = offset;
                 *mlength_out = mlength;
-                *unlink_out = ((md->options & PTL_MD_AUTO_UNLINK) != 0 &&
-                               md->offset >= (md->length - md->max_size));
-                RETURN (me);
+
+                /* Auto-unlink NOW, so the ME gets unlinked if required.
+                 * We bumped md->pending above so the MD just gets flagged
+                 * for unlink when it is finalized. */
+                if ((md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) != 0 &&
+                    lib_md_exhausted(md))
+                        lib_md_unlink(nal, md);
+
+                RETURN (md);
         }
 
  failed:
@@ -627,9 +650,7 @@ parse_put(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
         lib_ni_t        *ni = &nal->ni;
         ptl_size_t       mlength = 0;
         ptl_size_t       offset = 0;
-        int              unlink = 0;
         ptl_err_t        rc;
-        lib_me_t        *me;
         lib_md_t        *md;
         unsigned long    flags;
                 
@@ -640,36 +661,19 @@ parse_put(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
 
         state_lock(nal, &flags);
 
-        me = lib_find_me(nal, hdr->msg.put.ptl_index, PTL_MD_OP_PUT,
-                         hdr->src_nid, hdr->src_pid,
-                         hdr->payload_length, hdr->msg.put.offset,
-                         hdr->msg.put.match_bits,
-                         &mlength, &offset, &unlink);
-        if (me == NULL) {
+        md = lib_match_md(nal, hdr->msg.put.ptl_index, PTL_MD_OP_PUT,
+                          hdr->src_nid, hdr->src_pid,
+                          hdr->payload_length, hdr->msg.put.offset,
+                          hdr->msg.put.match_bits, msg,
+                          &mlength, &offset);
+        if (md == NULL) {
                 state_unlock(nal, &flags);
                 return (PTL_FAIL);
         }
 
-        md = me->md;
-        CDEBUG(D_NET, "Incoming put index %x from "LPU64"/%u of length %d/%d "
-               "into md "LPX64" [%d] + %d\n", hdr->msg.put.ptl_index,
-               hdr->src_nid, hdr->src_pid, mlength, hdr->payload_length, 
-               md->md_lh.lh_cookie, md->md_niov, offset);
-
-        lib_commit_md(nal, md, msg);
-
-        msg->ev.type = PTL_EVENT_PUT;
-        msg->ev.initiator.nid = hdr->src_nid;
-        msg->ev.initiator.pid = hdr->src_pid;
-        msg->ev.portal = hdr->msg.put.ptl_index;
-        msg->ev.match_bits = hdr->msg.put.match_bits;
-        msg->ev.rlength = hdr->payload_length;
-        msg->ev.mlength = mlength;
-        msg->ev.offset = offset;
+        msg->ev.type = PTL_EVENT_PUT_END;
         msg->ev.hdr_data = hdr->msg.put.hdr_data;
 
-        lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
-
         if (!ptl_is_wire_handle_none(&hdr->msg.put.ack_wmd) &&
             !(md->options & PTL_MD_ACK_DISABLE)) {
                 msg->ack_wmd = hdr->msg.put.ack_wmd;
@@ -678,11 +682,6 @@ parse_put(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
         ni->counters.recv_count++;
         ni->counters.recv_length += mlength;
 
-        /* only unlink after MD's pending count has been bumped in
-         * lib_commit_md() otherwise lib_me_unlink() will nuke it */
-        if (unlink)
-                lib_me_unlink (nal, me);
-
         state_unlock(nal, &flags);
 
         rc = lib_recv(nal, private, msg, md, offset, mlength,
@@ -700,8 +699,6 @@ parse_get(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
         lib_ni_t        *ni = &nal->ni;
         ptl_size_t       mlength = 0;
         ptl_size_t       offset = 0;
-        int              unlink = 0;
-        lib_me_t        *me;
         lib_md_t        *md;
         ptl_hdr_t        reply;
         unsigned long    flags;
@@ -715,44 +712,22 @@ parse_get(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
 
         state_lock(nal, &flags);
 
-        me = lib_find_me(nal, hdr->msg.get.ptl_index, PTL_MD_OP_GET,
-                         hdr->src_nid, hdr->src_pid,
-                         hdr->msg.get.sink_length, hdr->msg.get.src_offset,
-                         hdr->msg.get.match_bits,
-                         &mlength, &offset, &unlink);
-        if (me == NULL) {
+        md = lib_match_md(nal, hdr->msg.get.ptl_index, PTL_MD_OP_GET,
+                          hdr->src_nid, hdr->src_pid,
+                          hdr->msg.get.sink_length, hdr->msg.get.src_offset,
+                          hdr->msg.get.match_bits, msg,
+                          &mlength, &offset);
+        if (md == NULL) {
                 state_unlock(nal, &flags);
                 return (PTL_FAIL);
         }
 
-        md = me->md;
-        CDEBUG(D_NET, "Incoming get index %d from "LPU64".%u of length %d/%d "
-               "from md "LPX64" [%d] + %d\n", hdr->msg.get.ptl_index,
-               hdr->src_nid, hdr->src_pid, mlength, hdr->payload_length, 
-               md->md_lh.lh_cookie, md->md_niov, offset);
-
-        lib_commit_md(nal, md, msg);
-
-        msg->ev.type = PTL_EVENT_GET;
-        msg->ev.initiator.nid = hdr->src_nid;
-        msg->ev.initiator.pid = hdr->src_pid;
-        msg->ev.portal = hdr->msg.get.ptl_index;
-        msg->ev.match_bits = hdr->msg.get.match_bits;
-        msg->ev.rlength = hdr->payload_length;
-        msg->ev.mlength = mlength;
-        msg->ev.offset = offset;
+        msg->ev.type = PTL_EVENT_GET_END;
         msg->ev.hdr_data = 0;
 
-        lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
-
         ni->counters.send_count++;
         ni->counters.send_length += mlength;
 
-        /* only unlink after MD's refcount has been bumped in
-         * lib_commit_md() otherwise lib_me_unlink() will nuke it */
-        if (unlink)
-                lib_me_unlink (nal, me);
-
         state_unlock(nal, &flags);
 
         memset (&reply, 0, sizeof (reply));
@@ -828,7 +803,7 @@ parse_reply(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
 
         lib_commit_md(nal, md, msg);
 
-        msg->ev.type = PTL_EVENT_REPLY;
+        msg->ev.type = PTL_EVENT_REPLY_END;
         msg->ev.initiator.nid = hdr->src_nid;
         msg->ev.initiator.pid = hdr->src_pid;
         msg->ev.rlength = rlength;
@@ -1044,8 +1019,6 @@ lib_parse(nal_cb_t *nal, ptl_hdr_t *hdr, void *private)
                 return;
         }
 
-        do_gettimeofday(&msg->ev.arrival_time);
-
         switch (hdr->type) {
         case PTL_MSG_ACK:
                 rc = parse_ack(nal, hdr, private, msg);
@@ -1112,14 +1085,14 @@ do_PtlPut(nal_cb_t *nal, void *private, void *v_args, void *v_ret)
         {
                 CERROR(LPU64": Dropping PUT to "LPU64": simulated failure\n",
                        nal->ni.nid, id->nid);
-                return (ret->rc = PTL_INV_PROC);
+                return (ret->rc = PTL_PROCESS_INVALID);
         }
 
         msg = lib_msg_alloc(nal);
         if (msg == NULL) {
                 CERROR(LPU64": Dropping PUT to "LPU64": ENOMEM on lib_msg_t\n",
                        ni->nid, id->nid);
-                return (ret->rc = PTL_NOSPACE);
+                return (ret->rc = PTL_NO_SPACE);
         }
 
         state_lock(nal, &flags);
@@ -1129,7 +1102,7 @@ do_PtlPut(nal_cb_t *nal, void *private, void *v_args, void *v_ret)
                 lib_msg_free(nal, msg);
                 state_unlock(nal, &flags);
         
-                return (ret->rc = PTL_INV_MD);
+                return (ret->rc = PTL_MD_INVALID);
         }
 
         CDEBUG(D_NET, "PtlPut -> %Lu: %lu\n", (unsigned long long)id->nid,
@@ -1158,7 +1131,7 @@ do_PtlPut(nal_cb_t *nal, void *private, void *v_args, void *v_ret)
 
         lib_commit_md(nal, md, msg);
         
-        msg->ev.type = PTL_EVENT_SENT;
+        msg->ev.type = PTL_EVENT_SEND_END;
         msg->ev.initiator.nid = ni->nid;
         msg->ev.initiator.pid = ni->pid;
         msg->ev.portal = args->portal_in;
@@ -1188,19 +1161,18 @@ do_PtlPut(nal_cb_t *nal, void *private, void *v_args, void *v_ret)
 }
 
 lib_msg_t * 
-lib_fake_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, lib_md_t *getmd)
+lib_create_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, lib_msg_t *getmsg)
 {
         /* The NAL can DMA direct to the GET md (i.e. no REPLY msg).  This
-         * returns a msg the NAL can pass to lib_finalize() so that a REPLY
-         * event still occurs. 
+         * returns a msg for the NAL to pass to lib_finalize() when the sink
+         * data has been received.
          *
-         * CAVEAT EMPTOR: 'getmd' is passed by pointer so it MUST be valid.
-         * This can only be guaranteed while a lib_msg_t holds a reference
-         * on it (ie. pending > 0), so best call this before the
-         * lib_finalize() of the original GET. */
+         * CAVEAT EMPTOR: 'getmsg' is the original GET, which is freed when
+         * lib_finalize() is called on it, so the NAL must call this first */
 
         lib_ni_t        *ni = &nal->ni;
         lib_msg_t       *msg = lib_msg_alloc(nal);
+        lib_md_t        *getmd = getmsg->md;
         unsigned long    flags;
 
         state_lock(nal, &flags);
@@ -1225,7 +1197,7 @@ lib_fake_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, lib_md_t *getmd)
 
         lib_commit_md (nal, getmd, msg);
 
-        msg->ev.type = PTL_EVENT_REPLY;
+        msg->ev.type = PTL_EVENT_REPLY_END;
         msg->ev.initiator.nid = peer_nid;
         msg->ev.initiator.pid = 0;      /* XXX FIXME!!! */
         msg->ev.rlength = msg->ev.mlength = getmd->length;
@@ -1281,14 +1253,14 @@ do_PtlGet(nal_cb_t *nal, void *private, void *v_args, void *v_ret)
         {
                 CERROR(LPU64": Dropping PUT to "LPU64": simulated failure\n",
                        nal->ni.nid, id->nid);
-                return (ret->rc = PTL_INV_PROC);
+                return (ret->rc = PTL_PROCESS_INVALID);
         }
 
         msg = lib_msg_alloc(nal);
         if (msg == NULL) {
                 CERROR(LPU64": Dropping GET to "LPU64": ENOMEM on lib_msg_t\n",
                        ni->nid, id->nid);
-                return (ret->rc = PTL_NOSPACE);
+                return (ret->rc = PTL_NO_SPACE);
         }
 
         state_lock(nal, &flags);
@@ -1298,7 +1270,7 @@ do_PtlGet(nal_cb_t *nal, void *private, void *v_args, void *v_ret)
                 lib_msg_free(nal, msg);
                 state_unlock(nal, &flags);
 
-                return ret->rc = PTL_INV_MD;
+                return ret->rc = PTL_MD_INVALID;
         }
 
         CDEBUG(D_NET, "PtlGet -> %Lu: %lu\n", (unsigned long long)id->nid,
@@ -1323,7 +1295,7 @@ do_PtlGet(nal_cb_t *nal, void *private, void *v_args, void *v_ret)
 
         lib_commit_md(nal, md, msg);
 
-        msg->ev.type = PTL_EVENT_SENT;
+        msg->ev.type = PTL_EVENT_SEND_END;
         msg->ev.initiator.nid = ni->nid;
         msg->ev.initiator.pid = ni->pid;
         msg->ev.portal = args->portal_in;
index 04c69b1..869c9d6 100644 (file)
@@ -100,7 +100,7 @@ lib_finalize(nal_cb_t *nal, void *private, lib_msg_t *msg, ptl_err_t status)
         if (status == PTL_OK &&
             !ptl_is_wire_handle_none(&msg->ack_wmd)) {
 
-                LASSERT(msg->ev.type == PTL_EVENT_PUT);
+                LASSERT(msg->ev.type == PTL_EVENT_PUT_END);
 
                 memset (&ack, 0, sizeof (ack));
                 ack.type     = HTON__u32 (PTL_MSG_ACK);
@@ -133,11 +133,16 @@ lib_finalize(nal_cb_t *nal, void *private, lib_msg_t *msg, ptl_err_t status)
         LASSERT (md->pending >= 0);
 
         /* Should I unlink this MD? */
-        unlink = (md->pending == 0 &&           /* No other refs */
-                  (md->threshold == 0 ||        /* All ops done */
-                   md->md_flags & PTL_MD_FLAG_UNLINK) != 0); /* black spot */
-
-        msg->ev.status = status;
+        if (md->pending != 0)                   /* other refs */
+                unlink = 0;
+        else if ((md->md_flags & PTL_MD_FLAG_ZOMBIE) != 0)
+                unlink = 1;
+        else if ((md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) == 0)
+                unlink = 0;
+        else
+                unlink = lib_md_exhausted(md);
+
+        msg->ev.ni_fail_type = status;
         msg->ev.unlinked = unlink;
 
         if (md->eq != NULL)
index 9e90576..296bc4a 100644 (file)
@@ -88,7 +88,7 @@ int do_PtlNIStatus(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
         else if (args->register_in == PTL_SR_MSGS_MAX)
                 ret->status_out = count->msgs_max;
         else
-                ret->rc = PTL_INV_SR_INDX;
+                ret->rc = PTL_SR_INDEX_INVALID;
 
         return ret->rc;
 }
@@ -119,7 +119,7 @@ int do_PtlNIDist(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
 
         if ((rc = nal->cb_dist(nal, nid, &dist)) != 0) {
                 ret->distance_out = (unsigned long) MAX_DIST;
-                return PTL_INV_PROC;
+                return PTL_PROCESS_INVALID;
         }
 
         ret->distance_out = dist;
diff --git a/lnet/lnet/module.c b/lnet/lnet/module.c
new file mode 100644 (file)
index 0000000..012d3d9
--- /dev/null
@@ -0,0 +1,479 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/smp_lock.h>
+#include <linux/unistd.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <asm/uaccess.h>
+#include <asm/segment.h>
+#include <linux/miscdevice.h>
+
+#include <portals/lib-p30.h>
+#include <portals/p30.h>
+#include <linux/kp30.h>
+#include <linux/kpr.h>
+#include <linux/portals_compat25.h>
+
+extern void (kping_client)(struct portal_ioctl_data *);
+
+/* A NAL's command handler paired with the opaque pointer handed back to it. */
+struct nal_cmd_handler {
+        nal_cmd_handler_t  nch_handler;  /* called from kportal_nal_cmd() */
+        void              *nch_private;  /* second argument to nch_handler */
+};
+
+/* Handler table indexed by NAL id; the code in this file only ever uses
+ * indices 1..NAL_MAX_NR. */
+static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1];
+/* Held around every read and write of nal_cmd[]. */
+static DECLARE_MUTEX(nal_cmd_sem);
+
+
+/* Forward an add-route request (gateway NAL id and NID plus the lo_nid and
+ * hi_nid pair) to the router control interface.
+ *
+ * Returns -ENODEV when the control interface symbol cannot be obtained,
+ * otherwise whatever kprci_add_route() returns. */
+static int
+kportal_add_route(int gateway_nalid, ptl_nid_t gateway_nid,
+                  ptl_nid_t lo_nid, ptl_nid_t hi_nid)
+{
+        kpr_control_interface_t *router;
+        int                      status = -ENODEV;
+
+        router = (kpr_control_interface_t *)
+                 PORTAL_SYMBOL_GET (kpr_control_interface);
+        if (router != NULL) {
+                status = router->kprci_add_route (gateway_nalid, gateway_nid,
+                                                  lo_nid, hi_nid);
+                /* balances the PORTAL_SYMBOL_GET above */
+                PORTAL_SYMBOL_PUT(kpr_control_interface);
+        }
+
+        return (status);
+}
+
+/* Forward a delete-route request (gateway NAL id and NID plus the lo and hi
+ * pair) to the router control interface.
+ *
+ * Returns -ENODEV when the control interface symbol cannot be obtained,
+ * otherwise whatever kprci_del_route() returns. */
+static int
+kportal_del_route(int gw_nalid, ptl_nid_t gw_nid,
+                  ptl_nid_t lo, ptl_nid_t hi)
+{
+        kpr_control_interface_t *router;
+        int                      status = -ENODEV;
+
+        router = (kpr_control_interface_t *)
+                 PORTAL_SYMBOL_GET(kpr_control_interface);
+        if (router != NULL) {
+                status = router->kprci_del_route (gw_nalid, gw_nid, lo, hi);
+                /* balances the PORTAL_SYMBOL_GET above */
+                PORTAL_SYMBOL_PUT(kpr_control_interface);
+        }
+
+        return (status);
+}
+
+/* Pass an alive/dead notification for a peer (gw_nalid, gw_nid), stamped
+ * with 'when', to the router control interface.
+ *
+ * Returns 0 when the router is not present, otherwise whatever
+ * kprci_notify() returns. */
+static int
+kportal_notify_router (int gw_nalid, ptl_nid_t gw_nid,
+                       int alive, time_t when)
+{
+        kpr_control_interface_t *router;
+        int                      status = 0;
+
+        /* A missing router is not an error.  The sysadmin is allowed to
+         * notify _everywhere_ when a NID boots or crashes, even on nodes
+         * that know nothing of the peer. */
+        router = (kpr_control_interface_t *)
+                 PORTAL_SYMBOL_GET(kpr_control_interface);
+        if (router != NULL) {
+                status = router->kprci_notify (gw_nalid, gw_nid, alive, when);
+                /* balances the PORTAL_SYMBOL_GET above */
+                PORTAL_SYMBOL_PUT(kpr_control_interface);
+        }
+
+        return (status);
+}
+
+/* Fetch routing table entry 'index' from the router control interface.
+ *
+ * The five output parameters are written only when kprci_get_route()
+ * returns 0; on any failure they are left untouched.
+ *
+ * Returns -ENODEV when the control interface symbol cannot be obtained,
+ * otherwise whatever kprci_get_route() returns. */
+static int
+kportal_get_route(int index, __u32 *gateway_nalidp, ptl_nid_t *gateway_nidp,
+                  ptl_nid_t *lo_nidp, ptl_nid_t *hi_nidp, int *alivep)
+{
+        kpr_control_interface_t *router;
+        ptl_nid_t                gw_nid;
+        ptl_nid_t                first_nid;
+        ptl_nid_t                last_nid;
+        int                      gw_nalid;
+        int                      is_up;
+        int                      status;
+
+        router = (kpr_control_interface_t *)
+                 PORTAL_SYMBOL_GET(kpr_control_interface);
+        if (router == NULL)
+                return (-ENODEV);
+
+        /* Collect into locals first so the caller's variables are only
+         * touched on success. */
+        status = router->kprci_get_route(index, &gw_nalid, &gw_nid,
+                                         &first_nid, &last_nid, &is_up);
+        if (status != 0)
+                goto out;
+
+        CDEBUG(D_IOCTL, "got route [%d] %d "LPX64":"LPX64" - "LPX64", %s\n",
+               index, gw_nalid, gw_nid, first_nid, last_nid,
+               is_up ? "up" : "down");
+
+        *gateway_nalidp = (__u32)gw_nalid;
+        *gateway_nidp   = gw_nid;
+        *lo_nidp        = first_nid;
+        *hi_nidp        = last_nid;
+        *alivep         = is_up;
+
+ out:
+        PORTAL_SYMBOL_PUT (kpr_control_interface);
+        return (status);
+}
+
+/* NAL command handler for the router: decodes pcfg->pcfg_command and calls
+ * the matching kportal_*_route / kportal_notify_router helper.
+ *
+ * pcfg:    command block; NAL_CMD_GET_ROUTE writes its results back into
+ *          pcfg_gw_nal, pcfg_nid, pcfg_nid2, pcfg_nid3 and pcfg_flags
+ * private: unused
+ *
+ * Returns -EINVAL for a command this handler does not know, otherwise the
+ * helper's return value. */
+static int
+kportal_router_cmd(struct portals_cfg *pcfg, void * private)
+{
+        int err = -EINVAL;
+        ENTRY;
+
+        switch(pcfg->pcfg_command) {
+        case NAL_CMD_ADD_ROUTE:
+                /* log pcfg_gw_nal: that is the gateway NAL actually passed
+                 * on below, matching the DEL/NOTIFY messages */
+                CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n",
+                       pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                       pcfg->pcfg_nid2, pcfg->pcfg_nid3);
+                err = kportal_add_route(pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                                        pcfg->pcfg_nid2, pcfg->pcfg_nid3);
+                break;
+
+        case NAL_CMD_DEL_ROUTE:
+                CDEBUG (D_IOCTL, "Removing routes via [%d] "LPU64" : "LPU64" - "LPU64"\n",
+                        pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                        pcfg->pcfg_nid2, pcfg->pcfg_nid3);
+                err = kportal_del_route (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                                         pcfg->pcfg_nid2, pcfg->pcfg_nid3);
+                break;
+
+        case NAL_CMD_NOTIFY_ROUTER:
+                /* pcfg_nid3 carries the notification time for this command */
+                CDEBUG (D_IOCTL, "Notifying peer [%d] "LPU64" %s @ %ld\n",
+                        pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                        pcfg->pcfg_flags ? "Enabling" : "Disabling",
+                        (time_t)pcfg->pcfg_nid3);
+
+                err = kportal_notify_router (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                                             pcfg->pcfg_flags,
+                                             (time_t)pcfg->pcfg_nid3);
+                break;
+
+        case NAL_CMD_GET_ROUTE:
+                /* pcfg_count selects the route index */
+                CDEBUG (D_IOCTL, "Getting route [%d]\n", pcfg->pcfg_count);
+                err = kportal_get_route(pcfg->pcfg_count, &pcfg->pcfg_gw_nal,
+                                        &pcfg->pcfg_nid,
+                                        &pcfg->pcfg_nid2, &pcfg->pcfg_nid3,
+                                        &pcfg->pcfg_flags);
+                break;
+
+        default:
+                CDEBUG(D_IOCTL, "Inappropriate cmd: %d\n", pcfg->pcfg_command);
+                break;
+        }
+        RETURN(err);
+}
+
+/* Dispatch a NAL command to the handler registered for pcfg->pcfg_nal.
+ *
+ * The lookup and the handler call both happen with nal_cmd_sem held.
+ * Returns -EINVAL when the NAL id is outside 1..NAL_MAX_NR or has no
+ * handler registered, otherwise the handler's return value. */
+int
+kportal_nal_cmd(struct portals_cfg *pcfg)
+{
+        __u32 nalid = pcfg->pcfg_nal;
+        int status = -EINVAL;
+
+        ENTRY;
+
+        down(&nal_cmd_sem);
+        if (nalid != 0 && nalid <= NAL_MAX_NR) {
+                struct nal_cmd_handler *slot = &nal_cmd[nalid];
+
+                if (slot->nch_handler != NULL) {
+                        CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nalid,
+                               pcfg->pcfg_command);
+                        status = slot->nch_handler(pcfg, slot->nch_private);
+                }
+        }
+        up(&nal_cmd_sem);
+        RETURN(status);
+}
+
+/* Look up the network-interface handle exported by the given kernel NAL.
+ *
+ * Returns the result of PORTAL_SYMBOL_GET on that NAL's handle symbol, or
+ * NULL for TCPNAL (a userspace NAL) and for unrecognised NAL ids.  A
+ * successful lookup is paired with kportal_put_ni() by the callers in this
+ * file. */
+ptl_handle_ni_t *
+kportal_get_ni (int nal)
+{
+        ptl_handle_ni_t *ni = NULL;
+
+        switch (nal) {
+        case QSWNAL:
+                ni = PORTAL_SYMBOL_GET(kqswnal_ni);
+                break;
+        case SOCKNAL:
+                ni = PORTAL_SYMBOL_GET(ksocknal_ni);
+                break;
+        case GMNAL:
+                ni = PORTAL_SYMBOL_GET(kgmnal_ni);
+                break;
+        case IBNAL:
+                ni = PORTAL_SYMBOL_GET(kibnal_ni);
+                break;
+        case SCIMACNAL:
+                ni = PORTAL_SYMBOL_GET(kscimacnal_ni);
+                break;
+        case TCPNAL:
+                /* userspace NAL: nothing to look up in the kernel */
+                break;
+        default:
+                /* A warning to a naive caller */
+                CERROR ("unknown nal: %d\n", nal);
+                break;
+        }
+
+        return (ni);
+}
+
+/* Release the handle symbol obtained by a successful kportal_get_ni(nal).
+ *
+ * TCPNAL never yields a handle from kportal_get_ni(), so being asked to put
+ * one is treated as a caller bug (LBUG).  Unknown ids are only logged. */
+void
+kportal_put_ni (int nal)
+{
+
+        switch (nal)
+        {
+        case QSWNAL:
+                PORTAL_SYMBOL_PUT(kqswnal_ni);
+                break;
+        case SOCKNAL:
+                PORTAL_SYMBOL_PUT(ksocknal_ni);
+                break;
+        case GMNAL:
+                PORTAL_SYMBOL_PUT(kgmnal_ni);
+                break;
+        case IBNAL:
+                PORTAL_SYMBOL_PUT(kibnal_ni);
+                break;
+        case TCPNAL:
+                /* A lesson to a malicious caller */
+                LBUG ();
+                /* never fall through into another NAL's symbol put, even
+                 * if LBUG() were to return */
+                break;
+        case SCIMACNAL:
+                PORTAL_SYMBOL_PUT(kscimacnal_ni);
+                break;
+        default:
+                CERROR ("unknown nal: %d\n", nal);
+                break;
+        }
+}
+
+/* Register 'handler' (with its 'private' cookie) as the command handler
+ * for NAL id 'nal'.
+ *
+ * Returns 0 on success, -EINVAL when 'nal' is outside 1..NAL_MAX_NR (this
+ * used to report success without registering anything), or -EBUSY when a
+ * handler is already registered for that id. */
+int
+kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private)
+{
+        int rc = 0;
+
+        CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler);
+
+        if (nal <= 0 || nal > NAL_MAX_NR) {
+                CERROR("Can't register NAL %d: id out of range\n", nal);
+                return -EINVAL;
+        }
+
+        down(&nal_cmd_sem);
+        if (nal_cmd[nal].nch_handler != NULL) {
+                rc = -EBUSY;
+        } else {
+                nal_cmd[nal].nch_handler = handler;
+                nal_cmd[nal].nch_private = private;
+        }
+        up(&nal_cmd_sem);
+
+        return rc;
+}
+
+/* Clear the command handler slot for NAL id 'nal'.
+ *
+ * Ids outside 1..NAL_MAX_NR are ignored.  Always returns 0. */
+int
+kportal_nal_unregister(int nal)
+{
+        CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal);
+
+        if (nal <= 0 || nal > NAL_MAX_NR)
+                return 0;
+
+        down(&nal_cmd_sem);
+        nal_cmd[nal].nch_handler = NULL;
+        nal_cmd[nal].nch_private = NULL;
+        up(&nal_cmd_sem);
+
+        return 0;
+}
+
+/* Handle one portals ioctl.
+ *
+ * data: ioctl argument block, dereferenced directly here; where a command
+ *       produces a reply it is copied back to 'arg' with copy_to_user()
+ * cmd:  IOC_PORTAL_* command code
+ * arg:  user-space address that receives the updated 'data'
+ *
+ * Returns 0 on success; -EINVAL for an unknown command, a NAL with no
+ * handle, or a mis-sized NAL command buffer; -EFAULT when a user copy
+ * fails; otherwise the value from the NAL command handler or PtlFailNid().
+ */
+static int kportal_ioctl(struct portal_ioctl_data *data,
+                         unsigned int cmd, unsigned long arg)
+{
+        int err = 0;
+        char str[PTL_NALFMT_SIZE];
+        ENTRY;
+
+        switch (cmd) {
+        case IOC_PORTAL_PING: {
+                void (*ping)(struct portal_ioctl_data *);
+
+                CDEBUG(D_IOCTL, "doing %d pings to nid "LPX64" (%s)\n",
+                       data->ioc_count, data->ioc_nid,
+                       portals_nid2str(data->ioc_nal, data->ioc_nid, str));
+                ping = PORTAL_SYMBOL_GET(kping_client);
+                if (!ping)
+                        CERROR("PORTAL_SYMBOL_GET failed\n");
+                else {
+                        ping(data);
+                        PORTAL_SYMBOL_PUT(kping_client);
+                }
+                /* a missing ping client is logged but not reported */
+                RETURN(0);
+        }
+
+        case IOC_PORTAL_GET_NID: {
+                const ptl_handle_ni_t *nip;
+                ptl_process_id_t       pid;
+
+                CDEBUG (D_IOCTL, "Getting nid for nal [%d]\n", data->ioc_nal);
+
+                nip = kportal_get_ni (data->ioc_nal);
+                if (nip == NULL)
+                        RETURN (-EINVAL);
+
+                err = PtlGetId (*nip, &pid);
+                LASSERT (err == PTL_OK);
+                kportal_put_ni (data->ioc_nal);
+
+                data->ioc_nid = pid.nid;
+                if (copy_to_user ((char *)arg, data, sizeof (*data)))
+                        err = -EFAULT;
+                break;
+        }
+
+        case IOC_PORTAL_NAL_CMD: {
+                struct portals_cfg pcfg;
+
+                /* ioc_plen1 describes the user buffer read below, so a bad
+                 * length is bad input, not a kernel invariant: fail the
+                 * ioctl instead of asserting */
+                if (data->ioc_plen1 != sizeof(pcfg)) {
+                        CERROR("bad NAL command size %d, expected %d\n",
+                               (int)data->ioc_plen1, (int)sizeof(pcfg));
+                        RETURN (-EINVAL);
+                }
+
+                /* copy_from_user() returns the number of bytes NOT copied,
+                 * which is not an errno */
+                if (copy_from_user(&pcfg, (void *)data->ioc_pbuf1,
+                                   sizeof(pcfg)))
+                        RETURN (-EFAULT);
+
+                CDEBUG (D_IOCTL, "nal command nal %d cmd %d\n", pcfg.pcfg_nal,
+                        pcfg.pcfg_command);
+                err = kportal_nal_cmd(&pcfg);
+                if (err == 0) {
+                        /* hand back both the (possibly updated) command
+                         * block and the ioctl data */
+                        if (copy_to_user((char *)data->ioc_pbuf1, &pcfg,
+                                         sizeof (pcfg)))
+                                err = -EFAULT;
+                        if (copy_to_user((char *)arg, data, sizeof (*data)))
+                                err = -EFAULT;
+                }
+                break;
+        }
+        case IOC_PORTAL_FAIL_NID: {
+                const ptl_handle_ni_t *nip;
+
+                CDEBUG (D_IOCTL, "fail nid: [%d] "LPU64" count %d\n",
+                        data->ioc_nal, data->ioc_nid, data->ioc_count);
+
+                nip = kportal_get_ni (data->ioc_nal);
+                if (nip == NULL)
+                        RETURN (-EINVAL);
+
+                err = PtlFailNid (*nip, data->ioc_nid, data->ioc_count);
+                kportal_put_ni (data->ioc_nal);
+                break;
+        }
+        default:
+                err = -EINVAL;
+                break;
+        }
+
+        RETURN(err);
+}
+
+DECLARE_IOCTL_HANDLER(kportal_ioctl_handler, kportal_ioctl);
+
+/* Module load: initialise portals, register the router's NAL command
+ * handler, then register the portals ioctl handler.
+ *
+ * Returns 0 on success or the failing step's error code; PtlFini() is
+ * called if the handler registration fails after PtlInit() succeeded. */
+static int init_kportals_module(void)
+{
+        int rc;
+        ENTRY;
+
+        rc = PtlInit(NULL);
+        if (rc) {
+                CERROR("PtlInit: error %d\n", rc);
+                RETURN(rc);
+        }
+
+        rc = kportal_nal_register(ROUTER, kportal_router_cmd, NULL);
+        if (rc) {
+                PtlFini();
+                CERROR("kportal_nal_register: ROUTER error %d\n", rc);
+                RETURN(rc);
+        }
+
+        libcfs_register_ioctl(&kportal_ioctl_handler);
+
+        RETURN(0);
+}
+
+/* Module unload: undo init_kportals_module() in reverse order. */
+static void
+exit_kportals_module(void)
+{
+        /* stop accepting ioctls before the state they use goes away */
+        libcfs_deregister_ioctl(&kportal_ioctl_handler);
+
+        kportal_nal_unregister(ROUTER);
+
+        PtlFini();
+}
+
+EXPORT_SYMBOL(kportal_nal_register);
+EXPORT_SYMBOL(kportal_nal_unregister);
+EXPORT_SYMBOL(kportal_get_ni);
+EXPORT_SYMBOL(kportal_put_ni);
+EXPORT_SYMBOL(kportal_nal_cmd);
+
+EXPORT_SYMBOL(ptl_err_str);
+EXPORT_SYMBOL(lib_dispatch);
+EXPORT_SYMBOL(PtlMEAttach);
+EXPORT_SYMBOL(PtlMEInsert);
+EXPORT_SYMBOL(PtlMEUnlink);
+EXPORT_SYMBOL(PtlEQAlloc);
+EXPORT_SYMBOL(PtlMDAttach);
+EXPORT_SYMBOL(PtlMDUnlink);
+EXPORT_SYMBOL(PtlNIInit);
+EXPORT_SYMBOL(PtlNIFini);
+EXPORT_SYMBOL(PtlNIDebug);
+EXPORT_SYMBOL(PtlInit);
+EXPORT_SYMBOL(PtlFini);
+EXPORT_SYMBOL(PtlSnprintHandle);
+EXPORT_SYMBOL(PtlPut);
+EXPORT_SYMBOL(PtlGet);
+EXPORT_SYMBOL(PtlEQWait);
+EXPORT_SYMBOL(PtlEQFree);
+EXPORT_SYMBOL(PtlEQGet);
+EXPORT_SYMBOL(PtlGetId);
+EXPORT_SYMBOL(PtlMDBind);
+EXPORT_SYMBOL(lib_iov_nob);
+EXPORT_SYMBOL(lib_copy_iov2buf);
+EXPORT_SYMBOL(lib_copy_buf2iov);
+EXPORT_SYMBOL(lib_extract_iov);
+EXPORT_SYMBOL(lib_kiov_nob);
+EXPORT_SYMBOL(lib_copy_kiov2buf);
+EXPORT_SYMBOL(lib_copy_buf2kiov);
+EXPORT_SYMBOL(lib_extract_kiov);
+EXPORT_SYMBOL(lib_finalize);
+EXPORT_SYMBOL(lib_parse);
+EXPORT_SYMBOL(lib_create_reply_msg);
+EXPORT_SYMBOL(lib_init);
+EXPORT_SYMBOL(lib_fini);
+EXPORT_SYMBOL(dispatch_name);
+
+MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
+MODULE_DESCRIPTION("Portals v3.1");
+MODULE_LICENSE("GPL");
+module_init(init_kportals_module);
+module_exit(exit_kportals_module);
index ea25439..309025b 100644 (file)
@@ -39,6 +39,7 @@
 #define DEBUG_SUBSYSTEM S_PTLROUTER
 
 #include <linux/kp30.h>
+#include <linux/kpr.h>
 #include <portals/p30.h>
 #include <portals/lib-p30.h>
 
index 85c0d71..9977f20 100644 (file)
@@ -187,7 +187,7 @@ pingcli_start(struct portal_ioctl_data *args)
         client->md_in_head.length    = (args->ioc_size + STDSIZE)
                                                 * count;
         client->md_in_head.threshold = PTL_MD_THRESH_INF;
-        client->md_in_head.options   = PTL_MD_OP_PUT;
+        client->md_in_head.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
         client->md_in_head.user_ptr  = NULL;
         client->md_in_head.eventq    = client->eq;
         memset (client->inbuf, 0, (args->ioc_size + STDSIZE) * count);
@@ -203,7 +203,7 @@ pingcli_start(struct portal_ioctl_data *args)
         client->md_out_head.start     = client->outbuf;
         client->md_out_head.length    = STDSIZE + args->ioc_size;
         client->md_out_head.threshold = args->ioc_count;
-        client->md_out_head.options   = PTL_MD_OP_PUT;
+        client->md_out_head.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
         client->md_out_head.user_ptr  = NULL;
         client->md_out_head.eventq    = PTL_EQ_NONE;
 
@@ -213,7 +213,7 @@ pingcli_start(struct portal_ioctl_data *args)
 
         /* Bind the outgoing ping header */
         if ((rc=PtlMDBind (*nip, client->md_out_head,
-                                        &client->md_out_head_h))) {
+                           PTL_UNLINK, &client->md_out_head_h))) {
                 CERROR ("PtlMDBind error %d\n", rc);
                 pingcli_shutdown (1);
                 return NULL;
index 1e40ed8..0aa1ea7 100644 (file)
@@ -129,13 +129,13 @@ int pingsrv_thread(void *arg)
                 server->mdout.length    = server->evnt.rlength;
                 server->mdout.start     = server->in_buf;
                 server->mdout.threshold = 1; 
-                server->mdout.options   = PTL_MD_OP_PUT;
+                server->mdout.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
                 server->mdout.user_ptr  = NULL;
                 server->mdout.eventq    = PTL_EQ_NONE;
        
                 /* Bind the outgoing buffer */
                 if ((rc = PtlMDBind (server->ni, server->mdout, 
-                                                &server->mdout_h))) {
+                                     PTL_UNLINK, &server->mdout_h))) {
                          PDEBUG ("PtlMDBind", rc);
                          pingsrv_shutdown (1);
                          return 1;
@@ -145,7 +145,7 @@ int pingsrv_thread(void *arg)
                 server->mdin.start     = server->in_buf;
                 server->mdin.length    = MAXSIZE;
                 server->mdin.threshold = 1; 
-                server->mdin.options   = PTL_MD_OP_PUT;
+                server->mdin.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
                 server->mdin.user_ptr  = NULL;
                 server->mdin.eventq    = server->eq;
         
@@ -245,7 +245,7 @@ static struct pingsrv_data *pingsrv_setup(void)
         server->mdin.start     = server->in_buf;
         server->mdin.length    = MAXSIZE;
         server->mdin.threshold = 1; 
-        server->mdin.options   = PTL_MD_OP_PUT;
+        server->mdin.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
         server->mdin.user_ptr  = NULL;
         server->mdin.eventq    = server->eq;
         memset (server->in_buf, 0, STDSIZE);
index 64a1dd2..663da4e 100644 (file)
@@ -180,7 +180,7 @@ pingcli_start(struct portal_ioctl_data *args)
         client->md_in_head.start     = client->inbuf;
         client->md_in_head.length    = STDSIZE;
         client->md_in_head.threshold = 1;
-        client->md_in_head.options   = PTL_MD_OP_PUT;
+        client->md_in_head.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
         client->md_in_head.user_ptr  = NULL;
         client->md_in_head.eventq    = client->eq;
         memset (client->inbuf, 0, STDSIZE);
@@ -197,7 +197,7 @@ pingcli_start(struct portal_ioctl_data *args)
         client->md_out_head.start     = client->outbuf;
         client->md_out_head.length    = STDSIZE;
         client->md_out_head.threshold = 1;
-        client->md_out_head.options   = PTL_MD_OP_PUT;
+        client->md_out_head.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
         client->md_out_head.user_ptr  = NULL;
         client->md_out_head.eventq    = PTL_EQ_NONE;
 
@@ -205,7 +205,7 @@ pingcli_start(struct portal_ioctl_data *args)
 
         /* Bind the outgoing ping header */
         if ((rc=PtlMDBind (*nip, client->md_out_head,
-                                        &client->md_out_head_h))) {
+                           PTL_UNLINK, &client->md_out_head_h))) {
                 CERROR ("PtlMDBind error %d\n", rc);
                 pingcli_shutdown (1);
                 return (NULL);
index b8bda29..e8fb470 100644 (file)
@@ -121,13 +121,13 @@ int pingsrv_thread(void *arg)
                 server->mdout.start     = server->in_buf;
                 server->mdout.length    = STDSIZE;
                 server->mdout.threshold = 1; 
-                server->mdout.options   = PTL_MD_OP_PUT;
+                server->mdout.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
                 server->mdout.user_ptr  = NULL;
                 server->mdout.eventq    = PTL_EQ_NONE;
        
                 /* Bind the outgoing buffer */
                 if ((rc = PtlMDBind (server->ni, server->mdout, 
-                                                &server->mdout_h))) {
+                                     PTL_UNLINK, &server->mdout_h))) {
                          PDEBUG ("PtlMDBind", rc);
                          pingsrv_shutdown (1);
                          return 1;
@@ -137,7 +137,7 @@ int pingsrv_thread(void *arg)
                 server->mdin.start     = server->in_buf;
                 server->mdin.length    = STDSIZE;
                 server->mdin.threshold = 1; 
-                server->mdin.options   = PTL_MD_OP_PUT;
+                server->mdin.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
                 server->mdin.user_ptr  = NULL;
                 server->mdin.eventq    = server->eq;
         
@@ -234,7 +234,7 @@ static struct pingsrv_data *pingsrv_setup(void)
         server->mdin.start     = server->in_buf;
         server->mdin.length    = STDSIZE;
         server->mdin.threshold = 1; 
-        server->mdin.options   = PTL_MD_OP_PUT;
+        server->mdin.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
         server->mdin.user_ptr  = NULL;
         server->mdin.eventq    = server->eq;
         memset (server->in_buf, 0, STDSIZE);
index c27f555..00a7ae4 100644 (file)
@@ -127,11 +127,21 @@ static int procbridge_validate(nal_t *nal, void *base, size_t extent)
 }
 
 
-/* FIXME cfs temporary workaround! FIXME
- * global time out value
- */
-int __tcpnal_eqwait_timeout_value = 0;
-int __tcpnal_eqwait_timedout = 0;
+/* Acquire the procbridge mutex belonging to NAL 'n'; 'flags' is unused. */
+static void procbridge_lock(nal_t * n, unsigned long *flags)
+{
+    procbridge pb = (procbridge)((bridge)n->nal_data)->local;
+
+    pthread_mutex_lock(&pb->mutex);
+}
+
+/* Release the procbridge mutex belonging to NAL 'n'; 'flags' is unused. */
+static void procbridge_unlock(nal_t * n, unsigned long *flags)
+{
+    procbridge pb = (procbridge)((bridge)n->nal_data)->local;
+
+    pthread_mutex_unlock(&pb->mutex);
+}
 
 /* Function: yield
  * Arguments:  pid:
@@ -141,31 +151,43 @@ int __tcpnal_eqwait_timedout = 0;
  *   overload it to explicitly block until signalled by the
  *   lower half.
  */
-static void procbridge_yield(nal_t *n)
+static int procbridge_yield(nal_t *n, unsigned long *flags, int milliseconds)
 {
     bridge b=(bridge)n->nal_data;
     procbridge p=(procbridge)b->local;
 
-    pthread_mutex_lock(&p->mutex);
-    if (!__tcpnal_eqwait_timeout_value) {
+    if (milliseconds == 0)
+            return 0;
+            
+    if (milliseconds < 0) {
         pthread_cond_wait(&p->cond,&p->mutex);
     } else {
+        struct timeval then;
         struct timeval now;
         struct timespec timeout;
 
-        gettimeofday(&now, NULL);
-        timeout.tv_sec = now.tv_sec + __tcpnal_eqwait_timeout_value;
-        timeout.tv_nsec = now.tv_usec * 1000;
+        gettimeofday(&then, NULL);
+        timeout.tv_sec = then.tv_sec + milliseconds/1000;
+        timeout.tv_nsec = then.tv_usec * 1000 + milliseconds % 1000 * 1000000;
+        if (timeout.tv_nsec >= 1000000000) {
+                timeout.tv_sec++;
+                timeout.tv_nsec -= 1000000000;
+        }
+
+        pthread_cond_timedwait(&p->cond, &p->mutex, &timeout);
 
-        __tcpnal_eqwait_timedout =
-                pthread_cond_timedwait(&p->cond, &p->mutex, &timeout);
+        gettimeofday(&now, NULL);
+        milliseconds -= (now.tv_sec - then.tv_sec) * 1000 + 
+                        (now.tv_usec - then.tv_usec) / 1000;
+        
+        if (milliseconds < 0)
+                milliseconds = 0;
     }
-    pthread_mutex_unlock(&p->mutex);
+
+    return (milliseconds);
 }
 
 
-static void procbridge_lock(nal_t * nal, unsigned long *flags){}
-static void procbridge_unlock(nal_t * nal, unsigned long *flags){}
 /* api_nal
  *  the interface vector to allow the generic code to access
  *  this nal. this is seperate from the library side nal_cb.
@@ -233,7 +255,6 @@ nal_t *procbridge_interface(int num_interface,
     pthread_mutex_init(&p->mutex,0);
     pthread_cond_init(&p->cond, 0);
     p->nal_flags = 0;
-    pthread_mutex_init(&p->nal_cb_lock, 0);
 
     /* initialize notifier */
     if (socketpair(AF_UNIX, SOCK_STREAM, 0, p->notifier)) {
index 2a5ba0d..1cfb233 100644 (file)
@@ -95,7 +95,7 @@ static void nal_cli(nal_cb_t *nal,
     bridge b = (bridge) nal->nal_data;
     procbridge p = (procbridge) b->local;
 
-    pthread_mutex_lock(&p->nal_cb_lock);
+    pthread_mutex_lock(&p->mutex);
 }
 
 
@@ -105,9 +105,21 @@ static void nal_sti(nal_cb_t *nal,
     bridge b = (bridge)nal->nal_data;
     procbridge p = (procbridge) b->local;
 
-    pthread_mutex_unlock(&p->nal_cb_lock);
+    pthread_mutex_unlock(&p->mutex);
 }
 
+/* Event hook installed as cb_callback: run the event queue's callback for
+ * 'ev' if one is set, then wake every waiter on the bridge's condition
+ * variable.  'private' is unused. */
+static void nal_callback(nal_cb_t *nal, void *private,
+                         lib_eq_t *eq, ptl_event_t *ev)
+{
+        procbridge pb = (procbridge)((bridge)nal->nal_data)->local;
+
+        /* the original author's note says pb->mutex is held here */
+        if (eq->event_callback != NULL)
+                eq->event_callback(ev);
+
+        pthread_cond_broadcast(&pb->cond);
+}
 
 static int nal_dist(nal_cb_t *nal,
                     ptl_nid_t nid,
@@ -116,21 +128,20 @@ static int nal_dist(nal_cb_t *nal,
     return 0;
 }
 
-static void wakeup_topside(void *z)
+static void check_stopping(void *z)
 {
     bridge b = z;
     procbridge p = b->local;
-    int stop;
 
+    if ((p->nal_flags & NAL_FLAG_STOPPING) == 0)
+            return;
+    
     pthread_mutex_lock(&p->mutex);
-    stop = p->nal_flags & NAL_FLAG_STOPPING;
-    if (stop)
-        p->nal_flags |= NAL_FLAG_STOPPED;
+    p->nal_flags |= NAL_FLAG_STOPPED;
     pthread_cond_broadcast(&p->cond);
     pthread_mutex_unlock(&p->mutex);
 
-    if (stop)
-        pthread_exit(0);
+    pthread_exit(0);
 }
 
 
@@ -175,6 +186,7 @@ void *nal_thread(void *z)
     b->nal_cb->cb_printf=nal_printf;
     b->nal_cb->cb_cli=nal_cli;
     b->nal_cb->cb_sti=nal_sti;
+    b->nal_cb->cb_callback=nal_callback;
     b->nal_cb->cb_dist=nal_dist;
 
     pid_request = args->nia_requested_pid;
@@ -216,7 +228,7 @@ void *nal_thread(void *z)
            performs an operation and returns to blocking mode. we
            overload this function to inform the api side that
            it may be interested in looking at the event queue */
-        register_thunk(wakeup_topside,b);
+        register_thunk(check_stopping,b);
         timer_loop();
     }
     return(0);
index c27f555..00a7ae4 100644 (file)
@@ -127,11 +127,21 @@ static int procbridge_validate(nal_t *nal, void *base, size_t extent)
 }
 
 
-/* FIXME cfs temporary workaround! FIXME
- * global time out value
- */
-int __tcpnal_eqwait_timeout_value = 0;
-int __tcpnal_eqwait_timedout = 0;
+/* Acquire the procbridge mutex belonging to NAL 'n'; 'flags' is unused. */
+static void procbridge_lock(nal_t * n, unsigned long *flags)
+{
+    procbridge pb = (procbridge)((bridge)n->nal_data)->local;
+
+    pthread_mutex_lock(&pb->mutex);
+}
+
+/* Release the procbridge mutex belonging to NAL 'n'; 'flags' is unused. */
+static void procbridge_unlock(nal_t * n, unsigned long *flags)
+{
+    procbridge pb = (procbridge)((bridge)n->nal_data)->local;
+
+    pthread_mutex_unlock(&pb->mutex);
+}
 
 /* Function: yield
  * Arguments:  pid:
@@ -141,31 +151,43 @@ int __tcpnal_eqwait_timedout = 0;
  *   overload it to explicitly block until signalled by the
  *   lower half.
  */
-static void procbridge_yield(nal_t *n)
+static int procbridge_yield(nal_t *n, unsigned long *flags, int milliseconds)
 {
     bridge b=(bridge)n->nal_data;
     procbridge p=(procbridge)b->local;
 
-    pthread_mutex_lock(&p->mutex);
-    if (!__tcpnal_eqwait_timeout_value) {
+    if (milliseconds == 0)
+            return 0;
+            
+    if (milliseconds < 0) {
         pthread_cond_wait(&p->cond,&p->mutex);
     } else {
+        struct timeval then;
         struct timeval now;
         struct timespec timeout;
 
-        gettimeofday(&now, NULL);
-        timeout.tv_sec = now.tv_sec + __tcpnal_eqwait_timeout_value;
-        timeout.tv_nsec = now.tv_usec * 1000;
+        gettimeofday(&then, NULL);
+        timeout.tv_sec = then.tv_sec + milliseconds/1000;
+        timeout.tv_nsec = then.tv_usec * 1000 + milliseconds % 1000 * 1000000;
+        if (timeout.tv_nsec >= 1000000000) {
+                timeout.tv_sec++;
+                timeout.tv_nsec -= 1000000000;
+        }
+
+        pthread_cond_timedwait(&p->cond, &p->mutex, &timeout);
 
-        __tcpnal_eqwait_timedout =
-                pthread_cond_timedwait(&p->cond, &p->mutex, &timeout);
+        gettimeofday(&now, NULL);
+        milliseconds -= (now.tv_sec - then.tv_sec) * 1000 + 
+                        (now.tv_usec - then.tv_usec) / 1000;
+        
+        if (milliseconds < 0)
+                milliseconds = 0;
     }
-    pthread_mutex_unlock(&p->mutex);
+
+    return (milliseconds);
 }
 
 
-static void procbridge_lock(nal_t * nal, unsigned long *flags){}
-static void procbridge_unlock(nal_t * nal, unsigned long *flags){}
 /* api_nal
  *  the interface vector to allow the generic code to access
  *  this nal. this is separate from the library side nal_cb.
@@ -233,7 +255,6 @@ nal_t *procbridge_interface(int num_interface,
     pthread_mutex_init(&p->mutex,0);
     pthread_cond_init(&p->cond, 0);
     p->nal_flags = 0;
-    pthread_mutex_init(&p->nal_cb_lock, 0);
 
     /* initialize notifier */
     if (socketpair(AF_UNIX, SOCK_STREAM, 0, p->notifier)) {
index 2a5ba0d..1cfb233 100644 (file)
@@ -95,7 +95,7 @@ static void nal_cli(nal_cb_t *nal,
     bridge b = (bridge) nal->nal_data;
     procbridge p = (procbridge) b->local;
 
-    pthread_mutex_lock(&p->nal_cb_lock);
+    pthread_mutex_lock(&p->mutex);
 }
 
 
@@ -105,9 +105,21 @@ static void nal_sti(nal_cb_t *nal,
     bridge b = (bridge)nal->nal_data;
     procbridge p = (procbridge) b->local;
 
-    pthread_mutex_unlock(&p->nal_cb_lock);
+    pthread_mutex_unlock(&p->mutex);
 }
 
+static void nal_callback(nal_cb_t *nal, void *private,
+                         lib_eq_t *eq, ptl_event_t *ev)
+{
+        bridge b = (bridge)nal->nal_data;
+        procbridge p = (procbridge) b->local;
+
+        /* holding p->mutex */
+        if (eq->event_callback != NULL)
+                eq->event_callback(ev);
+        
+        pthread_cond_broadcast(&p->cond);
+}
 
 static int nal_dist(nal_cb_t *nal,
                     ptl_nid_t nid,
@@ -116,21 +128,20 @@ static int nal_dist(nal_cb_t *nal,
     return 0;
 }
 
-static void wakeup_topside(void *z)
+static void check_stopping(void *z)
 {
     bridge b = z;
     procbridge p = b->local;
-    int stop;
 
+    if ((p->nal_flags & NAL_FLAG_STOPPING) == 0)
+            return;
+    
     pthread_mutex_lock(&p->mutex);
-    stop = p->nal_flags & NAL_FLAG_STOPPING;
-    if (stop)
-        p->nal_flags |= NAL_FLAG_STOPPED;
+    p->nal_flags |= NAL_FLAG_STOPPED;
     pthread_cond_broadcast(&p->cond);
     pthread_mutex_unlock(&p->mutex);
 
-    if (stop)
-        pthread_exit(0);
+    pthread_exit(0);
 }
 
 
@@ -175,6 +186,7 @@ void *nal_thread(void *z)
     b->nal_cb->cb_printf=nal_printf;
     b->nal_cb->cb_cli=nal_cli;
     b->nal_cb->cb_sti=nal_sti;
+    b->nal_cb->cb_callback=nal_callback;
     b->nal_cb->cb_dist=nal_dist;
 
     pid_request = args->nia_requested_pid;
@@ -216,7 +228,7 @@ void *nal_thread(void *z)
            performs an operation and returns to blocking mode. we
            overload this function to inform the api side that
            it may be interested in looking at the event queue */
-        register_thunk(wakeup_topside,b);
+        register_thunk(check_stopping,b);
         timer_loop();
     }
     return(0);
index 925406f..e871d9a 100644 (file)
@@ -3,7 +3,8 @@
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
-COMPILE = $(CC) -Wall -g -I$(srcdir)/../include
+# ../ for <portals/*.h>, ../../ for <config.h>
+COMPILE = $(CC) -Wall -g -I$(srcdir)/../include -I$(srcdir)/../../include
 LINK = $(CC) -o $@
 
 if LIBLUSTRE
index 045bace..69d5b51 100644 (file)
@@ -11,13 +11,21 @@ else
 DIRS24 = ptlbd
 endif
 
+# just until things are farther along
+if CRAY_PORTALS
+UTILS_TESTS = 
+else
+UTILS_TESTS = utils tests
+endif
+
 if LIBLUSTRE
 SUBDIRS = portals obdclass lov ptlrpc obdecho osc utils mdc lvfs liblustre
 else
-SUBDIRS = lvfs portals obdclass include $(DIRS24) mds utils obdfilter mdc osc ost 
-SUBDIRS+= llite obdecho lov cobd tests doc scripts conf ptlrpc
+SUBDIRS = lvfs portals obdclass include $(DIRS24) mds obdfilter mdc osc ost 
+SUBDIRS+= llite obdecho lov cobd doc scripts conf ptlrpc $(UTILS_TESTS)
 endif
 
+
 if SNAPFS
 SUBDIRS+= snapfs
 endif
index 4638e92..aff6e19 100644 (file)
@@ -67,7 +67,7 @@ if test x$enable_inkernel = xyes ; then
                sh -e -x -c '(cp -f $0.mk $0.in)'
 fi
 
-AM_CONFIG_HEADER(portals/include/config.h)
+AM_CONFIG_HEADER(include/config.h)
 
 AC_OUTPUT([Makefile lvfs/Makefile portals/Makefile portals/Kernelenv \
           portals/libcfs/Makefile portals/portals/Makefile \
index 14f0f3b..eca8fdd 100644 (file)
@@ -1,10 +1,61 @@
-/* include/config.h.in.  Generated automatically from configure.in by autoheader.  */
+/* include/config.h.in.  Generated from configure.in by autoheader.  */
 
-/* Define if you have the `readline' library (-lreadline). */
-#undef HAVE_LIBREADLINE
+/* Use the Pinger */
+#undef ENABLE_PINGER
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
+
+/* Define to 1 if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#undef HAVE_STDLIB_H
+
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+
+/* IOCTL Buffer Size */
+#undef OBD_MAX_IOCTL_BUFFER
 
 /* Name of package */
 #undef PACKAGE
 
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+
+/* The size of a `unsigned long long', as computed by sizeof. */
+#undef SIZEOF_UNSIGNED_LONG_LONG
+
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
 /* Version number of package */
 #undef VERSION
index 064ac80..2133888 100644 (file)
@@ -263,6 +263,8 @@ typedef int spinlock_t;
 typedef __u64 kdev_t;
 
 #define SPIN_LOCK_UNLOCKED 0
+#define LASSERT_SPIN_LOCKED(lock) do {} while(0)
+
 static inline void spin_lock(spinlock_t *l) {return;}
 static inline void spin_unlock(spinlock_t *l) {return;}
 static inline void spin_lock_init(spinlock_t *l) {return;}
index 3063c09..6736418 100644 (file)
@@ -422,6 +422,11 @@ int ldlm_handle_cancel(struct ptlrpc_request *req);
 int ldlm_del_waiting_lock(struct ldlm_lock *lock);
 int ldlm_get_ref(void);
 void ldlm_put_ref(int force);
+#ifndef __KERNEL__
+void liblustre_ldlm_handle_bl_callback(struct ldlm_namespace *ns,
+                                       struct ldlm_lock_desc *ld,
+                                       struct ldlm_lock *lock);
+#endif
 
 /* ldlm_lock.c */
 ldlm_processing_policy ldlm_get_processing_policy(struct ldlm_resource *res);
index b3d9308..8c5e54d 100644 (file)
 #include <linux/lustre_import.h>
 #include <linux/lprocfs_status.h>
 
+/* MD flags we _always_ use */
+#define PTLRPC_MD_OPTIONS  (PTL_MD_EVENT_START_DISABLE | \
+                            PTL_MD_LUSTRE_COMPLETION_SEMANTICS)
+
+/* Define some large-ish defaults for MTU and MAX_IOV if portals ones
+ * aren't defined (i.e. no limits) or too large */
+#if (defined(PTL_MTU) && (PTL_MTU <= (1 << 20)))
+# define PTLRPC_MTU  PTL_MTU
+#else
+# define PTLRPC_MTU  (1 << 20)
+#endif
+#if (defined(PTL_MAX_IOV) && (PTL_MAX_IOV <= 512))
+# define PTLRPC_MAX_IOV PTL_MAX_IOV
+#else
+# define PTLRPC_MAX_IOV 512
+#endif
+
+/* Define consistent max bulk size/pages */
+#if (PTLRPC_MTU > PTLRPC_MAX_IOV * PAGE_SIZE)
+# define PTLRPC_MAX_BRW_PAGES   PTLRPC_MAX_IOV
+# define PTLRPC_MAX_BRW_SIZE   (PTLRPC_MAX_IOV * PAGE_SIZE)
+#else
+# define PTLRPC_MAX_BRW_PAGES  (PTLRPC_MTU / PAGE_SIZE)
+# define PTLRPC_MAX_BRW_SIZE    PTLRPC_MTU
+#endif
+
 /* Size over which to OBD_VMALLOC() rather than OBD_ALLOC() service request
  * buffers */
 #define SVC_BUF_VMALLOC_THRESHOLD (2*PAGE_SIZE)
@@ -377,8 +403,8 @@ struct ptlrpc_bulk_desc {
         __u32 bd_portal;
         struct ptlrpc_request *bd_req;          /* associated request */
         wait_queue_head_t      bd_waitq;        /* server side only WQ */
-        int                    bd_page_count;   /* # pages (== entries in bd_iov) */
-        int                    bd_max_pages;    /* allocated size of bd_iov */
+        int                    bd_iov_count;    /* # entries in bd_iov */
+        int                    bd_max_iov;      /* allocated size of bd_iov */
         int                    bd_nob;          /* # bytes covered */
         int                    bd_nob_transferred; /* # bytes GOT/PUT */
 
@@ -387,10 +413,10 @@ struct ptlrpc_bulk_desc {
         struct ptlrpc_cb_id    bd_cbid;         /* network callback info */
         ptl_handle_md_t        bd_md_h;         /* associated MD */
         
-#ifdef __KERNEL__
-        ptl_kiov_t bd_iov[PTL_MD_MAX_IOV];
+#if (!CRAY_PORTALS && defined(__KERNEL__))
+        ptl_kiov_t             bd_iov[0];
 #else
-        struct iovec bd_iov[PTL_MD_MAX_IOV];
+        struct iovec           bd_iov[0];
 #endif
 };
 
index c7848b3..ebc2a0f 100644 (file)
@@ -1058,7 +1058,7 @@ typedef __u8 class_uuid_t[16];
 void class_uuid_unparse(class_uuid_t in, struct obd_uuid *out);
 
 /* lustre_peer.c    */
-int lustre_uuid_to_peer(char *uuid, struct lustre_peer *peer);
+int lustre_uuid_to_peer(char *uuid, ptl_handle_ni_t *peer_ni, ptl_nid_t *peer_nid);
 int class_add_uuid(char *uuid, __u64 nid, __u32 nal);
 int class_del_uuid (char *uuid);
 void class_init_uuidlist(void);
index 90d86c2..a13ecc9 100644 (file)
@@ -1,3 +1,5 @@
+$Id: bproc-patch-2.4.20,v 1.6 2004/03/19 01:09:33 zab Exp $
+
 Index: linux/fs/exec.c
 ===================================================================
 --- linux.orig/fs/exec.c       2003-09-03 17:52:00.000000000 -0400
@@ -762,7 +764,7 @@ Index: linux/kernel/bproc_hook.c
 + *  along with this program; if not, write to the Free Software
 + *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 + *
-+ * Id: bproc-patch-2.4.20,v 1.3.2.1 2004/02/14 07:21:44 nic Exp $
++ * $Id: bproc-patch-2.4.20,v 1.6 2004/03/19 01:09:33 zab Exp $
 + *-----------------------------------------------------------------------*/
 +#include <linux/kernel.h>
 +#include <linux/sched.h>
@@ -830,7 +832,7 @@ Index: linux/include/linux/bproc.h
 + *  along with this program; if not, write to the Free Software
 + *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 + *
-+ * Id: bproc-patch-2.4.20,v 1.3.2.1 2004/02/14 07:21:44 nic Exp $
++ * $Id: bproc-patch-2.4.20,v 1.6 2004/03/19 01:09:33 zab Exp $
 + *-----------------------------------------------------------------------*/
 +#ifndef _LINUX_BPROC_H
 +#define _LINUX_BPROC_H
index d1f8c56..69d3f14 100644 (file)
@@ -133,6 +133,9 @@ void l_check_no_ns_lock(struct ldlm_namespace *ns)
 #else
 void l_check_no_ns_lock(struct ldlm_namespace *ns)
 {
-#warning "FIXME: check lock in user space??"
+        if (l_has_lock(&ns->ns_lock)) {
+                CERROR("namespace %s lock held illegally; tell phil\n",
+                       ns->ns_name);
+        }
 }
 #endif /* __KERNEL__ */
index 6c1f750..57d1058 100644 (file)
@@ -109,7 +109,7 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf)
         spin_lock_init(&cli->cl_write_rpc_hist.oh_lock);
         spin_lock_init(&cli->cl_read_page_hist.oh_lock);
         spin_lock_init(&cli->cl_write_page_hist.oh_lock);
-        cli->cl_max_pages_per_rpc = PTL_MD_MAX_PAGES;
+        cli->cl_max_pages_per_rpc = PTLRPC_MAX_BRW_PAGES;
         cli->cl_max_rpcs_in_flight = OSC_MAX_RIF_DEFAULT;
 
         ldlm_get_ref();
index b5b0e33..73f10eb 100644 (file)
@@ -477,8 +477,13 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
 
                 LDLM_LOCK_GET(lock); /* dropped by bl thread */
                 ldlm_lock_remove_from_lru(lock);
+#ifdef __KERNEL__
                 ldlm_bl_to_thread(ns, NULL, lock);
                 l_unlock(&ns->ns_lock);
+#else
+                l_unlock(&ns->ns_lock);
+                liblustre_ldlm_handle_bl_callback(ns, NULL, lock);
+#endif
         } else if (ns->ns_client == LDLM_NAMESPACE_CLIENT &&
                    !lock->l_readers && !lock->l_writers) {
                 /* If this is a client-side namespace and this was the last
index 6602713..76a719f 100644 (file)
@@ -965,6 +965,14 @@ int ldlm_bl_to_thread(struct ldlm_namespace *ns, struct ldlm_lock_desc *ld,
 
         RETURN(0);
 }
+#else
+/* XXX userspace: invoke the blocking-callback handler directly */
+void liblustre_ldlm_handle_bl_callback(struct ldlm_namespace *ns,
+                                       struct ldlm_lock_desc *ld,
+                                       struct ldlm_lock *lock)
+{
+        ldlm_handle_bl_callback(ns, ld, lock);
+}
 #endif
 
 static int ldlm_callback_handler(struct ptlrpc_request *req)
index de9e7c5..7d5000e 100644 (file)
@@ -586,6 +586,9 @@ int ldlm_cli_cancel(struct lustre_handle *lockh)
 int ldlm_cancel_lru(struct ldlm_namespace *ns)
 {
         struct list_head *tmp, *next;
+#ifndef __KERNEL__
+        LIST_HEAD(cblist);
+#endif
         int count, rc = 0;
         ENTRY;
 
@@ -612,12 +615,25 @@ int ldlm_cancel_lru(struct ldlm_namespace *ns)
 
                 LDLM_LOCK_GET(lock); /* dropped by bl thread */
                 ldlm_lock_remove_from_lru(lock);
+#if __KERNEL__
                 ldlm_bl_to_thread(ns, NULL, lock);
+#else
+                list_add(&lock->l_lru, &cblist);
+#endif
 
                 if (--count == 0)
                         break;
         }
         l_unlock(&ns->ns_lock);
+#ifndef __KERNEL__
+        while (!list_empty(&cblist)) {
+                struct ldlm_lock *lock;
+
+                lock = list_entry(cblist.next, struct ldlm_lock, l_lru);
+                list_del_init(&lock->l_lru);
+                liblustre_ldlm_handle_bl_callback(ns, NULL, lock);
+        }
+#endif
         RETURN(rc);
 }
 
index 6622485..66d7bf5 100644 (file)
@@ -27,8 +27,6 @@ SYSIO_LIBS = $(SYSIO)/drivers/native/libsysio_native.a \
              $(SYSIO)/src/libsysio.a \
              $(SYSIO)/dev/stdfd/libsysio_stdfd.a
 
-#SYSIO_LIBS = $(SYSIO)/lib/libsysio.a
-
 lib_LIBRARIES = liblustre.a
 noinst_LIBRARIES = libllite.a
 
index 1cb6a37..c028744 100644 (file)
@@ -48,6 +48,8 @@
 
 #include "llite_lib.h"
 
+unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL |
+                                            S_GMNAL | S_IBNAL);
 
 ptl_handle_ni_t         tcpnal_ni;
 struct task_struct     *current;
@@ -139,10 +141,11 @@ ptl_nid_t tcpnal_mynid;
 
 int init_lib_portals()
 {
+        int max_interfaces;
         int rc;
         ENTRY;
 
-        PtlInit();
+        PtlInit(&max_interfaces);
         rc = PtlNIInit(procbridge_interface, 0, 0, 0, &tcpnal_ni);
         if (rc != 0) {
                 CERROR("TCPNAL: PtlNIInit failed: error %d\n", rc);
index 3929e2c..af4a0dc 100644 (file)
@@ -108,8 +108,8 @@ void llu_lookup_finish_locks(struct lookup_intent *it, struct pnode *pnode)
                 mdc_set_lock_data(&it->d.lustre.it_lock_handle, inode);
         }
 
-        /* drop IT_LOOKUP locks */
-        if (it->it_op == IT_LOOKUP)
+        /* drop lookup/getattr locks */
+        if (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR)
                 ll_intent_release(it);
 
 }
@@ -279,14 +279,19 @@ int llu_pb_revalidate(struct pnode *pnode, int flags, struct lookup_intent *it)
                 GOTO(out, rc = 0);
 
         rc = pnode_revalidate_finish(req, 1, it, pnode);
+        if (rc != 0) {
+                ll_intent_release(it);
+                GOTO(out, rc = 0);
+        }
+        rc = 1;
 
         /* Note: ll_intent_lock may cause a callback, check this! */
 
-        if (it->it_op & (IT_OPEN | IT_GETATTR))
+        if (it->it_op & IT_OPEN)
                 LL_SAVE_INTENT(pb->pb_ino, it);
-        RETURN(1);
+
  out:
-        if (req)
+        if (req && rc == 1)
                 ptlrpc_req_finished(req);
         if (rc == 0) {
                 LASSERT(pb->pb_ino);
@@ -295,8 +300,6 @@ int llu_pb_revalidate(struct pnode *pnode, int flags, struct lookup_intent *it)
         } else {
                 llu_lookup_finish_locks(it, pnode);
                 llu_i2info(pb->pb_ino)->lli_stale_flag = 0;
-                if (it->it_op & (IT_OPEN | IT_GETATTR))
-                        LL_SAVE_INTENT(pb->pb_ino, it);
         }
         RETURN(rc);
 }
@@ -361,7 +364,7 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset,
         }
 
         /* intent will be further used in case of open() */
-        if (inode && (it->it_op & (IT_OPEN | IT_GETATTR)))
+        if (inode && (it->it_op & IT_OPEN))
                 LL_SAVE_INTENT(inode, it);
 
         child->p_base->pb_ino = inode;
index 8321956..a03dc4a 100644 (file)
@@ -485,14 +485,7 @@ static int llu_iop_getattr(struct pnode *pno,
         rc = llu_inode_revalidate(ino);
         if (!rc) {
                 copy_stat_buf(ino, b);
-
-                if (llu_i2info(ino)->lli_it) {
-                        struct lookup_intent *it;
-
-                        LL_GET_INTENT(ino, it);
-                        it->it_op_release(it);
-                        OBD_FREE(it, sizeof(*it));
-                }
+                LASSERT(!llu_i2info(ino)->lli_it);
         }
 
         RETURN(rc);
index e995588..c038ed7 100644 (file)
@@ -1,3 +1,2 @@
-.deps
 Makefile
-Makefile.in
+Makefile.in
\ No newline at end of file
index 51bf60f..40d3731 100644 (file)
@@ -19,6 +19,9 @@ struct ldlm_namespace;
 struct ldlm_res_id;
 struct obd_import;
 
+unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL |
+                                            S_GMNAL | S_IBNAL);
+                                                                                                                        
 void *inter_module_get(char *arg)
 {
         if (!strcmp(arg, "tcpnal_ni"))
@@ -121,9 +124,10 @@ ptl_nid_t tcpnal_mynid;
 
 int init_lib_portals()
 {
+       int max_interfaces;
         int rc;
 
-        PtlInit();
+        PtlInit(&max_interfaces);
         rc = PtlNIInit(procbridge_interface, 0, 0, 0, &tcpnal_ni);
         if (rc != 0) {
                 CERROR("ksocknal: PtlNIInit failed: error %d\n", rc);
index 8ae804e..2aad640 100644 (file)
@@ -24,6 +24,7 @@
 #define DEBUG_SUBSYSTEM S_LLITE
 
 #include <linux/module.h>
+#include <linux/types.h>
 #include <linux/random.h>
 #include <linux/version.h>
 
index d6c9f63..61c2b4b 100644 (file)
@@ -557,7 +557,7 @@ static int ll_issue_page_read(struct obd_export *exp,
         RETURN(rc);
 }
 
-#define LL_RA_MIN(inode) ((unsigned long)PTL_MD_MAX_PAGES / 2)
+#define LL_RA_MIN(inode) ((unsigned long)PTLRPC_MAX_BRW_PAGES / 2)
 #define LL_RA_MAX(inode) ((ll_i2info(inode)->lli_smd->lsm_xfersize * 3) >> \
                           PAGE_CACHE_SHIFT)
 
index 57ceb3f..e248be7 100644 (file)
@@ -24,6 +24,7 @@
 #define DEBUG_SUBSYSTEM S_LLITE
 
 #include <linux/module.h>
+#include <linux/types.h>
 #include <linux/random.h>
 #include <linux/version.h>
 #include <linux/lustre_lite.h>
index ee340b9..526776b 100644 (file)
@@ -24,6 +24,7 @@
 #define DEBUG_SUBSYSTEM S_LLITE
 
 #include <linux/module.h>
+#include <linux/types.h>
 #include <linux/random.h>
 #include <linux/version.h>
 #include <linux/lustre_lite.h>
index 1b40327..2a51441 100644 (file)
@@ -295,7 +295,7 @@ int lov_alloc_memmd(struct lov_stripe_md **lsmp, int stripe_count, int pattern)
         (*lsmp)->lsm_magic = LOV_MAGIC;
         (*lsmp)->lsm_stripe_count = stripe_count;
         (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count;
-        (*lsmp)->lsm_xfersize = PTL_MTU * stripe_count;
+        (*lsmp)->lsm_xfersize = PTLRPC_MTU * stripe_count;
         (*lsmp)->lsm_pattern = pattern;
         (*lsmp)->lsm_oinfo[0].loi_ost_idx = ~0;
 
index 83a6563..2d39a95 100644 (file)
@@ -294,10 +294,11 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
 
 
         case OBD_IOC_CLOSE_UUID: {
-                struct lustre_peer peer;
+                ptl_nid_t       peer_nid;
+                ptl_handle_ni_t peer_ni;
                 CDEBUG(D_IOCTL, "closing all connections to uuid %s\n",
                        data->ioc_inlbuf1);
-                lustre_uuid_to_peer(data->ioc_inlbuf1, &peer);
+                lustre_uuid_to_peer(data->ioc_inlbuf1, &peer_ni, &peer_nid);
                 GOTO(out, err = 0);
         }
 
index 28503f2..9d99e9a 100644 (file)
@@ -740,14 +740,14 @@ static int llog_lvfs_destroy(struct llog_handle *handle)
 }
 
 int llog_get_cat_list(struct obd_device *obd, struct obd_device *disk_obd,
-                      char *name, int count, struct llog_logid *idarray)
+                      char *name, int count, struct llog_catid *idarray)
 {
         LBUG();
         return 0;
 }
 
 int llog_put_cat_list(struct obd_device *obd, struct obd_device *disk_obd,
-                      char *name, int count, struct llog_logid *idarray)
+                      char *name, int count, struct llog_catid *idarray)
 {
         LBUG();
         return 0;
index bbe3340..1e227e1 100644 (file)
@@ -61,7 +61,8 @@ void class_exit_uuidlist(void)
         class_del_uuid(NULL);
 }
 
-int lustre_uuid_to_peer(char *uuid, struct lustre_peer *peer)
+int lustre_uuid_to_peer(char *uuid, 
+                        ptl_handle_ni_t *peer_ni, ptl_nid_t *peer_nid)
 {
         struct list_head *tmp;
 
@@ -72,8 +73,8 @@ int lustre_uuid_to_peer(char *uuid, struct lustre_peer *peer)
                         list_entry(tmp, struct uuid_nid_data, head);
 
                 if (strcmp(data->uuid, uuid) == 0) {
-                        peer->peer_nid = data->nid;
-                        peer->peer_ni = data->ni;
+                        *peer_nid = data->nid;
+                        *peer_ni = data->ni;
 
                         spin_unlock (&g_uuid_lock);
                         return 0;
index 0492fc6..93b4276 100644 (file)
@@ -59,7 +59,7 @@ int osc_wr_max_pages_per_rpc(struct file *file, const char *buffer,
         if (rc)
                 return rc;
 
-        if (val < 1 || val > PTL_MD_MAX_PAGES)
+        if (val < 1 || val > PTLRPC_MAX_BRW_PAGES)
                 return -ERANGE;
 
         spin_lock(&cli->cl_loi_list_lock);
index 6d3b80f..d4db2c7 100644 (file)
@@ -36,7 +36,8 @@
 # include <linux/module.h>
 # include <linux/mm.h>
 # include <linux/highmem.h>
-# include <linux/lustre_dlm.h>
+# include <linux/ctype.h>
+# include <linux/init.h>
 # if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
 #  include <linux/workqueue.h>
 #  include <linux/smp_lock.h>
 # include <liblustre.h>
 #endif
 
-#ifndef  __CYGWIN__
-# include <linux/ctype.h>
-# include <linux/init.h>
-#else
+#ifdef  __CYGWIN__
 # include <ctype.h>
 #endif
 
+# include <linux/lustre_dlm.h>
 #include <linux/obd_class.h>
 #include "osc_internal.h"
 
index f011d0e..b14d013 100644 (file)
 #ifndef OSC_INTERNAL_H
 #define OSC_INTERNAL_H
 
-#include <portals/lib-types.h> /* for PTL_MTU and PTL_MD_MAX_PAGES */
-
-
-/* bug 1578: negotiate BRW_MAX_SIZE with the OST, instead of hard-coding it */
-#define OSC_BRW_MAX_SIZE PTL_MTU
-#define OSC_BRW_MAX_IOV PTL_MD_MAX_PAGES
-
 #define OAP_MAGIC 8675309
 
 struct osc_async_page {
index ecb2bf3..005ebbd 100644 (file)
@@ -36,7 +36,8 @@
 # include <linux/module.h>
 # include <linux/mm.h>
 # include <linux/highmem.h>
-# include <linux/lustre_dlm.h>
+# include <linux/ctype.h>
+# include <linux/init.h>
 # if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
 #  include <linux/workqueue.h>
 #  include <linux/smp_lock.h>
 # include <liblustre.h>
 #endif
 
+# include <linux/lustre_dlm.h>
 #include <linux/kp30.h>
 #include <linux/lustre_net.h>
 #include <linux/lustre_user.h>
 #include <linux/obd_ost.h>
 #include <linux/obd_lov.h>
 
-#ifndef  __CYGWIN__
-# include <linux/ctype.h>
-# include <linux/init.h>
-#else
+#ifdef  __CYGWIN__
 # include <ctype.h>
 #endif
 
@@ -189,7 +188,7 @@ static int osc_getattr_interpret(struct ptlrpc_request *req,
                 memcpy(aa->aa_oa, &body->oa, sizeof(*aa->aa_oa));
 
                 /* This should really be sent by the OST */
-                aa->aa_oa->o_blksize = OSC_BRW_MAX_SIZE;
+                aa->aa_oa->o_blksize = PTLRPC_MAX_BRW_SIZE;
                 aa->aa_oa->o_valid |= OBD_MD_FLBLKSZ;
         } else {
                 CERROR("can't unpack ost_body\n");
@@ -264,7 +263,7 @@ static int osc_getattr(struct obd_export *exp, struct obdo *oa,
         memcpy(oa, &body->oa, sizeof(*oa));
 
         /* This should really be sent by the OST */
-        oa->o_blksize = OSC_BRW_MAX_SIZE;
+        oa->o_blksize = PTLRPC_MAX_BRW_SIZE;
         oa->o_valid |= OBD_MD_FLBLKSZ;
 
         EXIT;
@@ -357,7 +356,7 @@ int osc_real_create(struct obd_export *exp, struct obdo *oa,
         memcpy(oa, &body->oa, sizeof(*oa));
 
         /* This should really be sent by the OST */
-        oa->o_blksize = OSC_BRW_MAX_SIZE;
+        oa->o_blksize = PTLRPC_MAX_BRW_SIZE;
         oa->o_valid |= OBD_MD_FLBLKSZ;
 
         /* XXX LOV STACKING: the lsm that is passed to us from LOV does not
@@ -1082,8 +1081,8 @@ static int osc_brw(int cmd, struct obd_export *exp, struct obdo *oa,
                 obd_count pages_per_brw;
                 int rc;
 
-                if (page_count > OSC_BRW_MAX_IOV)
-                        pages_per_brw = OSC_BRW_MAX_IOV;
+                if (page_count > PTLRPC_MAX_BRW_PAGES)
+                        pages_per_brw = PTLRPC_MAX_BRW_PAGES;
                 else
                         pages_per_brw = page_count;
 
@@ -1122,8 +1121,8 @@ static int osc_brw_async(int cmd, struct obd_export *exp, struct obdo *oa,
                 obd_count pages_per_brw;
                 int rc;
 
-                if (page_count > OSC_BRW_MAX_IOV)
-                        pages_per_brw = OSC_BRW_MAX_IOV;
+                if (page_count > PTLRPC_MAX_BRW_PAGES)
+                        pages_per_brw = PTLRPC_MAX_BRW_PAGES;
                 else
                         pages_per_brw = page_count;
 
@@ -2342,8 +2341,8 @@ static int sanosc_brw(int cmd, struct obd_export *exp, struct obdo *oa,
                 obd_count pages_per_brw;
                 int rc;
 
-                if (page_count > OSC_BRW_MAX_IOV)
-                        pages_per_brw = OSC_BRW_MAX_IOV;
+                if (page_count > PTLRPC_MAX_BRW_PAGES)
+                        pages_per_brw = PTLRPC_MAX_BRW_PAGES;
                 else
                         pages_per_brw = page_count;
 
index 1a223f2..0083ac1 100644 (file)
@@ -5,8 +5,15 @@
 
 EXTRA_DIST = Rules.linux archdep.m4 include 
 DIST_SUBDIRS = libcfs portals knals unals utils tests doc router
+
 if LIBLUSTRE
 SUBDIRS = portals unals utils
 else
+
+if CRAY_PORTALS
+SUBDIRS = libcfs tests doc 
+else 
 SUBDIRS = libcfs portals knals unals utils tests doc router
 endif
+
+endif
index 65cfaff..b8b5c9d 100644 (file)
@@ -4,6 +4,16 @@ AC_ARG_ENABLE(inkernel, [  --enable-inkernel set up 2.5 kernel makefiles])
 AM_CONDITIONAL(INKERNEL, test x$enable_inkernel = xyes)
 echo "Makefile for in kernel build: $INKERNEL"
 
+# -------- are we building against an external portals? -------
+# FIXME: appending -DCRAY_PORTALS=1 to CC below is a stopgap;
+# automake seems to have a DEFS variable which looks like the proper place
+AC_ARG_WITH(cray-portals, [  --with-cray-portals=[path] path to cray portals],
+       CRAY_PORTALS_INCLUDE="-I$with_cray_portals"
+       CC="$CC -DCRAY_PORTALS=1"
+       )
+AC_SUBST(CRAY_PORTALS_INCLUDE)
+AM_CONDITIONAL(CRAY_PORTALS, test ! "x$with_cray_portals" = x)
+
 # -------- liblustre compilation --------------
 AC_ARG_WITH(lib, [  --with-lib compile lustre library], host_cpu="lib")
 
@@ -156,10 +166,11 @@ fi
 
 # ------------ include paths ------------------
 
+KINCFLAGS="$CRAY_PORTALS_INCLUDE $CRAY_PORTALS_COMMANDLINE \
+       -I\$(top_srcdir)/include \
+       -I\$(top_srcdir)/portals/include -I$LINUX/include"
 if test $host_cpu != "lib" ; then 
-    KINCFLAGS="-I\$(top_srcdir)/include -I\$(top_srcdir)/portals/include -I$LINUX/include"
-else
-    KINCFLAGS='-I$(top_srcdir)/include -I$(top_srcdir)/portals/include'
+    KINCFLAGS="$KINCFLAGS -I$LINUX/include"
 fi
 CPPFLAGS="$KINCFLAGS $ARCHCPPFLAGS"
 
diff --git a/lustre/portals/configure.in b/lustre/portals/configure.in
deleted file mode 100644 (file)
index bacf532..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-# This version is here to make autoconf happy; the name is a file which is
-# "unique" to this directory so that configure knows where it should run.
-AC_INIT(knals/Makefile.am, 3.0)
-AC_CANONICAL_SYSTEM
-# Copyright (C) 2001  Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-# Automake variables.  Steal the version number from packaging/intersync.spec
-AM_INIT_AUTOMAKE(portals, builtin([esyscmd], [sed -ne '/.*define IVERSION /{ s/.*IVERSION //; p; }' libcfs/module.c]))
-# AM_MAINTAINER_MODE
-
-sinclude(build.m4)
-sinclude(archdep.m4)
-
-if test x$enable_inkernel = xyes ; then
-cp Kernelenv.mk Kernelenv.in
-cp Makefile.mk Makefile.in
-cp libcfs/Makefile.mk libcfs/Makefile.in
-cp portals/Makefile.mk portals/Makefile.in
-cp knals/Makefile.mk knals/Makefile.in
-cp knals/socknal/Makefile.mk knals/socknal/Makefile.in
-cp router/Makefile.mk router/Makefile.in
-fi
-
-AM_CONFIG_HEADER(include/config.h)
-
-AC_OUTPUT([Makefile Kernelenv libcfs/Makefile portals/Makefile \
-          unals/Makefile knals/Makefile router/Makefile \
-         knals/socknal/Makefile knals/gmnal/Makefile knals/qswnal/Makefile \
-         knals/scimacnal/Makefile knals/ibnal/Makefile\
-          utils/Makefile tests/Makefile doc/Makefile ])
-
diff --git a/lustre/portals/include/config.h.in b/lustre/portals/include/config.h.in
deleted file mode 100644 (file)
index f295154..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-/* portals/include/config.h.in.  Generated from configure.in by autoheader.  */
-
-/* Use the Pinger */
-#undef ENABLE_PINGER
-
-/* Define to 1 if you have the <inttypes.h> header file. */
-#undef HAVE_INTTYPES_H
-
-/* Define to 1 if you have the <memory.h> header file. */
-#undef HAVE_MEMORY_H
-
-/* Define to 1 if you have the <stdint.h> header file. */
-#undef HAVE_STDINT_H
-
-/* Define to 1 if you have the <stdlib.h> header file. */
-#undef HAVE_STDLIB_H
-
-/* Define to 1 if you have the <strings.h> header file. */
-#undef HAVE_STRINGS_H
-
-/* Define to 1 if you have the <string.h> header file. */
-#undef HAVE_STRING_H
-
-/* Define to 1 if you have the <sys/stat.h> header file. */
-#undef HAVE_SYS_STAT_H
-
-/* Define to 1 if you have the <sys/types.h> header file. */
-#undef HAVE_SYS_TYPES_H
-
-/* Define to 1 if you have the <unistd.h> header file. */
-#undef HAVE_UNISTD_H
-
-/* IOCTL Buffer Size */
-#undef OBD_MAX_IOCTL_BUFFER
-
-/* Name of package */
-#undef PACKAGE
-
-/* Define to the address where bug reports for this package should be sent. */
-#undef PACKAGE_BUGREPORT
-
-/* Define to the full name of this package. */
-#undef PACKAGE_NAME
-
-/* Define to the full name and version of this package. */
-#undef PACKAGE_STRING
-
-/* Define to the one symbol short name of this package. */
-#undef PACKAGE_TARNAME
-
-/* Define to the version of this package. */
-#undef PACKAGE_VERSION
-
-/* The size of a `unsigned long long', as computed by sizeof. */
-#undef SIZEOF_UNSIGNED_LONG_LONG
-
-/* Define to 1 if you have the ANSI C header files. */
-#undef STDC_HEADERS
-
-/* Version number of package */
-#undef VERSION
index c080a57..9e7e7c2 100644 (file)
@@ -4,6 +4,7 @@
 #ifndef _KP30_INCLUDED
 #define _KP30_INCLUDED
 
+#include <linux/libcfs.h>
 #define PORTAL_DEBUG
 
 #ifndef offsetof
 
 #define LOWEST_BIT_SET(x)       ((x) & ~((x) - 1))
 
-/*
- *  Debugging
- */
-extern unsigned int portal_subsystem_debug;
-extern unsigned int portal_stack;
-extern unsigned int portal_debug;
-extern unsigned int portal_printk;
-extern unsigned int portal_cerror;
-/* Debugging subsystems (32 bits, non-overlapping) */
-#define S_UNDEFINED   0x00000001
-#define S_MDC         0x00000002
-#define S_MDS         0x00000004
-#define S_OSC         0x00000008
-#define S_OST         0x00000010
-#define S_CLASS       0x00000020
-#define S_LOG         0x00000040
-#define S_LLITE       0x00000080
-#define S_RPC         0x00000100
-#define S_MGMT        0x00000200
-#define S_PORTALS     0x00000400
-#define S_SOCKNAL     0x00000800
-#define S_QSWNAL      0x00001000
-#define S_PINGER      0x00002000
-#define S_FILTER      0x00004000
-#define S_PTLBD       0x00008000
-#define S_ECHO        0x00010000
-#define S_LDLM        0x00020000
-#define S_LOV         0x00040000
-#define S_GMNAL       0x00080000
-#define S_PTLROUTER   0x00100000
-#define S_COBD        0x00200000
-#define S_IBNAL       0x00400000
-
-/* If you change these values, please keep portals/utils/debug.c
- * up to date! */
-
-/* Debugging masks (32 bits, non-overlapping) */
-#define D_TRACE       0x00000001 /* ENTRY/EXIT markers */
-#define D_INODE       0x00000002
-#define D_SUPER       0x00000004
-#define D_EXT2        0x00000008 /* anything from ext2_debug */
-#define D_MALLOC      0x00000010 /* print malloc, free information */
-#define D_CACHE       0x00000020 /* cache-related items */
-#define D_INFO        0x00000040 /* general information */
-#define D_IOCTL       0x00000080 /* ioctl related information */
-#define D_BLOCKS      0x00000100 /* ext2 block allocation */
-#define D_NET         0x00000200 /* network communications */
-#define D_WARNING     0x00000400 /* CWARN(...) == CDEBUG (D_WARNING, ...) */
-#define D_BUFFS       0x00000800
-#define D_OTHER       0x00001000
-#define D_DENTRY      0x00002000
-#define D_PORTALS     0x00004000 /* ENTRY/EXIT markers */
-#define D_PAGE        0x00008000 /* bulk page handling */
-#define D_DLMTRACE    0x00010000
-#define D_ERROR       0x00020000 /* CERROR(...) == CDEBUG (D_ERROR, ...) */
-#define D_EMERG       0x00040000 /* CEMERG(...) == CDEBUG (D_EMERG, ...) */
-#define D_HA          0x00080000 /* recovery and failover */
-#define D_RPCTRACE    0x00100000 /* for distributed debugging */
-#define D_VFSTRACE    0x00200000
-#define D_READA       0x00400000 /* read-ahead */
-
-#ifdef __KERNEL__
-# include <linux/sched.h> /* THREAD_SIZE */
-#else
-# ifndef THREAD_SIZE /* x86_64 has THREAD_SIZE in userspace */
-#  define THREAD_SIZE 8192
-# endif
-#endif
-
-#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5)
-
-#ifdef __KERNEL__
-# ifdef  __ia64__
-#  define CDEBUG_STACK (THREAD_SIZE -                                      \
-                        ((unsigned long)__builtin_dwarf_cfa() &            \
-                         (THREAD_SIZE - 1)))
-# else
-#  define CDEBUG_STACK (THREAD_SIZE -                                      \
-                        ((unsigned long)__builtin_frame_address(0) &       \
-                         (THREAD_SIZE - 1)))
-# endif
-
-#define CHECK_STACK(stack)                                                    \
-        do {                                                                  \
-                if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) {    \
-                        portals_debug_msg(DEBUG_SUBSYSTEM, D_WARNING,         \
-                                          __FILE__, __FUNCTION__, __LINE__,   \
-                                          (stack),"maximum lustre stack %u\n",\
-                                          portal_stack = (stack));            \
-                      /*panic("LBUG");*/                                      \
-                }                                                             \
-        } while (0)
-#else /* __KERNEL__ */
-#define CHECK_STACK(stack) do { } while(0)
-#define CDEBUG_STACK (0L)
-#endif /* __KERNEL__ */
-
-#if 1
-#define CDEBUG(mask, format, a...)                                            \
-do {                                                                          \
-        if (portal_cerror == 0)                                               \
-                break;                                                        \
-        CHECK_STACK(CDEBUG_STACK);                                            \
-        if (((mask) & (D_ERROR | D_EMERG | D_WARNING)) ||                     \
-            (portal_debug & (mask) &&                                         \
-             portal_subsystem_debug & DEBUG_SUBSYSTEM))                       \
-                portals_debug_msg(DEBUG_SUBSYSTEM, mask,                      \
-                                  __FILE__, __FUNCTION__, __LINE__,           \
-                                  CDEBUG_STACK, format, ## a);                \
-} while (0)
-
-#define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a)
-#define CERROR(format, a...) CDEBUG(D_ERROR, format, ## a)
-#define CEMERG(format, a...) CDEBUG(D_EMERG, format, ## a)
-
-#define GOTO(label, rc)                                                 \
-do {                                                                    \
-        long GOTO__ret = (long)(rc);                                    \
-        CDEBUG(D_TRACE,"Process leaving via %s (rc=%lu : %ld : %lx)\n", \
-               #label, (unsigned long)GOTO__ret, (signed long)GOTO__ret,\
-               (signed long)GOTO__ret);                                 \
-        goto label;                                                     \
-} while (0)
-
-#define RETURN(rc)                                                      \
-do {                                                                    \
-        typeof(rc) RETURN__ret = (rc);                                  \
-        CDEBUG(D_TRACE, "Process leaving (rc=%lu : %ld : %lx)\n",       \
-               (long)RETURN__ret, (long)RETURN__ret, (long)RETURN__ret);\
-        return RETURN__ret;                                             \
-} while (0)
-
-#define ENTRY                                                           \
-do {                                                                    \
-        CDEBUG(D_TRACE, "Process entered\n");                           \
-} while (0)
-
-#define EXIT                                                            \
-do {                                                                    \
-        CDEBUG(D_TRACE, "Process leaving\n");                           \
-} while(0)
-#else
-#define CDEBUG(mask, format, a...)      do { } while (0)
-#define CWARN(format, a...)             do { } while (0)
-#define CERROR(format, a...)            printk("<3>" format, ## a)
-#define CEMERG(format, a...)            printk("<0>" format, ## a)
-#define GOTO(label, rc)                 do { (void)(rc); goto label; } while (0)
-#define RETURN(rc)                      return (rc)
-#define ENTRY                           do { } while (0)
-#define EXIT                            do { } while (0)
-#endif
-
 #ifdef __KERNEL__
 # include <linux/vmalloc.h>
 # include <linux/time.h>
@@ -172,7 +21,7 @@ do {                                                                    \
 # include <linux/highmem.h>
 # include <linux/module.h>
 # include <linux/version.h>
-# include <portals/lib-nal.h>
+# include <portals/p30.h>
 # include <linux/smp_lock.h>
 # include <asm/atomic.h>
 
@@ -353,188 +202,6 @@ do {                                                                    \
 #endif
 
 /******************************************************************************/
-/* Kernel Portals Router interface */
-
-typedef void (*kpr_fwd_callback_t)(void *arg, int error); // completion callback
-
-/* space for routing targets to stash "stuff" in a forwarded packet */
-typedef union {
-        long long        _alignment;
-        void            *_space[16];            /* scale with CPU arch */
-} kprfd_scratch_t;
-
-/* Kernel Portals Routing Forwarded message Descriptor */
-typedef struct {
-        struct list_head     kprfd_list;        /* stash in queues (routing target can use) */
-        ptl_nid_t            kprfd_target_nid;  /* final destination NID */
-        ptl_nid_t            kprfd_gateway_nid; /* gateway NID */
-        ptl_hdr_t           *kprfd_hdr;         /* header in wire byte order */
-        int                  kprfd_nob;         /* # payload bytes */
-        int                  kprfd_niov;        /* # payload frags */
-        ptl_kiov_t          *kprfd_kiov;        /* payload fragments */
-        void                *kprfd_router_arg;  /* originating NAL's router arg */
-        kpr_fwd_callback_t   kprfd_callback;    /* completion callback */
-        void                *kprfd_callback_arg; /* completion callback arg */
-        kprfd_scratch_t      kprfd_scratch;     /* scratchpad for routing targets */
-} kpr_fwd_desc_t;
-
-typedef void  (*kpr_fwd_t)(void *arg, kpr_fwd_desc_t *fwd);
-typedef void  (*kpr_notify_t)(void *arg, ptl_nid_t peer, int alive);
-
-/* NAL's routing interface (Kernel Portals Routing Nal Interface) */
-typedef const struct {
-        int             kprni_nalid;    /* NAL's id */
-        void           *kprni_arg;      /* Arg to pass when calling into NAL */
-        kpr_fwd_t       kprni_fwd;      /* NAL's forwarding entrypoint */
-        kpr_notify_t    kprni_notify;   /* NAL's notification entrypoint */
-} kpr_nal_interface_t;
-
-/* Router's routing interface (Kernel Portals Routing Router Interface) */
-typedef const struct {
-        /* register the calling NAL with the router and get back the handle for
-         * subsequent calls */
-        int     (*kprri_register) (kpr_nal_interface_t *nal_interface,
-                                   void **router_arg);
-
-        /* ask the router to find a gateway that forwards to 'nid' and is a
-         * peer of the calling NAL; assume caller will send 'nob' bytes of
-         * payload there */
-        int     (*kprri_lookup) (void *router_arg, ptl_nid_t nid, int nob,
-                                 ptl_nid_t *gateway_nid);
-
-        /* hand a packet over to the router for forwarding */
-        kpr_fwd_t kprri_fwd_start;
-
-        /* hand a packet back to the router for completion */
-        void    (*kprri_fwd_done) (void *router_arg, kpr_fwd_desc_t *fwd,
-                                   int error);
-
-        /* notify the router about peer state */
-        void    (*kprri_notify) (void *router_arg, ptl_nid_t peer,
-                                 int alive, time_t when);
-
-        /* the calling NAL is shutting down */
-        void    (*kprri_shutdown) (void *router_arg);
-
-        /* deregister the calling NAL with the router */
-        void    (*kprri_deregister) (void *router_arg);
-
-} kpr_router_interface_t;
-
-/* Convenient struct for NAL to stash router interface/args */
-typedef struct {
-        kpr_router_interface_t  *kpr_interface;
-        void                    *kpr_arg;
-} kpr_router_t;
-
-/* Router's control interface (Kernel Portals Routing Control Interface) */
-typedef const struct {
-        int     (*kprci_add_route)(int gateway_nal, ptl_nid_t gateway_nid,
-                                   ptl_nid_t lo_nid, ptl_nid_t hi_nid);
-        int     (*kprci_del_route)(int gateway_nal, ptl_nid_t gateway_nid,
-                                   ptl_nid_t lo_nid, ptl_nid_t hi_nid);
-        int     (*kprci_get_route)(int index, int *gateway_nal,
-                                   ptl_nid_t *gateway,
-                                   ptl_nid_t *lo_nid, ptl_nid_t *hi_nid,
-                                   int *alive);
-        int     (*kprci_notify)(int gateway_nal, ptl_nid_t gateway_nid,
-                                int alive, time_t when);
-} kpr_control_interface_t;
-
-extern kpr_control_interface_t  kpr_control_interface;
-extern kpr_router_interface_t   kpr_router_interface;
-
-static inline int
-kpr_register (kpr_router_t *router, kpr_nal_interface_t *nalif)
-{
-        int    rc;
-
-        router->kpr_interface = PORTAL_SYMBOL_GET (kpr_router_interface);
-        if (router->kpr_interface == NULL)
-                return (-ENOENT);
-
-        rc = (router->kpr_interface)->kprri_register (nalif, &router->kpr_arg);
-        if (rc != 0)
-                router->kpr_interface = NULL;
-
-        PORTAL_SYMBOL_PUT (kpr_router_interface);
-        return (rc);
-}
-
-static inline int
-kpr_routing (kpr_router_t *router)
-{
-        return (router->kpr_interface != NULL);
-}
-
-static inline int
-kpr_lookup (kpr_router_t *router, ptl_nid_t nid, int nob, ptl_nid_t *gateway_nid)
-{
-        if (!kpr_routing (router))
-                return (-ENETUNREACH);
-
-        return (router->kpr_interface->kprri_lookup(router->kpr_arg, nid, nob,
-                                                    gateway_nid));
-}
-
-static inline void
-kpr_fwd_init (kpr_fwd_desc_t *fwd, ptl_nid_t nid, ptl_hdr_t *hdr,
-              int nob, int niov, ptl_kiov_t *kiov,
-              kpr_fwd_callback_t callback, void *callback_arg)
-{
-        fwd->kprfd_target_nid   = nid;
-        fwd->kprfd_gateway_nid  = nid;
-        fwd->kprfd_hdr          = hdr;
-        fwd->kprfd_nob          = nob;
-        fwd->kprfd_niov         = niov;
-        fwd->kprfd_kiov         = kiov;
-        fwd->kprfd_callback     = callback;
-        fwd->kprfd_callback_arg = callback_arg;
-}
-
-static inline void
-kpr_fwd_start (kpr_router_t *router, kpr_fwd_desc_t *fwd)
-{
-        if (!kpr_routing (router))
-                fwd->kprfd_callback (fwd->kprfd_callback_arg, -ENETUNREACH);
-        else
-                router->kpr_interface->kprri_fwd_start (router->kpr_arg, fwd);
-}
-
-static inline void
-kpr_fwd_done (kpr_router_t *router, kpr_fwd_desc_t *fwd, int error)
-{
-        LASSERT (kpr_routing (router));
-        router->kpr_interface->kprri_fwd_done (router->kpr_arg, fwd, error);
-}
-
-static inline void
-kpr_notify (kpr_router_t *router,
-            ptl_nid_t peer, int alive, time_t when)
-{
-        if (!kpr_routing (router))
-                return;
-
-        router->kpr_interface->kprri_notify(router->kpr_arg, peer, alive, when);
-}
-
-static inline void
-kpr_shutdown (kpr_router_t *router)
-{
-        if (kpr_routing (router))
-                router->kpr_interface->kprri_shutdown (router->kpr_arg);
-}
-
-static inline void
-kpr_deregister (kpr_router_t *router)
-{
-        if (!kpr_routing (router))
-                return;
-        router->kpr_interface->kprri_deregister (router->kpr_arg);
-        router->kpr_interface = NULL;
-}
-
-/******************************************************************************/
 
 #ifdef PORTALS_PROFILING
 #define prof_enum(FOO) PROF__##FOO
@@ -804,38 +471,6 @@ do {                                                    \
 #define PING_SYNC       0
 #define PING_ASYNC      1
 
-struct portal_ioctl_data {
-        __u32 ioc_len;
-        __u32 ioc_version;
-        __u64 ioc_nid;
-        __u64 ioc_nid2;
-        __u64 ioc_nid3;
-        __u32 ioc_count;
-        __u32 ioc_nal;
-        __u32 ioc_nal_cmd;
-        __u32 ioc_fd;
-        __u32 ioc_id;
-
-        __u32 ioc_flags;
-        __u32 ioc_size;
-
-        __u32 ioc_wait;
-        __u32 ioc_timeout;
-        __u32 ioc_misc;
-
-        __u32 ioc_inllen1;
-        char *ioc_inlbuf1;
-        __u32 ioc_inllen2;
-        char *ioc_inlbuf2;
-
-        __u32 ioc_plen1; /* buffers in userspace */
-        char *ioc_pbuf1;
-        __u32 ioc_plen2; /* buffers in userspace */
-        char *ioc_pbuf2;
-
-        char ioc_bulk[0];
-};
-
 struct portal_ioctl_hdr {
         __u32 ioc_len;
         __u32 ioc_version;
@@ -1076,13 +711,6 @@ enum {
         DEBUG_DAEMON_CONTINUE    =  4,
 };
 
-/* XXX remove to lustre ASAP */
-struct lustre_peer {
-        ptl_nid_t       peer_nid;
-        ptl_handle_ni_t peer_ni;
-};
-
-
 /* module.c */
 typedef int (*nal_cmd_handler_t)(struct portals_cfg *, void * private);
 int kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private);
diff --git a/lustre/portals/include/linux/kpr.h b/lustre/portals/include/linux/kpr.h
new file mode 100644 (file)
index 0000000..45b58fe
--- /dev/null
@@ -0,0 +1,191 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _KPR_H
+#define _KPR_H
+
+# include <portals/lib-nal.h> /* for ptl_hdr_t */
+
+/******************************************************************************/
+/* Kernel Portals Router interface */
+
+/* Completion callback for a forwarded message.  It is called with the
+ * descriptor's kprfd_callback_arg and an error code; kpr_fwd_start() passes
+ * -ENETUNREACH when no router is bound. */
+typedef void (*kpr_fwd_callback_t)(void *arg, int error); // completion callback
+
+/* space for routing targets to stash "stuff" in a forwarded packet */
+typedef union {
+        long long        _alignment;            /* presumably only forces alignment -- confirm */
+        void            *_space[16];            /* scale with CPU arch */
+} kprfd_scratch_t;
+
+/* Kernel Portals Routing Forwarded message Descriptor
+ *
+ * kpr_fwd_init() fills in both NIDs (initially the same value), the header,
+ * the payload description and the completion callback.  It leaves
+ * kprfd_list, kprfd_router_arg and kprfd_scratch untouched. */
+typedef struct {
+        struct list_head     kprfd_list;        /* stash in queues (routing target can use) */
+        ptl_nid_t            kprfd_target_nid;  /* final destination NID */
+        ptl_nid_t            kprfd_gateway_nid; /* gateway NID */
+        ptl_hdr_t           *kprfd_hdr;         /* header in wire byte order */
+        int                  kprfd_nob;         /* # payload bytes */
+        int                  kprfd_niov;        /* # payload frags */
+        ptl_kiov_t          *kprfd_kiov;        /* payload fragments */
+        void                *kprfd_router_arg;  /* originating NAL's router arg */
+        kpr_fwd_callback_t   kprfd_callback;    /* completion callback */
+        void                *kprfd_callback_arg; /* completion callback arg */
+        kprfd_scratch_t      kprfd_scratch;     /* scratchpad for routing targets */
+} kpr_fwd_desc_t;
+
+/* Function-pointer types for the forwarding and peer-notification entry
+ * points; both take an opaque 'arg' as their first parameter. */
+typedef void  (*kpr_fwd_t)(void *arg, kpr_fwd_desc_t *fwd);
+typedef void  (*kpr_notify_t)(void *arg, ptl_nid_t peer, int alive);
+
+/* NAL's routing interface (Kernel Portals Routing Nal Interface)
+ *
+ * This is what a NAL hands to kpr_register().  Note that the typedef itself
+ * is const-qualified, so instances are read-only once initialised. */
+typedef const struct {
+        int             kprni_nalid;    /* NAL's id */
+        void           *kprni_arg;      /* Arg to pass when calling into NAL */
+        kpr_fwd_t       kprni_fwd;      /* NAL's forwarding entrypoint */
+        kpr_notify_t    kprni_notify;   /* NAL's notification entrypoint */
+} kpr_nal_interface_t;
+
+/* Router's routing interface (Kernel Portals Routing Router Interface)
+ *
+ * Table of hooks exported by the router.  The kpr_*() inline wrappers in
+ * this header call through it, passing the opaque 'router_arg' that
+ * kprri_register stored in the caller's kpr_router_t.  The typedef is
+ * const-qualified, so the table is read-only to its users. */
+typedef const struct {
+        /* register the calling NAL with the router and get back the handle for
+         * subsequent calls */
+        int     (*kprri_register) (kpr_nal_interface_t *nal_interface,
+                                   void **router_arg);
+
+        /* ask the router to find a gateway that forwards to 'nid' and is a
+         * peer of the calling NAL; assume caller will send 'nob' bytes of
+         * payload there */
+        int     (*kprri_lookup) (void *router_arg, ptl_nid_t nid, int nob,
+                                 ptl_nid_t *gateway_nid);
+
+        /* hand a packet over to the router for forwarding */
+        kpr_fwd_t kprri_fwd_start;
+
+        /* hand a packet back to the router for completion */
+        void    (*kprri_fwd_done) (void *router_arg, kpr_fwd_desc_t *fwd,
+                                   int error);
+
+        /* notify the router about peer state */
+        void    (*kprri_notify) (void *router_arg, ptl_nid_t peer,
+                                 int alive, time_t when);
+
+        /* the calling NAL is shutting down */
+        void    (*kprri_shutdown) (void *router_arg);
+
+        /* deregister the calling NAL with the router */
+        void    (*kprri_deregister) (void *router_arg);
+
+} kpr_router_interface_t;
+
+/* Convenient struct for NAL to stash router interface/args
+ *
+ * kpr_interface == NULL means "no router bound": kpr_routing() tests exactly
+ * that, and kpr_register()/kpr_deregister() reset it to NULL on failure and
+ * on teardown respectively.  kpr_arg is the opaque handle filled in by the
+ * router's kprri_register hook. */
+typedef struct {
+        kpr_router_interface_t  *kpr_interface;
+        void                    *kpr_arg;
+} kpr_router_t;
+
+/* Router's control interface (Kernel Portals Routing Control Interface)
+ *
+ * Hooks for adding, deleting and enumerating routes and for reporting
+ * gateway state.  Their exact semantics and return conventions are defined
+ * by the router implementation and are not visible in this header. */
+typedef const struct {
+        int     (*kprci_add_route)(int gateway_nal, ptl_nid_t gateway_nid,
+                                   ptl_nid_t lo_nid, ptl_nid_t hi_nid);
+        int     (*kprci_del_route)(int gateway_nal, ptl_nid_t gateway_nid,
+                                   ptl_nid_t lo_nid, ptl_nid_t hi_nid);
+        int     (*kprci_get_route)(int index, int *gateway_nal,
+                                   ptl_nid_t *gateway,
+                                   ptl_nid_t *lo_nid, ptl_nid_t *hi_nid,
+                                   int *alive);
+        int     (*kprci_notify)(int gateway_nal, ptl_nid_t gateway_nid,
+                                int alive, time_t when);
+} kpr_control_interface_t;
+
+/* The two interface tables; declared here, defined elsewhere (presumably by
+ * the router module -- confirm). */
+extern kpr_control_interface_t  kpr_control_interface;
+extern kpr_router_interface_t   kpr_router_interface;
+
+/* Bind 'router' to the router interface and register 'nalif' with it.
+ *
+ * Returns -ENOENT if the kpr_router_interface symbol cannot be obtained,
+ * otherwise whatever the router's kprri_register hook returns.  On a
+ * non-zero result the stored interface pointer is reset to NULL so that
+ * kpr_routing() reports false.  The symbol reference taken here is dropped
+ * again before returning, whether or not registration succeeded. */
+static inline int
+kpr_register (kpr_router_t *router, kpr_nal_interface_t *nalif)
+{
+        kpr_router_interface_t *rif;
+        int                     rc;
+
+        rif = PORTAL_SYMBOL_GET (kpr_router_interface);
+        router->kpr_interface = rif;
+        if (rif == NULL)
+                return (-ENOENT);
+
+        rc = rif->kprri_register (nalif, &router->kpr_arg);
+        if (rc != 0)
+                router->kpr_interface = NULL;
+
+        PORTAL_SYMBOL_PUT (kpr_router_interface);
+        return (rc);
+}
+
+/* Returns 1 if 'router' currently holds a router interface, 0 otherwise. */
+static inline int
+kpr_routing (kpr_router_t *router)
+{
+        if (router->kpr_interface == NULL)
+                return (0);
+
+        return (1);
+}
+
+/* Ask the bound router for a gateway towards 'nid' for a 'nob'-byte payload.
+ * The router's kprri_lookup result is returned unchanged; without a bound
+ * router the call fails with -ENETUNREACH and '*gateway_nid' is not touched. */
+static inline int
+kpr_lookup (kpr_router_t *router, ptl_nid_t nid, int nob, ptl_nid_t *gateway_nid)
+{
+        if (kpr_routing (router))
+                return (router->kpr_interface->kprri_lookup(router->kpr_arg,
+                                                            nid, nob,
+                                                            gateway_nid));
+
+        return (-ENETUNREACH);
+}
+
+/* Prepare forwarding descriptor 'fwd' for a message destined for 'nid'.
+ *
+ * The gateway NID starts out equal to the target NID.  kprfd_list,
+ * kprfd_router_arg and kprfd_scratch are not initialised here. */
+static inline void
+kpr_fwd_init (kpr_fwd_desc_t *fwd, ptl_nid_t nid, ptl_hdr_t *hdr,
+              int nob, int niov, ptl_kiov_t *kiov,
+              kpr_fwd_callback_t callback, void *callback_arg)
+{
+        /* addressing */
+        fwd->kprfd_gateway_nid  = nid;
+        fwd->kprfd_target_nid   = nid;
+
+        /* header and payload description */
+        fwd->kprfd_hdr          = hdr;
+        fwd->kprfd_kiov         = kiov;
+        fwd->kprfd_niov         = niov;
+        fwd->kprfd_nob          = nob;
+
+        /* completion */
+        fwd->kprfd_callback_arg = callback_arg;
+        fwd->kprfd_callback     = callback;
+}
+
+/* Hand 'fwd' to the bound router for forwarding.  If no router is bound the
+ * descriptor is completed on the spot by invoking its callback with
+ * -ENETUNREACH. */
+static inline void
+kpr_fwd_start (kpr_router_t *router, kpr_fwd_desc_t *fwd)
+{
+        if (kpr_routing (router)) {
+                router->kpr_interface->kprri_fwd_start (router->kpr_arg, fwd);
+                return;
+        }
+
+        fwd->kprfd_callback (fwd->kprfd_callback_arg, -ENETUNREACH);
+}
+
+/* Hand 'fwd' back to the router for completion with status 'error'.
+ * Unlike the other wrappers this one asserts that a router is bound rather
+ * than silently doing nothing. */
+static inline void
+kpr_fwd_done (kpr_router_t *router, kpr_fwd_desc_t *fwd, int error)
+{
+        LASSERT (kpr_routing (router));
+        router->kpr_interface->kprri_fwd_done (router->kpr_arg, fwd, error);
+}
+
+/* Forward a peer state report ('alive' flag and timestamp 'when') to the
+ * bound router; does nothing when no router is bound. */
+static inline void
+kpr_notify (kpr_router_t *router,
+            ptl_nid_t peer, int alive, time_t when)
+{
+        if (kpr_routing (router))
+                router->kpr_interface->kprri_notify(router->kpr_arg, peer,
+                                                    alive, when);
+}
+
+/* Tell the bound router that the calling NAL is shutting down; does nothing
+ * when no router is bound.  The binding itself is left in place. */
+static inline void
+kpr_shutdown (kpr_router_t *router)
+{
+        if (!kpr_routing (router))
+                return;
+
+        router->kpr_interface->kprri_shutdown (router->kpr_arg);
+}
+
+/* Deregister from the bound router and forget the binding, so that
+ * kpr_routing() reports false afterwards.  Does nothing when no router is
+ * bound. */
+static inline void
+kpr_deregister (kpr_router_t *router)
+{
+        if (kpr_routing (router)) {
+                router->kpr_interface->kprri_deregister (router->kpr_arg);
+                router->kpr_interface = NULL;
+        }
+}
+
+#endif /* _KPR_H */
diff --git a/lustre/portals/include/linux/libcfs.h b/lustre/portals/include/linux/libcfs.h
new file mode 100644 (file)
index 0000000..ff51787
--- /dev/null
@@ -0,0 +1,222 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _LIBCFS_H
+/* Define the guard immediately, so that a nested or recursive include of
+ * this header is skipped instead of re-parsing the declarations below.  (An
+ * identical, empty re-definition further down the header is harmless.) */
+#define _LIBCFS_H
+
+#define PORTAL_DEBUG
+
+/* Fallback for environments that do not already provide offsetof(): the
+ * byte offset of 'memb' within 'typ', as an int.  The address is converted
+ * through unsigned long first so that the narrowing to int is explicit on
+ * targets where pointers are wider than int. */
+#ifndef offsetof
+# define offsetof(typ,memb)     ((int)(unsigned long)((char *)&(((typ *)0)->memb)))
+#endif
+
+/* Isolate the least significant set bit of x (yields 0 when x is 0). */
+#define LOWEST_BIT_SET(x)       ((x) & ~((x) - 1))
+
+/*
+ *  Debugging
+ *
+ *  Global knobs consulted by the logging macros in this header:
+ *   - portal_cerror: CDEBUG() does nothing at all while this is 0.
+ *   - portal_debug / portal_subsystem_debug: bit masks that CDEBUG() tests
+ *     against the message's D_* mask and the caller's DEBUG_SUBSYSTEM.
+ *   - portal_stack: largest stack usage reported so far by CHECK_STACK().
+ *   - portal_printk: not referenced in this header.
+ */
+extern unsigned int portal_subsystem_debug;
+extern unsigned int portal_stack;
+extern unsigned int portal_debug;
+extern unsigned int portal_printk;
+extern unsigned int portal_cerror;
+/* Debugging subsystems (32 bits, non-overlapping) */
+/* A source file is expected to #define DEBUG_SUBSYSTEM (presumably to one of
+ * these values -- confirm) before using CDEBUG() and friends. */
+#define S_UNDEFINED   0x00000001
+#define S_MDC         0x00000002
+#define S_MDS         0x00000004
+#define S_OSC         0x00000008
+#define S_OST         0x00000010
+#define S_CLASS       0x00000020
+#define S_LOG         0x00000040
+#define S_LLITE       0x00000080
+#define S_RPC         0x00000100
+#define S_MGMT        0x00000200
+#define S_PORTALS     0x00000400
+#define S_SOCKNAL     0x00000800
+#define S_QSWNAL      0x00001000
+#define S_PINGER      0x00002000
+#define S_FILTER      0x00004000
+#define S_PTLBD       0x00008000
+#define S_ECHO        0x00010000
+#define S_LDLM        0x00020000
+#define S_LOV         0x00040000
+#define S_GMNAL       0x00080000
+#define S_PTLROUTER   0x00100000
+#define S_COBD        0x00200000
+#define S_IBNAL       0x00400000
+
+/* If you change these values, please keep portals/utils/debug.c
+ * up to date! */
+
+/* Debugging masks (32 bits, non-overlapping) */
+/* Messages carrying D_ERROR, D_EMERG or D_WARNING are emitted by CDEBUG()
+ * regardless of portal_debug; all other masks must be enabled there. */
+#define D_TRACE       0x00000001 /* ENTRY/EXIT markers */
+#define D_INODE       0x00000002
+#define D_SUPER       0x00000004
+#define D_EXT2        0x00000008 /* anything from ext2_debug */
+#define D_MALLOC      0x00000010 /* print malloc, free information */
+#define D_CACHE       0x00000020 /* cache-related items */
+#define D_INFO        0x00000040 /* general information */
+#define D_IOCTL       0x00000080 /* ioctl related information */
+#define D_BLOCKS      0x00000100 /* ext2 block allocation */
+#define D_NET         0x00000200 /* network communications */
+#define D_WARNING     0x00000400 /* CWARN(...) == CDEBUG (D_WARNING, ...) */
+#define D_BUFFS       0x00000800
+#define D_OTHER       0x00001000
+#define D_DENTRY      0x00002000
+/* NOTE(review): the D_PORTALS comment duplicates D_TRACE's; the ENTRY/EXIT
+ * macros in this header use D_TRACE only -- confirm what D_PORTALS covers. */
+#define D_PORTALS     0x00004000 /* ENTRY/EXIT markers */
+#define D_PAGE        0x00008000 /* bulk page handling */
+#define D_DLMTRACE    0x00010000
+#define D_ERROR       0x00020000 /* CERROR(...) == CDEBUG (D_ERROR, ...) */
+#define D_EMERG       0x00040000 /* CEMERG(...) == CDEBUG (D_EMERG, ...) */
+#define D_HA          0x00080000 /* recovery and failover */
+#define D_RPCTRACE    0x00100000 /* for distributed debugging */
+#define D_VFSTRACE    0x00200000
+#define D_READA       0x00400000 /* read-ahead */
+
+/* THREAD_SIZE: taken from the kernel headers when building for the kernel;
+ * in userspace it defaults to 8192 unless the platform already defines it. */
+#ifdef __KERNEL__
+# include <linux/sched.h> /* THREAD_SIZE */
+#else
+# ifndef THREAD_SIZE /* x86_64 has THREAD_SIZE in userspace */
+#  define THREAD_SIZE 8192
+# endif
+#endif
+
+/* THREAD_SIZE / 32 */
+#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5)
+
+#ifdef __KERNEL__
+/* CDEBUG_STACK: THREAD_SIZE minus the current frame address modulo
+ * THREAD_SIZE.  Presumably this is the number of stack bytes in use, assuming
+ * a THREAD_SIZE-aligned stack that grows downwards -- confirm per arch. */
+# ifdef  __ia64__
+#  define CDEBUG_STACK (THREAD_SIZE -                                      \
+                        ((unsigned long)__builtin_dwarf_cfa() &            \
+                         (THREAD_SIZE - 1)))
+# else
+#  define CDEBUG_STACK (THREAD_SIZE -                                      \
+                        ((unsigned long)__builtin_frame_address(0) &       \
+                         (THREAD_SIZE - 1)))
+# endif
+
+/* CHECK_STACK(stack): if 'stack' exceeds three quarters of THREAD_SIZE and
+ * is larger than the recorded maximum, log a D_WARNING message.  The new
+ * maximum is stored into portal_stack as a side effect of building the
+ * message arguments.  'stack' is evaluated more than once. */
+#define CHECK_STACK(stack)                                                    \
+        do {                                                                  \
+                if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) {    \
+                        portals_debug_msg(DEBUG_SUBSYSTEM, D_WARNING,         \
+                                          __FILE__, __FUNCTION__, __LINE__,   \
+                                          (stack),"maximum lustre stack %u\n",\
+                                          portal_stack = (stack));            \
+                      /*panic("LBUG");*/                                      \
+                }                                                             \
+        } while (0)
+#else /* __KERNEL__ */
+/* Userspace: no stack accounting. */
+#define CHECK_STACK(stack) do { } while(0)
+#define CDEBUG_STACK (0L)
+#endif /* __KERNEL__ */
+
+#if 1
+/* CDEBUG(mask, format, ...): conditional debug message.
+ *
+ *  - Does nothing (not even the stack check) while portal_cerror is 0.
+ *  - Otherwise runs CHECK_STACK() and then calls portals_debug_msg() if
+ *    'mask' contains D_ERROR, D_EMERG or D_WARNING, or if 'mask' is enabled
+ *    in portal_debug and DEBUG_SUBSYSTEM is enabled in
+ *    portal_subsystem_debug.
+ *
+ * DEBUG_SUBSYSTEM is not defined in this header; the including file must
+ * provide it.  'mask' is evaluated more than once. */
+#define CDEBUG(mask, format, a...)                                            \
+do {                                                                          \
+        if (portal_cerror == 0)                                               \
+                break;                                                        \
+        CHECK_STACK(CDEBUG_STACK);                                            \
+        if (((mask) & (D_ERROR | D_EMERG | D_WARNING)) ||                     \
+            (portal_debug & (mask) &&                                         \
+             portal_subsystem_debug & DEBUG_SUBSYSTEM))                       \
+                portals_debug_msg(DEBUG_SUBSYSTEM, mask,                      \
+                                  __FILE__, __FUNCTION__, __LINE__,           \
+                                  CDEBUG_STACK, format, ## a);                \
+} while (0)
+
+/* Shorthands for the three masks that CDEBUG() never filters by portal_debug. */
+#define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a)
+#define CERROR(format, a...) CDEBUG(D_ERROR, format, ## a)
+#define CEMERG(format, a...) CDEBUG(D_EMERG, format, ## a)
+
+/* GOTO(label, rc): trace 'rc' at D_TRACE level, then jump to 'label'.
+ * 'rc' is evaluated exactly once and converted to long for printing; it is
+ * shown as unsigned decimal, signed decimal and hex.  The %lu and %lx
+ * conversions are given unsigned long arguments, as those specifiers
+ * require. */
+#define GOTO(label, rc)                                                 \
+do {                                                                    \
+        long GOTO__ret = (long)(rc);                                    \
+        CDEBUG(D_TRACE,"Process leaving via %s (rc=%lu : %ld : %lx)\n", \
+               #label, (unsigned long)GOTO__ret, (signed long)GOTO__ret,\
+               (unsigned long)GOTO__ret);                               \
+        goto label;                                                     \
+} while (0)
+
+/* RETURN(rc): trace 'rc' at D_TRACE level, then return it from the calling
+ * function.  'rc' is evaluated exactly once and returned with its own type;
+ * only the traced copy is converted.  The conversion goes through long
+ * first, so the printed value is the same as before for every type of 'rc',
+ * and then to unsigned long where the format specifier is unsigned. */
+#define RETURN(rc)                                                      \
+do {                                                                    \
+        typeof(rc) RETURN__ret = (rc);                                  \
+        CDEBUG(D_TRACE, "Process leaving (rc=%lu : %ld : %lx)\n",       \
+               (unsigned long)(long)RETURN__ret, (long)RETURN__ret,     \
+               (unsigned long)(long)RETURN__ret);                       \
+        return RETURN__ret;                                             \
+} while (0)
+
+/* ENTRY: emit a D_TRACE "Process entered" message. */
+#define ENTRY                                                           \
+do {                                                                    \
+        CDEBUG(D_TRACE, "Process entered\n");                           \
+} while (0)
+
+/* EXIT: emit a D_TRACE "Process leaving" message; does not itself return. */
+#define EXIT                                                            \
+do {                                                                    \
+        CDEBUG(D_TRACE, "Process leaving\n");                           \
+} while(0)
+#else
+/* Variants used when the tracing implementation is compiled out (the
+ * enclosing conditional is currently hard-wired to the other branch).
+ * CDEBUG/CWARN/ENTRY/EXIT become no-ops, CERROR/CEMERG go straight to
+ * printk() with a "<3>" / "<0>" prefix, GOTO still evaluates 'rc' before
+ * jumping, and RETURN is a plain return. */
+#define CDEBUG(mask, format, a...)      do { } while (0)
+#define CWARN(format, a...)             do { } while (0)
+#define CERROR(format, a...)            printk("<3>" format, ## a)
+#define CEMERG(format, a...)            printk("<0>" format, ## a)
+#define GOTO(label, rc)                 do { (void)(rc); goto label; } while (0)
+#define RETURN(rc)                      return (rc)
+#define ENTRY                           do { } while (0)
+#define EXIT                            do { } while (0)
+#endif
+
+/* Argument block passed to libcfs ioctl handlers (see
+ * libcfs_ioctl_handler.handle_ioctl).  The meaning of most fields is set by
+ * the individual ioctl commands and is not visible in this header.
+ *
+ * NOTE(review): the struct embeds raw pointers, so its layout differs
+ * between 32-bit and 64-bit builds -- confirm this is acceptable for every
+ * user of the ioctl interface. */
+struct portal_ioctl_data {
+        __u32 ioc_len;
+        __u32 ioc_version;
+        __u64 ioc_nid;
+        __u64 ioc_nid2;
+        __u64 ioc_nid3;
+        __u32 ioc_count;
+        __u32 ioc_nal;
+        __u32 ioc_nal_cmd;
+        __u32 ioc_fd;
+        __u32 ioc_id;
+
+        __u32 ioc_flags;
+        __u32 ioc_size;
+
+        __u32 ioc_wait;
+        __u32 ioc_timeout;
+        __u32 ioc_misc;
+
+        /* presumably lengths/pointers of inline buffers carried in ioc_bulk
+         * -- confirm against the pack/unpack code */
+        __u32 ioc_inllen1;
+        char *ioc_inlbuf1;
+        __u32 ioc_inllen2;
+        char *ioc_inlbuf2;
+
+        __u32 ioc_plen1; /* buffers in userspace */
+        char *ioc_pbuf1;
+        __u32 ioc_plen2; /* buffers in userspace */
+        char *ioc_pbuf2;
+
+        /* zero-length trailing array: variable-sized data follows the struct */
+        char ioc_bulk[0];
+};
+
+#ifdef __KERNEL__
+
+#include <linux/list.h>
+
+/* One registered ioctl handler: a list linkage plus the callback that
+ * receives the ioctl data block, the command number and the raw argument. */
+struct libcfs_ioctl_handler {
+        struct list_head item;
+        int (*handle_ioctl)(struct portal_ioctl_data *data,
+                            unsigned int cmd, unsigned long args);
+};
+
+/* Define a handler object named 'ident' that calls 'func'.  Its list linkage
+ * starts out as an empty, self-linked list head.  The object has external
+ * linkage unless the caller prefixes the macro with 'static'. */
+#define DECLARE_IOCTL_HANDLER(ident, func)              \
+        struct libcfs_ioctl_handler ident = {           \
+                .item = LIST_HEAD_INIT(ident.item),     \
+                .handle_ioctl = func                    \
+        }
+
+/* Add/remove a handler; return conventions are defined by the
+ * implementation, which is not visible in this header. */
+int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand);
+int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand);
+
+#endif
+
+#define _LIBCFS_H
+
+#endif /* _LIBCFS_H */
diff --git a/lustre/portals/include/linux/lustre_list.h b/lustre/portals/include/linux/lustre_list.h
new file mode 100644 (file)
index 0000000..a218f2c
--- /dev/null
@@ -0,0 +1,246 @@
+#ifndef _LUSTRE_LIST_H
+#define _LUSTRE_LIST_H
+
+#ifdef __KERNEL__
+#include <linux/list.h>
+#else
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+#define prefetch(a) ((void)a) /* non-kernel build: no-op that only casts its argument to void */
+
+struct list_head { /* node of a circular doubly linked list; an empty list head points at itself */
+       struct list_head *next, *prev;
+};
+
+typedef struct list_head list_t; /* shorthand for struct list_head */
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) } /* static initialiser: next and prev both point back at name */
+
+#define LIST_HEAD(name) /* define an empty list head called name */ \
+       struct list_head name = LIST_HEAD_INIT(name)
+
+#define INIT_LIST_HEAD(ptr) do { /* run-time form: make *ptr an empty list */ \
+       (ptr)->next = (ptr); (ptr)->prev = (ptr); \
+} while (0)
+
+/*
+ * Insert @new between @prev and @next, which the caller guarantees
+ * are consecutive entries (prev->next == next).
+ *
+ * Internal helper: list_add() and list_add_tail() supply prev/next.
+ */
+static inline void __list_add(struct list_head * new,
+                             struct list_head * prev,
+                             struct list_head * next)
+{
+       next->prev = new;
+       new->next = next;
+       new->prev = prev;
+       prev->next = new;
+}
+
+/**
+ * list_add - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head, so it becomes head->next.
+ * Repeated calls give last-in-first-out order: good for implementing stacks.
+ */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+       __list_add(new, head, head->next);
+}
+
+/**
+ * list_add_tail - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head, so it becomes head->prev.
+ * Repeated calls give first-in-first-out order: useful for implementing queues.
+ */
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+       __list_add(new, head->prev, head);
+}
+
+/*
+ * Unlink whatever lies between @prev and @next by making them
+ * point to each other.
+ *
+ * Internal helper: the removed entry's own next/prev pointers
+ * are not touched here.
+ */
+static inline void __list_del(struct list_head * prev, struct list_head * next)
+{
+       next->prev = prev;
+       prev->next = next;
+}
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: @entry's own next/prev still point at its old neighbours, so list_empty(entry) is not true afterwards; use list_del_init() for that.
+ */
+static inline void list_del(struct list_head *entry)
+{
+       __list_del(entry->prev, entry->next);
+}
+
+/**
+ * list_del_init - deletes entry from list and reinitializes it, so list_empty(entry) is true afterwards.
+ * @entry: the element to delete from the list.
+ */
+static inline void list_del_init(struct list_head *entry)
+{
+       __list_del(entry->prev, entry->next);
+       INIT_LIST_HEAD(entry);
+}
+
+/**
+ * list_move - delete from one list and add as another's head
+ * @list: the entry to move
+ * @head: the head that will precede our entry (@list becomes head->next)
+ */
+static inline void list_move(struct list_head *list, struct list_head *head)
+{
+       __list_del(list->prev, list->next);
+       list_add(list, head);
+}
+
+/**
+ * list_move_tail - delete from one list and add as another's tail
+ * @list: the entry to move
+ * @head: the head that will follow our entry (@list becomes head->prev)
+ */
+static inline void list_move_tail(struct list_head *list,
+                                 struct list_head *head)
+{
+       __list_del(list->prev, list->next);
+       list_add_tail(list, head);
+}
+
+/**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test; it is empty when head->next points back at head.
+ */
+static inline int list_empty(struct list_head *head)
+{
+       return head->next == head;
+}
+
+static inline void __list_splice(struct list_head *list,
+                                struct list_head *head)
+{
+       struct list_head *first = list->next; /* first entry of @list */
+       struct list_head *last = list->prev; /* last entry of @list */
+       struct list_head *at = head->next; /* entry that will follow the spliced run */
+       /* list_splice() and list_splice_init() only call this when @list is non-empty */
+       first->prev = head;
+       head->next = first;
+
+       last->next = at;
+       at->prev = last;
+}
+
+/**
+ * list_splice - join two lists (@list's own head is not reinitialised)
+ * @list: the new list to add.
+ * @head: the place to add it in the first list; the entries go right after @head.
+ */
+static inline void list_splice(struct list_head *list, struct list_head *head)
+{
+       if (!list_empty(list))
+               __list_splice(list, head);
+}
+
+/**
+ * list_splice_init - join two lists and reinitialise the emptied list.
+ * @list: the new list to add.
+ * @head: the place to add it in the first list; the entries go right after @head.
+ *
+ * The list at @list is reinitialised (nothing is done if it was already empty)
+ */
+static inline void list_splice_init(struct list_head *list,
+                                   struct list_head *head)
+{
+       if (!list_empty(list)) {
+               __list_splice(list, head);
+               INIT_LIST_HEAD(list);
+       }
+}
+
+/**
+ * list_entry - get the struct for this entry by subtracting @member's byte offset from @ptr
+ * @ptr:       the &struct list_head pointer.
+ * @type:      the type of the struct this is embedded in.
+ * @member:    the name of the list_struct within the struct.
+ */
+#define list_entry(ptr, type, member) \
+       ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+
+/**
+ * list_for_each       -       iterate over a list; the body must leave pos->next valid
+ * @pos:       the &struct list_head to use as a loop counter.
+ * @head:      the head for your list.
+ */
+#define list_for_each(pos, head) \
+       for (pos = (head)->next, prefetch(pos->next); pos != (head); \
+               pos = pos->next, prefetch(pos->next))
+
+/**
+ * list_for_each_prev  -       iterate over a list in reverse order (following ->prev from @head)
+ * @pos:       the &struct list_head to use as a loop counter.
+ * @head:      the head for your list.
+ */
+#define list_for_each_prev(pos, head) \
+       for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \
+               pos = pos->prev, prefetch(pos->prev))
+
+/**
+ * list_for_each_safe  -       iterate over a list safe against removal of list entry
+ * @pos:       the &struct list_head to use as a loop counter.
+ * @n:         another &struct list_head; holds pos->next, read before the body runs
+ * @head:      the head for your list.
+ */
+#define list_for_each_safe(pos, n, head) \
+       for (pos = (head)->next, n = pos->next; pos != (head); \
+               pos = n, n = pos->next)
+
+/**
+ * list_for_each_entry  -       iterate over list of given type
+ * @pos:        the type * to use as a loop counter; typeof(*pos) selects the containing type.
+ * @head:       the head for your list.
+ * @member:     the name of the list_struct within the struct.
+ */
+#define list_for_each_entry(pos, head, member)                         \
+        for (pos = list_entry((head)->next, typeof(*pos), member),     \
+                    prefetch(pos->member.next);                        \
+            &pos->member != (head);                                    \
+            pos = list_entry(pos->member.next, typeof(*pos), member),  \
+            prefetch(pos->member.next))
+
+/**
+ * list_for_each_entry_safe  -       iterate over list of given type safe against removal of list entry
+ * @pos:        the type * to use as a loop counter; typeof(*pos) selects the containing type.
+ * @n:          another type *; holds the entry after @pos, computed before the body runs
+ * @head:       the head for your list.
+ * @member:     the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_safe(pos, n, head, member)                 \
+        for (pos = list_entry((head)->next, typeof(*pos), member),     \
+               n = list_entry(pos->member.next, typeof(*pos), member); \
+            &pos->member != (head);                                    \
+            pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+#endif /* if !__KERNEL__*/
+#endif /* if !_LUSTRE_LIST_H */
index af4a2dc..db83ae7 100644 (file)
@@ -1,6 +1,8 @@
 # define DEBUG_SUBSYSTEM S_PORTALS
 # define PORTAL_DEBUG
 
+#include "build_check.h"
+
 #ifndef __KERNEL__
 # include <stdio.h>
 # include <stdlib.h>
index a83749b..69fa339 100644 (file)
@@ -1,11 +1,12 @@
 #ifndef P30_API_H
 #define P30_API_H
 
+#include "build_check.h"
+
 #include <portals/types.h>
 
 #ifndef PTL_NO_WRAP
-int PtlInit(void);
-int PtlInitialized(void);
+int PtlInit(int *);
 void PtlFini(void);
 
 int PtlNIInit(ptl_interface_t interface, ptl_pt_index_t ptl_size_in,
@@ -25,10 +26,6 @@ int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id);
  * Network interfaces
  */
 
-#ifndef PTL_NO_WRAP
-int PtlNIBarrier(ptl_handle_ni_t interface_in);
-#endif
-
 int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in,
                 ptl_sr_value_t * status_out);
 
@@ -62,6 +59,13 @@ unsigned int PtlNIDebug(ptl_handle_ni_t ni, unsigned int mask_in);
  */
 int PtlFailNid (ptl_handle_ni_t ni, ptl_nid_t nid, unsigned int threshold);
 
+/*
+ * PtlSnprintHandle: 
+ *
+ * This is not an official Portals 3 API call.  It is provided
+ * so that an application can print an opaque handle.
+ */
+void PtlSnprintHandle (char *str, int str_len, ptl_handle_any_t handle);
 
 /*
  * Match entries
@@ -95,7 +99,7 @@ int PtlMDAttach(ptl_handle_me_t current_in, ptl_md_t md_in,
                 ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out);
 
 int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in,
-              ptl_handle_md_t * handle_out);
+             ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out);
 
 int PtlMDUnlink(ptl_handle_md_t md_in);
 
@@ -130,8 +134,8 @@ int PtlEQGet(ptl_handle_eq_t eventq_in, ptl_event_t * event_out);
 
 int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t * event_out);
 
-int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out,
-                      int timeout);
+int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout,
+             ptl_event_t *event_out, int *which_out);
 #endif
 
 /*
index 3c3b154..0be8a3d 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef PTL_BLOCKS_H
 #define PTL_BLOCKS_H
 
+#include "build_check.h"
+
 /*
  * blocks.h
  *
@@ -161,6 +163,7 @@ typedef struct PtlMDBind_in {
         ptl_handle_ni_t ni_in;
         ptl_handle_eq_t eq_in;
         ptl_md_t md_in;
+       ptl_unlink_t unlink_in;
 } PtlMDBind_in;
 
 typedef struct PtlMDBind_out {
diff --git a/lustre/portals/include/portals/build_check.h b/lustre/portals/include/portals/build_check.h
new file mode 100644 (file)
index 0000000..5db1352
--- /dev/null
@@ -0,0 +1,8 @@
+#ifndef _BUILD_CHECK_H
+#define _BUILD_CHECK_H
+/* Included by this tree's portals headers: stops the build if they are reached while CRAY_PORTALS is defined. */
+#ifdef CRAY_PORTALS
+#error "an application got to me instead of cray's includes"
+#endif /* CRAY_PORTALS */
+
+#endif /* _BUILD_CHECK_H */
index 785ce73..61aca3f 100644 (file)
@@ -1,3 +1,4 @@
+#include "build_check.h"
 /*
 **
 ** This files contains definitions that are used throughout the cplant code.
index 08f084a..499f32b 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef _P30_ERRNO_H_
 #define _P30_ERRNO_H_
 
+#include "build_check.h"
 /*
  * include/portals/errno.h
  *
 
 /* If you change these, you must update the string table in api-errno.c */
 typedef enum {
-        PTL_OK              = 0,
-        PTL_SEGV            = 1,
-
-        PTL_NOSPACE         = 2,
-        PTL_INUSE           = 3,
-        PTL_VAL_FAILED      = 4,
-
-        PTL_NAL_FAILED      = 5,
-        PTL_NOINIT          = 6,
-        PTL_INIT_DUP        = 7,
-        PTL_INIT_INV        = 8,
-        PTL_AC_INV_INDEX    = 9,
-
-        PTL_INV_ASIZE       = 10,
-        PTL_INV_HANDLE      = 11,
-        PTL_INV_MD          = 12,
-        PTL_INV_ME          = 13,
-        PTL_INV_NI          = 14,
+        PTL_OK                 = 0, /* codes are renamed and renumbered here; the new values run densely from 0 to 22 */
+        PTL_SEGV               = 1,
+
+        PTL_NO_SPACE           = 2,
+        PTL_ME_IN_USE          = 3,
+        PTL_VAL_FAILED         = 4,
+
+        PTL_NAL_FAILED         = 5,
+        PTL_NO_INIT            = 6,
+        PTL_IFACE_DUP          = 7,
+        PTL_IFACE_INVALID      = 8,
+
+        PTL_HANDLE_INVALID     = 9,
+        PTL_MD_INVALID         = 10,
+        PTL_ME_INVALID         = 11,
 /* If you change these, you must update the string table in api-errno.c */
-        PTL_ILL_MD          = 15,
-        PTL_INV_PROC        = 16,
-        PTL_INV_PSIZE       = 17,
-        PTL_INV_PTINDEX     = 18,
-        PTL_INV_REG         = 19,
-
-        PTL_INV_SR_INDX     = 20,
-        PTL_ML_TOOLONG      = 21,
-        PTL_ADDR_UNKNOWN    = 22,
-        PTL_INV_EQ          = 23,
-        PTL_EQ_DROPPED      = 24,
-
-        PTL_EQ_EMPTY        = 25,
-        PTL_NOUPDATE        = 26,
-        PTL_FAIL            = 27,
-        PTL_NOT_IMPLEMENTED = 28,
-        PTL_NO_ACK          = 29,
-
-        PTL_IOV_TOO_MANY    = 30,
-        PTL_IOV_TOO_SMALL   = 31,
-
-       PTL_EQ_INUSE        = 32,
-
-        PTL_MAX_ERRNO       = 32
+        PTL_PROCESS_INVALID    = 12,
+        PTL_PT_INDEX_INVALID   = 13,
+
+        PTL_SR_INDEX_INVALID   = 14,
+        PTL_EQ_INVALID         = 15,
+        PTL_EQ_DROPPED         = 16,
+
+        PTL_EQ_EMPTY           = 17,
+        PTL_MD_NO_UPDATE       = 18,
+        PTL_FAIL               = 19,
+
+        PTL_IOV_TOO_MANY       = 20,
+        PTL_IOV_TOO_SMALL      = 21,
+
+       PTL_EQ_IN_USE           = 22,
+
+        PTL_MAX_ERRNO          = 23 /* NOTE(review): one past the last code (22); the old value (32) equalled the last code - confirm users of PTL_MAX_ERRNO */
 } ptl_err_t;
 /* If you change these, you must update the string table in api-errno.c */
 
index f87ff83..90ed4f5 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef PTL_DISPATCH_H
 #define PTL_DISPATCH_H
 
+#include "build_check.h"
 /*
  * include/dispatch.h
  *
index 0bf557e..d1d0495 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef _LIB_NAL_H_
 #define _LIB_NAL_H_
 
+#include "build_check.h"
 /*
  * nal.h
  *
index e9e4635..350447e 100644 (file)
@@ -9,6 +9,8 @@
 #ifndef _LIB_P30_H_
 #define _LIB_P30_H_
 
+#include "build_check.h"
+
 #ifdef __KERNEL__
 # include <asm/page.h>
 # include <linux/string.h>
@@ -195,7 +197,7 @@ lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd)
                 niov = umd->niov;
                 size = offsetof(lib_md_t, md_iov.kiov[niov]);
         } else {
-                niov = ((umd->options & PTL_MD_IOV) != 0) ?
+                niov = ((umd->options & PTL_MD_IOVEC) != 0) ?
                        umd->niov : 1;
                 size = offsetof(lib_md_t, md_iov.iov[niov]);
         }
@@ -245,10 +247,14 @@ lib_me_free(nal_cb_t *nal, lib_me_t *me)
 static inline lib_msg_t *
 lib_msg_alloc(nal_cb_t *nal)
 {
-        /* NEVER called with statelock held */
+        /* NEVER called with statelock held; may be in interrupt... */
         lib_msg_t *msg;
 
-        PORTAL_ALLOC(msg, sizeof(*msg));
+        if (in_interrupt())
+                PORTAL_ALLOC_ATOMIC(msg, sizeof(*msg));
+        else
+                PORTAL_ALLOC(msg, sizeof(*msg));
+
         if (msg != NULL) {
                 /* NULL pointers, clear flags etc */
                 memset (msg, 0, sizeof (*msg));
@@ -363,10 +369,10 @@ extern char *dispatch_name(int index);
 extern void lib_enq_event_locked (nal_cb_t *nal, void *private,
                                   lib_eq_t *eq, ptl_event_t *ev);
 extern void lib_finalize (nal_cb_t *nal, void *private, lib_msg_t *msg, 
-                          ptl_err_t status);
+                          ptl_ni_fail_t ni_fail_type);
 extern void lib_parse (nal_cb_t *nal, ptl_hdr_t *hdr, void *private);
-extern lib_msg_t *lib_fake_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, 
-                                      lib_md_t *getmd);
+extern lib_msg_t *lib_create_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, 
+                                        lib_msg_t *get_msg);
 extern void print_hdr (nal_cb_t * nal, ptl_hdr_t * hdr);
 
 
index 904204b..40776a6 100644 (file)
@@ -10,6 +10,8 @@
 #ifndef _LIB_TYPES_H_
 #define _LIB_TYPES_H_
 
+#include "build_check.h"
+
 #include <portals/types.h>
 #ifdef __KERNEL__
 # include <linux/uio.h>
@@ -133,9 +135,8 @@ typedef struct {
 } lib_counters_t;
 
 /* temporary expedient: limit number of entries in discontiguous MDs */
-# define PTL_MTU        (512<<10)
-# define PTL_MD_MAX_IOV 128
-# define PTL_MD_MAX_PAGES min_t(int, PTL_MD_MAX_IOV, PTL_MTU / PAGE_SIZE)
+#define PTL_MTU        (512<<10)
+#define PTL_MD_MAX_IOV 128
 
 struct lib_msg_t {
         struct list_head  msg_list;
@@ -191,7 +192,6 @@ struct lib_md_t {
         ptl_size_t        max_size;
         int               threshold;
         int               pending;
-        ptl_unlink_t      unlink;
         unsigned int      options;
         unsigned int      md_flags;
         void             *user_ptr;
@@ -204,7 +204,15 @@ struct lib_md_t {
         } md_iov;
 };
 
-#define PTL_MD_FLAG_UNLINK            (1 << 0)
+#define PTL_MD_FLAG_ZOMBIE            (1 << 0)
+#define PTL_MD_FLAG_AUTO_UNLINK       (1 << 1)
+
+static inline int lib_md_exhausted (lib_md_t *md) /* non-zero when either test below holds */
+{
+        return (md->threshold == 0 || /* threshold has reached zero */
+                ((md->options & PTL_MD_MAX_SIZE) != 0 && /* or, only with PTL_MD_MAX_SIZE set: */
+                 md->offset + md->max_size > md->length)); /* fewer than max_size bytes remain past offset */
+}
 
 #ifdef PTL_USE_LIB_FREELIST
 typedef struct
index 9cab047..37d9952 100644 (file)
@@ -9,8 +9,6 @@
  * using the generic single-entry routines.
  */
 
-#define prefetch(a) ((void)a)
-
 struct list_head {
        struct list_head *next, *prev;
 };
@@ -194,8 +192,7 @@ static inline void list_splice_init(struct list_head *list,
  * @head:      the head for your list.
  */
 #define list_for_each(pos, head) \
-       for (pos = (head)->next, prefetch(pos->next); pos != (head); \
-               pos = pos->next, prefetch(pos->next))
+       for (pos = (head)->next ; pos != (head); pos = pos->next )
 
 /**
  * list_for_each_prev  -       iterate over a list in reverse order
@@ -203,8 +200,7 @@ static inline void list_splice_init(struct list_head *list,
  * @head:      the head for your list.
  */
 #define list_for_each_prev(pos, head) \
-       for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \
-               pos = pos->prev, prefetch(pos->prev))
+       for (pos = (head)->prev ; pos != (head); pos = pos->prev)
 
 /**
  * list_for_each_safe  -       iterate over a list safe against removal of list entry
@@ -226,11 +222,9 @@ static inline void list_splice_init(struct list_head *list,
  * @member:     the name of the list_struct within the struct.
  */
 #define list_for_each_entry(pos, head, member)                         \
-        for (pos = list_entry((head)->next, typeof(*pos), member),     \
-                    prefetch(pos->member.next);                        \
+        for (pos = list_entry((head)->next, typeof(*pos), member);     \
             &pos->member != (head);                                    \
-            pos = list_entry(pos->member.next, typeof(*pos), member),  \
-            prefetch(pos->member.next))
+            pos = list_entry(pos->member.next, typeof(*pos), member))
 #endif
 
 #ifndef list_for_each_entry_safe
index 7cb3ab7..5b72046 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef _NAL_H_
 #define _NAL_H_
 
+#include "build_check.h"
+
 /*
  * p30/nal.h
  *
@@ -27,7 +29,7 @@ struct nal_t {
 
        int (*validate) (nal_t * nal, void *base, size_t extent);
 
-       void (*yield) (nal_t * nal);
+       int (*yield) (nal_t * nal, unsigned long *flags, int milliseconds);
 
        void (*lock) (nal_t * nal, unsigned long *flags);
 
index 1b837b4..1568593 100644 (file)
@@ -1,3 +1,5 @@
+#include "build_check.h"
+
 #define PTL_IFACE_TCP 1
 #define PTL_IFACE_ER 2
 #define PTL_IFACE_SS 3
index 8b1495e..577ffab 100644 (file)
@@ -4,6 +4,8 @@
 #ifndef _P30_H_
 #define _P30_H_
 
+#include "build_check.h"
+
 /*
  * p30.h
  *
 #include <portals/api.h>
 #include <portals/nalids.h>
 
-extern int __p30_initialized;  /* for libraries & test codes  */
-extern int __p30_myr_initialized;      /*   that don't know if p30    */
-extern int __p30_ip_initialized;       /*   had been initialized yet  */
-extern ptl_handle_ni_t __myr_ni_handle, __ip_ni_handle;
-
-extern int __p30_myr_timeout;  /* in seconds, for PtlNIBarrier,     */
-extern int __p30_ip_timeout;   /* PtlReduce_all, & PtlBroadcast_all */
-
 /*
  * Debugging flags reserved for the Portals reference library.
  * These are not part of the API as described in the SAND report
index 74ef493..902db76 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef _P30_TYPES_H_
 #define _P30_TYPES_H_
 
+#include "build_check.h"
+
 #ifdef __linux__
 # include <asm/types.h>
 # if defined(__powerpc__) && !defined(__KERNEL__)
@@ -25,6 +27,11 @@ typedef u_int64_t __u64;
 
 #include <portals/errno.h>
 
+/* This implementation uses the same type for API function return codes and
+ * the completion status in an event  */
+#define PTL_NI_OK  PTL_OK
+typedef ptl_err_t ptl_ni_fail_t;
+
 typedef __u64 ptl_nid_t;
 typedef __u32 ptl_pid_t;
 typedef __u32 ptl_pt_index_t;
@@ -33,6 +40,9 @@ typedef __u64 ptl_match_bits_t;
 typedef __u64 ptl_hdr_data_t;
 typedef __u32 ptl_size_t;
 
+#define PTL_TIME_FOREVER    (-1)
+#define PTL_EQ_HANDLER_NONE NULL
+
 typedef struct {
         unsigned long nal_idx;                 /* which network interface */
         __u64         cookie;                  /* which thing on that interface */
@@ -43,11 +53,11 @@ typedef ptl_handle_any_t ptl_handle_eq_t;
 typedef ptl_handle_any_t ptl_handle_md_t;
 typedef ptl_handle_any_t ptl_handle_me_t;
 
-#define PTL_HANDLE_NONE \
+#define PTL_INVALID_HANDLE \
     ((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1})
-#define PTL_EQ_NONE PTL_HANDLE_NONE
+#define PTL_EQ_NONE PTL_INVALID_HANDLE
 
-static inline int PtlHandleEqual (ptl_handle_any_t h1, ptl_handle_any_t h2)
+static inline int PtlHandleIsEqual (ptl_handle_any_t h1, ptl_handle_any_t h2) /* non-zero iff nal_idx and cookie both match */
 {
        return (h1.nal_idx == h2.nal_idx && h1.cookie == h2.cookie);
 }
@@ -88,24 +98,38 @@ typedef struct {
 } ptl_md_t;
 
 /* Options for the MD structure */
-#define PTL_MD_OP_PUT           (1 << 0)
-#define PTL_MD_OP_GET           (1 << 1)
-#define PTL_MD_MANAGE_REMOTE    (1 << 2)
-#define PTL_MD_AUTO_UNLINK      (1 << 3)
-#define PTL_MD_TRUNCATE         (1 << 4)
-#define PTL_MD_ACK_DISABLE      (1 << 5)
-#define PTL_MD_IOV             (1 << 6)
-#define PTL_MD_MAX_SIZE                (1 << 7)
-#define PTL_MD_KIOV             (1 << 8)
+#define PTL_MD_OP_PUT               (1 << 0)
+#define PTL_MD_OP_GET               (1 << 1)
+#define PTL_MD_MANAGE_REMOTE        (1 << 2)
+/* unused                           (1 << 3) */
+#define PTL_MD_TRUNCATE             (1 << 4)
+#define PTL_MD_ACK_DISABLE          (1 << 5)
+#define PTL_MD_IOVEC               (1 << 6)
+#define PTL_MD_MAX_SIZE                    (1 << 7)
+#define PTL_MD_KIOV                 (1 << 8)
+#define PTL_MD_EVENT_START_DISABLE  (1 << 9)
+#define PTL_MD_EVENT_END_DISABLE    (1 << 10)
+
+/* For compatibility with Cray Portals */
+#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS  0
 
 #define PTL_MD_THRESH_INF       (-1)
 
 typedef enum {
-        PTL_EVENT_GET,
-        PTL_EVENT_PUT,
-        PTL_EVENT_REPLY,
+        PTL_EVENT_GET_START, /* GET, PUT, REPLY and SEND each become a _START/_END pair */
+        PTL_EVENT_GET_END,
+
+        PTL_EVENT_PUT_START,
+        PTL_EVENT_PUT_END,
+
+        PTL_EVENT_REPLY_START,
+        PTL_EVENT_REPLY_END,
+
         PTL_EVENT_ACK,
-        PTL_EVENT_SENT,
+
+        PTL_EVENT_SEND_START, /* NOTE(review): numeric values shift, e.g. PTL_EVENT_ACK was 3 and is now 6 */
+       PTL_EVENT_SEND_END,
+
        PTL_EVENT_UNLINK,
 } ptl_event_kind_t;
 
@@ -122,8 +146,6 @@ typedef unsigned PTL_SEQ_BASETYPE ptl_seq_t;
 #endif
 typedef struct {
         ptl_event_kind_t   type;
-       ptl_err_t          status;
-       int                unlinked;
         ptl_process_id_t   initiator;
         ptl_pt_index_t     portal;
         ptl_match_bits_t   match_bits;
@@ -132,7 +154,8 @@ typedef struct {
        ptl_size_t         offset;
         ptl_md_t           mem_desc;
         ptl_hdr_data_t     hdr_data;
-        struct timeval     arrival_time;
+       int                unlinked;
+       ptl_ni_fail_t      ni_fail_type;
 
         volatile ptl_seq_t sequence;
 } ptl_event_t;
index ad46b90..9955599 100644 (file)
@@ -315,7 +315,7 @@ int gmnal_api_shutdown(nal_t *, int);
 
 int gmnal_api_validate(nal_t *, void *, size_t);
 
-void gmnal_api_yield(nal_t *);
+void gmnal_api_yield(nal_t *, unsigned long *, int);
 
 void gmnal_api_lock(nal_t *, unsigned long *);
 
index 1442aa7..338d75c 100644 (file)
@@ -157,13 +157,16 @@ gmnal_api_validate(nal_t *nal, void *base, size_t extent)
  *     Give up the processor
  */
 void
-gmnal_api_yield(nal_t *nal)
+gmnal_api_yield(nal_t *nal, unsigned long *flags, int milliseconds) /* NOTE(review): nal_t.yield is now declared to return int, but this still returns void - confirm */
 {
        CDEBUG(D_TRACE, "gmnal_api_yield : nal [%p]\n", nal);
 
-       set_current_state(TASK_INTERRUPTIBLE);
-       schedule();
+        if (milliseconds != 0) { /* only the non-blocking form (milliseconds == 0) is supported */
+                CERROR("Blocking yield not implemented yet\n");
+                LBUG();
+        }
 
+        our_cond_resched(); /* flags is not used by this function */
        return;
 }
 
index 1f28746..ece1380 100644 (file)
@@ -272,6 +272,17 @@ void gmnal_cb_sti(nal_cb_t *nal_cb, unsigned long *flags)
        return;
 }
 
+void gmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq, ptl_event_t *ev)
+{
+        /* holding cb_lock: pass ev to the EQ's callback, if one is set */
+
+        if (eq->event_callback != NULL)
+                eq->event_callback(ev);
+
+        /* We will wake threads sleeping in yield() here, AFTER the
+         * callback, when we implement blocking yield */
+}
+
 int gmnal_cb_dist(nal_cb_t *nal_cb, ptl_nid_t nid, unsigned long *dist)
 {
        CDEBUG(D_TRACE, "gmnal_cb_dist\n");
index 948badf..02beca7 100644 (file)
@@ -194,7 +194,7 @@ kibnal_shutdown(nal_t *nal, int ni)
 // when do we call this yield function 
 //
 void 
-kibnal_yield( nal_t *nal )
+kibnal_yield( nal_t *nal, unsigned long *flags, int milliseconds )
 {
         kibnal_data_t *k = nal->nal_data;
         nal_cb_t      *nal_cb = k->kib_cb;
@@ -204,6 +204,11 @@ kibnal_yield( nal_t *nal )
         LASSERT (k    == &kibnal_data);
         LASSERT (nal_cb == &kibnal_lib);
 
+        if (milliseconds != 0) {
+                CERROR("Blocking yeild not implemented yet\n");
+                LBUG();
+        }
+        
         // check under what condition that we need to 
         // call schedule()
         // who set this need_resched 
index ff5aeb3..4a1f0d7 100644 (file)
@@ -29,6 +29,7 @@
 #include <portals/p30.h>
 #include <portals/lib-p30.h>
 #include <linux/kp30.h>
+#include <linux/kpr.h>
 
 // Infiniband VAPI/EVAPI header files  
 // Mellanox MT23108 VAPI
index 0688062..f359441 100644 (file)
@@ -221,7 +221,19 @@ void kibnal_sti(nal_cb_t *nal, unsigned long *flags)
         spin_unlock_irqrestore(&data->kib_dispatch_lock,*flags);
 }
 
+//
+// A new event has just been created: pass it to the EQ's callback, if one is set
+//
+void kibnal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev)
+{
+        /* holding kib_dispatch_lock; nal and private are not used here */
 
+        if (eq->event_callback != NULL)
+                eq->event_callback(ev);
+
+        /* We will wake threads sleeping in yield() here, AFTER the
+         * callback, when we implement blocking yield */
+}
 
 //
 // nic distance 
@@ -1285,5 +1297,6 @@ nal_cb_t kibnal_lib = {
         cb_printf:      kibnal_printf,
         cb_cli:         kibnal_cli,
         cb_sti:         kibnal_sti,
+        cb_callback:    kibnal_callback,
         cb_dist:        kibnal_dist // no used at this moment 
 };
index 3b3b5d4..a386eef 100644 (file)
@@ -109,14 +109,43 @@ kqswnal_shutdown(nal_t *nal, int ni)
        return (0);
 }
 
-static void
-kqswnal_yield( nal_t *nal )
+static int
+kqswnal_yield(nal_t *nal, unsigned long *flags, int milliseconds)
 {
+       /* NB called holding statelock; returns 0, or what is left of a positive timeout (a negative timeout is returned unchanged) */
+        wait_queue_t       wait;
+       unsigned long      now = jiffies; /* start time, used to work out the remaining timeout */
+
        CDEBUG (D_NET, "yield\n");
 
-       if (current->need_resched)
-               schedule();
-       return;
+       if (milliseconds == 0) { /* non-blocking: reschedule only if needed */
+               if (current->need_resched)
+                       schedule();
+               return 0;
+       }
+
+       init_waitqueue_entry(&wait, current);
+       set_current_state(TASK_INTERRUPTIBLE);
+       add_wait_queue(&kqswnal_data.kqn_yield_waitq, &wait); /* kqswnal_callback() wakes this queue */
+
+       kqswnal_unlock(nal, flags); /* unlock before sleeping; relocked below */
+
+       if (milliseconds < 0) /* negative: sleep with no timeout */
+               schedule ();
+       else
+               schedule_timeout((milliseconds * HZ) / 1000); /* NOTE(review): integer division, so less than one jiffy becomes 0 */
+       
+       kqswnal_lock(nal, flags);
+
+       remove_wait_queue(&kqswnal_data.kqn_yield_waitq, &wait);
+
+       if (milliseconds > 0) {
+               milliseconds -= ((jiffies - now) * 1000) / HZ; /* subtract the elapsed time, converted to ms */
+               if (milliseconds < 0)
+                       milliseconds = 0;
+       }
+       
+       return (milliseconds);
 }
 
 static nal_t *
@@ -491,6 +520,7 @@ kqswnal_initialise (void)
        init_waitqueue_head (&kqswnal_data.kqn_sched_waitq);
 
        spin_lock_init (&kqswnal_data.kqn_statelock);
+       init_waitqueue_head (&kqswnal_data.kqn_yield_waitq);
 
        /* pointers/lists/locks initialised */
        kqswnal_data.kqn_init = KQN_INIT_DATA;
index 5ebf30a..5e32887 100644 (file)
@@ -71,6 +71,7 @@
 #define DEBUG_SUBSYSTEM S_QSWNAL
 
 #include <linux/kp30.h>
+#include <linux/kpr.h>
 #include <portals/p30.h>
 #include <portals/lib-p30.h>
 
@@ -222,6 +223,7 @@ typedef struct
         struct list_head   kqn_delayedtxds;     /* delayed transmits */
 
         spinlock_t         kqn_statelock;       /* cb_cli/cb_sti */
+        wait_queue_head_t  kqn_yield_waitq;     /* where yield waits */
         nal_cb_t          *kqn_cb;              /* -> kqswnal_lib */
 #if MULTIRAIL_EKC
         EP_SYS            *kqn_ep;              /* elan system */
index 157dc70..61c88f6 100644 (file)
@@ -85,6 +85,9 @@ kqswnal_printf (nal_cb_t * nal, const char *fmt, ...)
         CDEBUG (D_NET, "%s", msg);
 }
 
+#if (defined(CONFIG_SPARC32) || defined(CONFIG_SPARC64))
+# error "Can't save/restore irq contexts in different procedures"
+#endif
 
 static void
 kqswnal_cli(nal_cb_t *nal, unsigned long *flags)
@@ -103,6 +106,17 @@ kqswnal_sti(nal_cb_t *nal, unsigned long *flags)
         spin_unlock_irqrestore(&data->kqn_statelock, *flags);
 }
 
+static void
+kqswnal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev)
+{
+        /* holding kqn_statelock: pass ev to the EQ's callback (if set), then wake yielders */
+
+        if (eq->event_callback != NULL)
+                eq->event_callback(ev);
+
+        if (waitqueue_active(&kqswnal_data.kqn_yield_waitq)) /* skip the wake-up when nobody waits in kqswnal_yield() */
+                wake_up_all(&kqswnal_data.kqn_yield_waitq);
+}
 
 static int
 kqswnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
@@ -513,15 +527,15 @@ kqswnal_tx_done (kqswnal_tx_t *ktx, int error)
                 lib_finalize (&kqswnal_lib, ktx->ktx_args[0],
                               (lib_msg_t *)ktx->ktx_args[1],
                               (error == 0) ? PTL_OK : 
-                              (error == -ENOMEM) ? PTL_NOSPACE : PTL_FAIL);
+                              (error == -ENOMEM) ? PTL_NO_SPACE : PTL_FAIL);
                 break;
 
         case KTX_GETTING:          /* Peer has DMA-ed direct? */
                 msg = (lib_msg_t *)ktx->ktx_args[1];
 
                 if (error == 0) {
-                        repmsg = lib_fake_reply_msg (&kqswnal_lib, 
-                                                     ktx->ktx_nid, msg->md);
+                        repmsg = lib_create_reply_msg (&kqswnal_lib, 
+                                                       ktx->ktx_nid, msg);
                         if (repmsg == NULL)
                                 error = -ENOMEM;
                 }
@@ -532,7 +546,7 @@ kqswnal_tx_done (kqswnal_tx_t *ktx, int error)
                         lib_finalize (&kqswnal_lib, NULL, repmsg, PTL_OK);
                 } else {
                         lib_finalize (&kqswnal_lib, ktx->ktx_args[0], msg,
-                                      (error == -ENOMEM) ? PTL_NOSPACE : PTL_FAIL);
+                                      (error == -ENOMEM) ? PTL_NO_SPACE : PTL_FAIL);
                 }
                 break;
 
@@ -937,7 +951,7 @@ kqswnal_sendmsg (nal_cb_t     *nal,
                                           in_interrupt()));
         if (ktx == NULL) {
                 kqswnal_cerror_hdr (hdr);
-                return (PTL_NOSPACE);
+                return (PTL_NO_SPACE);
         }
 
         ktx->ktx_nid     = targetnid;
@@ -1845,5 +1859,6 @@ nal_cb_t kqswnal_lib =
         cb_printf:      kqswnal_printf,
         cb_cli:         kqswnal_cli,
         cb_sti:         kqswnal_sti,
+        cb_callback:    kqswnal_callback,
         cb_dist:        kqswnal_dist
 };
index 5ffba31..35de6eb 100644 (file)
@@ -108,10 +108,15 @@ static int kscimacnal_shutdown(nal_t *nal, int ni)
 }
 
 
-static void kscimacnal_yield( nal_t *nal )
+static void kscimacnal_yield( nal_t *nal, unsigned long *flags, int milliseconds )
 {
         LASSERT (nal == &kscimacnal_api);
 
+        if (milliseconds != 0) {
+                CERROR ("Blocking yield not implemented yet\n");
+                LBUG();
+        }
+
         if (current->need_resched) 
                 schedule();
         return;
index 6949557..f132769 100644 (file)
@@ -34,6 +34,7 @@
 #define DEBUG_SUBSYSTEM S_UNDEFINED
 
 #include <linux/kp30.h>
+#include <linux/kpr.h>
 #include <portals/p30.h>
 #include <portals/lib-p30.h>
 
index 52afb98..f9562b2 100644 (file)
@@ -97,6 +97,18 @@ kscimacnal_sti(nal_cb_t *nal, unsigned long *flags)
 }
 
 
+static void 
+kscimacnal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev)
+{
+        /* holding ksci_dispatch_lock; 'nal' and 'private' are unused */
+
+        if (eq->event_callback != NULL)
+                eq->event_callback(ev);
+
+        /* We will wake threads sleeping in yield() here, AFTER the
+         * callback, when we implement blocking yield */
+}
+
 static int 
 kscimacnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
 {
@@ -233,7 +245,7 @@ kscimacnal_sendmsg(nal_cb_t        *nal,
         /* save transaction info for later finalize and cleanup */
         PORTAL_ALLOC(ktx, (sizeof(kscimacnal_tx_t)));
         if (!ktx) {
-                return PTL_NOSPACE;
+                return PTL_NO_SPACE;
         }
 
         ktx->ktx_nmapped = 0; /* Start with no mapped pages :) */
@@ -248,7 +260,7 @@ kscimacnal_sendmsg(nal_cb_t        *nal,
                         kscimacnal_txrelease, ktx);
         if (!msg) {
                 PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t)));
-                return PTL_NOSPACE;
+                return PTL_NO_SPACE;
         }
         mac_put_mblk(msg, sizeof(ptl_hdr_t));
         lastblk=msg;
@@ -285,7 +297,7 @@ kscimacnal_sendmsg(nal_cb_t        *nal,
                 if(!newblk) {
                         mac_free_msg(msg);
                         PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t)));
-                        return PTL_NOSPACE;
+                        return PTL_NO_SPACE;
                 }
                 mac_put_mblk(newblk, nob);
                 mac_link_mblk(lastblk, newblk);
@@ -597,5 +609,6 @@ nal_cb_t kscimacnal_lib = {
         cb_printf:       kscimacnal_printf,
         cb_cli:          kscimacnal_cli,
         cb_sti:          kscimacnal_sti,
+        cb_callback:     kscimacnal_callback,
         cb_dist:         kscimacnal_dist
 };
index 0dd5d11..d874a6c 100644 (file)
@@ -95,13 +95,6 @@ ksocknal_api_shutdown(nal_t *nal, int ni)
 }
 
 void
-ksocknal_api_yield(nal_t *nal)
-{
-        our_cond_resched();
-        return;
-}
-
-void
 ksocknal_api_lock(nal_t *nal, unsigned long *flags)
 {
         ksock_nal_data_t *k;
@@ -123,6 +116,44 @@ ksocknal_api_unlock(nal_t *nal, unsigned long *flags)
         nal_cb->cb_sti(nal_cb,flags);
 }
 
+int
+ksocknal_api_yield(nal_t *nal, unsigned long *flags, int milliseconds)
+{
+        /* NB called holding statelock, a spinlock (see ksocknal_cli()), so
+         * it is dropped around anything that can schedule */
+        wait_queue_t       wait;
+        unsigned long      now = jiffies;
+
+        CDEBUG (D_NET, "yield\n");
+
+        if (milliseconds == 0) {
+                ksocknal_api_unlock(nal, flags);
+                our_cond_resched();
+                ksocknal_api_lock(nal, flags);
+                return 0;
+        }
+
+        /* Queue ourselves BEFORE unlocking: ksocknal_callback() wakes
+         * ksnd_yield_waitq while holding the lock, so no wakeup is missed */
+        init_waitqueue_entry(&wait, current);
+        set_current_state (TASK_INTERRUPTIBLE);
+        add_wait_queue (&ksocknal_data.ksnd_yield_waitq, &wait);
+        ksocknal_api_unlock(nal, flags);
+        if (milliseconds < 0)
+                schedule ();                    /* wait with no timeout */
+        else
+                schedule_timeout((milliseconds * HZ) / 1000);
+        ksocknal_api_lock(nal, flags);
+        remove_wait_queue (&ksocknal_data.ksnd_yield_waitq, &wait);
+
+        if (milliseconds > 0) {         /* report what is left of the timeout */
+                milliseconds -= ((jiffies - now) * 1000) / HZ;
+                if (milliseconds < 0)
+                        milliseconds = 0;
+        }
+        return (milliseconds);
+}
+
 nal_t *
 ksocknal_init(int interface, ptl_pt_index_t ptl_size,
               ptl_ac_index_t ac_size, ptl_pid_t requested_pid)
@@ -745,6 +776,9 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock,
 
         ksocknal_get_peer_addr (conn);
 
+        CWARN("New conn nid:"LPX64" ip:%08x/%d incarnation:"LPX64"\n",
+              nid, conn->ksnc_ipaddr, conn->ksnc_port, incarnation);
+
         irq = ksocknal_conn_irq (conn);
 
         write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
@@ -1071,6 +1105,11 @@ ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation)
 
                 if (conn->ksnc_incarnation == incarnation)
                         continue;
+
+                CWARN("Closing stale conn nid:"LPX64" ip:%08x/%d "
+                      "incarnation:"LPX64"("LPX64")\n",
+                      peer->ksnp_nid, conn->ksnc_ipaddr, conn->ksnc_port,
+                      conn->ksnc_incarnation, incarnation);
                 
                 count++;
                 ksocknal_close_conn_locked (conn, -ESTALE);
@@ -1568,7 +1607,6 @@ ksocknal_module_init (void)
 
         ksocknal_api.forward  = ksocknal_api_forward;
         ksocknal_api.shutdown = ksocknal_api_shutdown;
-        ksocknal_api.yield    = ksocknal_api_yield;
         ksocknal_api.validate = NULL;           /* our api validate is a NOOP */
         ksocknal_api.lock     = ksocknal_api_lock;
         ksocknal_api.unlock   = ksocknal_api_unlock;
@@ -1600,7 +1638,8 @@ ksocknal_module_init (void)
 
         ksocknal_data.ksnd_nal_cb = &ksocknal_lib;
         spin_lock_init (&ksocknal_data.ksnd_nal_cb_lock);
-
+        init_waitqueue_head(&ksocknal_data.ksnd_yield_waitq);
+        
         spin_lock_init(&ksocknal_data.ksnd_small_fmp.fmp_lock);
         INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_idle_fmbs);
         INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns);
@@ -1743,9 +1782,9 @@ ksocknal_module_init (void)
         ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
 
         printk(KERN_INFO "Lustre: Routing socket NAL loaded "
-               "(Routing %s, initial mem %d)\n",
+               "(Routing %s, initial mem %d, incarnation "LPX64")\n",
                kpr_routing (&ksocknal_data.ksnd_router) ?
-               "enabled" : "disabled", pkmem);
+               "enabled" : "disabled", pkmem, ksocknal_data.ksnd_incarnation);
 
         return (0);
 }
index 17a7e49..bd3c1fb 100644 (file)
@@ -61,6 +61,7 @@
 
 #include <linux/kp30.h>
 #include <linux/portals_compat25.h>
+#include <linux/kpr.h>
 #include <portals/p30.h>
 #include <portals/lib-p30.h>
 #include <portals/socknal.h>
@@ -157,6 +158,7 @@ typedef struct {
 
         nal_cb_t         *ksnd_nal_cb;
         spinlock_t        ksnd_nal_cb_lock;     /* lib cli/sti lock */
+        wait_queue_head_t ksnd_yield_waitq;     /* where yield waits */
 
         atomic_t          ksnd_nthreads;        /* # live threads */
         int               ksnd_shuttingdown;    /* tell threads to exit */
index c89e20e..ebb32da 100644 (file)
@@ -90,6 +90,8 @@ ksocknal_cli(nal_cb_t *nal, unsigned long *flags)
 {
         ksock_nal_data_t *data = nal->nal_data;
 
+        /* OK to ignore 'flags'; we only ever serialise threads and
+         * never need to lock out interrupts */
         spin_lock(&data->ksnd_nal_cb_lock);
 }
 
@@ -99,9 +101,23 @@ ksocknal_sti(nal_cb_t *nal, unsigned long *flags)
         ksock_nal_data_t *data;
         data = nal->nal_data;
 
+        /* OK to ignore 'flags'; we only ever serialise threads and
+         * never need to lock out interrupts */
         spin_unlock(&data->ksnd_nal_cb_lock);
 }
 
+void
+ksocknal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev)
+{
+        /* holding ksnd_nal_cb_lock; 'nal' and 'private' are unused */
+
+        if (eq->event_callback != NULL)
+                eq->event_callback(ev);
+        /* AFTER the callback, wake all sleepers on ksnd_yield_waitq */
+        if (waitqueue_active(&ksocknal_data.ksnd_yield_waitq))
+                wake_up_all(&ksocknal_data.ksnd_yield_waitq);
+}
+
 int
 ksocknal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
 {
@@ -1058,7 +1074,7 @@ ksocknal_sendmsg(nal_cb_t     *nal,
         if (ltx == NULL) {
                 CERROR("Can't allocate tx desc type %d size %d %s\n",
                        type, desc_size, in_interrupt() ? "(intr)" : "");
-                return (PTL_NOSPACE);
+                return (PTL_NO_SPACE);
         }
 
         atomic_inc(&ksocknal_data.ksnd_nactive_ltxs);
@@ -2659,8 +2675,8 @@ ksocknal_reaper (void *arg)
                 }
                 ksocknal_data.ksnd_reaper_waketime = jiffies + timeout;
 
-                add_wait_queue (&ksocknal_data.ksnd_reaper_waitq, &wait);
                 set_current_state (TASK_INTERRUPTIBLE);
+                add_wait_queue (&ksocknal_data.ksnd_reaper_waitq, &wait);
 
                 if (!ksocknal_data.ksnd_shuttingdown &&
                     list_empty (&ksocknal_data.ksnd_deathrow_conns) &&
@@ -2692,5 +2708,6 @@ nal_cb_t ksocknal_lib = {
         cb_printf:       ksocknal_printf,
         cb_cli:          ksocknal_cli,
         cb_sti:          ksocknal_sti,
+        cb_callback:     ksocknal_callback,
         cb_dist:         ksocknal_dist
 };
index cf9220b..4f0b303 100644 (file)
@@ -4,23 +4,14 @@
 # See the file COPYING in this distribution
 
 
-MODULE = portals
-modulenet_DATA = portals.o
-EXTRA_PROGRAMS = portals
+MODULE = libcfs
+modulenet_DATA = libcfs.o
+EXTRA_PROGRAMS = libcfs
 
-LIBLINKS := lib-dispatch.c lib-eq.c lib-init.c lib-md.c lib-me.c lib-move.c lib-msg.c lib-ni.c lib-pid.c
-APILINKS := api-eq.c api-errno.c api-init.c api-me.c api-ni.c api-wrap.c
-LINKS = $(APILINKS) $(LIBLINKS) 
-DISTCLEANFILES = $(LINKS) link-stamp *.orig *.rej
-
-$(LINKS): link-stamp
-link-stamp:
-       -list='$(LIBLINKS)'; for f in $$list; do echo $$f ; ln -sf $(srcdir)/../portals/$$f .; done
-       -list='$(APILINKS)'; for f in $$list; do echo $$f ; ln -sf $(srcdir)/../portals/$$f .; done
-       echo timestamp > link-stamp
+DISTCLEANFILES = *.orig *.rej
 
 DEFS =
-portals_SOURCES = $(LINKS) module.c proc.c debug.c lwt.c
+libcfs_SOURCES = module.c proc.c debug.c lwt.c
 
 # Don't distribute any patched files.
 dist-hook:
index e98779f..0f88a11 100644 (file)
 
 #include <linux/kp30.h>
 #include <linux/portals_compat25.h>
+#include <linux/libcfs.h>
+
+unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL |
+                                            S_GMNAL | S_IBNAL);
+EXPORT_SYMBOL(portal_subsystem_debug);
+
+unsigned int portal_debug = (D_WARNING | D_DLMTRACE | D_ERROR | D_EMERG | D_HA |
+                             D_RPCTRACE | D_VFSTRACE | D_MALLOC);
+EXPORT_SYMBOL(portal_debug);
+
+unsigned int portal_cerror = 1;
+EXPORT_SYMBOL(portal_cerror);
+
+unsigned int portal_printk;
+EXPORT_SYMBOL(portal_printk);
+
+unsigned int portal_stack;
+EXPORT_SYMBOL(portal_stack);
+
+#ifdef __KERNEL__
+atomic_t portal_kmemory = ATOMIC_INIT(0);
+EXPORT_SYMBOL(portal_kmemory);
+#endif
 
 #define DEBUG_OVERFLOW 1024
 static char *debug_buf = NULL;
@@ -926,6 +949,8 @@ void portals_run_lbug_upcall(char *file, const char *fn, const int line)
 char *portals_nid2str(int nal, ptl_nid_t nid, char *str)
 {
         switch(nal){
+/* XXX this should be a nal method of some sort */
+#ifndef CRAY_PORTALS 
         case TCPNAL:
                 /* userspace NAL */
         case SOCKNAL:
@@ -938,8 +963,9 @@ char *portals_nid2str(int nal, ptl_nid_t nid, char *str)
         case SCIMACNAL:
                 sprintf(str, "%u:%u", (__u32)(nid >> 32), (__u32)nid);
                 break;
+#endif
         default:
-                return NULL;
+                snprintf(str, PTL_NALFMT_SIZE-1, "(?%llx)", (long long)nid);
         }
         return str;
 }
index 2f5a852..9daa8e0 100644 (file)
 
 extern void (kping_client)(struct portal_ioctl_data *);
 
-struct nal_cmd_handler {
-        nal_cmd_handler_t nch_handler;
-        void * nch_private;
-};
-
-static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1];
-struct semaphore nal_cmd_sem;
-
 #ifdef PORTAL_DEBUG
 void kportal_assertion_failed(char *expr, char *file, const char *func,
                               const int line)
@@ -203,7 +195,7 @@ kportal_blockallsigs ()
 }
 
 /* called when opening /dev/device */
-static int kportal_psdev_open(struct inode * inode, struct file * file)
+static int libcfs_psdev_open(struct inode * inode, struct file * file)
 {
         struct portals_device_userstate *pdu;
         ENTRY;
@@ -224,7 +216,7 @@ static int kportal_psdev_open(struct inode * inode, struct file * file)
 }
 
 /* called when closing /dev/device */
-static int kportal_psdev_release(struct inode * inode, struct file * file)
+static int libcfs_psdev_release(struct inode * inode, struct file * file)
 {
         struct portals_device_userstate *pdu;
         ENTRY;
@@ -247,263 +239,49 @@ static inline void freedata(void *data, int len)
         PORTAL_FREE(data, len);
 }
 
-static int
-kportal_add_route(int gateway_nalid, ptl_nid_t gateway_nid, 
-                  ptl_nid_t lo_nid, ptl_nid_t hi_nid)
-{
-        int rc;
-        kpr_control_interface_t *ci;
-
-        ci = (kpr_control_interface_t *) PORTAL_SYMBOL_GET (kpr_control_interface);
-        if (ci == NULL)
-                return (-ENODEV);
-
-        rc = ci->kprci_add_route (gateway_nalid, gateway_nid, lo_nid, hi_nid);
-
-        PORTAL_SYMBOL_PUT(kpr_control_interface);
-        return (rc);
-}
-
-static int
-kportal_del_route(int gw_nalid, ptl_nid_t gw_nid, 
-                  ptl_nid_t lo, ptl_nid_t hi)
-{
-        int rc;
-        kpr_control_interface_t *ci;
-
-        ci = (kpr_control_interface_t *)PORTAL_SYMBOL_GET(kpr_control_interface);
-        if (ci == NULL)
-                return (-ENODEV);
-
-        rc = ci->kprci_del_route (gw_nalid, gw_nid, lo, hi);
-
-        PORTAL_SYMBOL_PUT(kpr_control_interface);
-        return (rc);
-}
-
-static int
-kportal_notify_router (int gw_nalid, ptl_nid_t gw_nid,
-                       int alive, time_t when)
-{
-        int rc;
-        kpr_control_interface_t *ci;
-
-        /* No error if router not preset.  Sysadmin is allowed to notify
-         * _everywhere_ when a NID boots or crashes, even if they know
-         * nothing of the peer. */
-        ci = (kpr_control_interface_t *)PORTAL_SYMBOL_GET(kpr_control_interface);
-        if (ci == NULL)
-                return (0);
-
-        rc = ci->kprci_notify (gw_nalid, gw_nid, alive, when);
-
-        PORTAL_SYMBOL_PUT(kpr_control_interface);
-        return (rc);
-}
-
-static int
-kportal_get_route(int index, __u32 *gateway_nalidp, ptl_nid_t *gateway_nidp,
-                  ptl_nid_t *lo_nidp, ptl_nid_t *hi_nidp, int *alivep)
-{
-        int       gateway_nalid;
-        ptl_nid_t gateway_nid;
-        ptl_nid_t lo_nid;
-        ptl_nid_t hi_nid;
-        int       alive;
-        int       rc;
-        kpr_control_interface_t *ci;
-
-        ci = (kpr_control_interface_t *) PORTAL_SYMBOL_GET(kpr_control_interface);
-        if (ci == NULL)
-                return (-ENODEV);
-
-        rc = ci->kprci_get_route(index, &gateway_nalid, &gateway_nid,
-                                 &lo_nid, &hi_nid, &alive);
-
-        if (rc == 0) {
-                CDEBUG(D_IOCTL, "got route [%d] %d "LPX64":"LPX64" - "LPX64", %s\n",
-                       index, gateway_nalid, gateway_nid, lo_nid, hi_nid,
-                       alive ? "up" : "down");
-
-                *gateway_nalidp = (__u32)gateway_nalid;
-                *gateway_nidp   = gateway_nid;
-                *lo_nidp        = lo_nid;
-                *hi_nidp        = hi_nid;
-                *alivep         = alive;
-        }
-
-        PORTAL_SYMBOL_PUT (kpr_control_interface);
-        return (rc);
-}
-
-static int 
-kportal_router_cmd(struct portals_cfg *pcfg, void * private)
-{
-        int err = -EINVAL;
-        ENTRY;
-
-        switch(pcfg->pcfg_command) {
-        default:
-                CDEBUG(D_IOCTL, "Inappropriate cmd: %d\n", pcfg->pcfg_command);
-                break;
-                
-        case NAL_CMD_ADD_ROUTE:
-                CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n",
-                       pcfg->pcfg_nal, pcfg->pcfg_nid, 
-                       pcfg->pcfg_nid2, pcfg->pcfg_nid3);
-                err = kportal_add_route(pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
-                                        pcfg->pcfg_nid2, pcfg->pcfg_nid3);
-                break;
-
-        case NAL_CMD_DEL_ROUTE:
-                CDEBUG (D_IOCTL, "Removing routes via [%d] "LPU64" : "LPU64" - "LPU64"\n",
-                        pcfg->pcfg_gw_nal, pcfg->pcfg_nid, 
-                        pcfg->pcfg_nid2, pcfg->pcfg_nid3);
-                err = kportal_del_route (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
-                                         pcfg->pcfg_nid2, pcfg->pcfg_nid3);
-                break;
-
-        case NAL_CMD_NOTIFY_ROUTER: {
-                CDEBUG (D_IOCTL, "Notifying peer [%d] "LPU64" %s @ %ld\n",
-                        pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
-                        pcfg->pcfg_flags ? "Enabling" : "Disabling",
-                        (time_t)pcfg->pcfg_nid3);
-                
-                err = kportal_notify_router (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
-                                             pcfg->pcfg_flags, 
-                                             (time_t)pcfg->pcfg_nid3);
-                break;
-        }
-                
-        case NAL_CMD_GET_ROUTE:
-                CDEBUG (D_IOCTL, "Getting route [%d]\n", pcfg->pcfg_count);
-                err = kportal_get_route(pcfg->pcfg_count, &pcfg->pcfg_gw_nal,
-                                        &pcfg->pcfg_nid, 
-                                        &pcfg->pcfg_nid2, &pcfg->pcfg_nid3,
-                                        &pcfg->pcfg_flags);
-                break;
-        }
-        RETURN(err);
-}
-
-int
-kportal_nal_cmd(struct portals_cfg *pcfg)
-{
-        __u32 nal = pcfg->pcfg_nal;
-        int rc = -EINVAL;
-
-        ENTRY;
-
-        down(&nal_cmd_sem);
-        if (nal > 0 && nal <= NAL_MAX_NR && nal_cmd[nal].nch_handler) {
-                CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal, 
-                       pcfg->pcfg_command);
-                rc = nal_cmd[nal].nch_handler(pcfg, nal_cmd[nal].nch_private);
-        }
-        up(&nal_cmd_sem);
-        RETURN(rc);
-}
-
-ptl_handle_ni_t *
-kportal_get_ni (int nal)
-{
-
-        switch (nal)
-        {
-        case QSWNAL:
-                return (PORTAL_SYMBOL_GET(kqswnal_ni));
-        case SOCKNAL:
-                return (PORTAL_SYMBOL_GET(ksocknal_ni));
-        case GMNAL:
-                return  (PORTAL_SYMBOL_GET(kgmnal_ni));
-        case IBNAL:
-                return  (PORTAL_SYMBOL_GET(kibnal_ni));
-        case TCPNAL:
-                /* userspace NAL */
-                return (NULL);
-        case SCIMACNAL:
-                return  (PORTAL_SYMBOL_GET(kscimacnal_ni));
-        default:
-                /* A warning to a naive caller */
-                CERROR ("unknown nal: %d\n", nal);
-                return (NULL);
-        }
-}
-
-void
-kportal_put_ni (int nal)
-{
-
-        switch (nal)
-        {
-        case QSWNAL:
-                PORTAL_SYMBOL_PUT(kqswnal_ni);
-                break;
-        case SOCKNAL:
-                PORTAL_SYMBOL_PUT(ksocknal_ni);
-                break;
-        case GMNAL:
-                PORTAL_SYMBOL_PUT(kgmnal_ni);
-                break;
-        case IBNAL:
-                PORTAL_SYMBOL_PUT(kibnal_ni);
-                break;
-        case TCPNAL:
-                /* A lesson to a malicious caller */
-                LBUG ();
-        case SCIMACNAL:
-                PORTAL_SYMBOL_PUT(kscimacnal_ni);
-                break;
-        default:
-                CERROR ("unknown nal: %d\n", nal);
-        }
-}
+static DECLARE_RWSEM(ioctl_list_sem);
+static LIST_HEAD(ioctl_list);
 
-int
-kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private)
+int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand)
 {
         int rc = 0;
+        down_read(&ioctl_list_sem);
+        if (!list_empty(&hand->item))           /* already linked on a list */
+                rc = -EBUSY;
+        up_read(&ioctl_list_sem);
 
-        CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler);
-
-        if (nal > 0  && nal <= NAL_MAX_NR) {
-                down(&nal_cmd_sem);
-                if (nal_cmd[nal].nch_handler != NULL)
-                        rc = -EBUSY;
-                else {
-                        nal_cmd[nal].nch_handler = handler;
-                        nal_cmd[nal].nch_private = private;
-                }
-                up(&nal_cmd_sem);
+        if (rc == 0) {
+                down_write(&ioctl_list_sem);
+                list_add_tail(&hand->item, &ioctl_list);
+                up_write(&ioctl_list_sem);
         }
-        return rc;
+        RETURN(rc);     /* 0 if added, -EBUSY if 'hand' was already linked */
 }
+EXPORT_SYMBOL(libcfs_register_ioctl);
 
-int
-kportal_nal_unregister(int nal)
+int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand)
 {
         int rc = 0;
+        down_read(&ioctl_list_sem);
+        if (list_empty(&hand->item))            /* not linked on any list */
+                rc = -ENOENT;
+        up_read(&ioctl_list_sem);
 
-        CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal);
-
-        if (nal > 0  && nal <= NAL_MAX_NR) {
-                down(&nal_cmd_sem);
-                nal_cmd[nal].nch_handler = NULL;
-                nal_cmd[nal].nch_private = NULL;
-                up(&nal_cmd_sem);
+        if (rc == 0) {
+                down_write(&ioctl_list_sem);
+                list_del_init(&hand->item);
+                up_write(&ioctl_list_sem);
         }
-        return rc;
+        RETURN(rc);     /* 0 if removed, -ENOENT if 'hand' was not linked */
 }
+EXPORT_SYMBOL(libcfs_deregister_ioctl);
 
-
-static int kportal_ioctl(struct inode *inode, struct file *file,
-                         unsigned int cmd, unsigned long arg)
+static int libcfs_ioctl(struct inode *inode, struct file *file,
+                        unsigned int cmd, unsigned long arg)
 {
-        int err = 0;
+        int err = -EINVAL;
         char buf[1024];
         struct portal_ioctl_data *data;
-        char str[PTL_NALFMT_SIZE];
-
         ENTRY;
 
         if (current->fsuid != 0)
@@ -556,79 +334,6 @@ static int kportal_ioctl(struct inode *inode, struct file *file,
                         RETURN(-EINVAL);
                 portals_debug_mark_buffer(data->ioc_inlbuf1);
                 RETURN(0);
-        case IOC_PORTAL_PING: {
-                void (*ping)(struct portal_ioctl_data *);
-
-                CDEBUG(D_IOCTL, "doing %d pings to nid "LPX64" (%s)\n",
-                       data->ioc_count, data->ioc_nid,
-                       portals_nid2str(data->ioc_nal, data->ioc_nid, str));
-                ping = PORTAL_SYMBOL_GET(kping_client);
-                if (!ping)
-                        CERROR("PORTAL_SYMBOL_GET failed\n");
-                else {
-                        ping(data);
-                        PORTAL_SYMBOL_PUT(kping_client);
-                }
-                RETURN(0);
-        }
-
-        case IOC_PORTAL_GET_NID: {
-                const ptl_handle_ni_t *nip;
-                ptl_process_id_t       pid;
-
-                CDEBUG (D_IOCTL, "Getting nid for nal [%d]\n", data->ioc_nal);
-
-                nip = kportal_get_ni (data->ioc_nal);
-                if (nip == NULL)
-                        RETURN (-EINVAL);
-
-                err = PtlGetId (*nip, &pid);
-                LASSERT (err == PTL_OK);
-                kportal_put_ni (data->ioc_nal);
-
-                data->ioc_nid = pid.nid;
-                if (copy_to_user ((char *)arg, data, sizeof (*data)))
-                        err = -EFAULT;
-                break;
-        }
-
-        case IOC_PORTAL_NAL_CMD: {
-                struct portals_cfg pcfg;
-
-                LASSERT (data->ioc_plen1 == sizeof(pcfg));
-                err = copy_from_user(&pcfg, (void *)data->ioc_pbuf1, 
-                                     sizeof(pcfg));
-                if ( err ) {
-                        EXIT;
-                        return err;
-                }
-
-                CDEBUG (D_IOCTL, "nal command nal %d cmd %d\n", pcfg.pcfg_nal,
-                        pcfg.pcfg_command);
-                err = kportal_nal_cmd(&pcfg);
-                if (err == 0) {
-                        if (copy_to_user((char *)data->ioc_pbuf1, &pcfg, 
-                                         sizeof (pcfg)))
-                                err = -EFAULT;
-                        if (copy_to_user((char *)arg, data, sizeof (*data)))
-                                err = -EFAULT;
-                }
-                break;
-        }
-        case IOC_PORTAL_FAIL_NID: {
-                const ptl_handle_ni_t *nip;
-
-                CDEBUG (D_IOCTL, "fail nid: [%d] "LPU64" count %d\n",
-                        data->ioc_nal, data->ioc_nid, data->ioc_count);
-
-                nip = kportal_get_ni (data->ioc_nal);
-                if (nip == NULL)
-                        return (-EINVAL);
-
-                err = PtlFailNid (*nip, data->ioc_nid, data->ioc_count);
-                kportal_put_ni (data->ioc_nal);
-                break;
-        }
 #if LWT_SUPPORT
         case IOC_PORTAL_LWT_CONTROL: 
                 err = lwt_control (data->ioc_flags, data->ioc_misc);
@@ -666,26 +371,34 @@ static int kportal_ioctl(struct inode *inode, struct file *file,
                 }
                 break;
 
-        default:
+        default: {
+                struct libcfs_ioctl_handler *hand;
                 err = -EINVAL;
-                break;
+                down_read(&ioctl_list_sem);
+                list_for_each_entry(hand, &ioctl_list, item) {
+                        err = hand->handle_ioctl(data, cmd, arg);
+                        if (err != -EINVAL)
+                                break;
+                }
+                up_read(&ioctl_list_sem);
+                } break;
         }
 
         RETURN(err);
 }
 
 
-static struct file_operations portalsdev_fops = {
-        ioctl:   kportal_ioctl,
-        open:    kportal_psdev_open,
-        release: kportal_psdev_release
+static struct file_operations libcfs_fops = {
+        ioctl:   libcfs_ioctl,
+        open:    libcfs_psdev_open,
+        release: libcfs_psdev_release
 };
 
 
-static struct miscdevice portal_dev = {
+static struct miscdevice libcfs_dev = {
         PORTAL_MINOR,
         "portals",
-        &portalsdev_fops
+        &libcfs_fops
 };
 
 extern int insert_proc(void);
@@ -694,7 +407,7 @@ MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
 MODULE_DESCRIPTION("Portals v3.1");
 MODULE_LICENSE("GPL");
 
-static int init_kportals_module(void)
+static int init_libcfs_module(void)
 {
         int rc;
 
@@ -711,41 +424,23 @@ static int init_kportals_module(void)
                 goto cleanup_debug;
         }
 #endif
-        sema_init(&nal_cmd_sem, 1);
-
-        rc = misc_register(&portal_dev);
+        rc = misc_register(&libcfs_dev);
         if (rc) {
                 CERROR("misc_register: error %d\n", rc);
                 goto cleanup_lwt;
         }
 
-        rc = PtlInit();
-        if (rc) {
-                CERROR("PtlInit: error %d\n", rc);
-                goto cleanup_deregister;
-        }
-
         rc = insert_proc();
         if (rc) {
                 CERROR("insert_proc: error %d\n", rc);
-                goto cleanup_fini;
-        }
-
-        rc = kportal_nal_register(ROUTER, kportal_router_cmd, NULL);
-        if (rc) {
-                CERROR("kportal_nal_registre: ROUTER error %d\n", rc);
-                goto cleanup_proc;
+                goto cleanup_deregister;
         }
 
         CDEBUG (D_OTHER, "portals setup OK\n");
         return (0);
 
- cleanup_proc:
-        remove_proc();
- cleanup_fini:
-        PtlFini();
  cleanup_deregister:
-        misc_deregister(&portal_dev);
+        misc_deregister(&libcfs_dev);
  cleanup_lwt:
 #if LWT_SUPPORT
         lwt_fini();
@@ -755,19 +450,17 @@ static int init_kportals_module(void)
         return rc;
 }
 
-static void exit_kportals_module(void)
+static void exit_libcfs_module(void)
 {
         int rc;
 
-        kportal_nal_unregister(ROUTER);
         remove_proc();
-        PtlFini();
 
         CDEBUG(D_MALLOC, "before Portals cleanup: kmem %d\n",
                atomic_read(&portal_kmemory));
 
 
-        rc = misc_deregister(&portal_dev);
+        rc = misc_deregister(&libcfs_dev);
         if (rc)
                 CERROR("misc_deregister error %d\n", rc);
 
@@ -784,54 +477,9 @@ static void exit_kportals_module(void)
                 printk(KERN_ERR "LustreError: portals_debug_cleanup: %d\n", rc);
 }
 
-EXPORT_SYMBOL(lib_dispatch);
-EXPORT_SYMBOL(PtlMEAttach);
-EXPORT_SYMBOL(PtlMEInsert);
-EXPORT_SYMBOL(PtlMEUnlink);
-EXPORT_SYMBOL(PtlEQAlloc);
-EXPORT_SYMBOL(PtlMDAttach);
-EXPORT_SYMBOL(PtlMDUnlink);
-EXPORT_SYMBOL(PtlNIInit);
-EXPORT_SYMBOL(PtlNIFini);
-EXPORT_SYMBOL(PtlNIDebug);
-EXPORT_SYMBOL(PtlInit);
-EXPORT_SYMBOL(PtlFini);
-EXPORT_SYMBOL(PtlPut);
-EXPORT_SYMBOL(PtlGet);
-EXPORT_SYMBOL(ptl_err_str);
-EXPORT_SYMBOL(portal_subsystem_debug);
-EXPORT_SYMBOL(portal_debug);
-EXPORT_SYMBOL(portal_stack);
-EXPORT_SYMBOL(portal_printk);
-EXPORT_SYMBOL(portal_cerror);
-EXPORT_SYMBOL(PtlEQWait);
-EXPORT_SYMBOL(PtlEQFree);
-EXPORT_SYMBOL(PtlEQGet);
-EXPORT_SYMBOL(PtlGetId);
-EXPORT_SYMBOL(PtlMDBind);
-EXPORT_SYMBOL(lib_iov_nob);
-EXPORT_SYMBOL(lib_copy_iov2buf);
-EXPORT_SYMBOL(lib_copy_buf2iov);
-EXPORT_SYMBOL(lib_extract_iov);
-EXPORT_SYMBOL(lib_kiov_nob);
-EXPORT_SYMBOL(lib_copy_kiov2buf);
-EXPORT_SYMBOL(lib_copy_buf2kiov);
-EXPORT_SYMBOL(lib_extract_kiov);
-EXPORT_SYMBOL(lib_finalize);
-EXPORT_SYMBOL(lib_parse);
-EXPORT_SYMBOL(lib_fake_reply_msg);
-EXPORT_SYMBOL(lib_init);
-EXPORT_SYMBOL(lib_fini);
-EXPORT_SYMBOL(portal_kmemory);
 EXPORT_SYMBOL(kportal_daemonize);
 EXPORT_SYMBOL(kportal_blockallsigs);
-EXPORT_SYMBOL(kportal_nal_register);
-EXPORT_SYMBOL(kportal_nal_unregister);
 EXPORT_SYMBOL(kportal_assertion_failed);
-EXPORT_SYMBOL(dispatch_name);
-EXPORT_SYMBOL(kportal_get_ni);
-EXPORT_SYMBOL(kportal_put_ni);
-EXPORT_SYMBOL(kportal_nal_cmd);
 
-module_init(init_kportals_module);
-module_exit (exit_kportals_module);
+module_init(init_libcfs_module);
+module_exit(exit_libcfs_module);
index d17db61..59f9dd9 100644 (file)
@@ -3,12 +3,30 @@
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
+DEFS =
+
+my_sources = api-eq.c api-init.c api-me.c api-errno.c api-ni.c api-wrap.c \
+               lib-dispatch.c lib-init.c lib-me.c lib-msg.c lib-eq.c \
+               lib-md.c lib-move.c lib-ni.c lib-pid.c
 
-CPPFLAGS=
-INCLUDES=-I$(top_srcdir)/portals/include -I$(top_srcdir)/include
 noinst_LIBRARIES= libportals.a
-libportals_a_SOURCES= api-eq.c api-init.c api-me.c api-errno.c api-ni.c api-wrap.c lib-dispatch.c lib-init.c lib-me.c lib-msg.c lib-eq.c lib-md.c lib-move.c lib-ni.c lib-pid.c
+libportals_a_SOURCES= $(my_sources)
 
 if LIBLUSTRE
 libportals_a_CFLAGS= -fPIC
+else
+
+MODULE = portals
+modulenet_DATA = portals.o
+EXTRA_PROGRAMS = portals
+
+DISTCLEANFILES = *.orig *.rej
+
+portals_SOURCES= module.c $(my_sources)
+
+# Don't distribute any patched files.
+dist-hook:
+       list='$(EXT2C)'; for f in $$list; do rm -f $(distdir)/$$f; done
+
+include ../Rules.linux
 endif
index 7822846..de01765 100644 (file)
@@ -9,4 +9,4 @@ obj-y += portals.o
 portals-objs    :=     lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o \
                        lib-move.o lib-msg.o lib-ni.o lib-pid.o \
                        api-eq.o api-errno.o api-init.o api-me.o api-ni.o \
-                       api-wrap.o
+                       api-wrap.o module.o
index 964b9d8..390156a 100644 (file)
@@ -47,35 +47,16 @@ void ptl_eq_ni_fini(nal_t * nal)
         /* Nothing to do anymore... */
 }
 
-int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t * ev)
+int ptl_get_event (ptl_eq_t *eq, ptl_event_t *ev)
 {
-        ptl_eq_t *eq;
-        int rc, new_index;
-        unsigned long flags;
-        ptl_event_t *new_event;
-        nal_t *nal;
+        int          new_index = eq->sequence & (eq->size - 1);
+        ptl_event_t *new_event = &eq->base[new_index];
         ENTRY;
 
-        if (!ptl_init)
-                RETURN(PTL_NOINIT);
-
-        nal = ptl_hndl2nal(&eventq);
-        if (!nal)
-                RETURN(PTL_INV_EQ);
-
-        eq = ptl_handle2usereq(&eventq);
-        nal->lock(nal, &flags);
-
-        /* size must be a power of 2 to handle a wrapped sequence # */
-        LASSERT (eq->size != 0 &&
-                 eq->size == LOWEST_BIT_SET (eq->size));
-
-        new_index = eq->sequence & (eq->size - 1);
-        new_event = &eq->base[new_index];
         CDEBUG(D_INFO, "new_event: %p, sequence: %lu, eq->size: %u\n",
                new_event, eq->sequence, eq->size);
+
         if (PTL_SEQ_GT (eq->sequence, new_event->sequence)) {
-                nal->unlock(nal, &flags);
                 RETURN(PTL_EQ_EMPTY);
         }
 
@@ -86,117 +67,75 @@ int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t * ev)
         if (eq->sequence != new_event->sequence) {
                 CERROR("DROPPING EVENT: eq seq %lu ev seq %lu\n",
                        eq->sequence, new_event->sequence);
-                rc = PTL_EQ_DROPPED;
-        } else {
-                rc = PTL_OK;
+                RETURN(PTL_EQ_DROPPED);
         }
 
         eq->sequence = new_event->sequence + 1;
-        nal->unlock(nal, &flags);
-        RETURN(rc);
+        RETURN(PTL_OK);
 }
 
-
-int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out)
+int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t * ev)
 {
-        int rc;
+        int which;
         
-        /* PtlEQGet does the handle checking */
-        while ((rc = PtlEQGet(eventq_in, event_out)) == PTL_EQ_EMPTY) {
-                nal_t *nal = ptl_hndl2nal(&eventq_in);
-                
-                if (nal->yield)
-                        nal->yield(nal);
-        }
-
-        return rc;
+        return (PtlEQPoll (&eventq, 1, 0, ev, &which));
 }
 
-#ifndef __KERNEL__
-#if 0
-static jmp_buf eq_jumpbuf;
-
-static void eq_timeout(int signal)
-{
-        sigset_t set;
-
-        /* signal will be automatically disabled in sig handler,
-         * must enable it before long jump
-         */
-        sigemptyset(&set);
-        sigaddset(&set, SIGALRM);
-        sigprocmask(SIG_UNBLOCK, &set, NULL);
-
-        longjmp(eq_jumpbuf, -1);
-}
-
-int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out,
-                      int timeout)
+int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out)
 {
-        static void (*prev) (int) = NULL;
-        static int left_over;
-        time_t time_at_start;
-        int rc;
-
-        if (setjmp(eq_jumpbuf)) {
-                signal(SIGALRM, prev);
-                alarm(left_over - timeout);
-                return PTL_EQ_EMPTY;
-        }
-
-        left_over = alarm(timeout);
-        prev = signal(SIGALRM, eq_timeout);
-        time_at_start = time(NULL);
-        if (left_over && left_over < timeout)
-                alarm(left_over);
-
-        rc = PtlEQWait(eventq_in, event_out);
-
-        signal(SIGALRM, prev);
-        alarm(left_over);       /* Should compute how long we waited */
-
-        return rc;
+        int which;
+        
+        return (PtlEQPoll (&eventq_in, 1, PTL_TIME_FOREVER, 
+                           event_out, &which));
 }
-#else
-#include <errno.h>
-
-/* FIXME
- * Here timeout need a trick with tcpnal, definitely unclean but OK for
- * this moment.
- */
 
-/* global variables defined by tcpnal */
-extern int __tcpnal_eqwait_timeout_value;
-extern int __tcpnal_eqwait_timedout;
-
-int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out,
-                      int timeout)
+int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout,
+              ptl_event_t *event_out, int *which_out)
 {
-        int rc;
+        nal_t        *nal;
+        int           i;
+        int           rc;
+        unsigned long flags;
+        
+        if (!ptl_init)
+                RETURN(PTL_NO_INIT);
 
-        if (!timeout)
-                return PtlEQWait(eventq_in, event_out);
+        if (neq_in < 1)
+                RETURN(PTL_EQ_INVALID);
+        
+        nal = ptl_hndl2nal(&eventqs_in[0]);
+        if (nal == NULL)
+                RETURN(PTL_EQ_INVALID);
 
-        __tcpnal_eqwait_timeout_value = timeout;
+        nal->lock(nal, &flags);
 
-        while ((rc = PtlEQGet(eventq_in, event_out)) == PTL_EQ_EMPTY) {
-                nal_t *nal = ptl_hndl2nal(&eventq_in);
+        for (;;) {
+                for (i = 0; i < neq_in; i++) {
+                        ptl_eq_t *eq = ptl_handle2usereq(&eventqs_in[i]);
+
+                        if (i > 0 &&
+                            ptl_hndl2nal(&eventqs_in[i]) != nal) {
+                                nal->unlock(nal, &flags);
+                                RETURN (PTL_EQ_INVALID);
+                        }
+
+                        /* size must be a power of 2 to handle a wrapped sequence # */
+                        LASSERT (eq->size != 0 &&
+                                 eq->size == LOWEST_BIT_SET (eq->size));
+
+                        rc = ptl_get_event (eq, event_out);
+                        if (rc != PTL_EQ_EMPTY) {
+                                nal->unlock(nal, &flags);
+                                *which_out = i;
+                                RETURN(rc);
+                        }
+                }
                 
-                if (nal->yield)
-                        nal->yield(nal);
-
-                if (__tcpnal_eqwait_timedout) {
-                        if (__tcpnal_eqwait_timedout != ETIMEDOUT)
-                                printf("Warning: yield return error %d\n",
-                                        __tcpnal_eqwait_timedout);
-                        rc = PTL_EQ_EMPTY;
-                        break;
+                if (timeout == 0) {
+                        nal->unlock(nal, &flags);
+                        RETURN (PTL_EQ_EMPTY);
                 }
+                        
+                timeout = nal->yield(nal, &flags, timeout);
         }
-
-        __tcpnal_eqwait_timeout_value = 0;
-
-        return rc;
 }
-#endif
-#endif /* __KERNEL__ */
index b5e7aa1..0e155da 100644 (file)
@@ -12,43 +12,35 @@ const char *ptl_err_str[] = {
         "PTL_OK",
         "PTL_SEGV",
 
-        "PTL_NOSPACE",
-        "PTL_INUSE",
+        "PTL_NO_SPACE",
+        "PTL_ME_IN_USE",
         "PTL_VAL_FAILED",
 
         "PTL_NAL_FAILED",
-        "PTL_NOINIT",
-        "PTL_INIT_DUP",
-        "PTL_INIT_INV",
-        "PTL_AC_INV_INDEX",
-
-        "PTL_INV_ASIZE",
-        "PTL_INV_HANDLE",
-        "PTL_INV_MD",
-        "PTL_INV_ME",
-        "PTL_INV_NI",
+        "PTL_NO_INIT",
+        "PTL_IFACE_DUP",
+        "PTL_IFACE_INVALID",
+
+        "PTL_HANDLE_INVALID",
+        "PTL_MD_INVALID",
+        "PTL_ME_INVALID",
 /* If you change these, you must update the number table in portals/errno.h */
-        "PTL_ILL_MD",
-        "PTL_INV_PROC",
-        "PTL_INV_PSIZE",
-        "PTL_INV_PTINDEX",
-        "PTL_INV_REG",
-
-        "PTL_INV_SR_INDX",
-        "PTL_ML_TOOLONG",
-        "PTL_ADDR_UNKNOWN",
-        "PTL_INV_EQ",
+        "PTL_PROCESS_INVALID",
+        "PTL_PT_INDEX_INVALID",
+
+        "PTL_SR_INDEX_INVALID",
+        "PTL_EQ_INVALID",
         "PTL_EQ_DROPPED",
 
         "PTL_EQ_EMPTY",
-        "PTL_NOUPDATE",
+        "PTL_MD_NO_UPDATE",
         "PTL_FAIL",
-        "PTL_NOT_IMPLEMENTED",
-        "PTL_NO_ACK",
 
         "PTL_IOV_TOO_MANY",
         "PTL_IOV_TOO_SMALL",
 
-        "PTL_EQ_INUSE",
+        "PTL_EQ_IN_USE",
+
+        "PTL_MAX_ERRNO"
 };
 /* If you change these, you must update the number table in portals/errno.h */
index e2921ac..e41bad8 100644 (file)
 #include <portals/api-support.h>
 
 int ptl_init;
-unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL |
-                                            S_GMNAL | S_IBNAL);
-unsigned int portal_debug = (D_WARNING | D_DLMTRACE | D_ERROR | D_EMERG | D_HA |
-                             D_RPCTRACE | D_VFSTRACE | D_MALLOC);
-unsigned int portal_cerror = 1;
-unsigned int portal_printk;
-unsigned int portal_stack;
-
-#ifdef __KERNEL__
-atomic_t portal_kmemory = ATOMIC_INIT(0);
-#endif
 
 int __p30_initialized;
 int __p30_myr_initialized;
@@ -44,20 +33,20 @@ int __p30_ip_initialized;
 ptl_handle_ni_t __myr_ni_handle;
 ptl_handle_ni_t __ip_ni_handle;
 
-int __p30_myr_timeout = 10;
-int __p30_ip_timeout;
-
-int PtlInit(void)
+int PtlInit(int *max_interfaces)
 {
+        if (max_interfaces != NULL)
+                *max_interfaces = NAL_ENUM_END_MARKER;
 
         if (ptl_init)
                 return PTL_OK;
 
+        LASSERT(!strcmp(ptl_err_str[PTL_MAX_ERRNO], "PTL_MAX_ERRNO"));
+
         ptl_ni_init();
         ptl_me_init();
         ptl_eq_init();
         ptl_init = 1;
-        __p30_initialized = 1;
 
         return PTL_OK;
 }
@@ -72,3 +61,9 @@ void PtlFini(void)
         ptl_ni_fini();
         ptl_init = 0;
 }
+
+
+void PtlSnprintHandle(char *str, int len, ptl_handle_any_t h)
+{
+        snprintf(str, len, "0x%lx."LPX64, h.nal_idx, h.cookie);
+}
index 18eea91..02082c6 100644 (file)
@@ -110,7 +110,7 @@ int PtlNIInit(ptl_interface_t interface, ptl_pt_index_t ptl_size,
         int i;
 
         if (!ptl_init)
-                return PTL_NOINIT;
+                return PTL_NO_INIT;
 
         ptl_ni_init_mutex_enter ();
 
@@ -136,7 +136,7 @@ int PtlNIInit(ptl_interface_t interface, ptl_pt_index_t ptl_size,
                 if (nal->shutdown)
                         nal->shutdown (nal, ptl_num_interfaces);
                 ptl_ni_init_mutex_exit ();
-                return PTL_NOSPACE;
+                return PTL_NO_SPACE;
         }
 
         handle->nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | ptl_num_interfaces;
@@ -157,14 +157,14 @@ int PtlNIFini(ptl_handle_ni_t ni)
         int rc;
 
         if (!ptl_init)
-                return PTL_NOINIT;
+                return PTL_NO_INIT;
 
         ptl_ni_init_mutex_enter ();
 
         nal = ptl_hndl2nal (&ni);
         if (nal == NULL) {
                 ptl_ni_init_mutex_exit ();
-                return PTL_INV_HANDLE;
+                return PTL_HANDLE_INVALID;
         }
 
         idx = ni.nal_idx & NI_HANDLE_MASK;
index d23a6aa..9c82c30 100644 (file)
@@ -33,12 +33,12 @@ static int do_forward(ptl_handle_any_t any_h, int cmd, void *argbuf,
 
         if (!ptl_init) {
                 CERROR("Not initialized\n");
-                return PTL_NOINIT;
+                return PTL_NO_INIT;
         }
 
         nal = ptl_hndl2nal(&any_h);
         if (!nal)
-                return PTL_INV_HANDLE;
+                return PTL_HANDLE_INVALID;
 
         nal->forward(nal, cmd, argbuf, argsize, retbuf, retsize);
 
@@ -194,7 +194,7 @@ int PtlMEInsert(ptl_handle_me_t current_in, ptl_process_id_t match_id_in,
                         sizeof(ret));
 
         if (rc != PTL_OK)
-                return (rc == PTL_INV_HANDLE) ? PTL_INV_ME : rc;
+                return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
 
         if (handle_out) {
                 handle_out->nal_idx = current_in.nal_idx;
@@ -216,7 +216,7 @@ int PtlMEUnlink(ptl_handle_me_t current_in)
                         sizeof(ret));
 
         if (rc != PTL_OK)
-                return (rc == PTL_INV_HANDLE) ? PTL_INV_ME : rc;
+                return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
 
         return ret.rc;
 }
@@ -250,7 +250,7 @@ int PtlMEDump(ptl_handle_me_t current_in)
                         sizeof(ret));
 
         if (rc != PTL_OK)
-                return (rc == PTL_INV_HANDLE) ? PTL_INV_ME : rc;
+                return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
 
         return ret.rc;
 }
@@ -263,16 +263,16 @@ static int validate_md(ptl_handle_any_t current_in, ptl_md_t md_in)
 
         if (!ptl_init) {
                 CERROR("PtlMDAttach/Bind/Update: Not initialized\n");
-                return PTL_NOINIT;
+                return PTL_NO_INIT;
         }
 
         nal = ptl_hndl2nal(&current_in);
         if (!nal)
-                return PTL_INV_HANDLE;
+                return PTL_HANDLE_INVALID;
 
         if (nal->validate != NULL)                /* nal->validate not a NOOP */
         {
-                if ((md_in.options & PTL_MD_IOV) == 0)        /* contiguous */
+                if ((md_in.options & PTL_MD_IOVEC) == 0) /* contiguous */
                 {
                         rc = nal->validate (nal, md_in.start, md_in.length);
                         if (rc)
@@ -296,7 +296,7 @@ static int validate_md(ptl_handle_any_t current_in, ptl_md_t md_in)
 
 static ptl_handle_eq_t md2eq (ptl_md_t *md)
 {
-        if (PtlHandleEqual (md->eventq, PTL_EQ_NONE))
+        if (PtlHandleIsEqual (md->eventq, PTL_EQ_NONE))
                 return (PTL_EQ_NONE);
         
         return (ptl_handle2usereq (&md->eventq)->cb_eq_handle);
@@ -322,7 +322,7 @@ int PtlMDAttach(ptl_handle_me_t me_in, ptl_md_t md_in,
         }
 
         if (rc != PTL_OK)
-                return (rc == PTL_INV_HANDLE) ? PTL_INV_ME : rc;
+                return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
 
         if (handle_out) {
                 handle_out->nal_idx = me_in.nal_idx;
@@ -334,7 +334,7 @@ int PtlMDAttach(ptl_handle_me_t me_in, ptl_md_t md_in,
 
 
 int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in,
-                       ptl_handle_md_t * handle_out)
+              ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out)
 {
         PtlMDBind_in args;
         PtlMDBind_out ret;
@@ -347,6 +347,7 @@ int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in,
         args.eq_in = md2eq(&md_in);
         args.ni_in = ni_in;
         args.md_in = md_in;
+        args.unlink_in = unlink_in;
 
         rc = do_forward(ni_in, PTL_MDBIND, 
                         &args, sizeof(args), &ret, sizeof(ret));
@@ -379,13 +380,13 @@ int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t *old_inout,
         if (new_inout) {
                 rc = validate_md (md_in, *new_inout);
                 if (rc != PTL_OK)
-                        return (rc == PTL_INV_HANDLE) ? PTL_INV_MD : rc;
+                        return (rc == PTL_HANDLE_INVALID) ? PTL_MD_INVALID : rc;
                 args.new_inout = *new_inout;
                 args.new_inout_valid = 1;
         } else
                 args.new_inout_valid = 0;
 
-        if (PtlHandleEqual (testq_in, PTL_EQ_NONE)) {
+        if (PtlHandleIsEqual (testq_in, PTL_EQ_NONE)) {
                 args.testq_in = PTL_EQ_NONE;
                 args.sequence_in = -1;
         } else {
@@ -398,7 +399,7 @@ int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t *old_inout,
         rc = do_forward(md_in, PTL_MDUPDATE, &args, sizeof(args), &ret,
                         sizeof(ret));
         if (rc != PTL_OK)
-                return (rc == PTL_INV_HANDLE) ? PTL_INV_MD : rc;
+                return (rc == PTL_HANDLE_INVALID) ? PTL_MD_INVALID : rc;
 
         if (old_inout)
                 *old_inout = ret.old_inout;
@@ -416,7 +417,7 @@ int PtlMDUnlink(ptl_handle_md_t md_in)
         rc = do_forward(md_in, PTL_MDUNLINK, &args, sizeof(args), &ret,
                         sizeof(ret));
         if (rc != PTL_OK)
-                return (rc == PTL_INV_HANDLE) ? PTL_INV_MD : rc;
+                return (rc == PTL_HANDLE_INVALID) ? PTL_MD_INVALID : rc;
 
         return ret.rc;
 }
@@ -433,11 +434,11 @@ int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count,
         nal_t *nal;
 
         if (!ptl_init)
-                return PTL_NOINIT;
+                return PTL_NO_INIT;
         
         nal = ptl_hndl2nal (&interface);
         if (nal == NULL)
-                return PTL_INV_HANDLE;
+                return PTL_HANDLE_INVALID;
 
         if (count != LOWEST_BIT_SET(count)) {   /* not a power of 2 already */
                 do {                    /* knock off all but the top bit... */
@@ -452,7 +453,7 @@ int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count,
 
         PORTAL_ALLOC(ev, count * sizeof(ptl_event_t));
         if (!ev)
-                return PTL_NOSPACE;
+                return PTL_NO_SPACE;
 
         for (i = 0; i < count; i++)
                 ev[i].sequence = 0;
@@ -478,7 +479,7 @@ int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count,
 
         PORTAL_ALLOC(eq, sizeof(*eq));
         if (!eq) {
-                rc = PTL_NOSPACE;
+                rc = PTL_NO_SPACE;
                 goto fail;
         }
 
index ce343c1..8a91860 100644 (file)
@@ -52,7 +52,7 @@ int do_PtlEQAlloc_internal(nal_cb_t * nal, void *private, void *v_args,
 
         eq = lib_eq_alloc (nal);
         if (eq == NULL)
-                return (ret->rc = PTL_NOSPACE);
+                return (ret->rc = PTL_NO_SPACE);
 
         state_lock(nal, &flags);
 
@@ -104,9 +104,9 @@ int do_PtlEQFree_internal(nal_cb_t * nal, void *private, void *v_args,
 
         eq = ptl_handle2eq(&args->eventq_in, nal);
         if (eq == NULL) {
-                ret->rc = PTL_INV_EQ;
+                ret->rc = PTL_EQ_INVALID;
         } else if (eq->eq_refcount != 0) {
-                ret->rc = PTL_EQ_INUSE;
+                ret->rc = PTL_EQ_IN_USE;
         } else {
                 if (nal->cb_unmap != NULL) {
                         struct iovec iov = {
index d4d8860..61ef465 100644 (file)
@@ -63,7 +63,7 @@ lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int n, int size)
 
         space = nal->cb_malloc (nal, n * size);
         if (space == NULL)
-                return (PTL_NOSPACE);
+                return (PTL_NO_SPACE);
 
         INIT_LIST_HEAD (&fl->fl_list);
         fl->fl_objs = space;
@@ -179,7 +179,7 @@ lib_setup_handle_hash (nal_cb_t *nal)
                 (struct list_head *)nal->cb_malloc (nal, ni->ni_lh_hash_size
                                                     * sizeof (struct list_head));
         if (ni->ni_lh_hash_table == NULL)
-                return (PTL_NOSPACE);
+                return (PTL_NO_SPACE);
         
         for (i = 0; i < ni->ni_lh_hash_size; i++)
                 INIT_LIST_HEAD (&ni->ni_lh_hash_table[i]);
@@ -295,7 +295,7 @@ lib_init(nal_cb_t * nal, ptl_nid_t nid, ptl_pid_t pid, int gsize,
 
         ni->tbl.tbl = nal->cb_malloc(nal, sizeof(struct list_head) * ptl_size);
         if (ni->tbl.tbl == NULL) {
-                rc = PTL_NOSPACE;
+                rc = PTL_NO_SPACE;
                 goto out;
         }
 
index a1ed583..9a391cd 100644 (file)
 #include <portals/lib-p30.h>
 #include <portals/arg-blocks.h>
 
-/*
- * must be called with state lock held
- */
+/* must be called with state lock held */
 void lib_md_unlink(nal_cb_t * nal, lib_md_t * md)
 {
-        lib_me_t *me = md->me;
+        if ((md->md_flags & PTL_MD_FLAG_ZOMBIE) == 0) {
+                /* first unlink attempt... */
+                lib_me_t *me = md->me;
+
+                md->md_flags |= PTL_MD_FLAG_ZOMBIE;
+
+                /* Disassociate from ME (if any), and unlink it if it was created
+                 * with PTL_UNLINK */
+                if (me != NULL) {
+                        me->md = NULL;
+                        if (me->unlink == PTL_UNLINK)
+                                lib_me_unlink(nal, me);
+                }
+
+                /* ensure all future handle lookups fail */
+                lib_invalidate_handle(nal, &md->md_lh);
+        }
 
         if (md->pending != 0) {
                 CDEBUG(D_NET, "Queueing unlink of md %p\n", md);
-                md->md_flags |= PTL_MD_FLAG_UNLINK;
                 return;
         }
 
@@ -52,23 +65,16 @@ void lib_md_unlink(nal_cb_t * nal, lib_md_t * md)
                 if (nal->cb_unmap_pages != NULL)
                         nal->cb_unmap_pages (nal, md->md_niov, md->md_iov.kiov, 
                                              &md->md_addrkey);
-        } else if (nal->cb_unmap != NULL)
+        } else if (nal->cb_unmap != NULL) {
                 nal->cb_unmap (nal, md->md_niov, md->md_iov.iov, 
                                &md->md_addrkey);
-
-        if (me) {
-                me->md = NULL;
-                if (me->unlink == PTL_UNLINK)
-                        lib_me_unlink(nal, me);
         }
 
-        if (md->eq != NULL)
-        {
+        if (md->eq != NULL) {
                 md->eq->eq_refcount--;
                 LASSERT (md->eq->eq_refcount >= 0);
         }
 
-        lib_invalidate_handle (nal, &md->md_lh);
         list_del (&md->md_list);
         lib_md_free(nal, md);
 }
@@ -77,8 +83,6 @@ void lib_md_unlink(nal_cb_t * nal, lib_md_t * md)
 static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private,
                         ptl_md_t *md, ptl_handle_eq_t *eqh, int unlink)
 {
-        const int     max_size_opts = PTL_MD_AUTO_UNLINK |
-                                      PTL_MD_MAX_SIZE;
         lib_eq_t     *eq = NULL;
         int           rc;
         int           i;
@@ -88,39 +92,45 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private,
          * otherwise caller may only lib_md_free() it.
          */
 
-        if (!PtlHandleEqual (*eqh, PTL_EQ_NONE)) {
+        if (!PtlHandleIsEqual (*eqh, PTL_EQ_NONE)) {
                 eq = ptl_handle2eq(eqh, nal);
                 if (eq == NULL)
-                        return PTL_INV_EQ;
+                        return PTL_EQ_INVALID;
         }
 
         /* Must check this _before_ allocation.  Also, note that non-iov
          * MDs must set md_niov to 0. */
-        LASSERT((md->options & (PTL_MD_IOV | PTL_MD_KIOV)) == 0 ||
+        LASSERT((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0 ||
                 md->niov <= PTL_MD_MAX_IOV);
 
-        if ((md->options & max_size_opts) != 0 && /* max size used */
+        /* This implementation doesn't know how to create START events or
+         * disable END events.  Best to LASSERT our caller is compliant so
+         * we find out quickly...  */
+        LASSERT (PtlHandleIsEqual (*eqh, PTL_EQ_NONE) ||
+                 ((md->options & PTL_MD_EVENT_START_DISABLE) != 0 &&
+                  (md->options & PTL_MD_EVENT_END_DISABLE) == 0));
+
+        if ((md->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
             (md->max_size < 0 || md->max_size > md->length)) // illegal max_size
-                return PTL_INV_MD;
+                return PTL_MD_INVALID;
 
         new->me = NULL;
         new->start = md->start;
         new->length = md->length;
         new->offset = 0;
         new->max_size = md->max_size;
-        new->unlink = unlink;
         new->options = md->options;
         new->user_ptr = md->user_ptr;
         new->eq = eq;
         new->threshold = md->threshold;
         new->pending = 0;
-        new->md_flags = 0;
+        new->md_flags = (unlink == PTL_UNLINK) ? PTL_MD_FLAG_AUTO_UNLINK : 0;
 
-        if ((md->options & PTL_MD_IOV) != 0) {
+        if ((md->options & PTL_MD_IOVEC) != 0) {
                 int total_length = 0;
 
                 if ((md->options & PTL_MD_KIOV) != 0) /* Can't specify both */
-                        return PTL_INV_MD; 
+                        return PTL_MD_INVALID; 
 
                 new->md_niov = md->niov;
                 
@@ -147,14 +157,14 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private,
                 }
         } else if ((md->options & PTL_MD_KIOV) != 0) {
 #ifndef __KERNEL__
-                return PTL_INV_MD;
+                return PTL_MD_INVALID;
 #else
                 int total_length = 0;
                 
                 /* Trap attempt to use paged I/O if unsupported early. */
                 if (nal->cb_send_pages == NULL ||
                     nal->cb_recv_pages == NULL)
-                        return PTL_INV_MD;
+                        return PTL_MD_INVALID;
 
                 new->md_niov = md->niov;
 
@@ -219,7 +229,7 @@ void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md, ptl_md_t * new)
         new->options = md->options;
         new->user_ptr = md->user_ptr;
         ptl_eq2handle(&new->eventq, md->eq);
-        new->niov = ((md->options & (PTL_MD_IOV | PTL_MD_KIOV)) == 0) ? 0 : md->md_niov;
+        new->niov = ((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0) ? 0 : md->md_niov;
 }
 
 int do_PtlMDAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
@@ -240,21 +250,21 @@ int do_PtlMDAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
         lib_md_t *md;
         unsigned long flags;
 
-        if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOV)) != 0 &&
+        if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 &&
             args->md_in.niov > PTL_MD_MAX_IOV) /* too many fragments */
                 return (ret->rc = PTL_IOV_TOO_MANY);
 
         md = lib_md_alloc(nal, &args->md_in);
         if (md == NULL)
-                return (ret->rc = PTL_NOSPACE);
+                return (ret->rc = PTL_NO_SPACE);
 
         state_lock(nal, &flags);
 
         me = ptl_handle2me(&args->me_in, nal);
         if (me == NULL) {
-                ret->rc = PTL_INV_ME;
+                ret->rc = PTL_ME_INVALID;
         } else if (me->md != NULL) {
-                ret->rc = PTL_INUSE;
+                ret->rc = PTL_ME_IN_USE;
         } else {
                 ret->rc = lib_md_build(nal, md, private, &args->md_in,
                                        &args->eq_in, args->unlink_in);
@@ -292,18 +302,18 @@ int do_PtlMDBind(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
         lib_md_t *md;
         unsigned long flags;
 
-        if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOV)) != 0 &&
+        if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 &&
             args->md_in.niov > PTL_MD_MAX_IOV) /* too many fragments */
                 return (ret->rc = PTL_IOV_TOO_MANY);
 
         md = lib_md_alloc(nal, &args->md_in);
         if (md == NULL)
-                return (ret->rc = PTL_NOSPACE);
+                return (ret->rc = PTL_NO_SPACE);
 
         state_lock(nal, &flags);
 
-        ret->rc = lib_md_build(nal, md, private,
-                               &args->md_in, &args->eq_in, PTL_UNLINK);
+        ret->rc = lib_md_build(nal, md, private, &args->md_in, 
+                               &args->eq_in, args->unlink_in);
 
         if (ret->rc == PTL_OK) {
                 ptl_md2handle(&ret->handle_out, md);
@@ -331,7 +341,7 @@ int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
         md = ptl_handle2md(&args->md_in, nal);
         if (md == NULL) {
                 state_unlock(nal, &flags);
-                return (ret->rc = PTL_INV_MD);
+                return (ret->rc = PTL_MD_INVALID);
         }
 
         /* If the MD is busy, lib_md_unlink just marks it for deletion, and
@@ -343,7 +353,7 @@ int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
                 memset(&ev, 0, sizeof(ev));
 
                 ev.type = PTL_EVENT_UNLINK;
-                ev.status = PTL_OK;
+                ev.ni_fail_type = PTL_OK;
                 ev.unlinked = 1;
                 lib_md_deconstruct(nal, md, &ev.mem_desc);
                 
@@ -385,7 +395,7 @@ int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *v_args,
 
         md = ptl_handle2md(&args->md_in, nal);
         if (md == NULL) {
-                 ret->rc = PTL_INV_MD;
+                 ret->rc = PTL_MD_INVALID;
                  goto out;
         }
 
@@ -399,8 +409,8 @@ int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *v_args,
 
         /* XXX fttb, the new MD must be the same type wrt fragmentation */
         if (((new->options ^ md->options) & 
-             (PTL_MD_IOV | PTL_MD_KIOV)) != 0) {
-                ret->rc = PTL_INV_MD;
+             (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0) {
+                ret->rc = PTL_MD_INVALID;
                 goto out;
         }
 
@@ -414,30 +424,32 @@ int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *v_args,
                 goto out;
         }
 
-        if (!PtlHandleEqual (args->testq_in, PTL_EQ_NONE)) {
+        if (!PtlHandleIsEqual (args->testq_in, PTL_EQ_NONE)) {
                 test_eq = ptl_handle2eq(&args->testq_in, nal);
                 if (test_eq == NULL) {
-                        ret->rc = PTL_INV_EQ;
+                        ret->rc = PTL_EQ_INVALID;
                         goto out;
                 }
         }
 
         if (md->pending != 0) {
-                        ret->rc = PTL_NOUPDATE;
+                        ret->rc = PTL_MD_NO_UPDATE;
                         goto out;
         }
 
         if (test_eq == NULL ||
             test_eq->sequence == args->sequence_in) {
                 lib_me_t *me = md->me;
+                int       unlink = (md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) ?
+                                   PTL_UNLINK : PTL_RETAIN;
 
                 // #warning this does not track eq refcounts properly 
                 ret->rc = lib_md_build(nal, md, private,
-                                       new, &new->eventq, md->unlink);
+                                       new, &new->eventq, unlink);
 
                 md->me = me;
         } else {
-                ret->rc = PTL_NOUPDATE;
+                ret->rc = PTL_MD_NO_UPDATE;
         }
 
  out:
index 31ac214..e3c46ea 100644 (file)
@@ -45,15 +45,15 @@ int do_PtlMEAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
         lib_me_t *me;
 
         if (args->index_in >= tbl->size)
-                return ret->rc = PTL_INV_PTINDEX;
+                return ret->rc = PTL_PT_INDEX_INVALID;
 
         /* Should check for valid matchid, but not yet */
         if (0)
-                return ret->rc = PTL_INV_PROC;
+                return ret->rc = PTL_PROCESS_INVALID;
 
         me = lib_me_alloc (nal);
         if (me == NULL)
-                return (ret->rc = PTL_NOSPACE);
+                return (ret->rc = PTL_NO_SPACE);
 
         state_lock(nal, &flags);
 
@@ -87,7 +87,7 @@ int do_PtlMEInsert(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
 
         new = lib_me_alloc (nal);
         if (new == NULL)
-                return (ret->rc = PTL_NOSPACE);
+                return (ret->rc = PTL_NO_SPACE);
 
         /* Should check for valid matchid, but not yet */
 
@@ -98,7 +98,7 @@ int do_PtlMEInsert(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
                 lib_me_free (nal, new);
 
                 state_unlock (nal, &flags);
-                return (ret->rc = PTL_INV_ME);
+                return (ret->rc = PTL_ME_INVALID);
         }
 
         new->match_id = args->match_id_in;
@@ -132,7 +132,7 @@ int do_PtlMEUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
 
         me = ptl_handle2me(&args->current_in, nal);
         if (me == NULL) {
-                ret->rc = PTL_INV_ME;
+                ret->rc = PTL_ME_INVALID;
         } else {
                 lib_me_unlink(nal, me);
                 ret->rc = PTL_OK;
@@ -174,7 +174,7 @@ int do_PtlTblDump(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
         unsigned long flags;
 
         if (args->index_in < 0 || args->index_in >= tbl->size)
-                return ret->rc = PTL_INV_PTINDEX;
+                return ret->rc = PTL_PT_INDEX_INVALID;
 
         nal->cb_printf(nal, "Portal table index %d\n", args->index_in);
 
@@ -200,7 +200,7 @@ int do_PtlMEDump(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
 
         me = ptl_handle2me(&args->current_in, nal);
         if (me == NULL) {
-                ret->rc = PTL_INV_ME;
+                ret->rc = PTL_ME_INVALID;
         } else {
                 lib_me_dump(nal, me);
                 ret->rc = PTL_OK;
index ecd543c..477ddf8 100644 (file)
 #include <portals/lib-p30.h>
 #include <portals/arg-blocks.h>
 
-/*
- * Right now it does not check access control lists.
- *
- * We only support one MD per ME, which is how the Portals 3.1 spec is written.
- * All previous complication is removed.
- */
-
-static lib_me_t *
-lib_find_me(nal_cb_t *nal, int index, int op_mask, ptl_nid_t src_nid,
-            ptl_pid_t src_pid, ptl_size_t rlength, ptl_size_t roffset,
-            ptl_match_bits_t match_bits, ptl_size_t *mlength_out,
-            ptl_size_t *offset_out, int *unlink_out)
+/* forward ref */
+static void lib_commit_md (nal_cb_t *nal, lib_md_t *md, lib_msg_t *msg);
+
+static lib_md_t *
+lib_match_md(nal_cb_t *nal, int index, int op_mask, 
+             ptl_nid_t src_nid, ptl_pid_t src_pid, 
+             ptl_size_t rlength, ptl_size_t roffset,
+             ptl_match_bits_t match_bits, lib_msg_t *msg,
+             ptl_size_t *mlength_out, ptl_size_t *offset_out)
 {
         lib_ni_t         *ni = &nal->ni;
         struct list_head *match_list = &ni->tbl.tbl[index];
@@ -53,7 +50,6 @@ lib_find_me(nal_cb_t *nal, int index, int op_mask, ptl_nid_t src_nid,
         lib_md_t         *md;
         ptl_size_t        mlength;
         ptl_size_t        offset;
-
         ENTRY;
 
         CDEBUG (D_NET, "Request from "LPU64".%d of length %d into portal %d "
@@ -75,14 +71,14 @@ lib_find_me(nal_cb_t *nal, int index, int op_mask, ptl_nid_t src_nid,
 
                 LASSERT (me == md->me);
 
-                /* MD deactivated */
-                if (md->threshold == 0)
-                        continue;
-
                 /* mismatched MD op */
                 if ((md->options & op_mask) == 0)
                         continue;
 
+                /* MD exhausted */
+                if (lib_md_exhausted(md))
+                        continue;
+
                 /* mismatched ME nid/pid? */
                 if (me->match_id.nid != PTL_NID_ANY &&
                     me->match_id.nid != src_nid)
@@ -103,10 +99,12 @@ lib_find_me(nal_cb_t *nal, int index, int op_mask, ptl_nid_t src_nid,
                 else
                         offset = roffset;
 
-                mlength = md->length - offset;
-                if ((md->options & PTL_MD_MAX_SIZE) != 0 &&
-                    mlength > md->max_size)
+                if ((md->options & PTL_MD_MAX_SIZE) != 0) {
                         mlength = md->max_size;
+                        LASSERT (md->offset + mlength <= md->length);
+                } else {
+                        mlength = md->length - offset;
+                }
 
                 if (rlength <= mlength) {        /* fits in allowed space */
                         mlength = rlength;
@@ -118,13 +116,38 @@ lib_find_me(nal_cb_t *nal, int index, int op_mask, ptl_nid_t src_nid,
                         goto failed;
                 }
 
+                /* Commit to this ME/MD */
+                CDEBUG(D_NET, "Incoming %s index %x from "LPU64"/%u of "
+                       "length %d/%d into md "LPX64" [%d] + %d\n", 
+                       (op_mask == PTL_MD_OP_PUT) ? "put" : "get",
+                       index, src_nid, src_pid, mlength, rlength, 
+                       md->md_lh.lh_cookie, md->md_niov, offset);
+
+                lib_commit_md(nal, md, msg);
                 md->offset = offset + mlength;
 
+                /* NB Caller sets ev.type and ev.hdr_data */
+                msg->ev.initiator.nid = src_nid;
+                msg->ev.initiator.pid = src_pid;
+                msg->ev.portal = index;
+                msg->ev.match_bits = match_bits;
+                msg->ev.rlength = rlength;
+                msg->ev.mlength = mlength;
+                msg->ev.offset = offset;
+
+                lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
+
                 *offset_out = offset;
                 *mlength_out = mlength;
-                *unlink_out = ((md->options & PTL_MD_AUTO_UNLINK) != 0 &&
-                               md->offset >= (md->length - md->max_size));
-                RETURN (me);
+
+                /* Auto-unlink NOW, so the ME gets unlinked if required.
+                 * We bumped md->pending above so the MD just gets flagged
+                 * for unlink when it is finalized. */
+                if ((md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) != 0 &&
+                    lib_md_exhausted(md))
+                        lib_md_unlink(nal, md);
+
+                RETURN (md);
         }
 
  failed:
@@ -627,9 +650,7 @@ parse_put(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
         lib_ni_t        *ni = &nal->ni;
         ptl_size_t       mlength = 0;
         ptl_size_t       offset = 0;
-        int              unlink = 0;
         ptl_err_t        rc;
-        lib_me_t        *me;
         lib_md_t        *md;
         unsigned long    flags;
                 
@@ -640,36 +661,19 @@ parse_put(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
 
         state_lock(nal, &flags);
 
-        me = lib_find_me(nal, hdr->msg.put.ptl_index, PTL_MD_OP_PUT,
-                         hdr->src_nid, hdr->src_pid,
-                         hdr->payload_length, hdr->msg.put.offset,
-                         hdr->msg.put.match_bits,
-                         &mlength, &offset, &unlink);
-        if (me == NULL) {
+        md = lib_match_md(nal, hdr->msg.put.ptl_index, PTL_MD_OP_PUT,
+                          hdr->src_nid, hdr->src_pid,
+                          hdr->payload_length, hdr->msg.put.offset,
+                          hdr->msg.put.match_bits, msg,
+                          &mlength, &offset);
+        if (md == NULL) {
                 state_unlock(nal, &flags);
                 return (PTL_FAIL);
         }
 
-        md = me->md;
-        CDEBUG(D_NET, "Incoming put index %x from "LPU64"/%u of length %d/%d "
-               "into md "LPX64" [%d] + %d\n", hdr->msg.put.ptl_index,
-               hdr->src_nid, hdr->src_pid, mlength, hdr->payload_length, 
-               md->md_lh.lh_cookie, md->md_niov, offset);
-
-        lib_commit_md(nal, md, msg);
-
-        msg->ev.type = PTL_EVENT_PUT;
-        msg->ev.initiator.nid = hdr->src_nid;
-        msg->ev.initiator.pid = hdr->src_pid;
-        msg->ev.portal = hdr->msg.put.ptl_index;
-        msg->ev.match_bits = hdr->msg.put.match_bits;
-        msg->ev.rlength = hdr->payload_length;
-        msg->ev.mlength = mlength;
-        msg->ev.offset = offset;
+        msg->ev.type = PTL_EVENT_PUT_END;
         msg->ev.hdr_data = hdr->msg.put.hdr_data;
 
-        lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
-
         if (!ptl_is_wire_handle_none(&hdr->msg.put.ack_wmd) &&
             !(md->options & PTL_MD_ACK_DISABLE)) {
                 msg->ack_wmd = hdr->msg.put.ack_wmd;
@@ -678,11 +682,6 @@ parse_put(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
         ni->counters.recv_count++;
         ni->counters.recv_length += mlength;
 
-        /* only unlink after MD's pending count has been bumped in
-         * lib_commit_md() otherwise lib_me_unlink() will nuke it */
-        if (unlink)
-                lib_me_unlink (nal, me);
-
         state_unlock(nal, &flags);
 
         rc = lib_recv(nal, private, msg, md, offset, mlength,
@@ -700,8 +699,6 @@ parse_get(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
         lib_ni_t        *ni = &nal->ni;
         ptl_size_t       mlength = 0;
         ptl_size_t       offset = 0;
-        int              unlink = 0;
-        lib_me_t        *me;
         lib_md_t        *md;
         ptl_hdr_t        reply;
         unsigned long    flags;
@@ -715,44 +712,22 @@ parse_get(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
 
         state_lock(nal, &flags);
 
-        me = lib_find_me(nal, hdr->msg.get.ptl_index, PTL_MD_OP_GET,
-                         hdr->src_nid, hdr->src_pid,
-                         hdr->msg.get.sink_length, hdr->msg.get.src_offset,
-                         hdr->msg.get.match_bits,
-                         &mlength, &offset, &unlink);
-        if (me == NULL) {
+        md = lib_match_md(nal, hdr->msg.get.ptl_index, PTL_MD_OP_GET,
+                          hdr->src_nid, hdr->src_pid,
+                          hdr->msg.get.sink_length, hdr->msg.get.src_offset,
+                          hdr->msg.get.match_bits, msg,
+                          &mlength, &offset);
+        if (md == NULL) {
                 state_unlock(nal, &flags);
                 return (PTL_FAIL);
         }
 
-        md = me->md;
-        CDEBUG(D_NET, "Incoming get index %d from "LPU64".%u of length %d/%d "
-               "from md "LPX64" [%d] + %d\n", hdr->msg.get.ptl_index,
-               hdr->src_nid, hdr->src_pid, mlength, hdr->payload_length, 
-               md->md_lh.lh_cookie, md->md_niov, offset);
-
-        lib_commit_md(nal, md, msg);
-
-        msg->ev.type = PTL_EVENT_GET;
-        msg->ev.initiator.nid = hdr->src_nid;
-        msg->ev.initiator.pid = hdr->src_pid;
-        msg->ev.portal = hdr->msg.get.ptl_index;
-        msg->ev.match_bits = hdr->msg.get.match_bits;
-        msg->ev.rlength = hdr->payload_length;
-        msg->ev.mlength = mlength;
-        msg->ev.offset = offset;
+        msg->ev.type = PTL_EVENT_GET_END;
         msg->ev.hdr_data = 0;
 
-        lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
-
         ni->counters.send_count++;
         ni->counters.send_length += mlength;
 
-        /* only unlink after MD's refcount has been bumped in
-         * lib_commit_md() otherwise lib_me_unlink() will nuke it */
-        if (unlink)
-                lib_me_unlink (nal, me);
-
         state_unlock(nal, &flags);
 
         memset (&reply, 0, sizeof (reply));
@@ -828,7 +803,7 @@ parse_reply(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
 
         lib_commit_md(nal, md, msg);
 
-        msg->ev.type = PTL_EVENT_REPLY;
+        msg->ev.type = PTL_EVENT_REPLY_END;
         msg->ev.initiator.nid = hdr->src_nid;
         msg->ev.initiator.pid = hdr->src_pid;
         msg->ev.rlength = rlength;
@@ -1044,8 +1019,6 @@ lib_parse(nal_cb_t *nal, ptl_hdr_t *hdr, void *private)
                 return;
         }
 
-        do_gettimeofday(&msg->ev.arrival_time);
-
         switch (hdr->type) {
         case PTL_MSG_ACK:
                 rc = parse_ack(nal, hdr, private, msg);
@@ -1112,14 +1085,14 @@ do_PtlPut(nal_cb_t *nal, void *private, void *v_args, void *v_ret)
         {
                 CERROR(LPU64": Dropping PUT to "LPU64": simulated failure\n",
                        nal->ni.nid, id->nid);
-                return (ret->rc = PTL_INV_PROC);
+                return (ret->rc = PTL_PROCESS_INVALID);
         }
 
         msg = lib_msg_alloc(nal);
         if (msg == NULL) {
                 CERROR(LPU64": Dropping PUT to "LPU64": ENOMEM on lib_msg_t\n",
                        ni->nid, id->nid);
-                return (ret->rc = PTL_NOSPACE);
+                return (ret->rc = PTL_NO_SPACE);
         }
 
         state_lock(nal, &flags);
@@ -1129,7 +1102,7 @@ do_PtlPut(nal_cb_t *nal, void *private, void *v_args, void *v_ret)
                 lib_msg_free(nal, msg);
                 state_unlock(nal, &flags);
         
-                return (ret->rc = PTL_INV_MD);
+                return (ret->rc = PTL_MD_INVALID);
         }
 
         CDEBUG(D_NET, "PtlPut -> %Lu: %lu\n", (unsigned long long)id->nid,
@@ -1158,7 +1131,7 @@ do_PtlPut(nal_cb_t *nal, void *private, void *v_args, void *v_ret)
 
         lib_commit_md(nal, md, msg);
         
-        msg->ev.type = PTL_EVENT_SENT;
+        msg->ev.type = PTL_EVENT_SEND_END;
         msg->ev.initiator.nid = ni->nid;
         msg->ev.initiator.pid = ni->pid;
         msg->ev.portal = args->portal_in;
@@ -1188,19 +1161,18 @@ do_PtlPut(nal_cb_t *nal, void *private, void *v_args, void *v_ret)
 }
 
 lib_msg_t * 
-lib_fake_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, lib_md_t *getmd)
+lib_create_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, lib_msg_t *getmsg)
 {
         /* The NAL can DMA direct to the GET md (i.e. no REPLY msg).  This
-         * returns a msg the NAL can pass to lib_finalize() so that a REPLY
-         * event still occurs. 
+         * returns a msg for the NAL to pass to lib_finalize() when the sink
+         * data has been received.
          *
-         * CAVEAT EMPTOR: 'getmd' is passed by pointer so it MUST be valid.
-         * This can only be guaranteed while a lib_msg_t holds a reference
-         * on it (ie. pending > 0), so best call this before the
-         * lib_finalize() of the original GET. */
+         * CAVEAT EMPTOR: 'getmsg' is the original GET, which is freed when
+         * lib_finalize() is called on it, so the NAL must call this first */
 
         lib_ni_t        *ni = &nal->ni;
         lib_msg_t       *msg = lib_msg_alloc(nal);
+        lib_md_t        *getmd = getmsg->md;
         unsigned long    flags;
 
         state_lock(nal, &flags);
@@ -1225,7 +1197,7 @@ lib_fake_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, lib_md_t *getmd)
 
         lib_commit_md (nal, getmd, msg);
 
-        msg->ev.type = PTL_EVENT_REPLY;
+        msg->ev.type = PTL_EVENT_REPLY_END;
         msg->ev.initiator.nid = peer_nid;
         msg->ev.initiator.pid = 0;      /* XXX FIXME!!! */
         msg->ev.rlength = msg->ev.mlength = getmd->length;
@@ -1281,14 +1253,14 @@ do_PtlGet(nal_cb_t *nal, void *private, void *v_args, void *v_ret)
         {
                 CERROR(LPU64": Dropping PUT to "LPU64": simulated failure\n",
                        nal->ni.nid, id->nid);
-                return (ret->rc = PTL_INV_PROC);
+                return (ret->rc = PTL_PROCESS_INVALID);
         }
 
         msg = lib_msg_alloc(nal);
         if (msg == NULL) {
                 CERROR(LPU64": Dropping GET to "LPU64": ENOMEM on lib_msg_t\n",
                        ni->nid, id->nid);
-                return (ret->rc = PTL_NOSPACE);
+                return (ret->rc = PTL_NO_SPACE);
         }
 
         state_lock(nal, &flags);
@@ -1298,7 +1270,7 @@ do_PtlGet(nal_cb_t *nal, void *private, void *v_args, void *v_ret)
                 lib_msg_free(nal, msg);
                 state_unlock(nal, &flags);
 
-                return ret->rc = PTL_INV_MD;
+                return ret->rc = PTL_MD_INVALID;
         }
 
         CDEBUG(D_NET, "PtlGet -> %Lu: %lu\n", (unsigned long long)id->nid,
@@ -1323,7 +1295,7 @@ do_PtlGet(nal_cb_t *nal, void *private, void *v_args, void *v_ret)
 
         lib_commit_md(nal, md, msg);
 
-        msg->ev.type = PTL_EVENT_SENT;
+        msg->ev.type = PTL_EVENT_SEND_END;
         msg->ev.initiator.nid = ni->nid;
         msg->ev.initiator.pid = ni->pid;
         msg->ev.portal = args->portal_in;
index 04c69b1..869c9d6 100644 (file)
@@ -100,7 +100,7 @@ lib_finalize(nal_cb_t *nal, void *private, lib_msg_t *msg, ptl_err_t status)
         if (status == PTL_OK &&
             !ptl_is_wire_handle_none(&msg->ack_wmd)) {
 
-                LASSERT(msg->ev.type == PTL_EVENT_PUT);
+                LASSERT(msg->ev.type == PTL_EVENT_PUT_END);
 
                 memset (&ack, 0, sizeof (ack));
                 ack.type     = HTON__u32 (PTL_MSG_ACK);
@@ -133,11 +133,16 @@ lib_finalize(nal_cb_t *nal, void *private, lib_msg_t *msg, ptl_err_t status)
         LASSERT (md->pending >= 0);
 
         /* Should I unlink this MD? */
-        unlink = (md->pending == 0 &&           /* No other refs */
-                  (md->threshold == 0 ||        /* All ops done */
-                   md->md_flags & PTL_MD_FLAG_UNLINK) != 0); /* black spot */
-
-        msg->ev.status = status;
+        if (md->pending != 0)                   /* other refs */
+                unlink = 0;
+        else if ((md->md_flags & PTL_MD_FLAG_ZOMBIE) != 0)
+                unlink = 1;
+        else if ((md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) == 0)
+                unlink = 0;
+        else
+                unlink = lib_md_exhausted(md);
+
+        msg->ev.ni_fail_type = status;
         msg->ev.unlinked = unlink;
 
         if (md->eq != NULL)
index 9e90576..296bc4a 100644 (file)
@@ -88,7 +88,7 @@ int do_PtlNIStatus(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
         else if (args->register_in == PTL_SR_MSGS_MAX)
                 ret->status_out = count->msgs_max;
         else
-                ret->rc = PTL_INV_SR_INDX;
+                ret->rc = PTL_SR_INDEX_INVALID;
 
         return ret->rc;
 }
@@ -119,7 +119,7 @@ int do_PtlNIDist(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
 
         if ((rc = nal->cb_dist(nal, nid, &dist)) != 0) {
                 ret->distance_out = (unsigned long) MAX_DIST;
-                return PTL_INV_PROC;
+                return PTL_PROCESS_INVALID;
         }
 
         ret->distance_out = dist;
diff --git a/lustre/portals/portals/module.c b/lustre/portals/portals/module.c
new file mode 100644 (file)
index 0000000..012d3d9
--- /dev/null
@@ -0,0 +1,479 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/smp_lock.h>
+#include <linux/unistd.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <asm/uaccess.h>
+#include <asm/segment.h>
+#include <linux/miscdevice.h>
+
+#include <portals/lib-p30.h>
+#include <portals/p30.h>
+#include <linux/kp30.h>
+#include <linux/kpr.h>
+#include <linux/portals_compat25.h>
+
+extern void (kping_client)(struct portal_ioctl_data *);
+
+/* One slot of the per-NAL command dispatch table: the callback registered
+ * through kportal_nal_register() and the opaque pointer that
+ * kportal_nal_cmd() hands back to it on every call. */
+struct nal_cmd_handler {
+        nal_cmd_handler_t nch_handler;  /* NULL while the slot is free */
+        void * nch_private;             /* passed verbatim to nch_handler */
+};
+
+/* Dispatch table indexed by NAL id; only indices 1..NAL_MAX_NR are ever
+ * accepted by the register/unregister/cmd functions below. */
+static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1];
+/* Held around every access to nal_cmd[], including the handler call */
+static DECLARE_MUTEX(nal_cmd_sem);
+
+
+/*
+ * Add a route for NIDs lo_nid..hi_nid via gateway gateway_nid on NAL
+ * gateway_nalid, using the router's control interface.
+ *
+ * Returns -ENODEV if PORTAL_SYMBOL_GET() cannot supply
+ * kpr_control_interface, otherwise whatever kprci_add_route() returns.
+ */
+static int
+kportal_add_route(int gateway_nalid, ptl_nid_t gateway_nid,
+                  ptl_nid_t lo_nid, ptl_nid_t hi_nid)
+{
+        kpr_control_interface_t *router;
+        int status = -ENODEV;
+
+        router = (kpr_control_interface_t *)
+                 PORTAL_SYMBOL_GET (kpr_control_interface);
+        if (router != NULL) {
+                status = router->kprci_add_route (gateway_nalid, gateway_nid,
+                                                  lo_nid, hi_nid);
+                /* balance the successful GET above */
+                PORTAL_SYMBOL_PUT(kpr_control_interface);
+        }
+
+        return status;
+}
+
+/*
+ * Remove routes for NIDs lo..hi via gateway gw_nid on NAL gw_nalid, using
+ * the router's control interface.
+ *
+ * Returns -ENODEV if PORTAL_SYMBOL_GET() cannot supply
+ * kpr_control_interface, otherwise whatever kprci_del_route() returns.
+ */
+static int
+kportal_del_route(int gw_nalid, ptl_nid_t gw_nid,
+                  ptl_nid_t lo, ptl_nid_t hi)
+{
+        kpr_control_interface_t *router;
+        int status = -ENODEV;
+
+        router = (kpr_control_interface_t *)
+                 PORTAL_SYMBOL_GET(kpr_control_interface);
+        if (router != NULL) {
+                status = router->kprci_del_route (gw_nalid, gw_nid, lo, hi);
+                /* balance the successful GET above */
+                PORTAL_SYMBOL_PUT(kpr_control_interface);
+        }
+
+        return status;
+}
+
+/*
+ * Pass an alive/dead notification for peer gw_nid on NAL gw_nalid, stamped
+ * 'when', to the router's control interface.
+ *
+ * A missing router is not an error: the sysadmin is allowed to notify
+ * _everywhere_ when a NID boots or crashes, even on nodes that know
+ * nothing of the peer, so 0 is returned in that case.  Otherwise returns
+ * whatever kprci_notify() returns.
+ */
+static int
+kportal_notify_router (int gw_nalid, ptl_nid_t gw_nid,
+                       int alive, time_t when)
+{
+        kpr_control_interface_t *router;
+        int status = 0;
+
+        router = (kpr_control_interface_t *)
+                 PORTAL_SYMBOL_GET(kpr_control_interface);
+        if (router != NULL) {
+                status = router->kprci_notify (gw_nalid, gw_nid, alive, when);
+                /* balance the successful GET above */
+                PORTAL_SYMBOL_PUT(kpr_control_interface);
+        }
+
+        return status;
+}
+
+/*
+ * Fetch route number 'index' from the router's control interface.
+ *
+ * The five output parameters are written only when kprci_get_route()
+ * returns 0; on any failure they are left untouched.
+ *
+ * Returns -ENODEV if PORTAL_SYMBOL_GET() cannot supply
+ * kpr_control_interface, otherwise whatever kprci_get_route() returns.
+ */
+static int
+kportal_get_route(int index, __u32 *gateway_nalidp, ptl_nid_t *gateway_nidp,
+                  ptl_nid_t *lo_nidp, ptl_nid_t *hi_nidp, int *alivep)
+{
+        kpr_control_interface_t *router;
+        ptl_nid_t gw;
+        ptl_nid_t lo;
+        ptl_nid_t hi;
+        int       gw_nal;
+        int       up;
+        int       status;
+
+        router = (kpr_control_interface_t *)
+                 PORTAL_SYMBOL_GET(kpr_control_interface);
+        if (router == NULL)
+                return -ENODEV;
+
+        /* fetch into locals so the caller's outputs stay intact on error */
+        status = router->kprci_get_route(index, &gw_nal, &gw, &lo, &hi, &up);
+        if (status != 0)
+                goto out;
+
+        CDEBUG(D_IOCTL, "got route [%d] %d "LPX64":"LPX64" - "LPX64", %s\n",
+               index, gw_nal, gw, lo, hi, up ? "up" : "down");
+
+        *gateway_nalidp = (__u32)gw_nal;
+        *gateway_nidp   = gw;
+        *lo_nidp        = lo;
+        *hi_nidp        = hi;
+        *alivep         = up;
+
+ out:
+        PORTAL_SYMBOL_PUT (kpr_control_interface);
+        return status;
+}
+
+/*
+ * Command handler registered under the ROUTER NAL id by
+ * init_kportals_module(); dispatches route-table commands to the
+ * kportal_*_route() / kportal_notify_router() helpers above.
+ *
+ * pcfg    - the command and its arguments; NAL_CMD_GET_ROUTE writes its
+ *           results back into pcfg
+ * private - not used by this handler
+ *
+ * Returns the helper's result, or -EINVAL for an unrecognised command.
+ */
+static int 
+kportal_router_cmd(struct portals_cfg *pcfg, void * private)
+{
+        int err = -EINVAL;
+        ENTRY;
+
+        switch(pcfg->pcfg_command) {
+        case NAL_CMD_ADD_ROUTE:
+                /* log pcfg_gw_nal: that is the NAL actually handed to
+                 * kportal_add_route(), as in the DEL_ROUTE message */
+                CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n",
+                       pcfg->pcfg_gw_nal, pcfg->pcfg_nid, 
+                       pcfg->pcfg_nid2, pcfg->pcfg_nid3);
+                err = kportal_add_route(pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                                        pcfg->pcfg_nid2, pcfg->pcfg_nid3);
+                break;
+
+        case NAL_CMD_DEL_ROUTE:
+                CDEBUG (D_IOCTL, "Removing routes via [%d] "LPU64" : "LPU64" - "LPU64"\n",
+                        pcfg->pcfg_gw_nal, pcfg->pcfg_nid, 
+                        pcfg->pcfg_nid2, pcfg->pcfg_nid3);
+                err = kportal_del_route (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                                         pcfg->pcfg_nid2, pcfg->pcfg_nid3);
+                break;
+
+        case NAL_CMD_NOTIFY_ROUTER:
+                /* pcfg_nid3 carries the notification timestamp here */
+                CDEBUG (D_IOCTL, "Notifying peer [%d] "LPU64" %s @ %ld\n",
+                        pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                        pcfg->pcfg_flags ? "Enabling" : "Disabling",
+                        (time_t)pcfg->pcfg_nid3);
+                
+                err = kportal_notify_router (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                                             pcfg->pcfg_flags, 
+                                             (time_t)pcfg->pcfg_nid3);
+                break;
+                
+        case NAL_CMD_GET_ROUTE:
+                /* pcfg_count selects the route; results overwrite pcfg */
+                CDEBUG (D_IOCTL, "Getting route [%d]\n", pcfg->pcfg_count);
+                err = kportal_get_route(pcfg->pcfg_count, &pcfg->pcfg_gw_nal,
+                                        &pcfg->pcfg_nid, 
+                                        &pcfg->pcfg_nid2, &pcfg->pcfg_nid3,
+                                        &pcfg->pcfg_flags);
+                break;
+
+        default:
+                CDEBUG(D_IOCTL, "Inappropriate cmd: %d\n", pcfg->pcfg_command);
+                break;
+        }
+        RETURN(err);
+}
+
+/*
+ * Dispatch a configuration command to the handler registered for
+ * pcfg->pcfg_nal.
+ *
+ * The handler is invoked with nal_cmd_sem held.
+ *
+ * Returns the handler's result, or -EINVAL if the NAL id is outside
+ * 1..NAL_MAX_NR or no handler is registered for it.
+ */
+int
+kportal_nal_cmd(struct portals_cfg *pcfg)
+{
+        struct nal_cmd_handler *slot;
+        __u32 nal = pcfg->pcfg_nal;
+        int status = -EINVAL;
+
+        ENTRY;
+
+        down(&nal_cmd_sem);
+
+        if (nal == 0 || nal > NAL_MAX_NR)
+                goto out;
+
+        slot = &nal_cmd[nal];
+        if (!slot->nch_handler)
+                goto out;
+
+        CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal, 
+               pcfg->pcfg_command);
+        status = slot->nch_handler(pcfg, slot->nch_private);
+
+ out:
+        up(&nal_cmd_sem);
+        RETURN(status);
+}
+
+/*
+ * Look up the network interface handle exported by the given NAL.
+ *
+ * Returns the result of PORTAL_SYMBOL_GET() on the NAL's *_ni symbol, or
+ * NULL for TCPNAL (a userspace NAL) and for unknown NAL ids (which are
+ * also logged).  A non-NULL result must be released with
+ * kportal_put_ni().
+ */
+ptl_handle_ni_t *
+kportal_get_ni (int nal)
+{
+        ptl_handle_ni_t *ni = NULL;
+
+        switch (nal) {
+        case QSWNAL:
+                ni = PORTAL_SYMBOL_GET(kqswnal_ni);
+                break;
+        case SOCKNAL:
+                ni = PORTAL_SYMBOL_GET(ksocknal_ni);
+                break;
+        case GMNAL:
+                ni = PORTAL_SYMBOL_GET(kgmnal_ni);
+                break;
+        case IBNAL:
+                ni = PORTAL_SYMBOL_GET(kibnal_ni);
+                break;
+        case SCIMACNAL:
+                ni = PORTAL_SYMBOL_GET(kscimacnal_ni);
+                break;
+        case TCPNAL:
+                /* userspace NAL: nothing to look up in the kernel */
+                break;
+        default:
+                /* A warning to a naive caller */
+                CERROR ("unknown nal: %d\n", nal);
+                break;
+        }
+
+        return (ni);
+}
+
+/*
+ * Release the reference obtained by a successful kportal_get_ni(nal).
+ *
+ * Calling this for TCPNAL is a caller bug (kportal_get_ni() never hands
+ * out a TCPNAL handle) and trips LBUG(); unknown NAL ids are only logged.
+ */
+void
+kportal_put_ni (int nal)
+{
+
+        switch (nal)
+        {
+        case QSWNAL:
+                PORTAL_SYMBOL_PUT(kqswnal_ni);
+                break;
+        case SOCKNAL:
+                PORTAL_SYMBOL_PUT(ksocknal_ni);
+                break;
+        case GMNAL:
+                PORTAL_SYMBOL_PUT(kgmnal_ni);
+                break;
+        case IBNAL:
+                PORTAL_SYMBOL_PUT(kibnal_ni);
+                break;
+        case TCPNAL:
+                /* A lesson to a malicious caller */
+                LBUG ();
+                /* Never fall through: no reference was taken for TCPNAL,
+                 * so dropping one on kscimacnal_ni would unbalance it
+                 * should LBUG() ever return. */
+                break;
+        case SCIMACNAL:
+                PORTAL_SYMBOL_PUT(kscimacnal_ni);
+                break;
+        default:
+                CERROR ("unknown nal: %d\n", nal);
+                break;
+        }
+}
+
+/*
+ * Register 'handler' as the command handler for NAL id 'nal'; 'private'
+ * is handed back to the handler on every kportal_nal_cmd() call.
+ *
+ * Returns 0 on success, -EBUSY if a handler is already registered for
+ * this NAL, or -EINVAL if 'nal' is outside 1..NAL_MAX_NR (previously this
+ * case reported success although nothing had been registered).
+ */
+int
+kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private)
+{
+        int rc = 0;
+
+        CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler);
+
+        /* reject ids that have no slot in nal_cmd[] */
+        if (nal <= 0 || nal > NAL_MAX_NR)
+                return -EINVAL;
+
+        down(&nal_cmd_sem);
+        if (nal_cmd[nal].nch_handler != NULL)
+                rc = -EBUSY;
+        else {
+                nal_cmd[nal].nch_handler = handler;
+                nal_cmd[nal].nch_private = private;
+        }
+        up(&nal_cmd_sem);
+
+        return rc;
+}
+
+/*
+ * Clear the command handler slot for NAL id 'nal'.
+ *
+ * Ids outside 1..NAL_MAX_NR are ignored.  Always returns 0.
+ */
+int
+kportal_nal_unregister(int nal)
+{
+        struct nal_cmd_handler *slot;
+
+        CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal);
+
+        if (nal <= 0 || nal > NAL_MAX_NR)
+                return 0;
+
+        slot = &nal_cmd[nal];
+
+        down(&nal_cmd_sem);
+        slot->nch_handler = NULL;
+        slot->nch_private = NULL;
+        up(&nal_cmd_sem);
+
+        return 0;
+}
+
+/*
+ * ioctl entry point of the portals module, hooked in through
+ * kportal_ioctl_handler.
+ *
+ * data - the ioctl argument block; it is dereferenced directly here and
+ *        copied back out to 'arg' by the commands that return results
+ * cmd  - IOC_PORTAL_* command code
+ * arg  - user-space address the (updated) 'data' is copied back to
+ *
+ * Returns 0 on success, -EINVAL for an unknown command, unknown NAL or a
+ * mis-sized IOC_PORTAL_NAL_CMD buffer, -EFAULT if a user copy fails, or
+ * the result of the underlying command (IOC_PORTAL_FAIL_NID passes back
+ * PtlFailNid()'s result unchanged).
+ */
+static int kportal_ioctl(struct portal_ioctl_data *data, 
+                         unsigned int cmd, unsigned long arg)
+{
+        int err = 0;
+        char str[PTL_NALFMT_SIZE];
+        ENTRY;
+
+        switch (cmd) {
+        case IOC_PORTAL_PING: {
+                void (*ping)(struct portal_ioctl_data *);
+
+                CDEBUG(D_IOCTL, "doing %d pings to nid "LPX64" (%s)\n",
+                       data->ioc_count, data->ioc_nid,
+                       portals_nid2str(data->ioc_nal, data->ioc_nid, str));
+                ping = PORTAL_SYMBOL_GET(kping_client);
+                if (!ping)
+                        CERROR("PORTAL_SYMBOL_GET failed\n");
+                else {
+                        ping(data);
+                        PORTAL_SYMBOL_PUT(kping_client);
+                }
+                /* a missing ping client is only logged, not reported */
+                RETURN(0);
+        }
+
+        case IOC_PORTAL_GET_NID: {
+                const ptl_handle_ni_t *nip;
+                ptl_process_id_t       pid;
+
+                CDEBUG (D_IOCTL, "Getting nid for nal [%d]\n", data->ioc_nal);
+
+                nip = kportal_get_ni (data->ioc_nal);
+                if (nip == NULL)
+                        RETURN (-EINVAL);
+
+                err = PtlGetId (*nip, &pid);
+                LASSERT (err == PTL_OK);
+                kportal_put_ni (data->ioc_nal);
+
+                data->ioc_nid = pid.nid;
+                if (copy_to_user ((char *)arg, data, sizeof (*data)))
+                        err = -EFAULT;
+                break;
+        }
+
+        case IOC_PORTAL_NAL_CMD: {
+                struct portals_cfg pcfg;
+
+                /* ioc_plen1 comes from the caller: refuse a mis-sized
+                 * buffer rather than asserting on it */
+                if (data->ioc_plen1 != sizeof(pcfg))
+                        RETURN (-EINVAL);
+
+                /* copy_from_user() returns the number of bytes NOT
+                 * copied, which must not leak out as the ioctl result */
+                if (copy_from_user(&pcfg, (void *)data->ioc_pbuf1, 
+                                   sizeof(pcfg)))
+                        RETURN (-EFAULT);
+
+                CDEBUG (D_IOCTL, "nal command nal %d cmd %d\n", pcfg.pcfg_nal,
+                        pcfg.pcfg_command);
+                err = kportal_nal_cmd(&pcfg);
+                if (err == 0) {
+                        /* hand back whatever the handler wrote into pcfg */
+                        if (copy_to_user((char *)data->ioc_pbuf1, &pcfg, 
+                                         sizeof (pcfg)))
+                                err = -EFAULT;
+                        if (copy_to_user((char *)arg, data, sizeof (*data)))
+                                err = -EFAULT;
+                }
+                break;
+        }
+        case IOC_PORTAL_FAIL_NID: {
+                const ptl_handle_ni_t *nip;
+
+                CDEBUG (D_IOCTL, "fail nid: [%d] "LPU64" count %d\n",
+                        data->ioc_nal, data->ioc_nid, data->ioc_count);
+
+                nip = kportal_get_ni (data->ioc_nal);
+                if (nip == NULL)
+                        RETURN (-EINVAL);
+
+                err = PtlFailNid (*nip, data->ioc_nid, data->ioc_count);
+                kportal_put_ni (data->ioc_nal);
+                break;
+        }
+        default:
+                err = -EINVAL;
+                break;
+        }
+
+        RETURN(err);
+}
+
+DECLARE_IOCTL_HANDLER(kportal_ioctl_handler, kportal_ioctl);
+
+/*
+ * Module init: bring up the Portals library, register the router command
+ * handler under the ROUTER NAL id, then hook the module's ioctl handler
+ * into libcfs.
+ *
+ * Returns 0 on success or the first failing step's error code; PtlInit()
+ * is undone with PtlFini() if the handler registration fails.
+ */
+static int init_kportals_module(void)
+{
+        int rc;
+        ENTRY;
+
+        rc = PtlInit(NULL);
+        if (rc) {
+                CERROR("PtlInit: error %d\n", rc);
+                RETURN(rc);
+        }
+
+        rc = kportal_nal_register(ROUTER, kportal_router_cmd, NULL);
+        if (rc) {
+                PtlFini();
+                CERROR("kportal_nal_register: ROUTER error %d\n", rc);
+                RETURN(rc);
+        }
+
+        libcfs_register_ioctl(&kportal_ioctl_handler);
+
+        RETURN(0);
+}
+
+/* Module exit: undo init_kportals_module() in reverse order -- unhook the
+ * ioctl handler, drop the ROUTER command handler, then shut the Portals
+ * library down. */
+static void exit_kportals_module(void)
+{
+        libcfs_deregister_ioctl(&kportal_ioctl_handler);
+        kportal_nal_unregister(ROUTER);
+        PtlFini();
+}
+
+EXPORT_SYMBOL(kportal_nal_register);
+EXPORT_SYMBOL(kportal_nal_unregister);
+EXPORT_SYMBOL(kportal_get_ni);
+EXPORT_SYMBOL(kportal_put_ni);
+EXPORT_SYMBOL(kportal_nal_cmd);
+
+EXPORT_SYMBOL(ptl_err_str);
+EXPORT_SYMBOL(lib_dispatch);
+EXPORT_SYMBOL(PtlMEAttach);
+EXPORT_SYMBOL(PtlMEInsert);
+EXPORT_SYMBOL(PtlMEUnlink);
+EXPORT_SYMBOL(PtlEQAlloc);
+EXPORT_SYMBOL(PtlMDAttach);
+EXPORT_SYMBOL(PtlMDUnlink);
+EXPORT_SYMBOL(PtlNIInit);
+EXPORT_SYMBOL(PtlNIFini);
+EXPORT_SYMBOL(PtlNIDebug);
+EXPORT_SYMBOL(PtlInit);
+EXPORT_SYMBOL(PtlFini);
+EXPORT_SYMBOL(PtlSnprintHandle);
+EXPORT_SYMBOL(PtlPut);
+EXPORT_SYMBOL(PtlGet);
+EXPORT_SYMBOL(PtlEQWait);
+EXPORT_SYMBOL(PtlEQFree);
+EXPORT_SYMBOL(PtlEQGet);
+EXPORT_SYMBOL(PtlGetId);
+EXPORT_SYMBOL(PtlMDBind);
+EXPORT_SYMBOL(lib_iov_nob);
+EXPORT_SYMBOL(lib_copy_iov2buf);
+EXPORT_SYMBOL(lib_copy_buf2iov);
+EXPORT_SYMBOL(lib_extract_iov);
+EXPORT_SYMBOL(lib_kiov_nob);
+EXPORT_SYMBOL(lib_copy_kiov2buf);
+EXPORT_SYMBOL(lib_copy_buf2kiov);
+EXPORT_SYMBOL(lib_extract_kiov);
+EXPORT_SYMBOL(lib_finalize);
+EXPORT_SYMBOL(lib_parse);
+EXPORT_SYMBOL(lib_create_reply_msg);
+EXPORT_SYMBOL(lib_init);
+EXPORT_SYMBOL(lib_fini);
+EXPORT_SYMBOL(dispatch_name);
+
+MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
+MODULE_DESCRIPTION("Portals v3.1");
+MODULE_LICENSE("GPL");
+module_init(init_kportals_module);
+module_exit(exit_kportals_module);
index ea25439..309025b 100644 (file)
@@ -39,6 +39,7 @@
 #define DEBUG_SUBSYSTEM S_PTLROUTER
 
 #include <linux/kp30.h>
+#include <linux/kpr.h>
 #include <portals/p30.h>
 #include <portals/lib-p30.h>
 
index 85c0d71..9977f20 100644 (file)
@@ -187,7 +187,7 @@ pingcli_start(struct portal_ioctl_data *args)
         client->md_in_head.length    = (args->ioc_size + STDSIZE)
                                                 * count;
         client->md_in_head.threshold = PTL_MD_THRESH_INF;
-        client->md_in_head.options   = PTL_MD_OP_PUT;
+        client->md_in_head.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
         client->md_in_head.user_ptr  = NULL;
         client->md_in_head.eventq    = client->eq;
         memset (client->inbuf, 0, (args->ioc_size + STDSIZE) * count);
@@ -203,7 +203,7 @@ pingcli_start(struct portal_ioctl_data *args)
         client->md_out_head.start     = client->outbuf;
         client->md_out_head.length    = STDSIZE + args->ioc_size;
         client->md_out_head.threshold = args->ioc_count;
-        client->md_out_head.options   = PTL_MD_OP_PUT;
+        client->md_out_head.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
         client->md_out_head.user_ptr  = NULL;
         client->md_out_head.eventq    = PTL_EQ_NONE;
 
@@ -213,7 +213,7 @@ pingcli_start(struct portal_ioctl_data *args)
 
         /* Bind the outgoing ping header */
         if ((rc=PtlMDBind (*nip, client->md_out_head,
-                                        &client->md_out_head_h))) {
+                           PTL_UNLINK, &client->md_out_head_h))) {
                 CERROR ("PtlMDBind error %d\n", rc);
                 pingcli_shutdown (1);
                 return NULL;
index 1e40ed8..0aa1ea7 100644 (file)
@@ -129,13 +129,13 @@ int pingsrv_thread(void *arg)
                 server->mdout.length    = server->evnt.rlength;
                 server->mdout.start     = server->in_buf;
                 server->mdout.threshold = 1; 
-                server->mdout.options   = PTL_MD_OP_PUT;
+                server->mdout.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
                 server->mdout.user_ptr  = NULL;
                 server->mdout.eventq    = PTL_EQ_NONE;
        
                 /* Bind the outgoing buffer */
                 if ((rc = PtlMDBind (server->ni, server->mdout, 
-                                                &server->mdout_h))) {
+                                     PTL_UNLINK, &server->mdout_h))) {
                          PDEBUG ("PtlMDBind", rc);
                          pingsrv_shutdown (1);
                          return 1;
@@ -145,7 +145,7 @@ int pingsrv_thread(void *arg)
                 server->mdin.start     = server->in_buf;
                 server->mdin.length    = MAXSIZE;
                 server->mdin.threshold = 1; 
-                server->mdin.options   = PTL_MD_OP_PUT;
+                server->mdin.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
                 server->mdin.user_ptr  = NULL;
                 server->mdin.eventq    = server->eq;
         
@@ -245,7 +245,7 @@ static struct pingsrv_data *pingsrv_setup(void)
         server->mdin.start     = server->in_buf;
         server->mdin.length    = MAXSIZE;
         server->mdin.threshold = 1; 
-        server->mdin.options   = PTL_MD_OP_PUT;
+        server->mdin.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
         server->mdin.user_ptr  = NULL;
         server->mdin.eventq    = server->eq;
         memset (server->in_buf, 0, STDSIZE);
index 64a1dd2..663da4e 100644 (file)
@@ -180,7 +180,7 @@ pingcli_start(struct portal_ioctl_data *args)
         client->md_in_head.start     = client->inbuf;
         client->md_in_head.length    = STDSIZE;
         client->md_in_head.threshold = 1;
-        client->md_in_head.options   = PTL_MD_OP_PUT;
+        client->md_in_head.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
         client->md_in_head.user_ptr  = NULL;
         client->md_in_head.eventq    = client->eq;
         memset (client->inbuf, 0, STDSIZE);
@@ -197,7 +197,7 @@ pingcli_start(struct portal_ioctl_data *args)
         client->md_out_head.start     = client->outbuf;
         client->md_out_head.length    = STDSIZE;
         client->md_out_head.threshold = 1;
-        client->md_out_head.options   = PTL_MD_OP_PUT;
+        client->md_out_head.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
         client->md_out_head.user_ptr  = NULL;
         client->md_out_head.eventq    = PTL_EQ_NONE;
 
@@ -205,7 +205,7 @@ pingcli_start(struct portal_ioctl_data *args)
 
         /* Bind the outgoing ping header */
         if ((rc=PtlMDBind (*nip, client->md_out_head,
-                                        &client->md_out_head_h))) {
+                           PTL_UNLINK, &client->md_out_head_h))) {
                 CERROR ("PtlMDBind error %d\n", rc);
                 pingcli_shutdown (1);
                 return (NULL);
index b8bda29..e8fb470 100644 (file)
@@ -121,13 +121,13 @@ int pingsrv_thread(void *arg)
                 server->mdout.start     = server->in_buf;
                 server->mdout.length    = STDSIZE;
                 server->mdout.threshold = 1; 
-                server->mdout.options   = PTL_MD_OP_PUT;
+                server->mdout.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
                 server->mdout.user_ptr  = NULL;
                 server->mdout.eventq    = PTL_EQ_NONE;
        
                 /* Bind the outgoing buffer */
                 if ((rc = PtlMDBind (server->ni, server->mdout, 
-                                                &server->mdout_h))) {
+                                     PTL_UNLINK, &server->mdout_h))) {
                          PDEBUG ("PtlMDBind", rc);
                          pingsrv_shutdown (1);
                          return 1;
@@ -137,7 +137,7 @@ int pingsrv_thread(void *arg)
                 server->mdin.start     = server->in_buf;
                 server->mdin.length    = STDSIZE;
                 server->mdin.threshold = 1; 
-                server->mdin.options   = PTL_MD_OP_PUT;
+                server->mdin.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
                 server->mdin.user_ptr  = NULL;
                 server->mdin.eventq    = server->eq;
         
@@ -234,7 +234,7 @@ static struct pingsrv_data *pingsrv_setup(void)
         server->mdin.start     = server->in_buf;
         server->mdin.length    = STDSIZE;
         server->mdin.threshold = 1; 
-        server->mdin.options   = PTL_MD_OP_PUT;
+        server->mdin.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
         server->mdin.user_ptr  = NULL;
         server->mdin.eventq    = server->eq;
         memset (server->in_buf, 0, STDSIZE);
index c27f555..00a7ae4 100644 (file)
@@ -127,11 +127,21 @@ static int procbridge_validate(nal_t *nal, void *base, size_t extent)
 }
 
 
-/* FIXME cfs temporary workaround! FIXME
- * global time out value
- */
-int __tcpnal_eqwait_timeout_value = 0;
-int __tcpnal_eqwait_timedout = 0;
+/* Acquire the procbridge mutex belonging to the bridge behind nal 'n'.
+ * 'flags' is accepted for the interface's sake and is not used here. */
+static void procbridge_lock(nal_t * n, unsigned long *flags)
+{
+    procbridge pb = (procbridge)((bridge)n->nal_data)->local;
+
+    pthread_mutex_lock(&pb->mutex);
+}
+
+/* Release the procbridge mutex belonging to the bridge behind nal 'n'.
+ * 'flags' is accepted for the interface's sake and is not used here. */
+static void procbridge_unlock(nal_t * n, unsigned long *flags)
+{
+    procbridge pb = (procbridge)((bridge)n->nal_data)->local;
+
+    pthread_mutex_unlock(&pb->mutex);
+}
 
 /* Function: yield
  * Arguments:  pid:
@@ -141,31 +151,43 @@ int __tcpnal_eqwait_timedout = 0;
  *   overload it to explicitly block until signalled by the
  *   lower half.
  */
-static void procbridge_yield(nal_t *n)
+/* Wait on p->cond for at most 'milliseconds' ms.
+ *   milliseconds == 0: return 0 immediately without waiting.
+ *   milliseconds <  0: wait with no timeout; the value is returned unchanged.
+ *   milliseconds >  0: timed wait; returns the unused part of the timeout,
+ *                      clamped at 0.
+ * 'flags' is not used.
+ * NOTE(review): the mutex is no longer taken in here, so the caller
+ * presumably already holds p->mutex (e.g. via procbridge_lock()) -- confirm
+ * against the callers. */
+static int procbridge_yield(nal_t *n, unsigned long *flags, int milliseconds)
 {
     bridge b=(bridge)n->nal_data;
     procbridge p=(procbridge)b->local;
 
-    pthread_mutex_lock(&p->mutex);
-    if (!__tcpnal_eqwait_timeout_value) {
+    if (milliseconds == 0)
+            return 0;
+            
+    if (milliseconds < 0) {
         pthread_cond_wait(&p->cond,&p->mutex);
     } else {
+        struct timeval then;
         struct timeval now;
         struct timespec timeout;
 
-        gettimeofday(&now, NULL);
-        timeout.tv_sec = now.tv_sec + __tcpnal_eqwait_timeout_value;
-        timeout.tv_nsec = now.tv_usec * 1000;
+        /* turn the relative timeout into an absolute deadline, carrying any
+         * nanosecond overflow into the seconds field */
+        gettimeofday(&then, NULL);
+        timeout.tv_sec = then.tv_sec + milliseconds/1000;
+        timeout.tv_nsec = then.tv_usec * 1000 + milliseconds % 1000 * 1000000;
+        if (timeout.tv_nsec >= 1000000000) {
+                timeout.tv_sec++;
+                timeout.tv_nsec -= 1000000000;
+        }
+
+        /* the result (signalled vs. timed out) is not inspected */
+        pthread_cond_timedwait(&p->cond, &p->mutex, &timeout);
 
-        __tcpnal_eqwait_timedout =
-                pthread_cond_timedwait(&p->cond, &p->mutex, &timeout);
+        /* subtract the time actually spent waiting from the budget */
+        gettimeofday(&now, NULL);
+        milliseconds -= (now.tv_sec - then.tv_sec) * 1000 + 
+                        (now.tv_usec - then.tv_usec) / 1000;
+        
+        if (milliseconds < 0)
+                milliseconds = 0;
     }
-    pthread_mutex_unlock(&p->mutex);
+
+    return (milliseconds);
 }
 
 
-static void procbridge_lock(nal_t * nal, unsigned long *flags){}
-static void procbridge_unlock(nal_t * nal, unsigned long *flags){}
 /* api_nal
  *  the interface vector to allow the generic code to access
  *  this nal. this is separate from the library side nal_cb.
@@ -233,7 +255,6 @@ nal_t *procbridge_interface(int num_interface,
     pthread_mutex_init(&p->mutex,0);
     pthread_cond_init(&p->cond, 0);
     p->nal_flags = 0;
-    pthread_mutex_init(&p->nal_cb_lock, 0);
 
     /* initialize notifier */
     if (socketpair(AF_UNIX, SOCK_STREAM, 0, p->notifier)) {
index 2a5ba0d..1cfb233 100644 (file)
@@ -95,7 +95,7 @@ static void nal_cli(nal_cb_t *nal,
     bridge b = (bridge) nal->nal_data;
     procbridge p = (procbridge) b->local;
 
-    pthread_mutex_lock(&p->nal_cb_lock);
+    pthread_mutex_lock(&p->mutex);
 }
 
 
@@ -105,9 +105,21 @@ static void nal_sti(nal_cb_t *nal,
     bridge b = (bridge)nal->nal_data;
     procbridge p = (procbridge) b->local;
 
-    pthread_mutex_unlock(&p->nal_cb_lock);
+    pthread_mutex_unlock(&p->mutex);
 }
 
+/* Event hook (installed as cb_callback in nal_thread()): run the event
+ * queue's own callback, if it has one, then wake every thread waiting on
+ * the bridge's condition variable.  'private' is not used. */
+static void nal_callback(nal_cb_t *nal, void *private,
+                         lib_eq_t *eq, ptl_event_t *ev)
+{
+        procbridge pb = (procbridge)((bridge)nal->nal_data)->local;
+
+        /* holding pb->mutex */
+        if (eq->event_callback)
+                eq->event_callback(ev);
+
+        pthread_cond_broadcast(&pb->cond);
+}
 
 static int nal_dist(nal_cb_t *nal,
                     ptl_nid_t nid,
@@ -116,21 +128,20 @@ static int nal_dist(nal_cb_t *nal,
     return 0;
 }
 
-static void wakeup_topside(void *z)
+/* Thunk registered from nal_thread().  Does nothing unless
+ * NAL_FLAG_STOPPING is set; once it is, marks the NAL as stopped, wakes
+ * all waiters on p->cond and terminates the calling thread. */
+static void check_stopping(void *z)
 {
     bridge b = z;
     procbridge p = b->local;
-    int stop;
 
+    /* NOTE(review): nal_flags is tested here without p->mutex held --
+     * confirm that this unlocked check is intended */
+    if ((p->nal_flags & NAL_FLAG_STOPPING) == 0)
+            return;
+    
     pthread_mutex_lock(&p->mutex);
-    stop = p->nal_flags & NAL_FLAG_STOPPING;
-    if (stop)
-        p->nal_flags |= NAL_FLAG_STOPPED;
+    p->nal_flags |= NAL_FLAG_STOPPED;
     pthread_cond_broadcast(&p->cond);
     pthread_mutex_unlock(&p->mutex);
 
-    if (stop)
-        pthread_exit(0);
+    pthread_exit(0);
 }
 
 
@@ -175,6 +186,7 @@ void *nal_thread(void *z)
     b->nal_cb->cb_printf=nal_printf;
     b->nal_cb->cb_cli=nal_cli;
     b->nal_cb->cb_sti=nal_sti;
+    b->nal_cb->cb_callback=nal_callback;
     b->nal_cb->cb_dist=nal_dist;
 
     pid_request = args->nia_requested_pid;
@@ -216,7 +228,7 @@ void *nal_thread(void *z)
            performs an operation and returns to blocking mode. we
            overload this function to inform the api side that
            it may be interested in looking at the event queue */
-        register_thunk(wakeup_topside,b);
+        register_thunk(check_stopping,b);
         timer_loop();
     }
     return(0);
index 925406f..e871d9a 100644 (file)
@@ -3,7 +3,8 @@
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
-COMPILE = $(CC) -Wall -g -I$(srcdir)/../include
+# ../ for <portals/*.h>, ../../ for <config.h>
+COMPILE = $(CC) -Wall -g -I$(srcdir)/../include -I$(srcdir)/../../include
 LINK = $(CC) -o $@
 
 if LIBLUSTRE
index d8b7f02..ccef070 100644 (file)
@@ -284,7 +284,7 @@ int ptlbd_blk_init(void)
                 /* avoid integer overflow */
                 ptlbd_size[i] = (16*1024*((1024*1024) >> BLOCK_SIZE_BITS));
                 ptlbd_hardsect_size[i] = 4096;
-                ptlbd_max_sectors[i] = PTL_MD_MAX_IOV * (4096/512);
+                ptlbd_max_sectors[i] = PTLRPC_MAX_BRW_PAGES * (4096/512);
         }
 
         return 0;
index 4822d33..ea8fb77 100644 (file)
@@ -13,8 +13,8 @@ LDLM_COMM_SOURCES= $(top_srcdir)/ldlm/l_lock.c $(top_srcdir)/ldlm/ldlm_lock.c \
 
 COMMON_SOURCES =  client.c recover.c connection.c niobuf.c pack_generic.c   \
     events.c ptlrpc_module.c service.c pinger.c recov_thread.c llog_net.c   \
-    llog_client.c llog_server.c import.c ptlrpcd.c ptlrpc_internal.h        \
-    $(LDLM_COMM_SOURCES)
+    llog_client.c llog_server.c import.c ptlrpcd.c pers.c                  \
+    ptlrpc_internal.h $(LDLM_COMM_SOURCES)
 
 if LIBLUSTRE
 
index 25fd1eb..0774fa2 100644 (file)
@@ -92,9 +92,9 @@ static inline struct ptlrpc_bulk_desc *new_bulk(int npages, int type, int portal
 
         spin_lock_init(&desc->bd_lock);
         init_waitqueue_head(&desc->bd_waitq);
-        desc->bd_max_pages = npages;
-        desc->bd_page_count = 0;
-        desc->bd_md_h = PTL_HANDLE_NONE;
+        desc->bd_max_iov = npages;
+        desc->bd_iov_count = 0;
+        desc->bd_md_h = PTL_INVALID_HANDLE;
         desc->bd_portal = portal;
         desc->bd_type = type;
         
@@ -152,27 +152,15 @@ struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_exp (struct ptlrpc_request *req,
+/* Add one page fragment (len bytes starting at pageoffset within page) to
+ * bulk descriptor desc: assert the arguments are sane and that the
+ * descriptor still has room, add len to the descriptor's byte total, and
+ * pass the fragment on to pers_bulk_add_page().
+ * NOTE(review): bd_iov_count is no longer incremented here; presumably
+ * pers_bulk_add_page() does that -- confirm. */
 void ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
                            struct page *page, int pageoffset, int len)
 {
-#ifdef __KERNEL__
-        ptl_kiov_t *kiov = &desc->bd_iov[desc->bd_page_count];
-#else
-        struct iovec *iov = &desc->bd_iov[desc->bd_page_count];
-#endif
-        LASSERT(desc->bd_page_count < desc->bd_max_pages);
+        LASSERT(desc->bd_iov_count < desc->bd_max_iov);
         LASSERT(page != NULL);
         LASSERT(pageoffset >= 0);
         LASSERT(len > 0);
         LASSERT(pageoffset + len <= PAGE_SIZE);
 
-#ifdef __KERNEL__
-        kiov->kiov_page   = page;
-        kiov->kiov_offset = pageoffset;
-        kiov->kiov_len    = len;
-#else
-        iov->iov_base = page->addr + pageoffset;
-        iov->iov_len  = len;
-#endif
-        desc->bd_page_count++;
         desc->bd_nob += len;
+
+        pers_bulk_add_page(desc, page, pageoffset, len);
 }
 
 void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
@@ -180,7 +168,7 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
         ENTRY;
 
         LASSERT(desc != NULL);
-        LASSERT(desc->bd_page_count != 0x5a5a5a5a); /* not freed already */
+        LASSERT(desc->bd_iov_count != 0x5a5a5a5a); /* not freed already */
         LASSERT(!desc->bd_network_rw);         /* network hands off or */
         LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL));
         if (desc->bd_export)
@@ -189,7 +177,7 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
                 class_import_put(desc->bd_import);
 
         OBD_FREE(desc, offsetof(struct ptlrpc_bulk_desc, 
-                                bd_iov[desc->bd_max_pages]));
+                                bd_iov[desc->bd_max_iov]));
         EXIT;
 }
 
@@ -1112,7 +1100,7 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request)
                 return;
 
         rc = PtlMDUnlink (request->rq_reply_md_h);
-        if (rc == PTL_INV_MD) {
+        if (rc == PTL_MD_INVALID) {
                 LASSERT (!ptlrpc_client_receiving_reply(request));
                 return;
         }
index b1f8221..6e61236 100644 (file)
@@ -29,6 +29,7 @@
 #endif
 #include <linux/obd_class.h>
 #include <linux/lustre_net.h>
+#include "ptlrpc_internal.h"
 
 struct ptlrpc_ni  ptlrpc_interfaces[NAL_MAX_NR];
 int               ptlrpc_ninterfaces;
@@ -43,15 +44,15 @@ void request_out_callback(ptl_event_t *ev)
         unsigned long          flags;
         ENTRY;
 
-        LASSERT (ev->type == PTL_EVENT_SENT ||
+        LASSERT (ev->type == PTL_EVENT_SEND_END ||
                  ev->type == PTL_EVENT_UNLINK);
         LASSERT (ev->unlinked);
 
-        DEBUG_REQ((ev->status == PTL_OK) ? D_NET : D_ERROR, req,
-                  "type %d, status %d", ev->type, ev->status);
+        DEBUG_REQ((ev->ni_fail_type == PTL_NI_OK) ? D_NET : D_ERROR, req,
+                  "type %d, status %d", ev->type, ev->ni_fail_type);
 
         if (ev->type == PTL_EVENT_UNLINK ||
-            ev->status != PTL_OK) {
+            ev->ni_fail_type != PTL_NI_OK) {
 
                 /* Failed send: make it seem like the reply timed out, just
                  * like failing sends in client.c does currently...  */
@@ -78,23 +79,23 @@ void reply_in_callback(ptl_event_t *ev)
         unsigned long flags;
         ENTRY;
 
-        LASSERT (ev->type == PTL_EVENT_PUT ||
+        LASSERT (ev->type == PTL_EVENT_PUT_END ||
                  ev->type == PTL_EVENT_UNLINK);
         LASSERT (ev->unlinked);
         LASSERT (ev->mem_desc.start == req->rq_repmsg);
         LASSERT (ev->offset == 0);
         LASSERT (ev->mlength <= req->rq_replen);
         
-        DEBUG_REQ((ev->status == PTL_OK) ? D_NET : D_ERROR, req,
-                  "type %d, status %d", ev->type, ev->status);
+        DEBUG_REQ((ev->ni_fail_type == PTL_NI_OK) ? D_NET : D_ERROR, req,
+                  "type %d, status %d", ev->type, ev->ni_fail_type);
 
         spin_lock_irqsave (&req->rq_lock, flags);
 
         LASSERT (req->rq_receiving_reply);
         req->rq_receiving_reply = 0;
 
-        if (ev->type == PTL_EVENT_PUT &&
-            ev->status == PTL_OK) {
+        if (ev->type == PTL_EVENT_PUT_END &&
+            ev->ni_fail_type == PTL_NI_OK) {
                 req->rq_replied = 1;
                 req->rq_nob_received = ev->mlength;
         }
@@ -118,15 +119,15 @@ void client_bulk_callback (ptl_event_t *ev)
         ENTRY;
 
         LASSERT ((desc->bd_type == BULK_PUT_SINK && 
-                  ev->type == PTL_EVENT_PUT) ||
+                  ev->type == PTL_EVENT_PUT_END) ||
                  (desc->bd_type == BULK_GET_SOURCE &&
-                  ev->type == PTL_EVENT_GET) ||
+                  ev->type == PTL_EVENT_GET_END) ||
                  ev->type == PTL_EVENT_UNLINK);
         LASSERT (ev->unlinked);
 
-        CDEBUG((ev->status == PTL_OK) ? D_NET : D_ERROR,
+        CDEBUG((ev->ni_fail_type == PTL_NI_OK) ? D_NET : D_ERROR,
                "event type %d, status %d, desc %p\n", 
-               ev->type, ev->status, desc);
+               ev->type, ev->ni_fail_type, desc);
 
         spin_lock_irqsave (&desc->bd_lock, flags);
 
@@ -134,7 +135,7 @@ void client_bulk_callback (ptl_event_t *ev)
         desc->bd_network_rw = 0;
 
         if (ev->type != PTL_EVENT_UNLINK &&
-            ev->status == PTL_OK) {
+            ev->ni_fail_type == PTL_NI_OK) {
                 desc->bd_success = 1;
                 desc->bd_nob_transferred = ev->mlength;
         }
@@ -160,15 +161,15 @@ void request_in_callback(ptl_event_t *ev)
         long                               flags;
         ENTRY;
 
-        LASSERT (ev->type == PTL_EVENT_PUT ||
+        LASSERT (ev->type == PTL_EVENT_PUT_END ||
                  ev->type == PTL_EVENT_UNLINK);
         LASSERT ((char *)ev->mem_desc.start >= rqbd->rqbd_buffer);
         LASSERT ((char *)ev->mem_desc.start + ev->offset + ev->mlength <=
                  rqbd->rqbd_buffer + service->srv_buf_size);
 
-        CDEBUG((ev->status == PTL_OK) ? D_NET : D_ERROR,
+        CDEBUG((ev->ni_fail_type == PTL_NI_OK) ? D_NET : D_ERROR,
                "event type %d, status %d, service %s\n", 
-               ev->type, ev->status, service->srv_name);
+               ev->type, ev->ni_fail_type, service->srv_name);
 
         if (ev->unlinked) {
                 /* If this is the last request message to fit in the
@@ -179,8 +180,8 @@ void request_in_callback(ptl_event_t *ev)
                 req = &rqbd->rqbd_req;
                 memset(req, 0, sizeof (*req));
         } else {
-                LASSERT (ev->type == PTL_EVENT_PUT);
-                if (ev->status != PTL_OK) {
+                LASSERT (ev->type == PTL_EVENT_PUT_END);
+                if (ev->ni_fail_type != PTL_NI_OK) {
                         /* We moaned above already... */
                         return;
                 }
@@ -198,10 +199,10 @@ void request_in_callback(ptl_event_t *ev)
          * size to non-zero if this was a successful receive. */
         req->rq_xid = ev->match_bits;
         req->rq_reqmsg = ev->mem_desc.start + ev->offset;
-        if (ev->type == PTL_EVENT_PUT &&
-            ev->status == PTL_OK)
+        if (ev->type == PTL_EVENT_PUT_END &&
+            ev->ni_fail_type == PTL_NI_OK)
                 req->rq_reqlen = ev->mlength;
-        req->rq_arrival_time = ev->arrival_time;
+        do_gettimeofday(&req->rq_arrival_time);
         req->rq_peer.peer_nid = ev->initiator.nid;
         req->rq_peer.peer_ni = rqbd->rqbd_srv_ni->sni_ni;
         req->rq_rqbd = rqbd;
@@ -249,7 +250,7 @@ void reply_out_callback(ptl_event_t *ev)
         unsigned long              flags;
         ENTRY;
 
-        LASSERT (ev->type == PTL_EVENT_SENT ||
+        LASSERT (ev->type == PTL_EVENT_SEND_END ||
                  ev->type == PTL_EVENT_ACK ||
                  ev->type == PTL_EVENT_UNLINK);
 
@@ -285,22 +286,22 @@ void server_bulk_callback (ptl_event_t *ev)
         unsigned long            flags;
         ENTRY;
 
-        LASSERT (ev->type == PTL_EVENT_SENT ||
+        LASSERT (ev->type == PTL_EVENT_SEND_END ||
                  ev->type == PTL_EVENT_UNLINK ||
                  (desc->bd_type == BULK_PUT_SOURCE &&
                   ev->type == PTL_EVENT_ACK) ||
                  (desc->bd_type == BULK_GET_SINK &&
-                  ev->type == PTL_EVENT_REPLY));
+                  ev->type == PTL_EVENT_REPLY_END));
 
-        CDEBUG((ev->status == PTL_OK) ? D_NET : D_ERROR,
+        CDEBUG((ev->ni_fail_type == PTL_NI_OK) ? D_NET : D_ERROR,
                "event type %d, status %d, desc %p\n", 
-               ev->type, ev->status, desc);
+               ev->type, ev->ni_fail_type, desc);
 
         spin_lock_irqsave (&desc->bd_lock, flags);
         
         if ((ev->type == PTL_EVENT_ACK ||
-             ev->type == PTL_EVENT_REPLY) &&
-            ev->status == PTL_OK) {
+             ev->type == PTL_EVENT_REPLY_END) &&
+            ev->ni_fail_type == PTL_NI_OK) {
                 /* We heard back from the peer, so even if we get this
                  * before the SENT event (oh yes we can), we know we
                  * read/wrote the peer buffer and how much... */
@@ -339,26 +340,29 @@ static int ptlrpc_master_callback(ptl_event_t *ev)
 int ptlrpc_uuid_to_peer (struct obd_uuid *uuid, struct ptlrpc_peer *peer)
 {
         struct ptlrpc_ni   *pni;
-        struct lustre_peer  lpeer;
+        ptl_nid_t           peer_nid;
+        ptl_handle_ni_t     peer_ni;
         int                 i;
-        int                 rc = lustre_uuid_to_peer (uuid->uuid, &lpeer);
-
+        char                str[20];
+        int                 rc = lustre_uuid_to_peer(uuid->uuid, 
+                                                     &peer_ni, &peer_nid);
         if (rc != 0)
                 RETURN (rc);
 
         for (i = 0; i < ptlrpc_ninterfaces; i++) {
                 pni = &ptlrpc_interfaces[i];
 
-                if (!memcmp(&lpeer.peer_ni, &pni->pni_ni_h,
-                            sizeof (lpeer.peer_ni))) {
-                        peer->peer_nid = lpeer.peer_nid;
+                if (!memcmp(&peer_ni, &pni->pni_ni_h,
+                            sizeof (peer_ni))) {
+                        peer->peer_nid = peer_nid;
                         peer->peer_ni = pni;
                         return (0);
                 }
         }
 
-        CERROR("Can't find ptlrpc interface for "LPX64" ni handle %08lx."LPX64"\n",
-               lpeer.peer_nid, lpeer.peer_ni.nal_idx, lpeer.peer_ni.cookie);
+        PtlSnprintHandle(str, sizeof(str), peer_ni);
+        CERROR("Can't find ptlrpc interface for "LPX64" ni %s\n",
+               peer_nid, str);
         return (-ENOENT);
 }
 
@@ -384,7 +388,7 @@ void ptlrpc_ni_fini(struct ptlrpc_ni *pni)
                         kportal_put_ni (pni->pni_number);
                         return;
                         
-                case PTL_EQ_INUSE:
+                case PTL_EQ_IN_USE:
                         if (retries != 0)
                                 CWARN("Event queue for %s still busy\n",
                                       pni->pni_name);
@@ -402,6 +406,7 @@ void ptlrpc_ni_fini(struct ptlrpc_ni *pni)
 int ptlrpc_ni_init(int number, char *name, struct ptlrpc_ni *pni)
 {
         int              rc;
+        char             str[20];
         ptl_handle_ni_t *nip = kportal_get_ni (number);
 
         if (nip == NULL) {
@@ -409,24 +414,18 @@ int ptlrpc_ni_init(int number, char *name, struct ptlrpc_ni *pni)
                 return (-ENOENT);
         }
 
-        CDEBUG (D_NET, "init %d %s: nal_idx %ld\n", number, name, nip->nal_idx);
+        PtlSnprintHandle(str, sizeof(str), *nip);
+        CDEBUG (D_NET, "init %d %s: %s\n", number, name, str);
 
         pni->pni_name = name;
         pni->pni_number = number;
         pni->pni_ni_h = *nip;
 
-        pni->pni_eq_h = PTL_HANDLE_NONE;
+        pni->pni_eq_h = PTL_INVALID_HANDLE;
 
-#ifdef __KERNEL__
-        /* kernel: portals calls the callback when the event is added to the
-         * queue, so we don't care if we lose events */
-        rc = PtlEQAlloc(pni->pni_ni_h, 1024, ptlrpc_master_callback,
+        rc = PtlEQAlloc(pni->pni_ni_h, PTLRPC_NUM_EQ, PTLRPC_EQ_CALLBACK,
                         &pni->pni_eq_h);
-#else
-        /* liblustre: no asynchronous callback and allocate a nice big event
-         * queue so we don't drop any events... */
-        rc = PtlEQAlloc(pni->pni_ni_h, 10240, NULL, &pni->pni_eq_h);
-#endif
+
         if (rc != PTL_OK)
                 GOTO (fail, rc = -ENOMEM);
 
@@ -473,19 +472,16 @@ liblustre_check_events (int timeout)
 {
         ptl_event_t ev;
         int         rc;
+        int         i;
         ENTRY;
 
-        if (timeout) {
-                rc = PtlEQWait_timeout(ptlrpc_interfaces[0].pni_eq_h, &ev, timeout);
-        } else {
-                rc = PtlEQGet (ptlrpc_interfaces[0].pni_eq_h, &ev);
-        }
+        rc = PtlEQPoll(&ptlrpc_interfaces[0].pni_eq_h, 1, timeout * 1000,
+                       &ev, &i);
         if (rc == PTL_EQ_EMPTY)
                 RETURN(0);
         
         LASSERT (rc == PTL_EQ_DROPPED || rc == PTL_OK);
         
-#ifndef __KERNEL__
         /* liblustre: no asynch callback so we can't afford to miss any
          * events... */
         if (rc == PTL_EQ_DROPPED) {
@@ -494,10 +490,11 @@ liblustre_check_events (int timeout)
         }
         
         ptlrpc_master_callback (&ev);
-#endif
         RETURN(1);
 }
 
+int liblustre_waiting = 0;
+
 int
 liblustre_wait_event (int timeout)
 {
@@ -505,40 +502,55 @@ liblustre_wait_event (int timeout)
         struct liblustre_wait_callback *llwc;
         int                             found_something = 0;
 
-        /* First check for any new events */
-        if (liblustre_check_events(0))
-                found_something = 1;
+        /* single threaded recursion check... */
+        liblustre_waiting = 1;
 
-        /* Now give all registered callbacks a bite at the cherry */
-        list_for_each(tmp, &liblustre_wait_callbacks) {
-                llwc = list_entry(tmp, struct liblustre_wait_callback, 
-                                  llwc_list);
-                
-                if (llwc->llwc_fn(llwc->llwc_arg))
+        for (;;) {
+                /* Deal with all pending events */
+                while (liblustre_check_events(0))
                         found_something = 1;
-        }
 
-        /* return to caller if something happened */
-        if (found_something)
-                return 1;
-        
-        /* block for an event, returning immediately on timeout */
-        if (!liblustre_check_events(timeout))
-                return 0;
-
-        /* an event occurred; let all registered callbacks progress... */
-        list_for_each(tmp, &liblustre_wait_callbacks) {
-                llwc = list_entry(tmp, struct liblustre_wait_callback, 
-                                  llwc_list);
+                /* Give all registered callbacks a bite at the cherry */
+                list_for_each(tmp, &liblustre_wait_callbacks) {
+                        llwc = list_entry(tmp, struct liblustre_wait_callback, 
+                                          llwc_list);
                 
-                if (llwc->llwc_fn(llwc->llwc_arg))
-                        found_something = 1;
+                        if (llwc->llwc_fn(llwc->llwc_arg))
+                                found_something = 1;
+                }
+
+                if (found_something || timeout == 0)
+                        break;
+
+                /* Nothing so far, but I'm allowed to block... */
+                found_something = liblustre_check_events(timeout);
+                if (!found_something)           /* still nothing */
+                        break;                  /* I timed out */
         }
 
-        /* ...and tell caller something happened */
-        return 1;
+        liblustre_waiting = 0;
+
+        return found_something;
 }
-#endif
+
+static int cray_portals_callback(ptl_event_t *ev)
+{
+        /* We get a callback from the client Cray portals implementation
+         * whenever anyone calls PtlEQPoll(), and an event queue with a
+         * callback handler has outstanding events.  
+         *
+         * If it's not liblustre calling PtlEQPoll(), this lets us know we
+         * have outstanding events which we handle with
+         * liblustre_wait_event().
+         *
+         * Otherwise, we're already eagerly consuming events and we'd
+         * handle events out of order if we recursed.
+         *
+         * 'ev' itself is not examined.  The function is declared int, so
+         * every path returns a value.
+         * NOTE(review): 0 is assumed to be what the caller expects from an
+         * EQ callback -- confirm. */
+        if (liblustre_waiting)
+                return 0;
+        
+        liblustre_wait_event(0);
+        return 0;
+}
+#endif /* __KERNEL__ */
 
 int ptlrpc_init_portals(void)
 {
index 17be7dd..0eb8d41 100644 (file)
@@ -138,7 +138,8 @@ EXPORT_SYMBOL(llog_initiator_connect);
 #else /* !__KERNEL__ */
 
+/* !__KERNEL__ stub: ignores all of its arguments and always returns 0. */
 int llog_origin_connect(struct llog_ctxt *ctxt, int count,
-                        struct llog_logid *logid, struct llog_gen *gen)
+                        struct llog_logid *logid, struct llog_gen *gen,
+                        struct obd_uuid *uuid)
 {
         return 0;
 }
index b885e89..c22e668 100644 (file)
@@ -57,7 +57,7 @@ static int ptl_send_buf (ptl_handle_md_t *mdh, void *base, int len,
         md.start     = base;
         md.length    = len;
         md.threshold = (ack == PTL_ACK_REQ) ? 2 : 1;
-        md.options   = 0;
+        md.options   = PTLRPC_MD_OPTIONS;
         md.user_ptr  = cbid;
         md.eventq    = conn->c_peer.peer_ni->pni_eq_h;
 
@@ -68,10 +68,11 @@ static int ptl_send_buf (ptl_handle_md_t *mdh, void *base, int len,
                 obd_fail_loc |= OBD_FAIL_ONCE | OBD_FAILED;
         }
 
-        rc = PtlMDBind (conn->c_peer.peer_ni->pni_ni_h, md, mdh);
+        rc = PtlMDBind (conn->c_peer.peer_ni->pni_ni_h, md, 
+                        PTL_UNLINK, mdh);
         if (rc != PTL_OK) {
                 CERROR ("PtlMDBind failed: %d\n", rc);
-                LASSERT (rc == PTL_NOSPACE);
+                LASSERT (rc == PTL_NO_SPACE);
                 RETURN (-ENOMEM);
         }
 
@@ -92,6 +93,20 @@ static int ptl_send_buf (ptl_handle_md_t *mdh, void *base, int len,
         RETURN (0);
 }
 
+/* Point an MD at the data described by a bulk descriptor.
+ *
+ * md:   descriptor being built; md->options must already have been
+ *       initialised by the caller because flag bits are OR-ed in here
+ * desc: bulk descriptor supplying bd_iov[] and bd_iov_count
+ *
+ * Sets md->start and, on the iov path, md->niov plus the
+ * PTLRPC_PTL_MD_IOV option bits.  md->length is left untouched. */
+static void ptlrpc_fill_md(ptl_md_t *md, struct ptlrpc_bulk_desc *desc)
+{
+        /* ptl_md_max_iovs() == 0 means "unlimited"; otherwise the descriptor
+         * must not hold more entries than that limit */
+        LASSERT(ptl_md_max_iovs() == 0  || 
+                (desc->bd_iov_count <= ptl_md_max_iovs()));
+
+        if (ptl_requires_iov() || desc->bd_iov_count > 0) {
+                md->options |= PTLRPC_PTL_MD_IOV;
+                md->start = &desc->bd_iov[0];
+                md->niov = desc->bd_iov_count;
+        } else {
+                /* Only reached when ptl_requires_iov() is 0 and no entry
+                 * has been added.  NOTE(review): bd_iov[0] has not been
+                 * filled in by pers_bulk_add_page() in that case - confirm
+                 * the slot is zero-initialised by whoever allocates desc. */
+                md->start = ptl_iov_base(&desc->bd_iov[0]);
+        }
+}
+
 int ptlrpc_start_bulk_transfer (struct ptlrpc_bulk_desc *desc)
 {
         int                 rc;
@@ -112,16 +127,12 @@ int ptlrpc_start_bulk_transfer (struct ptlrpc_bulk_desc *desc)
         desc->bd_success = 0;
         peer = &desc->bd_export->exp_connection->c_peer;
 
-        md.start = &desc->bd_iov[0];
-        md.niov = desc->bd_page_count;
         md.length = desc->bd_nob;
         md.eventq = peer->peer_ni->pni_eq_h;
         md.threshold = 2; /* SENT and ACK/REPLY */
-#ifdef __KERNEL__
-        md.options = PTL_MD_KIOV;
-#else
-        md.options = PTL_MD_IOV;
-#endif
+        md.options = PTLRPC_MD_OPTIONS;
+
+        ptlrpc_fill_md(&md, desc);
         md.user_ptr = &desc->bd_cbid;
         LASSERT (desc->bd_cbid.cbid_fn == server_bulk_callback);
         LASSERT (desc->bd_cbid.cbid_arg == desc);
@@ -129,10 +140,11 @@ int ptlrpc_start_bulk_transfer (struct ptlrpc_bulk_desc *desc)
         /* NB total length may be 0 for a read past EOF, so we send a 0
          * length bulk, since the client expects a bulk event. */
 
-        rc = PtlMDBind(peer->peer_ni->pni_ni_h, md, &desc->bd_md_h);
+        rc = PtlMDBind(peer->peer_ni->pni_ni_h, md,
+                       PTL_UNLINK, &desc->bd_md_h);
         if (rc != PTL_OK) {
                 CERROR("PtlMDBind failed: %d\n", rc);
-                LASSERT (rc == PTL_NOSPACE);
+                LASSERT (rc == PTL_NO_SPACE);
                 RETURN(-ENOMEM);
         }
 
@@ -186,7 +198,7 @@ void ptlrpc_abort_bulk (struct ptlrpc_bulk_desc *desc)
          * happened. */
 
         rc = PtlMDUnlink (desc->bd_md_h);
-        if (rc == PTL_INV_MD) {
+        if (rc == PTL_MD_INVALID) {
                 LASSERT(!ptlrpc_bulk_active(desc));
                 return;
         }
@@ -224,7 +236,7 @@ int ptlrpc_register_bulk (struct ptlrpc_request *req)
         /* NB no locking required until desc is on the network */
         LASSERT (desc->bd_nob > 0);
         LASSERT (!desc->bd_network_rw);
-        LASSERT (desc->bd_page_count <= PTL_MD_MAX_PAGES);
+        LASSERT (desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES);
         LASSERT (desc->bd_req != NULL);
         LASSERT (desc->bd_type == BULK_PUT_SINK ||
                  desc->bd_type == BULK_GET_SOURCE);
@@ -233,18 +245,13 @@ int ptlrpc_register_bulk (struct ptlrpc_request *req)
 
         peer = &desc->bd_import->imp_connection->c_peer;
 
-        md.start = &desc->bd_iov[0];
-        md.niov = desc->bd_page_count;
         md.length = desc->bd_nob;
         md.eventq = peer->peer_ni->pni_eq_h;
         md.threshold = 1;                       /* PUT or GET */
-        md.options = (desc->bd_type == BULK_GET_SOURCE) ? 
-                     PTL_MD_OP_GET : PTL_MD_OP_PUT;
-#ifdef __KERNEL__
-        md.options |= PTL_MD_KIOV;
-#else
-        md.options |= PTL_MD_IOV;
-#endif
+        md.options = PTLRPC_MD_OPTIONS | 
+                     ((desc->bd_type == BULK_GET_SOURCE) ? 
+                      PTL_MD_OP_GET : PTL_MD_OP_PUT);
+        ptlrpc_fill_md(&md, desc);
         md.user_ptr = &desc->bd_cbid;
         LASSERT (desc->bd_cbid.cbid_fn == client_bulk_callback);
         LASSERT (desc->bd_cbid.cbid_arg == desc);
@@ -264,7 +271,7 @@ int ptlrpc_register_bulk (struct ptlrpc_request *req)
                          PTL_UNLINK, PTL_INS_AFTER, &me_h);
         if (rc != PTL_OK) {
                 CERROR("PtlMEAttach failed: %d\n", rc);
-                LASSERT (rc == PTL_NOSPACE);
+                LASSERT (rc == PTL_NO_SPACE);
                 RETURN (-ENOMEM);
         }
 
@@ -273,7 +280,7 @@ int ptlrpc_register_bulk (struct ptlrpc_request *req)
         rc = PtlMDAttach(me_h, md, PTL_UNLINK, &desc->bd_md_h);
         if (rc != PTL_OK) {
                 CERROR("PtlMDAttach failed: %d\n", rc);
-                LASSERT (rc == PTL_NOSPACE);
+                LASSERT (rc == PTL_NO_SPACE);
                 desc->bd_network_rw = 0;
                 rc2 = PtlMEUnlink (me_h);
                 LASSERT (rc2 == PTL_OK);
@@ -309,7 +316,7 @@ void ptlrpc_unregister_bulk (struct ptlrpc_request *req)
          * happened. */
 
         rc = PtlMDUnlink (desc->bd_md_h);
-        if (rc == PTL_INV_MD) {
+        if (rc == PTL_MD_INVALID) {
                 LASSERT(!ptlrpc_bulk_active(desc));
                 return;
         }
@@ -453,7 +460,7 @@ int ptl_send_rpc(struct ptlrpc_request *request)
                          PTL_INS_AFTER, &reply_me_h);
         if (rc != PTL_OK) {
                 CERROR("PtlMEAttach failed: %d\n", rc);
-                LASSERT (rc == PTL_NOSPACE);
+                LASSERT (rc == PTL_NO_SPACE);
                 GOTO(cleanup_repmsg, rc = -ENOMEM);
         }
 
@@ -471,7 +478,7 @@ int ptl_send_rpc(struct ptlrpc_request *request)
         reply_md.start     = request->rq_repmsg;
         reply_md.length    = request->rq_replen;
         reply_md.threshold = 1;
-        reply_md.options   = PTL_MD_OP_PUT;
+        reply_md.options   = PTLRPC_MD_OPTIONS | PTL_MD_OP_PUT;
         reply_md.user_ptr  = &request->rq_reply_cbid;
         reply_md.eventq    = connection->c_peer.peer_ni->pni_eq_h;
 
@@ -479,7 +486,7 @@ int ptl_send_rpc(struct ptlrpc_request *request)
                          &request->rq_reply_md_h);
         if (rc != PTL_OK) {
                 CERROR("PtlMDAttach failed: %d\n", rc);
-                LASSERT (rc == PTL_NOSPACE);
+                LASSERT (rc == PTL_NO_SPACE);
                 GOTO(cleanup_me, rc -ENOMEM);
         }
 
@@ -535,10 +542,8 @@ int ptlrpc_register_rqbd (struct ptlrpc_request_buffer_desc *rqbd)
         ptl_md_t                 md;
         ptl_handle_me_t          me_h;
 
-        CDEBUG(D_NET, "PtlMEAttach: portal %d on %s h %lx."LPX64"\n",
-               service->srv_req_portal, srv_ni->sni_ni->pni_name,
-               srv_ni->sni_ni->pni_ni_h.nal_idx,
-               srv_ni->sni_ni->pni_ni_h.cookie);
+        CDEBUG(D_NET, "PtlMEAttach: portal %d on %s\n",
+               service->srv_req_portal, srv_ni->sni_ni->pni_name);
 
         if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_PTLRPC_RQBD))
                 return (-ENOMEM);
@@ -553,20 +558,20 @@ int ptlrpc_register_rqbd (struct ptlrpc_request_buffer_desc *rqbd)
         LASSERT(rqbd->rqbd_refcount == 0);
         rqbd->rqbd_refcount = 1;
 
-        md.start      = rqbd->rqbd_buffer;
-        md.length     = service->srv_buf_size;
-        md.max_size   = service->srv_max_req_size;
-        md.threshold  = PTL_MD_THRESH_INF;
-        md.options    = PTL_MD_OP_PUT | PTL_MD_MAX_SIZE | PTL_MD_AUTO_UNLINK;
-        md.user_ptr   = &rqbd->rqbd_cbid;
-        md.eventq     = srv_ni->sni_ni->pni_eq_h;
+        md.start     = rqbd->rqbd_buffer;
+        md.length    = service->srv_buf_size;
+        md.max_size  = service->srv_max_req_size;
+        md.threshold = PTL_MD_THRESH_INF;
+        md.options   = PTLRPC_MD_OPTIONS | PTL_MD_OP_PUT | PTL_MD_MAX_SIZE;
+        md.user_ptr  = &rqbd->rqbd_cbid;
+        md.eventq    = srv_ni->sni_ni->pni_eq_h;
         
         rc = PtlMDAttach(me_h, md, PTL_UNLINK, &rqbd->rqbd_md_h);
         if (rc == PTL_OK)
                 return (0);
 
         CERROR("PtlMDAttach failed: %d; \n", rc);
-        LASSERT (rc == PTL_NOSPACE);
+        LASSERT (rc == PTL_NO_SPACE);
         rc = PtlMEUnlink (me_h);
         LASSERT (rc == PTL_OK);
         rqbd->rqbd_refcount = 0;
diff --git a/lustre/ptlrpc/pers.c b/lustre/ptlrpc/pers.c
new file mode 100644 (file)
index 0000000..7fcccd2
--- /dev/null
@@ -0,0 +1,86 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+#ifndef __KERNEL__
+#include <errno.h>
+#include <signal.h>
+#include <liblustre.h>
+#endif
+
+#include <linux/obd_support.h>
+#include <linux/obd_class.h>
+#include <linux/lustre_lib.h>
+#include <linux/lustre_ha.h>
+#include <linux/lustre_import.h>
+
+#include "ptlrpc_internal.h"
+
+#ifdef __KERNEL__
+#ifndef CRAY_PORTALS
+/* Kernel build without CRAY_PORTALS: append one page fragment to a bulk
+ * descriptor as a ptl_kiov_t (page pointer, offset within it, length).
+ *
+ * NOTE(review): nothing here checks that bd_iov[] has room for another
+ * entry - presumably the caller guarantees it; confirm. */
+void pers_bulk_add_page(struct ptlrpc_bulk_desc *desc, struct page *page,
+                        int pageoffset, int len)
+{
+        /* next unused slot */
+        ptl_kiov_t *kiov = &desc->bd_iov[desc->bd_iov_count];
+
+        kiov->kiov_page = page;
+        kiov->kiov_offset = pageoffset;
+        kiov->kiov_len = len;
+
+        desc->bd_iov_count++;
+}
+#else
+/* Kernel build with CRAY_PORTALS: append one page fragment to a bulk
+ * descriptor as a struct iovec whose iov_base carries the fragment's
+ * physical address (page_to_phys(page) + pageoffset) rather than a
+ * virtual one.
+ *
+ * NOTE(review): nothing here checks that bd_iov[] has room for another
+ * entry - presumably the caller guarantees it; confirm. */
+void pers_bulk_add_page(struct ptlrpc_bulk_desc *desc, struct page *page,
+                        int pageoffset, int len)
+{
+        /* next unused slot */
+        struct iovec *iov = &desc->bd_iov[desc->bd_iov_count];
+
+        /* Should get a compiler warning if sizeof(physaddr) > sizeof(void *) */
+        iov->iov_base = (void *)(page_to_phys(page) + pageoffset);
+        iov->iov_len = len;
+
+        desc->bd_iov_count++;
+}
+#endif
+
+#else /* !__KERNEL__ */
+
+/* Report whether two iovec entries describe adjacent memory.
+ *
+ * Returns 1 when 'candidate' starts at the byte immediately following the
+ * end of 'existing' (so both can be described by one longer entry), and
+ * 0 otherwise.  Neither entry is modified. */
+int can_merge_iovs(struct iovec *existing, struct iovec *candidate)
+{
+        /* iov_base is a void *; step through a char * because arithmetic on
+         * void * is a GNU extension rather than standard C */
+        char *existing_end = (char *)existing->iov_base + existing->iov_len;
+
+        return existing_end == (char *)candidate->iov_base;
+}
+/* liblustre build: append one page fragment to a bulk descriptor as a
+ * struct iovec, coalescing it with the previous entry when the two are
+ * contiguous in memory.
+ *
+ * NOTE(review): nothing here checks that bd_iov[] has room for another
+ * entry - presumably the caller guarantees it; confirm. */
+void pers_bulk_add_page(struct ptlrpc_bulk_desc *desc, struct page *page, 
+                        int pageoffset, int len)
+{
+        /* fill in the next unused slot first so it can be compared against
+         * its predecessor */
+        struct iovec *iov = &desc->bd_iov[desc->bd_iov_count];
+
+        iov->iov_base = page->addr + pageoffset;
+        iov->iov_len = len;
+
+        if (desc->bd_iov_count > 0 && can_merge_iovs(iov - 1, iov)) {
+                /* contiguous: grow the previous entry instead; the slot
+                 * just written stays uncounted, so the next call indexes
+                 * and overwrites it */
+                (iov - 1)->iov_len += len;
+        } else {
+                desc->bd_iov_count++;
+        }
+}
+#endif
index ad1d502..01d7d23 100644 (file)
@@ -38,7 +38,6 @@
 static DECLARE_MUTEX(pinger_sem);
 static struct list_head pinger_imports = LIST_HEAD_INIT(pinger_imports);
 
-#ifdef __KERNEL__
 static struct ptlrpc_thread *pinger_thread = NULL;
 
 int ptlrpc_ping(struct obd_import *imp) 
@@ -67,6 +66,7 @@ int ptlrpc_ping(struct obd_import *imp)
         RETURN(rc);
 }
 
+#ifdef __KERNEL__
 static int ptlrpc_pinger_main(void *arg)
 {
         struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg;
@@ -279,7 +279,8 @@ void ptlrpc_pinger_wake_up()
 #endif
 }
 
-#else
+#else /* !__KERNEL__ */
+
 /* XXX
  * the current implementation of pinger in liblustre is not optimized
  */
@@ -288,42 +289,32 @@ static struct pinger_data {
         int             pd_recursion;
         unsigned long   pd_this_ping;
         unsigned long   pd_next_ping;
-        struct ptlrpc_request_set *pd_set;
+        int             pd_force_check;
 } pinger_args;
 
 static int pinger_check_rpcs(void *arg)
 {
         unsigned long curtime = time(NULL);
-        struct ptlrpc_request *req;
-        struct ptlrpc_request_set *set;
         struct list_head *iter;
         struct pinger_data *pd = &pinger_args;
-        int rc;
 
         /* prevent recursion */
         if (pd->pd_recursion++) {
                 CDEBUG(D_HA, "pinger: recursion! quit\n");
-                LASSERT(pd->pd_set);
                 pd->pd_recursion--;
                 return 0;
         }
 
         /* have we reached ping point? */
-        if (!pd->pd_set && pd->pd_next_ping > curtime) {
+        if (pd->pd_next_ping > curtime && !pd->pd_force_check) {
                 pd->pd_recursion--;
                 return 0;
         }
 
-        /* if we have rpc_set already, continue processing it */
-        if (pd->pd_set) {
-                LASSERT(pd->pd_this_ping);
-                set = pd->pd_set;
-                goto do_check_set;
-        }
+        if (pd->pd_force_check)
+                pd->pd_force_check = 0;
 
         pd->pd_this_ping = curtime;
-        pd->pd_set = ptlrpc_prep_set();
-        set = pd->pd_set;
 
         /* add rpcs into set */
         down(&pinger_sem);
@@ -331,95 +322,50 @@ static int pinger_check_rpcs(void *arg)
                 struct obd_import *imp =
                         list_entry(iter, struct obd_import,
                                    imp_pinger_chain);
-                int generation, level;
+                int level, force;
                 unsigned long flags;
 
-                if (imp->imp_next_ping <= pd->pd_this_ping) {
-                        /* Add a ping. */
-                        spin_lock_irqsave(&imp->imp_lock, flags);
-                        generation = imp->imp_generation;
-                        level = imp->imp_state;
-                        spin_unlock_irqrestore(&imp->imp_lock, flags);
 
-                        if (level != LUSTRE_IMP_FULL) {
-                                CDEBUG(D_HA,
-                                       "not pinging %s (in recovery)\n",
-                                       imp->imp_target_uuid.uuid);
-                                continue;
+                spin_lock_irqsave(&imp->imp_lock, flags);
+                level = imp->imp_state;
+                force = imp->imp_force_verify;
+                if (force)
+                        imp->imp_force_verify = 0;
+                spin_unlock_irqrestore(&imp->imp_lock, flags);
+
+                if (imp->imp_next_ping <= pd->pd_this_ping || force) {
+                        if (level == LUSTRE_IMP_DISCON) {
+                                /* wait at least a timeout before 
+                                   trying recovery again. */
+                                imp->imp_next_ping = time(NULL) + 
+                                        (obd_timeout * HZ);
+                                ptlrpc_initiate_recovery(imp);
+                        } 
+                        else if (level != LUSTRE_IMP_FULL ||
+                                 imp->imp_obd->obd_no_recov) {
+                                CDEBUG(D_HA, 
+                                       "not pinging %s (in recovery "
+                                       " or recovery disabled: %s)\n",
+                                       imp->imp_target_uuid.uuid,
+                                       ptlrpc_import_state_name(level));
+                        } 
+                        else if (imp->imp_pingable || force) {
+                                ptlrpc_ping(imp);
                         }
 
-                        req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL,
-                                              NULL);
-                        if (!req) {
-                                CERROR("out of memory\n");
-                                break;
-                        }
-                        req->rq_no_resend = 1;
-                        req->rq_replen = lustre_msg_size(0, NULL);
-                        req->rq_send_state = LUSTRE_IMP_FULL;
-                        req->rq_phase = RQ_PHASE_RPC;
-                        req->rq_import_generation = generation;
-                        ptlrpc_set_add_req(set, req);
                 } else {
-                        CDEBUG(D_HA, "don't need to ping %s (%lu > "
-                               "%lu)\n", imp->imp_target_uuid.uuid,
-                               imp->imp_next_ping, pd->pd_this_ping);
+                        if (imp->imp_pingable) {
+                                CDEBUG(D_HA, "don't need to ping %s "
+                                       "(%lu > %lu)\n", 
+                                       imp->imp_target_uuid.uuid,
+                                       imp->imp_next_ping, pd->pd_this_ping);
+                        }
                 }
         }
-        pd->pd_this_ping = curtime;
-        up(&pinger_sem);
-
-        /* Might be empty, that's OK. */
-        if (set->set_remaining == 0)
-                CDEBUG(D_HA, "nothing to ping\n");
 
-        list_for_each(iter, &set->set_requests) {
-                struct ptlrpc_request *req =
-                        list_entry(iter, struct ptlrpc_request,
-                                   rq_set_chain);
-                DEBUG_REQ(D_HA, req, "pinging %s->%s",
-                          req->rq_import->imp_obd->obd_uuid.uuid,
-                          req->rq_import->imp_target_uuid.uuid);
-                (void)ptl_send_rpc(req);
-        }
-
-do_check_set:
-        rc = ptlrpc_check_set(set);
-
-        /* not finished, and we are not expired, simply return */
-        if (!rc && curtime < pd->pd_this_ping + obd_timeout) {
-                CDEBUG(D_HA, "not finished, but also not expired\n");
-                pd->pd_recursion--;
-                return 0;
-        }
-
-        /* Expire all the requests that didn't come back. */
-        down(&pinger_sem);
-        list_for_each(iter, &set->set_requests) {
-                req = list_entry(iter, struct ptlrpc_request,
-                                 rq_set_chain);
-
-                if (req->rq_replied)
-                        continue;
-
-                req->rq_phase = RQ_PHASE_COMPLETE;
-                set->set_remaining--;
-                /* If it was disconnected, don't sweat it. */
-                if (list_empty(&req->rq_import->imp_pinger_chain)) {
-                        ptlrpc_unregister_reply(req);
-                        continue;
-                }
-
-                CDEBUG(D_HA, "pinger initiate expire_one_request\n");
-                ptlrpc_expire_one_request(req);
-        }
         up(&pinger_sem);
 
-        ptlrpc_set_destroy(set);
-        pd->pd_set = NULL;
-
-        pd->pd_next_ping = pd->pd_this_ping + obd_timeout;
-        pd->pd_this_ping = 0; /* XXX for debug */
+        pd->pd_next_ping = pd->pd_this_ping + (obd_timeout * HZ);
 
         CDEBUG(D_HA, "finished a round ping\n");
         pd->pd_recursion--;
@@ -451,8 +397,7 @@ void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
 {
         down(&pinger_sem);
         imp->imp_next_ping = time(NULL) + obd_timeout;
-        if (pinger_args.pd_set == NULL &&
-            pinger_args.pd_next_ping > imp->imp_next_ping) {
+        if (pinger_args.pd_next_ping > imp->imp_next_ping) {
                 CDEBUG(D_HA, "set next ping to %ld(cur %ld)\n",
                         imp->imp_next_ping, time(NULL));
                 pinger_args.pd_next_ping = imp->imp_next_ping;
@@ -495,8 +440,6 @@ int ptlrpc_pinger_del_import(struct obd_import *imp)
 
 void ptlrpc_pinger_wake_up()
 {
-#ifdef ENABLE_PINGER
-        /* XXX force pinger to run, if needed */
-#endif
+        /* make the next pinger_check_rpcs() call run a round even if
+         * pd_next_ping has not been reached yet */
+        pinger_args.pd_force_check = 1;
 }
 #endif /* !__KERNEL__ */
index 021fb0f..d7c2378 100644 (file)
@@ -108,6 +108,58 @@ enum {
 
 int ptlrpc_expire_one_request(struct ptlrpc_request *req);
 
+/* XXX these should be run-time checks so that one build can run against
+ * many NALs */
+
+/* non-zero when ptlrpc_fill_md() must always hand portals an iov array,
+ * even if the bulk descriptor holds no entries */
+#if defined(__KERNEL__)
+#define ptl_requires_iov() 1
+#else
+#define ptl_requires_iov() 0
+#endif
+
+/* MD option bits that tell portals which kind of iov array md.start
+ * points at in this build */
+#if defined(__KERNEL__)
+# if defined(CRAY_PORTALS)
+#  define PTLRPC_PTL_MD_IOV (PTL_MD_IOVEC | PTL_MD_PHYS)
+# else
+#  define PTLRPC_PTL_MD_IOV PTL_MD_KIOV
+# endif
+#else
+# define PTLRPC_PTL_MD_IOV PTL_MD_IOVEC
+#endif
+
+/* largest number of iov entries ptlrpc_fill_md() accepts for one MD */
+#if !defined(__KERNEL__) && defined(CRAY_PORTALS)
+#define ptl_md_max_iovs() 1
+#else
+#define ptl_md_max_iovs() 0 /* unlimited */
+#endif
+
+/* XXX hopefully we can make the iov a consistent type across portals
+ * implementations */
+#if defined(__KERNEL__)
+/* placeholder only: ptlrpc_fill_md() never takes the branch that uses this
+ * while ptl_requires_iov() is 1 */
+#define ptl_iov_base(kiov) (NULL) /* this is meaningless */
+#else
+#define ptl_iov_base(iov) ((iov)->iov_base)
+#endif
+
+/* Event queue sizing and callback selection for each build flavour:
+ * PTLRPC_NUM_EQ is the number of event queue entries, PTLRPC_EQ_CALLBACK
+ * the handler associated with the queue. */
+#ifdef __KERNEL__
+/* portals calls the callback when the event is added to the queue, so we don't
+ * care if we lose events */
+# define PTLRPC_NUM_EQ 1024
+# define PTLRPC_EQ_CALLBACK ptlrpc_master_callback
+#else
+/* liblustre: no callback, or only when app polls event queues, so allocate a
+ * nice big event queue to ensure we don't drop any */
+# define PTLRPC_NUM_EQ 10240
+/* test with defined(), like every other CRAY_PORTALS check here, so that an
+ * empty or zero-valued definition selects the same branch everywhere instead
+ * of being a preprocessor error or silently taking the other branch */
+# if defined(CRAY_PORTALS)
+int cray_portals_callback(ptl_event_t *ev);
+#  define PTLRPC_EQ_CALLBACK cray_portals_callback
+# else
+#  define PTLRPC_EQ_CALLBACK PTL_EQ_HANDLER_NONE
+# endif
+#endif
+
+/* pers.c */
+void pers_bulk_add_page(struct ptlrpc_bulk_desc *desc, struct page *page, 
+                        int pageoffset, int len);
+
 /* pinger.c */
 int ptlrpc_start_pinger(void);
 int ptlrpc_stop_pinger(void);
index 2307d20..751b787 100644 (file)
@@ -851,7 +851,7 @@ int ptlrpc_unregister_service(struct ptlrpc_service *service)
                                            rqbd_list);
 
                         rc = PtlMDUnlink(rqbd->rqbd_md_h);
-                        LASSERT (rc == PTL_OK || rc == PTL_INV_MD);
+                        LASSERT (rc == PTL_OK || rc == PTL_MD_INVALID);
                 }
 
                 /* Wait for the network to release any buffers it's
index 322e1f1..01f7c75 100755 (executable)
@@ -1113,7 +1113,8 @@ class Network(Module):
                 panic("unable to set hostaddr for", self.net_type, self.hostaddr, self.cluster_id)
             debug("hostaddr:", self.hostaddr)
 
-        self.add_portals_module("libcfs", 'portals')
+        self.add_portals_module("libcfs", 'libcfs')
+        self.add_portals_module("portals", 'portals')
         if node_needs_router():
             self.add_portals_module("router", 'kptlrouter')
         if self.net_type == 'tcp':