Whamcloud - gitweb
- merge 2 weeks of b1_4 fixes onto HEAD
author phil <phil>
Fri, 4 Jun 2004 15:14:58 +0000 (15:14 +0000)
committer phil <phil>
Fri, 4 Jun 2004 15:14:58 +0000 (15:14 +0000)
- b1_4 is basically the parent of HEAD, because that's the direction
  in which changes flow, as strange as that sounds.  So there's a
  HEAD_BASE tag which sits on b1_4.

177 files changed:
ldiskfs/kernel_patches/patches/iopen-2.6-suse.patch
ldiskfs/kernel_patches/series/ldiskfs-2.6-suse.series
ldiskfs/ldiskfs/autoMakefile.am
lnet/archdep.m4
lnet/include/linux/kp30.h
lnet/include/linux/kpr.h
lnet/include/linux/libcfs.h
lnet/include/linux/portals_lib.h
lnet/include/lnet/api-support.h
lnet/include/lnet/api.h
lnet/include/lnet/arg-blocks.h [deleted file]
lnet/include/lnet/errno.h
lnet/include/lnet/internal.h
lnet/include/lnet/lib-dispatch.h [deleted file]
lnet/include/lnet/lib-lnet.h
lnet/include/lnet/lib-nal.h [deleted file]
lnet/include/lnet/lib-p30.h
lnet/include/lnet/lib-types.h
lnet/include/lnet/nal.h
lnet/include/lnet/types.h
lnet/klnds/gmlnd/gmlnd.h
lnet/klnds/gmlnd/gmlnd_api.c
lnet/klnds/gmlnd/gmlnd_cb.c
lnet/klnds/gmlnd/gmlnd_comm.c
lnet/klnds/qswlnd/qswlnd.c
lnet/klnds/qswlnd/qswlnd.h
lnet/klnds/qswlnd/qswlnd_cb.c
lnet/klnds/socklnd/socklnd.c
lnet/klnds/socklnd/socklnd.h
lnet/klnds/socklnd/socklnd_cb.c
lnet/libcfs/module.c
lnet/lnet/Makefile.in
lnet/lnet/Makefile.mk
lnet/lnet/api-eq.c [deleted file]
lnet/lnet/api-errno.c
lnet/lnet/api-init.c [deleted file]
lnet/lnet/api-me.c [deleted file]
lnet/lnet/api-ni.c
lnet/lnet/api-wrap.c
lnet/lnet/autoMakefile.am
lnet/lnet/lib-dispatch.c [deleted file]
lnet/lnet/lib-eq.c
lnet/lnet/lib-init.c
lnet/lnet/lib-md.c
lnet/lnet/lib-me.c
lnet/lnet/lib-move.c
lnet/lnet/lib-msg.c
lnet/lnet/lib-ni.c
lnet/lnet/lib-pid.c
lnet/lnet/module.c
lnet/ulnds/address.c
lnet/ulnds/bridge.h
lnet/ulnds/procapi.c
lnet/ulnds/procbridge.h
lnet/ulnds/proclib.c
lnet/ulnds/socklnd/address.c
lnet/ulnds/socklnd/bridge.h
lnet/ulnds/socklnd/procapi.c
lnet/ulnds/socklnd/procbridge.h
lnet/ulnds/socklnd/proclib.c
lnet/ulnds/socklnd/tcplnd.c
lnet/ulnds/tcplnd.c
lustre/ChangeLog
lustre/autoMakefile.am
lustre/autogen.sh
lustre/configure.in
lustre/include/linux/lustre_compat25.h
lustre/include/linux/lustre_net.h
lustre/kernel_patches/patches/ext-2.4-patch-1-chaos.patch
lustre/kernel_patches/patches/ext-2.4-patch-1-suse-2.4.19.patch
lustre/kernel_patches/patches/ext-2.4-patch-1-suse.patch
lustre/kernel_patches/patches/ext-2.4-patch-1.patch
lustre/kernel_patches/patches/ext-2.4-patch-4.patch
lustre/kernel_patches/patches/ext3-htree-2.4.19-pre1.patch
lustre/kernel_patches/patches/ext3-htree-2.4.21-chaos.patch
lustre/kernel_patches/patches/ext3-htree-2.4.22-rh.patch
lustre/kernel_patches/patches/ext3-htree-rename_fix.patch [new file with mode: 0644]
lustre/kernel_patches/patches/ext3-htree-suse.patch
lustre/kernel_patches/patches/ext3-htree.patch
lustre/kernel_patches/patches/ext3-pdirops-2.4.24-chaos.patch
lustre/kernel_patches/patches/htree-ext3-2.4.18.patch
lustre/kernel_patches/patches/iopen-2.6-suse.patch
lustre/kernel_patches/patches/loop-sync-2.4.21-suse.patch [new file with mode: 0644]
lustre/kernel_patches/patches/lustre_version.patch
lustre/kernel_patches/patches/md_path_lookup-2.6-suse.patch [new file with mode: 0644]
lustre/kernel_patches/patches/vfs_intent-2.6-suse.patch
lustre/kernel_patches/patches/vfs_nointent-2.6-suse.patch
lustre/kernel_patches/series/2.6-suse.series
lustre/kernel_patches/series/ldiskfs-2.6-suse.series
lustre/kernel_patches/series/suse-2.4.21-2
lustre/kernel_patches/targets/2.6-suse.target
lustre/ldiskfs/autoMakefile.am
lustre/ldlm/ldlm_lockd.c
lustre/ldlm/ldlm_resource.c
lustre/liblustre/tests/Makefile.am
lustre/llite/file.c
lustre/lov/lov_obd.c
lustre/mds/handler.c
lustre/mds/mds_internal.h
lustre/mds/mds_lov.c
lustre/mds/mds_open.c
lustre/mds/mds_unlink_open.c
lustre/obdclass/class_obd.c
lustre/obdclass/simple.c [deleted file]
lustre/obdfilter/filter_io_26.c
lustre/osc/osc_create.c
lustre/osc/osc_request.c
lustre/portals/archdep.m4
lustre/portals/include/linux/kp30.h
lustre/portals/include/linux/kpr.h
lustre/portals/include/linux/libcfs.h
lustre/portals/include/linux/portals_lib.h
lustre/portals/include/portals/api-support.h
lustre/portals/include/portals/api.h
lustre/portals/include/portals/arg-blocks.h [deleted file]
lustre/portals/include/portals/errno.h
lustre/portals/include/portals/lib-dispatch.h [deleted file]
lustre/portals/include/portals/lib-nal.h [deleted file]
lustre/portals/include/portals/lib-p30.h
lustre/portals/include/portals/lib-types.h
lustre/portals/include/portals/nal.h
lustre/portals/include/portals/types.h
lustre/portals/knals/gmnal/gmnal.h
lustre/portals/knals/gmnal/gmnal_api.c
lustre/portals/knals/gmnal/gmnal_cb.c
lustre/portals/knals/gmnal/gmnal_comm.c
lustre/portals/knals/qswnal/qswnal.c
lustre/portals/knals/qswnal/qswnal.h
lustre/portals/knals/qswnal/qswnal_cb.c
lustre/portals/knals/socknal/socknal.c
lustre/portals/knals/socknal/socknal.h
lustre/portals/knals/socknal/socknal_cb.c
lustre/portals/libcfs/module.c
lustre/portals/portals/Makefile.in
lustre/portals/portals/Makefile.mk
lustre/portals/portals/api-eq.c [deleted file]
lustre/portals/portals/api-errno.c
lustre/portals/portals/api-init.c [deleted file]
lustre/portals/portals/api-me.c [deleted file]
lustre/portals/portals/api-ni.c
lustre/portals/portals/api-wrap.c
lustre/portals/portals/autoMakefile.am
lustre/portals/portals/lib-dispatch.c [deleted file]
lustre/portals/portals/lib-eq.c
lustre/portals/portals/lib-init.c
lustre/portals/portals/lib-md.c
lustre/portals/portals/lib-me.c
lustre/portals/portals/lib-move.c
lustre/portals/portals/lib-msg.c
lustre/portals/portals/lib-ni.c
lustre/portals/portals/lib-pid.c
lustre/portals/portals/module.c
lustre/portals/unals/address.c
lustre/portals/unals/bridge.h
lustre/portals/unals/procapi.c
lustre/portals/unals/procbridge.h
lustre/portals/unals/proclib.c
lustre/portals/unals/tcpnal.c
lustre/ptlbd/autoMakefile.am
lustre/ptlrpc/events.c
lustre/ptlrpc/ptlrpc_internal.h
lustre/ptlrpc/ptlrpc_module.c
lustre/scripts/cvsdiffclient
lustre/scripts/land1.sh
lustre/scripts/lmake
lustre/scripts/lustre-kernel-2.4.spec.in
lustre/scripts/merge1.sh
lustre/tests/.cvsignore
lustre/tests/Makefile.am
lustre/tests/cfg/local.sh
lustre/tests/recovery-small.sh
lustre/tests/rename_many.c [new file with mode: 0644]
lustre/tests/replay-dual.sh
lustre/tests/replay-single.sh
lustre/tests/sanity.sh
lustre/utils/lconf
lustre/utils/llmount.c

index 2133355..8a8d115 100644 (file)
@@ -8,8 +8,8 @@
 
 Index: linux-stage/fs/ext3/Makefile
 ===================================================================
---- linux-stage.orig/fs/ext3/Makefile  2004-05-07 16:00:16.000000000 -0400
-+++ linux-stage/fs/ext3/Makefile       2004-05-07 16:00:17.000000000 -0400
+--- linux-stage.orig/fs/ext3/Makefile  2004-05-11 17:21:20.000000000 -0400
++++ linux-stage/fs/ext3/Makefile       2004-05-11 17:21:21.000000000 -0400
 @@ -4,7 +4,7 @@
  
  obj-$(CONFIG_EXT3_FS) += ext3.o
@@ -21,8 +21,8 @@ Index: linux-stage/fs/ext3/Makefile
  ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
 Index: linux-stage/fs/ext3/inode.c
 ===================================================================
---- linux-stage.orig/fs/ext3/inode.c   2004-05-07 16:00:16.000000000 -0400
-+++ linux-stage/fs/ext3/inode.c        2004-05-07 17:21:59.000000000 -0400
+--- linux-stage.orig/fs/ext3/inode.c   2004-05-11 17:21:21.000000000 -0400
++++ linux-stage/fs/ext3/inode.c        2004-05-11 17:21:21.000000000 -0400
 @@ -37,6 +37,7 @@
  #include <linux/mpage.h>
  #include <linux/uio.h>
@@ -43,8 +43,8 @@ Index: linux-stage/fs/ext3/inode.c
        bh = iloc.bh;
 Index: linux-stage/fs/ext3/iopen.c
 ===================================================================
---- linux-stage.orig/fs/ext3/iopen.c   2004-05-07 16:00:17.000000000 -0400
-+++ linux-stage/fs/ext3/iopen.c        2004-05-07 17:22:37.000000000 -0400
+--- linux-stage.orig/fs/ext3/iopen.c   1969-12-31 19:00:00.000000000 -0500
++++ linux-stage/fs/ext3/iopen.c        2004-05-11 17:21:21.000000000 -0400
 @@ -0,0 +1,272 @@
 +/*
 + * linux/fs/ext3/iopen.c
@@ -320,8 +320,8 @@ Index: linux-stage/fs/ext3/iopen.c
 +}
 Index: linux-stage/fs/ext3/iopen.h
 ===================================================================
---- linux-stage.orig/fs/ext3/iopen.h   2004-05-07 16:00:17.000000000 -0400
-+++ linux-stage/fs/ext3/iopen.h        2004-05-07 16:00:17.000000000 -0400
+--- linux-stage.orig/fs/ext3/iopen.h   1969-12-31 19:00:00.000000000 -0500
++++ linux-stage/fs/ext3/iopen.h        2004-05-11 17:21:21.000000000 -0400
 @@ -0,0 +1,15 @@
 +/*
 + * iopen.h
@@ -340,8 +340,8 @@ Index: linux-stage/fs/ext3/iopen.h
 +                                         struct inode *inode, int rehash);
 Index: linux-stage/fs/ext3/namei.c
 ===================================================================
---- linux-stage.orig/fs/ext3/namei.c   2004-05-07 16:00:16.000000000 -0400
-+++ linux-stage/fs/ext3/namei.c        2004-05-07 16:00:17.000000000 -0400
+--- linux-stage.orig/fs/ext3/namei.c   2004-05-11 17:21:20.000000000 -0400
++++ linux-stage/fs/ext3/namei.c        2004-05-11 17:21:21.000000000 -0400
 @@ -37,6 +37,7 @@
  #include <linux/buffer_head.h>
  #include <linux/smp_lock.h>
@@ -420,30 +420,30 @@ Index: linux-stage/fs/ext3/namei.c
  }
 Index: linux-stage/fs/ext3/super.c
 ===================================================================
---- linux-stage.orig/fs/ext3/super.c   2004-05-07 16:00:16.000000000 -0400
-+++ linux-stage/fs/ext3/super.c        2004-05-07 17:21:59.000000000 -0400
+--- linux-stage.orig/fs/ext3/super.c   2004-05-11 17:21:21.000000000 -0400
++++ linux-stage/fs/ext3/super.c        2004-05-11 17:44:53.000000000 -0400
 @@ -536,7 +536,7 @@
        Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_noload,
        Opt_commit, Opt_journal_update, Opt_journal_inum,
        Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
--      Opt_ignore, Opt_err,
-+      Opt_ignore, Opt_err, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+-      Opt_ignore, Opt_barrier,
++      Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+       Opt_err,
  };
  
- static match_table_t tokens = {
-@@ -575,6 +575,9 @@
-       {Opt_ignore, "noquota"},
+@@ -577,6 +577,9 @@
        {Opt_ignore, "quota"},
        {Opt_ignore, "usrquota"},
-+      {Opt_iopen,  "iopen"},
-+      {Opt_noiopen,  "noiopen"},
-+      {Opt_iopen_nopriv,  "iopen_nopriv"},
+       {Opt_barrier, "barrier=%u"},
++      {Opt_iopen, "iopen"},
++      {Opt_noiopen, "noiopen"},
++      {Opt_iopen_nopriv, "iopen_nopriv"},
        {Opt_err, NULL}
  };
  
-@@ -762,6 +765,18 @@
-               case Opt_abort:
-                       set_opt(sbi->s_mount_opt, ABORT);
+@@ -772,6 +775,18 @@
+                       else
+                               clear_opt(sbi->s_mount_opt, BARRIER);
                        break;
 +              case Opt_iopen:
 +                      set_opt (sbi->s_mount_opt, IOPEN);
@@ -462,14 +462,14 @@ Index: linux-stage/fs/ext3/super.c
                default:
 Index: linux-stage/include/linux/ext3_fs.h
 ===================================================================
---- linux-stage.orig/include/linux/ext3_fs.h   2004-05-07 16:00:16.000000000 -0400
-+++ linux-stage/include/linux/ext3_fs.h        2004-05-07 16:00:17.000000000 -0400
-@@ -325,6 +325,8 @@
- #define EXT3_MOUNT_NO_UID32           0x2000  /* Disable 32-bit UIDs */
+--- linux-stage.orig/include/linux/ext3_fs.h   2004-05-11 17:21:20.000000000 -0400
++++ linux-stage/include/linux/ext3_fs.h        2004-05-11 17:21:21.000000000 -0400
+@@ -326,6 +326,8 @@
  #define EXT3_MOUNT_XATTR_USER         0x4000  /* Extended user attributes */
  #define EXT3_MOUNT_POSIX_ACL          0x8000  /* POSIX Access Control Lists */
-+#define EXT3_MOUNT_IOPEN             0x10000  /* Allow access via iopen */
-+#define EXT3_MOUNT_IOPEN_NOPRIV              0x20000  /* Make iopen world-readable */
+ #define EXT3_MOUNT_BARRIER            0x10000 /* Use block barriers */
++#define EXT3_MOUNT_IOPEN                0x20000 /* Allow access via iopen */
++#define EXT3_MOUNT_IOPEN_NOPRIV         0x40000 /* Make iopen world-readable */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef _LINUX_EXT2_FS_H
index cff99dd..d27088e 100644 (file)
@@ -7,3 +7,4 @@ ext3-init-generation-2.6-suse.patch
 ext3-ea-in-inode-2.6-suse.patch
 export-ext3-2.6-suse.patch
 ext3-include-fixes-2.6-suse.patch
+ext3-htree-rename_fix.patch 
index b24081e..11838d6 100644 (file)
@@ -1,6 +1,8 @@
+if MODULES
 if LDISKFS
 modulefs_DATA = ldiskfs$(KMODEXT)
 endif
+endif
 
 ldiskfs_linux_headers := $(addprefix linux/,$(subst ext3,ldiskfs,$(notdir $(linux_headers))))
 
index 636ee1d..cb6e0a2 100644 (file)
@@ -218,12 +218,13 @@ if test x$enable_modules != xno ; then
        fi
        LUSTRE_MODULE_TRY_MAKE(
                [#include <linux/version.h>],
-               [LINUXRELEASE=UTS_RELEASE],
+               [char *LINUXRELEASE;
+                LINUXRELEASE=UTS_RELEASE;],
                [$makerule LUSTRE_KERNEL_TEST=conftest.i],
                [test -s kernel-tests/conftest.i],
                [
                        # LINUXRELEASE="UTS_RELEASE"
-                       eval $(grep LINUXRELEASE kernel-tests/conftest.i)
+                       eval $(grep "LINUXRELEASE=" kernel-tests/conftest.i)
                ],[
                        AC_MSG_RESULT([unknown])
                        AC_MSG_ERROR([Could not preprocess test program.  Consult config.log for details.])
index c55dd37..6ef28a8 100644 (file)
@@ -7,12 +7,6 @@
 #include <linux/libcfs.h>
 #define PORTAL_DEBUG
 
-#ifndef offsetof
-# define offsetof(typ,memb)     ((unsigned long)((char *)&(((typ *)0)->memb)))
-#endif
-
-#define LOWEST_BIT_SET(x)       ((x) & ~((x) - 1))
-
 #ifdef __KERNEL__
 # include <linux/vmalloc.h>
 # include <linux/time.h>
@@ -647,7 +641,6 @@ enum {
         TCPNAL    = 5,
         ROUTER    = 6,
         IBNAL     = 7,
-        CRAY_KB_ERNAL = 8,
         NAL_ENUM_END_MARKER
 };
 
index 51d2d2f..1127698 100644 (file)
@@ -4,7 +4,7 @@
 #ifndef _KPR_H
 #define _KPR_H
 
-# include <portals/lib-nal.h> /* for ptl_hdr_t */
+# include <portals/lib-types.h> /* for ptl_hdr_t */
 
 /******************************************************************************/
 /* Kernel Portals Router interface */
index c2a15f4..a205163 100644 (file)
@@ -79,9 +79,11 @@ extern unsigned int portal_cerror;
 #define S_PTLROUTER   0x00100000
 #define S_COBD        0x00200000
 #define S_IBNAL       0x00400000
-#define S_LMV         0x00800000
-#define S_SM          0x01000000
-#define S_CMOBD       0x02000000
+#define S_SM          0x00800000
+#define S_ASOBD       0x01000000
+#define S_LMV         0x02000000
+#define S_CMOBD       0x04000000
+
 /* If you change these values, please keep portals/utils/debug.c
  * up to date! */
 
index 609290d..b4741cc 100644 (file)
@@ -77,8 +77,10 @@ static inline char *strdup(const char *str)
 #endif
 
 #ifdef __KERNEL__
+# define NTOH__u16(var) le16_to_cpu(var)
 # define NTOH__u32(var) le32_to_cpu(var)
 # define NTOH__u64(var) le64_to_cpu(var)
+# define HTON__u16(var) cpu_to_le16(var)
 # define HTON__u32(var) cpu_to_le32(var)
 # define HTON__u64(var) cpu_to_le64(var)
 #else
@@ -92,8 +94,10 @@ static inline char *strdup(const char *str)
        };       \
        (ret);     \
     })
+# define NTOH__u16(var) (var)
 # define NTOH__u32(var) (var)
 # define NTOH__u64(var) (expansion_u64(var))
+# define HTON__u16(var) (var)
 # define HTON__u32(var) (var)
 # define HTON__u64(var) (expansion_u64(var))
 #endif
index cfae78c..c5994c6 100644 (file)
@@ -19,9 +19,4 @@
 
 #include <portals/internal.h>
 #include <portals/nal.h>
-#include <portals/arg-blocks.h>
 
-/* Hack for 2.4.18 macro name collision */
-#ifdef yield
-#undef yield
-#endif
index 6d382bb..c7aaced 100644 (file)
@@ -5,7 +5,6 @@
 
 #include <portals/types.h>
 
-#ifndef PTL_NO_WRAP
 int PtlInit(int *);
 void PtlFini(void);
 
@@ -17,8 +16,6 @@ int PtlNIInitialized(ptl_interface_t);
 
 int PtlNIFini(ptl_handle_ni_t interface_in);
 
-#endif
-
 int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id);
 
 
@@ -32,9 +29,7 @@ int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in,
 int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in,
               unsigned long *distance_out);
 
-#ifndef PTL_NO_WRAP
 int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * interface_out);
-#endif
 
 
 /* 
@@ -74,16 +69,12 @@ int PtlMEUnlink(ptl_handle_me_t current_in);
 
 int PtlMEUnlinkList(ptl_handle_me_t current_in);
 
-int PtlTblDump(ptl_handle_ni_t ni, int index_in);
-int PtlMEDump(ptl_handle_me_t current_in);
-
 
 
 /*
  * Memory descriptors
  */
 
-#ifndef PTL_NO_WRAP
 int PtlMDAttach(ptl_handle_me_t current_in, ptl_md_t md_in,
                 ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out);
 
@@ -95,7 +86,6 @@ int PtlMDUnlink(ptl_handle_md_t md_in);
 int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t * old_inout,
                 ptl_md_t * new_inout, ptl_handle_eq_t testq_in);
 
-#endif
 
 /* These should not be called by users */
 int PtlMDUpdate_internal(ptl_handle_md_t md_in, ptl_md_t * old_inout,
@@ -108,16 +98,11 @@ int PtlMDUpdate_internal(ptl_handle_md_t md_in, ptl_md_t * old_inout,
 /*
  * Event queues
  */
-#ifndef PTL_NO_WRAP
-
-/* These should be called by users */
 int PtlEQAlloc(ptl_handle_ni_t ni_in, ptl_size_t count_in,
                ptl_eq_handler_t handler,
                ptl_handle_eq_t *handle_out);
 int PtlEQFree(ptl_handle_eq_t eventq_in);
 
-int PtlEQCount(ptl_handle_eq_t eventq_in, ptl_size_t * count_out);
-
 int PtlEQGet(ptl_handle_eq_t eventq_in, ptl_event_t * event_out);
 
 
@@ -125,7 +110,6 @@ int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t * event_out);
 
 int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout,
              ptl_event_t *event_out, int *which_out);
-#endif
 
 /*
  * Access Control Table
diff --git a/lnet/include/lnet/arg-blocks.h b/lnet/include/lnet/arg-blocks.h
deleted file mode 100644 (file)
index 21e30d5..0000000
+++ /dev/null
@@ -1,268 +0,0 @@
-#ifndef PTL_BLOCKS_H
-#define PTL_BLOCKS_H
-
-#include "build_check.h"
-
-/*
- * blocks.h
- *
- * Argument block types for the Portals 3.0 library
- * Generated by idl
- *
- */
-
-#include <portals/types.h>
-
-/* put LIB_MAX_DISPATCH last here  -- these must match the
-   assignements to the dispatch table in lib-p30/dispatch.c */
-#define PTL_GETID     1
-#define PTL_NISTATUS  2
-#define PTL_NIDIST    3
-// #define PTL_NIDEBUG   4
-#define PTL_MEATTACH  5
-#define PTL_MEINSERT  6
-// #define PTL_MEPREPEND 7
-#define PTL_MEUNLINK  8
-#define PTL_TBLDUMP   9 
-#define PTL_MEDUMP   10
-#define PTL_MDATTACH 11
-// #define PTL_MDINSERT 12
-#define PTL_MDBIND   13
-#define PTL_MDUPDATE 14
-#define PTL_MDUNLINK 15
-#define PTL_EQALLOC  16
-#define PTL_EQFREE   17
-#define PTL_ACENTRY  18
-#define PTL_PUT      19 
-#define PTL_GET      20
-#define PTL_FAILNID  21
-#define LIB_MAX_DISPATCH 21
-
-typedef struct PtlFailNid_in {
-       ptl_handle_ni_t interface;
-       ptl_nid_t       nid;
-       unsigned int    threshold;
-} PtlFailNid_in;
-
-typedef struct PtlFailNid_out {
-       int             rc;
-} PtlFailNid_out;
-
-typedef struct PtlGetId_in {
-        ptl_handle_ni_t handle_in;
-} PtlGetId_in;
-
-typedef struct PtlGetId_out {
-        int rc;
-        ptl_process_id_t id_out;
-} PtlGetId_out;
-
-typedef struct PtlNIStatus_in {
-        ptl_handle_ni_t interface_in;
-        ptl_sr_index_t register_in;
-} PtlNIStatus_in;
-
-typedef struct PtlNIStatus_out {
-        int rc;
-        ptl_sr_value_t status_out;
-} PtlNIStatus_out;
-
-
-typedef struct PtlNIDist_in {
-        ptl_handle_ni_t interface_in;
-        ptl_process_id_t process_in;
-} PtlNIDist_in;
-
-typedef struct PtlNIDist_out {
-        int rc;
-        unsigned long distance_out;
-} PtlNIDist_out;
-
-
-typedef struct PtlNIDebug_in {
-        unsigned int mask_in;
-} PtlNIDebug_in;
-
-typedef struct PtlNIDebug_out {
-        unsigned int rc;
-} PtlNIDebug_out;
-
-
-typedef struct PtlMEAttach_in {
-        ptl_handle_ni_t interface_in;
-        ptl_pt_index_t index_in;
-        ptl_ins_pos_t position_in;
-        ptl_process_id_t match_id_in;
-        ptl_match_bits_t match_bits_in;
-        ptl_match_bits_t ignore_bits_in;
-        ptl_unlink_t unlink_in;
-} PtlMEAttach_in;
-
-typedef struct PtlMEAttach_out {
-        int rc;
-        ptl_handle_me_t handle_out;
-} PtlMEAttach_out;
-
-
-typedef struct PtlMEInsert_in {
-        ptl_handle_me_t current_in;
-        ptl_process_id_t match_id_in;
-        ptl_match_bits_t match_bits_in;
-        ptl_match_bits_t ignore_bits_in;
-        ptl_unlink_t unlink_in;
-        ptl_ins_pos_t position_in;
-} PtlMEInsert_in;
-
-typedef struct PtlMEInsert_out {
-        int rc;
-        ptl_handle_me_t handle_out;
-} PtlMEInsert_out;
-
-typedef struct PtlMEUnlink_in {
-        ptl_handle_me_t current_in;
-        ptl_unlink_t unlink_in;
-} PtlMEUnlink_in;
-
-typedef struct PtlMEUnlink_out {
-        int rc;
-} PtlMEUnlink_out;
-
-
-typedef struct PtlTblDump_in {
-        int index_in;
-} PtlTblDump_in;
-
-typedef struct PtlTblDump_out {
-        int rc;
-} PtlTblDump_out;
-
-
-typedef struct PtlMEDump_in {
-        ptl_handle_me_t current_in;
-} PtlMEDump_in;
-
-typedef struct PtlMEDump_out {
-        int rc;
-} PtlMEDump_out;
-
-
-typedef struct PtlMDAttach_in {
-        ptl_handle_me_t me_in;
-        ptl_handle_eq_t eq_in;
-        ptl_md_t md_in;
-        ptl_unlink_t unlink_in;
-} PtlMDAttach_in;
-
-typedef struct PtlMDAttach_out {
-        int rc;
-        ptl_handle_md_t handle_out;
-} PtlMDAttach_out;
-
-
-typedef struct PtlMDBind_in {
-        ptl_handle_ni_t ni_in;
-        ptl_handle_eq_t eq_in;
-        ptl_md_t md_in;
-       ptl_unlink_t unlink_in;
-} PtlMDBind_in;
-
-typedef struct PtlMDBind_out {
-        int rc;
-        ptl_handle_md_t handle_out;
-} PtlMDBind_out;
-
-
-typedef struct PtlMDUpdate_internal_in {
-        ptl_handle_md_t md_in;
-        ptl_handle_eq_t testq_in;
-        ptl_seq_t sequence_in;
-
-        ptl_md_t old_inout;
-        int old_inout_valid;
-        ptl_md_t new_inout;
-        int new_inout_valid;
-} PtlMDUpdate_internal_in;
-
-typedef struct PtlMDUpdate_internal_out {
-        int rc;
-        ptl_md_t old_inout;
-        ptl_md_t new_inout;
-} PtlMDUpdate_internal_out;
-
-
-typedef struct PtlMDUnlink_in {
-        ptl_handle_md_t md_in;
-} PtlMDUnlink_in;
-
-typedef struct PtlMDUnlink_out {
-        int rc;
-        ptl_md_t status_out;
-} PtlMDUnlink_out;
-
-
-typedef struct PtlEQAlloc_in {
-        ptl_handle_ni_t ni_in;
-        ptl_size_t count_in;
-        void *base_in;
-        int len_in;
-        ptl_eq_handler_t callback_in;
-} PtlEQAlloc_in;
-
-typedef struct PtlEQAlloc_out {
-        int rc;
-        ptl_handle_eq_t handle_out;
-} PtlEQAlloc_out;
-
-
-typedef struct PtlEQFree_in {
-        ptl_handle_eq_t eventq_in;
-} PtlEQFree_in;
-
-typedef struct PtlEQFree_out {
-        int rc;
-} PtlEQFree_out;
-
-
-typedef struct PtlACEntry_in {
-        ptl_handle_ni_t ni_in;
-        ptl_ac_index_t index_in;
-        ptl_process_id_t match_id_in;
-        ptl_pt_index_t portal_in;
-} PtlACEntry_in;
-
-typedef struct PtlACEntry_out {
-        int rc;
-} PtlACEntry_out;
-
-
-typedef struct PtlPut_in {
-        ptl_handle_md_t md_in;
-        ptl_ack_req_t ack_req_in;
-        ptl_process_id_t target_in;
-        ptl_pt_index_t portal_in;
-        ptl_ac_index_t cookie_in;
-        ptl_match_bits_t match_bits_in;
-        ptl_size_t offset_in;
-        ptl_hdr_data_t hdr_data_in;
-} PtlPut_in;
-
-typedef struct PtlPut_out {
-        int rc;
-} PtlPut_out;
-
-
-typedef struct PtlGet_in {
-        ptl_handle_md_t md_in;
-        ptl_process_id_t target_in;
-        ptl_pt_index_t portal_in;
-        ptl_ac_index_t cookie_in;
-        ptl_match_bits_t match_bits_in;
-        ptl_size_t offset_in;
-} PtlGet_in;
-
-typedef struct PtlGet_out {
-        int rc;
-} PtlGet_out;
-
-
-#endif
index a98bfd9..42f2626 100644 (file)
@@ -41,7 +41,10 @@ typedef enum {
 
        PTL_EQ_IN_USE           = 21,
 
-        PTL_MAX_ERRNO          = 22
+       PTL_NI_INVALID          = 22,
+       PTL_MD_ILLEGAL          = 23,
+       
+        PTL_MAX_ERRNO          = 24
 } ptl_err_t;
 /* If you change these, you must update the string table in api-errno.c */
 
index 25778e4..eae00a0 100644 (file)
 
 extern int ptl_init;           /* Has the library been initialized */
 
-extern int ptl_ni_init(void);
-extern void ptl_ni_fini(void);
-
-static inline ptl_eq_t *
-ptl_handle2usereq (ptl_handle_eq_t *handle)
-{
-        /* EQ handles are a little wierd.  On the "user" side, the cookie
-         * is just a pointer to a queue of events in shared memory.  It's
-         * cb_eq_handle is the "real" handle which we pass when we
-         * call do_forward(). */
-        return (ptl_eq_t *)((unsigned long)handle->cookie);
-}
-
 #endif
diff --git a/lnet/include/lnet/lib-dispatch.h b/lnet/include/lnet/lib-dispatch.h
deleted file mode 100644 (file)
index 610c776..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef PTL_DISPATCH_H
-#define PTL_DISPATCH_H
-
-#include "build_check.h"
-/*
- * include/dispatch.h
- *
- * Dispatch table header and externs for remote side
- * operations
- *
- * Generated by idl
- *
- */
-
-#include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
-
-extern int do_PtlGetId(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlNIStatus(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlNIDist(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlMEAttach(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlMEInsert(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlMEPrepend(nal_cb_t * nal, void *private, void *args,
-                           void *ret);
-extern int do_PtlMEUnlink(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlTblDump(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlMEDump(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlMDAttach(nal_cb_t * nal, void *private, void *args,
-                                   void *ret);
-extern int do_PtlMDBind(nal_cb_t * nal, void *private, void *args,
-                                 void *ret);
-extern int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *args,
-                                   void *ret);
-extern int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *args,
-                                   void *ret);
-extern int do_PtlEQAlloc_internal(nal_cb_t * nal, void *private, void *args,
-                                  void *ret);
-extern int do_PtlEQFree_internal(nal_cb_t * nal, void *private, void *args,
-                                 void *ret);
-extern int do_PtlPut(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlGet(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlFailNid (nal_cb_t *nal, void *private, void *args, void *ret);
-
-extern char *dispatch_name(int index);
-#endif
index efa929c..4daf219 100644 (file)
 #else
 # include <portals/list.h>
 # include <string.h>
+# include <pthread.h>
 #endif
 #include <portals/types.h>
 #include <linux/kp30.h>
 #include <portals/p30.h>
+#include <portals/nal.h>
 #include <portals/lib-types.h>
-#include <portals/lib-nal.h>
-#include <portals/lib-dispatch.h>
 
 static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh)
 {
@@ -31,17 +31,18 @@ static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh)
                 wh->wh_object_cookie == PTL_WIRE_HANDLE_NONE.wh_object_cookie);
 }
 
-#define state_lock(nal,flagsp)                          \
-do {                                                    \
-        CDEBUG(D_PORTALS, "taking state lock\n");       \
-        nal->cb_cli(nal, flagsp);                       \
-} while (0)
+#ifdef __KERNEL__
+#define LIB_LOCK(nal,flags)                                     \
+        spin_lock_irqsave(&(nal)->libnal_ni.ni_lock, flags)
+#define LIB_UNLOCK(nal,flags)                                   \
+        spin_unlock_irqrestore(&(nal)->libnal_ni.ni_lock, flags)
+#else
+#define LIB_LOCK(nal,flags)                                             \
+        (pthread_mutex_lock(&(nal)->libnal_ni.ni_mutex), (flags) = 0)
+#define LIB_UNLOCK(nal,flags)                                   \
+        pthread_mutex_unlock(&(nal)->libnal_ni.ni_mutex)
+#endif
 
-#define state_unlock(nal,flagsp)                        \
-{                                                       \
-        CDEBUG(D_PORTALS, "releasing state lock\n");    \
-        nal->cb_sti(nal, flagsp);                       \
-}
 
 #ifdef PTL_USE_LIB_FREELIST
 
@@ -50,13 +51,13 @@ do {                                                    \
 #define MAX_MSGS        2048    /* Outstanding messages */
 #define MAX_EQS         512
 
-extern int lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int nobj, int objsize);
-extern void lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl);
+extern int lib_freelist_init (lib_nal_t *nal, lib_freelist_t *fl, int nobj, int objsize);
+extern void lib_freelist_fini (lib_nal_t *nal, lib_freelist_t *fl);
 
 static inline void *
 lib_freelist_alloc (lib_freelist_t *fl)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         lib_freeobj_t *o;
 
         if (list_empty (&fl->fl_list))
@@ -70,7 +71,7 @@ lib_freelist_alloc (lib_freelist_t *fl)
 static inline void
 lib_freelist_free (lib_freelist_t *fl, void *obj)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         lib_freeobj_t *o = list_entry (obj, lib_freeobj_t, fo_contents);
         
         list_add (&o->fo_list, &fl->fl_list);
@@ -78,78 +79,78 @@ lib_freelist_free (lib_freelist_t *fl, void *obj)
 
 
 static inline lib_eq_t *
-lib_eq_alloc (nal_cb_t *nal)
+lib_eq_alloc (lib_nal_t *nal)
 {
-        /* NEVER called with statelock held */
+        /* NEVER called with liblock held */
         unsigned long  flags;
         lib_eq_t      *eq;
         
-        state_lock (nal, &flags);
-        eq = (lib_eq_t *)lib_freelist_alloc (&nal->ni.ni_free_eqs);
-        state_unlock (nal, &flags);
+        LIB_LOCK (nal, flags);
+        eq = (lib_eq_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_eqs);
+        LIB_UNLOCK (nal, flags);
 
         return (eq);
 }
 
 static inline void
-lib_eq_free (nal_cb_t *nal, lib_eq_t *eq)
+lib_eq_free (lib_nal_t *nal, lib_eq_t *eq)
 {
-        /* ALWAYS called with statelock held */
-        lib_freelist_free (&nal->ni.ni_free_eqs, eq);
+        /* ALWAYS called with liblock held */
+        lib_freelist_free (&nal->libnal_ni.ni_free_eqs, eq);
 }
 
 static inline lib_md_t *
-lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd)
+lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd)
 {
-        /* NEVER called with statelock held */
+        /* NEVER called with liblock held */
         unsigned long  flags;
         lib_md_t      *md;
         
-        state_lock (nal, &flags);
-        md = (lib_md_t *)lib_freelist_alloc (&nal->ni.ni_free_mds);
-        state_unlock (nal, &flags);
+        LIB_LOCK (nal, flags);
+        md = (lib_md_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_mds);
+        LIB_UNLOCK (nal, flags);
 
         return (md);
 }
 
 static inline void
-lib_md_free (nal_cb_t *nal, lib_md_t *md)
+lib_md_free (lib_nal_t *nal, lib_md_t *md)
 {
-        /* ALWAYS called with statelock held */
-        lib_freelist_free (&nal->ni.ni_free_mds, md);
+        /* ALWAYS called with liblock held */
+        lib_freelist_free (&nal->libnal_ni.ni_free_mds, md);
 }
 
 static inline lib_me_t *
-lib_me_alloc (nal_cb_t *nal)
+lib_me_alloc (lib_nal_t *nal)
 {
-        /* NEVER called with statelock held */
+        /* NEVER called with liblock held */
         unsigned long  flags;
         lib_me_t      *me;
         
-        state_lock (nal, &flags);
-        me = (lib_me_t *)lib_freelist_alloc (&nal->ni.ni_free_mes);
-        state_unlock (nal, &flags);
+        LIB_LOCK (nal, flags);
+        me = (lib_me_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_mes);
+        LIB_UNLOCK (nal, flags);
         
         return (me);
 }
 
 static inline void
-lib_me_free (nal_cb_t *nal, lib_me_t *me)
+lib_me_free (lib_nal_t *nal, lib_me_t *me)
 {
-        /* ALWAYS called with statelock held */
-        lib_freelist_free (&nal->ni.ni_free_mes, me);
+        /* ALWAYS called with liblock held */
+        lib_freelist_free (&nal->libnal_ni.ni_free_mes, me);
 }
 
 static inline lib_msg_t *
-lib_msg_alloc (nal_cb_t *nal)
+lib_msg_alloc (lib_nal_t *nal)
 {
-        /* NEVER called with statelock held */
+        /* NEVER called with liblock held */
         unsigned long  flags;
         lib_msg_t     *msg;
         
-        state_lock (nal, &flags);
-        msg = (lib_msg_t *)lib_freelist_alloc (&nal->ni.ni_free_msgs);
-        state_unlock (nal, &flags);
+        LIB_LOCK (nal, flags);
+        msg = (lib_msg_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_msgs);
+        LIB_UNLOCK (nal, flags);
 
         if (msg != NULL) {
                 /* NULL pointers, clear flags etc */
@@ -160,18 +161,18 @@ lib_msg_alloc (nal_cb_t *nal)
 }
 
 static inline void
-lib_msg_free (nal_cb_t *nal, lib_msg_t *msg)
+lib_msg_free (lib_nal_t *nal, lib_msg_t *msg)
 {
-        /* ALWAYS called with statelock held */
-        lib_freelist_free (&nal->ni.ni_free_msgs, msg);
+        /* ALWAYS called with liblock held */
+        lib_freelist_free (&nal->libnal_ni.ni_free_msgs, msg);
 }
 
 #else
 
 static inline lib_eq_t *
-lib_eq_alloc (nal_cb_t *nal)
+lib_eq_alloc (lib_nal_t *nal)
 {
-        /* NEVER called with statelock held */
+        /* NEVER called with liblock held */
         lib_eq_t *eq;
 
         PORTAL_ALLOC(eq, sizeof(*eq));
@@ -179,16 +180,16 @@ lib_eq_alloc (nal_cb_t *nal)
 }
 
 static inline void
-lib_eq_free (nal_cb_t *nal, lib_eq_t *eq)
+lib_eq_free (lib_nal_t *nal, lib_eq_t *eq)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         PORTAL_FREE(eq, sizeof(*eq));
 }
 
 static inline lib_md_t *
-lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd)
+lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd)
 {
-        /* NEVER called with statelock held */
+        /* NEVER called with liblock held */
         lib_md_t *md;
         int       size;
         int       niov;
@@ -214,9 +215,9 @@ lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd)
 }
 
 static inline void 
-lib_md_free (nal_cb_t *nal, lib_md_t *md)
+lib_md_free (lib_nal_t *nal, lib_md_t *md)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         int       size;
 
         if ((md->options & PTL_MD_KIOV) != 0)
@@ -228,9 +229,9 @@ lib_md_free (nal_cb_t *nal, lib_md_t *md)
 }
 
 static inline lib_me_t *
-lib_me_alloc (nal_cb_t *nal)
+lib_me_alloc (lib_nal_t *nal)
 {
-        /* NEVER called with statelock held */
+        /* NEVER called with liblock held */
         lib_me_t *me;
 
         PORTAL_ALLOC(me, sizeof(*me));
@@ -238,16 +239,16 @@ lib_me_alloc (nal_cb_t *nal)
 }
 
 static inline void 
-lib_me_free(nal_cb_t *nal, lib_me_t *me)
+lib_me_free(lib_nal_t *nal, lib_me_t *me)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         PORTAL_FREE(me, sizeof(*me));
 }
 
 static inline lib_msg_t *
-lib_msg_alloc(nal_cb_t *nal)
+lib_msg_alloc(lib_nal_t *nal)
 {
-        /* NEVER called with statelock held; may be in interrupt... */
+        /* NEVER called with liblock held; may be in interrupt... */
         lib_msg_t *msg;
 
         if (in_interrupt())
@@ -264,27 +265,28 @@ lib_msg_alloc(nal_cb_t *nal)
 }
 
 static inline void 
-lib_msg_free(nal_cb_t *nal, lib_msg_t *msg)
+lib_msg_free(lib_nal_t *nal, lib_msg_t *msg)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         PORTAL_FREE(msg, sizeof(*msg));
 }
 #endif
 
-extern lib_handle_t *lib_lookup_cookie (nal_cb_t *nal, __u64 cookie, int type);
-extern void lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh, int type);
-extern void lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh);
+extern lib_handle_t *lib_lookup_cookie (lib_nal_t *nal, __u64 cookie, int type);
+extern void lib_initialise_handle (lib_nal_t *nal, lib_handle_t *lh, int type);
+extern void lib_invalidate_handle (lib_nal_t *nal, lib_handle_t *lh);
 
 static inline void
-ptl_eq2handle (ptl_handle_eq_t *handle, lib_eq_t *eq)
+ptl_eq2handle (ptl_handle_eq_t *handle, lib_nal_t *nal, lib_eq_t *eq)
 {
+        handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
         handle->cookie = eq->eq_lh.lh_cookie;
 }
 
 static inline lib_eq_t *
-ptl_handle2eq (ptl_handle_eq_t *handle, nal_cb_t *nal)
+ptl_handle2eq (ptl_handle_eq_t *handle, lib_nal_t *nal)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie, 
                                               PTL_COOKIE_TYPE_EQ);
         if (lh == NULL)
@@ -294,15 +296,16 @@ ptl_handle2eq (ptl_handle_eq_t *handle, nal_cb_t *nal)
 }
 
 static inline void
-ptl_md2handle (ptl_handle_md_t *handle, lib_md_t *md)
+ptl_md2handle (ptl_handle_md_t *handle, lib_nal_t *nal, lib_md_t *md)
 {
+        handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
         handle->cookie = md->md_lh.lh_cookie;
 }
 
 static inline lib_md_t *
-ptl_handle2md (ptl_handle_md_t *handle, nal_cb_t *nal)
+ptl_handle2md (ptl_handle_md_t *handle, lib_nal_t *nal)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie,
                                               PTL_COOKIE_TYPE_MD);
         if (lh == NULL)
@@ -312,12 +315,12 @@ ptl_handle2md (ptl_handle_md_t *handle, nal_cb_t *nal)
 }
 
 static inline lib_md_t *
-ptl_wire_handle2md (ptl_handle_wire_t *wh, nal_cb_t *nal)
+ptl_wire_handle2md (ptl_handle_wire_t *wh, lib_nal_t *nal)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         lib_handle_t *lh;
         
-        if (wh->wh_interface_cookie != nal->ni.ni_interface_cookie)
+        if (wh->wh_interface_cookie != nal->libnal_ni.ni_interface_cookie)
                 return (NULL);
         
         lh = lib_lookup_cookie (nal, wh->wh_object_cookie,
@@ -329,15 +332,16 @@ ptl_wire_handle2md (ptl_handle_wire_t *wh, nal_cb_t *nal)
 }
 
 static inline void
-ptl_me2handle (ptl_handle_me_t *handle, lib_me_t *me)
+ptl_me2handle (ptl_handle_me_t *handle, lib_nal_t *nal, lib_me_t *me)
 {
+        handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
         handle->cookie = me->me_lh.lh_cookie;
 }
 
 static inline lib_me_t *
-ptl_handle2me (ptl_handle_me_t *handle, nal_cb_t *nal)
+ptl_handle2me (ptl_handle_me_t *handle, lib_nal_t *nal)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie,
                                               PTL_COOKIE_TYPE_ME);
         if (lh == NULL)
@@ -346,35 +350,30 @@ ptl_handle2me (ptl_handle_me_t *handle, nal_cb_t *nal)
         return (lh_entry (lh, lib_me_t, me_lh));
 }
 
-extern int lib_init(nal_cb_t *cb, ptl_process_id_t pid,
+extern int lib_init(lib_nal_t *libnal, nal_t *apinal,
+                    ptl_process_id_t pid,
                     ptl_ni_limits_t *desired_limits, 
                     ptl_ni_limits_t *actual_limits);
-extern int lib_fini(nal_cb_t * cb);
-extern void lib_dispatch(nal_cb_t * cb, void *private, int index,
-                         void *arg_block, void *ret_block);
-extern char *dispatch_name(int index);
+extern int lib_fini(lib_nal_t *libnal);
 
 /*
- * When the NAL detects an incoming message, it should call
- * lib_parse() decode it.  The NAL callbacks will be handed
- * the private cookie as a way for the NAL to maintain state
- * about which transaction is being processed.  An extra parameter,
- * lib_cookie will contain the necessary information for
- * finalizing the message.
- *
- * After it has finished the handling the message, it should
- * call lib_finalize() with the lib_cookie parameter.
- * Call backs will be made to write events, send acks or
- * replies and so on.
+ * When the NAL detects an incoming message header, it should call
+ * lib_parse() to decode it.  If the message header is garbage, lib_parse()
+ * returns immediately with failure; otherwise the NAL callbacks will be
+ * called to receive the message body.  They are handed the private cookie
+ * as a way for the NAL to maintain state about which transaction is being
+ * processed.  An extra parameter, lib_msg, contains the lib-level message
+ * state for passing to lib_finalize() when the message body has been
+ * received.
  */
-extern void lib_enq_event_locked (nal_cb_t *nal, void *private,
+extern void lib_enq_event_locked (lib_nal_t *nal, void *private,
                                   lib_eq_t *eq, ptl_event_t *ev);
-extern void lib_finalize (nal_cb_t *nal, void *private, lib_msg_t *msg, 
+extern void lib_finalize (lib_nal_t *nal, void *private, lib_msg_t *msg, 
                           ptl_ni_fail_t ni_fail_type);
-extern void lib_parse (nal_cb_t *nal, ptl_hdr_t *hdr, void *private);
-extern lib_msg_t *lib_create_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, 
+extern ptl_err_t lib_parse (lib_nal_t *nal, ptl_hdr_t *hdr, void *private);
+extern lib_msg_t *lib_create_reply_msg (lib_nal_t *nal, ptl_nid_t peer_nid, 
                                         lib_msg_t *get_msg);
-extern void print_hdr (nal_cb_t * nal, ptl_hdr_t * hdr);
+extern void print_hdr (lib_nal_t * nal, ptl_hdr_t * hdr);
 
 
 extern ptl_size_t lib_iov_nob (int niov, struct iovec *iov);
@@ -397,14 +396,65 @@ extern int lib_extract_kiov (int dst_niov, ptl_kiov_t *dst,
 
 extern void lib_assert_wire_constants (void);
 
-extern ptl_err_t lib_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
+extern ptl_err_t lib_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
                            ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen);
-extern ptl_err_t lib_send (nal_cb_t *nal, void *private, lib_msg_t *msg,
+extern ptl_err_t lib_send (lib_nal_t *nal, void *private, lib_msg_t *msg,
                            ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
                            lib_md_t *md, ptl_size_t offset, ptl_size_t len);
 
-extern void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md_in,
-                               ptl_md_t * md_out);
-extern void lib_md_unlink(nal_cb_t * nal, lib_md_t * md_in);
-extern void lib_me_unlink(nal_cb_t * nal, lib_me_t * me_in);
+extern int lib_api_ni_status (nal_t *nal, ptl_sr_index_t sr_idx,
+                              ptl_sr_value_t *status);
+extern int lib_api_ni_dist (nal_t *nal, ptl_process_id_t *pid, 
+                            unsigned long *dist);
+
+extern int lib_api_eq_alloc (nal_t *nal, ptl_size_t count,
+                             ptl_eq_handler_t callback, 
+                             ptl_handle_eq_t *handle);
+extern int lib_api_eq_free(nal_t *nal, ptl_handle_eq_t *eqh);
+extern int lib_api_eq_poll (nal_t *nal, 
+                            ptl_handle_eq_t *eventqs, int neq, int timeout_ms,
+                            ptl_event_t *event, int *which);
+
+extern int lib_api_me_attach(nal_t *nal,
+                             ptl_pt_index_t portal,
+                             ptl_process_id_t match_id, 
+                             ptl_match_bits_t match_bits, 
+                             ptl_match_bits_t ignore_bits,
+                             ptl_unlink_t unlink, ptl_ins_pos_t pos,
+                             ptl_handle_me_t *handle);
+extern int lib_api_me_insert(nal_t *nal,
+                             ptl_handle_me_t *current_meh,
+                             ptl_process_id_t match_id, 
+                             ptl_match_bits_t match_bits, 
+                             ptl_match_bits_t ignore_bits,
+                             ptl_unlink_t unlink, ptl_ins_pos_t pos,
+                             ptl_handle_me_t *handle);
+extern int lib_api_me_unlink (nal_t *nal, ptl_handle_me_t *meh);
+extern void lib_me_unlink(lib_nal_t *nal, lib_me_t *me);
+
+extern int lib_api_get_id(nal_t *nal, ptl_process_id_t *pid);
+
+extern void lib_md_unlink(lib_nal_t *nal, lib_md_t *md);
+extern void lib_md_deconstruct(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd);
+extern int lib_api_md_attach(nal_t *nal, ptl_handle_me_t *meh,
+                             ptl_md_t *umd, ptl_unlink_t unlink, 
+                             ptl_handle_md_t *handle);
+extern int lib_api_md_bind(nal_t *nal, ptl_md_t *umd, ptl_unlink_t unlink,
+                           ptl_handle_md_t *handle);
+extern int lib_api_md_unlink (nal_t *nal, ptl_handle_md_t *mdh);
+extern int lib_api_md_update (nal_t *nal, ptl_handle_md_t *mdh,
+                              ptl_md_t *oldumd, ptl_md_t *newumd,
+                              ptl_handle_eq_t *testqh);
+
+extern int lib_api_get(nal_t *apinal, ptl_handle_md_t *mdh, 
+                       ptl_process_id_t *id,
+                       ptl_pt_index_t portal, ptl_ac_index_t ac,
+                       ptl_match_bits_t match_bits, ptl_size_t offset);
+extern int lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh, 
+                       ptl_ack_req_t ack, ptl_process_id_t *id,
+                       ptl_pt_index_t portal, ptl_ac_index_t ac,
+                       ptl_match_bits_t match_bits, 
+                       ptl_size_t offset, ptl_hdr_data_t hdr_data);
+extern int lib_api_fail_nid(nal_t *apinal, ptl_nid_t nid, unsigned int threshold);
+
 #endif
diff --git a/lnet/include/lnet/lib-nal.h b/lnet/include/lnet/lib-nal.h
deleted file mode 100644 (file)
index d1d0495..0000000
+++ /dev/null
@@ -1,116 +0,0 @@
-#ifndef _LIB_NAL_H_
-#define _LIB_NAL_H_
-
-#include "build_check.h"
-/*
- * nal.h
- *
- * Library side headers that define the abstraction layer's
- * responsibilities and interfaces
- */
-
-#include <portals/lib-types.h>
-
-struct nal_cb_t {
-       /*
-        * Per interface portal table, access control table
-        * and NAL private data field;
-        */
-       lib_ni_t ni;
-       void *nal_data;
-       /*
-        * send: Sends a preformatted header and payload data to a
-        * specified remote process. The payload is scattered over 'niov'
-        * fragments described by iov, starting at 'offset' for 'mlen'
-        * bytes.  
-        * NB the NAL may NOT overwrite iov.  
-        * PTL_OK on success => NAL has committed to send and will call
-        * lib_finalize on completion
-        */
-       ptl_err_t (*cb_send) (nal_cb_t * nal, void *private, lib_msg_t * cookie, 
-                             ptl_hdr_t * hdr, int type, ptl_nid_t nid, ptl_pid_t pid, 
-                             unsigned int niov, struct iovec *iov, 
-                             size_t offset, size_t mlen);
-
-       /* as send, but with a set of page fragments (NULL if not supported) */
-       ptl_err_t (*cb_send_pages) (nal_cb_t * nal, void *private, lib_msg_t * cookie, 
-                                   ptl_hdr_t * hdr, int type, ptl_nid_t nid, ptl_pid_t pid, 
-                                   unsigned int niov, ptl_kiov_t *iov, 
-                                   size_t offset, size_t mlen);
-       /*
-        * recv: Receives an incoming message from a remote process.  The
-        * payload is to be received into the scattered buffer of 'niov'
-        * fragments described by iov, starting at 'offset' for 'mlen'
-        * bytes.  Payload bytes after 'mlen' up to 'rlen' are to be
-        * discarded.  
-        * NB the NAL may NOT overwrite iov.
-        * PTL_OK on success => NAL has committed to receive and will call
-        * lib_finalize on completion
-        */
-       ptl_err_t (*cb_recv) (nal_cb_t * nal, void *private, lib_msg_t * cookie,
-                             unsigned int niov, struct iovec *iov, 
-                             size_t offset, size_t mlen, size_t rlen);
-
-       /* as recv, but with a set of page fragments (NULL if not supported) */
-       ptl_err_t (*cb_recv_pages) (nal_cb_t * nal, void *private, lib_msg_t * cookie,
-                                   unsigned int niov, ptl_kiov_t *iov, 
-                                   size_t offset, size_t mlen, size_t rlen);
-       /*
-        * read: Reads a block of data from a specified user address
-        */
-       ptl_err_t (*cb_read) (nal_cb_t * nal, void *private, void *dst_addr,
-                             user_ptr src_addr, size_t len);
-
-       /*
-        * write: Writes a block of data into a specified user address
-        */
-       ptl_err_t (*cb_write) (nal_cb_t * nal, void *private, user_ptr dsr_addr,
-                              void *src_addr, size_t len);
-
-       /*
-        * callback: Calls an event callback
-        * NULL => lib calls eq's callback (if any) directly.
-        */
-       void (*cb_callback) (nal_cb_t * nal, void *private, lib_eq_t *eq,
-                            ptl_event_t *ev);
-
-       /*
-        *  malloc: Acquire a block of memory in a system independent
-        * fashion.
-        */
-       void *(*cb_malloc) (nal_cb_t * nal, size_t len);
-
-       void (*cb_free) (nal_cb_t * nal, void *buf, size_t len);
-
-       /*
-        * (un)map: Tell the NAL about some memory it will access.
-        * *addrkey passed to cb_unmap() is what cb_map() set it to.
-        * type of *iov depends on options.
-        * Set to NULL if not required.
-        */
-       ptl_err_t (*cb_map) (nal_cb_t * nal, unsigned int niov, struct iovec *iov, 
-                            void **addrkey);
-       void (*cb_unmap) (nal_cb_t * nal, unsigned int niov, struct iovec *iov, 
-                         void **addrkey);
-
-       /* as (un)map, but with a set of page fragments */
-       ptl_err_t (*cb_map_pages) (nal_cb_t * nal, unsigned int niov, ptl_kiov_t *iov, 
-                                  void **addrkey);
-       void (*cb_unmap_pages) (nal_cb_t * nal, unsigned int niov, ptl_kiov_t *iov, 
-                         void **addrkey);
-
-       void (*cb_printf) (nal_cb_t * nal, const char *fmt, ...);
-
-       /* Turn interrupts off (begin of protected area) */
-       void (*cb_cli) (nal_cb_t * nal, unsigned long *flags);
-
-       /* Turn interrupts on (end of protected area) */
-       void (*cb_sti) (nal_cb_t * nal, unsigned long *flags);
-
-       /*
-        * Calculate a network "distance" to given node
-        */
-       int (*cb_dist) (nal_cb_t * nal, ptl_nid_t nid, unsigned long *dist);
-};
-
-#endif
index efa929c..4daf219 100644 (file)
 #else
 # include <portals/list.h>
 # include <string.h>
+# include <pthread.h>
 #endif
 #include <portals/types.h>
 #include <linux/kp30.h>
 #include <portals/p30.h>
+#include <portals/nal.h>
 #include <portals/lib-types.h>
-#include <portals/lib-nal.h>
-#include <portals/lib-dispatch.h>
 
 static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh)
 {
@@ -31,17 +31,18 @@ static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh)
                 wh->wh_object_cookie == PTL_WIRE_HANDLE_NONE.wh_object_cookie);
 }
 
-#define state_lock(nal,flagsp)                          \
-do {                                                    \
-        CDEBUG(D_PORTALS, "taking state lock\n");       \
-        nal->cb_cli(nal, flagsp);                       \
-} while (0)
+#ifdef __KERNEL__
+#define LIB_LOCK(nal,flags)                                     \
+        spin_lock_irqsave(&(nal)->libnal_ni.ni_lock, flags)
+#define LIB_UNLOCK(nal,flags)                                   \
+        spin_unlock_irqrestore(&(nal)->libnal_ni.ni_lock, flags)
+#else
+#define LIB_LOCK(nal,flags)                                             \
+        (pthread_mutex_lock(&(nal)->libnal_ni.ni_mutex), (flags) = 0)
+#define LIB_UNLOCK(nal,flags)                                   \
+        pthread_mutex_unlock(&(nal)->libnal_ni.ni_mutex)
+#endif
 
-#define state_unlock(nal,flagsp)                        \
-{                                                       \
-        CDEBUG(D_PORTALS, "releasing state lock\n");    \
-        nal->cb_sti(nal, flagsp);                       \
-}
 
 #ifdef PTL_USE_LIB_FREELIST
 
@@ -50,13 +51,13 @@ do {                                                    \
 #define MAX_MSGS        2048    /* Outstanding messages */
 #define MAX_EQS         512
 
-extern int lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int nobj, int objsize);
-extern void lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl);
+extern int lib_freelist_init (lib_nal_t *nal, lib_freelist_t *fl, int nobj, int objsize);
+extern void lib_freelist_fini (lib_nal_t *nal, lib_freelist_t *fl);
 
 static inline void *
 lib_freelist_alloc (lib_freelist_t *fl)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         lib_freeobj_t *o;
 
         if (list_empty (&fl->fl_list))
@@ -70,7 +71,7 @@ lib_freelist_alloc (lib_freelist_t *fl)
 static inline void
 lib_freelist_free (lib_freelist_t *fl, void *obj)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         lib_freeobj_t *o = list_entry (obj, lib_freeobj_t, fo_contents);
         
         list_add (&o->fo_list, &fl->fl_list);
@@ -78,78 +79,78 @@ lib_freelist_free (lib_freelist_t *fl, void *obj)
 
 
 static inline lib_eq_t *
-lib_eq_alloc (nal_cb_t *nal)
+lib_eq_alloc (lib_nal_t *nal)
 {
-        /* NEVER called with statelock held */
+        /* NEVER called with liblock held */
         unsigned long  flags;
         lib_eq_t      *eq;
         
-        state_lock (nal, &flags);
-        eq = (lib_eq_t *)lib_freelist_alloc (&nal->ni.ni_free_eqs);
-        state_unlock (nal, &flags);
+        LIB_LOCK (nal, flags);
+        eq = (lib_eq_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_eqs);
+        LIB_UNLOCK (nal, flags);
 
         return (eq);
 }
 
 static inline void
-lib_eq_free (nal_cb_t *nal, lib_eq_t *eq)
+lib_eq_free (lib_nal_t *nal, lib_eq_t *eq)
 {
-        /* ALWAYS called with statelock held */
-        lib_freelist_free (&nal->ni.ni_free_eqs, eq);
+        /* ALWAYS called with liblock held */
+        lib_freelist_free (&nal->libnal_ni.ni_free_eqs, eq);
 }
 
 static inline lib_md_t *
-lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd)
+lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd)
 {
-        /* NEVER called with statelock held */
+        /* NEVER called with liblock held */
         unsigned long  flags;
         lib_md_t      *md;
         
-        state_lock (nal, &flags);
-        md = (lib_md_t *)lib_freelist_alloc (&nal->ni.ni_free_mds);
-        state_unlock (nal, &flags);
+        LIB_LOCK (nal, flags);
+        md = (lib_md_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_mds);
+        LIB_UNLOCK (nal, flags);
 
         return (md);
 }
 
 static inline void
-lib_md_free (nal_cb_t *nal, lib_md_t *md)
+lib_md_free (lib_nal_t *nal, lib_md_t *md)
 {
-        /* ALWAYS called with statelock held */
-        lib_freelist_free (&nal->ni.ni_free_mds, md);
+        /* ALWAYS called with liblock held */
+        lib_freelist_free (&nal->libnal_ni.ni_free_mds, md);
 }
 
 static inline lib_me_t *
-lib_me_alloc (nal_cb_t *nal)
+lib_me_alloc (lib_nal_t *nal)
 {
-        /* NEVER called with statelock held */
+        /* NEVER called with liblock held */
         unsigned long  flags;
         lib_me_t      *me;
         
-        state_lock (nal, &flags);
-        me = (lib_me_t *)lib_freelist_alloc (&nal->ni.ni_free_mes);
-        state_unlock (nal, &flags);
+        LIB_LOCK (nal, flags);
+        me = (lib_me_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_mes);
+        LIB_UNLOCK (nal, flags);
         
         return (me);
 }
 
 static inline void
-lib_me_free (nal_cb_t *nal, lib_me_t *me)
+lib_me_free (lib_nal_t *nal, lib_me_t *me)
 {
-        /* ALWAYS called with statelock held */
-        lib_freelist_free (&nal->ni.ni_free_mes, me);
+        /* ALWAYS called with liblock held */
+        lib_freelist_free (&nal->libnal_ni.ni_free_mes, me);
 }
 
 static inline lib_msg_t *
-lib_msg_alloc (nal_cb_t *nal)
+lib_msg_alloc (lib_nal_t *nal)
 {
-        /* NEVER called with statelock held */
+        /* NEVER called with liblock held */
         unsigned long  flags;
         lib_msg_t     *msg;
         
-        state_lock (nal, &flags);
-        msg = (lib_msg_t *)lib_freelist_alloc (&nal->ni.ni_free_msgs);
-        state_unlock (nal, &flags);
+        LIB_LOCK (nal, flags);
+        msg = (lib_msg_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_msgs);
+        LIB_UNLOCK (nal, flags);
 
         if (msg != NULL) {
                 /* NULL pointers, clear flags etc */
@@ -160,18 +161,18 @@ lib_msg_alloc (nal_cb_t *nal)
 }
 
 static inline void
-lib_msg_free (nal_cb_t *nal, lib_msg_t *msg)
+lib_msg_free (lib_nal_t *nal, lib_msg_t *msg)
 {
-        /* ALWAYS called with statelock held */
-        lib_freelist_free (&nal->ni.ni_free_msgs, msg);
+        /* ALWAYS called with liblock held */
+        lib_freelist_free (&nal->libnal_ni.ni_free_msgs, msg);
 }
 
 #else
 
 static inline lib_eq_t *
-lib_eq_alloc (nal_cb_t *nal)
+lib_eq_alloc (lib_nal_t *nal)
 {
-        /* NEVER called with statelock held */
+        /* NEVER called with liblock held */
         lib_eq_t *eq;
 
         PORTAL_ALLOC(eq, sizeof(*eq));
@@ -179,16 +180,16 @@ lib_eq_alloc (nal_cb_t *nal)
 }
 
 static inline void
-lib_eq_free (nal_cb_t *nal, lib_eq_t *eq)
+lib_eq_free (lib_nal_t *nal, lib_eq_t *eq)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         PORTAL_FREE(eq, sizeof(*eq));
 }
 
 static inline lib_md_t *
-lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd)
+lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd)
 {
-        /* NEVER called with statelock held */
+        /* NEVER called with liblock held */
         lib_md_t *md;
         int       size;
         int       niov;
@@ -214,9 +215,9 @@ lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd)
 }
 
 static inline void 
-lib_md_free (nal_cb_t *nal, lib_md_t *md)
+lib_md_free (lib_nal_t *nal, lib_md_t *md)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         int       size;
 
         if ((md->options & PTL_MD_KIOV) != 0)
@@ -228,9 +229,9 @@ lib_md_free (nal_cb_t *nal, lib_md_t *md)
 }
 
 static inline lib_me_t *
-lib_me_alloc (nal_cb_t *nal)
+lib_me_alloc (lib_nal_t *nal)
 {
-        /* NEVER called with statelock held */
+        /* NEVER called with liblock held */
         lib_me_t *me;
 
         PORTAL_ALLOC(me, sizeof(*me));
@@ -238,16 +239,16 @@ lib_me_alloc (nal_cb_t *nal)
 }
 
 static inline void 
-lib_me_free(nal_cb_t *nal, lib_me_t *me)
+lib_me_free(lib_nal_t *nal, lib_me_t *me)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         PORTAL_FREE(me, sizeof(*me));
 }
 
 static inline lib_msg_t *
-lib_msg_alloc(nal_cb_t *nal)
+lib_msg_alloc(lib_nal_t *nal)
 {
-        /* NEVER called with statelock held; may be in interrupt... */
+        /* NEVER called with liblock held; may be in interrupt... */
         lib_msg_t *msg;
 
         if (in_interrupt())
@@ -264,27 +265,28 @@ lib_msg_alloc(nal_cb_t *nal)
 }
 
 static inline void 
-lib_msg_free(nal_cb_t *nal, lib_msg_t *msg)
+lib_msg_free(lib_nal_t *nal, lib_msg_t *msg)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         PORTAL_FREE(msg, sizeof(*msg));
 }
 #endif
 
-extern lib_handle_t *lib_lookup_cookie (nal_cb_t *nal, __u64 cookie, int type);
-extern void lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh, int type);
-extern void lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh);
+extern lib_handle_t *lib_lookup_cookie (lib_nal_t *nal, __u64 cookie, int type);
+extern void lib_initialise_handle (lib_nal_t *nal, lib_handle_t *lh, int type);
+extern void lib_invalidate_handle (lib_nal_t *nal, lib_handle_t *lh);
 
 static inline void
-ptl_eq2handle (ptl_handle_eq_t *handle, lib_eq_t *eq)
+ptl_eq2handle (ptl_handle_eq_t *handle, lib_nal_t *nal, lib_eq_t *eq)
 {
+        handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
         handle->cookie = eq->eq_lh.lh_cookie;
 }
 
 static inline lib_eq_t *
-ptl_handle2eq (ptl_handle_eq_t *handle, nal_cb_t *nal)
+ptl_handle2eq (ptl_handle_eq_t *handle, lib_nal_t *nal)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie, 
                                               PTL_COOKIE_TYPE_EQ);
         if (lh == NULL)
@@ -294,15 +296,16 @@ ptl_handle2eq (ptl_handle_eq_t *handle, nal_cb_t *nal)
 }
 
 static inline void
-ptl_md2handle (ptl_handle_md_t *handle, lib_md_t *md)
+ptl_md2handle (ptl_handle_md_t *handle, lib_nal_t *nal, lib_md_t *md)
 {
+        handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
         handle->cookie = md->md_lh.lh_cookie;
 }
 
 static inline lib_md_t *
-ptl_handle2md (ptl_handle_md_t *handle, nal_cb_t *nal)
+ptl_handle2md (ptl_handle_md_t *handle, lib_nal_t *nal)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie,
                                               PTL_COOKIE_TYPE_MD);
         if (lh == NULL)
@@ -312,12 +315,12 @@ ptl_handle2md (ptl_handle_md_t *handle, nal_cb_t *nal)
 }
 
 static inline lib_md_t *
-ptl_wire_handle2md (ptl_handle_wire_t *wh, nal_cb_t *nal)
+ptl_wire_handle2md (ptl_handle_wire_t *wh, lib_nal_t *nal)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         lib_handle_t *lh;
         
-        if (wh->wh_interface_cookie != nal->ni.ni_interface_cookie)
+        if (wh->wh_interface_cookie != nal->libnal_ni.ni_interface_cookie)
                 return (NULL);
         
         lh = lib_lookup_cookie (nal, wh->wh_object_cookie,
@@ -329,15 +332,16 @@ ptl_wire_handle2md (ptl_handle_wire_t *wh, nal_cb_t *nal)
 }
 
 static inline void
-ptl_me2handle (ptl_handle_me_t *handle, lib_me_t *me)
+ptl_me2handle (ptl_handle_me_t *handle, lib_nal_t *nal, lib_me_t *me)
 {
+        handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
         handle->cookie = me->me_lh.lh_cookie;
 }
 
 static inline lib_me_t *
-ptl_handle2me (ptl_handle_me_t *handle, nal_cb_t *nal)
+ptl_handle2me (ptl_handle_me_t *handle, lib_nal_t *nal)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie,
                                               PTL_COOKIE_TYPE_ME);
         if (lh == NULL)
@@ -346,35 +350,30 @@ ptl_handle2me (ptl_handle_me_t *handle, nal_cb_t *nal)
         return (lh_entry (lh, lib_me_t, me_lh));
 }
 
-extern int lib_init(nal_cb_t *cb, ptl_process_id_t pid,
+extern int lib_init(lib_nal_t *libnal, nal_t *apinal,
+                    ptl_process_id_t pid,
                     ptl_ni_limits_t *desired_limits, 
                     ptl_ni_limits_t *actual_limits);
-extern int lib_fini(nal_cb_t * cb);
-extern void lib_dispatch(nal_cb_t * cb, void *private, int index,
-                         void *arg_block, void *ret_block);
-extern char *dispatch_name(int index);
+extern int lib_fini(lib_nal_t *libnal);
 
 /*
- * When the NAL detects an incoming message, it should call
- * lib_parse() decode it.  The NAL callbacks will be handed
- * the private cookie as a way for the NAL to maintain state
- * about which transaction is being processed.  An extra parameter,
- * lib_cookie will contain the necessary information for
- * finalizing the message.
- *
- * After it has finished the handling the message, it should
- * call lib_finalize() with the lib_cookie parameter.
- * Call backs will be made to write events, send acks or
- * replies and so on.
+ * When the NAL detects an incoming message header, it should call
+ * lib_parse() to decode it.  If the message header is garbage, lib_parse()
+ * returns immediately with failure; otherwise the NAL callbacks will be
+ * called to receive the message body.  They are handed the private cookie
+ * as a way for the NAL to maintain state about which transaction is being
+ * processed.  An extra parameter, lib_msg, contains the lib-level message
+ * state for passing to lib_finalize() when the message body has been
+ * received.
  */
-extern void lib_enq_event_locked (nal_cb_t *nal, void *private,
+extern void lib_enq_event_locked (lib_nal_t *nal, void *private,
                                   lib_eq_t *eq, ptl_event_t *ev);
-extern void lib_finalize (nal_cb_t *nal, void *private, lib_msg_t *msg, 
+extern void lib_finalize (lib_nal_t *nal, void *private, lib_msg_t *msg, 
                           ptl_ni_fail_t ni_fail_type);
-extern void lib_parse (nal_cb_t *nal, ptl_hdr_t *hdr, void *private);
-extern lib_msg_t *lib_create_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, 
+extern ptl_err_t lib_parse (lib_nal_t *nal, ptl_hdr_t *hdr, void *private);
+extern lib_msg_t *lib_create_reply_msg (lib_nal_t *nal, ptl_nid_t peer_nid, 
                                         lib_msg_t *get_msg);
-extern void print_hdr (nal_cb_t * nal, ptl_hdr_t * hdr);
+extern void print_hdr (lib_nal_t * nal, ptl_hdr_t * hdr);
 
 
 extern ptl_size_t lib_iov_nob (int niov, struct iovec *iov);
@@ -397,14 +396,65 @@ extern int lib_extract_kiov (int dst_niov, ptl_kiov_t *dst,
 
 extern void lib_assert_wire_constants (void);
 
-extern ptl_err_t lib_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
+extern ptl_err_t lib_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
                            ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen);
-extern ptl_err_t lib_send (nal_cb_t *nal, void *private, lib_msg_t *msg,
+extern ptl_err_t lib_send (lib_nal_t *nal, void *private, lib_msg_t *msg,
                            ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
                            lib_md_t *md, ptl_size_t offset, ptl_size_t len);
 
-extern void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md_in,
-                               ptl_md_t * md_out);
-extern void lib_md_unlink(nal_cb_t * nal, lib_md_t * md_in);
-extern void lib_me_unlink(nal_cb_t * nal, lib_me_t * me_in);
+extern int lib_api_ni_status (nal_t *nal, ptl_sr_index_t sr_idx,
+                              ptl_sr_value_t *status);
+extern int lib_api_ni_dist (nal_t *nal, ptl_process_id_t *pid, 
+                            unsigned long *dist);
+
+extern int lib_api_eq_alloc (nal_t *nal, ptl_size_t count,
+                             ptl_eq_handler_t callback, 
+                             ptl_handle_eq_t *handle);
+extern int lib_api_eq_free(nal_t *nal, ptl_handle_eq_t *eqh);
+extern int lib_api_eq_poll (nal_t *nal, 
+                            ptl_handle_eq_t *eventqs, int neq, int timeout_ms,
+                            ptl_event_t *event, int *which);
+
+extern int lib_api_me_attach(nal_t *nal,
+                             ptl_pt_index_t portal,
+                             ptl_process_id_t match_id, 
+                             ptl_match_bits_t match_bits, 
+                             ptl_match_bits_t ignore_bits,
+                             ptl_unlink_t unlink, ptl_ins_pos_t pos,
+                             ptl_handle_me_t *handle);
+extern int lib_api_me_insert(nal_t *nal,
+                             ptl_handle_me_t *current_meh,
+                             ptl_process_id_t match_id, 
+                             ptl_match_bits_t match_bits, 
+                             ptl_match_bits_t ignore_bits,
+                             ptl_unlink_t unlink, ptl_ins_pos_t pos,
+                             ptl_handle_me_t *handle);
+extern int lib_api_me_unlink (nal_t *nal, ptl_handle_me_t *meh);
+extern void lib_me_unlink(lib_nal_t *nal, lib_me_t *me);
+
+extern int lib_api_get_id(nal_t *nal, ptl_process_id_t *pid);
+
+extern void lib_md_unlink(lib_nal_t *nal, lib_md_t *md);
+extern void lib_md_deconstruct(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd);
+extern int lib_api_md_attach(nal_t *nal, ptl_handle_me_t *meh,
+                             ptl_md_t *umd, ptl_unlink_t unlink, 
+                             ptl_handle_md_t *handle);
+extern int lib_api_md_bind(nal_t *nal, ptl_md_t *umd, ptl_unlink_t unlink,
+                           ptl_handle_md_t *handle);
+extern int lib_api_md_unlink (nal_t *nal, ptl_handle_md_t *mdh);
+extern int lib_api_md_update (nal_t *nal, ptl_handle_md_t *mdh,
+                              ptl_md_t *oldumd, ptl_md_t *newumd,
+                              ptl_handle_eq_t *testqh);
+
+extern int lib_api_get(nal_t *apinal, ptl_handle_md_t *mdh, 
+                       ptl_process_id_t *id,
+                       ptl_pt_index_t portal, ptl_ac_index_t ac,
+                       ptl_match_bits_t match_bits, ptl_size_t offset);
+extern int lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh, 
+                       ptl_ack_req_t ack, ptl_process_id_t *id,
+                       ptl_pt_index_t portal, ptl_ac_index_t ac,
+                       ptl_match_bits_t match_bits, 
+                       ptl_size_t offset, ptl_hdr_data_t hdr_data);
+extern int lib_api_fail_nid(nal_t *apinal, ptl_nid_t nid, unsigned int threshold);
+
 #endif
index ef618c7..6549988 100644 (file)
@@ -13,6 +13,7 @@
 #include "build_check.h"
 
 #include <portals/types.h>
+#include <portals/nal.h>
 #ifdef __KERNEL__
 # include <linux/uio.h>
 # include <linux/smp_lock.h>
@@ -22,9 +23,6 @@
 # include <sys/types.h>
 #endif
 
-/* struct nal_cb_t is defined in lib-nal.h */
-typedef struct nal_cb_t nal_cb_t;
-
 typedef char *user_ptr;
 typedef struct lib_msg_t lib_msg_t;
 typedef struct lib_ptl_t lib_ptl_t;
@@ -165,11 +163,12 @@ typedef struct {
 struct lib_eq_t {
         struct list_head  eq_list;
         lib_handle_t      eq_lh;
-        ptl_seq_t         sequence;
-        ptl_size_t        size;
-        ptl_event_t      *base;
+        ptl_seq_t         eq_enq_seq;
+        ptl_seq_t         eq_deq_seq;
+        ptl_size_t        eq_size;
+        ptl_event_t      *eq_events;
         int               eq_refcount;
-        ptl_eq_handler_t  event_callback;
+        ptl_eq_handler_t  eq_callback;
         void             *eq_addrkey;
 };
 
@@ -244,29 +243,117 @@ typedef struct {
 /* PTL_COOKIE_TYPES must be a power of 2, so the cookie type can be
  * extracted by masking with (PTL_COOKIE_TYPES - 1) */
 
-typedef struct {
-        ptl_nid_t nid;
-        ptl_pid_t pid;
-        lib_ptl_t tbl;
-        lib_counters_t counters;
-        ptl_ni_limits_t actual_limits;
+typedef struct lib_ni 
+{
+        nal_t            *ni_api;
+        ptl_process_id_t  ni_pid;
+        lib_ptl_t         ni_portals;
+        lib_counters_t    ni_counters;
+        ptl_ni_limits_t   ni_actual_limits;
 
         int               ni_lh_hash_size;      /* size of lib handle hash table */
         struct list_head *ni_lh_hash_table;     /* all extant lib handles, this interface */
         __u64             ni_next_object_cookie; /* cookie generator */
         __u64             ni_interface_cookie;  /* uniquely identifies this ni in this epoch */
         
-        struct list_head ni_test_peers;
+        struct list_head  ni_test_peers;
         
 #ifdef PTL_USE_LIB_FREELIST
-        lib_freelist_t   ni_free_mes;
-        lib_freelist_t   ni_free_msgs;
-        lib_freelist_t   ni_free_mds;
-        lib_freelist_t   ni_free_eqs;
+        lib_freelist_t    ni_free_mes;
+        lib_freelist_t    ni_free_msgs;
+        lib_freelist_t    ni_free_mds;
+        lib_freelist_t    ni_free_eqs;
+#endif
+
+        struct list_head  ni_active_msgs;
+        struct list_head  ni_active_mds;
+        struct list_head  ni_active_eqs;
+
+#ifdef __KERNEL__
+        spinlock_t        ni_lock;
+        wait_queue_head_t ni_waitq;
+#else
+        pthread_mutex_t   ni_mutex;
+        pthread_cond_t    ni_cond;
 #endif
-        struct list_head ni_active_msgs;
-        struct list_head ni_active_mds;
-        struct list_head ni_active_eqs;
 } lib_ni_t;
 
+
+typedef struct lib_nal
+{
+       /* lib-level interface state */
+       lib_ni_t libnal_ni;
+
+       /* NAL-private data */
+       void *libnal_data;
+
+       /*
+        * send: Sends a preformatted header and payload data to a
+        * specified remote process. The payload is scattered over 'niov'
+        * fragments described by iov, starting at 'offset' for 'mlen'
+        * bytes.  
+        * NB the NAL may NOT overwrite iov.  
+        * PTL_OK on success => NAL has committed to send and will call
+        * lib_finalize on completion
+        */
+       ptl_err_t (*libnal_send) 
+                (struct lib_nal *nal, void *private, lib_msg_t *cookie, 
+                 ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, 
+                 unsigned int niov, struct iovec *iov, 
+                 size_t offset, size_t mlen);
+        
+       /* as send, but with a set of page fragments (NULL if not supported) */
+       ptl_err_t (*libnal_send_pages)
+                (struct lib_nal *nal, void *private, lib_msg_t * cookie, 
+                 ptl_hdr_t * hdr, int type, ptl_nid_t nid, ptl_pid_t pid, 
+                 unsigned int niov, ptl_kiov_t *iov, 
+                 size_t offset, size_t mlen);
+       /*
+        * recv: Receives an incoming message from a remote process.  The
+        * payload is to be received into the scattered buffer of 'niov'
+        * fragments described by iov, starting at 'offset' for 'mlen'
+        * bytes.  Payload bytes after 'mlen' up to 'rlen' are to be
+        * discarded.  
+        * NB the NAL may NOT overwrite iov.
+        * PTL_OK on success => NAL has committed to receive and will call
+        * lib_finalize on completion
+        */
+       ptl_err_t (*libnal_recv) 
+                (struct lib_nal *nal, void *private, lib_msg_t * cookie,
+                 unsigned int niov, struct iovec *iov, 
+                 size_t offset, size_t mlen, size_t rlen);
+
+       /* as recv, but with a set of page fragments (NULL if not supported) */
+       ptl_err_t (*libnal_recv_pages) 
+                (struct lib_nal *nal, void *private, lib_msg_t * cookie,
+                 unsigned int niov, ptl_kiov_t *iov, 
+                 size_t offset, size_t mlen, size_t rlen);
+
+       /*
+        * (un)map: Tell the NAL about some memory it will access.
+        * *addrkey passed to libnal_unmap() is what libnal_map() set it to.
+        * type of *iov depends on options.
+        * Set to NULL if not required.
+        */
+       ptl_err_t (*libnal_map)
+                (struct lib_nal *nal, unsigned int niov, struct iovec *iov, 
+                 void **addrkey);
+       void (*libnal_unmap)
+                (struct lib_nal *nal, unsigned int niov, struct iovec *iov, 
+                 void **addrkey);
+
+       /* as (un)map, but with a set of page fragments */
+       ptl_err_t (*libnal_map_pages)
+                (struct lib_nal *nal, unsigned int niov, ptl_kiov_t *iov, 
+                 void **addrkey);
+       void (*libnal_unmap_pages)
+                (struct lib_nal *nal, unsigned int niov, ptl_kiov_t *iov, 
+                 void **addrkey);
+
+       void (*libnal_printf)(struct lib_nal *nal, const char *fmt, ...);
+
+       /* Calculate a network "distance" to given node */
+       int (*libnal_dist) (struct lib_nal *nal, ptl_nid_t nid, unsigned long *dist);
+} lib_nal_t;
+
 #endif
index 1f925c1..bf86569 100644 (file)
 
 #include <portals/types.h>
 
-#ifdef yield
-#undef yield
-#endif
-
 typedef struct nal_t nal_t;
 
 struct nal_t {
+       /* common interface state */
        int              nal_refct;
+        ptl_handle_ni_t  nal_handle;
+
+       /* NAL-private data */
        void            *nal_data;
 
-       int (*startup) (nal_t *nal, ptl_pid_t requested_pid,
-                       ptl_ni_limits_t *req, ptl_ni_limits_t *actual);
+       /* NAL API implementation 
+        * NB only nal_ni_init needs to be set when the NAL registers itself */
+       int (*nal_ni_init) (nal_t *nal, ptl_pid_t requested_pid,
+                           ptl_ni_limits_t *req, ptl_ni_limits_t *actual);
        
-       void (*shutdown) (nal_t *nal);
+       void (*nal_ni_fini) (nal_t *nal);
 
-       int (*forward) (nal_t *nal, int index,  /* Function ID */
-                       void *args, size_t arg_len, void *ret, size_t ret_len);
+       int (*nal_get_id) (nal_t *nal, ptl_process_id_t *id);
+       int (*nal_ni_status) (nal_t *nal, ptl_sr_index_t sr_idx, ptl_sr_value_t *status);
+       int (*nal_ni_dist) (nal_t *nal, ptl_process_id_t *id, unsigned long *distance);
+       int (*nal_fail_nid) (nal_t *nal, ptl_nid_t nid, unsigned int threshold);
 
-       int (*yield) (nal_t *nal, unsigned long *flags, int milliseconds);
+       int (*nal_me_attach) (nal_t *nal, ptl_pt_index_t portal,
+                             ptl_process_id_t match_id, 
+                             ptl_match_bits_t match_bits, ptl_match_bits_t ignore_bits,
+                             ptl_unlink_t unlink, ptl_ins_pos_t pos, 
+                             ptl_handle_me_t *handle);
+       int (*nal_me_insert) (nal_t *nal, ptl_handle_me_t *me,
+                             ptl_process_id_t match_id, 
+                             ptl_match_bits_t match_bits, ptl_match_bits_t ignore_bits,
+                             ptl_unlink_t unlink, ptl_ins_pos_t pos, 
+                             ptl_handle_me_t *handle);
+       int (*nal_me_unlink) (nal_t *nal, ptl_handle_me_t *me);
+       
+       int (*nal_md_attach) (nal_t *nal, ptl_handle_me_t *me,
+                             ptl_md_t *md, ptl_unlink_t unlink, 
+                             ptl_handle_md_t *handle);
+       int (*nal_md_bind) (nal_t *nal, 
+                           ptl_md_t *md, ptl_unlink_t unlink, 
+                           ptl_handle_md_t *handle);
+       int (*nal_md_unlink) (nal_t *nal, ptl_handle_md_t *md);
+       int (*nal_md_update) (nal_t *nal, ptl_handle_md_t *md,
+                             ptl_md_t *old_md, ptl_md_t *new_md,
+                             ptl_handle_eq_t *testq);
 
-       void (*lock) (nal_t *nal, unsigned long *flags);
+       int (*nal_eq_alloc) (nal_t *nal, ptl_size_t count,
+                            ptl_eq_handler_t handler,
+                            ptl_handle_eq_t *handle);
+       int (*nal_eq_free) (nal_t *nal, ptl_handle_eq_t *eq);
+       int (*nal_eq_poll) (nal_t *nal, 
+                           ptl_handle_eq_t *eqs, int neqs, int timeout,
+                           ptl_event_t *event, int *which);
 
-       void (*unlock) (nal_t *nal, unsigned long *flags);
+       int (*nal_ace_entry) (nal_t *nal, ptl_ac_index_t index,
+                             ptl_process_id_t match_id, ptl_pt_index_t portal);
+       
+       int (*nal_put) (nal_t *nal, ptl_handle_md_t *md, ptl_ack_req_t ack,
+                       ptl_process_id_t *target, ptl_pt_index_t portal,
+                       ptl_ac_index_t ac, ptl_match_bits_t match,
+                       ptl_size_t offset, ptl_hdr_data_t hdr_data);
+       int (*nal_get) (nal_t *nal, ptl_handle_md_t *md,
+                       ptl_process_id_t *target, ptl_pt_index_t portal,
+                       ptl_ac_index_t ac, ptl_match_bits_t match,
+                       ptl_size_t offset);
 };
 
-extern nal_t *ptl_hndl2nal(ptl_handle_any_t * any);
+extern nal_t *ptl_hndl2nal(ptl_handle_any_t *any);
 
 #ifdef __KERNEL__
 extern int ptl_register_nal(ptl_interface_t interface, nal_t *nal);
index ef2712b..250b954 100644 (file)
@@ -153,17 +153,6 @@ typedef void (*ptl_eq_handler_t)(ptl_event_t *event);
 #define PTL_EQ_HANDLER_NONE NULL
 
 typedef struct {
-        volatile ptl_seq_t sequence;
-        ptl_size_t size;
-        ptl_event_t *base;
-        ptl_handle_any_t cb_eq_handle;
-} ptl_eq_t;
-
-typedef struct {
-        ptl_eq_t *eq;
-} ptl_ni_t;
-
-typedef struct {
        int max_mes;
        int max_mds;
        int max_eqs;
index e48552e..ca98f84 100644 (file)
@@ -190,7 +190,6 @@ typedef struct _gmnal_rxtwe {
 #define NRXTHREADS 10 /* max number of receiver threads */
 
 typedef struct _gmnal_data_t {
-       spinlock_t      cb_lock;
        spinlock_t      stxd_lock;
        struct semaphore stxd_token;
        gmnal_stxd_t    *stxd;
@@ -205,7 +204,7 @@ typedef struct _gmnal_data_t {
        gmnal_srxd_t    *srxd;
        struct gm_hash  *srxd_hash;
        nal_t           *nal;   
-       nal_cb_t        *nal_cb;
+       lib_nal_t       *libnal;
        struct gm_port  *gm_port;
        unsigned int    gm_local_nid;
        unsigned int    gm_global_nid;
@@ -298,7 +297,6 @@ extern gmnal_data_t *global_nal_data;
 #define GMNAL_GM_LOCK_INIT(a)          spin_lock_init(&a->gm_lock);
 #define GMNAL_GM_LOCK(a)               spin_lock(&a->gm_lock);
 #define GMNAL_GM_UNLOCK(a)             spin_unlock(&a->gm_lock);
-#define GMNAL_CB_LOCK_INIT(a)          spin_lock_init(&a->cb_lock);
 
 
 /*
@@ -340,39 +338,19 @@ void gmnal_api_unlock(nal_t *, unsigned long *);
  *     CB NAL
  */
 
-int gmnal_cb_send(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *,
+int gmnal_cb_send(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
        int, ptl_nid_t, ptl_pid_t, unsigned int, struct iovec *, size_t);
 
-int gmnal_cb_send_pages(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *,
+int gmnal_cb_send_pages(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
        int, ptl_nid_t, ptl_pid_t, unsigned int, ptl_kiov_t *, size_t);
 
-int gmnal_cb_recv(nal_cb_t *, void *, lib_msg_t *, 
+int gmnal_cb_recv(lib_nal_t *, void *, lib_msg_t *, 
        unsigned int, struct iovec *, size_t, size_t);
 
-int gmnal_cb_recv_pages(nal_cb_t *, void *, lib_msg_t *, 
+int gmnal_cb_recv_pages(lib_nal_t *, void *, lib_msg_t *, 
        unsigned int, ptl_kiov_t *, size_t, size_t);
 
-int gmnal_cb_read(nal_cb_t *, void *private, void *, user_ptr, size_t);
-
-int gmnal_cb_write(nal_cb_t *, void *private, user_ptr, void *, size_t);
-
-int gmnal_cb_callback(nal_cb_t *, void *, lib_eq_t *, ptl_event_t *);
-
-void *gmnal_cb_malloc(nal_cb_t *, size_t);
-
-void gmnal_cb_free(nal_cb_t *, void *, size_t);
-
-void gmnal_cb_unmap(nal_cb_t *, unsigned int, struct iovec*, void **);
-
-int  gmnal_cb_map(nal_cb_t *, unsigned int, struct iovec*, void **); 
-
-void gmnal_cb_printf(nal_cb_t *, const char *fmt, ...);
-
-void gmnal_cb_cli(nal_cb_t *, unsigned long *);
-
-void gmnal_cb_sti(nal_cb_t *, unsigned long *);
-
-int gmnal_cb_dist(nal_cb_t *, ptl_nid_t, unsigned long *);
+int gmnal_cb_dist(lib_nal_t *, ptl_nid_t, unsigned long *);
 
 int gmnal_init(void);
 
@@ -381,22 +359,14 @@ void  gmnal_fini(void);
 
 
 #define GMNAL_INIT_NAL_CB(a)   do {    \
-                               a->cb_send = gmnal_cb_send; \
-                               a->cb_send_pages = gmnal_cb_send_pages; \
-                               a->cb_recv = gmnal_cb_recv; \
-                               a->cb_recv_pages = gmnal_cb_recv_pages; \
-                               a->cb_read = gmnal_cb_read; \
-                               a->cb_write = gmnal_cb_write; \
-                               a->cb_callback = gmnal_cb_callback; \
-                               a->cb_malloc = gmnal_cb_malloc; \
-                               a->cb_free = gmnal_cb_free; \
-                               a->cb_map = NULL; \
-                               a->cb_unmap = NULL; \
-                               a->cb_printf = gmnal_cb_printf; \
-                               a->cb_cli = gmnal_cb_cli; \
-                               a->cb_sti = gmnal_cb_sti; \
-                               a->cb_dist = gmnal_cb_dist; \
-                               a->nal_data = NULL; \
+                               (a)->libnal_send = gmnal_cb_send; \
+                               (a)->libnal_send_pages = gmnal_cb_send_pages; \
+                               (a)->libnal_recv = gmnal_cb_recv; \
+                               (a)->libnal_recv_pages = gmnal_cb_recv_pages; \
+                               (a)->libnal_map = NULL; \
+                               (a)->libnal_unmap = NULL; \
+                               (a)->libnal_dist = gmnal_cb_dist; \
+                               (a)->libnal_data = NULL; \
                                } while (0)
 
 
@@ -451,9 +421,9 @@ void                gmnal_remove_rxtwe(gmnal_data_t *);
 /*
  *     Small messages
  */
-int            gmnal_small_rx(nal_cb_t *, void *, lib_msg_t *, unsigned int, 
+int            gmnal_small_rx(lib_nal_t *, void *, lib_msg_t *, unsigned int, 
                                struct iovec *, size_t, size_t);
-int            gmnal_small_tx(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, 
+int            gmnal_small_tx(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *, 
                                int, ptl_nid_t, ptl_pid_t, 
                                unsigned int, struct iovec*, int);
 void           gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t);
@@ -463,10 +433,10 @@ void              gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t);
 /*
  *     Large messages
  */
-int            gmnal_large_rx(nal_cb_t *, void *, lib_msg_t *, unsigned int, 
+int            gmnal_large_rx(lib_nal_t *, void *, lib_msg_t *, unsigned int, 
                                struct iovec *, size_t, size_t);
 
-int            gmnal_large_tx(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, 
+int            gmnal_large_tx(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *, 
                                int, ptl_nid_t, ptl_pid_t, unsigned int, 
                                struct iovec*, int);
 
index 7c94f93..002587d 100644 (file)
@@ -50,77 +50,6 @@ static ctl_table gmnalnal_top_sysctl_table[] = {
         { 0 }
 };
 
-
-
-
-
-
-/*
- *     gmnal_api_forward
- *     This function takes a pack block of arguments from the NAL API
- *     module and passes them to the NAL CB module. The CB module unpacks
- *     the args and calls the appropriate function indicated by index.
- *     Typically this function is used to pass args between kernel and use
- *     space.
- *     As lgmanl exists entirely in kernel, just pass the arg block directly 
- *     to the NAL CB, buy passing the args to lib_dispatch
- *     Arguments are
- *     nal_t   nal     Our nal
- *     int     index   the api function that initiated this call 
- *     void    *args   packed block of function args
- *     size_t  arg_len length of args block
- *     void    *ret    A return value for the API NAL
- *     size_t  ret_len Size of the return value
- *     
- */
-
-int
-gmnal_api_forward(nal_t *nal, int index, void *args, size_t arg_len,
-               void *ret, size_t ret_len)
-{
-
-       nal_cb_t        *nal_cb = NULL;
-       gmnal_data_t    *nal_data = NULL;
-
-
-
-
-
-       if (!nal || !args || (index < 0) || (arg_len < 0)) {
-                       CDEBUG(D_ERROR, "Bad args to gmnal_api_forward\n");
-               return (PTL_FAIL);
-       }
-
-       if (ret && (ret_len <= 0)) {
-               CDEBUG(D_ERROR, "Bad args to gmnal_api_forward\n");
-               return (PTL_FAIL);
-       }
-
-
-       if (!nal->nal_data) {
-               CDEBUG(D_ERROR, "bad nal, no nal data\n");      
-               return (PTL_FAIL);
-       }
-       
-       nal_data = nal->nal_data;
-       CDEBUG(D_INFO, "nal_data is [%p]\n", nal_data); 
-
-       if (!nal_data->nal_cb) {
-               CDEBUG(D_ERROR, "bad nal_data, no nal_cb\n");   
-               return (PTL_FAIL);
-       }
-       
-       nal_cb = nal_data->nal_cb;
-       CDEBUG(D_INFO, "nal_cb is [%p]\n", nal_cb);     
-       
-       CDEBUG(D_PORTALS, "gmnal_api_forward calling lib_dispatch\n");
-       lib_dispatch(nal_cb, NULL, index, args, ret);
-       CDEBUG(D_PORTALS, "gmnal_api_forward returns from lib_dispatch\n");
-
-       return(PTL_OK);
-}
-
-
 /*
  *     gmnal_api_shutdown
  *      nal_refct == 0 => called on last matching PtlNIFini()
@@ -131,7 +60,7 @@ void
 gmnal_api_shutdown(nal_t *nal, int interface)
 {
        gmnal_data_t    *nal_data;
-       nal_cb_t        *nal_cb;
+       lib_nal_t       *libnal;
 
         if (nal->nal_refct != 0)
                 return;
@@ -139,9 +68,9 @@ gmnal_api_shutdown(nal_t *nal, int interface)
        CDEBUG(D_TRACE, "gmnal_api_shutdown: nal_data [%p]\n", nal_data);
 
         LASSERT(nal == global_nal_data->nal);
-        nal_data = nal->nal_data;
+        libnal = (lib_nal_t *)nal->nal_data;
+        nal_data = (gmnal_data_t *)libnal->libnal_data;
         LASSERT(nal_data == global_nal_data);
-        nal_cb = nal_data->nal_cb;
 
         /* Stop portals calling our ioctl handler */
         libcfs_nal_cmd_unregister(GMNAL);
@@ -150,7 +79,7 @@ gmnal_api_shutdown(nal_t *nal, int interface)
          * flag so when lib calls us we fail immediately and dont queue any
          * more work but our threads can still call into lib OK.  THEN
          * shutdown our threads, THEN lib_fini() */
-        lib_fini(nal_cb);
+        lib_fini(libnal);
 
        gmnal_stop_rxthread(nal_data);
        gmnal_stop_ctthread(nal_data);
@@ -162,94 +91,22 @@ gmnal_api_shutdown(nal_t *nal, int interface)
        GMNAL_GM_UNLOCK(nal_data);
         if (nal_data->sysctl)
                 unregister_sysctl_table (nal_data->sysctl);
-       PORTAL_FREE(nal, sizeof(nal_t));        
+        /* Don't free 'nal'; it's a static struct */
        PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
-       PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+       PORTAL_FREE(libnal, sizeof(lib_nal_t));
 
         global_nal_data = NULL;
         PORTAL_MODULE_UNUSE;
 }
 
 
-/*
- *     gmnal_api_validate
- *     validate a user address for use in communications
- *     There's nothing to be done here
- */
-int
-gmnal_api_validate(nal_t *nal, void *base, size_t extent)
-{
-
-       return(PTL_OK);
-}
-
-
-
-/*
- *     gmnal_api_yield
- *     Give up the processor
- */
-void
-gmnal_api_yield(nal_t *nal, unsigned long *flags, int milliseconds)
-{
-       CDEBUG(D_TRACE, "gmnal_api_yield : nal [%p]\n", nal);
-
-        if (milliseconds != 0) {
-                CERROR("Blocking yield not implemented yet\n");
-                LBUG();
-        }
-
-        our_cond_resched();
-       return;
-}
-
-
-
-/*
- *     gmnal_api_lock
- *     Take a threadsafe lock
- */
-void
-gmnal_api_lock(nal_t *nal, unsigned long *flags)
-{
-
-       gmnal_data_t    *nal_data;
-       nal_cb_t        *nal_cb;
-
-       nal_data = nal->nal_data;
-       nal_cb = nal_data->nal_cb;
-
-       nal_cb->cb_cli(nal_cb, flags);
-
-       return;
-}
-
-/*
- *     gmnal_api_unlock
- *     Release a threadsafe lock
- */
-void
-gmnal_api_unlock(nal_t *nal, unsigned long *flags)
-{
-       gmnal_data_t    *nal_data;
-       nal_cb_t        *nal_cb;
-
-       nal_data = nal->nal_data;
-       nal_cb = nal_data->nal_cb;
-
-       nal_cb->cb_sti(nal_cb, flags);
-
-       return;
-}
-
-
 int
 gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
                   ptl_ni_limits_t *requested_limits,
                   ptl_ni_limits_t *actual_limits)
 {
 
-       nal_cb_t        *nal_cb = NULL;
+       lib_nal_t       *libnal = NULL;
        gmnal_data_t    *nal_data = NULL;
        gmnal_srxd_t    *srxd = NULL;
        gm_status_t     gm_status;
@@ -258,9 +115,8 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
 
         if (nal->nal_refct != 0) {
                 if (actual_limits != NULL) {
-                        nal_data = (gmnal_data_t *)nal->nal_data;
-                        nal_cb = nal_data->nal_cb;
-                        *actual_limits = nal->_cb->ni.actual_limits;
+                        libnal = (lib_nal_t *)nal->nal_data;
+                        *actual_limits = libnal->libnal_ni.ni_actual_limits;
                 return (PTL_OK);
         }
 
@@ -283,24 +139,22 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
        CDEBUG(D_INFO, "Allocd and reset nal_data[%p]\n", nal_data);
        CDEBUG(D_INFO, "small_msg_size is [%d]\n", nal_data->small_msg_size);
 
-       PORTAL_ALLOC(nal_cb, sizeof(nal_cb_t));
-       if (!nal_cb) {
+       PORTAL_ALLOC(libnal, sizeof(lib_nal_t));
+       if (!libnal) {
                PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
                return(PTL_NO_SPACE);
        }
-       memset(nal_cb, 0, sizeof(nal_cb_t));
-       CDEBUG(D_INFO, "Allocd and reset nal_cb[%p]\n", nal_cb);
+       memset(libnal, 0, sizeof(lib_nal_t));
+       CDEBUG(D_INFO, "Allocd and reset libnal[%p]\n", libnal);
 
-       GMNAL_INIT_NAL_CB(nal_cb);
+       GMNAL_INIT_NAL_CB(libnal);
        /*
         *      String them all together
         */
-       nal->nal_data = (void*)nal_data;
-       nal_cb->nal_data = (void*)nal_data;
+       libnal->libnal_data = (void*)nal_data;
        nal_data->nal = nal;
-       nal_data->nal_cb = nal_cb;
+       nal_data->libnal = libnal;
 
-       GMNAL_CB_LOCK_INIT(nal_data);
        GMNAL_GM_LOCK_INIT(nal_data);
 
 
@@ -311,7 +165,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
        if (gm_init() != GM_SUCCESS) {
                CDEBUG(D_ERROR, "call to gm_init failed\n");
                PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
-               PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+               PORTAL_FREE(libnal, sizeof(lib_nal_t));
                return(PTL_FAIL);
        }
 
@@ -356,7 +210,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
                gm_finalize();
                GMNAL_GM_UNLOCK(nal_data);
                PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
-               PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+               PORTAL_FREE(libnal, sizeof(lib_nal_t));
                return(PTL_FAIL);
        }
 
@@ -373,7 +227,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
                gm_finalize();
                GMNAL_GM_UNLOCK(nal_data);
                PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
-               PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+               PORTAL_FREE(libnal, sizeof(lib_nal_t));
                return(PTL_FAIL);
        }
 
@@ -402,7 +256,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
                gm_finalize();
                GMNAL_GM_UNLOCK(nal_data);
                PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
-               PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+               PORTAL_FREE(libnal, sizeof(lib_nal_t));
                return(PTL_FAIL);
        }
 
@@ -434,7 +288,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
                gm_finalize();
                GMNAL_GM_UNLOCK(nal_data);
                PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
-               PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+               PORTAL_FREE(libnal, sizeof(lib_nal_t));
                return(PTL_FAIL);
        }
        nal_data->gm_local_nid = local_nid;
@@ -454,7 +308,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
                gm_finalize();
                GMNAL_GM_UNLOCK(nal_data);
                PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
-               PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+               PORTAL_FREE(libnal, sizeof(lib_nal_t));
                return(PTL_FAIL);
        }
        CDEBUG(D_INFO, "Global node id is [%u]\n", global_nid);
@@ -471,7 +325,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
        CDEBUG(D_INFO, "portals_nid is ["LPU64"]\n", process_id.nid);
        
        CDEBUG(D_PORTALS, "calling lib_init\n");
-       if (lib_init(nal_cb, process_id, 
+       if (lib_init(libnal, nal, process_id, 
                      requested_limits, actual_limits) != PTL_OK) {
                CDEBUG(D_ERROR, "lib_init failed\n");
                gmnal_stop_rxthread(nal_data);
@@ -483,7 +337,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
                gm_finalize();
                GMNAL_GM_UNLOCK(nal_data);
                PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
-               PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+               PORTAL_FREE(libnal, sizeof(lib_nal_t));
                return(PTL_FAIL);
                
        }
@@ -493,7 +347,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
 
                 /* XXX these cleanup cases should be restructured to
                  * minimise duplication... */
-                lib_fini(nal_cb);
+                lib_fini(libnal);
                 
                gmnal_stop_rxthread(nal_data);
                gmnal_stop_ctthread(nal_data);
@@ -504,7 +358,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
                gm_finalize();
                GMNAL_GM_UNLOCK(nal_data);
                PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
-               PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+               PORTAL_FREE(libnal, sizeof(lib_nal_t));
                return(PTL_FAIL);
         }
 
@@ -550,10 +404,6 @@ int gmnal_init(void)
  */
 void gmnal_fini()
 {
-       gmnal_data_t    *nal_data = global_nal_data;
-       nal_t           *nal = nal_data->nal;
-       nal_cb_t        *nal_cb = nal_data->nal_cb;
-
        CDEBUG(D_TRACE, "gmnal_fini\n");
 
         LASSERT(global_nal_data == NULL);
index ece1380..e99d3ec 100644 (file)
@@ -27,7 +27,7 @@
 
 #include "gmnal.h"
 
-int gmnal_cb_recv(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, 
+int gmnal_cb_recv(lib_nal_t *libnal, void *private, lib_msg_t *cookie, 
                   unsigned int niov, struct iovec *iov, size_t mlen, 
                   size_t rlen)
 {
@@ -35,19 +35,19 @@ int gmnal_cb_recv(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        int             status = PTL_OK;
 
 
-       CDEBUG(D_TRACE, "gmnal_cb_recv nal_cb [%p], private[%p], cookie[%p], "
+       CDEBUG(D_TRACE, "gmnal_cb_recv libnal [%p], private[%p], cookie[%p], "
               "niov[%d], iov [%p], mlen["LPSZ"], rlen["LPSZ"]\n", 
-              nal_cb, private, cookie, niov, iov, mlen, rlen);
+              libnal, private, cookie, niov, iov, mlen, rlen);
 
        switch(srxd->type) {
        case(GMNAL_SMALL_MESSAGE):
                CDEBUG(D_INFO, "gmnal_cb_recv got small message\n");
-               status = gmnal_small_rx(nal_cb, private, cookie, niov, 
+               status = gmnal_small_rx(libnal, private, cookie, niov, 
                                         iov, mlen, rlen);
        break;
        case(GMNAL_LARGE_MESSAGE_INIT):
                CDEBUG(D_INFO, "gmnal_cb_recv got large message init\n");
-               status = gmnal_large_rx(nal_cb, private, cookie, niov, 
+               status = gmnal_large_rx(libnal, private, cookie, niov, 
                                         iov, mlen, rlen);
        }
                
@@ -56,7 +56,7 @@ int gmnal_cb_recv(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        return(status);
 }
 
-int gmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, 
+int gmnal_cb_recv_pages(lib_nal_t *libnal, void *private, lib_msg_t *cookie, 
                         unsigned int kniov, ptl_kiov_t *kiov, size_t mlen, 
                         size_t rlen)
 {
@@ -67,9 +67,9 @@ int gmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        ptl_kiov_t      *kiov_dup = kiov;;
 
 
-       CDEBUG(D_TRACE, "gmnal_cb_recv_pages nal_cb [%p],private[%p], "
+       CDEBUG(D_TRACE, "gmnal_cb_recv_pages libnal [%p],private[%p], "
               "cookie[%p], kniov[%d], kiov [%p], mlen["LPSZ"], rlen["LPSZ"]\n",
-              nal_cb, private, cookie, kniov, kiov, mlen, rlen);
+              libnal, private, cookie, kniov, kiov, mlen, rlen);
 
        if (srxd->type == GMNAL_SMALL_MESSAGE) {
                PORTAL_ALLOC(iovec, sizeof(struct iovec)*kniov);
@@ -98,7 +98,7 @@ int gmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
                         kiov++;
                }
                CDEBUG(D_INFO, "calling gmnal_small_rx\n");
-               status = gmnal_small_rx(nal_cb, private, cookie, kniov, 
+               status = gmnal_small_rx(libnal, private, cookie, kniov, 
                                         iovec_dup, mlen, rlen);
                for (i=0; i<kniov; i++) {
                        kunmap(kiov_dup->kiov_page);
@@ -113,7 +113,7 @@ int gmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
 }
 
 
-int gmnal_cb_send(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, 
+int gmnal_cb_send(lib_nal_t *libnal, void *private, lib_msg_t *cookie, 
                   ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, 
                   unsigned int niov, struct iovec *iov, size_t len)
 {
@@ -123,24 +123,25 @@ int gmnal_cb_send(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
 
        CDEBUG(D_TRACE, "gmnal_cb_send niov[%d] len["LPSZ"] nid["LPU64"]\n", 
               niov, len, nid);
-       nal_data = nal_cb->nal_data;
+       nal_data = libnal->libnal_data;
        
        if (GMNAL_IS_SMALL_MESSAGE(nal_data, niov, iov, len)) {
                CDEBUG(D_INFO, "This is a small message send\n");
-               gmnal_small_tx(nal_cb, private, cookie, hdr, type, nid, pid, 
+               gmnal_small_tx(libnal, private, cookie, hdr, type, nid, pid, 
                                niov, iov, len);
        } else {
                CDEBUG(D_ERROR, "Large message send it is not supported\n");
-               lib_finalize(nal_cb, private, cookie, PTL_FAIL);
+               lib_finalize(libnal, private, cookie, PTL_FAIL);
                return(PTL_FAIL);
-               gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, pid, 
+               gmnal_large_tx(libnal, private, cookie, hdr, type, nid, pid, 
                                niov, iov, len);
        }
        return(PTL_OK);
 }
 
-int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, 
-                        ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,                         unsigned int kniov, ptl_kiov_t *kiov, size_t len)
+int gmnal_cb_send_pages(lib_nal_t *libnal, void *private, lib_msg_t *cookie, 
+                        ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+                         unsigned int kniov, ptl_kiov_t *kiov, size_t len)
 {
 
        int     i = 0;
@@ -149,7 +150,7 @@ int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        ptl_kiov_t      *kiov_dup = kiov;
 
        CDEBUG(D_TRACE, "gmnal_cb_send_pages nid ["LPU64"] niov[%d] len["LPSZ"]\n", nid, kniov, len);
-       nal_data = nal_cb->nal_data;
+       nal_data = libnal->libnal_data;
        PORTAL_ALLOC(iovec, kniov*sizeof(struct iovec));
         iovec_dup = iovec;
        if (GMNAL_IS_SMALL_MESSAGE(nal_data, 0, NULL, len)) {
@@ -168,7 +169,7 @@ int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
                         iovec++;
                         kiov++;
                }
-               gmnal_small_tx(nal_cb, private, cookie, hdr, type, nid, 
+               gmnal_small_tx(libnal, private, cookie, hdr, type, nid, 
                                pid, kniov, iovec_dup, len);
        } else {
                CDEBUG(D_ERROR, "Large message send it is not supported yet\n");
@@ -185,7 +186,7 @@ int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
                         iovec++;
                         kiov++;
                }
-               gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, 
+               gmnal_large_tx(libnal, private, cookie, hdr, type, nid, 
                                pid, kniov, iovec, len);
        }
        for (i=0; i<kniov; i++) {
@@ -196,94 +197,7 @@ int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        return(PTL_OK);
 }
 
-int gmnal_cb_read(nal_cb_t *nal_cb, void *private, void *dst, 
-                  user_ptr src, size_t len)
-{
-       gm_bcopy(src, dst, len);
-       return(PTL_OK);
-}
-
-int gmnal_cb_write(nal_cb_t *nal_cb, void *private, user_ptr dst, 
-                   void *src, size_t len)
-{
-       gm_bcopy(src, dst, len);
-       return(PTL_OK);
-}
-
-int gmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq, 
-                      ptl_event_t *ev)
-{
-
-       if (eq->event_callback != NULL) {
-               CDEBUG(D_INFO, "found callback\n");
-               eq->event_callback(ev);
-       }
-       
-       return(PTL_OK);
-}
-
-void *gmnal_cb_malloc(nal_cb_t *nal_cb, size_t len)
-{
-       void *ptr = NULL;
-       CDEBUG(D_TRACE, "gmnal_cb_malloc len["LPSZ"]\n", len);
-       PORTAL_ALLOC(ptr, len);
-       return(ptr);
-}
-
-void gmnal_cb_free(nal_cb_t *nal_cb, void *buf, size_t len)
-{
-       CDEBUG(D_TRACE, "gmnal_cb_free :: buf[%p] len["LPSZ"]\n", buf, len);
-       PORTAL_FREE(buf, len);
-       return;
-}
-
-void gmnal_cb_unmap(nal_cb_t *nal_cb, unsigned int niov, struct iovec *iov, 
-                    void **addrkey)
-{
-       return;
-}
-
-int  gmnal_cb_map(nal_cb_t *nal_cb, unsigned int niov, struct iovec *iov, 
-                  void**addrkey)
-{
-       return(PTL_OK);
-}
-
-void gmnal_cb_printf(nal_cb_t *nal_cb, const char *fmt, ...)
-{
-       CDEBUG(D_TRACE, "gmnal_cb_printf\n");
-       printk(fmt);
-       return;
-}
-
-void gmnal_cb_cli(nal_cb_t *nal_cb, unsigned long *flags)
-{
-       gmnal_data_t    *nal_data = (gmnal_data_t*)nal_cb->nal_data;
-
-       spin_lock_irqsave(&nal_data->cb_lock, *flags);
-       return;
-}
-
-void gmnal_cb_sti(nal_cb_t *nal_cb, unsigned long *flags)
-{
-       gmnal_data_t    *nal_data = (gmnal_data_t*)nal_cb->nal_data;
-
-       spin_unlock_irqrestore(&nal_data->cb_lock, *flags);
-       return;
-}
-
-void gmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq, ptl_event_t *ev)
-{
-        /* holding cb_lock */
-
-        if (eq->event_callback != NULL)
-                eq->event_callback(ev);
-
-        /* We will wake theads sleeping in yield() here, AFTER the
-         * callback, when we implement blocking yield */
-}
-
-int gmnal_cb_dist(nal_cb_t *nal_cb, ptl_nid_t nid, unsigned long *dist)
+int gmnal_cb_dist(lib_nal_t *libnal, ptl_nid_t nid, unsigned long *dist)
 {
        CDEBUG(D_TRACE, "gmnal_cb_dist\n");
        if (dist)
index 1bcd9bd..4af7186 100644 (file)
@@ -189,6 +189,7 @@ gmnal_pre_receive(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, int gmnal_type)
        unsigned int snode, sport, type, length;
        gmnal_msghdr_t  *gmnal_msghdr;
        ptl_hdr_t       *portals_hdr;
+        int              rc;
 
        CDEBUG(D_INFO, "nal_data [%p], we[%p] type [%d]\n", 
               nal_data, we, gmnal_type);
@@ -219,10 +220,12 @@ gmnal_pre_receive(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, int gmnal_type)
         */
        srxd = gmnal_rxbuffer_to_srxd(nal_data, buffer);
        CDEBUG(D_INFO, "Back from gmnal_rxbuffer_to_srxd\n");
-       srxd->nal_data = nal_data;
        if (!srxd) {
                CDEBUG(D_ERROR, "Failed to get receive descriptor\n");
-               lib_parse(nal_data->nal_cb, portals_hdr, srxd);
+                /* I think passing a NULL srxd to lib_parse will crash
+                 * gmnal_recv() */
+                LBUG();
+               lib_parse(nal_data->libnal, portals_hdr, srxd);
                return(GMNAL_STATUS_FAIL);
        }
 
@@ -234,6 +237,7 @@ gmnal_pre_receive(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, int gmnal_type)
                return(GMNAL_STATUS_OK);
        }
 
+       srxd->nal_data = nal_data;
        srxd->type = gmnal_type;
        srxd->nsiov = gmnal_msghdr->niov;
        srxd->gm_source_node = gmnal_msghdr->sender_node_id;
@@ -245,7 +249,12 @@ gmnal_pre_receive(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, int gmnal_type)
         *      cb_recv is responsible for returning the buffer 
         *      for future receive
         */
-       lib_parse(nal_data->nal_cb, portals_hdr, srxd);
+       rc = lib_parse(nal_data->libnal, portals_hdr, srxd);
+
+        if (rc != PTL_OK) {
+                /* I just received garbage; take appropriate action... */
+                LBUG();
+        }
 
        return(GMNAL_STATUS_OK);
 }
@@ -309,19 +318,19 @@ gmnal_rx_bad(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, gmnal_srxd_t *srxd)
  *     Call lib_finalize
  */
 int
-gmnal_small_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, 
+gmnal_small_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie, 
                unsigned int niov, struct iovec *iov, size_t mlen, size_t rlen)
 {
        gmnal_srxd_t    *srxd = NULL;
        void    *buffer = NULL;
-       gmnal_data_t    *nal_data = (gmnal_data_t*)nal_cb->nal_data;
+       gmnal_data_t    *nal_data = (gmnal_data_t*)libnal->libnal_data;
 
 
        CDEBUG(D_TRACE, "niov [%d] mlen["LPSZ"]\n", niov, mlen);
 
        if (!private) {
                CDEBUG(D_ERROR, "gmnal_small_rx no context\n");
-               lib_finalize(nal_cb, private, cookie, PTL_FAIL);
+               lib_finalize(libnal, private, cookie, PTL_FAIL);
                return(PTL_FAIL);
        }
 
@@ -343,7 +352,7 @@ gmnal_small_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
         *      let portals library know receive is complete
         */
        CDEBUG(D_PORTALS, "calling lib_finalize\n");
-       lib_finalize(nal_cb, private, cookie, PTL_OK);
+       lib_finalize(libnal, private, cookie, PTL_OK);
        /*
         *      return buffer so it can be used again
         */
@@ -365,11 +374,11 @@ gmnal_small_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
  *     The callback function informs when the send is complete.
  */
 int
-gmnal_small_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, 
+gmnal_small_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie, 
                ptl_hdr_t *hdr, int type, ptl_nid_t global_nid, ptl_pid_t pid, 
                unsigned int niov, struct iovec *iov, int size)
 {
-       gmnal_data_t    *nal_data = (gmnal_data_t*)nal_cb->nal_data;
+       gmnal_data_t    *nal_data = (gmnal_data_t*)libnal->libnal_data;
        gmnal_stxd_t    *stxd = NULL;
        void            *buffer = NULL;
        gmnal_msghdr_t  *msghdr = NULL;
@@ -377,9 +386,9 @@ gmnal_small_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        unsigned int    local_nid;
        gm_status_t     gm_status = GM_SUCCESS;
 
-       CDEBUG(D_TRACE, "gmnal_small_tx nal_cb [%p] private [%p] cookie [%p] "
+       CDEBUG(D_TRACE, "gmnal_small_tx libnal [%p] private [%p] cookie [%p] "
               "hdr [%p] type [%d] global_nid ["LPU64"] pid [%d] niov [%d] "
-              "iov [%p] size [%d]\n", nal_cb, private, cookie, hdr, type, 
+              "iov [%p] size [%d]\n", libnal, private, cookie, hdr, type, 
               global_nid, pid, niov, iov, size);
 
        CDEBUG(D_INFO, "portals_hdr:: dest_nid ["LPU64"], src_nid ["LPU64"]\n",
@@ -472,7 +481,7 @@ gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
        gmnal_stxd_t    *stxd = (gmnal_stxd_t*)context;
        lib_msg_t       *cookie = stxd->cookie;
        gmnal_data_t    *nal_data = (gmnal_data_t*)stxd->nal_data;
-       nal_cb_t        *nal_cb = nal_data->nal_cb;
+       lib_nal_t       *libnal = nal_data->libnal;
 
        if (!stxd) {
                CDEBUG(D_TRACE, "send completion event for unknown stxd\n");
@@ -592,7 +601,7 @@ gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
                return;
        }
        gmnal_return_stxd(nal_data, stxd);
-       lib_finalize(nal_cb, stxd, cookie, PTL_OK);
+       lib_finalize(libnal, stxd, cookie, PTL_OK);
        return;
 }
 
@@ -645,7 +654,7 @@ void gmnal_drop_sends_callback(struct gm_port *gm_port, void *context,
  *     this ack, deregister the memory. Only 1 send token is required here.
  */
 int
-gmnal_large_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, 
+gmnal_large_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie, 
                ptl_hdr_t *hdr, int type, ptl_nid_t global_nid, ptl_pid_t pid, 
                unsigned int niov, struct iovec *iov, int size)
 {
@@ -661,15 +670,15 @@ gmnal_large_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        int             niov_dup;
 
 
-       CDEBUG(D_TRACE, "gmnal_large_tx nal_cb [%p] private [%p], cookie [%p] "
+       CDEBUG(D_TRACE, "gmnal_large_tx libnal [%p] private [%p], cookie [%p] "
               "hdr [%p], type [%d] global_nid ["LPU64"], pid [%d], niov [%d], "
-              "iov [%p], size [%d]\n", nal_cb, private, cookie, hdr, type, 
+              "iov [%p], size [%d]\n", libnal, private, cookie, hdr, type, 
               global_nid, pid, niov, iov, size);
 
-       if (nal_cb)
-               nal_data = (gmnal_data_t*)nal_cb->nal_data;
+       if (libnal)
+               nal_data = (gmnal_data_t*)libnal->libnal_data;
        else  {
-               CDEBUG(D_ERROR, "no nal_cb.\n");
+               CDEBUG(D_ERROR, "no libnal.\n");
                return(GMNAL_STATUS_FAIL);
        }
        
@@ -811,11 +820,11 @@ gmnal_large_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
  *     data from the sender.
  */
 int
-gmnal_large_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, 
+gmnal_large_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie, 
                unsigned int nriov, struct iovec *riov, size_t mlen, 
                size_t rlen)
 {
-       gmnal_data_t    *nal_data = nal_cb->nal_data;
+       gmnal_data_t    *nal_data = libnal->libnal_data;
        gmnal_srxd_t    *srxd = (gmnal_srxd_t*)private;
        void            *buffer = NULL;
        struct  iovec   *riov_dup;
@@ -823,13 +832,13 @@ gmnal_large_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        gmnal_msghdr_t  *msghdr = NULL;
        gm_status_t     gm_status;
 
-       CDEBUG(D_TRACE, "gmnal_large_rx :: nal_cb[%p], private[%p], "
+       CDEBUG(D_TRACE, "gmnal_large_rx :: libnal[%p], private[%p], "
               "cookie[%p], niov[%d], iov[%p], mlen["LPSZ"], rlen["LPSZ"]\n",
-               nal_cb, private, cookie, nriov, riov, mlen, rlen);
+               libnal, private, cookie, nriov, riov, mlen, rlen);
 
        if (!srxd) {
                CDEBUG(D_ERROR, "gmnal_large_rx no context\n");
-               lib_finalize(nal_cb, private, cookie, PTL_FAIL);
+               lib_finalize(libnal, private, cookie, PTL_FAIL);
                return(PTL_FAIL);
        }
 
@@ -1092,7 +1101,7 @@ gmnal_remote_get_callback(gm_port_t *gm_port, void *context,
 
        gmnal_ltxd_t    *ltxd = (gmnal_ltxd_t*)context;
        gmnal_srxd_t    *srxd = ltxd->srxd;
-       nal_cb_t        *nal_cb = srxd->nal_data->nal_cb;
+       lib_nal_t       *libnal = srxd->nal_data->libnal;
        int             lastone;
        struct  iovec   *riov;
        int             nriov;
@@ -1126,7 +1135,7 @@ gmnal_remote_get_callback(gm_port_t *gm_port, void *context,
         *      Let our client application proceed
         */     
        CDEBUG(D_ERROR, "final callback context[%p]\n", srxd);
-       lib_finalize(nal_cb, srxd, srxd->cookie, PTL_OK);
+       lib_finalize(libnal, srxd, srxd->cookie, PTL_OK);
 
        /*
         *      send an ack to the sender to let him know we got the data
@@ -1276,7 +1285,7 @@ gmnal_large_tx_ack_callback(gm_port_t *gm_port, void *context,
 void 
 gmnal_large_tx_ack_received(gmnal_data_t *nal_data, gmnal_srxd_t *srxd)
 {
-       nal_cb_t        *nal_cb = nal_data->nal_cb;
+       lib_nal_t       *libnal = nal_data->libnal;
        gmnal_stxd_t    *stxd = NULL;
        gmnal_msghdr_t  *msghdr = NULL;
        void            *buffer = NULL;
@@ -1291,7 +1300,7 @@ gmnal_large_tx_ack_received(gmnal_data_t *nal_data, gmnal_srxd_t *srxd)
 
        CDEBUG(D_INFO, "gmnal_large_tx_ack_received stxd [%p]\n", stxd);
 
-       lib_finalize(nal_cb, stxd, stxd->cookie, PTL_OK);
+       lib_finalize(libnal, stxd, stxd->cookie, PTL_OK);
 
        /*
         *      extract the iovec from the stxd, deregister the memory.
index f4005de..c595450 100644 (file)
@@ -43,6 +43,9 @@ kpr_nal_interface_t kqswnal_router_interface = {
 #define QSWNAL_SYSCTL_COPY_SMALL_FWD     2
 
 static ctl_table kqswnal_ctl_table[] = {
+       {QSWNAL_SYSCTL_OPTIMIZED_GETS, "optimized_puts",
+        &kqswnal_tunables.kqn_optimized_puts, sizeof (int),
+        0644, NULL, &proc_dointvec},
        {QSWNAL_SYSCTL_OPTIMIZED_GETS, "optimized_gets",
         &kqswnal_tunables.kqn_optimized_gets, sizeof (int),
         0644, NULL, &proc_dointvec},
@@ -55,88 +58,6 @@ static ctl_table kqswnal_top_ctl_table[] = {
 };
 #endif
 
-static int
-kqswnal_forward(nal_t   *nal,
-               int     id,
-               void    *args,  size_t args_len,
-               void    *ret,   size_t ret_len)
-{
-       kqswnal_data_t *k = nal->nal_data;
-       nal_cb_t       *nal_cb = k->kqn_cb;
-
-       LASSERT (nal == &kqswnal_api);
-       LASSERT (k == &kqswnal_data);
-       LASSERT (nal_cb == &kqswnal_lib);
-
-       lib_dispatch(nal_cb, k, id, args, ret); /* nal needs k */
-       return (PTL_OK);
-}
-
-static void
-kqswnal_lock (nal_t *nal, unsigned long *flags)
-{
-       kqswnal_data_t *k = nal->nal_data;
-       nal_cb_t       *nal_cb = k->kqn_cb;
-
-       LASSERT (nal == &kqswnal_api);
-       LASSERT (k == &kqswnal_data);
-       LASSERT (nal_cb == &kqswnal_lib);
-
-       nal_cb->cb_cli(nal_cb,flags);
-}
-
-static void
-kqswnal_unlock(nal_t *nal, unsigned long *flags)
-{
-       kqswnal_data_t *k = nal->nal_data;
-       nal_cb_t       *nal_cb = k->kqn_cb;
-
-       LASSERT (nal == &kqswnal_api);
-       LASSERT (k == &kqswnal_data);
-       LASSERT (nal_cb == &kqswnal_lib);
-
-       nal_cb->cb_sti(nal_cb,flags);
-}
-
-static int
-kqswnal_yield(nal_t *nal, unsigned long *flags, int milliseconds)
-{
-       /* NB called holding statelock */
-        wait_queue_t       wait;
-       unsigned long      now = jiffies;
-
-       CDEBUG (D_NET, "yield\n");
-
-       if (milliseconds == 0) {
-               if (need_resched())
-                       schedule();
-               return 0;
-       }
-
-       init_waitqueue_entry(&wait, current);
-       set_current_state(TASK_INTERRUPTIBLE);
-       add_wait_queue(&kqswnal_data.kqn_yield_waitq, &wait);
-
-       kqswnal_unlock(nal, flags);
-
-       if (milliseconds < 0)
-               schedule ();
-       else
-               schedule_timeout((milliseconds * HZ) / 1000);
-       
-       kqswnal_lock(nal, flags);
-
-       remove_wait_queue(&kqswnal_data.kqn_yield_waitq, &wait);
-
-       if (milliseconds > 0) {
-               milliseconds -= ((jiffies - now) * 1000) / HZ;
-               if (milliseconds < 0)
-                       milliseconds = 0;
-       }
-       
-       return (milliseconds);
-}
-
 int
 kqswnal_get_tx_desc (struct portals_cfg *pcfg)
 {
@@ -186,7 +107,7 @@ kqswnal_cmd (struct portals_cfg *pcfg, void *private)
                        kqswnal_data.kqn_nid_offset);
                kqswnal_data.kqn_nid_offset =
                        pcfg->pcfg_nid - kqswnal_data.kqn_elanid;
-               kqswnal_lib.ni.nid = pcfg->pcfg_nid;
+               kqswnal_lib.libnal_ni.ni_pid.nid = pcfg->pcfg_nid;
                return (0);
                
        default:
@@ -469,9 +390,11 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
        ptl_process_id_t  my_process_id;
        int               pkmem = atomic_read(&portal_kmemory);
 
+       LASSERT (nal == &kqswnal_api);
+
        if (nal->nal_refct != 0) {
                if (actual_limits != NULL)
-                       *actual_limits = kqswnal_lib.ni.actual_limits;
+                       *actual_limits = kqswnal_lib.libnal_ni.ni_actual_limits;
                /* This module got the first ref */
                PORTAL_MODULE_USE;
                return (PTL_OK);
@@ -481,18 +404,9 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
 
        CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&portal_kmemory));
 
-       memset(&kqswnal_rpc_success, 0, sizeof(kqswnal_rpc_success));
-       memset(&kqswnal_rpc_failed, 0, sizeof(kqswnal_rpc_failed));
-#if MULTIRAIL_EKC
-       kqswnal_rpc_failed.Data[0] = -ECONNREFUSED;
-#else
-       kqswnal_rpc_failed.Status = -ECONNREFUSED;
-#endif
        /* ensure all pointers NULL etc */
        memset (&kqswnal_data, 0, sizeof (kqswnal_data));
 
-       kqswnal_data.kqn_cb = &kqswnal_lib;
-
        INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds);
        INIT_LIST_HEAD (&kqswnal_data.kqn_nblk_idletxds);
        INIT_LIST_HEAD (&kqswnal_data.kqn_activetxds);
@@ -507,8 +421,12 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
        spin_lock_init (&kqswnal_data.kqn_sched_lock);
        init_waitqueue_head (&kqswnal_data.kqn_sched_waitq);
 
-       spin_lock_init (&kqswnal_data.kqn_statelock);
-       init_waitqueue_head (&kqswnal_data.kqn_yield_waitq);
+       /* Leave kqn_rpc_success zeroed */
+#if MULTIRAIL_EKC
+       kqswnal_data.kqn_rpc_failed.Data[0] = -ECONNREFUSED;
+#else
+       kqswnal_data.kqn_rpc_failed.Status = -ECONNREFUSED;
+#endif
 
        /* pointers/lists/locks initialised */
        kqswnal_data.kqn_init = KQN_INIT_DATA;
@@ -517,13 +435,13 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
        kqswnal_data.kqn_ep = ep_system();
        if (kqswnal_data.kqn_ep == NULL) {
                CERROR("Can't initialise EKC\n");
-               kqswnal_shutdown(&kqswnal_api);
+               kqswnal_shutdown(nal);
                return (PTL_IFACE_INVALID);
        }
 
        if (ep_waitfor_nodeid(kqswnal_data.kqn_ep) == ELAN_INVALID_NODE) {
                CERROR("Can't get elan ID\n");
-               kqswnal_shutdown(&kqswnal_api);
+               kqswnal_shutdown(nal);
                return (PTL_IFACE_INVALID);
        }
 #else
@@ -534,7 +452,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
        if (kqswnal_data.kqn_ep == NULL)
        {
                CERROR ("Can't get elan device 0\n");
-               kqswnal_shutdown(&kqswnal_api);
+               kqswnal_shutdown(nal);
                return (PTL_IFACE_INVALID);
        }
 #endif
@@ -550,7 +468,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
        if (kqswnal_data.kqn_eptx == NULL)
        {
                CERROR ("Can't allocate transmitter\n");
-               kqswnal_shutdown (&kqswnal_api);
+               kqswnal_shutdown (nal);
                return (PTL_NO_SPACE);
        }
 
@@ -563,7 +481,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
        if (kqswnal_data.kqn_eprx_small == NULL)
        {
                CERROR ("Can't install small msg receiver\n");
-               kqswnal_shutdown (&kqswnal_api);
+               kqswnal_shutdown (nal);
                return (PTL_NO_SPACE);
        }
 
@@ -573,7 +491,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
        if (kqswnal_data.kqn_eprx_large == NULL)
        {
                CERROR ("Can't install large msg receiver\n");
-               kqswnal_shutdown (&kqswnal_api);
+               kqswnal_shutdown (nal);
                return (PTL_NO_SPACE);
        }
 
@@ -588,7 +506,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
                                EP_PERM_WRITE);
        if (kqswnal_data.kqn_ep_tx_nmh == NULL) {
                CERROR("Can't reserve tx dma space\n");
-               kqswnal_shutdown(&kqswnal_api);
+               kqswnal_shutdown(nal);
                return (PTL_NO_SPACE);
        }
 #else
@@ -603,7 +521,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
        if (rc != DDI_SUCCESS)
        {
                CERROR ("Can't reserve rx dma space\n");
-               kqswnal_shutdown (&kqswnal_api);
+               kqswnal_shutdown (nal);
                return (PTL_NO_SPACE);
        }
 #endif
@@ -617,7 +535,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
                                EP_PERM_WRITE);
        if (kqswnal_data.kqn_ep_tx_nmh == NULL) {
                CERROR("Can't reserve rx dma space\n");
-               kqswnal_shutdown(&kqswnal_api);
+               kqswnal_shutdown(nal);
                return (PTL_NO_SPACE);
        }
 #else
@@ -633,7 +551,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
        if (rc != DDI_SUCCESS)
        {
                CERROR ("Can't reserve rx dma space\n");
-               kqswnal_shutdown (&kqswnal_api);
+               kqswnal_shutdown (nal);
                return (PTL_NO_SPACE);
        }
 #endif
@@ -644,7 +562,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
                     sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS));
        if (kqswnal_data.kqn_txds == NULL)
        {
-               kqswnal_shutdown (&kqswnal_api);
+               kqswnal_shutdown (nal);
                return (PTL_NO_SPACE);
        }
 
@@ -660,7 +578,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
                PORTAL_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE);
                if (ktx->ktx_buffer == NULL)
                {
-                       kqswnal_shutdown (&kqswnal_api);
+                       kqswnal_shutdown (nal);
                        return (PTL_NO_SPACE);
                }
 
@@ -697,7 +615,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
                      sizeof (kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE));
        if (kqswnal_data.kqn_rxds == NULL)
        {
-               kqswnal_shutdown (&kqswnal_api);
+               kqswnal_shutdown (nal);
                return (PTL_NO_SPACE);
        }
 
@@ -732,7 +650,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
                        struct page *page = alloc_page(GFP_KERNEL);
                        
                        if (page == NULL) {
-                               kqswnal_shutdown (&kqswnal_api);
+                               kqswnal_shutdown (nal);
                                return (PTL_NO_SPACE);
                        }
 
@@ -780,12 +698,12 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
        my_process_id.nid = kqswnal_elanid2nid(kqswnal_data.kqn_elanid);
        my_process_id.pid = 0;
 
-       rc = lib_init(&kqswnal_lib, my_process_id,
+       rc = lib_init(&kqswnal_lib, nal, my_process_id,
                      requested_limits, actual_limits);
         if (rc != PTL_OK)
        {
                CERROR ("lib_init failed %d\n", rc);
-               kqswnal_shutdown (&kqswnal_api);
+               kqswnal_shutdown (nal);
                return (rc);
        }
 
@@ -799,6 +717,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
                kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
 
                /* NB this enqueue can allocate/sleep (attr == 0) */
+               krx->krx_state = KRX_POSTED;
 #if MULTIRAIL_EKC
                rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
                                      &krx->krx_elanbuffer, 0);
@@ -810,7 +729,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
                if (rc != EP_SUCCESS)
                {
                        CERROR ("failed ep_queue_receive %d\n", rc);
-                       kqswnal_shutdown (&kqswnal_api);
+                       kqswnal_shutdown (nal);
                        return (PTL_FAIL);
                }
        }
@@ -822,7 +741,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
                if (rc != 0)
                {
                        CERROR ("failed to spawn scheduling thread: %d\n", rc);
-                       kqswnal_shutdown (&kqswnal_api);
+                       kqswnal_shutdown (nal);
                        return (PTL_FAIL);
                }
        }
@@ -835,7 +754,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
        rc = libcfs_nal_cmd_register (QSWNAL, &kqswnal_cmd, NULL);
        if (rc != 0) {
                CERROR ("Can't initialise command interface (rc = %d)\n", rc);
-               kqswnal_shutdown (&kqswnal_api);
+               kqswnal_shutdown (nal);
                return (PTL_FAIL);
        }
 
@@ -867,17 +786,11 @@ kqswnal_initialise (void)
 {
        int   rc;
 
-       kqswnal_api.startup  = kqswnal_startup;
-       kqswnal_api.shutdown = kqswnal_shutdown;
-       kqswnal_api.forward  = kqswnal_forward;
-       kqswnal_api.yield    = kqswnal_yield;
-       kqswnal_api.lock     = kqswnal_lock;
-       kqswnal_api.unlock   = kqswnal_unlock;
-       kqswnal_api.nal_data = &kqswnal_data;
-
-       kqswnal_lib.nal_data = &kqswnal_data;
+       kqswnal_api.nal_ni_init = kqswnal_startup;
+       kqswnal_api.nal_ni_fini = kqswnal_shutdown;
 
        /* Initialise dynamic tunables to defaults once only */
+       kqswnal_tunables.kqn_optimized_puts = KQSW_OPTIMIZED_PUTS;
        kqswnal_tunables.kqn_optimized_gets = KQSW_OPTIMIZED_GETS;
        
        rc = ptl_register_nal(QSWNAL, &kqswnal_api);
index 6978aa0..b085caa 100644 (file)
@@ -109,7 +109,8 @@ typedef unsigned long kqsw_csum_t;
 
 #define KQSW_RESCHED                    100     /* # busy loops that forces scheduler to yield */
 
-#define KQSW_OPTIMIZED_GETS             1       /* optimized gets? */
+#define KQSW_OPTIMIZED_GETS             1       /* optimize gets >= this size */
+#define KQSW_OPTIMIZED_PUTS            (32<<10) /* optimize puts >= this size */
 #define KQSW_COPY_SMALL_FWD             0       /* copy small fwd messages to pre-mapped buffer? */
 
 /*
@@ -156,12 +157,18 @@ typedef struct
         int              krx_npages;            /* # pages in receive buffer */
         int              krx_nob;               /* Number Of Bytes received into buffer */
         int              krx_rpc_reply_needed;  /* peer waiting for EKC RPC reply */
-        int              krx_rpc_reply_sent;    /* rpc reply sent */
+        int              krx_rpc_reply_status;  /* what status to send */
+        int              krx_state;             /* what this RX is doing */
         atomic_t         krx_refcount;          /* how to tell when rpc is done */
         kpr_fwd_desc_t   krx_fwd;               /* embedded forwarding descriptor */
         ptl_kiov_t       krx_kiov[KQSW_NRXMSGPAGES_LARGE]; /* buffer frags */
 }  kqswnal_rx_t;
 
+#define KRX_POSTED       1                      /* receiving */
+#define KRX_PARSE        2                      /* ready to be parsed */
+#define KRX_COMPLETING   3                      /* waiting to be completed */
+
+
 typedef struct
 {
         struct list_head  ktx_list;             /* enqueue idle/active */
@@ -174,7 +181,7 @@ typedef struct
         int               ktx_nmappedpages;     /* # pages mapped for current message */
         int               ktx_port;             /* destination ep port */
         ptl_nid_t         ktx_nid;              /* destination node */
-        void             *ktx_args[2];          /* completion passthru */
+        void             *ktx_args[3];          /* completion passthru */
         char             *ktx_buffer;           /* pre-allocated contiguous buffer for hdr + small payloads */
         unsigned long     ktx_launchtime;       /* when (in jiffies) the transmit was launched */
 
@@ -193,13 +200,16 @@ typedef struct
 } kqswnal_tx_t;
 
 #define KTX_IDLE        0                       /* on kqn_(nblk_)idletxds */
-#define KTX_SENDING     1                       /* local send */
-#define KTX_FORWARDING  2                       /* routing a packet */
-#define KTX_GETTING     3                       /* local optimised get */
+#define KTX_FORWARDING  1                       /* sending a forwarded packet */
+#define KTX_SENDING     2                       /* normal send */
+#define KTX_GETTING     3                       /* sending optimised get */
+#define KTX_PUTTING     4                       /* sending optimised put */
+#define KTX_RDMAING     5                       /* handling optimised put/get */
 
 typedef struct
 {
         /* dynamic tunables... */
+        int                      kqn_optimized_puts;  /* optimized PUTs? */
         int                      kqn_optimized_gets;  /* optimized GETs? */
 #if CONFIG_SYSCTL
         struct ctl_table_header *kqn_sysctl;          /* sysctl interface */
@@ -230,9 +240,6 @@ typedef struct
         struct list_head   kqn_delayedfwds;     /* delayed forwards */
         struct list_head   kqn_delayedtxds;     /* delayed transmits */
 
-        spinlock_t         kqn_statelock;       /* cb_cli/cb_sti */
-        wait_queue_head_t  kqn_yield_waitq;     /* where yield waits */
-        nal_cb_t          *kqn_cb;              /* -> kqswnal_lib */
 #if MULTIRAIL_EKC
         EP_SYS            *kqn_ep;              /* elan system */
         EP_NMH            *kqn_ep_tx_nmh;       /* elan reserved tx vaddrs */
@@ -250,6 +257,9 @@ typedef struct
         ptl_nid_t          kqn_nid_offset;      /* this cluster's NID offset */
         int                kqn_nnodes;          /* this cluster's size */
         int                kqn_elanid;          /* this nodes's elan ID */
+
+        EP_STATUSBLK       kqn_rpc_success;     /* preset RPC reply status blocks */
+        EP_STATUSBLK       kqn_rpc_failed;
 }  kqswnal_data_t;
 
 /* kqn_init state */
@@ -258,21 +268,16 @@ typedef struct
 #define KQN_INIT_LIB            2
 #define KQN_INIT_ALL            3
 
-extern nal_cb_t            kqswnal_lib;
+extern lib_nal_t           kqswnal_lib;
 extern nal_t               kqswnal_api;
 extern kqswnal_tunables_t  kqswnal_tunables;
 extern kqswnal_data_t      kqswnal_data;
 
-/* global pre-prepared replies to keep off the stack */
-extern EP_STATUSBLK    kqswnal_rpc_success;
-extern EP_STATUSBLK    kqswnal_rpc_failed;
-
 extern int kqswnal_thread_start (int (*fn)(void *arg), void *arg);
 extern void kqswnal_rxhandler(EP_RXD *rxd);
 extern int kqswnal_scheduler (void *);
 extern void kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd);
-extern void kqswnal_dma_reply_complete (EP_RXD *rxd);
-extern void kqswnal_requeue_rx (kqswnal_rx_t *krx);
+extern void kqswnal_rx_done (kqswnal_rx_t *krx);
 
 static inline ptl_nid_t
 kqswnal_elanid2nid (int elanid) 
@@ -291,6 +296,12 @@ kqswnal_nid2elanid (ptl_nid_t nid)
         return (nid - kqswnal_data.kqn_nid_offset);
 }
 
+/* Return the NID of the node that sent the message held in 'krx': the
+ * Elan node ID recorded in its receive descriptor, converted to a NID. */
+static inline ptl_nid_t
+kqswnal_rx_nid(kqswnal_rx_t *krx) 
+{
+        int sender_elanid = ep_rxd_node(krx->krx_rxd);
+
+        return (kqswnal_elanid2nid(sender_elanid));
+}
+
 static inline int
 kqswnal_pages_spanned (void *base, int nob)
 {
@@ -313,11 +324,11 @@ static inline kqsw_csum_t kqsw_csum (kqsw_csum_t sum, void *base, int nob)
 }
 #endif
 
-static inline void kqswnal_rx_done (kqswnal_rx_t *krx)
+/* Drop one reference on 'krx'.  The caller must hold a reference (asserted);
+ * whoever drops the last one hands 'krx' to kqswnal_rx_done(). */
+static inline void kqswnal_rx_decref (kqswnal_rx_t *krx)
 {
         LASSERT (atomic_read (&krx->krx_refcount) > 0);
         if (atomic_dec_and_test (&krx->krx_refcount))
-                kqswnal_requeue_rx(krx);
+                kqswnal_rx_done(krx);
 }
 
 #if MULTIRAIL_EKC
index 2bcb853..e1237a8 100644 (file)
 
 #include "qswnal.h"
 
-EP_STATUSBLK  kqswnal_rpc_success;
-EP_STATUSBLK  kqswnal_rpc_failed;
-
 /*
  *  LIB functions follow
  *
  */
-static ptl_err_t
-kqswnal_read(nal_cb_t *nal, void *private, void *dst_addr, user_ptr src_addr,
-             size_t len)
-{
-        CDEBUG (D_NET, LPX64": reading "LPSZ" bytes from %p -> %p\n",
-                nal->ni.nid, len, src_addr, dst_addr );
-        memcpy( dst_addr, src_addr, len );
-
-        return (PTL_OK);
-}
-
-static ptl_err_t
-kqswnal_write(nal_cb_t *nal, void *private, user_ptr dst_addr, void *src_addr,
-              size_t len)
-{
-        CDEBUG (D_NET, LPX64": writing "LPSZ" bytes from %p -> %p\n",
-                nal->ni.nid, len, src_addr, dst_addr );
-        memcpy( dst_addr, src_addr, len );
-
-        return (PTL_OK);
-}
-
-static void *
-kqswnal_malloc(nal_cb_t *nal, size_t len)
-{
-        void *buf;
-
-        PORTAL_ALLOC(buf, len);
-        return (buf);
-}
-
-static void
-kqswnal_free(nal_cb_t *nal, void *buf, size_t len)
-{
-        PORTAL_FREE(buf, len);
-}
-
-static void
-kqswnal_printf (nal_cb_t * nal, const char *fmt, ...)
-{
-        va_list ap;
-        char msg[256];
-
-        va_start (ap, fmt);
-        vsnprintf (msg, sizeof (msg), fmt, ap);        /* sprint safely */
-        va_end (ap);
-
-        msg[sizeof (msg) - 1] = 0;                /* ensure terminated */
-
-        CDEBUG (D_NET, "%s", msg);
-}
-
-#if (defined(CONFIG_SPARC32) || defined(CONFIG_SPARC64))
-# error "Can't save/restore irq contexts in different procedures"
-#endif
-
-static void
-kqswnal_cli(nal_cb_t *nal, unsigned long *flags)
-{
-        kqswnal_data_t *data= nal->nal_data;
-
-        spin_lock_irqsave(&data->kqn_statelock, *flags);
-}
-
-
-static void
-kqswnal_sti(nal_cb_t *nal, unsigned long *flags)
-{
-        kqswnal_data_t *data= nal->nal_data;
-
-        spin_unlock_irqrestore(&data->kqn_statelock, *flags);
-}
-
-static void
-kqswnal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev)
-{
-        /* holding kqn_statelock */
-
-        if (eq->event_callback != NULL)
-                eq->event_callback(ev);
-
-        if (waitqueue_active(&kqswnal_data.kqn_yield_waitq))
-                wake_up_all(&kqswnal_data.kqn_yield_waitq);
-}
-
 static int
-kqswnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
+kqswnal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist)
 {
-        if (nid == nal->ni.nid)
+        if (nid == nal->libnal_ni.ni_pid.nid)
                 *dist = 0;                      /* it's me */
         else if (kqswnal_nid2elanid (nid) >= 0)
                 *dist = 1;                      /* it's my peer */
@@ -212,11 +124,12 @@ kqswnal_map_tx_kiov (kqswnal_tx_t *ktx, int offset, int nob, int niov, ptl_kiov_
         do {
                 int  fraglen = kiov->kiov_len - offset;
 
-                /* nob exactly spans the iovs */
-                LASSERT (fraglen <= nob);
-                /* each frag fits in a page */
+                /* each page frag is contained in one page */
                 LASSERT (kiov->kiov_offset + kiov->kiov_len <= PAGE_SIZE);
 
+                if (fraglen > nob)
+                        fraglen = nob;
+
                 nmapped++;
                 if (nmapped > maxmapped) {
                         CERROR("Can't map message in %d pages (max %d)\n",
@@ -328,11 +241,12 @@ kqswnal_map_tx_iov (kqswnal_tx_t *ktx, int offset, int nob,
         
         do {
                 int  fraglen = iov->iov_len - offset;
-                long npages  = kqswnal_pages_spanned (iov->iov_base, fraglen);
-
-                /* nob exactly spans the iovs */
-                LASSERT (fraglen <= nob);
+                long npages;
                 
+                if (fraglen > nob)
+                        fraglen = nob;
+                npages = kqswnal_pages_spanned (iov->iov_base, fraglen);
+
                 nmapped += npages;
                 if (nmapped > maxmapped) {
                         CERROR("Can't map message in %d pages (max %d)\n",
@@ -519,40 +433,29 @@ kqswnal_get_idle_tx (kpr_fwd_desc_t *fwd, int may_block)
 void
 kqswnal_tx_done (kqswnal_tx_t *ktx, int error)
 {
-        lib_msg_t     *msg;
-        lib_msg_t     *repmsg = NULL;
-
         switch (ktx->ktx_state) {
         case KTX_FORWARDING:       /* router asked me to forward this packet */
                 kpr_fwd_done (&kqswnal_data.kqn_router,
                               (kpr_fwd_desc_t *)ktx->ktx_args[0], error);
                 break;
 
-        case KTX_SENDING:          /* packet sourced locally */
-                lib_finalize (&kqswnal_lib, ktx->ktx_args[0],
+        case KTX_RDMAING:          /* optimized GET/PUT handled */
+        case KTX_PUTTING:          /* optimized PUT sent */
+        case KTX_SENDING:          /* normal send */
+                lib_finalize (&kqswnal_lib, NULL,
                               (lib_msg_t *)ktx->ktx_args[1],
-                              (error == 0) ? PTL_OK : 
-                              (error == -ENOMEM) ? PTL_NO_SPACE : PTL_FAIL);
+                              (error == 0) ? PTL_OK : PTL_FAIL);
                 break;
 
-        case KTX_GETTING:          /* Peer has DMA-ed direct? */
-                msg = (lib_msg_t *)ktx->ktx_args[1];
-
-                if (error == 0) {
-                        repmsg = lib_create_reply_msg (&kqswnal_lib, 
-                                                       ktx->ktx_nid, msg);
-                        if (repmsg == NULL)
-                                error = -ENOMEM;
-                }
-                
-                if (error == 0) {
-                        lib_finalize (&kqswnal_lib, ktx->ktx_args[0], 
-                                      msg, PTL_OK);
-                        lib_finalize (&kqswnal_lib, NULL, repmsg, PTL_OK);
-                } else {
-                        lib_finalize (&kqswnal_lib, ktx->ktx_args[0], msg,
-                                      (error == -ENOMEM) ? PTL_NO_SPACE : PTL_FAIL);
-                }
+        case KTX_GETTING:          /* optimized GET sent & REPLY received */
+                /* Complete the GET with success since we can't avoid
+                 * delivering a REPLY event; we committed to it when we
+                 * launched the GET */
+                lib_finalize (&kqswnal_lib, NULL, 
+                              (lib_msg_t *)ktx->ktx_args[1], PTL_OK);
+                lib_finalize (&kqswnal_lib, NULL,
+                              (lib_msg_t *)ktx->ktx_args[2],
+                              (error == 0) ? PTL_OK : PTL_FAIL);
                 break;
 
         default:
@@ -580,16 +483,27 @@ kqswnal_txhandler(EP_TXD *txd, void *arg, int status)
                 kqswnal_notify_peer_down(ktx);
                 status = -EHOSTDOWN;
 
-        } else if (ktx->ktx_state == KTX_GETTING) {
-                /* RPC completed OK; what did our peer put in the status
+        } else switch (ktx->ktx_state) {
+
+        case KTX_GETTING:
+        case KTX_PUTTING:
+                /* RPC completed OK; but what did our peer put in the status
                  * block? */
 #if MULTIRAIL_EKC
                 status = ep_txd_statusblk(txd)->Data[0];
 #else
                 status = ep_txd_statusblk(txd)->Status;
 #endif
-        } else {
+                break;
+                
+        case KTX_FORWARDING:
+        case KTX_SENDING:
                 status = 0;
+                break;
+                
+        default:
+                LBUG();
+                break;
         }
 
         kqswnal_tx_done (ktx, status);
@@ -610,21 +524,20 @@ kqswnal_launch (kqswnal_tx_t *ktx)
                 return (-ESHUTDOWN);
 
         LASSERT (dest >= 0);                    /* must be a peer */
-        if (ktx->ktx_state == KTX_GETTING) {
-                /* NB ktx_frag[0] is the GET hdr + kqswnal_remotemd_t.  The
-                 * other frags are the GET sink which we obviously don't
-                 * send here :) */
-#if MULTIRAIL_EKC
+
+        switch (ktx->ktx_state) {
+        case KTX_GETTING:
+        case KTX_PUTTING:
+                /* NB ktx_frag[0] is the GET/PUT hdr + kqswnal_remotemd_t.
+                 * The other frags are the payload, awaiting RDMA */
                 rc = ep_transmit_rpc(kqswnal_data.kqn_eptx, dest,
                                      ktx->ktx_port, attr,
                                      kqswnal_txhandler, ktx,
                                      NULL, ktx->ktx_frags, 1);
-#else
-                rc = ep_transmit_rpc(kqswnal_data.kqn_eptx, dest,
-                                     ktx->ktx_port, attr, kqswnal_txhandler,
-                                     ktx, NULL, ktx->ktx_frags, 1);
-#endif
-        } else {
+                break;
+
+        case KTX_FORWARDING:
+        case KTX_SENDING:
 #if MULTIRAIL_EKC
                 rc = ep_transmit_message(kqswnal_data.kqn_eptx, dest,
                                          ktx->ktx_port, attr,
@@ -636,6 +549,12 @@ kqswnal_launch (kqswnal_tx_t *ktx)
                                        kqswnal_txhandler, ktx, 
                                        ktx->ktx_frags, ktx->ktx_nfrag);
 #endif
+                break;
+                
+        default:
+                LBUG();
+                rc = -EINVAL;                   /* no compiler warning please */
+                break;
         }
 
         switch (rc) {
@@ -658,6 +577,7 @@ kqswnal_launch (kqswnal_tx_t *ktx)
         }
 }
 
+#if 0
 static char *
 hdr_type_string (ptl_hdr_t *hdr)
 {
@@ -726,6 +646,7 @@ kqswnal_cerror_hdr(ptl_hdr_t * hdr)
         }
 
 }                               /* end of print_hdr() */
+#endif
 
 #if !MULTIRAIL_EKC
 void
@@ -787,114 +708,291 @@ kqswnal_eiovs2datav (int ndv, EP_DATAVEC *dv,
         CERROR ("DATAVEC too small\n");
         return (-E2BIG);
 }
+#else
+/* Check that the local and remote RDMA fragment lists are compatible:
+ * both must have the same number of fragments and each pair of
+ * corresponding fragments must have the same length.
+ * Returns 0 if they match, -EINVAL (after logging why) if they don't. */
+int
+kqswnal_check_rdma (int nlfrag, EP_NMD *lfrag,
+                    int nrfrag, EP_NMD *rfrag)
+{
+        int  idx = 0;
+
+        if (nlfrag != nrfrag) {
+                CERROR("Can't cope with unequal # frags: %d local %d remote\n",
+                       nlfrag, nrfrag);
+                return (-EINVAL);
+        }
+
+        while (idx < nlfrag) {
+                if (lfrag[idx].nmd_len == rfrag[idx].nmd_len) {
+                        /* this pair matches; look at the next one */
+                        idx++;
+                        continue;
+                }
+
+                CERROR("Can't cope with unequal frags %d(%d):"
+                       " %d local %d remote\n",
+                       idx, nlfrag, lfrag[idx].nmd_len, rfrag[idx].nmd_len);
+                return (-EINVAL);
+        }
+
+        return (0);
+}
 #endif
 
-int
-kqswnal_dma_reply (kqswnal_tx_t *ktx, int nfrag, 
-                   struct iovec *iov, ptl_kiov_t *kiov, 
-                   int offset, int nob)
+/* Validate the optimized GET/PUT request received in 'krx' and locate the
+ * remote memory descriptor (RMD) that follows the portals header in the
+ * first receive page.
+ *
+ * 'type' is the message type the header must carry and 'expected_nid' the
+ * NID the caller believes sent it (asserted against the sender's NID).
+ *
+ * Returns a pointer to the RMD inside the receive buffer, or NULL (after
+ * logging the reason) if the header type or source NID is unexpected, or
+ * the message is too short to contain the whole RMD. */
+kqswnal_remotemd_t *
+kqswnal_parse_rmd (kqswnal_rx_t *krx, int type, ptl_nid_t expected_nid)
 {
-        kqswnal_rx_t       *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
         char               *buffer = (char *)page_address(krx->krx_kiov[0].kiov_page);
+        ptl_hdr_t          *hdr = (ptl_hdr_t *)buffer;
         kqswnal_remotemd_t *rmd = (kqswnal_remotemd_t *)(buffer + KQSW_HDR_SIZE);
-        int                 rc;
-#if MULTIRAIL_EKC
-        int                 i;
-#else
-        EP_DATAVEC          datav[EP_MAXFRAG];
-        int                 ndatav;
-#endif
-        LASSERT (krx->krx_rpc_reply_needed);
-        LASSERT ((iov == NULL) != (kiov == NULL));
+        ptl_nid_t           nid = kqswnal_rx_nid(krx);
+
+        /* Note (1) lib_parse has already flipped hdr.
+         *      (2) RDMA addresses are sent in native endian-ness.  When
+         *      EKC copes with different endian nodes, I'll fix this (and
+         *      eat my hat :) */
+
+        LASSERT (krx->krx_nob >= sizeof(*hdr));
+
+        if (hdr->type != type) {
+                /* NB adjacent literals are concatenated: keep the space
+                 * before "from" */
+                CERROR ("Unexpected optimized get/put type %d (%d expected)"
+                        " from "LPX64"\n", hdr->type, type, nid);
+                return (NULL);
+        }
+        
+        if (hdr->src_nid != nid) {
+                CERROR ("Unexpected optimized get/put source NID "
+                        LPX64" from "LPX64"\n", hdr->src_nid, nid);
+                return (NULL);
+        }
+
+        LASSERT (nid == expected_nid);
 
-        /* see kqswnal_sendmsg comment regarding endian-ness */
         if (buffer + krx->krx_nob < (char *)(rmd + 1)) {
                 /* msg too small to discover rmd size */
                 CERROR ("Incoming message [%d] too small for RMD (%d needed)\n",
                         krx->krx_nob, (int)(((char *)(rmd + 1)) - buffer));
-                return (-EINVAL);
+                return (NULL);
         }
-        
+
         if (buffer + krx->krx_nob < (char *)&rmd->kqrmd_frag[rmd->kqrmd_nfrag]) {
                 /* rmd doesn't fit in the incoming message */
                 CERROR ("Incoming message [%d] too small for RMD[%d] (%d needed)\n",
                         krx->krx_nob, rmd->kqrmd_nfrag,
                         (int)(((char *)&rmd->kqrmd_frag[rmd->kqrmd_nfrag]) - buffer));
-                return (-EINVAL);
+                return (NULL);
         }
 
-        /* Map the source data... */
+        return (rmd);
+}
+
+/* Completion callback passed to ep_complete_rpc() by kqswnal_rdma() for an
+ * optimized GET.  'rxd' is the receive descriptor whose RPC was completed;
+ * its callback argument is the tx descriptor used to map the local buffers.
+ * Marks the RPC reply as no longer needed, drops the reference on the krx
+ * and completes the tx with 0 on EP_SUCCESS or -ECONNABORTED otherwise. */
+void
+kqswnal_rdma_store_complete (EP_RXD *rxd) 
+{
+        int           eprc = ep_rxd_status(rxd);
+        kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd);
+        kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
+        int           error;
+
+        CDEBUG((eprc == EP_SUCCESS) ? D_NET : D_ERROR,
+               "rxd %p, ktx %p, status %d\n", rxd, ktx, eprc);
+
+        LASSERT (ktx->ktx_state == KTX_RDMAING);
+        LASSERT (krx->krx_rxd == rxd);
+        LASSERT (krx->krx_rpc_reply_needed);
+
+        /* The RPC has been completed: nothing further to reply to */
+        krx->krx_rpc_reply_needed = 0;
+        kqswnal_rx_decref (krx);
+
+        if (eprc == EP_SUCCESS)
+                error = 0;
+        else
+                error = -ECONNABORTED;
+
+        /* releases ktx and finalizes the lib_msg_t it carries */
+        kqswnal_tx_done(ktx, error);
+}
+
+/* Completion callback passed to ep_rpc_get() by kqswnal_rdma() for an
+ * optimized PUT.  'rxd' is the receive descriptor of the peer's RPC; its
+ * callback argument is the tx descriptor used to map the local buffers.
+ *
+ * Records the outcome in krx_rpc_reply_status (0 on EP_SUCCESS, otherwise
+ * -ECONNABORTED), completes the tx, and then either drops the krx reference
+ * directly (not in interrupt context) or queues the krx on kqn_readyrxds
+ * and wakes kqn_sched_waitq so the RPC is completed in thread context. */
+void
+kqswnal_rdma_fetch_complete (EP_RXD *rxd) 
+{
+        /* Completed fetching the PUT data */
+        int           status = ep_rxd_status(rxd);
+        kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd);
+        kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
+        unsigned long flags;
+        
+        CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR,
+               "rxd %p, ktx %p, status %d\n", rxd, ktx, status);
+
+        LASSERT (ktx->ktx_state == KTX_RDMAING);
+        LASSERT (krx->krx_rxd == rxd);
+        LASSERT (krx->krx_rpc_reply_needed);
+
+        /* Set the RPC completion status */
+        /* NB 'status' is an EKC status above this line and an errno below */
+        status = (status == EP_SUCCESS) ? 0 : -ECONNABORTED;
+        krx->krx_rpc_reply_status = status;
+
+        /* free ktx & finalize() its lib_msg_t */
+        kqswnal_tx_done(ktx, status);
+
+        if (!in_interrupt()) {
+                /* OK to complete the RPC now (iff I had the last ref) */
+                kqswnal_rx_decref (krx);
+                return;
+        }
+
+        /* In interrupt context the reference is NOT dropped here; the krx
+         * is marked KRX_COMPLETING and handed over via kqn_readyrxds
+         * (presumably the scheduler thread drops it - confirm there) */
+        LASSERT (krx->krx_state == KRX_PARSE);
+        krx->krx_state = KRX_COMPLETING;
+
+        /* Complete the RPC in thread context */
+        spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
+
+        list_add_tail (&krx->krx_list, &kqswnal_data.kqn_readyrxds);
+        wake_up (&kqswnal_data.kqn_sched_waitq);
+
+        spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
+}
+
+/*
+ * Start the RDMA for an optimized GET or PUT whose RPC request is held in
+ * 'krx'; 'libmsg' is the message to finalize when it is done.
+ *
+ * type == PTL_MSG_GET: the RPC is completed with ep_complete_rpc() using the
+ *   local buffers and the frags listed in the peer's remote MD;
+ *   kqswnal_rdma_store_complete() is the completion callback.
+ * type == PTL_MSG_PUT: ep_rpc_get() is started between the frags listed in
+ *   the peer's remote MD and the local buffers;
+ *   kqswnal_rdma_fetch_complete() is the completion callback.
+ *
+ * The local buffers are 'len' bytes at 'offset' in the 'niov' fragments of
+ * 'iov' or 'kiov' (they must not both be non-NULL).
+ *
+ * Returns 0 if the RDMA was started (or len == 0, when 'libmsg' is
+ * finalized immediately), otherwise a negative errno: -EPROTO if the remote
+ * MD is unacceptable, -ENOMEM if no tx descriptor is available,
+ * -ECONNABORTED if EKC refused the operation, or the error from mapping or
+ * checking the RDMA descriptors.
+ *
+ * An extra reference on 'krx' is held for the completion callback only once
+ * the RDMA is about to be launched; it is dropped again here only if the
+ * launch itself fails.
+ */
+int
+kqswnal_rdma (kqswnal_rx_t *krx, lib_msg_t *libmsg, int type,
+              int niov, struct iovec *iov, ptl_kiov_t *kiov,
+              size_t offset, size_t len)
+{
+        kqswnal_remotemd_t *rmd;
+        kqswnal_tx_t       *ktx;
+        int                 eprc;
+        int                 rc;
+#if !MULTIRAIL_EKC
+        EP_DATAVEC          datav[EP_MAXFRAG];
+        int                 ndatav;
+#endif
+
+        LASSERT (type == PTL_MSG_GET || type == PTL_MSG_PUT);
+        /* Not both mapped and paged payload */
+        LASSERT (iov == NULL || kiov == NULL);
+        /* RPC completes with failure by default */
+        LASSERT (krx->krx_rpc_reply_needed);
+        LASSERT (krx->krx_rpc_reply_status != 0);
+
+        rmd = kqswnal_parse_rmd(krx, type, libmsg->ev.initiator.nid);
+        if (rmd == NULL)
+                return (-EPROTO);
+
+        if (len == 0) {
+                /* data got truncated to nothing. */
+                lib_finalize(&kqswnal_lib, krx, libmsg, PTL_OK);
+                /* Let kqswnal_rx_done() complete the RPC with success */
+                krx->krx_rpc_reply_status = 0;
+                return (0);
+        }
+        
+        /* NB I'm using 'ktx' just to map the local RDMA buffers; I'm not
+           actually sending a portals message with it */
+        ktx = kqswnal_get_idle_tx(NULL, 0);
+        if (ktx == NULL) {
+                CERROR ("Can't get txd for RDMA with "LPX64"\n",
+                        libmsg->ev.initiator.nid);
+                return (-ENOMEM);
+        }
+
+        ktx->ktx_state   = KTX_RDMAING;
+        ktx->ktx_nid     = libmsg->ev.initiator.nid;
+        ktx->ktx_args[0] = krx;
+        ktx->ktx_args[1] = libmsg;
+
+        /* Start mapping at offset 0 (we're not mapping any headers) */
         ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 0;
+        
         if (kiov != NULL)
-                rc = kqswnal_map_tx_kiov (ktx, offset, nob, nfrag, kiov);
+                rc = kqswnal_map_tx_kiov(ktx, offset, len, niov, kiov);
         else
-                rc = kqswnal_map_tx_iov (ktx, offset, nob, nfrag, iov);
+                rc = kqswnal_map_tx_iov(ktx, offset, len, niov, iov);
 
         if (rc != 0) {
-                CERROR ("Can't map source data: %d\n", rc);
-                return (rc);
+                CERROR ("Can't map local RDMA data: %d\n", rc);
+                goto out;
         }
 
 #if MULTIRAIL_EKC
-        if (ktx->ktx_nfrag != rmd->kqrmd_nfrag) {
-                CERROR("Can't cope with unequal # frags: %d local %d remote\n",
-                       ktx->ktx_nfrag, rmd->kqrmd_nfrag);
-                return (-EINVAL);
+        rc = kqswnal_check_rdma (ktx->ktx_nfrag, ktx->ktx_frags,
+                                 rmd->kqrmd_nfrag, rmd->kqrmd_frag);
+        if (rc != 0) {
+                CERROR ("Incompatible RDMA descriptors\n");
+                goto out;
         }
-        
-        for (i = 0; i < rmd->kqrmd_nfrag; i++)
-                if (ktx->ktx_frags[i].nmd_len != rmd->kqrmd_frag[i].nmd_len) {
-                        CERROR("Can't cope with unequal frags %d(%d):"
-                               " %d local %d remote\n",
-                               i, rmd->kqrmd_nfrag, 
-                               ktx->ktx_frags[i].nmd_len, 
-                               rmd->kqrmd_frag[i].nmd_len);
-                        return (-EINVAL);
-                }
 #else
-        ndatav = kqswnal_eiovs2datav (EP_MAXFRAG, datav,
-                                      ktx->ktx_nfrag, ktx->ktx_frags,
-                                      rmd->kqrmd_nfrag, rmd->kqrmd_frag);
+        switch (type) {
+        default:
+                LBUG();
+
+        case PTL_MSG_GET:
+                ndatav = kqswnal_eiovs2datav(EP_MAXFRAG, datav,
+                                             ktx->ktx_nfrag, ktx->ktx_frags,
+                                             rmd->kqrmd_nfrag, rmd->kqrmd_frag);
+                break;
+
+        case PTL_MSG_PUT:
+                ndatav = kqswnal_eiovs2datav(EP_MAXFRAG, datav,
+                                             rmd->kqrmd_nfrag, rmd->kqrmd_frag,
+                                             ktx->ktx_nfrag, ktx->ktx_frags);
+                break;
+        }
+                
         if (ndatav < 0) {
                 CERROR ("Can't create datavec: %d\n", ndatav);
-                return (ndatav);
+                rc = ndatav;
+                goto out;
         }
 #endif
 
-        /* Our caller will start to race with kqswnal_dma_reply_complete... */
-        LASSERT (atomic_read (&krx->krx_refcount) == 1);
-        atomic_set (&krx->krx_refcount, 2);
+        LASSERT (atomic_read(&krx->krx_refcount) > 0);
+        /* Take an extra ref for the completion callback */
+        atomic_inc(&krx->krx_refcount);
 
-#if MULTIRAIL_EKC
-        rc = ep_complete_rpc(krx->krx_rxd, kqswnal_dma_reply_complete, ktx, 
-                             &kqswnal_rpc_success,
-                             ktx->ktx_frags, rmd->kqrmd_frag, rmd->kqrmd_nfrag);
-        if (rc == EP_SUCCESS)
-                return (0);
+        switch (type) {
+        default:
+                LBUG();
 
-        /* Well we tried... */
-        krx->krx_rpc_reply_needed = 0;
+        case PTL_MSG_GET:
+#if MULTIRAIL_EKC
+                eprc = ep_complete_rpc(krx->krx_rxd, 
+                                       kqswnal_rdma_store_complete, ktx, 
+                                       &kqswnal_data.kqn_rpc_success,
+                                       ktx->ktx_frags, rmd->kqrmd_frag, rmd->kqrmd_nfrag);
 #else
-        rc = ep_complete_rpc (krx->krx_rxd, kqswnal_dma_reply_complete, ktx,
-                              &kqswnal_rpc_success, datav, ndatav);
-        if (rc == EP_SUCCESS)
-                return (0);
-
-        /* "old" EKC destroys rxd on failed completion */
-        krx->krx_rxd = NULL;
+                eprc = ep_complete_rpc (krx->krx_rxd, 
+                                        kqswnal_rdma_store_complete, ktx,
+                                        &kqswnal_data.kqn_rpc_success, 
+                                        datav, ndatav);
+                if (eprc != EP_SUCCESS) /* "old" EKC destroys rxd on failed completion */
+                        krx->krx_rxd = NULL;
 #endif
+                if (eprc != EP_SUCCESS) {
+                        CERROR("can't complete RPC: %d\n", eprc);
+                        /* don't re-attempt RPC completion */
+                        krx->krx_rpc_reply_needed = 0;
+                        rc = -ECONNABORTED;
+                }
+                break;
+                
+        case PTL_MSG_PUT:
+#if MULTIRAIL_EKC
+                eprc = ep_rpc_get (krx->krx_rxd, 
+                                   kqswnal_rdma_fetch_complete, ktx,
+                                   rmd->kqrmd_frag, ktx->ktx_frags, ktx->ktx_nfrag);
+#else
+                eprc = ep_rpc_get (krx->krx_rxd,
+                                   kqswnal_rdma_fetch_complete, ktx,
+                                   datav, ndatav);
+#endif
+                if (eprc != EP_SUCCESS) {
+                        CERROR("ep_rpc_get failed: %d\n", eprc);
+                        rc = -ECONNABORTED;
+                }
+                break;
+        }
 
-        CERROR("can't complete RPC: %d\n", rc);
-
-        /* reset refcount back to 1: we're not going to be racing with
-         * kqswnal_dma_reply_complete. */
-        atomic_set (&krx->krx_refcount, 1);
+        if (rc != 0) {
+                /* The launch failed so the callback will never run:
+                 * drop the ref taken for it above */
+                kqswnal_rx_decref(krx);
+        }
+
+ out:
+        /* NB every 'goto out' above happens before the callback's ref is
+         * taken, so only the tx descriptor needs releasing for those */
+        if (rc != 0)
+                kqswnal_put_idle_tx (ktx);
 
-        return (-ECONNABORTED);
+        atomic_dec(&kqswnal_data.kqn_pending_txs);
+        return (rc);
 }
 
 static ptl_err_t
-kqswnal_sendmsg (nal_cb_t     *nal,
+kqswnal_sendmsg (lib_nal_t    *nal,
                  void         *private,
                  lib_msg_t    *libmsg,
                  ptl_hdr_t    *hdr,
@@ -916,6 +1014,8 @@ kqswnal_sendmsg (nal_cb_t     *nal,
         int                sumoff;
         int                sumnob;
 #endif
+        /* NB 1. hdr is in network byte order */
+        /*    2. 'private' depends on the message type */
         
         CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid: "LPX64
                " pid %u\n", payload_nob, payload_niov, nid, pid);
@@ -934,6 +1034,15 @@ kqswnal_sendmsg (nal_cb_t     *nal,
                 return (PTL_FAIL);
         }
 
+        if (type == PTL_MSG_REPLY &&            /* can I look in 'private' */
+            ((kqswnal_rx_t *)private)->krx_rpc_reply_needed) { /* is it an RPC */
+                /* Must be a REPLY for an optimized GET */
+                rc = kqswnal_rdma ((kqswnal_rx_t *)private, libmsg, PTL_MSG_GET,
+                                   payload_niov, payload_iov, payload_kiov, 
+                                   payload_offset, payload_nob);
+                return ((rc == 0) ? PTL_OK : PTL_FAIL);
+        }
+
         targetnid = nid;
         if (kqswnal_nid2elanid (nid) < 0) {     /* Can't send direct: find gateway? */
                 rc = kpr_lookup (&kqswnal_data.kqn_router, nid, 
@@ -956,35 +1065,16 @@ kqswnal_sendmsg (nal_cb_t     *nal,
                                           type == PTL_MSG_REPLY ||
                                           in_interrupt()));
         if (ktx == NULL) {
-                kqswnal_cerror_hdr (hdr);
+                CERROR ("Can't get txd for msg type %d for "LPX64"\n",
+                        type, libmsg->ev.initiator.nid);
                 return (PTL_NO_SPACE);
         }
 
+        ktx->ktx_state   = KTX_SENDING;
         ktx->ktx_nid     = targetnid;
         ktx->ktx_args[0] = private;
         ktx->ktx_args[1] = libmsg;
-
-        if (type == PTL_MSG_REPLY &&
-            ((kqswnal_rx_t *)private)->krx_rpc_reply_needed) {
-                if (nid != targetnid ||
-                    kqswnal_nid2elanid(nid) != 
-                    ep_rxd_node(((kqswnal_rx_t *)private)->krx_rxd)) {
-                        CERROR("Optimized reply nid conflict: "
-                               "nid "LPX64" via "LPX64" elanID %d\n",
-                               nid, targetnid,
-                               ep_rxd_node(((kqswnal_rx_t *)private)->krx_rxd));
-                        rc = -EINVAL;
-                        goto out;
-                }
-
-                /* peer expects RPC completion with GET data */
-                rc = kqswnal_dma_reply (ktx, payload_niov, 
-                                        payload_iov, payload_kiov, 
-                                        payload_offset, payload_nob);
-                if (rc != 0)
-                        CERROR ("Can't DMA reply to "LPX64": %d\n", nid, rc);
-                goto out;
-        }
+        ktx->ktx_args[2] = NULL;    /* set when a GET commits to REPLY */
 
         memcpy (ktx->ktx_buffer, hdr, sizeof (*hdr)); /* copy hdr from caller's stack */
         ktx->ktx_wire_hdr = (ptl_hdr_t *)ktx->ktx_buffer;
@@ -1027,28 +1117,31 @@ kqswnal_sendmsg (nal_cb_t     *nal,
         memcpy(ktx->ktx_buffer + sizeof(*hdr) + sizeof(csum), &csum, sizeof(csum));
 #endif
 
-        if (kqswnal_tunables.kqn_optimized_gets &&
-            type == PTL_MSG_GET &&              /* doing a GET */
-            nid == targetnid) {                 /* not forwarding */
+        /* The first frag will be the pre-mapped buffer for (at least) the
+         * portals header. */
+        ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
+
+        if (nid == targetnid &&                 /* not forwarding */
+            ((type == PTL_MSG_GET &&            /* optimize GET? */
+              kqswnal_tunables.kqn_optimized_gets != 0 &&
+              NTOH__u32(hdr->msg.get.sink_length) >= kqswnal_tunables.kqn_optimized_gets) ||
+             (type == PTL_MSG_PUT &&            /* optimize PUT? */
+              kqswnal_tunables.kqn_optimized_puts != 0 &&
+              payload_nob >= kqswnal_tunables.kqn_optimized_puts))) {
                 lib_md_t           *md = libmsg->md;
                 kqswnal_remotemd_t *rmd = (kqswnal_remotemd_t *)(ktx->ktx_buffer + KQSW_HDR_SIZE);
                 
-                /* Optimised path: I send over the Elan vaddrs of the get
-                 * sink buffers, and my peer DMAs directly into them.
+                /* Optimised path: I send over the Elan vaddrs of the local
+                 * buffers, and my peer DMAs directly to/from them.
                  *
                  * First I set up ktx as if it was going to send this
                  * payload, (it needs to map it anyway).  This fills
                  * ktx_frags[1] and onward with the network addresses
                  * of the GET sink frags.  I copy these into ktx_buffer,
-                 * immediately after the header, and send that as my GET
-                 * message.
-                 *
-                 * Note that the addresses are sent in native endian-ness.
-                 * When EKC copes with different endian nodes, I'll fix
-                 * this (and eat my hat :) */
+                 * immediately after the header, and send that as my
+                 * message. */
 
-                ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
-                ktx->ktx_state = KTX_GETTING;
+                ktx->ktx_state = (type == PTL_MSG_PUT) ? KTX_PUTTING : KTX_GETTING;
 
                 if ((libmsg->md->options & PTL_MD_KIOV) != 0) 
                         rc = kqswnal_map_tx_kiov (ktx, 0, md->length,
@@ -1078,12 +1171,24 @@ kqswnal_sendmsg (nal_cb_t     *nal,
                 ktx->ktx_frags[0].Base = ktx->ktx_ebuffer;
                 ktx->ktx_frags[0].Len = KQSW_HDR_SIZE + payload_nob;
 #endif
+                if (type == PTL_MSG_GET) {
+                        /* Allocate reply message now while I'm in thread context */
+                        ktx->ktx_args[2] = lib_create_reply_msg (&kqswnal_lib,
+                                                                 nid, libmsg);
+                        if (ktx->ktx_args[2] == NULL) {
+                                /* exit path only frees ktx and fails when rc != 0 */
+                                rc = -ENOMEM;
+                                goto out;
+                        }
+
+                        /* NB finalizing the REPLY message is my
+                         * responsibility now, whatever happens. */
+                }
+                
         } else if (payload_nob <= KQSW_TX_MAXCONTIG) {
 
                 /* small message: single frag copied into the pre-mapped buffer */
 
-                ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
-                ktx->ktx_state = KTX_SENDING;
 #if MULTIRAIL_EKC
                 ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer,
                               0, KQSW_HDR_SIZE + payload_nob);
@@ -1105,8 +1207,6 @@ kqswnal_sendmsg (nal_cb_t     *nal,
 
                 /* large message: multiple frags: first is hdr in pre-mapped buffer */
 
-                ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
-                ktx->ktx_state = KTX_SENDING;
 #if MULTIRAIL_EKC
                 ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer,
                               0, KQSW_HDR_SIZE);
@@ -1135,15 +1235,29 @@ kqswnal_sendmsg (nal_cb_t     *nal,
                rc == 0 ? "Sent" : "Failed to send",
                payload_nob, nid, targetnid, rc);
 
-        if (rc != 0)
+        if (rc != 0) {
+                if (ktx->ktx_state == KTX_GETTING &&
+                    ktx->ktx_args[2] != NULL) {
+                        /* We committed to reply, but there was a problem
+                         * launching the GET.  We can't avoid delivering a
+                         * REPLY event since we committed above, so we
+                         * pretend the GET succeeded but the REPLY
+                         * failed. */
+                        rc = 0;
+                        lib_finalize (&kqswnal_lib, private, libmsg, PTL_OK);
+                        lib_finalize (&kqswnal_lib, private,
+                                      (lib_msg_t *)ktx->ktx_args[2], PTL_FAIL);
+                }
+                
                 kqswnal_put_idle_tx (ktx);
-
+        }
+        
         atomic_dec(&kqswnal_data.kqn_pending_txs);
         return (rc == 0 ? PTL_OK : PTL_FAIL);
 }
 
 static ptl_err_t
-kqswnal_send (nal_cb_t     *nal,
+kqswnal_send (lib_nal_t    *nal,
               void         *private,
               lib_msg_t    *libmsg,
               ptl_hdr_t    *hdr,
@@ -1161,7 +1275,7 @@ kqswnal_send (nal_cb_t     *nal,
 }
 
 static ptl_err_t
-kqswnal_send_pages (nal_cb_t     *nal,
+kqswnal_send_pages (lib_nal_t    *nal,
                     void         *private,
                     lib_msg_t    *libmsg,
                     ptl_hdr_t    *hdr,
@@ -1200,7 +1314,7 @@ kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
         if (ktx == NULL)        /* can't get txd right now */
                 return;         /* fwd will be scheduled when tx desc freed */
 
-        if (nid == kqswnal_lib.ni.nid)          /* gateway is me */
+        if (nid == kqswnal_lib.libnal_ni.ni_pid.nid) /* gateway is me */
                 nid = fwd->kprfd_target_nid;    /* target is final dest */
 
         if (kqswnal_nid2elanid (nid) < 0) {
@@ -1254,9 +1368,8 @@ kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
         if (rc != 0) {
                 CERROR ("Failed to forward [%p] to "LPX64": %d\n", fwd, nid, rc);
 
-                kqswnal_put_idle_tx (ktx);
                 /* complete now (with failure) */
-                kpr_fwd_done (&kqswnal_data.kqn_router, fwd, rc);
+                kqswnal_tx_done (ktx, rc);
         }
 
         atomic_dec(&kqswnal_data.kqn_pending_txs);
@@ -1277,29 +1390,48 @@ kqswnal_fwd_callback (void *arg, int error)
                        NTOH__u64(hdr->src_nid), NTOH__u64(hdr->dest_nid),error);
         }
 
-        kqswnal_requeue_rx (krx);
+        LASSERT (atomic_read(&krx->krx_refcount) == 1);
+        kqswnal_rx_decref (krx);
 }
 
 void
-kqswnal_dma_reply_complete (EP_RXD *rxd) 
+kqswnal_requeue_rx (kqswnal_rx_t *krx)
 {
-        int           status = ep_rxd_status(rxd);
-        kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd);
-        kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
-        lib_msg_t    *msg = (lib_msg_t *)ktx->ktx_args[1];
-        
-        CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR,
-               "rxd %p, ktx %p, status %d\n", rxd, ktx, status);
+        LASSERT (atomic_read(&krx->krx_refcount) == 0);
+        LASSERT (!krx->krx_rpc_reply_needed);
 
-        LASSERT (krx->krx_rxd == rxd);
-        LASSERT (krx->krx_rpc_reply_needed);
+        krx->krx_state = KRX_POSTED;
 
-        krx->krx_rpc_reply_needed = 0;
-        kqswnal_rx_done (krx);
+#if MULTIRAIL_EKC
+        if (kqswnal_data.kqn_shuttingdown) {
+                /* free EKC rxd on shutdown */
+                ep_complete_receive(krx->krx_rxd);
+        } else {
+                /* repost receive */
+                ep_requeue_receive(krx->krx_rxd, 
+                                   kqswnal_rxhandler, krx,
+                                   &krx->krx_elanbuffer, 0);
+        }
+#else                
+        if (kqswnal_data.kqn_shuttingdown)
+                return;
 
-        lib_finalize (&kqswnal_lib, NULL, msg,
-                      (status == EP_SUCCESS) ? PTL_OK : PTL_FAIL);
-        kqswnal_put_idle_tx (ktx);
+        if (krx->krx_rxd == NULL) {
+                /* We had a failed ep_complete_rpc() which nukes the
+                 * descriptor in "old" EKC */
+                int eprc = ep_queue_receive(krx->krx_eprx, 
+                                            kqswnal_rxhandler, krx,
+                                            krx->krx_elanbuffer, 
+                                            krx->krx_npages * PAGE_SIZE, 0);
+                LASSERT (eprc == EP_SUCCESS);
+                /* We don't handle failure here; it's incredibly rare
+                 * (never reported?) and only happens with "old" EKC */
+        } else {
+                ep_requeue_receive(krx->krx_rxd, kqswnal_rxhandler, krx,
+                                   krx->krx_elanbuffer, 
+                                   krx->krx_npages * PAGE_SIZE);
+        }
+#endif
 }
 
 void
@@ -1319,71 +1451,45 @@ kqswnal_rpc_complete (EP_RXD *rxd)
 }
 
 void
-kqswnal_requeue_rx (kqswnal_rx_t *krx) 
+kqswnal_rx_done (kqswnal_rx_t *krx) 
 {
-        int   rc;
+        int           rc;
+        EP_STATUSBLK *sblk;
 
         LASSERT (atomic_read(&krx->krx_refcount) == 0);
 
         if (krx->krx_rpc_reply_needed) {
+                /* We've not completed the peer's RPC yet... */
+                sblk = (krx->krx_rpc_reply_status == 0) ? 
+                       &kqswnal_data.kqn_rpc_success : 
+                       &kqswnal_data.kqn_rpc_failed;
 
-                /* We failed to complete the peer's optimized GET (e.g. we
-                 * couldn't map the source buffers).  We complete the
-                 * peer's EKC rpc now with failure. */
+                LASSERT (!in_interrupt());
 #if MULTIRAIL_EKC
-                rc = ep_complete_rpc(krx->krx_rxd, kqswnal_rpc_complete, krx,
-                                     &kqswnal_rpc_failed, NULL, NULL, 0);
+                rc = ep_complete_rpc(krx->krx_rxd, 
+                                     kqswnal_rpc_complete, krx,
+                                     sblk, NULL, NULL, 0);
                 if (rc == EP_SUCCESS)
                         return;
-                
-                CERROR("can't complete RPC: %d\n", rc);
 #else
-                if (krx->krx_rxd != NULL) {
-                        /* We didn't try (and fail) to complete earlier... */
-                        rc = ep_complete_rpc(krx->krx_rxd, 
-                                             kqswnal_rpc_complete, krx,
-                                             &kqswnal_rpc_failed, NULL, 0);
-                        if (rc == EP_SUCCESS)
-                                return;
-
-                        CERROR("can't complete RPC: %d\n", rc);
-                }
-                
-                /* NB the old ep_complete_rpc() frees rxd on failure, so we
-                 * have to requeue from scratch here, unless we're shutting
-                 * down */
-                if (kqswnal_data.kqn_shuttingdown)
+                rc = ep_complete_rpc(krx->krx_rxd, 
+                                     kqswnal_rpc_complete, krx,
+                                     sblk, NULL, 0);
+                if (rc == EP_SUCCESS)
                         return;
 
-                rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
-                                      krx->krx_elanbuffer, 
-                                      krx->krx_npages * PAGE_SIZE, 0);
-                LASSERT (rc == EP_SUCCESS);
-                /* We don't handle failure here; it's incredibly rare
-                 * (never reported?) and only happens with "old" EKC */
-                return;
+                /* "old" EKC destroys rxd on failed completion */
+                krx->krx_rxd = NULL;
 #endif
+                CERROR("can't complete RPC: %d\n", rc);
+                krx->krx_rpc_reply_needed = 0;
         }
 
-#if MULTIRAIL_EKC
-        if (kqswnal_data.kqn_shuttingdown) {
-                /* free EKC rxd on shutdown */
-                ep_complete_receive(krx->krx_rxd);
-        } else {
-                /* repost receive */
-                ep_requeue_receive(krx->krx_rxd, kqswnal_rxhandler, krx,
-                                   &krx->krx_elanbuffer, 0);
-        }
-#else                
-        /* don't actually requeue on shutdown */
-        if (!kqswnal_data.kqn_shuttingdown) 
-                ep_requeue_receive(krx->krx_rxd, kqswnal_rxhandler, krx,
-                                   krx->krx_elanbuffer, krx->krx_npages * PAGE_SIZE);
-#endif
+        kqswnal_requeue_rx(krx);
 }
         
 void
-kqswnal_rx (kqswnal_rx_t *krx)
+kqswnal_parse (kqswnal_rx_t *krx)
 {
         ptl_hdr_t      *hdr = (ptl_hdr_t *) page_address(krx->krx_kiov[0].kiov_page);
         ptl_nid_t       dest_nid = NTOH__u64 (hdr->dest_nid);
@@ -1391,25 +1497,28 @@ kqswnal_rx (kqswnal_rx_t *krx)
         int             nob;
         int             niov;
 
-        LASSERT (atomic_read(&krx->krx_refcount) == 0);
+        LASSERT (atomic_read(&krx->krx_refcount) == 1);
+
+        if (dest_nid == kqswnal_lib.libnal_ni.ni_pid.nid) { /* It's for me :) */
+                /* I ignore parse errors since I'm not consuming a byte
+                 * stream */
+                (void)lib_parse (&kqswnal_lib, hdr, krx);
 
-        if (dest_nid == kqswnal_lib.ni.nid) { /* It's for me :) */
-                atomic_set(&krx->krx_refcount, 1);
-                lib_parse (&kqswnal_lib, hdr, krx);
-                kqswnal_rx_done(krx);
+                /* Drop my ref; any RDMA activity takes an additional ref */
+                kqswnal_rx_decref(krx);
                 return;
         }
 
 #if KQSW_CHECKSUM
-        CERROR ("checksums for forwarded packets not implemented\n");
-        LBUG ();
+        LASSERTF (0, "checksums for forwarded packets not implemented\n");
 #endif
+
         if (kqswnal_nid2elanid (dest_nid) >= 0)  /* should have gone direct to peer */
         {
                 CERROR("dropping packet from "LPX64" for "LPX64
                        ": target is peer\n", NTOH__u64(hdr->src_nid), dest_nid);
 
-                kqswnal_requeue_rx (krx);
+                kqswnal_rx_decref (krx);
                 return;
         }
 
@@ -1451,7 +1560,9 @@ kqswnal_rxhandler(EP_RXD *rxd)
                rxd, krx, nob, status);
 
         LASSERT (krx != NULL);
-
+        LASSERT (krx->krx_state == KRX_POSTED);
+        
+        krx->krx_state = KRX_PARSE;
         krx->krx_rxd = rxd;
         krx->krx_nob = nob;
 #if MULTIRAIL_EKC
@@ -1459,7 +1570,10 @@ kqswnal_rxhandler(EP_RXD *rxd)
 #else
         krx->krx_rpc_reply_needed = ep_rxd_isrpc(rxd);
 #endif
-        
+        /* Default to failure if an RPC reply is requested but not handled */
+        krx->krx_rpc_reply_status = -EPROTO;
+        atomic_set (&krx->krx_refcount, 1);
+
         /* must receive a whole header to be able to parse */
         if (status != EP_SUCCESS || nob < sizeof (ptl_hdr_t))
         {
@@ -1475,12 +1589,12 @@ kqswnal_rxhandler(EP_RXD *rxd)
                         CERROR("receive status failed with status %d nob %d\n",
                                ep_rxd_status(rxd), nob);
 #endif
-                kqswnal_requeue_rx (krx);
+                kqswnal_rx_decref(krx);
                 return;
         }
 
         if (!in_interrupt()) {
-                kqswnal_rx (krx);
+                kqswnal_parse(krx);
                 return;
         }
 
@@ -1540,7 +1654,7 @@ kqswnal_csum_error (kqswnal_rx_t *krx, int ishdr)
 #endif
 
 static ptl_err_t
-kqswnal_recvmsg (nal_cb_t     *nal,
+kqswnal_recvmsg (lib_nal_t    *nal,
                  void         *private,
                  lib_msg_t    *libmsg,
                  unsigned int  niov,
@@ -1552,16 +1666,18 @@ kqswnal_recvmsg (nal_cb_t     *nal,
 {
         kqswnal_rx_t *krx = (kqswnal_rx_t *)private;
         char         *buffer = page_address(krx->krx_kiov[0].kiov_page);
+        ptl_hdr_t    *hdr = (ptl_hdr_t *)buffer;
         int           page;
         char         *page_ptr;
         int           page_nob;
         char         *iov_ptr;
         int           iov_nob;
         int           frag;
+        int           rc;
 #if KQSW_CHECKSUM
         kqsw_csum_t   senders_csum;
         kqsw_csum_t   payload_csum = 0;
-        kqsw_csum_t   hdr_csum = kqsw_csum(0, buffer, sizeof(ptl_hdr_t));
+        kqsw_csum_t   hdr_csum = kqsw_csum(0, hdr, sizeof(*hdr));
         size_t        csum_len = mlen;
         int           csum_frags = 0;
         int           csum_nob = 0;
@@ -1574,8 +1690,18 @@ kqswnal_recvmsg (nal_cb_t     *nal,
         if (senders_csum != hdr_csum)
                 kqswnal_csum_error (krx, 1);
 #endif
+        /* NB lib_parse() has already flipped *hdr */
+
         CDEBUG(D_NET,"kqswnal_recv, mlen="LPSZ", rlen="LPSZ"\n", mlen, rlen);
 
+        if (krx->krx_rpc_reply_needed &&
+            hdr->type == PTL_MSG_PUT) {
+                /* This must be an optimized PUT */
+                rc = kqswnal_rdma (krx, libmsg, PTL_MSG_PUT,
+                                   niov, iov, kiov, offset, mlen);
+                return (rc == 0 ? PTL_OK : PTL_FAIL);
+        }
+
         /* What was actually received must be >= payload. */
         LASSERT (mlen <= rlen);
         if (krx->krx_nob < KQSW_HDR_SIZE + mlen) {
@@ -1691,7 +1817,7 @@ kqswnal_recvmsg (nal_cb_t     *nal,
 }
 
 static ptl_err_t
-kqswnal_recv(nal_cb_t     *nal,
+kqswnal_recv(lib_nal_t    *nal,
              void         *private,
              lib_msg_t    *libmsg,
              unsigned int  niov,
@@ -1706,7 +1832,7 @@ kqswnal_recv(nal_cb_t     *nal,
 }
 
 static ptl_err_t
-kqswnal_recv_pages (nal_cb_t     *nal,
+kqswnal_recv_pages (lib_nal_t    *nal,
                     void         *private,
                     lib_msg_t    *libmsg,
                     unsigned int  niov,
@@ -1766,7 +1892,18 @@ kqswnal_scheduler (void *arg)
                         spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
                                                flags);
 
-                        kqswnal_rx (krx);
+                        switch (krx->krx_state) {
+                        case KRX_PARSE:
+                                kqswnal_parse (krx);
+                                break;
+                        case KRX_COMPLETING:
+                                /* Drop last ref to reply to RPC and requeue */
+                                LASSERT (krx->krx_rpc_reply_needed);
+                                kqswnal_rx_decref (krx);
+                                break;
+                        default:
+                                LBUG();
+                        }
 
                         did_something = 1;
                         spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags);
@@ -1835,20 +1972,12 @@ kqswnal_scheduler (void *arg)
         return (0);
 }
 
-nal_cb_t kqswnal_lib =
+lib_nal_t kqswnal_lib =
 {
-        nal_data:       &kqswnal_data,         /* NAL private data */
-        cb_send:        kqswnal_send,
-        cb_send_pages:  kqswnal_send_pages,
-        cb_recv:        kqswnal_recv,
-        cb_recv_pages:  kqswnal_recv_pages,
-        cb_read:        kqswnal_read,
-        cb_write:       kqswnal_write,
-        cb_malloc:      kqswnal_malloc,
-        cb_free:        kqswnal_free,
-        cb_printf:      kqswnal_printf,
-        cb_cli:         kqswnal_cli,
-        cb_sti:         kqswnal_sti,
-        cb_callback:    kqswnal_callback,
-        cb_dist:        kqswnal_dist
+        libnal_data:       &kqswnal_data,         /* NAL private data */
+        libnal_send:        kqswnal_send,
+        libnal_send_pages:  kqswnal_send_pages,
+        libnal_recv:        kqswnal_recv,
+        libnal_recv_pages:  kqswnal_recv_pages,
+        libnal_dist:        kqswnal_dist
 };
index 32bbbec..9d39cb1 100644 (file)
@@ -74,83 +74,9 @@ static ctl_table ksocknal_top_ctl_table[] = {
 #endif
 
 int
-ksocknal_api_forward(nal_t *nal, int id, void *args, size_t args_len,
-                       void *ret, size_t ret_len)
-{
-        ksock_nal_data_t *k;
-        nal_cb_t *nal_cb;
-
-        k = nal->nal_data;
-        nal_cb = k->ksnd_nal_cb;
-
-        lib_dispatch(nal_cb, k, id, args, ret); /* ksocknal_send needs k */
-        return PTL_OK;
-}
-
-void
-ksocknal_api_lock(nal_t *nal, unsigned long *flags)
-{
-        ksock_nal_data_t *k;
-        nal_cb_t *nal_cb;
-
-        k = nal->nal_data;
-        nal_cb = k->ksnd_nal_cb;
-        nal_cb->cb_cli(nal_cb,flags);
-}
-
-void
-ksocknal_api_unlock(nal_t *nal, unsigned long *flags)
-{
-        ksock_nal_data_t *k;
-        nal_cb_t *nal_cb;
-
-        k = nal->nal_data;
-        nal_cb = k->ksnd_nal_cb;
-        nal_cb->cb_sti(nal_cb,flags);
-}
-
-int
-ksocknal_api_yield(nal_t *nal, unsigned long *flags, int milliseconds)
-{
-       /* NB called holding statelock */
-        wait_queue_t       wait;
-       unsigned long      now = jiffies;
-
-       CDEBUG (D_NET, "yield\n");
-
-       if (milliseconds == 0) {
-                our_cond_resched();
-               return 0;
-       }
-
-       init_waitqueue_entry(&wait, current);
-       set_current_state (TASK_INTERRUPTIBLE);
-       add_wait_queue (&ksocknal_data.ksnd_yield_waitq, &wait);
-
-       ksocknal_api_unlock(nal, flags);
-
-       if (milliseconds < 0)
-               schedule ();
-       else
-               schedule_timeout((milliseconds * HZ) / 1000);
-       
-       ksocknal_api_lock(nal, flags);
-
-       remove_wait_queue (&ksocknal_data.ksnd_yield_waitq, &wait);
-
-       if (milliseconds > 0) {
-               milliseconds -= ((jiffies - now) * 1000) / HZ;
-               if (milliseconds < 0)
-                       milliseconds = 0;
-       }
-       
-       return (milliseconds);
-}
-
-int
 ksocknal_set_mynid(ptl_nid_t nid)
 {
-        lib_ni_t *ni = &ksocknal_lib.ni;
+        lib_ni_t *ni = &ksocknal_lib.libnal_ni;
 
         /* FIXME: we have to do this because we call lib_init() at module
          * insertion time, which is before we have 'mynid' available.  lib_init
@@ -159,9 +85,9 @@ ksocknal_set_mynid(ptl_nid_t nid)
          * problem. */
 
         CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n",
-               nid, ni->nid);
+               nid, ni->ni_pid.nid);
 
-        ni->nid = nid;
+        ni->ni_pid.nid = nid;
         return (0);
 }
 
@@ -1527,14 +1453,18 @@ ksocknal_api_shutdown (nal_t *nal)
 
                 /* flag threads to terminate; wake and wait for them to die */
                 ksocknal_data.ksnd_shuttingdown = 1;
+                mb();
                 wake_up_all (&ksocknal_data.ksnd_autoconnectd_waitq);
                 wake_up_all (&ksocknal_data.ksnd_reaper_waitq);
 
                 for (i = 0; i < SOCKNAL_N_SCHED; i++)
                        wake_up_all(&ksocknal_data.ksnd_schedulers[i].kss_waitq);
 
+                i = 4;
                 while (atomic_read (&ksocknal_data.ksnd_nthreads) != 0) {
-                        CDEBUG (D_NET, "waitinf for %d threads to terminate\n",
+                        i++;
+                        CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
+                               "waiting for %d threads to terminate\n",
                                 atomic_read (&ksocknal_data.ksnd_nthreads));
                         set_current_state (TASK_UNINTERRUPTIBLE);
                         schedule_timeout (HZ);
@@ -1590,7 +1520,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
 
         if (nal->nal_refct != 0) {
                 if (actual_limits != NULL)
-                        *actual_limits = ksocknal_lib.ni.actual_limits;
+                        *actual_limits = ksocknal_lib.libnal_ni.ni_actual_limits;
                 /* This module got the first ref */
                 PORTAL_MODULE_USE;
                 return (PTL_OK);
@@ -1613,10 +1543,6 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
 
         rwlock_init(&ksocknal_data.ksnd_global_lock);
 
-        ksocknal_data.ksnd_nal_cb = &ksocknal_lib;
-        spin_lock_init (&ksocknal_data.ksnd_nal_cb_lock);
-        init_waitqueue_head(&ksocknal_data.ksnd_yield_waitq);
-        
         spin_lock_init(&ksocknal_data.ksnd_small_fmp.fmp_lock);
         INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_idle_fmbs);
         INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns);
@@ -1646,7 +1572,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         PORTAL_ALLOC(ksocknal_data.ksnd_schedulers,
                      sizeof(ksock_sched_t) * SOCKNAL_N_SCHED);
         if (ksocknal_data.ksnd_schedulers == NULL) {
-                ksocknal_api_shutdown (&ksocknal_api);
+                ksocknal_api_shutdown (nal);
                 return (-ENOMEM);
         }
 
@@ -1666,11 +1592,11 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         process_id.pid = 0;
         process_id.nid = 0;
         
-        rc = lib_init(&ksocknal_lib, process_id,
+        rc = lib_init(&ksocknal_lib, nal, process_id,
                       requested_limits, actual_limits);
         if (rc != PTL_OK) {
                 CERROR("lib_init failed: error %d\n", rc);
-                ksocknal_api_shutdown (&ksocknal_api);
+                ksocknal_api_shutdown (nal);
                 return (rc);
         }
 
@@ -1682,7 +1608,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
                 if (rc != 0) {
                         CERROR("Can't spawn socknal scheduler[%d]: %d\n",
                                i, rc);
-                        ksocknal_api_shutdown (&ksocknal_api);
+                        ksocknal_api_shutdown (nal);
                         return (rc);
                 }
         }
@@ -1691,7 +1617,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
                 rc = ksocknal_thread_start (ksocknal_autoconnectd, (void *)((long)i));
                 if (rc != 0) {
                         CERROR("Can't spawn socknal autoconnectd: %d\n", rc);
-                        ksocknal_api_shutdown (&ksocknal_api);
+                        ksocknal_api_shutdown (nal);
                         return (rc);
                 }
         }
@@ -1699,7 +1625,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         rc = ksocknal_thread_start (ksocknal_reaper, NULL);
         if (rc != 0) {
                 CERROR ("Can't spawn socknal reaper: %d\n", rc);
-                ksocknal_api_shutdown (&ksocknal_api);
+                ksocknal_api_shutdown (nal);
                 return (rc);
         }
 
@@ -1725,7 +1651,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
                         PORTAL_ALLOC(fmb, offsetof(ksock_fmb_t, 
                                                    fmb_kiov[pool->fmp_buff_pages]));
                         if (fmb == NULL) {
-                                ksocknal_api_shutdown(&ksocknal_api);
+                                ksocknal_api_shutdown(nal);
                                 return (-ENOMEM);
                         }
 
@@ -1735,7 +1661,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
                                 fmb->fmb_kiov[j].kiov_page = alloc_page(GFP_KERNEL);
 
                                 if (fmb->fmb_kiov[j].kiov_page == NULL) {
-                                        ksocknal_api_shutdown (&ksocknal_api);
+                                        ksocknal_api_shutdown (nal);
                                         return (-ENOMEM);
                                 }
 
@@ -1749,7 +1675,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         rc = libcfs_nal_cmd_register(SOCKNAL, &ksocknal_cmd, NULL);
         if (rc != 0) {
                 CERROR ("Can't initialise command interface (rc = %d)\n", rc);
-                ksocknal_api_shutdown (&ksocknal_api);
+                ksocknal_api_shutdown (nal);
                 return (rc);
         }
 
@@ -1794,14 +1720,8 @@ ksocknal_module_init (void)
         /* check ksnr_connected/connecting field large enough */
         LASSERT(SOCKNAL_CONN_NTYPES <= 4);
         
-        ksocknal_api.startup  = ksocknal_api_startup;
-        ksocknal_api.forward  = ksocknal_api_forward;
-        ksocknal_api.shutdown = ksocknal_api_shutdown;
-        ksocknal_api.lock     = ksocknal_api_lock;
-        ksocknal_api.unlock   = ksocknal_api_unlock;
-        ksocknal_api.nal_data = &ksocknal_data;
-
-        ksocknal_lib.nal_data = &ksocknal_data;
+        ksocknal_api.nal_ni_init = ksocknal_api_startup;
+        ksocknal_api.nal_ni_fini = ksocknal_api_shutdown;
 
         /* Initialise dynamic tunables to defaults once only */
         ksocknal_tunables.ksnd_io_timeout = SOCKNAL_IO_TIMEOUT;
index 87b23dc..ff73f71 100644 (file)
@@ -160,10 +160,6 @@ typedef struct {
         struct list_head *ksnd_peers;           /* hash table of all my known peers */
         int               ksnd_peer_hash_size;  /* size of ksnd_peers */
 
-        nal_cb_t         *ksnd_nal_cb;
-        spinlock_t        ksnd_nal_cb_lock;     /* lib cli/sti lock */
-        wait_queue_head_t ksnd_yield_waitq;     /* where yield waits */
-
         atomic_t          ksnd_nthreads;        /* # live threads */
         int               ksnd_shuttingdown;    /* tell threads to exit */
         ksock_sched_t    *ksnd_schedulers;      /* scheduler state */
@@ -364,7 +360,7 @@ typedef struct ksock_peer
 } ksock_peer_t;
 
 
-extern nal_cb_t         ksocknal_lib;
+extern lib_nal_t        ksocknal_lib;
 extern ksock_nal_data_t ksocknal_data;
 extern ksock_tunables_t ksocknal_tunables;
 
index 21e0abe..5815d16 100644 (file)
  *  LIB functions follow
  *
  */
-ptl_err_t
-ksocknal_read(nal_cb_t *nal, void *private, void *dst_addr,
-              user_ptr src_addr, size_t len)
-{
-        CDEBUG(D_NET, LPX64": reading %ld bytes from %p -> %p\n",
-               nal->ni.nid, (long)len, src_addr, dst_addr);
-
-        memcpy( dst_addr, src_addr, len );
-        return PTL_OK;
-}
-
-ptl_err_t
-ksocknal_write(nal_cb_t *nal, void *private, user_ptr dst_addr,
-               void *src_addr, size_t len)
-{
-        CDEBUG(D_NET, LPX64": writing %ld bytes from %p -> %p\n",
-               nal->ni.nid, (long)len, src_addr, dst_addr);
-
-        memcpy( dst_addr, src_addr, len );
-        return PTL_OK;
-}
-
-void *
-ksocknal_malloc(nal_cb_t *nal, size_t len)
-{
-        void *buf;
-
-        PORTAL_ALLOC(buf, len);
-
-        if (buf != NULL)
-                memset(buf, 0, len);
-
-        return (buf);
-}
-
-void
-ksocknal_free(nal_cb_t *nal, void *buf, size_t len)
-{
-        PORTAL_FREE(buf, len);
-}
-
-void
-ksocknal_printf(nal_cb_t *nal, const char *fmt, ...)
-{
-        va_list ap;
-        char msg[256];
-
-        va_start (ap, fmt);
-        vsnprintf (msg, sizeof (msg), fmt, ap); /* sprint safely */
-        va_end (ap);
-
-        msg[sizeof (msg) - 1] = 0;              /* ensure terminated */
-
-        CDEBUG (D_NET, "%s", msg);
-}
-
-void
-ksocknal_cli(nal_cb_t *nal, unsigned long *flags)
-{
-        ksock_nal_data_t *data = nal->nal_data;
-
-        /* OK to ignore 'flags'; we're only ever serialise threads and
-         * never need to lock out interrupts */
-        spin_lock(&data->ksnd_nal_cb_lock);
-}
-
-void
-ksocknal_sti(nal_cb_t *nal, unsigned long *flags)
-{
-        ksock_nal_data_t *data;
-        data = nal->nal_data;
-
-        /* OK to ignore 'flags'; we're only ever serialise threads and
-         * never need to lock out interrupts */
-        spin_unlock(&data->ksnd_nal_cb_lock);
-}
-
-void
-ksocknal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev)
-{
-        /* holding ksnd_nal_cb_lock */
-
-        if (eq->event_callback != NULL)
-                eq->event_callback(ev);
-        
-        if (waitqueue_active(&ksocknal_data.ksnd_yield_waitq))
-                wake_up_all(&ksocknal_data.ksnd_yield_waitq);
-}
-
 int
-ksocknal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
+ksocknal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist)
 {
         /* I would guess that if ksocknal_get_peer (nid) == NULL,
            and we're not routing, then 'nid' is very distant :) */
-        if ( nal->ni.nid == nid ) {
+        if (nal->libnal_ni.ni_pid.nid == nid) {
                 *dist = 0;
         } else {
                 *dist = 1;
@@ -882,8 +793,8 @@ ksocknal_find_connectable_route_locked (ksock_peer_t *peer)
 {
         struct list_head  *tmp;
         ksock_route_t     *route;
-        ksock_route_t     *candidate = NULL;
-        int                found = 0;
+        ksock_route_t     *first_lazy = NULL;
+        int                found_connecting_or_connected = 0;
         int                bits;
         
         list_for_each (tmp, &peer->ksnp_routes) {
@@ -896,7 +807,7 @@ ksocknal_find_connectable_route_locked (ksock_peer_t *peer)
                         /* All typed connections have been established, or
                          * an untyped connection has been established, or
                          * connections are currently being established */
-                        found = 1;
+                        found_connecting_or_connected = 1;
                         continue;
                 }
 
@@ -904,20 +815,24 @@ ksocknal_find_connectable_route_locked (ksock_peer_t *peer)
                 if (!time_after_eq (jiffies, route->ksnr_timeout))
                         continue;
                 
-                /* always do eager routes */
+                /* eager routes always want to be connected */
                 if (route->ksnr_eager)
                         return (route);
 
-                if (candidate == NULL) {
-                        /* If we don't find any other route that is fully
-                         * connected or connecting, the first connectable
-                         * route is returned.  If it fails to connect, it
-                         * will get placed at the end of the list */
-                        candidate = route;
-                }
+                if (first_lazy == NULL)
+                        first_lazy = route;
         }
-        return (found ? NULL : candidate);
+        
+        /* No eager routes need to be connected.  If some connection has
+         * already been established, or is being established, there's nothing
+         * to do.  Otherwise we return the first lazy route we found.  If it
+         * fails to connect, it will go to the end of the list. */
+
+        if (!list_empty (&peer->ksnp_conns) ||
+            found_connecting_or_connected)
+                return (NULL);
+        
+        return (first_lazy);
 }
 
 ksock_route_t *
@@ -1028,7 +943,7 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid)
 }
 
 ptl_err_t
-ksocknal_sendmsg(nal_cb_t     *nal, 
+ksocknal_sendmsg(lib_nal_t     *nal, 
                  void         *private, 
                  lib_msg_t    *cookie,
                  ptl_hdr_t    *hdr, 
@@ -1125,7 +1040,7 @@ ksocknal_sendmsg(nal_cb_t     *nal,
 }
 
 ptl_err_t
-ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie,
+ksocknal_send (lib_nal_t *nal, void *private, lib_msg_t *cookie,
                ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
                unsigned int payload_niov, struct iovec *payload_iov,
                size_t payload_offset, size_t payload_len)
@@ -1137,7 +1052,7 @@ ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie,
 }
 
 ptl_err_t
-ksocknal_send_pages (nal_cb_t *nal, void *private, lib_msg_t *cookie, 
+ksocknal_send_pages (lib_nal_t *nal, void *private, lib_msg_t *cookie, 
                      ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
                      unsigned int payload_niov, ptl_kiov_t *payload_kiov, 
                      size_t payload_offset, size_t payload_len)
@@ -1159,7 +1074,7 @@ ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
                 fwd->kprfd_gateway_nid, fwd->kprfd_target_nid);
 
         /* I'm the gateway; must be the last hop */
-        if (nid == ksocknal_lib.ni.nid)
+        if (nid == ksocknal_lib.libnal_ni.ni_pid.nid)
                 nid = fwd->kprfd_target_nid;
 
         /* setup iov for hdr */
@@ -1544,7 +1459,8 @@ ksocknal_process_receive (ksock_conn_t *conn)
         switch (conn->ksnc_rx_state) {
         case SOCKNAL_RX_HEADER:
                 if (conn->ksnc_hdr.type != HTON__u32(PTL_MSG_HELLO) &&
-                    NTOH__u64(conn->ksnc_hdr.dest_nid) != ksocknal_lib.ni.nid) {
+                    NTOH__u64(conn->ksnc_hdr.dest_nid) != 
+                    ksocknal_lib.libnal_ni.ni_pid.nid) {
                         /* This packet isn't for me */
                         ksocknal_fwd_parse (conn);
                         switch (conn->ksnc_rx_state) {
@@ -1561,7 +1477,13 @@ ksocknal_process_receive (ksock_conn_t *conn)
                 }
 
                 /* sets wanted_len, iovs etc */
-                lib_parse(&ksocknal_lib, &conn->ksnc_hdr, conn);
+                rc = lib_parse(&ksocknal_lib, &conn->ksnc_hdr, conn);
+
+                if (rc != PTL_OK) {
+                        /* I just received garbage: give up on this conn */
+                        ksocknal_close_conn_and_siblings (conn, rc);
+                        return (-EPROTO);
+                }
 
                 if (conn->ksnc_rx_nob_wanted != 0) { /* need to get payload? */
                         conn->ksnc_rx_state = SOCKNAL_RX_BODY;
@@ -1608,7 +1530,7 @@ ksocknal_process_receive (ksock_conn_t *conn)
 }
 
 ptl_err_t
-ksocknal_recv (nal_cb_t *nal, void *private, lib_msg_t *msg,
+ksocknal_recv (lib_nal_t *nal, void *private, lib_msg_t *msg,
                unsigned int niov, struct iovec *iov, 
                size_t offset, size_t mlen, size_t rlen)
 {
@@ -1636,7 +1558,7 @@ ksocknal_recv (nal_cb_t *nal, void *private, lib_msg_t *msg,
 }
 
 ptl_err_t
-ksocknal_recv_pages (nal_cb_t *nal, void *private, lib_msg_t *msg,
+ksocknal_recv_pages (lib_nal_t *nal, void *private, lib_msg_t *msg,
                      unsigned int niov, ptl_kiov_t *kiov, 
                      size_t offset, size_t mlen, size_t rlen)
 {
@@ -2029,7 +1951,7 @@ ksocknal_hello (struct socket *sock, ptl_nid_t *nid, int *type,
         hmv->version_major = __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR);
         hmv->version_minor = __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR);
 
-        hdr.src_nid = __cpu_to_le64 (ksocknal_lib.ni.nid);
+        hdr.src_nid = __cpu_to_le64 (ksocknal_lib.libnal_ni.ni_pid.nid);
         hdr.type    = __cpu_to_le32 (PTL_MSG_HELLO);
 
         hdr.msg.hello.type = __cpu_to_le32 (*type);
@@ -2698,19 +2620,11 @@ ksocknal_reaper (void *arg)
         return (0);
 }
 
-nal_cb_t ksocknal_lib = {
-        nal_data:       &ksocknal_data,                /* NAL private data */
-        cb_send:         ksocknal_send,
-        cb_send_pages:   ksocknal_send_pages,
-        cb_recv:         ksocknal_recv,
-        cb_recv_pages:   ksocknal_recv_pages,
-        cb_read:         ksocknal_read,
-        cb_write:        ksocknal_write,
-        cb_malloc:       ksocknal_malloc,
-        cb_free:         ksocknal_free,
-        cb_printf:       ksocknal_printf,
-        cb_cli:          ksocknal_cli,
-        cb_sti:          ksocknal_sti,
-        cb_callback:     ksocknal_callback,
-        cb_dist:         ksocknal_dist
+lib_nal_t ksocknal_lib = {
+        libnal_data:       &ksocknal_data,      /* NAL private data */
+        libnal_send:        ksocknal_send,
+        libnal_send_pages:  ksocknal_send_pages,
+        libnal_recv:        ksocknal_recv,
+        libnal_recv_pages:  ksocknal_recv_pages,
+        libnal_dist:        ksocknal_dist
 };
index 4e63c86..06f1578 100644 (file)
 #define PORTAL_MINOR 240
 
 struct nal_cmd_handler {
+        int                  nch_number;
         nal_cmd_handler_fn  *nch_handler;
         void                *nch_private;
 };
 
-static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1];
+static struct nal_cmd_handler nal_cmd[16];
 static DECLARE_MUTEX(nal_cmd_sem);
 
 #ifdef PORTAL_DEBUG
@@ -245,23 +246,53 @@ static inline void freedata(void *data, int len)
         PORTAL_FREE(data, len);
 }
 
+struct nal_cmd_handler *
+libcfs_find_nal_cmd_handler(int nal)
+{
+        int    i;
+
+        for (i = 0; i < sizeof(nal_cmd)/sizeof(nal_cmd[0]); i++)
+                if (nal_cmd[i].nch_handler != NULL &&
+                    nal_cmd[i].nch_number == nal)
+                        return (&nal_cmd[i]);
+
+        return (NULL);
+}
+
 int
 libcfs_nal_cmd_register(int nal, nal_cmd_handler_fn *handler, void *private)
 {
-        int rc = 0;
+        struct nal_cmd_handler *cmd;
+        int                     i;
+        int                     rc;
 
         CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler);
 
-        if (nal > 0  && nal <= NAL_MAX_NR) {
-                down(&nal_cmd_sem);
-                if (nal_cmd[nal].nch_handler != NULL)
-                        rc = -EBUSY;
-                else {
-                        nal_cmd[nal].nch_handler = handler;
-                        nal_cmd[nal].nch_private = private;
+        down(&nal_cmd_sem);
+
+        if (libcfs_find_nal_cmd_handler(nal) != NULL) {
+                up (&nal_cmd_sem);
+                return (-EBUSY);
+        }
+
+        cmd = NULL;
+        for (i = 0; i < sizeof(nal_cmd)/sizeof(nal_cmd[0]); i++)
+                if (nal_cmd[i].nch_handler == NULL) {
+                        cmd = &nal_cmd[i];
+                        break;
                 }
-                up(&nal_cmd_sem);
+        
+        if (cmd == NULL) {
+                rc = -EBUSY;
+        } else {
+                rc = 0;
+                cmd->nch_number = nal;
+                cmd->nch_handler = handler;
+                cmd->nch_private = private;
         }
+
+        up(&nal_cmd_sem);
+
         return rc;
 }
 EXPORT_SYMBOL(libcfs_nal_cmd_register);
@@ -269,14 +300,15 @@ EXPORT_SYMBOL(libcfs_nal_cmd_register);
 void
 libcfs_nal_cmd_unregister(int nal)
 {
-        CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal);
+        struct nal_cmd_handler *cmd;
 
-        LASSERT(nal > 0 && nal <= NAL_MAX_NR);
-        LASSERT(nal_cmd[nal].nch_handler != NULL);
+        CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal);
 
         down(&nal_cmd_sem);
-        nal_cmd[nal].nch_handler = NULL;
-        nal_cmd[nal].nch_private = NULL;
+        cmd = libcfs_find_nal_cmd_handler(nal);
+        LASSERT (cmd != NULL);
+        cmd->nch_handler = NULL;
+        cmd->nch_private = NULL;
         up(&nal_cmd_sem);
 }
 EXPORT_SYMBOL(libcfs_nal_cmd_unregister);
@@ -284,16 +316,17 @@ EXPORT_SYMBOL(libcfs_nal_cmd_unregister);
 int
 libcfs_nal_cmd(struct portals_cfg *pcfg)
 {
+        struct nal_cmd_handler *cmd;
         __u32 nal = pcfg->pcfg_nal;
         int   rc = -EINVAL;
         ENTRY;
 
         down(&nal_cmd_sem);
-        if (nal > 0 && nal <= NAL_MAX_NR && 
-            nal_cmd[nal].nch_handler != NULL) {
+        cmd = libcfs_find_nal_cmd_handler(nal);
+        if (cmd != NULL) {
                 CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal, 
                        pcfg->pcfg_command);
-                rc = nal_cmd[nal].nch_handler(pcfg, nal_cmd[nal].nch_private);
+                rc = cmd->nch_handler(pcfg, cmd->nch_private);
         }
         up(&nal_cmd_sem);
 
index 6ce334b..c0f2e71 100644 (file)
@@ -1,6 +1,6 @@
 MODULES := portals
-portals-objs := api-eq.o api-init.o api-me.o api-errno.o api-ni.o api-wrap.o
-portals-objs += lib-dispatch.o lib-init.o lib-me.o lib-msg.o lib-eq.o lib-md.o
+portals-objs := api-errno.o api-ni.o api-wrap.o
+portals-objs += lib-init.o lib-me.o lib-msg.o lib-eq.o lib-md.o
 portals-objs += lib-move.o lib-ni.o lib-pid.o module.o
 
 @INCLUDE_RULES@
index de01765..088902a 100644 (file)
@@ -6,7 +6,7 @@
 include $(src)/../Kernelenv
 
 obj-y += portals.o
-portals-objs    :=     lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o \
+portals-objs    :=     lib-eq.o lib-init.o lib-md.o lib-me.o \
                        lib-move.o lib-msg.o lib-ni.o lib-pid.o \
-                       api-eq.o api-errno.o api-init.o api-me.o api-ni.o \
-                       api-wrap.o module.o
+                       api-errno.o api-ni.o api-wrap.o \
+                       module.o
diff --git a/lnet/lnet/api-eq.c b/lnet/lnet/api-eq.c
deleted file mode 100644 (file)
index 0306043..0000000
+++ /dev/null
@@ -1,120 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * api/api-eq.c
- * User-level event queue management routines
- *
- *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *  Copyright (c) 2001-2002 Sandia National Laboratories
- *
- *   This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_PORTALS
-#include <portals/api-support.h>
-
-int ptl_get_event (ptl_eq_t *eq, ptl_event_t *ev)
-{
-        int          new_index = eq->sequence & (eq->size - 1);
-        ptl_event_t *new_event = &eq->base[new_index];
-        ENTRY;
-
-        CDEBUG(D_INFO, "new_event: %p, sequence: %lu, eq->size: %u\n",
-               new_event, eq->sequence, eq->size);
-
-        if (PTL_SEQ_GT (eq->sequence, new_event->sequence)) {
-                RETURN(PTL_EQ_EMPTY);
-        }
-
-        *ev = *new_event;
-
-        /* ensure event is delivered correctly despite possible 
-           races with lib_finalize */
-        if (eq->sequence != new_event->sequence) {
-                CERROR("DROPPING EVENT: eq seq %lu ev seq %lu\n",
-                       eq->sequence, new_event->sequence);
-                RETURN(PTL_EQ_DROPPED);
-        }
-
-        eq->sequence = new_event->sequence + 1;
-        RETURN(PTL_OK);
-}
-
-int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t * ev)
-{
-        int which;
-        
-        return (PtlEQPoll (&eventq, 1, 0, ev, &which));
-}
-
-int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out)
-{
-        int which;
-        
-        return (PtlEQPoll (&eventq_in, 1, PTL_TIME_FOREVER, 
-                           event_out, &which));
-}
-
-int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout,
-              ptl_event_t *event_out, int *which_out)
-{
-        nal_t        *nal;
-        int           i;
-        int           rc;
-        unsigned long flags;
-        
-        if (!ptl_init)
-                RETURN(PTL_NO_INIT);
-
-        if (neq_in < 1)
-                RETURN(PTL_EQ_INVALID);
-        
-        nal = ptl_hndl2nal(&eventqs_in[0]);
-        if (nal == NULL)
-                RETURN(PTL_EQ_INVALID);
-
-        nal->lock(nal, &flags);
-
-        for (;;) {
-                for (i = 0; i < neq_in; i++) {
-                        ptl_eq_t *eq = ptl_handle2usereq(&eventqs_in[i]);
-
-                        if (i > 0 &&
-                            ptl_hndl2nal(&eventqs_in[i]) != nal) {
-                                nal->unlock(nal, &flags);
-                                RETURN (PTL_EQ_INVALID);
-                        }
-
-                        /* size must be a power of 2 to handle a wrapped sequence # */
-                        LASSERT (eq->size != 0 &&
-                                 eq->size == LOWEST_BIT_SET (eq->size));
-
-                        rc = ptl_get_event (eq, event_out);
-                        if (rc != PTL_EQ_EMPTY) {
-                                nal->unlock(nal, &flags);
-                                *which_out = i;
-                                RETURN(rc);
-                        }
-                }
-                
-                if (timeout == 0) {
-                        nal->unlock(nal, &flags);
-                        RETURN (PTL_EQ_EMPTY);
-                }
-                        
-                timeout = nal->yield(nal, &flags, timeout);
-        }
-}
index 1c01c88..9a4e5ac 100644 (file)
@@ -40,6 +40,9 @@ const char *ptl_err_str[] = {
 
         "PTL_EQ_IN_USE",
 
+        "PTL_NI_INVALID",
+        "PTL_MD_ILLEGAL",
+
         "PTL_MAX_ERRNO"
 };
 /* If you change these, you must update the number table in portals/errno.h */
diff --git a/lnet/lnet/api-init.c b/lnet/lnet/api-init.c
deleted file mode 100644 (file)
index 9a98714..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * api/api-init.c
- * Initialization and global data for the p30 user side library
- *
- *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *  Copyright (c) 2001-2002 Sandia National Laboratories
- *
- *   This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_PORTALS
-#include <portals/api-support.h>
-
-int PtlInit(int *max_interfaces)
-{
-        if (max_interfaces != NULL)
-                *max_interfaces = NAL_MAX_NR;
-
-        LASSERT(!strcmp(ptl_err_str[PTL_MAX_ERRNO], "PTL_MAX_ERRNO"));
-
-        return ptl_ni_init();
-}
-
-
-void PtlFini(void)
-{
-        ptl_ni_fini();
-}
-
-
-void PtlSnprintHandle(char *str, int len, ptl_handle_any_t h)
-{
-        snprintf(str, len, "0x%lx."LPX64, h.nal_idx, h.cookie);
-}
diff --git a/lnet/lnet/api-me.c b/lnet/lnet/api-me.c
deleted file mode 100644 (file)
index 37f0150..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * api/api-me.c
- * Match Entry local operations.
- *
- *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *  Copyright (c) 2001-2002 Sandia National Laboratories
- *
- *   This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_PORTALS
-#include <portals/api-support.h>
-
index 4f37d13..56afd45 100644 (file)
@@ -66,6 +66,8 @@ nal_t *ptl_hndl2nal(ptl_handle_any_t *handle)
          * invalidated out from under her (or worse, swapped for a
          * completely different interface!) */
 
+        LASSERT (ptl_init);
+
         if (((idx ^ NI_HANDLE_MAGIC) & ~NI_HANDLE_MASK) != 0)
                 return NULL;
 
@@ -112,12 +114,17 @@ void ptl_unregister_nal (ptl_interface_t interface)
         ptl_mutex_exit();
 }
 
-int ptl_ni_init(void)
+int PtlInit(int *max_interfaces)
 {
+        LASSERT(!strcmp(ptl_err_str[PTL_MAX_ERRNO], "PTL_MAX_ERRNO"));
+
         /* If this assertion fails, we need more bits in NI_HANDLE_MASK and
          * to shift NI_HANDLE_MAGIC left appropriately */
         LASSERT (NAL_MAX_NR <= (NI_HANDLE_MASK + 1));
         
+        if (max_interfaces != NULL)
+                *max_interfaces = NAL_MAX_NR;
+
         ptl_mutex_enter();
 
         if (!ptl_init) {
@@ -143,7 +150,7 @@ int ptl_ni_init(void)
         return PTL_OK;
 }
 
-void ptl_ni_fini(void)
+void PtlFini(void)
 {
         nal_t  *nal;
         int     i;
@@ -160,7 +167,7 @@ void ptl_ni_fini(void)
                         if (nal->nal_refct != 0) {
                                 CWARN("NAL %d has outstanding refcount %d\n",
                                       i, nal->nal_refct);
-                                nal->shutdown(nal);
+                                nal->nal_ni_fini(nal);
                         }
                         
                         ptl_nal_table[i] = NULL;
@@ -202,9 +209,11 @@ int PtlNIInit(ptl_interface_t interface, ptl_pid_t requested_pid,
         }
 
         nal = ptl_nal_table[interface];
-
+        nal->nal_handle.nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | interface;
+        nal->nal_handle.cookie = 0;
+        
         CDEBUG(D_OTHER, "Starting up NAL (%d) refs %d\n", interface, nal->nal_refct);
-        rc = nal->startup(nal, requested_pid, desired_limits, actual_limits);
+        rc = nal->nal_ni_init(nal, requested_pid, desired_limits, actual_limits);
 
         if (rc != PTL_OK) {
                 CERROR("Error %d starting up NAL %d, refs %d\n", rc,
@@ -218,10 +227,11 @@ int PtlNIInit(ptl_interface_t interface, ptl_pid_t requested_pid,
         }
         
         nal->nal_refct++;
-        handle->nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | interface;
+        *handle = nal->nal_handle;
 
  out:
         ptl_mutex_exit ();
+
         return rc;
 }
 
@@ -248,15 +258,8 @@ int PtlNIFini(ptl_handle_ni_t ni)
         nal->nal_refct--;
 
-        /* nal_refct == 0 tells nal->shutdown to really shut down */
+        /* nal_refct == 0 tells nal->nal_ni_fini to really shut down */
-        nal->shutdown(nal);
+        nal->nal_ni_fini(nal);
 
         ptl_mutex_exit ();
         return PTL_OK;
 }
-
-int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * ni_out)
-{
-        *ni_out = handle_in;
-
-        return PTL_OK;
-}
index 3e6f9ce..d7ff020 100644 (file)
 # define DEBUG_SUBSYSTEM S_PORTALS
 #include <portals/api-support.h>
 
-static int do_forward(ptl_handle_any_t any_h, int cmd, void *argbuf,
-                      int argsize, void *retbuf, int retsize)
+void PtlSnprintHandle(char *str, int len, ptl_handle_any_t h)
 {
-        nal_t *nal;
+        snprintf(str, len, "0x%lx."LPX64, h.nal_idx, h.cookie);
+}
 
-        if (!ptl_init) {
-                CERROR("Not initialized\n");
+int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t *ni_out)
+{
+        if (!ptl_init)
                 return PTL_NO_INIT;
-        }
-
-        nal = ptl_hndl2nal(&any_h);
-        if (!nal)
+        
+        if (ptl_hndl2nal(&handle_in) == NULL)
                 return PTL_HANDLE_INVALID;
-
-        nal->forward(nal, cmd, argbuf, argsize, retbuf, retsize);
-
+        
+        *ni_out = handle_in;
         return PTL_OK;
 }
 
 int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id)
 {
-        PtlGetId_in args;
-        PtlGetId_out ret;
-        int rc;
-
-        args.handle_in = ni_handle;
+        nal_t     *nal;
 
-        rc = do_forward(ni_handle, PTL_GETID, &args, sizeof(args), &ret,
-                        sizeof(ret));
-        if (rc != PTL_OK)
-                return rc;
+        if (!ptl_init)
+                return PTL_NO_INIT;
         
-        if (id)
-                *id = ret.id_out;
+        nal = ptl_hndl2nal(&ni_handle);
+        if (nal == NULL)
+                return PTL_NI_INVALID;
 
-        return ret.rc;
+        return nal->nal_get_id(nal, id);
 }
 
 int PtlFailNid (ptl_handle_ni_t interface, ptl_nid_t nid, unsigned int threshold) 
 {
-        PtlFailNid_in  args;
-        PtlFailNid_out ret;
-        int            rc;
-        
-        args.interface = interface;
-        args.nid       = nid;
-        args.threshold = threshold;
+        nal_t     *nal;
+
+        if (!ptl_init)
+                return PTL_NO_INIT;
         
-        rc = do_forward (interface, PTL_FAILNID, 
-                         &args, sizeof(args), &ret, sizeof (ret));
+        nal = ptl_hndl2nal(&interface);
+        if (nal == NULL)
+                return PTL_NI_INVALID;
 
-        return ((rc != PTL_OK) ? rc : ret.rc);
+        return nal->nal_fail_nid(nal, nid, threshold);
 }
 
 int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in,
-                ptl_sr_value_t * status_out)
+                ptl_sr_value_t *status_out)
 {
-        PtlNIStatus_in args;
-        PtlNIStatus_out ret;
-        int rc;
+        nal_t     *nal;
 
-        args.interface_in = interface_in;
-        args.register_in = register_in;
-
-        rc = do_forward(interface_in, PTL_NISTATUS, &args, sizeof(args), &ret,
-                        sizeof(ret));
-
-        if (rc != PTL_OK)
-                return rc;
-
-        if (status_out)
-                *status_out = ret.status_out;
+        if (!ptl_init)
+                return PTL_NO_INIT;
+        
+        nal = ptl_hndl2nal(&interface_in);
+        if (nal == NULL)
+                return PTL_NI_INVALID;
 
-        return ret.rc;
+        return nal->nal_ni_status(nal, register_in, status_out);
 }
 
 int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in,
               unsigned long *distance_out)
 {
-        PtlNIDist_in args;
-        PtlNIDist_out ret;
-        int rc;
-
-        args.interface_in = interface_in;
-        args.process_in = process_in;
-
-        rc = do_forward(interface_in, PTL_NIDIST, &args, sizeof(args), &ret,
-                        sizeof(ret));
+        nal_t     *nal;
 
-        if (rc != PTL_OK)
-                return rc;
-
-        if (distance_out)
-                *distance_out = ret.distance_out;
+        if (!ptl_init)
+                return PTL_NO_INIT;
+        
+        nal = ptl_hndl2nal(&interface_in);
+        if (nal == NULL)
+                return PTL_NI_INVALID;
 
-        return ret.rc;
+        return nal->nal_ni_dist(nal, &process_in, distance_out);
 }
 
 int PtlMEAttach(ptl_handle_ni_t interface_in, ptl_pt_index_t index_in,
                 ptl_process_id_t match_id_in, ptl_match_bits_t match_bits_in,
                 ptl_match_bits_t ignore_bits_in, ptl_unlink_t unlink_in,
-                ptl_ins_pos_t pos_in, ptl_handle_me_t * handle_out)
+                ptl_ins_pos_t pos_in, ptl_handle_me_t *handle_out)
 {
-        PtlMEAttach_in args;
-        PtlMEAttach_out ret;
-        int rc;
-
-        args.interface_in = interface_in;
-        args.index_in = index_in;
-        args.match_id_in = match_id_in;
-        args.match_bits_in = match_bits_in;
-        args.ignore_bits_in = ignore_bits_in;
-        args.unlink_in = unlink_in;
-        args.position_in = pos_in;
-
-        rc = do_forward(interface_in, PTL_MEATTACH, &args, sizeof(args), &ret,
-                        sizeof(ret));
-
-        if (rc != PTL_OK)
-                return rc;
-
-        if (handle_out) {
-                handle_out->nal_idx = interface_in.nal_idx;
-                handle_out->cookie = ret.handle_out.cookie;
-        }
-
-        return ret.rc;
+        nal_t     *nal;
+
+        if (!ptl_init)
+                return PTL_NO_INIT;
+        
+        nal = ptl_hndl2nal(&interface_in);
+        if (nal == NULL)
+                return PTL_NI_INVALID;
+
+        return nal->nal_me_attach(nal, index_in, match_id_in, 
+                                  match_bits_in, ignore_bits_in,
+                                  unlink_in, pos_in, handle_out);
 }
 
 int PtlMEInsert(ptl_handle_me_t current_in, ptl_process_id_t match_id_in,
@@ -160,367 +125,226 @@ int PtlMEInsert(ptl_handle_me_t current_in, ptl_process_id_t match_id_in,
                 ptl_unlink_t unlink_in, ptl_ins_pos_t position_in,
                 ptl_handle_me_t * handle_out)
 {
-        PtlMEInsert_in args;
-        PtlMEInsert_out ret;
-        int rc;
-
-        args.current_in = current_in;
-        args.match_id_in = match_id_in;
-        args.match_bits_in = match_bits_in;
-        args.ignore_bits_in = ignore_bits_in;
-        args.unlink_in = unlink_in;
-        args.position_in = position_in;
-
-        rc = do_forward(current_in, PTL_MEINSERT, &args, sizeof(args), &ret,
-                        sizeof(ret));
-
-        if (rc != PTL_OK)
-                return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
-
-        if (handle_out) {
-                handle_out->nal_idx = current_in.nal_idx;
-                handle_out->cookie = ret.handle_out.cookie;
-        }
-        return ret.rc;
+        nal_t     *nal;
+
+        if (!ptl_init)
+                return PTL_NO_INIT;
+        
+        nal = ptl_hndl2nal(&current_in);
+        if (nal == NULL)
+                return PTL_ME_INVALID;
+
+        return nal->nal_me_insert(nal, &current_in, match_id_in,
+                                  match_bits_in, ignore_bits_in,
+                                  unlink_in, position_in, handle_out);
 }
 
 int PtlMEUnlink(ptl_handle_me_t current_in)
 {
-        PtlMEUnlink_in args;
-        PtlMEUnlink_out ret;
-        int rc;
+        nal_t     *nal;
 
-        args.current_in = current_in;
-        args.unlink_in = PTL_RETAIN;
-
-        rc = do_forward(current_in, PTL_MEUNLINK, &args, sizeof(args), &ret,
-                        sizeof(ret));
-
-        if (rc != PTL_OK)
-                return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
+        if (!ptl_init)
+                return PTL_NO_INIT;
+        
+        nal = ptl_hndl2nal(&current_in);
+        if (nal == NULL)
+                return PTL_ME_INVALID;
 
-        return ret.rc;
+        return nal->nal_me_unlink(nal, &current_in);
 }
 
-int PtlTblDump(ptl_handle_ni_t ni, int index_in)
+int PtlMDAttach(ptl_handle_me_t me_in, ptl_md_t md_in,
+                ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out)
 {
-        PtlTblDump_in args;
-        PtlTblDump_out ret;
-        int rc;
+        nal_t     *nal;
 
-        args.index_in = index_in;
-
-        rc = do_forward(ni, PTL_TBLDUMP, &args, sizeof(args), &ret,
-                        sizeof(ret));
+        if (!ptl_init)
+                return PTL_NO_INIT;
+        
+        nal = ptl_hndl2nal(&me_in);
+        if (nal == NULL)
+                return PTL_ME_INVALID;
 
-        if (rc != PTL_OK)
-                return rc;
+        if (!PtlHandleIsEqual(md_in.eventq, PTL_EQ_NONE) &&
+            ptl_hndl2nal(&md_in.eventq) != nal)
+                return PTL_MD_ILLEGAL;
 
-        return ret.rc;
+        return (nal->nal_md_attach)(nal, &me_in, &md_in, 
+                                    unlink_in, handle_out);
 }
 
-int PtlMEDump(ptl_handle_me_t current_in)
+int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in,
+              ptl_unlink_t unlink_in, ptl_handle_md_t *handle_out)
 {
-        PtlMEDump_in args;
-        PtlMEDump_out ret;
-        int rc;
+        nal_t     *nal;
 
-        args.current_in = current_in;
-
-        rc = do_forward(current_in, PTL_MEDUMP, &args, sizeof(args), &ret,
-                        sizeof(ret));
+        if (!ptl_init)
+                return PTL_NO_INIT;
+        
+        nal = ptl_hndl2nal(&ni_in);
+        if (nal == NULL)
+                return PTL_NI_INVALID;
 
-        if (rc != PTL_OK)
-                return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
+        if (!PtlHandleIsEqual(md_in.eventq, PTL_EQ_NONE) &&
+            ptl_hndl2nal(&md_in.eventq) != nal)
+                return PTL_MD_ILLEGAL;
 
-        return ret.rc;
+        return (nal->nal_md_bind)(nal, &md_in, unlink_in, handle_out);
 }
 
-static ptl_handle_eq_t md2eq (ptl_md_t *md)
+int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t *old_inout,
+                ptl_md_t *new_inout, ptl_handle_eq_t testq_in)
 {
-        if (PtlHandleIsEqual (md->eventq, PTL_EQ_NONE))
-                return (PTL_EQ_NONE);
+        nal_t    *nal;
         
-        return (ptl_handle2usereq (&md->eventq)->cb_eq_handle);
-}
-
-
-int PtlMDAttach(ptl_handle_me_t me_in, ptl_md_t md_in,
-                ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out)
-{
-        PtlMDAttach_in args;
-        PtlMDAttach_out ret;
-        int rc;
-
-        args.eq_in = md2eq(&md_in);
-        args.me_in = me_in;
-        args.md_in = md_in;
-        args.unlink_in = unlink_in;
-                
-        rc = do_forward(me_in, PTL_MDATTACH, 
-                        &args, sizeof(args), &ret, sizeof(ret));
-
-        if (rc != PTL_OK)
-                return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
-
-        if (handle_out) {
-                handle_out->nal_idx = me_in.nal_idx;
-                handle_out->cookie = ret.handle_out.cookie;
-        }
-        return ret.rc;
-}
-
+        if (!ptl_init)
+                return PTL_NO_INIT;
+        
+        nal = ptl_hndl2nal(&md_in);
+        if (nal == NULL)
+                return PTL_MD_INVALID;
 
+        if (!PtlHandleIsEqual(testq_in, PTL_EQ_NONE) &&
+            ptl_hndl2nal(&testq_in) != nal)
+                return PTL_EQ_INVALID;
 
-int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in,
-              ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out)
-{
-        PtlMDBind_in args;
-        PtlMDBind_out ret;
-        int rc;
-
-        args.eq_in = md2eq(&md_in);
-        args.ni_in = ni_in;
-        args.md_in = md_in;
-        args.unlink_in = unlink_in;
-
-        rc = do_forward(ni_in, PTL_MDBIND, 
-                        &args, sizeof(args), &ret, sizeof(ret));
-
-        if (rc != PTL_OK)
-                return rc;
-
-        if (handle_out) {
-                handle_out->nal_idx = ni_in.nal_idx;
-                handle_out->cookie = ret.handle_out.cookie;
-        }
-        return ret.rc;
+        return (nal->nal_md_update)(nal, &md_in, 
+                                    old_inout, new_inout, &testq_in);
 }
 
-int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t *old_inout,
-                ptl_md_t *new_inout, ptl_handle_eq_t testq_in)
+int PtlMDUnlink(ptl_handle_md_t md_in)
 {
-        PtlMDUpdate_internal_in args;
-        PtlMDUpdate_internal_out ret;
-        int rc;
-
-        args.md_in = md_in;
-
-        if (old_inout) {
-                args.old_inout = *old_inout;
-                args.old_inout_valid = 1;
-        } else
-                args.old_inout_valid = 0;
-
-        if (new_inout) {
-                args.new_inout = *new_inout;
-                args.new_inout_valid = 1;
-        } else
-                args.new_inout_valid = 0;
-
-        if (PtlHandleIsEqual (testq_in, PTL_EQ_NONE)) {
-                args.testq_in = PTL_EQ_NONE;
-                args.sequence_in = -1;
-        } else {
-                ptl_eq_t *eq = ptl_handle2usereq (&testq_in);
-                
-                args.testq_in = eq->cb_eq_handle;
-                args.sequence_in = eq->sequence;
-        }
-
-        rc = do_forward(md_in, PTL_MDUPDATE, &args, sizeof(args), &ret,
-                        sizeof(ret));
-        if (rc != PTL_OK)
-                return (rc == PTL_HANDLE_INVALID) ? PTL_MD_INVALID : rc;
-
-        if (old_inout)
-                *old_inout = ret.old_inout;
-
-        return ret.rc;
+        nal_t    *nal;
+        
+        if (!ptl_init)
+                return PTL_NO_INIT;
+        
+        nal = ptl_hndl2nal(&md_in);
+        if (nal == NULL)
+                return PTL_MD_INVALID;
+        
+        return (nal->nal_md_unlink)(nal, &md_in);
 }
 
-int PtlMDUnlink(ptl_handle_md_t md_in)
+int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count,
+               ptl_eq_handler_t callback,
+               ptl_handle_eq_t *handle_out)
 {
-        PtlMDUnlink_in args;
-        PtlMDUnlink_out ret;
-        int rc;
-
-        args.md_in = md_in;
-        rc = do_forward(md_in, PTL_MDUNLINK, &args, sizeof(args), &ret,
-                        sizeof(ret));
-        if (rc != PTL_OK)
-                return (rc == PTL_HANDLE_INVALID) ? PTL_MD_INVALID : rc;
+        nal_t    *nal;
+        
+        if (!ptl_init)
+                return PTL_NO_INIT;
+        
+        nal = ptl_hndl2nal(&interface);
+        if (nal == NULL)
+                return PTL_NI_INVALID;
 
-        return ret.rc;
+        return (nal->nal_eq_alloc)(nal, count, callback, handle_out);
 }
 
-int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count,
-               ptl_eq_handler_t callback,
-               ptl_handle_eq_t * handle_out)
+int PtlEQFree(ptl_handle_eq_t eventq)
 {
-        ptl_eq_t *eq = NULL;
-        ptl_event_t *ev = NULL;
-        PtlEQAlloc_in args;
-        PtlEQAlloc_out ret;
-        int rc, i;
-        nal_t *nal;
+        nal_t       *nal;
 
         if (!ptl_init)
                 return PTL_NO_INIT;
         
-        nal = ptl_hndl2nal (&interface);
+        nal = ptl_hndl2nal(&eventq);
         if (nal == NULL)
-                return PTL_HANDLE_INVALID;
+                return PTL_EQ_INVALID;
 
-        if (count != LOWEST_BIT_SET(count)) {   /* not a power of 2 already */
-                do {                    /* knock off all but the top bit... */
-                        count &= ~LOWEST_BIT_SET (count);
-                } while (count != LOWEST_BIT_SET(count));
-
-                count <<= 1;                             /* ...and round up */
-        }
-
-        if (count == 0)        /* catch bad parameter / overflow on roundup */
-                return (PTL_VAL_FAILED);
-
-        PORTAL_ALLOC(ev, count * sizeof(ptl_event_t));
-        if (!ev)
-                return PTL_NO_SPACE;
-
-        for (i = 0; i < count; i++)
-                ev[i].sequence = 0;
-
-        args.ni_in = interface;
-        args.count_in = count;
-        args.base_in = ev;
-        args.len_in = count * sizeof(*ev);
-        args.callback_in = callback;
-
-        rc = do_forward(interface, PTL_EQALLOC, &args, sizeof(args), &ret,
-                        sizeof(ret));
-        if (rc != PTL_OK)
-                goto fail;
-        if (ret.rc)
-                GOTO(fail, rc = ret.rc);
-
-        PORTAL_ALLOC(eq, sizeof(*eq));
-        if (!eq) {
-                rc = PTL_NO_SPACE;
-                goto fail;
-        }
-
-        eq->sequence = 1;
-        eq->size = count;
-        eq->base = ev;
-
-        /* EQ handles are a little wierd.  PtlEQGet() just looks at the
-         * queued events in shared memory.  It doesn't want to do_forward()
-         * at all, so the cookie in the EQ handle we pass out of here is
-         * simply a pointer to the event queue we just set up.  We stash
-         * the handle returned by do_forward(), so we can pass it back via
-         * do_forward() when we need to. */
-
-        eq->cb_eq_handle.nal_idx = interface.nal_idx;
-        eq->cb_eq_handle.cookie = ret.handle_out.cookie;
-
-        handle_out->nal_idx = interface.nal_idx;
-        handle_out->cookie = (__u64)((unsigned long)eq);
-        return PTL_OK;
+        return (nal->nal_eq_free)(nal, &eventq);
+}
 
-fail:
-        PORTAL_FREE(ev, count * sizeof(ptl_event_t));
-        return rc;
+int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t *ev)
+{
+        int which;
+        
+        return (PtlEQPoll (&eventq, 1, 0, ev, &which));
 }
 
-int PtlEQFree(ptl_handle_eq_t eventq)
+int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out)
 {
-        PtlEQFree_in args;
-        PtlEQFree_out ret;
-        ptl_eq_t *eq;
-        int rc;
+        int which;
+        
+        return (PtlEQPoll (&eventq_in, 1, PTL_TIME_FOREVER, 
+                           event_out, &which));
+}
 
-        eq = ptl_handle2usereq (&eventq);
-        args.eventq_in = eq->cb_eq_handle;
+int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout,
+              ptl_event_t *event_out, int *which_out)
+{
+        int           i;
+        nal_t        *nal;
 
-        rc = do_forward(eq->cb_eq_handle, PTL_EQFREE, &args,
-                        sizeof(args), &ret, sizeof(ret));
+        if (!ptl_init)
+                return PTL_NO_INIT;
+
+        if (neq_in < 1)
+                return PTL_EQ_INVALID;
+
+        nal = ptl_hndl2nal(&eventqs_in[0]);
+        if (nal == NULL)
+                return PTL_EQ_INVALID;
 
-        /* XXX we're betting rc == PTL_OK here */
-        PORTAL_FREE(eq->base, eq->size * sizeof(ptl_event_t));
-        PORTAL_FREE(eq, sizeof(*eq));
+        for (i = 1; i < neq_in; i++)
+                if (ptl_hndl2nal(&eventqs_in[i]) != nal)
+                        return PTL_EQ_INVALID;
 
-        return rc;
+        return (nal->nal_eq_poll)(nal, eventqs_in, neq_in, timeout,
+                                  event_out, which_out);
 }
 
+
 int PtlACEntry(ptl_handle_ni_t ni_in, ptl_ac_index_t index_in,
                ptl_process_id_t match_id_in, ptl_pt_index_t portal_in)
 {
-        PtlACEntry_in args;
-        PtlACEntry_out ret;
-        int rc;
-
-        /*
-         * Copy arguments into the argument block to
-         * hand to the forwarding object
-         */
-        args.ni_in = ni_in;
-        args.index_in = index_in;
-        args.match_id_in = match_id_in;
-        args.portal_in = portal_in;
-
-        rc = do_forward(ni_in, PTL_ACENTRY, &args, sizeof(args), &ret,
-                        sizeof(ret));
-
-        return (rc != PTL_OK) ? rc : ret.rc;
+        nal_t    *nal;
+
+        if (!ptl_init)
+                return PTL_NO_INIT;
+        
+        nal = ptl_hndl2nal(&ni_in);
+        if (nal == NULL)
+                return PTL_NI_INVALID;
+        
+        return (nal->nal_ace_entry)(nal, index_in, match_id_in, portal_in);
 }
 
 int PtlPut(ptl_handle_md_t md_in, ptl_ack_req_t ack_req_in,
            ptl_process_id_t target_in, ptl_pt_index_t portal_in,
-           ptl_ac_index_t cookie_in, ptl_match_bits_t match_bits_in,
+           ptl_ac_index_t ac_in, ptl_match_bits_t match_bits_in,
            ptl_size_t offset_in, ptl_hdr_data_t hdr_data_in)
 {
-        PtlPut_in args;
-        PtlPut_out ret;
-        int rc;
-
-        /*
-         * Copy arguments into the argument block to
-         * hand to the forwarding object
-         */
-        args.md_in = md_in;
-        args.ack_req_in = ack_req_in;
-        args.target_in = target_in;
-        args.portal_in = portal_in;
-        args.cookie_in = cookie_in;
-        args.match_bits_in = match_bits_in;
-        args.offset_in = offset_in;
-        args.hdr_data_in = hdr_data_in;
-
-        rc = do_forward(md_in, PTL_PUT, &args, sizeof(args), &ret, sizeof(ret));
-
-        return (rc != PTL_OK) ? rc : ret.rc;
+        nal_t    *nal;
+
+        if (!ptl_init)
+                return PTL_NO_INIT;
+        
+        nal = ptl_hndl2nal(&md_in);
+        if (nal == NULL)
+                return PTL_MD_INVALID;
+
+        return (nal->nal_put)(nal, &md_in, ack_req_in,
+                              &target_in, portal_in, ac_in,
+                              match_bits_in, offset_in, hdr_data_in);
 }
 
 int PtlGet(ptl_handle_md_t md_in, ptl_process_id_t target_in,
-           ptl_pt_index_t portal_in, ptl_ac_index_t cookie_in,
+           ptl_pt_index_t portal_in, ptl_ac_index_t ac_in,
            ptl_match_bits_t match_bits_in, ptl_size_t offset_in)
 {
-        PtlGet_in args;
-        PtlGet_out ret;
-        int rc;
-
-        /*
-         * Copy arguments into the argument block to
-         * hand to the forwarding object
-         */
-        args.md_in = md_in;
-        args.target_in = target_in;
-        args.portal_in = portal_in;
-        args.cookie_in = cookie_in;
-        args.match_bits_in = match_bits_in;
-        args.offset_in = offset_in;
-
-        rc = do_forward(md_in, PTL_GET, &args, sizeof(args), &ret, sizeof(ret));
-
-        return (rc != PTL_OK) ? rc : ret.rc;
+        nal_t  *nal;
+
+        if (!ptl_init)
+                return PTL_NO_INIT;
+
+        nal = ptl_hndl2nal(&md_in);
+        if (nal == NULL)
+                return PTL_MD_INVALID;
+
+        return (nal->nal_get)(nal, &md_in, 
+                              &target_in, portal_in, ac_in,
+                              match_bits_in, offset_in);
 }
+
index bf7a107..285f8fe 100644 (file)
@@ -3,8 +3,8 @@
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
-my_sources = api-eq.c api-init.c api-me.c api-errno.c api-ni.c api-wrap.c \
-               lib-dispatch.c lib-init.c lib-me.c lib-msg.c lib-eq.c \
+my_sources =    api-errno.c api-ni.c api-wrap.c \
+               lib-init.c lib-me.c lib-msg.c lib-eq.c \
                lib-md.c lib-move.c lib-ni.c lib-pid.c
 
 if !CRAY_PORTALS
diff --git a/lnet/lnet/lib-dispatch.c b/lnet/lnet/lib-dispatch.c
deleted file mode 100644 (file)
index 798e117..0000000
+++ /dev/null
@@ -1,79 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * lib/lib-dispatch.c
- *
- *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *  Copyright (c) 2001-2002 Sandia National Laboratories
- *
- *   This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_PORTALS
-#include <portals/lib-p30.h>
-#include <portals/lib-dispatch.h>
-
-typedef struct {
-        int (*fun) (nal_cb_t * nal, void *private, void *in, void *out);
-        char *name;
-} dispatch_table_t;
-
-static dispatch_table_t dispatch_table[] = {
-        [PTL_GETID] {do_PtlGetId, "PtlGetId"},
-        [PTL_NISTATUS] {do_PtlNIStatus, "PtlNIStatus"},
-        [PTL_NIDIST] {do_PtlNIDist, "PtlNIDist"},
-        [PTL_MEATTACH] {do_PtlMEAttach, "PtlMEAttach"},
-        [PTL_MEINSERT] {do_PtlMEInsert, "PtlMEInsert"},
-        [PTL_MEUNLINK] {do_PtlMEUnlink, "PtlMEUnlink"},
-        [PTL_TBLDUMP] {do_PtlTblDump, "PtlTblDump"},
-        [PTL_MEDUMP] {do_PtlMEDump, "PtlMEDump"},
-        [PTL_MDATTACH] {do_PtlMDAttach, "PtlMDAttach"},
-        [PTL_MDBIND] {do_PtlMDBind, "PtlMDBind"},
-        [PTL_MDUPDATE] {do_PtlMDUpdate_internal, "PtlMDUpdate_internal"},
-        [PTL_MDUNLINK] {do_PtlMDUnlink, "PtlMDUnlink"},
-        [PTL_EQALLOC] {do_PtlEQAlloc_internal, "PtlEQAlloc_internal"},
-        [PTL_EQFREE] {do_PtlEQFree_internal, "PtlEQFree_internal"},
-        [PTL_PUT] {do_PtlPut, "PtlPut"},
-        [PTL_GET] {do_PtlGet, "PtlGet"},
-        [PTL_FAILNID] {do_PtlFailNid, "PtlFailNid"},
-        /*    */ {0, ""}
-};
-
-/*
- * This really should be elsewhere, but lib-p30/dispatch.c is
- * an automatically generated file.
- */
-void lib_dispatch(nal_cb_t * nal, void *private, int index, void *arg_block,
-                  void *ret_block)
-{
-        lib_ni_t *ni = &nal->ni;
-
-        if (index < 0 || index > LIB_MAX_DISPATCH ||
-            !dispatch_table[index].fun) {
-                CDEBUG(D_NET, LPU64": Invalid API call %d\n", ni->nid, index);
-                return;
-        }
-
-        CDEBUG(D_NET, LPU64": API call %s (%d)\n", ni->nid,
-               dispatch_table[index].name, index);
-
-        dispatch_table[index].fun(nal, private, arg_block, ret_block);
-}
-
-char *dispatch_name(int index)
-{
-        return dispatch_table[index].name;
-}
index 8a91860..8ea6fdd 100644 (file)
 
 #define DEBUG_SUBSYSTEM S_PORTALS
 #include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
 
-int do_PtlEQAlloc_internal(nal_cb_t * nal, void *private, void *v_args,
-                           void *v_ret)
+/*
+ * Allocate an event queue on the library NAL behind 'apinal'.
+ *
+ * 'count' is rounded up to the next power of 2; a count of 0 (or one
+ * that overflows on round-up) fails with PTL_VAL_FAILED.  Returns
+ * PTL_NO_SPACE if either the EQ descriptor or its event array cannot
+ * be allocated, the libnal_map() error if mapping the event array
+ * fails, and PTL_OK with '*handle' filled in on success.  Nothing stays
+ * allocated on any failure path.
+ */
+int 
+lib_api_eq_alloc (nal_t *apinal, ptl_size_t count,
+                  ptl_eq_handler_t callback, 
+                  ptl_handle_eq_t *handle)
 {
-        /*
-         * Incoming:
-         *      ptl_handle_ni_t ni_in
-         *      ptl_size_t count_in
-         *      void                    * base_in
-         *
-         * Outgoing:
-         *      ptl_handle_eq_t         * handle_out
-         */
-
-        PtlEQAlloc_in *args = v_args;
-        PtlEQAlloc_out *ret = v_ret;
-
-        lib_eq_t *eq;
-        unsigned long flags;
-
-        /* api should have rounded up */
-        if (args->count_in != LOWEST_BIT_SET (args->count_in))
-                return ret->rc = PTL_VAL_FAILED;
+        lib_nal_t     *nal = apinal->nal_data;
+        lib_eq_t      *eq;
+        unsigned long  flags;
+        int            rc;
 
+        /* We need count to be a power of 2 so that when eq_{enq,deq}_seq
+         * overflow, they don't skip entries, so the queue has the same
+         * apparent capacity at all times */
+
+        if (count != LOWEST_BIT_SET(count)) {   /* not a power of 2 already */
+                do {                    /* knock off all but the top bit... */
+                        count &= ~LOWEST_BIT_SET (count);
+                } while (count != LOWEST_BIT_SET(count));
+
+                count <<= 1;                             /* ...and round up */
+        }
+
+        if (count == 0)        /* catch bad parameter / overflow on roundup */
+                return (PTL_VAL_FAILED);
+        
         eq = lib_eq_alloc (nal);
         if (eq == NULL)
-                return (ret->rc = PTL_NO_SPACE);
+                return (PTL_NO_SPACE);
 
-        state_lock(nal, &flags);
+        PORTAL_ALLOC(eq->eq_events, count * sizeof(ptl_event_t));
+        if (eq->eq_events == NULL) {
+                LIB_LOCK(nal, flags);
+                lib_eq_free (nal, eq);
+                LIB_UNLOCK(nal, flags);
+                /* 'eq' is gone: bail out rather than fall through and
+                 * touch the freed descriptor / NULL event array */
+                return (PTL_NO_SPACE);
+        }
 
-        if (nal->cb_map != NULL) {
+        if (nal->libnal_map != NULL) {
                 struct iovec iov = {
-                        .iov_base = args->base_in,
-                        .iov_len = args->count_in * sizeof (ptl_event_t) };
+                        .iov_base = eq->eq_events,
+                        .iov_len = count * sizeof(ptl_event_t)};
 
-                ret->rc = nal->cb_map (nal, 1, &iov, &eq->eq_addrkey);
-                if (ret->rc != PTL_OK) {
+                rc = nal->libnal_map(nal, 1, &iov, &eq->eq_addrkey);
+                if (rc != PTL_OK) {
+                        /* release the event array before the descriptor
+                         * that points at it, or it is leaked */
+                        PORTAL_FREE(eq->eq_events,
+                                    count * sizeof(ptl_event_t));
+                        LIB_LOCK(nal, flags);
                         lib_eq_free (nal, eq);
-                        
-                        state_unlock (nal, &flags);
-                        return (ret->rc);
+                        LIB_UNLOCK(nal, flags);
+                        return (rc);
                 }
         }
 
-        eq->sequence = 1;
-        eq->base = args->base_in;
-        eq->size = args->count_in;
+        /* NB this resets all event sequence numbers to 0, to be earlier
+         * than eq_deq_seq */
+        memset(eq->eq_events, 0, count * sizeof(ptl_event_t));
+
+        eq->eq_deq_seq = 1;
+        eq->eq_enq_seq = 1;
+        eq->eq_size = count;
         eq->eq_refcount = 0;
-        eq->event_callback = args->callback_in;
+        eq->eq_callback = callback;
+
+        LIB_LOCK(nal, flags);
 
         lib_initialise_handle (nal, &eq->eq_lh, PTL_COOKIE_TYPE_EQ);
-        list_add (&eq->eq_list, &nal->ni.ni_active_eqs);
+        list_add (&eq->eq_list, &nal->libnal_ni.ni_active_eqs);
 
-        state_unlock(nal, &flags);
+        LIB_UNLOCK(nal, flags);
 
-        ptl_eq2handle(&ret->handle_out, eq);
-        return (ret->rc = PTL_OK);
+        ptl_eq2handle(handle, nal, eq);
+        return (PTL_OK);
 }
 
-int do_PtlEQFree_internal(nal_cb_t * nal, void *private, void *v_args,
-                          void *v_ret)
+int 
+lib_api_eq_free(nal_t *apinal, ptl_handle_eq_t *eqh)
 {
-        /*
-         * Incoming:
-         *      ptl_handle_eq_t eventq_in
-         *
-         * Outgoing:
-         */
-
-        PtlEQFree_in *args = v_args;
-        PtlEQFree_out *ret = v_ret;
-        lib_eq_t *eq;
-        long flags;
+        lib_nal_t     *nal = apinal->nal_data;
+        lib_eq_t      *eq;
+        int            size;
+        ptl_event_t   *events;
+        void          *addrkey;
+        unsigned long  flags;
 
-        state_lock (nal, &flags);
+        LIB_LOCK(nal, flags);
 
-        eq = ptl_handle2eq(&args->eventq_in, nal);
+        eq = ptl_handle2eq(eqh, nal);
         if (eq == NULL) {
-                ret->rc = PTL_EQ_INVALID;
-        } else if (eq->eq_refcount != 0) {
-                ret->rc = PTL_EQ_IN_USE;
+                LIB_UNLOCK(nal, flags);
+                return (PTL_EQ_INVALID);
+        }
+
+        if (eq->eq_refcount != 0) {
+                LIB_UNLOCK(nal, flags);
+                return (PTL_EQ_IN_USE);
+        }
+
+        /* stash for free after lock dropped */
+        events  = eq->eq_events;
+        size    = eq->eq_size;
+        addrkey = eq->eq_addrkey;
+
+        lib_invalidate_handle (nal, &eq->eq_lh);
+        list_del (&eq->eq_list);
+        lib_eq_free (nal, eq);
+
+        LIB_UNLOCK(nal, flags);
+
+        if (nal->libnal_unmap != NULL) {
+                struct iovec iov = {
+                        .iov_base = events,
+                        .iov_len = size * sizeof(ptl_event_t)};
+
+                nal->libnal_unmap(nal, 1, &iov, &addrkey);
+        }
+
+        PORTAL_FREE(events, size * sizeof (ptl_event_t));
+
+        return (PTL_OK);
+}
+
+int
+lib_get_event (lib_eq_t *eq, ptl_event_t *ev)
+{
+        int          new_index = eq->eq_deq_seq & (eq->eq_size - 1);
+        ptl_event_t *new_event = &eq->eq_events[new_index];
+        int          rc;
+        ENTRY;
+
+        CDEBUG(D_INFO, "event: %p, sequence: %lu, eq->size: %u\n",
+               new_event, eq->eq_deq_seq, eq->eq_size);
+
+        if (PTL_SEQ_GT (eq->eq_deq_seq, new_event->sequence)) {
+                RETURN(PTL_EQ_EMPTY);
+        }
+
+        /* We've got a new event... */
+        *ev = *new_event;
+
+        /* ...but did it overwrite an event we've not seen yet? */
+        if (eq->eq_deq_seq == new_event->sequence) {
+                rc = PTL_OK;
         } else {
-                if (nal->cb_unmap != NULL) {
-                        struct iovec iov = {
-                                .iov_base = eq->base,
-                                .iov_len = eq->size * sizeof (ptl_event_t) };
-                        
-                        nal->cb_unmap(nal, 1, &iov, &eq->eq_addrkey);
+                CERROR("Event Queue Overflow: eq seq %lu ev seq %lu\n",
+                       eq->eq_deq_seq, new_event->sequence);
+                rc = PTL_EQ_DROPPED;
+        }
+
+        eq->eq_deq_seq = new_event->sequence + 1;
+        RETURN(rc);
+}
+
+
+/*
+ * Poll 'neq' event queues, in array order, for the next event.
+ *
+ * The first queue that is not empty decides the result: its event is
+ * copied to '*event', its index is stored in '*which', and the
+ * lib_get_event() status (PTL_OK or PTL_EQ_DROPPED) is returned.
+ * A handle that does not resolve to an EQ on this NAL yields
+ * PTL_EQ_INVALID.
+ *
+ * timeout_ms == 0 returns PTL_EQ_EMPTY straight away when nothing is
+ * queued; timeout_ms < 0 waits without a time limit; timeout_ms > 0
+ * waits at most that many milliseconds, the remaining time being
+ * recomputed after every wakeup.
+ */
+int
+lib_api_eq_poll (nal_t *apinal, 
+                 ptl_handle_eq_t *eventqs, int neq, int timeout_ms,
+                 ptl_event_t *event, int *which)
+{
+        lib_nal_t       *nal = apinal->nal_data;
+        lib_ni_t        *ni = &nal->libnal_ni;
+        unsigned long    flags;
+        int              i;
+        int              rc;
+#ifdef __KERNEL__
+        wait_queue_t     wq;
+        unsigned long    now;
+#else
+        struct timeval   then;
+        struct timeval   now;
+        struct timespec  ts;
+#endif
+        ENTRY;
+
+        LIB_LOCK(nal, flags);
+
+        for (;;) {
+                for (i = 0; i < neq; i++) {
+                        lib_eq_t *eq = ptl_handle2eq(&eventqs[i], nal);
+
+                        /* stale/unknown handle: lib_get_event() would
+                         * dereference NULL */
+                        if (eq == NULL) {
+                                LIB_UNLOCK(nal, flags);
+                                RETURN(PTL_EQ_INVALID);
+                        }
+
+                        rc = lib_get_event (eq, event);
+                        if (rc != PTL_EQ_EMPTY) {
+                                LIB_UNLOCK(nal, flags);
+                                *which = i;
+                                RETURN(rc);
+                        }
+                }
+                
+                if (timeout_ms == 0) {
+                        LIB_UNLOCK (nal, flags);
+                        RETURN (PTL_EQ_EMPTY);
                 }
 
-                lib_invalidate_handle (nal, &eq->eq_lh);
-                list_del (&eq->eq_list);
-                lib_eq_free (nal, eq);
-                ret->rc = PTL_OK;
-        }
+                /* Some architectures force us to do spin locking/unlocking
+                 * in the same stack frame, means we can abstract the
+                 * locking here */
+#ifdef __KERNEL__
+                init_waitqueue_entry(&wq, current);
+                set_current_state(TASK_INTERRUPTIBLE);
+                add_wait_queue(&ni->ni_waitq, &wq);
 
-        state_unlock (nal, &flags);
+                LIB_UNLOCK(nal, flags);
 
-        return (ret->rc);
+                if (timeout_ms < 0) {
+                        schedule ();
+                } else {
+                        now = jiffies;
+                        schedule_timeout((timeout_ms * HZ)/1000);
+                        timeout_ms -= ((jiffies - now) * 1000)/HZ;
+                        if (timeout_ms < 0)
+                                timeout_ms = 0;
+                }
+                
+                LIB_LOCK(nal, flags);
+                /* 'wq' lives in this stack frame: take it off the wait
+                 * queue before it is re-added on the next pass or this
+                 * function returns */
+                remove_wait_queue(&ni->ni_waitq, &wq);
+#else
+                if (timeout_ms < 0) {
+                        pthread_cond_wait(&ni->ni_cond, &ni->ni_mutex);
+                } else {
+                        gettimeofday(&then, NULL);
+                        
+                        ts.tv_sec = then.tv_sec + timeout_ms/1000;
+                        ts.tv_nsec = then.tv_usec * 1000 + 
+                                     (timeout_ms%1000) * 1000000;
+                        if (ts.tv_nsec >= 1000000000) {
+                                ts.tv_sec++;
+                                ts.tv_nsec -= 1000000000;
+                        }
+                        
+                        pthread_cond_timedwait(&ni->ni_cond,
+                                               &ni->ni_mutex, &ts);
+                        
+                        gettimeofday(&now, NULL);
+                        timeout_ms -= (now.tv_sec - then.tv_sec) * 1000 +
+                                      (now.tv_usec - then.tv_usec) / 1000;
+                        
+                        if (timeout_ms < 0)
+                                timeout_ms = 0;
+                }
+#endif
+        }
 }
index c62dbc2..9d97bc1 100644 (file)
@@ -41,7 +41,7 @@
 #ifndef PTL_USE_LIB_FREELIST
 
 int
-kportal_descriptor_setup (nal_cb_t *nal,
+kportal_descriptor_setup (lib_nal_t *nal,
                           ptl_ni_limits_t *requested_limits,
                           ptl_ni_limits_t *actual_limits)
 {
@@ -54,13 +54,13 @@ kportal_descriptor_setup (nal_cb_t *nal,
 }
 
 void
-kportal_descriptor_cleanup (nal_cb_t *nal)
+kportal_descriptor_cleanup (lib_nal_t *nal)
 {
 }
 #else
 
 int
-lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int n, int size)
+lib_freelist_init (lib_nal_t *nal, lib_freelist_t *fl, int n, int size)
 {
         char *space;
 
@@ -68,7 +68,7 @@ lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int n, int size)
 
         size += offsetof (lib_freeobj_t, fo_contents);
 
-        space = nal->cb_malloc (nal, n * size);
+        PORTAL_ALLOC(space, n * size);
         if (space == NULL)
                 return (PTL_NO_SPACE);
 
@@ -88,7 +88,7 @@ lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int n, int size)
 }
 
 void
-lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl)
+lib_freelist_fini (lib_nal_t *nal, lib_freelist_t *fl)
 {
         struct list_head *el;
         int               count;
@@ -102,23 +102,24 @@ lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl)
 
         LASSERT (count == fl->fl_nobjs);
 
-        nal->cb_free (nal, fl->fl_objs, fl->fl_nobjs * fl->fl_objsize);
+        PORTAL_FREE(fl->fl_objs, fl->fl_nobjs * fl->fl_objsize);
         memset (fl, 0, sizeof (fl));
 }
 
 int
-kportal_descriptor_setup (nal_cb_t *nal,
+kportal_descriptor_setup (lib_nal_t *nal,
                           ptl_ni_limits_t *requested_limits,
                           ptl_ni_limits_t *actual_limits)
 {
         /* NB on failure caller must still call kportal_descriptor_cleanup */
         /*               ******                                            */
-        int rc;
+        lib_ni_t  *ni = &nal->libnal_ni;
+        int        rc;
 
-        memset (&nal->ni.ni_free_mes,  0, sizeof (nal->ni.ni_free_mes));
-        memset (&nal->ni.ni_free_msgs, 0, sizeof (nal->ni.ni_free_msgs));
-        memset (&nal->ni.ni_free_mds,  0, sizeof (nal->ni.ni_free_mds));
-        memset (&nal->ni.ni_free_eqs,  0, sizeof (nal->ni.ni_free_eqs));
+        memset (&ni->ni_free_mes,  0, sizeof (ni->ni_free_mes));
+        memset (&ni->ni_free_msgs, 0, sizeof (ni->ni_free_msgs));
+        memset (&ni->ni_free_mds,  0, sizeof (ni->ni_free_mds));
+        memset (&ni->ni_free_eqs,  0, sizeof (ni->ni_free_eqs));
 
         /* Ignore requested limits! */
         actual_limits->max_mes = MAX_MES;
@@ -127,39 +128,41 @@ kportal_descriptor_setup (nal_cb_t *nal,
         /* Hahahah what a load of bollocks.  There's nowhere to
          * specify the max # messages in-flight */
 
-        rc = lib_freelist_init (nal, &nal->ni.ni_free_mes,
+        rc = lib_freelist_init (nal, &ni->ni_free_mes,
                                 MAX_MES, sizeof (lib_me_t));
         if (rc != PTL_OK)
                 return (rc);
 
-        rc = lib_freelist_init (nal, &nal->ni.ni_free_msgs,
+        rc = lib_freelist_init (nal, &ni->ni_free_msgs,
                                 MAX_MSGS, sizeof (lib_msg_t));
         if (rc != PTL_OK)
                 return (rc);
 
-        rc = lib_freelist_init (nal, &nal->ni.ni_free_mds,
+        rc = lib_freelist_init (nal, &ni->ni_free_mds,
                                 MAX_MDS, sizeof (lib_md_t));
         if (rc != PTL_OK)
                 return (rc);
 
-        rc = lib_freelist_init (nal, &nal->ni.ni_free_eqs,
+        rc = lib_freelist_init (nal, &ni->ni_free_eqs,
                                 MAX_EQS, sizeof (lib_eq_t));
         return (rc);
 }
 
 void
-kportal_descriptor_cleanup (nal_cb_t *nal)
+kportal_descriptor_cleanup (lib_nal_t *nal)
 {
-        lib_freelist_fini (nal, &nal->ni.ni_free_mes);
-        lib_freelist_fini (nal, &nal->ni.ni_free_msgs);
-        lib_freelist_fini (nal, &nal->ni.ni_free_mds);
-        lib_freelist_fini (nal, &nal->ni.ni_free_eqs);
+        lib_ni_t   *ni = &nal->libnal_ni;
+        
+        lib_freelist_fini (nal, &ni->ni_free_mes);
+        lib_freelist_fini (nal, &ni->ni_free_msgs);
+        lib_freelist_fini (nal, &ni->ni_free_mds);
+        lib_freelist_fini (nal, &ni->ni_free_eqs);
 }
 
 #endif
 
 __u64
-lib_create_interface_cookie (nal_cb_t *nal)
+lib_create_interface_cookie (lib_nal_t *nal)
 {
         /* NB the interface cookie in wire handles guards against delayed
          * replies and ACKs appearing valid in a new instance of the same
@@ -180,9 +183,9 @@ lib_create_interface_cookie (nal_cb_t *nal)
 }
 
 int
-lib_setup_handle_hash (nal_cb_t *nal) 
+lib_setup_handle_hash (lib_nal_t *nal) 
 {
-        lib_ni_t *ni = &nal->ni;
+        lib_ni_t *ni = &nal->libnal_ni;
         int       i;
         
         /* Arbitrary choice of hash table size */
@@ -191,9 +194,8 @@ lib_setup_handle_hash (nal_cb_t *nal)
 #else
         ni->ni_lh_hash_size = (MAX_MES + MAX_MDS + MAX_EQS)/4;
 #endif
-        ni->ni_lh_hash_table = 
-                (struct list_head *)nal->cb_malloc (nal, ni->ni_lh_hash_size
-                                                    * sizeof (struct list_head));
+        PORTAL_ALLOC(ni->ni_lh_hash_table,
+                     ni->ni_lh_hash_size * sizeof (struct list_head));
         if (ni->ni_lh_hash_table == NULL)
                 return (PTL_NO_SPACE);
         
@@ -206,22 +208,22 @@ lib_setup_handle_hash (nal_cb_t *nal)
 }
 
 void
-lib_cleanup_handle_hash (nal_cb_t *nal)
+lib_cleanup_handle_hash (lib_nal_t *nal)
 {
-        lib_ni_t *ni = &nal->ni;
+        lib_ni_t *ni = &nal->libnal_ni;
 
         if (ni->ni_lh_hash_table == NULL)
                 return;
         
-        nal->cb_free (nal, ni->ni_lh_hash_table,
-                      ni->ni_lh_hash_size * sizeof (struct list_head));
+        PORTAL_FREE(ni->ni_lh_hash_table,
+                    ni->ni_lh_hash_size * sizeof (struct list_head));
 }
 
 lib_handle_t *
-lib_lookup_cookie (nal_cb_t *nal, __u64 cookie, int type) 
+lib_lookup_cookie (lib_nal_t *nal, __u64 cookie, int type) 
 {
         /* ALWAYS called with statelock held */
-        lib_ni_t            *ni = &nal->ni;
+        lib_ni_t            *ni = &nal->libnal_ni;
         struct list_head    *list;
         struct list_head    *el;
         unsigned int         hash;
@@ -243,10 +245,10 @@ lib_lookup_cookie (nal_cb_t *nal, __u64 cookie, int type)
 }
 
 void
-lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh, int type) 
+lib_initialise_handle (lib_nal_t *nal, lib_handle_t *lh, int type) 
 {
         /* ALWAYS called with statelock held */
-        lib_ni_t       *ni = &nal->ni;
+        lib_ni_t       *ni = &nal->libnal_ni;
         unsigned int    hash;
 
         LASSERT (type >= 0 && type < PTL_COOKIE_TYPES);
@@ -258,95 +260,120 @@ lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh, int type)
 }
 
 void
-lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh)
+lib_invalidate_handle (lib_nal_t *nal, lib_handle_t *lh)
 {
         list_del (&lh->lh_hash_chain);
 }
 
 int
-lib_init(nal_cb_t *nal, ptl_process_id_t process_id,
+lib_init(lib_nal_t *libnal, nal_t *apinal, 
+         ptl_process_id_t process_id,
          ptl_ni_limits_t *requested_limits,
          ptl_ni_limits_t *actual_limits)
 {
         int       rc = PTL_OK;
-        lib_ni_t *ni = &nal->ni;
-        int ptl_size;
-        int i;
+        lib_ni_t *ni = &libnal->libnal_ni;
+        int       ptl_size;
+        int       i;
         ENTRY;
 
         /* NB serialised in PtlNIInit() */
 
         lib_assert_wire_constants ();
-        
-        /*
-         * Allocate the portal table for this interface
-         * and all per-interface objects.
-         */
-        memset(&ni->counters, 0, sizeof(lib_counters_t));
 
-        rc = kportal_descriptor_setup (nal, requested_limits, 
-                                       &ni->actual_limits);
+        /* Setup the API nal with the lib API handling functions */
+        apinal->nal_get_id    = lib_api_get_id;
+        apinal->nal_ni_status = lib_api_ni_status;
+        apinal->nal_ni_dist   = lib_api_ni_dist;
+        apinal->nal_fail_nid  = lib_api_fail_nid;
+        apinal->nal_me_attach = lib_api_me_attach;
+        apinal->nal_me_insert = lib_api_me_insert;
+        apinal->nal_me_unlink = lib_api_me_unlink;
+        apinal->nal_md_attach = lib_api_md_attach;
+        apinal->nal_md_bind   = lib_api_md_bind;
+        apinal->nal_md_unlink = lib_api_md_unlink;
+        apinal->nal_md_update = lib_api_md_update;
+        apinal->nal_eq_alloc  = lib_api_eq_alloc;
+        apinal->nal_eq_free   = lib_api_eq_free;
+        apinal->nal_eq_poll   = lib_api_eq_poll;
+        apinal->nal_put       = lib_api_put;
+        apinal->nal_get       = lib_api_get;
+
+        apinal->nal_data      = libnal;
+        ni->ni_api            = apinal;
+
+        rc = kportal_descriptor_setup (libnal, requested_limits, 
+                                       &ni->ni_actual_limits);
         if (rc != PTL_OK)
                 goto out;
 
+        memset(&ni->ni_counters, 0, sizeof(lib_counters_t));
+
         INIT_LIST_HEAD (&ni->ni_active_msgs);
         INIT_LIST_HEAD (&ni->ni_active_mds);
         INIT_LIST_HEAD (&ni->ni_active_eqs);
-
         INIT_LIST_HEAD (&ni->ni_test_peers);
 
-        ni->ni_interface_cookie = lib_create_interface_cookie (nal);
+#ifdef __KERNEL__
+        spin_lock_init (&ni->ni_lock);
+        init_waitqueue_head (&ni->ni_waitq);
+#else
+        pthread_mutex_init(&ni->ni_mutex, NULL);
+        pthread_cond_init(&ni->ni_cond, NULL);
+#endif
+
+        ni->ni_interface_cookie = lib_create_interface_cookie (libnal);
         ni->ni_next_object_cookie = 0;
-        rc = lib_setup_handle_hash (nal);
+        rc = lib_setup_handle_hash (libnal);
         if (rc != PTL_OK)
                 goto out;
         
-        ni->nid = process_id.nid;
-        ni->pid = process_id.pid;
+        ni->ni_pid = process_id;
 
         if (requested_limits != NULL)
                 ptl_size = requested_limits->max_pt_index + 1;
         else
                 ptl_size = 64;
 
-        ni->tbl.size = ptl_size;
-        ni->tbl.tbl = nal->cb_malloc(nal, sizeof(struct list_head) * ptl_size);
-        if (ni->tbl.tbl == NULL) {
+        ni->ni_portals.size = ptl_size;
+        PORTAL_ALLOC(ni->ni_portals.tbl,
+                     ptl_size * sizeof(struct list_head));
+        if (ni->ni_portals.tbl == NULL) {
                 rc = PTL_NO_SPACE;
                 goto out;
         }
 
         for (i = 0; i < ptl_size; i++)
-                INIT_LIST_HEAD(&(ni->tbl.tbl[i]));
+                INIT_LIST_HEAD(&(ni->ni_portals.tbl[i]));
 
         /* max_{mes,mds,eqs} set in kportal_descriptor_setup */
 
         /* We don't have an access control table! */
-        ni->actual_limits.max_ac_index = -1;
+        ni->ni_actual_limits.max_ac_index = -1;
 
-        ni->actual_limits.max_pt_index = ptl_size - 1;
-        ni->actual_limits.max_md_iovecs = PTL_MD_MAX_IOV;
-        ni->actual_limits.max_me_list = INT_MAX;
+        ni->ni_actual_limits.max_pt_index = ptl_size - 1;
+        ni->ni_actual_limits.max_md_iovecs = PTL_MD_MAX_IOV;
+        ni->ni_actual_limits.max_me_list = INT_MAX;
 
         /* We don't support PtlGetPut! */
-        ni->actual_limits.max_getput_md = 0;
+        ni->ni_actual_limits.max_getput_md = 0;
 
         if (actual_limits != NULL)
-                *actual_limits = ni->actual_limits;
+                *actual_limits = ni->ni_actual_limits;
 
  out:
         if (rc != PTL_OK) {
-                lib_cleanup_handle_hash (nal);
-                kportal_descriptor_cleanup (nal);
+                lib_cleanup_handle_hash (libnal);
+                kportal_descriptor_cleanup (libnal);
         }
 
         RETURN (rc);
 }
 
 int
-lib_fini(nal_cb_t * nal)
+lib_fini(lib_nal_t *nal)
 {
-        lib_ni_t *ni = &nal->ni;
+        lib_ni_t *ni = &nal->libnal_ni;
         int       idx;
 
         /* NB no state_lock() since this is the last reference.  The NAL
@@ -355,9 +382,9 @@ lib_fini(nal_cb_t * nal)
          * network op (eg MD with non-zero pending count)
          */
 
-        for (idx = 0; idx < ni->tbl.size; idx++)
-                while (!list_empty (&ni->tbl.tbl[idx])) {
-                        lib_me_t *me = list_entry (ni->tbl.tbl[idx].next,
+        for (idx = 0; idx < ni->ni_portals.size; idx++)
+                while (!list_empty (&ni->ni_portals.tbl[idx])) {
+                        lib_me_t *me = list_entry (ni->ni_portals.tbl[idx].next,
                                                    lib_me_t, me_list);
 
                         CERROR ("Active me %p on exit\n", me);
@@ -392,10 +419,16 @@ lib_fini(nal_cb_t * nal)
                 lib_msg_free (nal, msg);
         }
 
-        nal->cb_free(nal, ni->tbl.tbl, sizeof(struct list_head) * ni->tbl.size);
+        PORTAL_FREE(ni->ni_portals.tbl,  
+                    ni->ni_portals.size * sizeof(struct list_head));
 
         lib_cleanup_handle_hash (nal);
         kportal_descriptor_cleanup (nal);
 
+#ifndef __KERNEL__
+        pthread_mutex_destroy(&ni->ni_mutex);
+        pthread_cond_destroy(&ni->ni_cond);
+#endif
+
         return (PTL_OK);
 }
index 64a55b9..a4df791 100644 (file)
 #endif
 
 #include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
 
 /* must be called with state lock held */
-void lib_md_unlink(nal_cb_t * nal, lib_md_t * md)
+void
+lib_md_unlink(lib_nal_t *nal, lib_md_t *md)
 {
         if ((md->md_flags & PTL_MD_FLAG_ZOMBIE) == 0) {
                 /* first unlink attempt... */
@@ -62,12 +62,15 @@ void lib_md_unlink(nal_cb_t * nal, lib_md_t * md)
         CDEBUG(D_NET, "Unlinking md %p\n", md);
 
         if ((md->options & PTL_MD_KIOV) != 0) {
-                if (nal->cb_unmap_pages != NULL)
-                        nal->cb_unmap_pages (nal, md->md_niov, md->md_iov.kiov, 
-                                             &md->md_addrkey);
-        } else if (nal->cb_unmap != NULL) {
-                nal->cb_unmap (nal, md->md_niov, md->md_iov.iov, 
-                               &md->md_addrkey);
+                if (nal->libnal_unmap_pages != NULL)
+                        nal->libnal_unmap_pages (nal, 
+                                                 md->md_niov, 
+                                                 md->md_iov.kiov, 
+                                                 &md->md_addrkey);
+        } else if (nal->libnal_unmap != NULL) {
+                nal->libnal_unmap (nal, 
+                                   md->md_niov, md->md_iov.iov, 
+                                   &md->md_addrkey);
         }
 
         if (md->eq != NULL) {
@@ -80,124 +83,124 @@ void lib_md_unlink(nal_cb_t * nal, lib_md_t * md)
 }
 
 /* must be called with state lock held */
-static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private,
-                        ptl_md_t *md, ptl_handle_eq_t *eqh, int unlink)
+static int
+lib_md_build(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd, int unlink)
 {
         lib_eq_t     *eq = NULL;
         int           rc;
         int           i;
         int           niov;
+        int           total_length = 0;
 
         /* NB we are passed an allocated, but uninitialised/active md.
          * if we return success, caller may lib_md_unlink() it.
          * otherwise caller may only lib_md_free() it.
          */
 
-        if (!PtlHandleIsEqual (*eqh, PTL_EQ_NONE)) {
-                eq = ptl_handle2eq(eqh, nal);
+        if (!PtlHandleIsEqual (umd->eventq, PTL_EQ_NONE)) {
+                eq = ptl_handle2eq(&umd->eventq, nal);
                 if (eq == NULL)
                         return PTL_EQ_INVALID;
         }
 
-        /* Must check this _before_ allocation.  Also, note that non-iov
-         * MDs must set md_niov to 0. */
-        LASSERT((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0 ||
-                md->length <= PTL_MD_MAX_IOV);
-
         /* This implementation doesn't know how to create START events or
          * disable END events.  Best to LASSERT our caller is compliant so
          * we find out quickly...  */
-        LASSERT (PtlHandleIsEqual (*eqh, PTL_EQ_NONE) ||
-                 ((md->options & PTL_MD_EVENT_START_DISABLE) != 0 &&
-                  (md->options & PTL_MD_EVENT_END_DISABLE) == 0));
-
-        if ((md->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
-            (md->max_size < 0 || md->max_size > md->length)) // illegal max_size
-                return PTL_MD_INVALID;
-
-        new->me = NULL;
-        new->start = md->start;
-        new->offset = 0;
-        new->max_size = md->max_size;
-        new->options = md->options;
-        new->user_ptr = md->user_ptr;
-        new->eq = eq;
-        new->threshold = md->threshold;
-        new->pending = 0;
-        new->md_flags = (unlink == PTL_UNLINK) ? PTL_MD_FLAG_AUTO_UNLINK : 0;
-
-        if ((md->options & PTL_MD_IOVEC) != 0) {
-                int total_length = 0;
-
-                if ((md->options & PTL_MD_KIOV) != 0) /* Can't specify both */
-                        return PTL_MD_INVALID; 
-
-                new->md_niov = niov = md->length;
-                
-                if (nal->cb_read (nal, private, new->md_iov.iov, md->start,
-                                  niov * sizeof (new->md_iov.iov[0])))
-                        return PTL_SEGV;
+        LASSERT (eq == NULL ||
+                 ((umd->options & PTL_MD_EVENT_START_DISABLE) != 0 &&
+                  (umd->options & PTL_MD_EVENT_END_DISABLE) == 0));
+
+        lmd->me = NULL;
+        lmd->start = umd->start;
+        lmd->offset = 0;
+        lmd->max_size = umd->max_size;
+        lmd->options = umd->options;
+        lmd->user_ptr = umd->user_ptr;
+        lmd->eq = eq;
+        lmd->threshold = umd->threshold;
+        lmd->pending = 0;
+        lmd->md_flags = (unlink == PTL_UNLINK) ? PTL_MD_FLAG_AUTO_UNLINK : 0;
+
+        if ((umd->options & PTL_MD_IOVEC) != 0) {
+
+                if ((umd->options & PTL_MD_KIOV) != 0) /* Can't specify both */
+                        return PTL_MD_ILLEGAL; 
+
+                lmd->md_niov = niov = umd->length;
+                memcpy(lmd->md_iov.iov, umd->start,
+                       niov * sizeof (lmd->md_iov.iov[0]));
 
                 for (i = 0; i < niov; i++) {
                         /* We take the base address on trust */
-                        if (new->md_iov.iov[i].iov_len <= 0) /* invalid length */
-                                return PTL_VAL_FAILED;
+                        if (lmd->md_iov.iov[i].iov_len <= 0) /* invalid length */
+                                return PTL_MD_ILLEGAL;
 
-                        total_length += new->md_iov.iov[i].iov_len;
+                        total_length += lmd->md_iov.iov[i].iov_len;
                 }
 
-                new->length = total_length;
+                lmd->length = total_length;
 
-                if (nal->cb_map != NULL) {
-                        rc = nal->cb_map (nal, niov, new->md_iov.iov, 
-                                          &new->md_addrkey);
+                if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
+                    (umd->max_size < 0 || 
+                     umd->max_size > total_length)) // illegal max_size
+                        return PTL_MD_ILLEGAL;
+
+                if (nal->libnal_map != NULL) {
+                        rc = nal->libnal_map (nal, niov, lmd->md_iov.iov, 
+                                              &lmd->md_addrkey);
                         if (rc != PTL_OK)
                                 return (rc);
                 }
-        } else if ((md->options & PTL_MD_KIOV) != 0) {
+        } else if ((umd->options & PTL_MD_KIOV) != 0) {
 #ifndef __KERNEL__
-                return PTL_MD_INVALID;
-#else
-                int total_length = 0;
-                
+                return PTL_MD_ILLEGAL;
+#else                
                 /* Trap attempt to use paged I/O if unsupported early. */
-                if (nal->cb_send_pages == NULL ||
-                    nal->cb_recv_pages == NULL)
+                if (nal->libnal_send_pages == NULL ||
+                    nal->libnal_recv_pages == NULL)
                         return PTL_MD_INVALID;
 
-                new->md_niov = niov = md->length;
+                lmd->md_niov = niov = umd->length;
+                memcpy(lmd->md_iov.kiov, umd->start,
+                       niov * sizeof (lmd->md_iov.kiov[0]));
 
-                if (nal->cb_read (nal, private, new->md_iov.kiov, md->start,
-                                  niov * sizeof (new->md_iov.kiov[0])))
-                        return PTL_SEGV;
-                
                 for (i = 0; i < niov; i++) {
                         /* We take the page pointer on trust */
-                        if (new->md_iov.kiov[i].kiov_offset + 
-                            new->md_iov.kiov[i].kiov_len > PAGE_SIZE )
+                        if (lmd->md_iov.kiov[i].kiov_offset + 
+                            lmd->md_iov.kiov[i].kiov_len > PAGE_SIZE )
                                 return PTL_VAL_FAILED; /* invalid length */
 
-                        total_length += new->md_iov.kiov[i].kiov_len;
+                        total_length += lmd->md_iov.kiov[i].kiov_len;
                 }
 
-                new->length = total_length;
+                lmd->length = total_length;
+
+                if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
+                    (umd->max_size < 0 || 
+                     umd->max_size > total_length)) // illegal max_size
+                        return PTL_MD_ILLEGAL;
 
-                if (nal->cb_map_pages != NULL) {
-                        rc = nal->cb_map_pages (nal, niov, new->md_iov.kiov, 
-                                                &new->md_addrkey);
+                if (nal->libnal_map_pages != NULL) {
+                        rc = nal->libnal_map_pages (nal, niov, lmd->md_iov.kiov, 
+                                                    &lmd->md_addrkey);
                         if (rc != PTL_OK)
                                 return (rc);
                 }
 #endif
         } else {   /* contiguous */
-                new->length = md->length;
-                new->md_niov = niov = 1;
-                new->md_iov.iov[0].iov_base = md->start;
-                new->md_iov.iov[0].iov_len = md->length;
-
-                if (nal->cb_map != NULL) {
-                        rc = nal->cb_map (nal, niov, new->md_iov.iov, 
-                                          &new->md_addrkey);
+                lmd->length = umd->length;
+                lmd->md_niov = niov = 1;
+                lmd->md_iov.iov[0].iov_base = umd->start;
+                lmd->md_iov.iov[0].iov_len = umd->length;
+
+                if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
+                    (umd->max_size < 0 || 
+                     umd->max_size > umd->length)) // illegal max_size
+                        return PTL_MD_ILLEGAL;
+
+                if (nal->libnal_map != NULL) {
+                        rc = nal->libnal_map (nal, niov, lmd->md_iov.iov, 
+                                              &lmd->md_addrkey);
                         if (rc != PTL_OK)
                                 return (rc);
                 }
@@ -207,140 +210,125 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private,
                 eq->eq_refcount++;
 
         /* It's good; let handle2md succeed and add to active mds */
-        lib_initialise_handle (nal, &new->md_lh, PTL_COOKIE_TYPE_MD);
-        list_add (&new->md_list, &nal->ni.ni_active_mds);
+        lib_initialise_handle (nal, &lmd->md_lh, PTL_COOKIE_TYPE_MD);
+        list_add (&lmd->md_list, &nal->libnal_ni.ni_active_mds);
 
         return PTL_OK;
 }
 
 /* must be called with state lock held */
-void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md, ptl_md_t * new)
+void
+lib_md_deconstruct(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd)
 {
         /* NB this doesn't copy out all the iov entries so when a
          * discontiguous MD is copied out, the target gets to know the
          * original iov pointer (in start) and the number of entries it had
          * and that's all.
          */
-        new->start = md->start;
-        new->length = ((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0) ?
-                      md->length : md->md_niov;
-        new->threshold = md->threshold;
-        new->max_size = md->max_size;
-        new->options = md->options;
-        new->user_ptr = md->user_ptr;
-        ptl_eq2handle(&new->eventq, md->eq);
+        umd->start = lmd->start;
+        umd->length = ((lmd->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0) ?
+                      lmd->length : lmd->md_niov;
+        umd->threshold = lmd->threshold;
+        umd->max_size = lmd->max_size;
+        umd->options = lmd->options;
+        umd->user_ptr = lmd->user_ptr;
+        ptl_eq2handle(&umd->eventq, nal, lmd->eq);
 }
 
-int do_PtlMDAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int 
+lib_api_md_attach(nal_t *apinal, ptl_handle_me_t *meh,
+                  ptl_md_t *umd, ptl_unlink_t unlink, 
+                  ptl_handle_md_t *handle)
 {
-        /*
-         * Incoming:
-         *      ptl_handle_me_t current_in
-         *      ptl_md_t md_in
-         *      ptl_unlink_t unlink_in
-         *
-         * Outgoing:
-         *      ptl_handle_md_t         * handle_out
-         */
-
-        PtlMDAttach_in *args = v_args;
-        PtlMDAttach_out *ret = v_ret;
-        lib_me_t *me;
-        lib_md_t *md;
+        lib_nal_t    *nal = apinal->nal_data;
+        lib_me_t     *me;
+        lib_md_t     *md;
         unsigned long flags;
+        int           rc;
 
-        if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 &&
-            args->md_in.length > PTL_MD_MAX_IOV) /* too many fragments */
-                return (ret->rc = PTL_IOV_INVALID);
+        if ((umd->options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 &&
+            umd->length > PTL_MD_MAX_IOV) /* too many fragments */
+                return PTL_IOV_INVALID;
 
-        md = lib_md_alloc(nal, &args->md_in);
+        md = lib_md_alloc(nal, umd);
         if (md == NULL)
-                return (ret->rc = PTL_NO_SPACE);
+                return PTL_NO_SPACE;
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
-        me = ptl_handle2me(&args->me_in, nal);
+        me = ptl_handle2me(meh, nal);
         if (me == NULL) {
-                ret->rc = PTL_ME_INVALID;
+                rc = PTL_ME_INVALID;
         } else if (me->md != NULL) {
-                ret->rc = PTL_ME_IN_USE;
+                rc = PTL_ME_IN_USE;
         } else {
-                ret->rc = lib_md_build(nal, md, private, &args->md_in,
-                                       &args->eq_in, args->unlink_in);
-
-                if (ret->rc == PTL_OK) {
+                rc = lib_md_build(nal, md, umd, unlink);
+                if (rc == PTL_OK) {
                         me->md = md;
                         md->me = me;
 
-                        ptl_md2handle(&ret->handle_out, md);
+                        ptl_md2handle(handle, nal, md);
 
-                        state_unlock (nal, &flags);
+                        LIB_UNLOCK(nal, flags);
                         return (PTL_OK);
                 }
         }
 
         lib_md_free (nal, md);
 
-        state_unlock (nal, &flags);
-        return (ret->rc);
+        LIB_UNLOCK(nal, flags);
+        return (rc);
 }
 
-int do_PtlMDBind(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int
+lib_api_md_bind(nal_t *apinal, 
+                ptl_md_t *umd, ptl_unlink_t unlink,
+                ptl_handle_md_t *handle)
 {
-        /*
-         * Incoming:
-         *      ptl_handle_ni_t ni_in
-         *      ptl_md_t md_in
-         *
-         * Outgoing:
-         *      ptl_handle_md_t         * handle_out
-         */
-
-        PtlMDBind_in *args = v_args;
-        PtlMDBind_out *ret = v_ret;
-        lib_md_t *md;
+        lib_nal_t    *nal = apinal->nal_data;
+        lib_md_t     *md;
         unsigned long flags;
+        int           rc;
 
-        if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 &&
-            args->md_in.length > PTL_MD_MAX_IOV) /* too many fragments */
-                return (ret->rc = PTL_IOV_INVALID);
+        if ((umd->options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 &&
+            umd->length > PTL_MD_MAX_IOV) /* too many fragments */
+                return PTL_IOV_INVALID;
 
-        md = lib_md_alloc(nal, &args->md_in);
+        md = lib_md_alloc(nal, umd);
         if (md == NULL)
-                return (ret->rc = PTL_NO_SPACE);
+                return PTL_NO_SPACE;
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
-        ret->rc = lib_md_build(nal, md, private, &args->md_in, 
-                               &args->eq_in, args->unlink_in);
+        rc = lib_md_build(nal, md, umd, unlink);
 
-        if (ret->rc == PTL_OK) {
-                ptl_md2handle(&ret->handle_out, md);
+        if (rc == PTL_OK) {
+                ptl_md2handle(handle, nal, md);
 
-                state_unlock(nal, &flags);
+                LIB_UNLOCK(nal, flags);
                 return (PTL_OK);
         }
 
         lib_md_free (nal, md);
 
-        state_unlock(nal, &flags);
-        return (ret->rc);
+        LIB_UNLOCK(nal, flags);
+        return (rc);
 }
 
-int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int
+lib_api_md_unlink (nal_t *apinal, ptl_handle_md_t *mdh)
 {
-        PtlMDUnlink_in  *args = v_args;
-        PtlMDUnlink_out *ret = v_ret;
+        lib_nal_t       *nal = apinal->nal_data;
         ptl_event_t      ev;
         lib_md_t        *md;
         unsigned long    flags;
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
-        md = ptl_handle2md(&args->md_in, nal);
+        md = ptl_handle2md(mdh, nal);
         if (md == NULL) {
-                state_unlock(nal, &flags);
-                return (ret->rc = PTL_MD_INVALID);
+                LIB_UNLOCK(nal, flags);
+                return PTL_MD_INVALID;
         }
 
         /* If the MD is busy, lib_md_unlink just marks it for deletion, and
@@ -356,95 +344,82 @@ int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
                 ev.unlinked = 1;
                 lib_md_deconstruct(nal, md, &ev.mem_desc);
                 
-                lib_enq_event_locked(nal, private, md->eq, &ev);
+                lib_enq_event_locked(nal, NULL, md->eq, &ev);
         }
 
-        lib_md_deconstruct(nal, md, &ret->status_out);
         lib_md_unlink(nal, md);
-        ret->rc = PTL_OK;
 
-        state_unlock(nal, &flags);
-
-        return (PTL_OK);
+        LIB_UNLOCK(nal, flags);
+        return PTL_OK;
 }
 
-int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *v_args,
-                            void *v_ret)
+int
+lib_api_md_update (nal_t *apinal,
+                   ptl_handle_md_t *mdh,
+                   ptl_md_t *oldumd, ptl_md_t *newumd,
+                   ptl_handle_eq_t *testqh)
 {
-        /*
-         * Incoming:
-         *      ptl_handle_md_t md_in
-         *      ptl_md_t                * old_inout
-         *      ptl_md_t                * new_inout
-         *      ptl_handle_eq_t testq_in
-         *      ptl_seq_t               sequence_in
-         *
-         * Outgoing:
-         *      ptl_md_t                * old_inout
-         *      ptl_md_t                * new_inout
-         */
-        PtlMDUpdate_internal_in *args = v_args;
-        PtlMDUpdate_internal_out *ret = v_ret;
-        lib_md_t *md;
-        lib_eq_t *test_eq = NULL;
-        ptl_md_t *new = &args->new_inout;
+        lib_nal_t    *nal = apinal->nal_data;
+        lib_md_t     *md;
+        lib_eq_t     *test_eq = NULL;
         unsigned long flags;
+        int           rc;
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
-        md = ptl_handle2md(&args->md_in, nal);
+        md = ptl_handle2md(mdh, nal);
         if (md == NULL) {
-                 ret->rc = PTL_MD_INVALID;
+                 rc = PTL_MD_INVALID;
                  goto out;
         }
 
-        if (args->old_inout_valid)
-                lib_md_deconstruct(nal, md, &ret->old_inout);
+        if (oldumd != NULL)
+                lib_md_deconstruct(nal, md, oldumd);
 
-        if (!args->new_inout_valid) {
-                ret->rc = PTL_OK;
+        if (newumd == NULL) {
+                rc = PTL_OK;
                 goto out;
         }
 
         /* XXX fttb, the new MD must be the same "shape" wrt fragmentation,
          * since we simply overwrite the old lib-md */
-        if ((((new->options ^ md->options) & 
+        if ((((newumd->options ^ md->options) & 
               (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0) ||
-            ((new->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0 && 
-             new->length != md->md_niov)) {
-                ret->rc = PTL_IOV_INVALID;
+            ((newumd->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0 && 
+             newumd->length != md->md_niov)) {
+                rc = PTL_IOV_INVALID;
                 goto out;
         } 
 
-        if (!PtlHandleIsEqual (args->testq_in, PTL_EQ_NONE)) {
-                test_eq = ptl_handle2eq(&args->testq_in, nal);
+        if (!PtlHandleIsEqual (*testqh, PTL_EQ_NONE)) {
+                test_eq = ptl_handle2eq(testqh, nal);
                 if (test_eq == NULL) {
-                        ret->rc = PTL_EQ_INVALID;
+                        rc = PTL_EQ_INVALID;
                         goto out;
                 }
         }
 
         if (md->pending != 0) {
-                        ret->rc = PTL_MD_NO_UPDATE;
-                        goto out;
+                rc = PTL_MD_NO_UPDATE;
+                goto out;
         }
 
         if (test_eq == NULL ||
-            test_eq->sequence == args->sequence_in) {
+            test_eq->eq_deq_seq == test_eq->eq_enq_seq) {
                 lib_me_t *me = md->me;
                 int       unlink = (md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) ?
                                    PTL_UNLINK : PTL_RETAIN;
 
                 // #warning this does not track eq refcounts properly 
-                ret->rc = lib_md_build(nal, md, private,
-                                       new, &new->eventq, unlink);
+                rc = lib_md_build(nal, md, newumd, unlink);
 
                 md->me = me;
         } else {
-                ret->rc = PTL_MD_NO_UPDATE;
+                rc = PTL_MD_NO_UPDATE;
         }
 
  out:
-        state_unlock(nal, &flags);
-        return (ret->rc);
+        LIB_UNLOCK(nal, flags);
+
+        return rc;
 }
index 271fc82..9665b4f 100644 (file)
 #endif
 
 #include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
 
-static void lib_me_dump(nal_cb_t * nal, lib_me_t * me);
-
-int do_PtlMEAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int
+lib_api_me_attach(nal_t *apinal,
+                  ptl_pt_index_t portal,
+                  ptl_process_id_t match_id, 
+                  ptl_match_bits_t match_bits, 
+                  ptl_match_bits_t ignore_bits,
+                  ptl_unlink_t unlink, ptl_ins_pos_t pos,
+                  ptl_handle_me_t *handle)
 {
-        PtlMEAttach_in *args = v_args;
-        PtlMEAttach_out *ret = v_ret;
-        lib_ni_t *ni = &nal->ni;
-        lib_ptl_t *tbl = &ni->tbl;
+        lib_nal_t    *nal = apinal->nal_data;
+        lib_ni_t     *ni = &nal->libnal_ni;
+        lib_ptl_t    *tbl = &ni->ni_portals;
+        lib_me_t     *me;
         unsigned long flags;
-        lib_me_t *me;
 
-        if (args->index_in >= tbl->size)
-                return ret->rc = PTL_PT_INDEX_INVALID;
+        if (portal >= tbl->size)
+                return PTL_PT_INDEX_INVALID;
 
         /* Should check for valid matchid, but not yet */
-        if (0)
-                return ret->rc = PTL_PROCESS_INVALID;
 
         me = lib_me_alloc (nal);
         if (me == NULL)
-                return (ret->rc = PTL_NO_SPACE);
+                return PTL_NO_SPACE;
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
-        me->match_id = args->match_id_in;
-        me->match_bits = args->match_bits_in;
-        me->ignore_bits = args->ignore_bits_in;
-        me->unlink = args->unlink_in;
+        me->match_id = match_id;
+        me->match_bits = match_bits;
+        me->ignore_bits = ignore_bits;
+        me->unlink = unlink;
         me->md = NULL;
 
         lib_initialise_handle (nal, &me->me_lh, PTL_COOKIE_TYPE_ME);
 
-        if (args->position_in == PTL_INS_AFTER)
-                list_add_tail(&me->me_list, &(tbl->tbl[args->index_in]));
+        if (pos == PTL_INS_AFTER)
+                list_add_tail(&me->me_list, &(tbl->tbl[portal]));
         else
-                list_add(&me->me_list, &(tbl->tbl[args->index_in]));
+                list_add(&me->me_list, &(tbl->tbl[portal]));
 
-        ptl_me2handle(&ret->handle_out, me);
+        ptl_me2handle(handle, nal, me);
 
-        state_unlock(nal, &flags);
+        LIB_UNLOCK(nal, flags);
 
-        return ret->rc = PTL_OK;
+        return PTL_OK;
 }
 
-int do_PtlMEInsert(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int
+lib_api_me_insert(nal_t *apinal,
+                  ptl_handle_me_t *current_meh,
+                  ptl_process_id_t match_id, 
+                  ptl_match_bits_t match_bits, 
+                  ptl_match_bits_t ignore_bits,
+                  ptl_unlink_t unlink, ptl_ins_pos_t pos,
+                  ptl_handle_me_t *handle)
 {
-        PtlMEInsert_in *args = v_args;
-        PtlMEInsert_out *ret = v_ret;
+        lib_nal_t    *nal = apinal->nal_data;
+        lib_me_t     *current_me;
+        lib_me_t     *new_me;
         unsigned long flags;
-        lib_me_t *me;
-        lib_me_t *new;
 
-        new = lib_me_alloc (nal);
-        if (new == NULL)
-                return (ret->rc = PTL_NO_SPACE);
+        new_me = lib_me_alloc (nal);
+        if (new_me == NULL)
+                return PTL_NO_SPACE;
 
         /* Should check for valid matchid, but not yet */
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
-        me = ptl_handle2me(&args->current_in, nal);
-        if (me == NULL) {
-                lib_me_free (nal, new);
+        current_me = ptl_handle2me(current_meh, nal);
+        if (current_me == NULL) {
+                lib_me_free (nal, new_me);
 
-                state_unlock (nal, &flags);
-                return (ret->rc = PTL_ME_INVALID);
+                LIB_UNLOCK(nal, flags);
+                return PTL_ME_INVALID;
         }
 
-        new->match_id = args->match_id_in;
-        new->match_bits = args->match_bits_in;
-        new->ignore_bits = args->ignore_bits_in;
-        new->unlink = args->unlink_in;
-        new->md = NULL;
+        new_me->match_id = match_id;
+        new_me->match_bits = match_bits;
+        new_me->ignore_bits = ignore_bits;
+        new_me->unlink = unlink;
+        new_me->md = NULL;
 
-        lib_initialise_handle (nal, &new->me_lh, PTL_COOKIE_TYPE_ME);
+        lib_initialise_handle (nal, &new_me->me_lh, PTL_COOKIE_TYPE_ME);
 
-        if (args->position_in == PTL_INS_AFTER)
-                list_add_tail(&new->me_list, &me->me_list);
+        if (pos == PTL_INS_AFTER)
+                list_add_tail(&new_me->me_list, &current_me->me_list);
         else
-                list_add(&new->me_list, &me->me_list);
+                list_add(&new_me->me_list, &current_me->me_list);
 
-        ptl_me2handle(&ret->handle_out, new);
+        ptl_me2handle(handle, nal, new_me);
 
-        state_unlock(nal, &flags);
+        LIB_UNLOCK(nal, flags);
 
-        return ret->rc = PTL_OK;
+        return PTL_OK;
 }
 
-int do_PtlMEUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int
+lib_api_me_unlink (nal_t *apinal, ptl_handle_me_t *meh)
 {
-        PtlMEUnlink_in *args = v_args;
-        PtlMEUnlink_out *ret = v_ret;
+        lib_nal_t    *nal = apinal->nal_data;
         unsigned long flags;
-        lib_me_t *me;
+        lib_me_t     *me;
+        int           rc;
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
-        me = ptl_handle2me(&args->current_in, nal);
+        me = ptl_handle2me(meh, nal);
         if (me == NULL) {
-                ret->rc = PTL_ME_INVALID;
+                rc = PTL_ME_INVALID;
         } else {
                 lib_me_unlink(nal, me);
-                ret->rc = PTL_OK;
+                rc = PTL_OK;
         }
 
-        state_unlock(nal, &flags);
+        LIB_UNLOCK(nal, flags);
 
-        return (ret->rc);
+        return (rc);
 }
 
 /* call with state_lock please */
-void lib_me_unlink(nal_cb_t *nal, lib_me_t *me)
+void 
+lib_me_unlink(lib_nal_t *nal, lib_me_t *me)
 {
         list_del (&me->me_list);
 
@@ -157,64 +166,20 @@ void lib_me_unlink(nal_cb_t *nal, lib_me_t *me)
         lib_me_free(nal, me);
 }
 
-int do_PtlTblDump(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+#if 0
+static void 
+lib_me_dump(lib_nal_t *nal, lib_me_t * me)
 {
-        PtlTblDump_in *args = v_args;
-        PtlTblDump_out *ret = v_ret;
-        lib_ptl_t *tbl = &nal->ni.tbl;
-        ptl_handle_any_t handle;
-        struct list_head *tmp;
-        unsigned long flags;
+        CWARN("Match Entry %p ("LPX64")\n", me, 
+              me->me_lh.lh_cookie);
 
-        if (args->index_in < 0 || args->index_in >= tbl->size)
-                return ret->rc = PTL_PT_INDEX_INVALID;
-
-        nal->cb_printf(nal, "Portal table index %d\n", args->index_in);
-
-        state_lock(nal, &flags);
-        list_for_each(tmp, &(tbl->tbl[args->index_in])) {
-                lib_me_t *me = list_entry(tmp, lib_me_t, me_list);
-                ptl_me2handle(&handle, me);
-                lib_me_dump(nal, me);
-        }
-        state_unlock(nal, &flags);
+        CWARN("\tMatch/Ignore\t= %016lx / %016lx\n",
+              me->match_bits, me->ignore_bits);
 
-        return ret->rc = PTL_OK;
-}
-
-int do_PtlMEDump(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
-{
-        PtlMEDump_in *args = v_args;
-        PtlMEDump_out *ret = v_ret;
-        lib_me_t *me;
-        unsigned long flags;
-
-        state_lock(nal, &flags);
-
-        me = ptl_handle2me(&args->current_in, nal);
-        if (me == NULL) {
-                ret->rc = PTL_ME_INVALID;
-        } else {
-                lib_me_dump(nal, me);
-                ret->rc = PTL_OK;
-        }
-
-        state_unlock(nal, &flags);
-
-        return ret->rc;
-}
-
-static void lib_me_dump(nal_cb_t * nal, lib_me_t * me)
-{
-        nal->cb_printf(nal, "Match Entry %p ("LPX64")\n", me, 
-                       me->me_lh.lh_cookie);
-
-        nal->cb_printf(nal, "\tMatch/Ignore\t= %016lx / %016lx\n",
-                       me->match_bits, me->ignore_bits);
-
-        nal->cb_printf(nal, "\tMD\t= %p\n", me->md);
-        nal->cb_printf(nal, "\tprev\t= %p\n",
-                       list_entry(me->me_list.prev, lib_me_t, me_list));
-        nal->cb_printf(nal, "\tnext\t= %p\n",
-                       list_entry(me->me_list.next, lib_me_t, me_list));
+        CWARN("\tMD\t= %p\n", me->md);
+        CWARN("\tprev\t= %p\n",
+              list_entry(me->me_list.prev, lib_me_t, me_list));
+        CWARN("\tnext\t= %p\n",
+              list_entry(me->me_list.next, lib_me_t, me_list));
 }
+#endif
index 477ddf8..9dcc06e 100644 (file)
 #endif
 #include <portals/p30.h>
 #include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
 
 /* forward ref */
-static void lib_commit_md (nal_cb_t *nal, lib_md_t *md, lib_msg_t *msg);
+static void lib_commit_md (lib_nal_t *nal, lib_md_t *md, lib_msg_t *msg);
 
 static lib_md_t *
-lib_match_md(nal_cb_t *nal, int index, int op_mask, 
+lib_match_md(lib_nal_t *nal, int index, int op_mask, 
              ptl_nid_t src_nid, ptl_pid_t src_pid, 
              ptl_size_t rlength, ptl_size_t roffset,
              ptl_match_bits_t match_bits, lib_msg_t *msg,
              ptl_size_t *mlength_out, ptl_size_t *offset_out)
 {
-        lib_ni_t         *ni = &nal->ni;
-        struct list_head *match_list = &ni->tbl.tbl[index];
+        lib_ni_t         *ni = &nal->libnal_ni;
+        struct list_head *match_list = &ni->ni_portals.tbl[index];
         struct list_head *tmp;
         lib_me_t         *me;
         lib_md_t         *md;
@@ -55,9 +54,9 @@ lib_match_md(nal_cb_t *nal, int index, int op_mask,
         CDEBUG (D_NET, "Request from "LPU64".%d of length %d into portal %d "
                 "MB="LPX64"\n", src_nid, src_pid, rlength, index, match_bits);
 
-        if (index < 0 || index >= ni->tbl.size) {
+        if (index < 0 || index >= ni->ni_portals.size) {
                 CERROR("Invalid portal %d not in [0-%d]\n",
-                       index, ni->tbl.size);
+                       index, ni->ni_portals.size);
                 goto failed;
         }
 
@@ -153,66 +152,65 @@ lib_match_md(nal_cb_t *nal, int index, int op_mask,
  failed:
         CERROR (LPU64": Dropping %s from "LPU64".%d portal %d match "LPX64
                 " offset %d length %d: no match\n",
-                ni->nid, (op_mask == PTL_MD_OP_GET) ? "GET" : "PUT",
+                ni->ni_pid.nid, (op_mask == PTL_MD_OP_GET) ? "GET" : "PUT",
                 src_nid, src_pid, index, match_bits, roffset, rlength);
         RETURN(NULL);
 }
 
-int do_PtlFailNid (nal_cb_t *nal, void *private, void *v_args, void *v_ret)
+int lib_api_fail_nid (nal_t *apinal, ptl_nid_t nid, unsigned int threshold)
 {
-        PtlFailNid_in     *args = v_args;
-        PtlFailNid_out    *ret  = v_ret;
+        lib_nal_t         *nal = apinal->nal_data;
         lib_test_peer_t   *tp;
         unsigned long      flags;
         struct list_head  *el;
         struct list_head  *next;
         struct list_head   cull;
         
-        if (args->threshold != 0) {
+        if (threshold != 0) {
                 /* Adding a new entry */
-                tp = (lib_test_peer_t *)nal->cb_malloc (nal, sizeof (*tp));
+                PORTAL_ALLOC(tp, sizeof(*tp));
                 if (tp == NULL)
-                        return (ret->rc = PTL_FAIL);
+                        return PTL_NO_SPACE;
                 
-                tp->tp_nid = args->nid;
-                tp->tp_threshold = args->threshold;
+                tp->tp_nid = nid;
+                tp->tp_threshold = threshold;
                 
-                state_lock (nal, &flags);
-                list_add (&tp->tp_list, &nal->ni.ni_test_peers);
-                state_unlock (nal, &flags);
-                return (ret->rc = PTL_OK);
+                LIB_LOCK(nal, flags);
+                list_add_tail (&tp->tp_list, &nal->libnal_ni.ni_test_peers);
+                LIB_UNLOCK(nal, flags);
+                return PTL_OK;
         }
         
         /* removing entries */
         INIT_LIST_HEAD (&cull);
         
-        state_lock (nal, &flags);
+        LIB_LOCK(nal, flags);
 
-        list_for_each_safe (el, next, &nal->ni.ni_test_peers) {
+        list_for_each_safe (el, next, &nal->libnal_ni.ni_test_peers) {
                 tp = list_entry (el, lib_test_peer_t, tp_list);
                 
                 if (tp->tp_threshold == 0 ||    /* needs culling anyway */
-                    args->nid == PTL_NID_ANY || /* removing all entries */
-                    tp->tp_nid == args->nid)    /* matched this one */
+                    nid == PTL_NID_ANY ||       /* removing all entries */
+                    tp->tp_nid == nid)          /* matched this one */
                 {
                         list_del (&tp->tp_list);
                         list_add (&tp->tp_list, &cull);
                 }
         }
         
-        state_unlock (nal, &flags);
+        LIB_UNLOCK(nal, flags);
                 
         while (!list_empty (&cull)) {
                 tp = list_entry (cull.next, lib_test_peer_t, tp_list);
 
                 list_del (&tp->tp_list);
-                nal->cb_free (nal, tp, sizeof (*tp));
+                PORTAL_FREE(tp, sizeof (*tp));
         }
-        return (ret->rc = PTL_OK);
+        return PTL_OK;
 }
 
 static int
-fail_peer (nal_cb_t *nal, ptl_nid_t nid, int outgoing) 
+fail_peer (lib_nal_t *nal, ptl_nid_t nid, int outgoing) 
 {
         lib_test_peer_t  *tp;
         struct list_head *el;
@@ -223,9 +221,9 @@ fail_peer (nal_cb_t *nal, ptl_nid_t nid, int outgoing)
 
         INIT_LIST_HEAD (&cull);
         
-        state_lock (nal, &flags);
+        LIB_LOCK (nal, flags);
 
-        list_for_each_safe (el, next, &nal->ni.ni_test_peers) {
+        list_for_each_safe (el, next, &nal->libnal_ni.ni_test_peers) {
                 tp = list_entry (el, lib_test_peer_t, tp_list);
 
                 if (tp->tp_threshold == 0) {
@@ -257,13 +255,13 @@ fail_peer (nal_cb_t *nal, ptl_nid_t nid, int outgoing)
                 }
         }
         
-        state_unlock (nal, &flags);
+        LIB_UNLOCK (nal, flags);
 
         while (!list_empty (&cull)) {
                 tp = list_entry (cull.next, lib_test_peer_t, tp_list);
                 list_del (&tp->tp_list);
                 
-                nal->cb_free (nal, tp, sizeof (*tp));
+                PORTAL_FREE(tp, sizeof (*tp));
         }
 
         return (fail);
@@ -554,52 +552,52 @@ lib_extract_kiov (int dst_niov, ptl_kiov_t *dst,
 #endif
 
 ptl_err_t
-lib_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
+lib_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
           ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen)
 {
         if (mlen == 0)
-                return (nal->cb_recv(nal, private, msg,
-                                     0, NULL,
-                                     offset, mlen, rlen));
+                return (nal->libnal_recv(nal, private, msg,
+                                         0, NULL,
+                                         offset, mlen, rlen));
 
         if ((md->options & PTL_MD_KIOV) == 0)
-                return (nal->cb_recv(nal, private, msg,
-                                     md->md_niov, md->md_iov.iov, 
-                                     offset, mlen, rlen));
+                return (nal->libnal_recv(nal, private, msg,
+                                         md->md_niov, md->md_iov.iov, 
+                                         offset, mlen, rlen));
 
-        return (nal->cb_recv_pages(nal, private, msg, 
-                                   md->md_niov, md->md_iov.kiov,
-                                   offset, mlen, rlen));
+        return (nal->libnal_recv_pages(nal, private, msg, 
+                                       md->md_niov, md->md_iov.kiov,
+                                       offset, mlen, rlen));
 }
 
 ptl_err_t
-lib_send (nal_cb_t *nal, void *private, lib_msg_t *msg,
+lib_send (lib_nal_t *nal, void *private, lib_msg_t *msg,
           ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
           lib_md_t *md, ptl_size_t offset, ptl_size_t len) 
 {
         if (len == 0)
-                return (nal->cb_send(nal, private, msg,
-                                     hdr, type, nid, pid,
-                                     0, NULL,
-                                     offset, len));
+                return (nal->libnal_send(nal, private, msg,
+                                         hdr, type, nid, pid,
+                                         0, NULL,
+                                         offset, len));
         
         if ((md->options & PTL_MD_KIOV) == 0)
-                return (nal->cb_send(nal, private, msg, 
-                                     hdr, type, nid, pid,
-                                     md->md_niov, md->md_iov.iov,
-                                     offset, len));
-
-        return (nal->cb_send_pages(nal, private, msg, 
-                                   hdr, type, nid, pid,
-                                   md->md_niov, md->md_iov.kiov,
-                                   offset, len));
+                return (nal->libnal_send(nal, private, msg, 
+                                         hdr, type, nid, pid,
+                                         md->md_niov, md->md_iov.iov,
+                                         offset, len));
+
+        return (nal->libnal_send_pages(nal, private, msg, 
+                                       hdr, type, nid, pid,
+                                       md->md_niov, md->md_iov.kiov,
+                                       offset, len));
 }
 
 static void
-lib_commit_md (nal_cb_t *nal, lib_md_t *md, lib_msg_t *msg)
+lib_commit_md (lib_nal_t *nal, lib_md_t *md, lib_msg_t *msg)
 {
-        /* ALWAYS called holding the state_lock */
-        lib_counters_t *counters = &nal->ni.counters;
+        /* ALWAYS called holding the LIB_LOCK */
+        lib_counters_t *counters = &nal->libnal_ni.ni_counters;
 
         /* Here, we commit the MD to a network OP by marking it busy and
          * decrementing its threshold.  Come what may, the network "owns"
@@ -616,11 +614,11 @@ lib_commit_md (nal_cb_t *nal, lib_md_t *md, lib_msg_t *msg)
         if (counters->msgs_alloc > counters->msgs_max)
                 counters->msgs_max = counters->msgs_alloc;
 
-        list_add (&msg->msg_list, &nal->ni.ni_active_msgs);
+        list_add (&msg->msg_list, &nal->libnal_ni.ni_active_msgs);
 }
 
 static void
-lib_drop_message (nal_cb_t *nal, void *private, ptl_hdr_t *hdr)
+lib_drop_message (lib_nal_t *nal, void *private, ptl_hdr_t *hdr)
 {
         unsigned long flags;
 
@@ -628,10 +626,10 @@ lib_drop_message (nal_cb_t *nal, void *private, ptl_hdr_t *hdr)
          * to receive (init_msg() not called) and therefore can't cause an
          * event. */
         
-        state_lock(nal, &flags);
-        nal->ni.counters.drop_count++;
-        nal->ni.counters.drop_length += hdr->payload_length;
-        state_unlock(nal, &flags);
+        LIB_LOCK(nal, flags);
+        nal->libnal_ni.ni_counters.drop_count++;
+        nal->libnal_ni.ni_counters.drop_length += hdr->payload_length;
+        LIB_UNLOCK(nal, flags);
 
         /* NULL msg => if NAL calls lib_finalize it will be a noop */
         (void) lib_recv(nal, private, NULL, NULL, 0, 0, hdr->payload_length);
@@ -645,9 +643,9 @@ lib_drop_message (nal_cb_t *nal, void *private, ptl_hdr_t *hdr)
  *
  */
 static ptl_err_t
-parse_put(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
+parse_put(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
 {
-        lib_ni_t        *ni = &nal->ni;
+        lib_ni_t        *ni = &nal->libnal_ni;
         ptl_size_t       mlength = 0;
         ptl_size_t       offset = 0;
         ptl_err_t        rc;
@@ -659,7 +657,7 @@ parse_put(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
         hdr->msg.put.ptl_index = NTOH__u32 (hdr->msg.put.ptl_index);
         hdr->msg.put.offset = NTOH__u32 (hdr->msg.put.offset);
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
         md = lib_match_md(nal, hdr->msg.put.ptl_index, PTL_MD_OP_PUT,
                           hdr->src_nid, hdr->src_pid,
@@ -667,7 +665,7 @@ parse_put(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
                           hdr->msg.put.match_bits, msg,
                           &mlength, &offset);
         if (md == NULL) {
-                state_unlock(nal, &flags);
+                LIB_UNLOCK(nal, flags);
                 return (PTL_FAIL);
         }
 
@@ -679,24 +677,24 @@ parse_put(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
                 msg->ack_wmd = hdr->msg.put.ack_wmd;
         }
 
-        ni->counters.recv_count++;
-        ni->counters.recv_length += mlength;
+        ni->ni_counters.recv_count++;
+        ni->ni_counters.recv_length += mlength;
 
-        state_unlock(nal, &flags);
+        LIB_UNLOCK(nal, flags);
 
         rc = lib_recv(nal, private, msg, md, offset, mlength,
                       hdr->payload_length);
         if (rc != PTL_OK)
                 CERROR(LPU64": error on receiving PUT from "LPU64": %d\n",
-                       ni->nid, hdr->src_nid, rc);
+                       ni->ni_pid.nid, hdr->src_nid, rc);
 
         return (rc);
 }
 
 static ptl_err_t
-parse_get(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
+parse_get(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
 {
-        lib_ni_t        *ni = &nal->ni;
+        lib_ni_t        *ni = &nal->libnal_ni;
         ptl_size_t       mlength = 0;
         ptl_size_t       offset = 0;
         lib_md_t        *md;
@@ -710,7 +708,7 @@ parse_get(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
         hdr->msg.get.sink_length = NTOH__u32 (hdr->msg.get.sink_length);
         hdr->msg.get.src_offset = NTOH__u32 (hdr->msg.get.src_offset);
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
         md = lib_match_md(nal, hdr->msg.get.ptl_index, PTL_MD_OP_GET,
                           hdr->src_nid, hdr->src_pid,
@@ -718,24 +716,24 @@ parse_get(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
                           hdr->msg.get.match_bits, msg,
                           &mlength, &offset);
         if (md == NULL) {
-                state_unlock(nal, &flags);
+                LIB_UNLOCK(nal, flags);
                 return (PTL_FAIL);
         }
 
         msg->ev.type = PTL_EVENT_GET_END;
         msg->ev.hdr_data = 0;
 
-        ni->counters.send_count++;
-        ni->counters.send_length += mlength;
+        ni->ni_counters.send_count++;
+        ni->ni_counters.send_length += mlength;
 
-        state_unlock(nal, &flags);
+        LIB_UNLOCK(nal, flags);
 
         memset (&reply, 0, sizeof (reply));
         reply.type     = HTON__u32 (PTL_MSG_REPLY);
         reply.dest_nid = HTON__u64 (hdr->src_nid);
-        reply.src_nid  = HTON__u64 (ni->nid);
         reply.dest_pid = HTON__u32 (hdr->src_pid);
-        reply.src_pid  = HTON__u32 (ni->pid);
+        reply.src_nid  = HTON__u64 (ni->ni_pid.nid);
+        reply.src_pid  = HTON__u32 (ni->ni_pid.pid);
         reply.payload_length = HTON__u32 (mlength);
 
         reply.msg.reply.dst_wmd = hdr->msg.get.return_wmd;
@@ -747,7 +745,7 @@ parse_get(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
                        hdr->src_nid, hdr->src_pid, md, offset, mlength);
         if (rc != PTL_OK)
                 CERROR(LPU64": Unable to send REPLY for GET from "LPU64": %d\n",
-                       ni->nid, hdr->src_nid, rc);
+                       ni->ni_pid.nid, hdr->src_nid, rc);
 
         /* Discard any junk after the hdr */
         (void) lib_recv(nal, private, NULL, NULL, 0, 0, hdr->payload_length);
@@ -756,27 +754,27 @@ parse_get(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
 }
 
 static ptl_err_t
-parse_reply(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
+parse_reply(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
 {
-        lib_ni_t        *ni = &nal->ni;
+        lib_ni_t        *ni = &nal->libnal_ni;
         lib_md_t        *md;
         int              rlength;
         int              length;
         unsigned long    flags;
         ptl_err_t        rc;
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
         /* NB handles only looked up by creator (no flips) */
         md = ptl_wire_handle2md(&hdr->msg.reply.dst_wmd, nal);
         if (md == NULL || md->threshold == 0) {
                 CERROR (LPU64": Dropping REPLY from "LPU64" for %s MD "LPX64"."LPX64"\n",
-                        ni->nid, hdr->src_nid,
+                        ni->ni_pid.nid, hdr->src_nid,
                         md == NULL ? "invalid" : "inactive",
                         hdr->msg.reply.dst_wmd.wh_interface_cookie,
                         hdr->msg.reply.dst_wmd.wh_object_cookie);
 
-                state_unlock(nal, &flags);
+                LIB_UNLOCK(nal, flags);
                 return (PTL_FAIL);
         }
 
@@ -788,10 +786,10 @@ parse_reply(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
                 if ((md->options & PTL_MD_TRUNCATE) == 0) {
                         CERROR (LPU64": Dropping REPLY from "LPU64
                                 " length %d for MD "LPX64" would overflow (%d)\n",
-                                ni->nid, hdr->src_nid, length,
+                                ni->ni_pid.nid, hdr->src_nid, length,
                                 hdr->msg.reply.dst_wmd.wh_object_cookie,
                                 md->length);
-                        state_unlock(nal, &flags);
+                        LIB_UNLOCK(nal, flags);
                         return (PTL_FAIL);
                 }
                 length = md->length;
@@ -812,23 +810,23 @@ parse_reply(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
 
         lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
 
-        ni->counters.recv_count++;
-        ni->counters.recv_length += length;
+        ni->ni_counters.recv_count++;
+        ni->ni_counters.recv_length += length;
 
-        state_unlock(nal, &flags);
+        LIB_UNLOCK(nal, flags);
 
         rc = lib_recv(nal, private, msg, md, 0, length, rlength);
         if (rc != PTL_OK)
                 CERROR(LPU64": error on receiving REPLY from "LPU64": %d\n",
-                       ni->nid, hdr->src_nid, rc);
+                       ni->ni_pid.nid, hdr->src_nid, rc);
 
         return (rc);
 }
 
 static ptl_err_t
-parse_ack(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
+parse_ack(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
 {
-        lib_ni_t      *ni = &nal->ni;
+        lib_ni_t      *ni = &nal->libnal_ni;
         lib_md_t      *md;
         unsigned long  flags;
 
@@ -836,23 +834,23 @@ parse_ack(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
         hdr->msg.ack.match_bits = NTOH__u64 (hdr->msg.ack.match_bits);
         hdr->msg.ack.mlength = NTOH__u32 (hdr->msg.ack.mlength);
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
         /* NB handles only looked up by creator (no flips) */
         md = ptl_wire_handle2md(&hdr->msg.ack.dst_wmd, nal);
         if (md == NULL || md->threshold == 0) {
                 CDEBUG(D_INFO, LPU64": Dropping ACK from "LPU64" to %s MD "
-                       LPX64"."LPX64"\n", ni->nid, hdr->src_nid, 
+                       LPX64"."LPX64"\n", ni->ni_pid.nid, hdr->src_nid, 
                        (md == NULL) ? "invalid" : "inactive",
                        hdr->msg.ack.dst_wmd.wh_interface_cookie,
                        hdr->msg.ack.dst_wmd.wh_object_cookie);
 
-                state_unlock(nal, &flags);
+                LIB_UNLOCK(nal, flags);
                 return (PTL_FAIL);
         }
 
         CDEBUG(D_NET, LPU64": ACK from "LPU64" into md "LPX64"\n",
-               ni->nid, hdr->src_nid, 
+               ni->ni_pid.nid, hdr->src_nid, 
                hdr->msg.ack.dst_wmd.wh_object_cookie);
 
         lib_commit_md(nal, md, msg);
@@ -865,9 +863,9 @@ parse_ack(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
 
         lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
 
-        ni->counters.recv_count++;
+        ni->ni_counters.recv_count++;
 
-        state_unlock(nal, &flags);
+        LIB_UNLOCK(nal, flags);
         
         /* We have received and matched up the ack OK, create the
          * completion event now... */
@@ -898,125 +896,152 @@ hdr_type_string (ptl_hdr_t *hdr)
         }
 }
 
-void print_hdr(nal_cb_t * nal, ptl_hdr_t * hdr)
+void print_hdr(lib_nal_t *nal, ptl_hdr_t * hdr)
 {
         char *type_str = hdr_type_string (hdr);
 
-        nal->cb_printf(nal, "P3 Header at %p of type %s\n", hdr, type_str);
-        nal->cb_printf(nal, "    From nid/pid %Lu/%Lu", hdr->src_nid,
-                       hdr->src_pid);
-        nal->cb_printf(nal, "    To nid/pid %Lu/%Lu\n", hdr->dest_nid,
-                       hdr->dest_pid);
+        CWARN("P3 Header at %p of type %s\n", hdr, type_str);
+        CWARN("    From nid/pid "LPX64"/%u", hdr->src_nid, hdr->src_pid);
+        CWARN("    To nid/pid "LPX64"/%u\n", hdr->dest_nid, hdr->dest_pid);
 
         switch (hdr->type) {
         default:
                 break;
 
         case PTL_MSG_PUT:
-                nal->cb_printf(nal,
-                               "    Ptl index %d, ack md "LPX64"."LPX64", "
-                               "match bits "LPX64"\n",
-                               hdr->msg.put.ptl_index,
-                               hdr->msg.put.ack_wmd.wh_interface_cookie,
-                               hdr->msg.put.ack_wmd.wh_object_cookie,
-                               hdr->msg.put.match_bits);
-                nal->cb_printf(nal,
-                               "    Length %d, offset %d, hdr data "LPX64"\n",
-                               hdr->payload_length, hdr->msg.put.offset,
-                               hdr->msg.put.hdr_data);
+                CWARN("    Ptl index %d, ack md "LPX64"."LPX64", "
+                      "match bits "LPX64"\n",
+                      hdr->msg.put.ptl_index,
+                      hdr->msg.put.ack_wmd.wh_interface_cookie,
+                      hdr->msg.put.ack_wmd.wh_object_cookie,
+                      hdr->msg.put.match_bits);
+                CWARN("    Length %d, offset %d, hdr data "LPX64"\n",
+                      hdr->payload_length, hdr->msg.put.offset,
+                      hdr->msg.put.hdr_data);
                 break;
 
         case PTL_MSG_GET:
-                nal->cb_printf(nal,
-                               "    Ptl index %d, return md "LPX64"."LPX64", "
-                               "match bits "LPX64"\n", hdr->msg.get.ptl_index,
-                               hdr->msg.get.return_wmd.wh_interface_cookie,
-                               hdr->msg.get.return_wmd.wh_object_cookie,
-                               hdr->msg.get.match_bits);
-                nal->cb_printf(nal,
-                               "    Length %d, src offset %d\n",
-                               hdr->msg.get.sink_length,
-                               hdr->msg.get.src_offset);
+                CWARN("    Ptl index %d, return md "LPX64"."LPX64", "
+                      "match bits "LPX64"\n", hdr->msg.get.ptl_index,
+                      hdr->msg.get.return_wmd.wh_interface_cookie,
+                      hdr->msg.get.return_wmd.wh_object_cookie,
+                      hdr->msg.get.match_bits);
+                CWARN("    Length %d, src offset %d\n",
+                      hdr->msg.get.sink_length,
+                      hdr->msg.get.src_offset);
                 break;
 
         case PTL_MSG_ACK:
-                nal->cb_printf(nal, "    dst md "LPX64"."LPX64", "
-                               "manipulated length %d\n",
-                               hdr->msg.ack.dst_wmd.wh_interface_cookie,
-                               hdr->msg.ack.dst_wmd.wh_object_cookie,
-                               hdr->msg.ack.mlength);
+                CWARN("    dst md "LPX64"."LPX64", "
+                      "manipulated length %d\n",
+                      hdr->msg.ack.dst_wmd.wh_interface_cookie,
+                      hdr->msg.ack.dst_wmd.wh_object_cookie,
+                      hdr->msg.ack.mlength);
                 break;
 
         case PTL_MSG_REPLY:
-                nal->cb_printf(nal, "    dst md "LPX64"."LPX64", "
-                               "length %d\n",
-                               hdr->msg.reply.dst_wmd.wh_interface_cookie,
-                               hdr->msg.reply.dst_wmd.wh_object_cookie,
-                               hdr->payload_length);
+                CWARN("    dst md "LPX64"."LPX64", "
+                      "length %d\n",
+                      hdr->msg.reply.dst_wmd.wh_interface_cookie,
+                      hdr->msg.reply.dst_wmd.wh_object_cookie,
+                      hdr->payload_length);
         }
 
 }                               /* end of print_hdr() */
 
 
-void 
-lib_parse(nal_cb_t *nal, ptl_hdr_t *hdr, void *private)
+ptl_err_t
+lib_parse(lib_nal_t *nal, ptl_hdr_t *hdr, void *private)
 {
         unsigned long  flags;
         ptl_err_t      rc;
         lib_msg_t     *msg;
+
+        /* NB we return PTL_OK if we manage to parse the header and believe
+         * it looks OK.  Anything that goes wrong with receiving the
+         * message after that point is the responsibility of the NAL */
         
         /* convert common fields to host byte order */
-        hdr->dest_nid = NTOH__u64 (hdr->dest_nid);
+        hdr->type = NTOH__u32 (hdr->type);
         hdr->src_nid = NTOH__u64 (hdr->src_nid);
-        hdr->dest_pid = NTOH__u32 (hdr->dest_pid);
         hdr->src_pid = NTOH__u32 (hdr->src_pid);
-        hdr->type = NTOH__u32 (hdr->type);
+        hdr->dest_pid = NTOH__u32 (hdr->dest_pid);
         hdr->payload_length = NTOH__u32(hdr->payload_length);
-#if 0
-        nal->cb_printf(nal, "%d: lib_parse: nal=%p hdr=%p type=%d\n",
-                       nal->ni.nid, nal, hdr, hdr->type);
-        print_hdr(nal, hdr);
-#endif
-        if (hdr->type == PTL_MSG_HELLO) {
+
+        switch (hdr->type) {
+        case PTL_MSG_HELLO: {
                 /* dest_nid is really ptl_magicversion_t */
                 ptl_magicversion_t *mv = (ptl_magicversion_t *)&hdr->dest_nid;
 
-                CERROR (LPU64": Dropping unexpected HELLO message: "
+                mv->magic = NTOH__u32(mv->magic);
+                mv->version_major = NTOH__u16(mv->version_major);
+                mv->version_minor = NTOH__u16(mv->version_minor);
+
+                if (mv->magic == PORTALS_PROTO_MAGIC &&
+                    mv->version_major == PORTALS_PROTO_VERSION_MAJOR &&
+                    mv->version_minor == PORTALS_PROTO_VERSION_MINOR) {
+                        CWARN (LPU64": Dropping unexpected HELLO message: "
+                               "magic %d, version %d.%d from "LPD64"\n",
+                               nal->libnal_ni.ni_pid.nid, mv->magic, 
+                               mv->version_major, mv->version_minor,
+                               hdr->src_nid);
+
+                        /* it's good but we don't want it */
+                        lib_drop_message(nal, private, hdr);
+                        return PTL_OK;
+                }
+
+                /* we got garbage */
+                CERROR (LPU64": Bad HELLO message: "
                         "magic %d, version %d.%d from "LPD64"\n",
-                        nal->ni.nid, mv->magic, 
+                        nal->libnal_ni.ni_pid.nid, mv->magic, 
                         mv->version_major, mv->version_minor,
                         hdr->src_nid);
-                lib_drop_message(nal, private, hdr);
-                return;
+                return PTL_FAIL;
         }
-        
-        if (hdr->dest_nid != nal->ni.nid) {
-                CERROR(LPU64": Dropping %s message from "LPU64" to "LPU64
-                       " (not me)\n", nal->ni.nid, hdr_type_string (hdr),
-                       hdr->src_nid, hdr->dest_nid);
-                lib_drop_message(nal, private, hdr);
-                return;
+
+        case PTL_MSG_ACK:
+        case PTL_MSG_PUT:
+        case PTL_MSG_GET:
+        case PTL_MSG_REPLY:
+                hdr->dest_nid = NTOH__u64 (hdr->dest_nid);
+                if (hdr->dest_nid != nal->libnal_ni.ni_pid.nid) {
+                        CERROR(LPU64": BAD dest NID in %s message from"
+                               LPU64" to "LPU64" (not me)\n", 
+                               nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr),
+                               hdr->src_nid, hdr->dest_nid);
+                        return PTL_FAIL;
+                }
+                break;
+
+        default:
+                CERROR(LPU64": Bad message type 0x%x from "LPU64"\n",
+                       nal->libnal_ni.ni_pid.nid, hdr->type, hdr->src_nid);
+                return PTL_FAIL;
         }
 
-        if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */
+        /* We've decided we're not receiving garbage since we can parse the
+         * header.  We will return PTL_OK come what may... */
+
+        if (!list_empty (&nal->libnal_ni.ni_test_peers) && /* normally we don't */
             fail_peer (nal, hdr->src_nid, 0))      /* shall we now? */
         {
                 CERROR(LPU64": Dropping incoming %s from "LPU64
                        ": simulated failure\n",
-                       nal->ni.nid, hdr_type_string (hdr), 
+                       nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr), 
                        hdr->src_nid);
                 lib_drop_message(nal, private, hdr);
-                return;
+                return PTL_OK;
         }
 
         msg = lib_msg_alloc(nal);
         if (msg == NULL) {
                 CERROR(LPU64": Dropping incoming %s from "LPU64
                        ": can't allocate a lib_msg_t\n",
-                       nal->ni.nid, hdr_type_string (hdr), 
+                       nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr), 
                        hdr->src_nid);
                 lib_drop_message(nal, private, hdr);
-                return;
+                return PTL_OK;
         }
 
         switch (hdr->type) {
@@ -1033,10 +1058,8 @@ lib_parse(nal_cb_t *nal, ptl_hdr_t *hdr, void *private)
                 rc = parse_reply(nal, hdr, private, msg);
                 break;
         default:
-                CERROR(LPU64": Dropping <unknown> message from "LPU64
-                       ": Bad type=0x%x\n",  nal->ni.nid, hdr->src_nid,
-                       hdr->type);
-                rc = PTL_FAIL;
+                LASSERT(0);
+                rc = PTL_FAIL;                  /* no compiler warning please */
                 break;
         }
                 
@@ -1045,123 +1068,114 @@ lib_parse(nal_cb_t *nal, ptl_hdr_t *hdr, void *private)
                         /* committed... */
                         lib_finalize(nal, private, msg, rc);
                 } else {
-                        state_lock(nal, &flags);
-                        lib_msg_free(nal, msg); /* expects state_lock held */
-                        state_unlock(nal, &flags);
+                        LIB_LOCK(nal, flags);
+                        lib_msg_free(nal, msg); /* expects LIB_LOCK held */
+                        LIB_UNLOCK(nal, flags);
 
                         lib_drop_message(nal, private, hdr);
                 }
         }
+
+        return PTL_OK;
+        /* That's "OK I can parse it", not "OK I like it" :) */
 }
 
 int 
-do_PtlPut(nal_cb_t *nal, void *private, void *v_args, void *v_ret)
+lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh, 
+            ptl_ack_req_t ack, ptl_process_id_t *id,
+            ptl_pt_index_t portal, ptl_ac_index_t ac,
+            ptl_match_bits_t match_bits, 
+            ptl_size_t offset, ptl_hdr_data_t hdr_data)
 {
-        /*
-         * Incoming:
-         *      ptl_handle_md_t md_in
-         *      ptl_ack_req_t ack_req_in
-         *      ptl_process_id_t target_in
-         *      ptl_pt_index_t portal_in
-         *      ptl_ac_index_t cookie_in
-         *      ptl_match_bits_t match_bits_in
-         *      ptl_size_t offset_in
-         *
-         * Outgoing:
-         */
-
-        PtlPut_in        *args = v_args;
-        ptl_process_id_t *id = &args->target_in;
-        PtlPut_out       *ret = v_ret;
-        lib_ni_t         *ni = &nal->ni;
+        lib_nal_t        *nal = apinal->nal_data;
+        lib_ni_t         *ni = &nal->libnal_ni;
         lib_msg_t        *msg;
         ptl_hdr_t         hdr;
         lib_md_t         *md;
         unsigned long     flags;
         int               rc;
         
-        if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */
+        if (!list_empty (&ni->ni_test_peers) && /* normally we don't */
             fail_peer (nal, id->nid, 1))           /* shall we now? */
         {
-                CERROR(LPU64": Dropping PUT to "LPU64": simulated failure\n",
-                       nal->ni.nid, id->nid);
-                return (ret->rc = PTL_PROCESS_INVALID);
+                CERROR("Dropping PUT to "LPU64": simulated failure\n",
+                       id->nid);
+                return PTL_PROCESS_INVALID;
         }
 
         msg = lib_msg_alloc(nal);
         if (msg == NULL) {
                 CERROR(LPU64": Dropping PUT to "LPU64": ENOMEM on lib_msg_t\n",
-                       ni->nid, id->nid);
-                return (ret->rc = PTL_NO_SPACE);
+                       ni->ni_pid.nid, id->nid);
+                return PTL_NO_SPACE;
         }
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
-        md = ptl_handle2md(&args->md_in, nal);
+        md = ptl_handle2md(mdh, nal);
         if (md == NULL || md->threshold == 0) {
                 lib_msg_free(nal, msg);
-                state_unlock(nal, &flags);
+                LIB_UNLOCK(nal, flags);
         
-                return (ret->rc = PTL_MD_INVALID);
+                return PTL_MD_INVALID;
         }
 
-        CDEBUG(D_NET, "PtlPut -> %Lu: %lu\n", (unsigned long long)id->nid,
-               (unsigned long)id->pid);
+        CDEBUG(D_NET, "PtlPut -> "LPX64"\n", id->nid);
 
         memset (&hdr, 0, sizeof (hdr));
         hdr.type     = HTON__u32 (PTL_MSG_PUT);
         hdr.dest_nid = HTON__u64 (id->nid);
-        hdr.src_nid  = HTON__u64 (ni->nid);
         hdr.dest_pid = HTON__u32 (id->pid);
-        hdr.src_pid  = HTON__u32 (ni->pid);
+        hdr.src_nid  = HTON__u64 (ni->ni_pid.nid);
+        hdr.src_pid  = HTON__u32 (ni->ni_pid.pid);
         hdr.payload_length = HTON__u32 (md->length);
 
         /* NB handles only looked up by creator (no flips) */
-        if (args->ack_req_in == PTL_ACK_REQ) {
+        if (ack == PTL_ACK_REQ) {
                 hdr.msg.put.ack_wmd.wh_interface_cookie = ni->ni_interface_cookie;
                 hdr.msg.put.ack_wmd.wh_object_cookie = md->md_lh.lh_cookie;
         } else {
                 hdr.msg.put.ack_wmd = PTL_WIRE_HANDLE_NONE;
         }
 
-        hdr.msg.put.match_bits = HTON__u64 (args->match_bits_in);
-        hdr.msg.put.ptl_index = HTON__u32 (args->portal_in);
-        hdr.msg.put.offset = HTON__u32 (args->offset_in);
-        hdr.msg.put.hdr_data = args->hdr_data_in;
+        hdr.msg.put.match_bits = HTON__u64 (match_bits);
+        hdr.msg.put.ptl_index = HTON__u32 (portal);
+        hdr.msg.put.offset = HTON__u32 (offset);
+        hdr.msg.put.hdr_data = hdr_data;
 
         lib_commit_md(nal, md, msg);
         
         msg->ev.type = PTL_EVENT_SEND_END;
-        msg->ev.initiator.nid = ni->nid;
-        msg->ev.initiator.pid = ni->pid;
-        msg->ev.portal = args->portal_in;
-        msg->ev.match_bits = args->match_bits_in;
+        msg->ev.initiator.nid = ni->ni_pid.nid;
+        msg->ev.initiator.pid = ni->ni_pid.pid;
+        msg->ev.portal = portal;
+        msg->ev.match_bits = match_bits;
         msg->ev.rlength = md->length;
         msg->ev.mlength = md->length;
-        msg->ev.offset = args->offset_in;
-        msg->ev.hdr_data = args->hdr_data_in;
+        msg->ev.offset = offset;
+        msg->ev.hdr_data = hdr_data;
 
         lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
 
-        ni->counters.send_count++;
-        ni->counters.send_length += md->length;
+        ni->ni_counters.send_count++;
+        ni->ni_counters.send_length += md->length;
 
-        state_unlock(nal, &flags);
+        LIB_UNLOCK(nal, flags);
         
-        rc = lib_send (nal, private, msg, &hdr, PTL_MSG_PUT,
+        rc = lib_send (nal, NULL, msg, &hdr, PTL_MSG_PUT,
                        id->nid, id->pid, md, 0, md->length);
         if (rc != PTL_OK) {
-                CERROR(LPU64": error sending PUT to "LPU64": %d\n",
-                       ni->nid, id->nid, rc);
-                lib_finalize (nal, private, msg, rc);
+                CERROR("Error sending PUT to "LPX64": %d\n",
+                       id->nid, rc);
+                lib_finalize (nal, NULL, msg, rc);
         }
         
         /* completion will be signalled by an event */
-        return ret->rc = PTL_OK;
+        return PTL_OK;
 }
 
 lib_msg_t * 
-lib_create_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, lib_msg_t *getmsg)
+lib_create_reply_msg (lib_nal_t *nal, ptl_nid_t peer_nid, lib_msg_t *getmsg)
 {
         /* The NAL can DMA direct to the GET md (i.e. no REPLY msg).  This
          * returns a msg for the NAL to pass to lib_finalize() when the sink
@@ -1170,12 +1184,12 @@ lib_create_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, lib_msg_t *getmsg)
          * CAVEAT EMPTOR: 'getmsg' is the original GET, which is freed when
          * lib_finalize() is called on it, so the NAL must call this first */
 
-        lib_ni_t        *ni = &nal->ni;
+        lib_ni_t        *ni = &nal->libnal_ni;
         lib_msg_t       *msg = lib_msg_alloc(nal);
         lib_md_t        *getmd = getmsg->md;
         unsigned long    flags;
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
         LASSERT (getmd->pending > 0);
 
@@ -1205,72 +1219,60 @@ lib_create_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, lib_msg_t *getmsg)
 
         lib_md_deconstruct(nal, getmd, &msg->ev.mem_desc);
 
-        ni->counters.recv_count++;
-        ni->counters.recv_length += getmd->length;
+        ni->ni_counters.recv_count++;
+        ni->ni_counters.recv_length += getmd->length;
 
-        state_unlock(nal, &flags);
+        LIB_UNLOCK(nal, flags);
 
         return msg;
 
  drop_msg:
         lib_msg_free(nal, msg);
  drop:
-        nal->ni.counters.drop_count++;
-        nal->ni.counters.drop_length += getmd->length;
+        nal->libnal_ni.ni_counters.drop_count++;
+        nal->libnal_ni.ni_counters.drop_length += getmd->length;
 
-        state_unlock (nal, &flags);
+        LIB_UNLOCK (nal, flags);
 
         return NULL;
 }
 
 int 
-do_PtlGet(nal_cb_t *nal, void *private, void *v_args, void *v_ret)
+lib_api_get(nal_t *apinal, ptl_handle_md_t *mdh, ptl_process_id_t *id,
+            ptl_pt_index_t portal, ptl_ac_index_t ac,
+            ptl_match_bits_t match_bits, ptl_size_t offset)
 {
-        /*
-         * Incoming:
-         *      ptl_handle_md_t md_in
-         *      ptl_process_id_t target_in
-         *      ptl_pt_index_t portal_in
-         *      ptl_ac_index_t cookie_in
-         *      ptl_match_bits_t match_bits_in
-         *      ptl_size_t offset_in
-         *
-         * Outgoing:
-         */
-
-        PtlGet_in        *args = v_args;
-        ptl_process_id_t *id = &args->target_in;
-        PtlGet_out       *ret = v_ret;
-        lib_ni_t         *ni = &nal->ni;
+        lib_nal_t        *nal = apinal->nal_data;
+        lib_ni_t         *ni = &nal->libnal_ni;
         lib_msg_t        *msg;
         ptl_hdr_t         hdr;
         lib_md_t         *md;
         unsigned long     flags;
         int               rc;
         
-        if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */
+        if (!list_empty (&ni->ni_test_peers) && /* normally we don't */
             fail_peer (nal, id->nid, 1))           /* shall we now? */
         {
-                CERROR(LPU64": Dropping PUT to "LPU64": simulated failure\n",
-                       nal->ni.nid, id->nid);
-                return (ret->rc = PTL_PROCESS_INVALID);
+                CERROR("Dropping PUT to "LPX64": simulated failure\n",
+                       id->nid);
+                return PTL_PROCESS_INVALID;
         }
 
         msg = lib_msg_alloc(nal);
         if (msg == NULL) {
-                CERROR(LPU64": Dropping GET to "LPU64": ENOMEM on lib_msg_t\n",
-                       ni->nid, id->nid);
-                return (ret->rc = PTL_NO_SPACE);
+                CERROR("Dropping GET to "LPU64": ENOMEM on lib_msg_t\n",
+                       id->nid);
+                return PTL_NO_SPACE;
         }
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
-        md = ptl_handle2md(&args->md_in, nal);
+        md = ptl_handle2md(mdh, nal);
         if (md == NULL || !md->threshold) {
                 lib_msg_free(nal, msg);
-                state_unlock(nal, &flags);
+                LIB_UNLOCK(nal, flags);
 
-                return ret->rc = PTL_MD_INVALID;
+                return PTL_MD_INVALID;
         }
 
         CDEBUG(D_NET, "PtlGet -> %Lu: %lu\n", (unsigned long long)id->nid,
@@ -1279,48 +1281,47 @@ do_PtlGet(nal_cb_t *nal, void *private, void *v_args, void *v_ret)
         memset (&hdr, 0, sizeof (hdr));
         hdr.type     = HTON__u32 (PTL_MSG_GET);
         hdr.dest_nid = HTON__u64 (id->nid);
-        hdr.src_nid  = HTON__u64 (ni->nid);
         hdr.dest_pid = HTON__u32 (id->pid);
-        hdr.src_pid  = HTON__u32 (ni->pid);
+        hdr.src_nid  = HTON__u64 (ni->ni_pid.nid);
+        hdr.src_pid  = HTON__u32 (ni->ni_pid.pid);
         hdr.payload_length = 0;
 
         /* NB handles only looked up by creator (no flips) */
         hdr.msg.get.return_wmd.wh_interface_cookie = ni->ni_interface_cookie;
         hdr.msg.get.return_wmd.wh_object_cookie = md->md_lh.lh_cookie;
 
-        hdr.msg.get.match_bits = HTON__u64 (args->match_bits_in);
-        hdr.msg.get.ptl_index = HTON__u32 (args->portal_in);
-        hdr.msg.get.src_offset = HTON__u32 (args->offset_in);
+        hdr.msg.get.match_bits = HTON__u64 (match_bits);
+        hdr.msg.get.ptl_index = HTON__u32 (portal);
+        hdr.msg.get.src_offset = HTON__u32 (offset);
         hdr.msg.get.sink_length = HTON__u32 (md->length);
 
         lib_commit_md(nal, md, msg);
 
         msg->ev.type = PTL_EVENT_SEND_END;
-        msg->ev.initiator.nid = ni->nid;
-        msg->ev.initiator.pid = ni->pid;
-        msg->ev.portal = args->portal_in;
-        msg->ev.match_bits = args->match_bits_in;
+        msg->ev.initiator = ni->ni_pid;
+        msg->ev.portal = portal;
+        msg->ev.match_bits = match_bits;
         msg->ev.rlength = md->length;
         msg->ev.mlength = md->length;
-        msg->ev.offset = args->offset_in;
+        msg->ev.offset = offset;
         msg->ev.hdr_data = 0;
 
         lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
 
-        ni->counters.send_count++;
+        ni->ni_counters.send_count++;
 
-        state_unlock(nal, &flags);
+        LIB_UNLOCK(nal, flags);
 
-        rc = lib_send (nal, private, msg, &hdr, PTL_MSG_GET,
+        rc = lib_send (nal, NULL, msg, &hdr, PTL_MSG_GET,
                        id->nid, id->pid, NULL, 0, 0);
         if (rc != PTL_OK) {
                 CERROR(LPU64": error sending GET to "LPU64": %d\n",
-                       ni->nid, id->nid, rc);
-                lib_finalize (nal, private, msg, rc);
+                       ni->ni_pid.nid, id->nid, rc);
+                lib_finalize (nal, NULL, msg, rc);
         }
         
         /* completion will be signalled by an event */
-        return ret->rc = PTL_OK;
+        return PTL_OK;
 }
 
 void lib_assert_wire_constants (void)
index 1b69533..328b8d8 100644 (file)
 #include <portals/lib-p30.h>
 
 void
-lib_enq_event_locked (nal_cb_t *nal, void *private, 
+lib_enq_event_locked (lib_nal_t *nal, void *private, 
                       lib_eq_t *eq, ptl_event_t *ev)
 {
         ptl_event_t  *eq_slot;
-        int           rc;
         
-        ev->sequence = eq->sequence++; /* Allocate the next queue slot */
-
-        /* size must be a power of 2 to handle a wrapped sequence # */
-        LASSERT (eq->size != 0 &&
-                 eq->size == LOWEST_BIT_SET (eq->size));
-        eq_slot = eq->base + (ev->sequence & (eq->size - 1));
+        ev->sequence = eq->eq_enq_seq++; /* Allocate the next queue slot */
 
-        /* Copy the event into the allocated slot, ensuring all the rest of
-         * the event's contents have been copied _before_ the sequence
-         * number gets updated.  A processes 'getting' an event waits on
-         * the next queue slot's sequence to be 'new'.  When it is, _all_
-         * other event fields had better be consistent.  I assert
-         * 'sequence' is the last member, so I only need a 2 stage copy. */
+        /* size must be a power of 2 to handle sequence # overflow */
+        LASSERT (eq->eq_size != 0 &&
+                 eq->eq_size == LOWEST_BIT_SET (eq->eq_size));
+        eq_slot = eq->eq_events + (ev->sequence & (eq->eq_size - 1));
 
-        LASSERT(sizeof (ptl_event_t) ==
-                offsetof(ptl_event_t, sequence) + sizeof(ev->sequence));
+        /* There is no race since both event consumers and event producers
+         * take the LIB_LOCK(), so we don't screw around with memory
+         * barriers, setting the sequence number last or weird structure
+         * layout assertions. */
+        *eq_slot = *ev;
 
-        rc = nal->cb_write (nal, private, (user_ptr)eq_slot, ev,
-                            offsetof (ptl_event_t, sequence));
-        LASSERT (rc == PTL_OK);
+        /* Call the callback handler (if any) */
+        if (eq->eq_callback != NULL)
+                eq->eq_callback (eq_slot);
 
+        /* Wake anyone sleeping for an event (see lib-eq.c) */
 #ifdef __KERNEL__
-        barrier();
-#endif
-        /* Updating the sequence number is what makes the event 'new' NB if
-         * the cb_write below isn't atomic, this could cause a race with
-         * PtlEQGet */
-        rc = nal->cb_write(nal, private, (user_ptr)&eq_slot->sequence,
-                           (void *)&ev->sequence,sizeof (ev->sequence));
-        LASSERT (rc == PTL_OK);
-
-#ifdef __KERNEL__
-        barrier();
+        if (waitqueue_active(&nal->libnal_ni.ni_waitq))
+                wake_up_all(&nal->libnal_ni.ni_waitq);
+#else
+        pthread_cond_broadcast(&nal->libnal_ni.ni_cond);
 #endif
-
-        if (nal->cb_callback != NULL)
-                nal->cb_callback(nal, private, eq, ev);
-        else if (eq->event_callback != NULL)
-                eq->event_callback(ev);
 }
 
 void 
-lib_finalize(nal_cb_t *nal, void *private, lib_msg_t *msg, ptl_err_t status)
+lib_finalize (lib_nal_t *nal, void *private, lib_msg_t *msg, ptl_err_t status)
 {
         lib_md_t     *md;
         int           unlink;
@@ -101,9 +85,9 @@ lib_finalize(nal_cb_t *nal, void *private, lib_msg_t *msg, ptl_err_t status)
                 memset (&ack, 0, sizeof (ack));
                 ack.type     = HTON__u32 (PTL_MSG_ACK);
                 ack.dest_nid = HTON__u64 (msg->ev.initiator.nid);
-                ack.src_nid  = HTON__u64 (nal->ni.nid);
                 ack.dest_pid = HTON__u32 (msg->ev.initiator.pid);
-                ack.src_pid  = HTON__u32 (nal->ni.pid);
+                ack.src_nid  = HTON__u64 (nal->libnal_ni.ni_pid.nid);
+                ack.src_pid  = HTON__u32 (nal->libnal_ni.ni_pid.pid);
                 ack.payload_length = 0;
 
                 ack.msg.ack.dst_wmd = msg->ack_wmd;
@@ -122,7 +106,7 @@ lib_finalize(nal_cb_t *nal, void *private, lib_msg_t *msg, ptl_err_t status)
 
         md = msg->md;
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
         /* Now it's safe to drop my caller's ref */
         md->pending--;
@@ -148,8 +132,8 @@ lib_finalize(nal_cb_t *nal, void *private, lib_msg_t *msg, ptl_err_t status)
                 lib_md_unlink(nal, md);
 
         list_del (&msg->msg_list);
-        nal->ni.counters.msgs_alloc--;
+        nal->libnal_ni.ni_counters.msgs_alloc--;
         lib_msg_free(nal, msg);
 
-        state_unlock(nal, &flags);
+        LIB_UNLOCK(nal, flags);
 }
index aa959fc..0f298a0 100644 (file)
 
 #define DEBUG_SUBSYSTEM S_PORTALS
 #include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
 
 #define MAX_DIST 18446744073709551615ULL
 
-int do_PtlNIStatus(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int lib_api_ni_status (nal_t *apinal, ptl_sr_index_t sr_idx,
+                       ptl_sr_value_t *status)
 {
-        /*
-         * Incoming:
-         *      ptl_handle_ni_t interface_in
-         *      ptl_sr_index_t register_in
-         *
-         * Outgoing:
-         *      ptl_sr_value_t          * status_out
-         */
-
-        PtlNIStatus_in *args = v_args;
-        PtlNIStatus_out *ret = v_ret;
-        lib_ni_t *ni = &nal->ni;
-        lib_counters_t *count = &ni->counters;
-
-        if (!args)
-                return ret->rc = PTL_SEGV;
-
-        ret->rc = PTL_OK;
-        ret->status_out = 0;
-
-        /*
-         * I hate this sort of code....  Hash tables, offset lists?
-         * Treat the counters as an array of ints?
-         */
-        if (args->register_in == PTL_SR_DROP_COUNT)
-                ret->status_out = count->drop_count;
-
-        else if (args->register_in == PTL_SR_DROP_LENGTH)
-                ret->status_out = count->drop_length;
-
-        else if (args->register_in == PTL_SR_RECV_COUNT)
-                ret->status_out = count->recv_count;
-
-        else if (args->register_in == PTL_SR_RECV_LENGTH)
-                ret->status_out = count->recv_length;
-
-        else if (args->register_in == PTL_SR_SEND_COUNT)
-                ret->status_out = count->send_count;
-
-        else if (args->register_in == PTL_SR_SEND_LENGTH)
-                ret->status_out = count->send_length;
-
-        else if (args->register_in == PTL_SR_MSGS_MAX)
-                ret->status_out = count->msgs_max;
-        else
-                ret->rc = PTL_SR_INDEX_INVALID;
-
-        return ret->rc;
+        lib_nal_t      *nal = apinal->nal_data;
+        lib_ni_t       *ni = &nal->libnal_ni;
+        lib_counters_t *count = &ni->ni_counters;
+
+        switch (sr_idx) {
+        case PTL_SR_DROP_COUNT:
+                *status = count->drop_count;
+                return PTL_OK;
+        case PTL_SR_DROP_LENGTH:
+                *status = count->drop_length;
+                return PTL_OK;
+        case PTL_SR_RECV_COUNT:
+                *status = count->recv_count;
+                return PTL_OK;
+        case PTL_SR_RECV_LENGTH:
+                *status = count->recv_length;
+                return PTL_OK;
+        case PTL_SR_SEND_COUNT:
+                *status = count->send_count;
+                return PTL_OK;
+        case PTL_SR_SEND_LENGTH:
+                *status = count->send_length;
+                return PTL_OK;
+        case PTL_SR_MSGS_MAX:
+                *status = count->msgs_max;
+                return PTL_OK;
+        default:
+                *status = 0;
+                return PTL_SR_INDEX_INVALID;
+        }
 }
 
 
-int do_PtlNIDist(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int lib_api_ni_dist (nal_t *apinal, ptl_process_id_t *pid, unsigned long *dist)
 {
-        /*
-         * Incoming:
-         *      ptl_handle_ni_t interface_in
-         *      ptl_process_id_t process_in
-
-         *
-         * Outgoing:
-         *      unsigned long   * distance_out
-
-         */
-
-        PtlNIDist_in *args = v_args;
-        PtlNIDist_out *ret = v_ret;
-
-        unsigned long dist;
-        ptl_process_id_t id_in = args->process_in;
-        ptl_nid_t nid;
-        int rc;
-
-        nid = id_in.nid;
-
-        if ((rc = nal->cb_dist(nal, nid, &dist)) != 0) {
-                ret->distance_out = (unsigned long) MAX_DIST;
-                return PTL_PROCESS_INVALID;
-        }
-
-        ret->distance_out = dist;
+        lib_nal_t *nal = apinal->nal_data;
 
-        return ret->rc = PTL_OK;
+        return (nal->libnal_dist(nal, pid->nid, dist));
 }
index 12eebb5..ff2a601 100644 (file)
@@ -35,24 +35,12 @@ extern int getpid(void);
 #       include <unistd.h>
 #endif
 #include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
 
-int do_PtlGetId(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int
+lib_api_get_id(nal_t *apinal, ptl_process_id_t *pid)
 {
-        /*
-         * Incoming:
-         *      ptl_handle_ni_t handle_in
-         *
-         * Outgoing:
-         *      ptl_process_id_t        * id_out
-         *      ptl_id_t                * gsize_out
-         */
-
-        PtlGetId_out *ret = v_ret;
-        lib_ni_t *ni = &nal->ni;
-
-        ret->id_out.nid = ni->nid;
-        ret->id_out.pid = ni->pid;
-
-        return ret->rc = PTL_OK;
+        lib_nal_t *nal = apinal->nal_data;
+        
+        *pid = nal->libnal_ni.ni_pid;
+        return PTL_OK;
 }
index 40e9da4..5615a72 100644 (file)
@@ -160,7 +160,6 @@ EXPORT_SYMBOL(ptl_register_nal);
 EXPORT_SYMBOL(ptl_unregister_nal);
 
 EXPORT_SYMBOL(ptl_err_str);
-EXPORT_SYMBOL(lib_dispatch);
 EXPORT_SYMBOL(PtlMEAttach);
 EXPORT_SYMBOL(PtlMEInsert);
 EXPORT_SYMBOL(PtlMEUnlink);
@@ -192,7 +191,6 @@ EXPORT_SYMBOL(lib_parse);
 EXPORT_SYMBOL(lib_create_reply_msg);
 EXPORT_SYMBOL(lib_init);
 EXPORT_SYMBOL(lib_fini);
-EXPORT_SYMBOL(dispatch_name);
 
 MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
 MODULE_DESCRIPTION("Portals v3.1");
index 6507924..f329e2a 100644 (file)
@@ -91,8 +91,8 @@ void set_address(bridge t,ptl_pid_t pidrequest)
     int port;
     if (pidrequest==(unsigned short)PTL_PID_ANY) port = 0;
     else port=pidrequest;
-    t->nal_cb->ni.nid=get_node_id();
-    t->nal_cb->ni.pid=port;
+    t->lib_nal->libnal_ni.ni_pid.nid=get_node_id();
+    t->lib_nal->libnal_ni.ni_pid.pid=port;
 }
 #else
 
@@ -120,10 +120,9 @@ void set_address(bridge t,ptl_pid_t pidrequest)
     in_addr = get_node_id();
 
     t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */
-    t->nal_cb->ni.nid = ((in_addr & PNAL_HOSTID_MASK) 
-                            << PNAL_VNODE_SHIFT)
-        + virtnode;
-
+    t->lib_nal->libnal_ni.ni_pid.nid = ((in_addr & PNAL_HOSTID_MASK) 
+                                        << PNAL_VNODE_SHIFT)
+                                       + virtnode;
     pid=pidrequest;
     /* TODO: Support of pid PTL_ID_ANY with virtual nodes needs more work. */
 #ifdef notyet
@@ -141,6 +140,6 @@ void set_address(bridge t,ptl_pid_t pidrequest)
             return;
         }
     else port = ((virtnode << PNAL_VNODE_SHIFT) + pid) + PNAL_BASE_PORT;
-    t->nal_cb->ni.pid=pid;
+    t->lib_nal->libnal_ni.ni_pid.pid=pid;
 }
 #endif
index 90ce324..d2f0f2c 100644 (file)
@@ -19,7 +19,7 @@
 
 typedef struct bridge {
     int alive;
-    nal_cb_t *nal_cb;
+    lib_nal_t *lib_nal;
     void *lower;
     void *local;
     void (*shutdown)(struct bridge *);
index e40c4b9..f3843d7 100644 (file)
@@ -60,34 +60,6 @@ void procbridge_wakeup_nal(procbridge p)
     syscall(SYS_write, p->notifier[0], buf, sizeof(buf));
 }
 
-/* Function: forward
- * Arguments: nal_t *nal: pointer to my top-side nal structure
- *            id: the command to pass to the lower layer
- *            args, args_len:pointer to and length of the request
- *            ret, ret_len:  pointer to and size of the result
- * Returns: a portals status code
- *
- * forwards a packaged api call from the 'api' side to the 'library'
- *   side, and collects the result
- */
-static int procbridge_forward(nal_t *n, int id, void *args, size_t args_len,
-                             void *ret, size_t ret_len)
-{
-    bridge b = (bridge) n->nal_data;
-
-    if (id == PTL_FINI) {
-            lib_fini(b->nal_cb);
-
-            if (b->shutdown)
-                (*b->shutdown)(b);
-    }
-
-    lib_dispatch(b->nal_cb, NULL, id, args, ret);
-
-    return (PTL_OK);
-}
-
-
 /* Function: shutdown
  * Arguments: nal: a pointer to my top side nal structure
  *            ni: my network interface index
@@ -97,7 +69,8 @@ static int procbridge_forward(nal_t *n, int id, void *args, size_t args_len,
  */
 static void procbridge_shutdown(nal_t *n)
 {
-    bridge b=(bridge)n->nal_data;
+    lib_nal_t *nal = n->nal_data;
+    bridge b=(bridge)nal->libnal_data;
     procbridge p=(procbridge)b->local;
 
     p->nal_flags |= NAL_FLAG_STOPPING;
@@ -117,83 +90,19 @@ static void procbridge_shutdown(nal_t *n)
 }
 
 
-static void procbridge_lock(nal_t * n, unsigned long *flags)
-{
-    bridge b=(bridge)n->nal_data;
-    procbridge p=(procbridge)b->local;
-
-    pthread_mutex_lock(&p->mutex);
-}
-
-static void procbridge_unlock(nal_t * n, unsigned long *flags)
-{
-    bridge b=(bridge)n->nal_data;
-    procbridge p=(procbridge)b->local;
-
-    pthread_mutex_unlock(&p->mutex);
-}
-
-/* Function: yield
- * Arguments:  pid:
- *
- *  this function was originally intended to allow the
- *   lower half thread to be scheduled to allow progress. we
- *   overload it to explicitly block until signalled by the
- *   lower half.
- */
-static int procbridge_yield(nal_t *n, unsigned long *flags, int milliseconds)
-{
-    bridge b=(bridge)n->nal_data;
-    procbridge p=(procbridge)b->local;
-
-    if (milliseconds == 0)
-            return 0;
-            
-    if (milliseconds < 0) {
-        pthread_cond_wait(&p->cond,&p->mutex);
-    } else {
-        struct timeval then;
-        struct timeval now;
-        struct timespec timeout;
-
-        gettimeofday(&then, NULL);
-        timeout.tv_sec = then.tv_sec + milliseconds/1000;
-        timeout.tv_nsec = then.tv_usec * 1000 + milliseconds % 1000 * 1000000;
-        if (timeout.tv_nsec >= 1000000000) {
-                timeout.tv_sec++;
-                timeout.tv_nsec -= 1000000000;
-        }
-
-        pthread_cond_timedwait(&p->cond, &p->mutex, &timeout);
-
-        gettimeofday(&now, NULL);
-        milliseconds -= (now.tv_sec - then.tv_sec) * 1000 + 
-                        (now.tv_usec - then.tv_usec) / 1000;
-        
-        if (milliseconds < 0)
-                milliseconds = 0;
-    }
-
-    return (milliseconds);
-}
-
 /* forward decl */
 extern int procbridge_startup (nal_t *, ptl_pid_t,
                                ptl_ni_limits_t *, ptl_ni_limits_t *);
 
 /* api_nal
  *  the interface vector to allow the generic code to access
- *  this nal. this is seperate from the library side nal_cb.
+ *  this nal. this is separate from the library side lib_nal.
  *  TODO: should be dyanmically allocated
  */
 nal_t procapi_nal = {
     nal_data: NULL,
-    startup:  procbridge_startup,
-    shutdown: procbridge_shutdown,
-    forward:  procbridge_forward,
-    yield:    procbridge_yield,
-    lock:     procbridge_lock,
-    unlock:   procbridge_unlock
+    nal_ni_init: procbridge_startup,
+    nal_ni_fini: procbridge_shutdown,
 };
 
 ptl_nid_t tcpnal_mynid;
@@ -228,7 +137,6 @@ int procbridge_startup (nal_t *nal, ptl_pid_t requested_pid,
 
     b=(bridge)malloc(sizeof(struct bridge));
     p=(procbridge)malloc(sizeof(struct procbridge));
-    nal->nal_data=b;
     b->local=p;
 
     args.nia_requested_pid = requested_pid;
@@ -236,6 +144,7 @@ int procbridge_startup (nal_t *nal, ptl_pid_t requested_pid,
     args.nia_actual_limits = actual_limits;
     args.nia_nal_type = nal_type;
     args.nia_bridge = b;
+    args.nia_apinal = nal;
 
     /* init procbridge */
     pthread_mutex_init(&p->mutex,0);
@@ -273,7 +182,7 @@ int procbridge_startup (nal_t *nal, ptl_pid_t requested_pid,
     if (p->nal_flags & NAL_FLAG_STOPPED)
         return PTL_FAIL;
 
-    b->nal_cb->ni.nid = tcpnal_mynid;
+    b->lib_nal->libnal_ni.ni_pid.nid = tcpnal_mynid;
 
     return PTL_OK;
 }
index 1c8e7dd..1f91ced 100644 (file)
@@ -30,7 +30,6 @@ typedef struct procbridge {
 
     int nal_flags;
 
-    pthread_mutex_t nal_cb_lock;
 } *procbridge;
 
 typedef struct nal_init_args {
@@ -39,6 +38,7 @@ typedef struct nal_init_args {
     ptl_ni_limits_t *nia_actual_limits;
     int              nia_nal_type;
     bridge           nia_bridge;
+    nal_t           *nia_apinal;
 } nal_init_args_t;
 
 extern void *nal_thread(void *);
index af0745b..7ee7c71 100644 (file)
 /* the following functions are stubs to satisfy the nal definition
    without doing anything particularily useful*/
 
-static ptl_err_t nal_write(nal_cb_t *nal,
-                           void *private,
-                           user_ptr dst_addr,
-                           void *src_addr,
-                           size_t len)
-{
-    memcpy(dst_addr, src_addr, len);
-    return PTL_OK;
-}
-
-static ptl_err_t nal_read(nal_cb_t * nal,
-                          void *private,
-                          void *dst_addr,
-                          user_ptr src_addr,
-                          size_t len)
-{
-       memcpy(dst_addr, src_addr, len);
-       return PTL_OK;
-}
-
-static void *nal_malloc(nal_cb_t *nal,
-                        size_t len)
-{
-    void *buf =  malloc(len);
-    return buf;
-}
-
-static void nal_free(nal_cb_t *nal,
-                     void *buf,
-                     size_t len)
-{
-    free(buf);
-}
-
-static void nal_printf(nal_cb_t *nal,
-                       const char *fmt,
-                       ...)
-{
-    va_list        ap;
-
-    va_start(ap, fmt);
-    vprintf(fmt, ap);
-    va_end(ap);
-}
-
-
-static void nal_cli(nal_cb_t *nal,
-                    unsigned long *flags)
-{
-    bridge b = (bridge) nal->nal_data;
-    procbridge p = (procbridge) b->local;
-
-    pthread_mutex_lock(&p->mutex);
-}
-
-
-static void nal_sti(nal_cb_t *nal,
-                    unsigned long *flags)
-{
-    bridge b = (bridge)nal->nal_data;
-    procbridge p = (procbridge) b->local;
-
-    pthread_mutex_unlock(&p->mutex);
-}
-
-static void nal_callback(nal_cb_t *nal, void *private,
-                         lib_eq_t *eq, ptl_event_t *ev)
-{
-        bridge b = (bridge)nal->nal_data;
-        procbridge p = (procbridge) b->local;
-
-        /* holding p->mutex */
-        if (eq->event_callback != NULL)
-                eq->event_callback(ev);
-        
-        pthread_cond_broadcast(&p->cond);
-}
-
-static int nal_dist(nal_cb_t *nal,
+static int nal_dist(lib_nal_t *nal,
                     ptl_nid_t nid,
                     unsigned long *dist)
 {
@@ -170,33 +92,25 @@ void *nal_thread(void *z)
     ptl_process_id_t process_id;
     int nal_type;
     
-    b->nal_cb=(nal_cb_t *)malloc(sizeof(nal_cb_t));
-    b->nal_cb->nal_data=b;
-    b->nal_cb->cb_read=nal_read;
-    b->nal_cb->cb_write=nal_write;
-    b->nal_cb->cb_malloc=nal_malloc;
-    b->nal_cb->cb_free=nal_free;
-    b->nal_cb->cb_map=NULL;
-    b->nal_cb->cb_unmap=NULL;
-    b->nal_cb->cb_printf=nal_printf;
-    b->nal_cb->cb_cli=nal_cli;
-    b->nal_cb->cb_sti=nal_sti;
-    b->nal_cb->cb_callback=nal_callback;
-    b->nal_cb->cb_dist=nal_dist;
+    b->lib_nal=(lib_nal_t *)malloc(sizeof(lib_nal_t));
+    b->lib_nal->libnal_data=b;
+    b->lib_nal->libnal_map=NULL;
+    b->lib_nal->libnal_unmap=NULL;
+    b->lib_nal->libnal_dist=nal_dist;
 
     nal_type = args->nia_nal_type;
 
-    /* Wierd, but this sets b->nal_cb->ni.{nid,pid}, which lib_init() is
-     * about to do from the process_id passed to it...*/
+    /* Weird, but this sets b->lib_nal->libnal_ni.ni_pid.{nid,pid}, which
+     * lib_init() is about to do from the process_id passed to it...*/
     set_address(b,args->nia_requested_pid);
 
-    process_id.pid = b->nal_cb->ni.pid;
-    process_id.nid = b->nal_cb->ni.nid;
+    process_id = b->lib_nal->libnal_ni.ni_pid;
     
     if (nal_table[nal_type]) rc=(*nal_table[nal_type])(b);
     /* initialize the generic 'library' level code */
 
-    rc = lib_init(b->nal_cb, process_id, 
+    rc = lib_init(b->lib_nal, args->nia_apinal, 
+                  process_id, 
                   args->nia_requested_limits, 
                   args->nia_actual_limits);
 
index 6507924..f329e2a 100644 (file)
@@ -91,8 +91,8 @@ void set_address(bridge t,ptl_pid_t pidrequest)
     int port;
     if (pidrequest==(unsigned short)PTL_PID_ANY) port = 0;
     else port=pidrequest;
-    t->nal_cb->ni.nid=get_node_id();
-    t->nal_cb->ni.pid=port;
+    t->lib_nal->libnal_ni.ni_pid.nid=get_node_id();
+    t->lib_nal->libnal_ni.ni_pid.pid=port;
 }
 #else
 
@@ -120,10 +120,9 @@ void set_address(bridge t,ptl_pid_t pidrequest)
     in_addr = get_node_id();
 
     t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */
-    t->nal_cb->ni.nid = ((in_addr & PNAL_HOSTID_MASK) 
-                            << PNAL_VNODE_SHIFT)
-        + virtnode;
-
+    t->lib_nal->libnal_ni.ni_pid.nid = ((in_addr & PNAL_HOSTID_MASK) 
+                                        << PNAL_VNODE_SHIFT)
+                                       + virtnode;
     pid=pidrequest;
     /* TODO: Support of pid PTL_ID_ANY with virtual nodes needs more work. */
 #ifdef notyet
@@ -141,6 +140,6 @@ void set_address(bridge t,ptl_pid_t pidrequest)
             return;
         }
     else port = ((virtnode << PNAL_VNODE_SHIFT) + pid) + PNAL_BASE_PORT;
-    t->nal_cb->ni.pid=pid;
+    t->lib_nal->libnal_ni.ni_pid.pid=pid;
 }
 #endif
index 90ce324..d2f0f2c 100644 (file)
@@ -19,7 +19,7 @@
 
 typedef struct bridge {
     int alive;
-    nal_cb_t *nal_cb;
+    lib_nal_t *lib_nal;
     void *lower;
     void *local;
     void (*shutdown)(struct bridge *);
index e40c4b9..f3843d7 100644 (file)
@@ -60,34 +60,6 @@ void procbridge_wakeup_nal(procbridge p)
     syscall(SYS_write, p->notifier[0], buf, sizeof(buf));
 }
 
-/* Function: forward
- * Arguments: nal_t *nal: pointer to my top-side nal structure
- *            id: the command to pass to the lower layer
- *            args, args_len:pointer to and length of the request
- *            ret, ret_len:  pointer to and size of the result
- * Returns: a portals status code
- *
- * forwards a packaged api call from the 'api' side to the 'library'
- *   side, and collects the result
- */
-static int procbridge_forward(nal_t *n, int id, void *args, size_t args_len,
-                             void *ret, size_t ret_len)
-{
-    bridge b = (bridge) n->nal_data;
-
-    if (id == PTL_FINI) {
-            lib_fini(b->nal_cb);
-
-            if (b->shutdown)
-                (*b->shutdown)(b);
-    }
-
-    lib_dispatch(b->nal_cb, NULL, id, args, ret);
-
-    return (PTL_OK);
-}
-
-
 /* Function: shutdown
  * Arguments: nal: a pointer to my top side nal structure
  *            ni: my network interface index
@@ -97,7 +69,8 @@ static int procbridge_forward(nal_t *n, int id, void *args, size_t args_len,
  */
 static void procbridge_shutdown(nal_t *n)
 {
-    bridge b=(bridge)n->nal_data;
+    lib_nal_t *nal = n->nal_data;
+    bridge b=(bridge)nal->libnal_data;
     procbridge p=(procbridge)b->local;
 
     p->nal_flags |= NAL_FLAG_STOPPING;
@@ -117,83 +90,19 @@ static void procbridge_shutdown(nal_t *n)
 }
 
 
-static void procbridge_lock(nal_t * n, unsigned long *flags)
-{
-    bridge b=(bridge)n->nal_data;
-    procbridge p=(procbridge)b->local;
-
-    pthread_mutex_lock(&p->mutex);
-}
-
-static void procbridge_unlock(nal_t * n, unsigned long *flags)
-{
-    bridge b=(bridge)n->nal_data;
-    procbridge p=(procbridge)b->local;
-
-    pthread_mutex_unlock(&p->mutex);
-}
-
-/* Function: yield
- * Arguments:  pid:
- *
- *  this function was originally intended to allow the
- *   lower half thread to be scheduled to allow progress. we
- *   overload it to explicitly block until signalled by the
- *   lower half.
- */
-static int procbridge_yield(nal_t *n, unsigned long *flags, int milliseconds)
-{
-    bridge b=(bridge)n->nal_data;
-    procbridge p=(procbridge)b->local;
-
-    if (milliseconds == 0)
-            return 0;
-            
-    if (milliseconds < 0) {
-        pthread_cond_wait(&p->cond,&p->mutex);
-    } else {
-        struct timeval then;
-        struct timeval now;
-        struct timespec timeout;
-
-        gettimeofday(&then, NULL);
-        timeout.tv_sec = then.tv_sec + milliseconds/1000;
-        timeout.tv_nsec = then.tv_usec * 1000 + milliseconds % 1000 * 1000000;
-        if (timeout.tv_nsec >= 1000000000) {
-                timeout.tv_sec++;
-                timeout.tv_nsec -= 1000000000;
-        }
-
-        pthread_cond_timedwait(&p->cond, &p->mutex, &timeout);
-
-        gettimeofday(&now, NULL);
-        milliseconds -= (now.tv_sec - then.tv_sec) * 1000 + 
-                        (now.tv_usec - then.tv_usec) / 1000;
-        
-        if (milliseconds < 0)
-                milliseconds = 0;
-    }
-
-    return (milliseconds);
-}
-
 /* forward decl */
 extern int procbridge_startup (nal_t *, ptl_pid_t,
                                ptl_ni_limits_t *, ptl_ni_limits_t *);
 
 /* api_nal
  *  the interface vector to allow the generic code to access
- *  this nal. this is seperate from the library side nal_cb.
+ *  this nal. this is separate from the library side lib_nal.
  *  TODO: should be dyanmically allocated
  */
 nal_t procapi_nal = {
     nal_data: NULL,
-    startup:  procbridge_startup,
-    shutdown: procbridge_shutdown,
-    forward:  procbridge_forward,
-    yield:    procbridge_yield,
-    lock:     procbridge_lock,
-    unlock:   procbridge_unlock
+    nal_ni_init: procbridge_startup,
+    nal_ni_fini: procbridge_shutdown,
 };
 
 ptl_nid_t tcpnal_mynid;
@@ -228,7 +137,6 @@ int procbridge_startup (nal_t *nal, ptl_pid_t requested_pid,
 
     b=(bridge)malloc(sizeof(struct bridge));
     p=(procbridge)malloc(sizeof(struct procbridge));
-    nal->nal_data=b;
     b->local=p;
 
     args.nia_requested_pid = requested_pid;
@@ -236,6 +144,7 @@ int procbridge_startup (nal_t *nal, ptl_pid_t requested_pid,
     args.nia_actual_limits = actual_limits;
     args.nia_nal_type = nal_type;
     args.nia_bridge = b;
+    args.nia_apinal = nal;
 
     /* init procbridge */
     pthread_mutex_init(&p->mutex,0);
@@ -273,7 +182,7 @@ int procbridge_startup (nal_t *nal, ptl_pid_t requested_pid,
     if (p->nal_flags & NAL_FLAG_STOPPED)
         return PTL_FAIL;
 
-    b->nal_cb->ni.nid = tcpnal_mynid;
+    b->lib_nal->libnal_ni.ni_pid.nid = tcpnal_mynid;
 
     return PTL_OK;
 }
index 1c8e7dd..1f91ced 100644 (file)
@@ -30,7 +30,6 @@ typedef struct procbridge {
 
     int nal_flags;
 
-    pthread_mutex_t nal_cb_lock;
 } *procbridge;
 
 typedef struct nal_init_args {
@@ -39,6 +38,7 @@ typedef struct nal_init_args {
     ptl_ni_limits_t *nia_actual_limits;
     int              nia_nal_type;
     bridge           nia_bridge;
+    nal_t           *nia_apinal;
 } nal_init_args_t;
 
 extern void *nal_thread(void *);
index af0745b..7ee7c71 100644 (file)
 /* the following functions are stubs to satisfy the nal definition
    without doing anything particularily useful*/
 
-static ptl_err_t nal_write(nal_cb_t *nal,
-                           void *private,
-                           user_ptr dst_addr,
-                           void *src_addr,
-                           size_t len)
-{
-    memcpy(dst_addr, src_addr, len);
-    return PTL_OK;
-}
-
-static ptl_err_t nal_read(nal_cb_t * nal,
-                          void *private,
-                          void *dst_addr,
-                          user_ptr src_addr,
-                          size_t len)
-{
-       memcpy(dst_addr, src_addr, len);
-       return PTL_OK;
-}
-
-static void *nal_malloc(nal_cb_t *nal,
-                        size_t len)
-{
-    void *buf =  malloc(len);
-    return buf;
-}
-
-static void nal_free(nal_cb_t *nal,
-                     void *buf,
-                     size_t len)
-{
-    free(buf);
-}
-
-static void nal_printf(nal_cb_t *nal,
-                       const char *fmt,
-                       ...)
-{
-    va_list        ap;
-
-    va_start(ap, fmt);
-    vprintf(fmt, ap);
-    va_end(ap);
-}
-
-
-static void nal_cli(nal_cb_t *nal,
-                    unsigned long *flags)
-{
-    bridge b = (bridge) nal->nal_data;
-    procbridge p = (procbridge) b->local;
-
-    pthread_mutex_lock(&p->mutex);
-}
-
-
-static void nal_sti(nal_cb_t *nal,
-                    unsigned long *flags)
-{
-    bridge b = (bridge)nal->nal_data;
-    procbridge p = (procbridge) b->local;
-
-    pthread_mutex_unlock(&p->mutex);
-}
-
-static void nal_callback(nal_cb_t *nal, void *private,
-                         lib_eq_t *eq, ptl_event_t *ev)
-{
-        bridge b = (bridge)nal->nal_data;
-        procbridge p = (procbridge) b->local;
-
-        /* holding p->mutex */
-        if (eq->event_callback != NULL)
-                eq->event_callback(ev);
-        
-        pthread_cond_broadcast(&p->cond);
-}
-
-static int nal_dist(nal_cb_t *nal,
+static int nal_dist(lib_nal_t *nal,
                     ptl_nid_t nid,
                     unsigned long *dist)
 {
@@ -170,33 +92,25 @@ void *nal_thread(void *z)
     ptl_process_id_t process_id;
     int nal_type;
     
-    b->nal_cb=(nal_cb_t *)malloc(sizeof(nal_cb_t));
-    b->nal_cb->nal_data=b;
-    b->nal_cb->cb_read=nal_read;
-    b->nal_cb->cb_write=nal_write;
-    b->nal_cb->cb_malloc=nal_malloc;
-    b->nal_cb->cb_free=nal_free;
-    b->nal_cb->cb_map=NULL;
-    b->nal_cb->cb_unmap=NULL;
-    b->nal_cb->cb_printf=nal_printf;
-    b->nal_cb->cb_cli=nal_cli;
-    b->nal_cb->cb_sti=nal_sti;
-    b->nal_cb->cb_callback=nal_callback;
-    b->nal_cb->cb_dist=nal_dist;
+    b->lib_nal=(lib_nal_t *)malloc(sizeof(lib_nal_t));
+    b->lib_nal->libnal_data=b;
+    b->lib_nal->libnal_map=NULL;
+    b->lib_nal->libnal_unmap=NULL;
+    b->lib_nal->libnal_dist=nal_dist;
 
     nal_type = args->nia_nal_type;
 
-    /* Wierd, but this sets b->nal_cb->ni.{nid,pid}, which lib_init() is
-     * about to do from the process_id passed to it...*/
+    /* Weird, but this sets b->lib_nal->libnal_ni.ni_pid.{nid,pid}, which
+     * lib_init() is about to do from the process_id passed to it...*/
     set_address(b,args->nia_requested_pid);
 
-    process_id.pid = b->nal_cb->ni.pid;
-    process_id.nid = b->nal_cb->ni.nid;
+    process_id = b->lib_nal->libnal_ni.ni_pid;
     
     if (nal_table[nal_type]) rc=(*nal_table[nal_type])(b);
     /* initialize the generic 'library' level code */
 
-    rc = lib_init(b->nal_cb, process_id, 
+    rc = lib_init(b->lib_nal, args->nia_apinal, 
+                  process_id, 
                   args->nia_requested_limits, 
                   args->nia_actual_limits);
 
index 0c47f42..34a9c9d 100644 (file)
@@ -55,7 +55,7 @@
  *
  * sends a packet to the peer, after insuring that a connection exists
  */
-ptl_err_t tcpnal_send(nal_cb_t *n,
+ptl_err_t tcpnal_send(lib_nal_t *n,
                       void *private,
                       lib_msg_t *cookie,
                       ptl_hdr_t *hdr,
@@ -68,7 +68,7 @@ ptl_err_t tcpnal_send(nal_cb_t *n,
                       size_t len)
 {
     connection c;
-    bridge b=(bridge)n->nal_data;
+    bridge b=(bridge)n->libnal_data;
     struct iovec tiov[257];
     static pthread_mutex_t send_lock = PTHREAD_MUTEX_INITIALIZER;
     ptl_err_t rc = PTL_OK;
@@ -142,7 +142,7 @@ ptl_err_t tcpnal_send(nal_cb_t *n,
 
 
 /* Function:  tcpnal_recv
- * Arguments: nal_cb_t *nal:     pointer to my nal control block
+ * Arguments: lib_nal_t *nal:    pointer to my nal control block
  *            void *private:     connection pointer passed through
  *                               lib_parse()
  *            lib_msg_t *cookie: passed back to portals library
@@ -154,7 +154,7 @@ ptl_err_t tcpnal_send(nal_cb_t *n,
  * blocking read of the requested data. must drain out the
  * difference of mainpulated and requested lengths from the network
  */
-ptl_err_t tcpnal_recv(nal_cb_t *n,
+ptl_err_t tcpnal_recv(lib_nal_t *n,
                       void *private,
                       lib_msg_t *cookie,
                       unsigned int niov,
@@ -217,7 +217,8 @@ static int from_connection(void *a, void *d)
     ptl_hdr_t hdr;
 
     if (read_connection(c, (unsigned char *)&hdr, sizeof(hdr))){
-        lib_parse(b->nal_cb, &hdr, c);
+        lib_parse(b->lib_nal, &hdr, c);
+        /*TODO: check error status*/
         return(1);
     }
     return(0);
@@ -239,19 +240,19 @@ int tcpnal_init(bridge b)
 {
     manager m;
         
-    b->nal_cb->cb_send=tcpnal_send;
-    b->nal_cb->cb_recv=tcpnal_recv;
+    b->lib_nal->libnal_send=tcpnal_send;
+    b->lib_nal->libnal_recv=tcpnal_recv;
     b->shutdown=tcpnal_shutdown;
     
-    if (!(m=init_connections(PNAL_PORT(b->nal_cb->ni.nid,
-                                       b->nal_cb->ni.pid),
+    if (!(m=init_connections(PNAL_PORT(b->lib_nal->libnal_ni.ni_pid.nid,
+                                       b->lib_nal->libnal_ni.ni_pid.pid),
                              from_connection,b))){
         /* TODO: this needs to shut down the
            newly created junk */
         return(PTL_NAL_FAILED);
     }
     /* XXX cfs hack */
-    b->nal_cb->ni.pid=0;
+    b->lib_nal->libnal_ni.ni_pid.pid=0;
     b->lower=m;
     return(PTL_OK);
 }
index 0c47f42..34a9c9d 100644 (file)
@@ -55,7 +55,7 @@
  *
  * sends a packet to the peer, after insuring that a connection exists
  */
-ptl_err_t tcpnal_send(nal_cb_t *n,
+ptl_err_t tcpnal_send(lib_nal_t *n,
                       void *private,
                       lib_msg_t *cookie,
                       ptl_hdr_t *hdr,
@@ -68,7 +68,7 @@ ptl_err_t tcpnal_send(nal_cb_t *n,
                       size_t len)
 {
     connection c;
-    bridge b=(bridge)n->nal_data;
+    bridge b=(bridge)n->libnal_data;
     struct iovec tiov[257];
     static pthread_mutex_t send_lock = PTHREAD_MUTEX_INITIALIZER;
     ptl_err_t rc = PTL_OK;
@@ -142,7 +142,7 @@ ptl_err_t tcpnal_send(nal_cb_t *n,
 
 
 /* Function:  tcpnal_recv
- * Arguments: nal_cb_t *nal:     pointer to my nal control block
+ * Arguments: lib_nal_t *nal:    pointer to my nal control block
  *            void *private:     connection pointer passed through
  *                               lib_parse()
  *            lib_msg_t *cookie: passed back to portals library
@@ -154,7 +154,7 @@ ptl_err_t tcpnal_send(nal_cb_t *n,
  * blocking read of the requested data. must drain out the
  * difference of mainpulated and requested lengths from the network
  */
-ptl_err_t tcpnal_recv(nal_cb_t *n,
+ptl_err_t tcpnal_recv(lib_nal_t *n,
                       void *private,
                       lib_msg_t *cookie,
                       unsigned int niov,
@@ -217,7 +217,8 @@ static int from_connection(void *a, void *d)
     ptl_hdr_t hdr;
 
     if (read_connection(c, (unsigned char *)&hdr, sizeof(hdr))){
-        lib_parse(b->nal_cb, &hdr, c);
+        lib_parse(b->lib_nal, &hdr, c);
+        /*TODO: check error status*/
         return(1);
     }
     return(0);
@@ -239,19 +240,19 @@ int tcpnal_init(bridge b)
 {
     manager m;
         
-    b->nal_cb->cb_send=tcpnal_send;
-    b->nal_cb->cb_recv=tcpnal_recv;
+    b->lib_nal->libnal_send=tcpnal_send;
+    b->lib_nal->libnal_recv=tcpnal_recv;
     b->shutdown=tcpnal_shutdown;
     
-    if (!(m=init_connections(PNAL_PORT(b->nal_cb->ni.nid,
-                                       b->nal_cb->ni.pid),
+    if (!(m=init_connections(PNAL_PORT(b->lib_nal->libnal_ni.ni_pid.nid,
+                                       b->lib_nal->libnal_ni.ni_pid.pid),
                              from_connection,b))){
         /* TODO: this needs to shut down the
            newly created junk */
         return(PTL_NAL_FAILED);
     }
     /* XXX cfs hack */
-    b->nal_cb->ni.pid=0;
+    b->lib_nal->libnal_ni.ni_pid.pid=0;
     b->lower=m;
     return(PTL_OK);
 }
index 1b957a3..a563e0d 100644 (file)
@@ -1,7 +1,6 @@
 tbd  Cluster File Systems, Inc. <info@clusterfs.com>
        * version 1.2.x
        * bug fixes
-       - clear page cache after eviction (2766)
        - don't dereference NULL peer_ni in ldlm_handle_ast_error (3258)
        - don't allow unlinking open directory if it isn't empty (2904)
        - handle partial page writes in filter; fix 512b direct IO (3138)
@@ -12,7 +11,7 @@ tbd  Cluster File Systems, Inc. <info@clusterfs.com>
        * miscellania
        - drop scimac NAL (unmaintained)
 
-tbd  Cluster File Systems, Inc. <info@clusterfs.com>
+2004-05-27  Cluster File Systems, Inc. <info@clusterfs.com>
        * version 1.2.2
        * bug fixes
        - don't copy lvb into (possibly NULL) reply on error (2983)
@@ -47,11 +46,20 @@ tbd  Cluster File Systems, Inc. <info@clusterfs.com>
        - update iopen-2.6 patch with fixes from 2399,2517,2904 (3301)
        - don't leak open file on MDS after open resend (3325)
        - serialize filter_precreate and filter_destroy_precreated (3329)
+       - loop device shouldn't call sync_dev() for nul device (3092)
+       - clear page cache after eviction (2766)
+       - resynchronize MDS->OST in background (2824)
+       - refuse to mount the same filesystem twice on same mountpoint (3394)
+       - allow llmount to create routes for mounting behind routers (3320)
+       - push lock cancellation to blocking thread for glimpse ASTs (3409)
+       - don't call osc_set_data_with_check() for TEST_LOCK matches (3159)
+       - fix rare problem with rename on htree directories (3417)
        * miscellania
        - allow default OST striping configuration per directory (1414)
        - fix compilation for qswnal for 2.6 kernels (3125)
        - increase maximum number of MDS request buffers for large systems
        - change liblustreapi to be useful for external progs like lfsck (3098)
+       - increase local configuration timeout for slow disks (3353)
 
 2004-03-22  Cluster File Systems, Inc. <info@clusterfs.com>
        * version 1.2.1
index c5d668d..04e6356 100644 (file)
@@ -56,6 +56,10 @@ lvfs-sources:
 modules: lustre_build_version $(DEP) $(LDISKFS) lvfs-sources
        $(MAKE) $(ARCH_UM) -C $(LINUX) -f $(PWD)/kernel-tests/Makefile LUSTRE_LINUX_CONFIG=$(LINUX_CONFIG) SUBDIRS=$(PWD) -o tmp_include_depends -o scripts -o include/config/MARKER $@
 
+endif # MODULES
+
+all-recursive: lustre_build_version
+
 lustre_build_version:
        perl $(top_builddir)/scripts/version_tag.pl $(top_srcdir) $(top_builddir) > tmpver
        echo "#define LUSTRE_RELEASE @RELEASE@" >> tmpver
@@ -64,8 +68,6 @@ lustre_build_version:
                 $(RM) tmpver ||                                            \
                 mv tmpver $(top_builddir)/include/linux/lustre_build_version.h
 
-endif # MODULES
-
 dist-hook:
        find $(distdir) -name .deps | xargs rm -rf
        find $(distdir) -name CVS | xargs rm -rf
index 004852e..e1c2c6c 100644 (file)
@@ -48,7 +48,7 @@ or for RH9 systems you can use:
 
 ftp://fr2.rpmfind.net/linux/redhat/9/en/os/i386/RedHat/RPMS/autoconf-2.57-3.noarch.rpm
 EOF
-       [ "$cmd" = "automake" -a "$required" = "1.7.8" ] && cat >&2 <<EOF
+       [ "$cmd" = "automake-1.7" -a "$required" = "1.7.8" ] && cat >&2 <<EOF
 
 or for RH9 systems you can use:
 
@@ -58,31 +58,33 @@ EOF
 }
 
 check_version() {
+    local tool
     local cmd
     local required
     local version
 
-    cmd=$1
-    required=$2
+    tool=$1
+    cmd=$2
+    required=$3
     echo -n "checking for $cmd $required... "
     if ! $cmd --version >/dev/null ; then
        error_msg "missing"
     fi
-    version=$($cmd --version | awk "BEGIN { IGNORECASE=1 } /$cmd \(GNU $cmd\)/ { print \$4 }")
+    version=$($cmd --version | awk "BEGIN { IGNORECASE=1 } /$tool \(GNU $tool\)/ { print \$4 }")
     echo "found $version"
     if ! compare_versions "$required" "$version" ; then
        error_msg "too old"
     fi
 }
 
-check_version automake "1.7.8"
-check_version autoconf "2.57"
+check_version automake automake-1.7 "1.7.8"
+check_version autoconf autoconf "2.57"
 echo "Running aclocal..."
-aclocal
+aclocal-1.7
 echo "Running autoheader..."
 autoheader
 echo "Running automake..."
-automake -a -c
+automake-1.7 -a -c
 echo "Running autoconf..."
 autoconf
 
index 3373fd0..d667270 100644 (file)
@@ -5,7 +5,7 @@
 
 AC_INIT
 AC_CANONICAL_SYSTEM
-AM_INIT_AUTOMAKE(lustre, HEAD)
+AM_INIT_AUTOMAKE(lustre, b1_4)
 # AM_MAINTAINER_MODE
 
 # Four main targets: lustre kernel modules, utilities, tests, and liblustre
index 120e996..13363bd 100644 (file)
 static inline void lustre_daemonize_helper(void)
 {
         LASSERT(current->signal != NULL);
-        current->session = 1;
+        current->signal->session = 1;
         if (current->group_leader)
-                current->group_leader->__pgrp = 1;
+                current->group_leader->signal->pgrp = 1;
         else
                 CERROR("we aren't group leader\n");
-        current->tty = NULL;
+        current->signal->tty = NULL;
 }
 
 static inline int cleanup_group_info(void)
index 9b89859..09fd52e 100644 (file)
@@ -652,6 +652,8 @@ int ptlrpc_start_thread(struct obd_device *dev, struct ptlrpc_service *svc,
                         char *name);
 int ptlrpc_unregister_service(struct ptlrpc_service *service);
 int liblustre_check_services (void *arg);
+void ptlrpc_daemonize(void);
+
 
 struct ptlrpc_svc_data {
         char *name;
index 3de6a8f..f6b2f43 100644 (file)
 +              struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
 +              rec_len = EXT3_DIR_REC_LEN(de->name_len);
 +              memcpy (to, de, rec_len);
-+              ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
++              ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len);
 +              de->inode = 0;
 +              map++;
 +              to += rec_len;
 +                      rec_len = EXT3_DIR_REC_LEN(de->name_len);
 +                      if (de > to)
 +                              memmove(to, de, rec_len);
-+                      to->rec_len = rec_len;
++                      to->rec_len = cpu_to_le16(rec_len);
 +                      prev = to;
-+                      to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
++                      to = (struct ext3_dir_entry_2 *)((char *)to + rec_len);
 +              }
 +              de = next;
 +      }
 +      data1 = bh2->b_data;
 +
 +      /* The 0th block becomes the root, move the dirents out */
-+      de = (struct ext3_dir_entry_2 *) &root->dotdot;
-+      de = (struct ext3_dir_entry_2 *) ((char *)de + de->rec_len);
++      de = (struct ext3_dir_entry_2 *)&root->dotdot;
++      de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len));
 +      len = ((char *) root) + blocksize - (char *) de;
 +      memcpy (data1, de, len);
 +      de = (struct ext3_dir_entry_2 *) data1;
index e937932..9730921 100644 (file)
@@ -1405,7 +1405,7 @@ Index: linux-2.4.19/fs/ext3/namei.c
 +              struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
 +              rec_len = EXT3_DIR_REC_LEN(de->name_len);
 +              memcpy (to, de, rec_len);
-+              ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
++              ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len);
 +              de->inode = 0;
 +              map++;
 +              to += rec_len;
@@ -1426,7 +1426,7 @@ Index: linux-2.4.19/fs/ext3/namei.c
 +                      rec_len = EXT3_DIR_REC_LEN(de->name_len);
 +                      if (de > to)
 +                              memmove(to, de, rec_len);
-+                      to->rec_len = rec_len;
++                      to->rec_len = cpu_to_le16(rec_len);
 +                      prev = to;
 +                      to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
 +              }
@@ -1652,8 +1652,8 @@ Index: linux-2.4.19/fs/ext3/namei.c
 +      data1 = bh2->b_data;
 +
 +      /* The 0th block becomes the root, move the dirents out */
-+      de = (struct ext3_dir_entry_2 *) &root->dotdot;
-+      de = (struct ext3_dir_entry_2 *) ((char *)de + de->rec_len);
++      de = (struct ext3_dir_entry_2 *)&root->dotdot;
++      de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len));
 +      len = ((char *) root) + blocksize - (char *) de;
 +      memcpy (data1, de, len);
 +      de = (struct ext3_dir_entry_2 *) data1;
index 748671f..28a1ad6 100644 (file)
 +              struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
 +              rec_len = EXT3_DIR_REC_LEN(de->name_len);
 +              memcpy (to, de, rec_len);
-+              ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
++              ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len);
 +              de->inode = 0;
 +              map++;
 +              to += rec_len;
 +                      rec_len = EXT3_DIR_REC_LEN(de->name_len);
 +                      if (de > to)
 +                              memmove(to, de, rec_len);
-+                      to->rec_len = rec_len;
++                      to->rec_len = cpu_to_le16(rec_len);
 +                      prev = to;
-+                      to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
++                      to = (struct ext3_dir_entry_2 *)((char *)to + rec_len);
 +              }
 +              de = next;
 +      }
 +      data1 = bh2->b_data;
 +
 +      /* The 0th block becomes the root, move the dirents out */
-+      de = (struct ext3_dir_entry_2 *) &root->dotdot;
-+      de = (struct ext3_dir_entry_2 *) ((char *)de + de->rec_len);
++      de = (struct ext3_dir_entry_2 *)&root->dotdot;
++      de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len));
 +      len = ((char *) root) + blocksize - (char *) de;
 +      memcpy (data1, de, len);
 +      de = (struct ext3_dir_entry_2 *) data1;
index 748671f..28a1ad6 100644 (file)
 +              struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
 +              rec_len = EXT3_DIR_REC_LEN(de->name_len);
 +              memcpy (to, de, rec_len);
-+              ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
++              ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len);
 +              de->inode = 0;
 +              map++;
 +              to += rec_len;
 +                      rec_len = EXT3_DIR_REC_LEN(de->name_len);
 +                      if (de > to)
 +                              memmove(to, de, rec_len);
-+                      to->rec_len = rec_len;
++                      to->rec_len = cpu_to_le16(rec_len);
 +                      prev = to;
-+                      to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
++                      to = (struct ext3_dir_entry_2 *)((char *)to + rec_len);
 +              }
 +              de = next;
 +      }
 +      data1 = bh2->b_data;
 +
 +      /* The 0th block becomes the root, move the dirents out */
-+      de = (struct ext3_dir_entry_2 *) &root->dotdot;
-+      de = (struct ext3_dir_entry_2 *) ((char *)de + de->rec_len);
++      de = (struct ext3_dir_entry_2 *)&root->dotdot;
++      de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len));
 +      len = ((char *) root) + blocksize - (char *) de;
 +      memcpy (data1, de, len);
 +      de = (struct ext3_dir_entry_2 *) data1;
index 67f5afa..4c8d4fa 100644 (file)
 diff -Nru a/fs/ext3/namei.c b/fs/ext3/namei.c
 --- a/fs/ext3/namei.c  Thu Nov  7 10:57:49 2002
 +++ b/fs/ext3/namei.c  Thu Nov  7 10:57:49 2002
-@@ -2173,7 +2173,26 @@
+@@ -2173,7 +2173,30 @@
        /*
         * ok, that's it
         */
 -      ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+      retval = ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+      if (retval == -ENOENT) {
-+              /*
-+               * old_de could have moved out from under us.
-+               */
++      if (le32_to_cpu(old_de->inode) != old_inode->i_ino ||
++          old_de->name_len != old_dentry->d_name.len ||
++          strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) ||
++          (retval = ext3_delete_entry(handle, old_dir,
++                                      old_de, old_bh)) == -ENOENT) {
++              /* old_de could have moved from under us during htree split, so
++               * make sure that we are deleting the right entry.  We might
++               * also be pointing to a stale entry in the unused part of
++               * old_bh so just checking inum and the name isn't enough. */
 +              struct buffer_head *old_bh2;
 +              struct ext3_dir_entry_2 *old_de2;
-+              
++
 +              old_bh2 = ext3_find_entry(old_dentry, &old_de2);
 +              if (old_bh2) {
 +                      retval = ext3_delete_entry(handle, old_dir,
index 7865c63..0806c38 100644 (file)
@@ -1420,7 +1420,7 @@ Index: linux-2.4.19-pre1/fs/ext3/namei.c
 +              struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
 +              rec_len = EXT3_DIR_REC_LEN(de->name_len);
 +              memcpy (to, de, rec_len);
-+              ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
++              ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len);
 +              de->inode = 0;
 +              map++;
 +              to += rec_len;
@@ -1441,9 +1441,9 @@ Index: linux-2.4.19-pre1/fs/ext3/namei.c
 +                      rec_len = EXT3_DIR_REC_LEN(de->name_len);
 +                      if (de > to)
 +                              memmove(to, de, rec_len);
-+                      to->rec_len = rec_len;
++                      to->rec_len = cpu_to_le16(rec_len);
 +                      prev = to;
-+                      to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
++                      to = (struct ext3_dir_entry_2 *)((char *)to + rec_len);
 +              }
 +              de = next;
 +      }
@@ -2258,19 +2258,23 @@ Index: linux-2.4.19-pre1/fs/ext3/namei.c
  
        if (IS_SYNC(old_dir) || IS_SYNC(new_dir))
                handle->h_sync = 1;
-@@ -1070,14 +2174,33 @@
+@@ -1070,14 +2174,37 @@
        /*
         * ok, that's it
         */
 -      ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+      retval = ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+      if (retval == -ENOENT) {
-+              /*
-+               * old_de could have moved out from under us.
-+               */
++      if (le32_to_cpu(old_de->inode) != old_inode->i_ino ||
++          old_de->name_len != old_dentry->d_name.len ||
++          strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) ||
++          (retval = ext3_delete_entry(handle, old_dir,
++                                      old_de, old_bh)) == -ENOENT) {
++              /* old_de could have moved from under us during htree split, so
++               * make sure that we are deleting the right entry.  We might
++               * also be pointing to a stale entry in the unused part of
++               * old_bh so just checking inum and the name isn't enough. */
 +              struct buffer_head *old_bh2;
 +              struct ext3_dir_entry_2 *old_de2;
-+              
++
 +              old_bh2 = ext3_find_entry(old_dentry, &old_de2);
 +              if (old_bh2) {
 +                      retval = ext3_delete_entry(handle, old_dir,
index 3a9719b..4b445f5 100644 (file)
@@ -1420,7 +1420,7 @@ Index: linux-2.4.21-chaos/fs/ext3/namei.c
 +              struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
 +              rec_len = EXT3_DIR_REC_LEN(de->name_len);
 +              memcpy (to, de, rec_len);
-+              ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
++              ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len);
 +              de->inode = 0;
 +              map++;
 +              to += rec_len;
@@ -1441,9 +1441,9 @@ Index: linux-2.4.21-chaos/fs/ext3/namei.c
 +                      rec_len = EXT3_DIR_REC_LEN(de->name_len);
 +                      if (de > to)
 +                              memmove(to, de, rec_len);
-+                      to->rec_len = rec_len;
++                      to->rec_len = cpu_to_le16(rec_len);
 +                      prev = to;
-+                      to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
++                      to = (struct ext3_dir_entry_2 *)((char *) to + rec_len);
 +              }
 +              de = next;
 +      }
@@ -2263,19 +2263,23 @@ Index: linux-2.4.21-chaos/fs/ext3/namei.c
  
        if (IS_SYNC(old_dir) || IS_SYNC(new_dir))
                handle->h_sync = 1;
-@@ -1070,14 +2174,33 @@
+@@ -1070,14 +2174,37 @@
        /*
         * ok, that's it
         */
 -      ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+      retval = ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+      if (retval == -ENOENT) {
-+              /*
-+               * old_de could have moved out from under us.
-+               */
++      if (le32_to_cpu(old_de->inode) != old_inode->i_ino ||
++          old_de->name_len != old_dentry->d_name.len ||
++          strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) ||
++          (retval = ext3_delete_entry(handle, old_dir,
++                                      old_de, old_bh)) == -ENOENT) {
++              /* old_de could have moved from under us during htree split, so
++               * make sure that we are deleting the right entry.  We might
++               * also be pointing to a stale entry in the unused part of
++               * old_bh so just checking inum and the name isn't enough. */
 +              struct buffer_head *old_bh2;
 +              struct ext3_dir_entry_2 *old_de2;
-+              
++
 +              old_bh2 = ext3_find_entry(old_dentry, &old_de2);
 +              if (old_bh2) {
 +                      retval = ext3_delete_entry(handle, old_dir,
index 436bd34..ca2cacf 100644 (file)
 +              struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
 +              rec_len = EXT3_DIR_REC_LEN(de->name_len);
 +              memcpy (to, de, rec_len);
-+              ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
++              ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len);
 +              de->inode = 0;
 +              map++;
 +              to += rec_len;
 +                      rec_len = EXT3_DIR_REC_LEN(de->name_len);
 +                      if (de > to)
 +                              memmove(to, de, rec_len);
-+                      to->rec_len = rec_len;
++                      to->rec_len = cpu_to_le16(rec_len);
 +                      prev = to;
-+                      to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
++                      to = (struct ext3_dir_entry_2 *)((char *)to + rec_len);
 +              }
 +              de = next;
 +      }
  
        if (IS_SYNC(old_dir) || IS_SYNC(new_dir))
                handle->h_sync = 1;
-@@ -1070,14 +2174,33 @@ static int ext3_rename (struct inode * o
+@@ -1070,14 +2174,37 @@ static int ext3_rename (struct inode * o
        /*
         * ok, that's it
         */
 -      ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+      retval = ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+      if (retval == -ENOENT) {
-+              /*
-+               * old_de could have moved out from under us.
-+               */
++      if (le32_to_cpu(old_de->inode) != old_inode->i_ino ||
++          old_de->name_len != old_dentry->d_name.len ||
++          strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) ||
++          (retval = ext3_delete_entry(handle, old_dir,
++                                      old_de, old_bh)) == -ENOENT) {
++              /* old_de could have moved from under us during htree split, so
++               * make sure that we are deleting the right entry.  We might
++               * also be pointing to a stale entry in the unused part of
++               * old_bh so just checking inum and the name isn't enough. */
 +              struct buffer_head *old_bh2;
 +              struct ext3_dir_entry_2 *old_de2;
-+              
++
 +              old_bh2 = ext3_find_entry(old_dentry, &old_de2);
 +              if (old_bh2) {
 +                      retval = ext3_delete_entry(handle, old_dir,
diff --git a/lustre/kernel_patches/patches/ext3-htree-rename_fix.patch b/lustre/kernel_patches/patches/ext3-htree-rename_fix.patch
new file mode 100644 (file)
index 0000000..75bf288
--- /dev/null
@@ -0,0 +1,24 @@
+===== fs/ext3/namei.c 1.52 vs edited =====
+--- 1.52/fs/ext3/namei.c       Mon May 10 05:25:34 2004
++++ edited/fs/ext3/namei.c     Thu May 20 19:57:10 2004
+@@ -2264,11 +2264,15 @@
+       /*
+        * ok, that's it
+        */
+-      retval = ext3_delete_entry(handle, old_dir, old_de, old_bh);
+-      if (retval == -ENOENT) {
+-              /*
+-               * old_de could have moved out from under us.
+-               */
++      if (le32_to_cpu(old_de->inode) != old_inode->i_ino ||
++          old_de->name_len != old_dentry->d_name.len ||
++          strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) ||
++          (retval = ext3_delete_entry(handle, old_dir,
++                                      old_de, old_bh)) == -ENOENT) {
++              /* old_de could have moved from under us during htree split, so
++               * make sure that we are deleting the right entry.  We might
++               * also be pointing to a stale entry in the unused part of
++               * old_bh so just checking inum and the name isn't enough. */
+               struct buffer_head *old_bh2;
+               struct ext3_dir_entry_2 *old_de2;
index a6e96f0..3e5148e 100644 (file)
@@ -1420,7 +1420,7 @@ Index: linux-2.4.21-suse/fs/ext3/namei.c
 +              struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
 +              rec_len = EXT3_DIR_REC_LEN(de->name_len);
 +              memcpy (to, de, rec_len);
-+              ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
++              ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len);
 +              de->inode = 0;
 +              map++;
 +              to += rec_len;
@@ -1441,9 +1441,9 @@ Index: linux-2.4.21-suse/fs/ext3/namei.c
 +                      rec_len = EXT3_DIR_REC_LEN(de->name_len);
 +                      if (de > to)
 +                              memmove(to, de, rec_len);
-+                      to->rec_len = rec_len;
++                      to->rec_len = cpu_to_le16(rec_len);
 +                      prev = to;
-+                      to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
++                      to = (struct ext3_dir_entry_2 *)((char *)to + rec_len);
 +              }
 +              de = next;
 +      }
@@ -2227,19 +2227,23 @@ Index: linux-2.4.21-suse/fs/ext3/namei.c
  
        if (IS_SYNC(old_dir) || IS_SYNC(new_dir))
                handle->h_sync = 1;
-@@ -1069,14 +2172,33 @@
+@@ -1069,14 +2172,37 @@
        /*
         * ok, that's it
         */
 -      ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+      retval = ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+      if (retval == -ENOENT) {
-+              /*
-+               * old_de could have moved out from under us.
-+               */
++      if (le32_to_cpu(old_de->inode) != old_inode->i_ino ||
++          old_de->name_len != old_dentry->d_name.len ||
++          strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) ||
++          (retval = ext3_delete_entry(handle, old_dir,
++                                      old_de, old_bh)) == -ENOENT) {
++              /* old_de could have moved from under us during htree split, so
++               * make sure that we are deleting the right entry.  We might
++               * also be pointing to a stale entry in the unused part of
++               * old_bh so just checking inum and the name isn't enough. */
 +              struct buffer_head *old_bh2;
 +              struct ext3_dir_entry_2 *old_de2;
-+              
++
 +              old_bh2 = ext3_find_entry(old_dentry, &old_de2);
 +              if (old_bh2) {
 +                      retval = ext3_delete_entry(handle, old_dir,
index 903118b..31f2ae3 100644 (file)
 +              struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
 +              rec_len = EXT3_DIR_REC_LEN(de->name_len);
 +              memcpy (to, de, rec_len);
-+              ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
++              ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len);
 +              de->inode = 0;
 +              map++;
 +              to += rec_len;
 +                      rec_len = EXT3_DIR_REC_LEN(de->name_len);
 +                      if (de > to)
 +                              memmove(to, de, rec_len);
-+                      to->rec_len = rec_len;
++                      to->rec_len = cpu_to_le16(rec_len);
 +                      prev = to;
-+                      to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
++                      to = (struct ext3_dir_entry_2 *)((char *)to + rec_len);
 +              }
 +              de = next;
 +      }
  
        if (IS_SYNC(old_dir) || IS_SYNC(new_dir))
                handle->h_sync = 1;
-@@ -1071,14 +2174,33 @@ static int ext3_rename (struct inode * o
+@@ -1071,14 +2174,37 @@ static int ext3_rename (struct inode * o
        /*
         * ok, that's it
         */
 -      ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+      retval = ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+      if (retval == -ENOENT) {
-+              /*
-+               * old_de could have moved out from under us.
-+               */
++      if (le32_to_cpu(old_de->inode) != old_inode->i_ino ||
++          old_de->name_len != old_dentry->d_name.len ||
++          strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) ||
++          (retval = ext3_delete_entry(handle, old_dir,
++                                      old_de, old_bh)) == -ENOENT) {
++              /* old_de could have moved from under us during htree split, so
++               * make sure that we are deleting the right entry.  We might
++               * also be pointing to a stale entry in the unused part of
++               * old_bh so just checking inum and the name isn't enough. */
 +              struct buffer_head *old_bh2;
 +              struct ext3_dir_entry_2 *old_de2;
-+              
++
 +              old_bh2 = ext3_find_entry(old_dentry, &old_de2);
 +              if (old_bh2) {
 +                      retval = ext3_delete_entry(handle, old_dir,
index 9602b80..c472368 100644 (file)
@@ -6,10 +6,10 @@
  include/linux/ext3_fs_i.h |    6 
  6 files changed, 500 insertions(+), 109 deletions(-)
 
-Index: linux-2.4.24/fs/ext3/namei.c
+Index: lum/fs/ext3/namei.c
 ===================================================================
---- linux-2.4.24.orig/fs/ext3/namei.c  2004-05-22 12:08:41.000000000 +0800
-+++ linux-2.4.24/fs/ext3/namei.c       2004-05-22 12:11:40.000000000 +0800
+--- lum.orig/fs/ext3/namei.c   2004-06-03 16:32:28.000000000 -0400
++++ lum/fs/ext3/namei.c        2004-06-03 16:45:45.000000000 -0400
 @@ -51,6 +51,9 @@
  {
        struct buffer_head *bh;
@@ -545,7 +545,7 @@ Index: linux-2.4.24/fs/ext3/namei.c
 +                      (struct ext3_dir_entry_2 *) (from + map->offs);
                rec_len = EXT3_DIR_REC_LEN(de->name_len);
                memcpy (to, de, rec_len);
-               ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
+               ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len);
 @@ -987,7 +1150,8 @@
  
  static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
@@ -977,7 +977,7 @@ Index: linux-2.4.24/fs/ext3/namei.c
        if (bh)
                brelse(bh);
        dx_release(frames);
-@@ -1901,6 +2220,7 @@
+@@ -1905,6 +2224,7 @@
        struct buffer_head * bh;
        struct ext3_dir_entry_2 * de;
        handle_t *handle;
@@ -985,7 +985,7 @@ Index: linux-2.4.24/fs/ext3/namei.c
  
        handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
        if (IS_ERR(handle)) {
-@@ -1908,7 +2228,7 @@
+@@ -1912,7 +2232,7 @@
        }
  
        retval = -ENOENT;
@@ -994,7 +994,7 @@ Index: linux-2.4.24/fs/ext3/namei.c
        if (!bh)
                goto end_rmdir;
  
-@@ -1919,14 +2239,19 @@
+@@ -1923,14 +2243,19 @@
        DQUOT_INIT(inode);
  
        retval = -EIO;
@@ -1016,7 +1016,7 @@ Index: linux-2.4.24/fs/ext3/namei.c
        if (retval)
                goto end_rmdir;
        if (inode->i_nlink != 2)
-@@ -1985,6 +2310,7 @@
+@@ -1989,6 +2314,7 @@
        struct buffer_head * bh;
        struct ext3_dir_entry_2 * de;
        handle_t *handle;
@@ -1024,7 +1024,7 @@ Index: linux-2.4.24/fs/ext3/namei.c
  
        handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
        if (IS_ERR(handle)) {
-@@ -1995,7 +2321,7 @@
+@@ -1999,7 +2325,7 @@
                handle->h_sync = 1;
  
        retval = -ENOENT;
@@ -1033,7 +1033,7 @@ Index: linux-2.4.24/fs/ext3/namei.c
        if (!bh)
                goto end_unlink;
  
-@@ -2003,8 +2329,10 @@
+@@ -2007,8 +2333,10 @@
        DQUOT_INIT(inode);
  
        retval = -EIO;
@@ -1045,7 +1045,7 @@ Index: linux-2.4.24/fs/ext3/namei.c
        
        if (!inode->i_nlink) {
                ext3_warning (inode->i_sb, "ext3_unlink",
-@@ -2013,6 +2341,7 @@
+@@ -2017,6 +2345,7 @@
                inode->i_nlink = 1;
        }
        retval = ext3_delete_entry(handle, dir, de, bh);
@@ -1053,7 +1053,7 @@ Index: linux-2.4.24/fs/ext3/namei.c
        if (retval)
                goto end_unlink;
        dir->i_ctime = dir->i_mtime = CURRENT_TIME;
-@@ -2151,6 +2480,7 @@
+@@ -2155,6 +2484,7 @@
        struct buffer_head * old_bh, * new_bh, * dir_bh;
        struct ext3_dir_entry_2 * old_de, * new_de;
        int retval;
@@ -1061,7 +1061,7 @@ Index: linux-2.4.24/fs/ext3/namei.c
  
        old_bh = new_bh = dir_bh = NULL;
  
-@@ -2163,7 +2493,10 @@
+@@ -2167,7 +2497,10 @@
        if (IS_SYNC(old_dir) || IS_SYNC(new_dir))
                handle->h_sync = 1;
  
@@ -1073,7 +1073,7 @@ Index: linux-2.4.24/fs/ext3/namei.c
        /*
         *  Check for inode number is _not_ due to possible IO errors.
         *  We might rmdir the source, keep it as pwd of some process
-@@ -2176,7 +2509,7 @@
+@@ -2180,7 +2513,7 @@
                goto end_rename;
  
        new_inode = new_dentry->d_inode;
@@ -1082,16 +1082,16 @@ Index: linux-2.4.24/fs/ext3/namei.c
        if (new_bh) {
                if (!new_inode) {
                        brelse (new_bh);
-@@ -2239,7 +2572,7 @@
+@@ -2247,7 +2580,7 @@
                struct buffer_head *old_bh2;
                struct ext3_dir_entry_2 *old_de2;
-               
 -              old_bh2 = ext3_find_entry(old_dentry, &old_de2);
 +              old_bh2 = ext3_find_entry(old_dentry, &old_de2, 1, &lock3 /* FIXME */);
                if (old_bh2) {
                        retval = ext3_delete_entry(handle, old_dir,
                                                   old_de2, old_bh2);
-@@ -2282,6 +2615,14 @@
+@@ -2290,6 +2623,14 @@
        retval = 0;
  
  end_rename:
@@ -1106,7 +1106,7 @@ Index: linux-2.4.24/fs/ext3/namei.c
        brelse (dir_bh);
        brelse (old_bh);
        brelse (new_bh);
-@@ -2290,6 +2631,29 @@
+@@ -2298,6 +2639,29 @@
  }
  
  /*
@@ -1136,10 +1136,10 @@ Index: linux-2.4.24/fs/ext3/namei.c
   * directories can handle most operations...
   */
  struct inode_operations ext3_dir_inode_operations = {
-Index: linux-2.4.24/fs/ext3/super.c
+Index: lum/fs/ext3/super.c
 ===================================================================
---- linux-2.4.24.orig/fs/ext3/super.c  2004-05-22 12:09:38.000000000 +0800
-+++ linux-2.4.24/fs/ext3/super.c       2004-05-22 12:11:40.000000000 +0800
+--- lum.orig/fs/ext3/super.c   2004-06-03 16:32:28.000000000 -0400
++++ lum/fs/ext3/super.c        2004-06-03 16:37:15.000000000 -0400
 @@ -733,6 +733,9 @@
                        if (want_numeric(value, "sb", sb_block))
                                return 0;
@@ -1173,10 +1173,10 @@ Index: linux-2.4.24/fs/ext3/super.c
        return sb;
  
  failed_mount3:
-Index: linux-2.4.24/fs/ext3/inode.c
+Index: lum/fs/ext3/inode.c
 ===================================================================
---- linux-2.4.24.orig/fs/ext3/inode.c  2004-05-22 12:09:48.000000000 +0800
-+++ linux-2.4.24/fs/ext3/inode.c       2004-05-22 12:11:40.000000000 +0800
+--- lum.orig/fs/ext3/inode.c   2004-06-03 16:32:29.000000000 -0400
++++ lum/fs/ext3/inode.c        2004-06-03 16:37:15.000000000 -0400
 @@ -2251,6 +2251,9 @@
        } else if (S_ISDIR(inode->i_mode)) {
                inode->i_op = &ext3_dir_inode_operations;
@@ -1187,10 +1187,10 @@ Index: linux-2.4.24/fs/ext3/inode.c
        } else if (S_ISLNK(inode->i_mode)) {
                if (ext3_inode_is_fast_symlink(inode))
                        inode->i_op = &ext3_fast_symlink_inode_operations;
-Index: linux-2.4.24/fs/ext3/ialloc.c
+Index: lum/fs/ext3/ialloc.c
 ===================================================================
---- linux-2.4.24.orig/fs/ext3/ialloc.c 2004-05-22 12:09:38.000000000 +0800
-+++ linux-2.4.24/fs/ext3/ialloc.c      2004-05-22 12:11:40.000000000 +0800
+--- lum.orig/fs/ext3/ialloc.c  2004-06-03 16:32:28.000000000 -0400
++++ lum/fs/ext3/ialloc.c       2004-06-03 16:37:15.000000000 -0400
 @@ -609,6 +609,9 @@
                return ERR_PTR(-EDQUOT);
        }
@@ -1201,10 +1201,10 @@ Index: linux-2.4.24/fs/ext3/ialloc.c
        return inode;
  
  fail:
-Index: linux-2.4.24/include/linux/ext3_fs.h
+Index: lum/include/linux/ext3_fs.h
 ===================================================================
---- linux-2.4.24.orig/include/linux/ext3_fs.h  2004-05-22 12:09:42.000000000 +0800
-+++ linux-2.4.24/include/linux/ext3_fs.h       2004-05-22 12:11:41.000000000 +0800
+--- lum.orig/include/linux/ext3_fs.h   2004-06-03 16:32:28.000000000 -0400
++++ lum/include/linux/ext3_fs.h        2004-06-03 16:37:15.000000000 -0400
 @@ -320,6 +320,7 @@
  /*
   * Mount flags
@@ -1213,10 +1213,10 @@ Index: linux-2.4.24/include/linux/ext3_fs.h
  #define EXT3_MOUNT_CHECK              0x0001  /* Do mount-time checks */
  #define EXT3_MOUNT_GRPID              0x0004  /* Create files with directory's group */
  #define EXT3_MOUNT_DEBUG              0x0008  /* Some debugging messages */
-Index: linux-2.4.24/include/linux/ext3_fs_i.h
+Index: lum/include/linux/ext3_fs_i.h
 ===================================================================
---- linux-2.4.24.orig/include/linux/ext3_fs_i.h        2004-05-22 12:09:38.000000000 +0800
-+++ linux-2.4.24/include/linux/ext3_fs_i.h     2004-05-22 12:13:54.000000000 +0800
+--- lum.orig/include/linux/ext3_fs_i.h 2004-06-03 16:32:28.000000000 -0400
++++ lum/include/linux/ext3_fs_i.h      2004-06-03 16:37:15.000000000 -0400
 @@ -17,6 +17,7 @@
  #define _LINUX_EXT3_FS_I
  
index a8bce7c..2100f53 100644 (file)
 +              ext3_dirent *de = (ext3_dirent *) (from + map->offs);
 +              rec_len = EXT3_DIR_REC_LEN(de->name_len);
 +              memcpy (to, de, rec_len);
-+              ((ext3_dirent *) to)->rec_len = rec_len;
++              ((ext3_dirent *)to)->rec_len = le16_to_cpu(rec_len);
 +              to += rec_len;
 +              map++;
 +      }
 +
 +      /* Fancy dance to stay within two buffers */
 +      de2 = dx_copy_dirents (data1, data2, map + split, count - split);
-+      data3 = (char *) de2 + de2->rec_len;
++      data3 = (char *) de2 + le16_to_cpu(de2->rec_len);
 +      de = dx_copy_dirents (data1, data3, map, split);
-+      memcpy(data1, data3, (char *) de + de->rec_len - data3);
++      memcpy(data1, data3, (char *) de + le16_to_cpu(de->rec_len) - data3);
 +      de = (ext3_dirent *) ((char *) de - data3 + data1); // relocate de
 +      de->rec_len = cpu_to_le16(data1 + dir->i_sb->s_blocksize - (char *)de);
 +      de2->rec_len = cpu_to_le16(data2 + dir->i_sb->s_blocksize-(char *)de2);
        if (IS_ERR(handle))
                return PTR_ERR(handle);
  
-@@ -1077,7 +1844,7 @@
+@@ -1069,14 +1837,37 @@
+       /*
+        * ok, that's it
+        */
+-      ext3_delete_entry(handle, old_dir, old_de, old_bh);
++      if (le32_to_cpu(old_de->inode) != old_inode->i_ino ||
++          old_de->name_len != old_dentry->d_name.len ||
++          strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) ||
++          (retval = ext3_delete_entry(handle, old_dir,
++                                      old_de, old_bh)) == -ENOENT) {
++              /* old_de could have moved from under us during htree split, so
++               * make sure that we are deleting the right entry.  We might
++               * also be pointing to a stale entry in the unused part of
++               * old_bh so just checking inum and the name isn't enough. */
++              struct buffer_head *old_bh2;
++              struct ext3_dir_entry_2 *old_de2;
++              
++              old_bh2 = ext3_find_entry(old_dentry, &old_de2);
++              if (old_bh2) {
++                      retval = ext3_delete_entry(handle, old_dir,
++                                                 old_de2, old_bh2);
++                      brelse(old_bh2);
++              }
++      }
++      if (retval) {
++              ext3_warning(old_dir->i_sb, "ext3_rename",
++                              "Deleting old file (%lu), %d, error=%d",
++                              old_dir->i_ino, old_dir->i_nlink, retval);
++      }
+       if (new_inode) {
+               new_inode->i_nlink--;
                new_inode->i_ctime = CURRENT_TIME;
        }
        old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
index 2133355..8a8d115 100644 (file)
@@ -8,8 +8,8 @@
 
 Index: linux-stage/fs/ext3/Makefile
 ===================================================================
---- linux-stage.orig/fs/ext3/Makefile  2004-05-07 16:00:16.000000000 -0400
-+++ linux-stage/fs/ext3/Makefile       2004-05-07 16:00:17.000000000 -0400
+--- linux-stage.orig/fs/ext3/Makefile  2004-05-11 17:21:20.000000000 -0400
++++ linux-stage/fs/ext3/Makefile       2004-05-11 17:21:21.000000000 -0400
 @@ -4,7 +4,7 @@
  
  obj-$(CONFIG_EXT3_FS) += ext3.o
@@ -21,8 +21,8 @@ Index: linux-stage/fs/ext3/Makefile
  ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
 Index: linux-stage/fs/ext3/inode.c
 ===================================================================
---- linux-stage.orig/fs/ext3/inode.c   2004-05-07 16:00:16.000000000 -0400
-+++ linux-stage/fs/ext3/inode.c        2004-05-07 17:21:59.000000000 -0400
+--- linux-stage.orig/fs/ext3/inode.c   2004-05-11 17:21:21.000000000 -0400
++++ linux-stage/fs/ext3/inode.c        2004-05-11 17:21:21.000000000 -0400
 @@ -37,6 +37,7 @@
  #include <linux/mpage.h>
  #include <linux/uio.h>
@@ -43,8 +43,8 @@ Index: linux-stage/fs/ext3/inode.c
        bh = iloc.bh;
 Index: linux-stage/fs/ext3/iopen.c
 ===================================================================
---- linux-stage.orig/fs/ext3/iopen.c   2004-05-07 16:00:17.000000000 -0400
-+++ linux-stage/fs/ext3/iopen.c        2004-05-07 17:22:37.000000000 -0400
+--- linux-stage.orig/fs/ext3/iopen.c   1969-12-31 19:00:00.000000000 -0500
++++ linux-stage/fs/ext3/iopen.c        2004-05-11 17:21:21.000000000 -0400
 @@ -0,0 +1,272 @@
 +/*
 + * linux/fs/ext3/iopen.c
@@ -320,8 +320,8 @@ Index: linux-stage/fs/ext3/iopen.c
 +}
 Index: linux-stage/fs/ext3/iopen.h
 ===================================================================
---- linux-stage.orig/fs/ext3/iopen.h   2004-05-07 16:00:17.000000000 -0400
-+++ linux-stage/fs/ext3/iopen.h        2004-05-07 16:00:17.000000000 -0400
+--- linux-stage.orig/fs/ext3/iopen.h   1969-12-31 19:00:00.000000000 -0500
++++ linux-stage/fs/ext3/iopen.h        2004-05-11 17:21:21.000000000 -0400
 @@ -0,0 +1,15 @@
 +/*
 + * iopen.h
@@ -340,8 +340,8 @@ Index: linux-stage/fs/ext3/iopen.h
 +                                         struct inode *inode, int rehash);
 Index: linux-stage/fs/ext3/namei.c
 ===================================================================
---- linux-stage.orig/fs/ext3/namei.c   2004-05-07 16:00:16.000000000 -0400
-+++ linux-stage/fs/ext3/namei.c        2004-05-07 16:00:17.000000000 -0400
+--- linux-stage.orig/fs/ext3/namei.c   2004-05-11 17:21:20.000000000 -0400
++++ linux-stage/fs/ext3/namei.c        2004-05-11 17:21:21.000000000 -0400
 @@ -37,6 +37,7 @@
  #include <linux/buffer_head.h>
  #include <linux/smp_lock.h>
@@ -420,30 +420,30 @@ Index: linux-stage/fs/ext3/namei.c
  }
 Index: linux-stage/fs/ext3/super.c
 ===================================================================
---- linux-stage.orig/fs/ext3/super.c   2004-05-07 16:00:16.000000000 -0400
-+++ linux-stage/fs/ext3/super.c        2004-05-07 17:21:59.000000000 -0400
+--- linux-stage.orig/fs/ext3/super.c   2004-05-11 17:21:21.000000000 -0400
++++ linux-stage/fs/ext3/super.c        2004-05-11 17:44:53.000000000 -0400
 @@ -536,7 +536,7 @@
        Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_noload,
        Opt_commit, Opt_journal_update, Opt_journal_inum,
        Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
--      Opt_ignore, Opt_err,
-+      Opt_ignore, Opt_err, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+-      Opt_ignore, Opt_barrier,
++      Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+       Opt_err,
  };
  
- static match_table_t tokens = {
-@@ -575,6 +575,9 @@
-       {Opt_ignore, "noquota"},
+@@ -577,6 +577,9 @@
        {Opt_ignore, "quota"},
        {Opt_ignore, "usrquota"},
-+      {Opt_iopen,  "iopen"},
-+      {Opt_noiopen,  "noiopen"},
-+      {Opt_iopen_nopriv,  "iopen_nopriv"},
+       {Opt_barrier, "barrier=%u"},
++      {Opt_iopen, "iopen"},
++      {Opt_noiopen, "noiopen"},
++      {Opt_iopen_nopriv, "iopen_nopriv"},
        {Opt_err, NULL}
  };
  
-@@ -762,6 +765,18 @@
-               case Opt_abort:
-                       set_opt(sbi->s_mount_opt, ABORT);
+@@ -772,6 +775,18 @@
+                       else
+                               clear_opt(sbi->s_mount_opt, BARRIER);
                        break;
 +              case Opt_iopen:
 +                      set_opt (sbi->s_mount_opt, IOPEN);
@@ -462,14 +462,14 @@ Index: linux-stage/fs/ext3/super.c
                default:
 Index: linux-stage/include/linux/ext3_fs.h
 ===================================================================
---- linux-stage.orig/include/linux/ext3_fs.h   2004-05-07 16:00:16.000000000 -0400
-+++ linux-stage/include/linux/ext3_fs.h        2004-05-07 16:00:17.000000000 -0400
-@@ -325,6 +325,8 @@
- #define EXT3_MOUNT_NO_UID32           0x2000  /* Disable 32-bit UIDs */
+--- linux-stage.orig/include/linux/ext3_fs.h   2004-05-11 17:21:20.000000000 -0400
++++ linux-stage/include/linux/ext3_fs.h        2004-05-11 17:21:21.000000000 -0400
+@@ -326,6 +326,8 @@
  #define EXT3_MOUNT_XATTR_USER         0x4000  /* Extended user attributes */
  #define EXT3_MOUNT_POSIX_ACL          0x8000  /* POSIX Access Control Lists */
-+#define EXT3_MOUNT_IOPEN             0x10000  /* Allow access via iopen */
-+#define EXT3_MOUNT_IOPEN_NOPRIV              0x20000  /* Make iopen world-readable */
+ #define EXT3_MOUNT_BARRIER            0x10000 /* Use block barriers */
++#define EXT3_MOUNT_IOPEN                0x20000 /* Allow access via iopen */
++#define EXT3_MOUNT_IOPEN_NOPRIV         0x40000 /* Make iopen world-readable */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef _LINUX_EXT2_FS_H
diff --git a/lustre/kernel_patches/patches/loop-sync-2.4.21-suse.patch b/lustre/kernel_patches/patches/loop-sync-2.4.21-suse.patch
new file mode 100644 (file)
index 0000000..73372b9
--- /dev/null
@@ -0,0 +1,11 @@
+--- drivers/block/loop.c.bu    2004-05-11 16:27:23.000000000 -0700
++++ drivers/block/loop.c       2004-05-11 16:28:50.000000000 -0700
+@@ -978,7 +978,7 @@ static int lo_release(struct inode *inod
+       lo = &loop_dev[dev];
+-      if (!(lo->lo_flags & LO_FLAGS_READ_ONLY)) {
++      if (!(lo->lo_flags & LO_FLAGS_READ_ONLY) && lo->lo_device != 0) {
+               fsync_dev(lo->lo_device);
+               invalidate_buffers(lo->lo_device);
+       }
index 6d2b7e6..1c5f97e 100644 (file)
@@ -1,3 +1,4 @@
+Version 37: fix htree rename-within-same-dir (b=3417), endianness (b=2447)
 Version 36: don't dput dentry after error (b=2350), zero page->private (3119)
 Version 35: pass intent to real_lookup after revalidate failure (b=3285)
 Version 34: fix ext3 iopen assertion failure (b=2517, b=2399)
@@ -8,6 +9,6 @@ Version 34: fix ext3 iopen assertion failure (b=2517, b=2399)
 --- /dev/null  Fri Aug 30 17:31:37 2002
 +++ linux-2.4.18-18.8.0-l12-braam/include/linux/lustre_version.h       Thu Feb 13 07:58:33 2003
 @@ -0,0 +1 @@
-+#define LUSTRE_KERNEL_VERSION 36
++#define LUSTRE_KERNEL_VERSION 37
 
 _
diff --git a/lustre/kernel_patches/patches/md_path_lookup-2.6-suse.patch b/lustre/kernel_patches/patches/md_path_lookup-2.6-suse.patch
new file mode 100644 (file)
index 0000000..4e2b66d
--- /dev/null
@@ -0,0 +1,25 @@
+Index: linux-2.6.4-51.0/drivers/md/dm-path-selector.c
+===================================================================
+--- linux-2.6.4-51.0.orig/drivers/md/dm-path-selector.c        2004-04-18 20:10:21.000000000 -0400
++++ linux-2.6.4-51.0/drivers/md/dm-path-selector.c     2004-04-18 20:10:59.000000000 -0400
+@@ -129,7 +129,7 @@
+       struct path *path;
+ };
+-static struct path_info *path_lookup(struct list_head *head, struct path *p)
++static struct path_info *md_path_lookup(struct list_head *head, struct path *p)
+ {
+       struct path_info *pi;
+@@ -235,9 +235,9 @@
+        * mind the expense of these searches.
+        */
+       spin_lock_irqsave(&s->lock, flags);
+-      pi = path_lookup(&s->valid_paths, p);
++      pi = md_path_lookup(&s->valid_paths, p);
+       if (!pi)
+-              pi = path_lookup(&s->invalid_paths, p);
++              pi = md_path_lookup(&s->invalid_paths, p);
+       if (!pi)
+               DMWARN("asked to change the state of an unknown path");
index c678b4e..12436a7 100644 (file)
@@ -1,42 +1,42 @@
-Index: linux-2.6.4-51.0/fs/exec.c
+Index: linux-2.6.5-12.1/fs/exec.c
 ===================================================================
---- linux-2.6.4-51.0.orig/fs/exec.c    2004-04-05 12:41:59.000000000 -0400
-+++ linux-2.6.4-51.0/fs/exec.c 2004-04-05 17:36:42.000000000 -0400
-@@ -122,8 +122,11 @@
-       struct file * file;
+--- linux-2.6.5-12.1.orig/fs/exec.c    2004-05-10 19:21:56.000000000 +0300
++++ linux-2.6.5-12.1/fs/exec.c 2004-05-25 17:32:14.038494200 +0300
+@@ -125,9 +125,10 @@
        struct nameidata nd;
        int error;
-+      intent_init(&nd.intent, IT_OPEN);
  
 -      nd.intent.open.flags = FMODE_READ;
-+      error = user_path_walk_it(library, &nd);
-+
-+      nd.intent.it_flags = O_RDONLY;
-       error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd);
-       if (error)
++      intent_init(&nd.intent, IT_OPEN);
+-      FSHOOK_BEGIN_USER_WALK(open,
++      nd.intent.it_flags = FMODE_READ;
++      FSHOOK_BEGIN_USER_WALK_IT(open,
+               error,
+               library,
+               LOOKUP_FOLLOW|LOOKUP_OPEN,
+@@ -144,7 +145,7 @@
                goto out;
-@@ -136,7 +139,7 @@
-       if (error)
-               goto exit;
+       }
  
 -      file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
 +      file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &nd.intent);
        error = PTR_ERR(file);
        if (IS_ERR(file))
                goto out;
-@@ -485,8 +488,9 @@
-       int err;
-       struct file *file;
+@@ -495,8 +496,9 @@
+       FSHOOK_BEGIN(open, err, .filename = name, .flags = O_RDONLY)
  
 -      nd.intent.open.flags = FMODE_READ;
 -      err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd);
 +      intent_init(&nd.intent, IT_OPEN);
-+      nd.intent.it_flags = O_RDONLY;
++      nd.intent.it_flags = FMODE_READ;
 +      err = path_lookup(name, LOOKUP_FOLLOW, &nd);
        file = ERR_PTR(err);
  
        if (!err) {
-@@ -499,7 +503,7 @@
+@@ -509,7 +511,7 @@
                                err = -EACCES;
                        file = ERR_PTR(err);
                        if (!err) {
@@ -45,11 +45,11 @@ Index: linux-2.6.4-51.0/fs/exec.c
                                if (!IS_ERR(file)) {
                                        err = deny_write_access(file);
                                        if (err) {
-Index: linux-2.6.4-51.0/fs/namei.c
+Index: linux-2.6.5-12.1/fs/namei.c
 ===================================================================
---- linux-2.6.4-51.0.orig/fs/namei.c   2004-04-05 12:41:59.000000000 -0400
-+++ linux-2.6.4-51.0/fs/namei.c        2004-04-05 17:36:42.000000000 -0400
-@@ -269,8 +269,19 @@
+--- linux-2.6.5-12.1.orig/fs/namei.c   2004-05-10 19:21:56.000000000 +0300
++++ linux-2.6.5-12.1/fs/namei.c        2004-05-25 17:32:14.040493896 +0300
+@@ -270,8 +270,19 @@
        return 0;
  }
  
@@ -69,7 +69,7 @@ Index: linux-2.6.4-51.0/fs/namei.c
        dput(nd->dentry);
        mntput(nd->mnt);
  }
-@@ -347,7 +358,10 @@
+@@ -348,7 +359,10 @@
  {
        struct dentry * result;
        struct inode *dir = parent->d_inode;
@@ -80,7 +80,7 @@ Index: linux-2.6.4-51.0/fs/namei.c
        down(&dir->i_sem);
        /*
         * First re-do the cached lookup just in case it was created
-@@ -386,7 +400,10 @@
+@@ -387,7 +401,10 @@
        if (result->d_op && result->d_op->d_revalidate) {
                if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) {
                        dput(result);
@@ -92,7 +92,7 @@ Index: linux-2.6.4-51.0/fs/namei.c
                }
        }
        return result;
-@@ -563,6 +580,33 @@
+@@ -564,6 +581,33 @@
        return PTR_ERR(dentry);
  }
  
@@ -126,7 +126,7 @@ Index: linux-2.6.4-51.0/fs/namei.c
  /*
   * Name resolution.
   *
-@@ -663,7 +705,9 @@
+@@ -664,7 +708,9 @@
  
                if (inode->i_op->follow_link) {
                        mntget(next.mnt);
@@ -136,7 +136,7 @@ Index: linux-2.6.4-51.0/fs/namei.c
                        dput(next.dentry);
                        mntput(next.mnt);
                        if (err)
-@@ -702,14 +746,29 @@
+@@ -703,14 +749,29 @@
                                inode = nd->dentry->d_inode;
                                /* fallthrough */
                        case 1:
@@ -166,7 +166,7 @@ Index: linux-2.6.4-51.0/fs/namei.c
                if (err)
                        break;
                follow_mount(&next.mnt, &next.dentry);
-@@ -935,7 +994,7 @@
+@@ -936,7 +997,7 @@
  }
  
  /* SMP-safe */
@@ -175,7 +175,7 @@ Index: linux-2.6.4-51.0/fs/namei.c
  {
        unsigned long hash;
        struct qstr this;
-@@ -955,11 +1014,16 @@
+@@ -956,11 +1017,16 @@
        }
        this.hash = end_name_hash(hash);
  
@@ -193,29 +193,31 @@ Index: linux-2.6.4-51.0/fs/namei.c
  /*
   *    namei()
   *
-@@ -971,7 +1035,7 @@
+@@ -972,7 +1038,8 @@
   * that namei follows links, while lnamei does not.
   * SMP-safe
   */
--int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd)
-+int fastcall __user_walk_it(const char __user *name, unsigned flags, struct nameidata *nd)
+-int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd, const char **pname)
++int fastcall __user_walk_it(const char __user *name, unsigned flags,
++                          struct nameidata *nd, const char **pname)
  {
        char *tmp = getname(name);
        int err = PTR_ERR(tmp);
-@@ -983,6 +1047,12 @@
+@@ -987,6 +1054,13 @@
        return err;
  }
  
-+int __user_walk(const char __user *name, unsigned flags, struct nameidata *nd)
++int __user_walk(const char __user *name, unsigned flags,
++              struct nameidata *nd, const char **pname)
 +{
 +      intent_init(&nd->intent, IT_LOOKUP);
-+      return __user_walk_it(name, flags, nd);
++      return __user_walk_it(name, flags, nd, pname);
 +}
 +
  /*
   * It's inline, so penalty for filesystems that don't use sticky bit is
   * minimal.
-@@ -1255,8 +1325,8 @@
+@@ -1259,8 +1333,8 @@
                acc_mode |= MAY_APPEND;
  
        /* Fill in the open() intent data */
@@ -226,7 +228,7 @@ Index: linux-2.6.4-51.0/fs/namei.c
  
        /*
         * The simplest case - just a plain lookup.
-@@ -1271,6 +1341,7 @@
+@@ -1275,6 +1349,7 @@
        /*
         * Create - we need to know the parent.
         */
@@ -234,7 +236,7 @@ Index: linux-2.6.4-51.0/fs/namei.c
        error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd);
        if (error)
                return error;
-@@ -1287,7 +1358,9 @@
+@@ -1291,7 +1366,9 @@
        dir = nd->dentry;
        nd->flags &= ~LOOKUP_PARENT;
        down(&dir->d_inode->i_sem);
@@ -244,7 +246,7 @@ Index: linux-2.6.4-51.0/fs/namei.c
  
  do_last:
        error = PTR_ERR(dentry);
-@@ -1392,7 +1465,9 @@
+@@ -1396,7 +1473,9 @@
        }
        dir = nd->dentry;
        down(&dir->d_inode->i_sem);
@@ -254,7 +256,7 @@ Index: linux-2.6.4-51.0/fs/namei.c
        putname(nd->last.name);
        goto do_last;
  }
-@@ -2154,7 +2229,9 @@
+@@ -2196,7 +2275,9 @@
  __vfs_follow_link(struct nameidata *nd, const char *link)
  {
        int res = 0;
@@ -264,7 +266,7 @@ Index: linux-2.6.4-51.0/fs/namei.c
        if (IS_ERR(link))
                goto fail;
  
-@@ -2164,6 +2241,10 @@
+@@ -2206,6 +2287,10 @@
                        /* weird __emul_prefix() stuff did it */
                        goto out;
        }
@@ -275,11 +277,11 @@ Index: linux-2.6.4-51.0/fs/namei.c
        res = link_path_walk(link, nd);
  out:
        if (current->link_count || res || nd->last_type!=LAST_NORM)
-Index: linux-2.6.4-51.0/fs/namespace.c
+Index: linux-2.6.5-12.1/fs/namespace.c
 ===================================================================
---- linux-2.6.4-51.0.orig/fs/namespace.c       2004-04-05 12:41:59.000000000 -0400
-+++ linux-2.6.4-51.0/fs/namespace.c    2004-04-07 13:28:23.000000000 -0400
-@@ -107,6 +107,7 @@
+--- linux-2.6.5-12.1.orig/fs/namespace.c       2004-05-10 19:21:56.000000000 +0300
++++ linux-2.6.5-12.1/fs/namespace.c    2004-05-25 17:33:44.385759328 +0300
+@@ -108,6 +108,7 @@
  
  static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd)
  {
@@ -287,7 +289,24 @@ Index: linux-2.6.4-51.0/fs/namespace.c
        old_nd->dentry = mnt->mnt_mountpoint;
        old_nd->mnt = mnt->mnt_parent;
        mnt->mnt_parent = mnt;
-@@ -748,6 +749,7 @@
+@@ -533,6 +534,8 @@
+               return err;
+       if (!old_name || !*old_name)
+               return -EINVAL;
++
++      intent_init(&old_nd.intent, IT_LOOKUP);
+       err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
+       if (err)
+               return err;
+@@ -601,6 +604,7 @@
+               return -EPERM;
+       if (!old_name || !*old_name)
+               return -EINVAL;
++      intent_init(&old_nd.intent, IT_LOOKUP);
+       err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
+       if (err)
+               return err;
+@@ -750,6 +754,7 @@
        int retval = 0;
        int mnt_flags = 0;
  
@@ -295,11 +314,11 @@ Index: linux-2.6.4-51.0/fs/namespace.c
        /* Discard magic */
        if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
                flags &= ~MS_MGC_MSK;
-Index: linux-2.6.4-51.0/fs/open.c
+Index: linux-2.6.5-12.1/fs/open.c
 ===================================================================
---- linux-2.6.4-51.0.orig/fs/open.c    2004-04-05 12:41:59.000000000 -0400
-+++ linux-2.6.4-51.0/fs/open.c 2004-04-05 17:36:42.000000000 -0400
-@@ -211,7 +211,7 @@
+--- linux-2.6.5-12.1.orig/fs/open.c    2004-05-10 19:21:56.000000000 +0300
++++ linux-2.6.5-12.1/fs/open.c 2004-05-25 17:32:14.042493592 +0300
+@@ -227,12 +227,12 @@
        struct nameidata nd;
        struct inode * inode;
        int error;
@@ -308,7 +327,13 @@ Index: linux-2.6.4-51.0/fs/open.c
        error = -EINVAL;
        if (length < 0) /* sorry, but loff_t says... */
                goto out;
-@@ -470,6 +470,7 @@
+-      FSHOOK_BEGIN_USER_PATH_WALK(truncate, error, path, nd, filename, .length = length)
++      FSHOOK_BEGIN_USER_PATH_WALK_IT(truncate, error, path, nd, filename, .length = length)
+       inode = nd.dentry->d_inode;
+@@ -466,6 +466,7 @@
        int old_fsuid, old_fsgid;
        kernel_cap_t old_cap;
        int res;
@@ -316,31 +341,49 @@ Index: linux-2.6.4-51.0/fs/open.c
  
        if (mode & ~S_IRWXO)    /* where's F_OK, X_OK, W_OK, R_OK? */
                return -EINVAL;
-@@ -501,6 +502,7 @@
+@@ -490,7 +491,7 @@
+       else
+               current->cap_effective = current->cap_permitted;
+-      FSHOOK_BEGIN_USER_WALK(access,
++      FSHOOK_BEGIN_USER_WALK_IT(access,
+               res,
+               filename,
+               LOOKUP_FOLLOW|LOOKUP_ACCESS,
+@@ -506,6 +507,7 @@
                if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
                   && !special_file(nd.dentry->d_inode->i_mode))
                        res = -EROFS;
 +
                path_release(&nd);
-       }
  
-@@ -515,6 +517,7 @@
+       FSHOOK_END_USER_WALK(access, res, path)
+@@ -545,11 +547,13 @@
+ asmlinkage long sys_fchdir(unsigned int fd)
  {
-       struct nameidata nd;
++      struct nameidata nd;
+       struct file *file;
+       struct dentry *dentry;
+       struct inode *inode;
+       struct vfsmount *mnt;
        int error;
 +      intent_init(&nd.intent, IT_GETATTR);
  
-       error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
-       if (error)
-@@ -566,6 +569,7 @@
+       FSHOOK_BEGIN(fchdir, error, .fd = fd)
+@@ -582,8 +586,9 @@
  {
        struct nameidata nd;
        int error;
 +      intent_init(&nd.intent, IT_GETATTR);
  
-       error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
-       if (error)
-@@ -638,7 +642,7 @@
+-      FSHOOK_BEGIN_USER_WALK(chroot,
++      FSHOOK_BEGIN_USER_WALK_IT(chroot,
+               error,
+               filename,
+               LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT,
+@@ -670,7 +675,7 @@
        error = -EROFS;
        if (IS_RDONLY(inode))
                goto dput_and_out;
@@ -349,7 +392,7 @@ Index: linux-2.6.4-51.0/fs/open.c
        error = -EPERM;
        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
                goto dput_and_out;
-@@ -746,27 +750,8 @@
+@@ -804,27 +809,8 @@
   * for the internal routines (ie open_namei()/follow_link() etc). 00 is
   * used by symlinks.
   */
@@ -379,7 +422,7 @@ Index: linux-2.6.4-51.0/fs/open.c
  {
        struct file * f;
        struct inode *inode;
-@@ -778,6 +763,7 @@
+@@ -836,6 +822,7 @@
                goto cleanup_dentry;
        f->f_flags = flags;
        f->f_mode = (flags+1) & O_ACCMODE;
@@ -387,7 +430,7 @@ Index: linux-2.6.4-51.0/fs/open.c
        inode = dentry->d_inode;
        if (f->f_mode & FMODE_WRITE) {
                error = get_write_access(inode);
-@@ -797,6 +783,7 @@
+@@ -855,6 +842,7 @@
                error = f->f_op->open(inode,f);
                if (error)
                        goto cleanup_all;
@@ -395,7 +438,7 @@ Index: linux-2.6.4-51.0/fs/open.c
        }
        f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
  
-@@ -821,6 +808,7 @@
+@@ -879,6 +867,7 @@
  cleanup_file:
        put_filp(f);
  cleanup_dentry:
@@ -403,7 +446,7 @@ Index: linux-2.6.4-51.0/fs/open.c
        dput(dentry);
        mntput(mnt);
        return ERR_PTR(error);
-@@ -828,6 +816,36 @@
+@@ -886,6 +875,36 @@
  
  EXPORT_SYMBOL(dentry_open);
  
@@ -440,11 +483,11 @@ Index: linux-2.6.4-51.0/fs/open.c
  /*
   * Find an empty file descriptor entry, and mark it busy.
   */
-Index: linux-2.6.4-51.0/fs/stat.c
+Index: linux-2.6.5-12.1/fs/stat.c
 ===================================================================
---- linux-2.6.4-51.0.orig/fs/stat.c    2004-04-05 12:41:59.000000000 -0400
-+++ linux-2.6.4-51.0/fs/stat.c 2004-04-05 17:36:42.000000000 -0400
-@@ -36,7 +36,7 @@
+--- linux-2.6.5-12.1.orig/fs/stat.c    2004-05-10 19:21:56.000000000 +0300
++++ linux-2.6.5-12.1/fs/stat.c 2004-05-25 17:32:14.042493592 +0300
+@@ -37,7 +37,7 @@
  
  EXPORT_SYMBOL(generic_fillattr);
  
@@ -453,7 +496,7 @@ Index: linux-2.6.4-51.0/fs/stat.c
  {
        struct inode *inode = dentry->d_inode;
        int retval;
-@@ -45,6 +45,8 @@
+@@ -46,6 +46,8 @@
        if (retval)
                return retval;
  
@@ -462,7 +505,7 @@ Index: linux-2.6.4-51.0/fs/stat.c
        if (inode->i_op->getattr)
                return inode->i_op->getattr(mnt, dentry, stat);
  
-@@ -61,14 +63,20 @@
+@@ -62,14 +64,20 @@
  
  EXPORT_SYMBOL(vfs_getattr);
  
@@ -477,46 +520,51 @@ Index: linux-2.6.4-51.0/fs/stat.c
        int error;
 +      intent_init(&nd.intent, IT_GETATTR);
  
--      error = user_path_walk(name, &nd);
-+      error = user_path_walk_it(name, &nd);
-       if (!error) {
+-      FSHOOK_BEGIN_USER_PATH_WALK(stat, error, name, nd, path, .link = false)
++      FSHOOK_BEGIN_USER_PATH_WALK_IT(stat, error, name, nd, path, .link = false)
 -              error = vfs_getattr(nd.mnt, nd.dentry, stat);
 +              error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat);
                path_release(&nd);
-       }
-       return error;
-@@ -80,10 +88,11 @@
+       FSHOOK_END_USER_WALK(stat, error, path)
+@@ -83,10 +91,11 @@
  {
        struct nameidata nd;
        int error;
 +      intent_init(&nd.intent, IT_GETATTR);
  
--      error = user_path_walk_link(name, &nd);
-+      error = user_path_walk_link_it(name, &nd);
-       if (!error) {
+-      FSHOOK_BEGIN_USER_PATH_WALK_LINK(stat, error, name, nd, path, .link = true)
++      FSHOOK_BEGIN_USER_PATH_WALK_LINK_IT(stat, error, name, nd, path, .link = true)
 -              error = vfs_getattr(nd.mnt, nd.dentry, stat);
 +              error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat);
                path_release(&nd);
-       }
-       return error;
-@@ -95,9 +104,12 @@
+       FSHOOK_END_USER_WALK(stat, error, path)
+@@ -99,6 +108,8 @@
+ int vfs_fstat(unsigned int fd, struct kstat *stat)
  {
-       struct file *f = fget(fd);
-       int error = -EBADF;
+       int error;
 +      struct nameidata nd;
 +      intent_init(&nd.intent, IT_GETATTR);
  
+       FSHOOK_BEGIN(fstat, error, .fd = fd)
+@@ -106,7 +117,8 @@
+       error = -EBADF;
        if (f) {
 -              error = vfs_getattr(f->f_vfsmnt, f->f_dentry, stat);
 +              error = vfs_getattr_it(f->f_vfsmnt, f->f_dentry, &nd.intent, stat);
 +              intent_release(&nd.intent);
                fput(f);
        }
-       return error;
-Index: linux-2.6.4-51.0/fs/nfs/dir.c
+Index: linux-2.6.5-12.1/fs/nfs/dir.c
 ===================================================================
---- linux-2.6.4-51.0.orig/fs/nfs/dir.c 2004-04-05 12:41:59.000000000 -0400
-+++ linux-2.6.4-51.0/fs/nfs/dir.c      2004-04-07 13:27:47.000000000 -0400
+--- linux-2.6.5-12.1.orig/fs/nfs/dir.c 2004-05-10 19:21:53.000000000 +0300
++++ linux-2.6.5-12.1/fs/nfs/dir.c      2004-05-25 17:32:14.043493440 +0300
 @@ -709,7 +709,7 @@
                return 0;
        if (!nd || (nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_CREATE))
@@ -535,10 +583,10 @@ Index: linux-2.6.4-51.0/fs/nfs/dir.c
  
        /*
         * The 0 argument passed into the create function should one day
-Index: linux-2.6.4-51.0/fs/inode.c
+Index: linux-2.6.5-12.1/fs/inode.c
 ===================================================================
---- linux-2.6.4-51.0.orig/fs/inode.c   2004-04-05 12:41:59.000000000 -0400
-+++ linux-2.6.4-51.0/fs/inode.c        2004-04-05 17:36:43.000000000 -0400
+--- linux-2.6.5-12.1.orig/fs/inode.c   2004-05-10 19:21:56.000000000 +0300
++++ linux-2.6.5-12.1/fs/inode.c        2004-05-25 17:32:14.044493288 +0300
 @@ -221,6 +221,7 @@
        inodes_stat.nr_unused--;
  }
@@ -547,11 +595,11 @@ Index: linux-2.6.4-51.0/fs/inode.c
  /**
   * clear_inode - clear an inode
   * @inode: inode to clear
-Index: linux-2.6.4-51.0/fs/super.c
+Index: linux-2.6.5-12.1/fs/super.c
 ===================================================================
---- linux-2.6.4-51.0.orig/fs/super.c   2004-04-05 12:41:59.000000000 -0400
-+++ linux-2.6.4-51.0/fs/super.c        2004-04-05 17:36:43.000000000 -0400
-@@ -787,6 +787,8 @@
+--- linux-2.6.5-12.1.orig/fs/super.c   2004-05-10 19:21:56.000000000 +0300
++++ linux-2.6.5-12.1/fs/super.c        2004-05-25 17:32:14.045493136 +0300
+@@ -789,6 +789,8 @@
        return (struct vfsmount *)sb;
  }
  
@@ -560,10 +608,10 @@ Index: linux-2.6.4-51.0/fs/super.c
  struct vfsmount *kern_mount(struct file_system_type *type)
  {
        return do_kern_mount(type->name, 0, type->name, NULL);
-Index: linux-2.6.4-51.0/include/linux/dcache.h
+Index: linux-2.6.5-12.1/include/linux/dcache.h
 ===================================================================
---- linux-2.6.4-51.0.orig/include/linux/dcache.h       2004-04-05 12:42:07.000000000 -0400
-+++ linux-2.6.4-51.0/include/linux/dcache.h    2004-04-05 17:36:43.000000000 -0400
+--- linux-2.6.5-12.1.orig/include/linux/dcache.h       2004-04-04 06:38:24.000000000 +0300
++++ linux-2.6.5-12.1/include/linux/dcache.h    2004-05-25 17:32:14.045493136 +0300
 @@ -4,6 +4,7 @@
  #ifdef __KERNEL__
  
@@ -581,11 +629,11 @@ Index: linux-2.6.4-51.0/include/linux/dcache.h
  struct dentry_stat_t {
        int nr_dentry;
        int nr_unused;
-Index: linux-2.6.4-51.0/include/linux/fs.h
+Index: linux-2.6.5-12.1/include/linux/fs.h
 ===================================================================
---- linux-2.6.4-51.0.orig/include/linux/fs.h   2004-04-05 12:42:07.000000000 -0400
-+++ linux-2.6.4-51.0/include/linux/fs.h        2004-04-05 17:36:43.000000000 -0400
-@@ -249,6 +249,8 @@
+--- linux-2.6.5-12.1.orig/include/linux/fs.h   2004-05-10 19:21:56.000000000 +0300
++++ linux-2.6.5-12.1/include/linux/fs.h        2004-05-25 17:32:14.046492984 +0300
+@@ -250,6 +250,8 @@
  #define ATTR_ATTR_FLAG        1024
  #define ATTR_KILL_SUID        2048
  #define ATTR_KILL_SGID        4096
@@ -594,7 +642,7 @@ Index: linux-2.6.4-51.0/include/linux/fs.h
  
  /*
   * This is the Inode Attributes structure, used for notify_change().  It
-@@ -422,6 +424,7 @@
+@@ -423,6 +425,7 @@
        struct block_device     *i_bdev;
        struct cdev             *i_cdev;
        int                     i_cindex;
@@ -602,7 +650,7 @@ Index: linux-2.6.4-51.0/include/linux/fs.h
  
        unsigned long           i_dnotify_mask; /* Directory notify events */
        struct dnotify_struct   *i_dnotify; /* for directory notifications */
-@@ -554,6 +557,7 @@
+@@ -556,6 +559,7 @@
        spinlock_t              f_ep_lock;
  #endif /* #ifdef CONFIG_EPOLL */
        struct address_space    *f_mapping;
@@ -610,7 +658,7 @@ Index: linux-2.6.4-51.0/include/linux/fs.h
  };
  extern spinlock_t files_lock;
  #define file_list_lock() spin_lock(&files_lock);
-@@ -874,7 +878,9 @@
+@@ -886,7 +890,9 @@
        void (*truncate) (struct inode *);
        int (*permission) (struct inode *, int, struct nameidata *);
        int (*setattr) (struct dentry *, struct iattr *);
@@ -620,7 +668,7 @@ Index: linux-2.6.4-51.0/include/linux/fs.h
        int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
        ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
        ssize_t (*listxattr) (struct dentry *, char *, size_t);
-@@ -1101,6 +1107,7 @@
+@@ -1114,6 +1120,7 @@
  extern int unregister_filesystem(struct file_system_type *);
  extern struct vfsmount *kern_mount(struct file_system_type *);
  extern int may_umount(struct vfsmount *);
@@ -628,7 +676,7 @@ Index: linux-2.6.4-51.0/include/linux/fs.h
  extern long do_mount(char *, char *, char *, unsigned long, void *);
  
  extern int vfs_statfs(struct super_block *, struct kstatfs *);
-@@ -1165,6 +1172,7 @@
+@@ -1178,6 +1185,7 @@
  extern int do_truncate(struct dentry *, loff_t start);
  extern struct file *filp_open(const char *, int, int);
  extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
@@ -636,10 +684,10 @@ Index: linux-2.6.4-51.0/include/linux/fs.h
  extern int filp_close(struct file *, fl_owner_t id);
  extern char * getname(const char __user *);
  
-Index: linux-2.6.4-51.0/include/linux/namei.h
+Index: linux-2.6.5-12.1/include/linux/namei.h
 ===================================================================
---- linux-2.6.4-51.0.orig/include/linux/namei.h        2004-04-05 12:42:07.000000000 -0400
-+++ linux-2.6.4-51.0/include/linux/namei.h     2004-04-05 17:36:43.000000000 -0400
+--- linux-2.6.5-12.1.orig/include/linux/namei.h        2004-05-10 19:21:56.000000000 +0300
++++ linux-2.6.5-12.1/include/linux/namei.h     2004-05-25 17:32:14.047492832 +0300
 @@ -2,25 +2,55 @@
  #define _LINUX_NAMEI_H
  
@@ -717,15 +765,15 @@ Index: linux-2.6.4-51.0/include/linux/namei.h
 @@ -49,6 +82,12 @@
  #define LOOKUP_ACCESS         (0x0400)
  
- extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *));
-+extern int FASTCALL(__user_walk_it(const char __user *name, unsigned flags, struct nameidata *nd));
+ extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *, const char **));
++extern int FASTCALL(__user_walk_it(const char __user *, unsigned, struct nameidata *, const char **));
 +#define user_path_walk_it(name,nd) \
-+      __user_walk_it(name, LOOKUP_FOLLOW, nd)
++      __user_walk_it(name, LOOKUP_FOLLOW, nd, 0)
 +#define user_path_walk_link_it(name,nd) \
-+      __user_walk_it(name, 0, nd)
++      __user_walk_it(name, 0, nd, 0)
 +extern void intent_release(struct lookup_intent *);
  #define user_path_walk(name,nd) \
-       __user_walk(name, LOOKUP_FOLLOW, nd)
+       __user_walk(name, LOOKUP_FOLLOW, nd, 0)
  #define user_path_walk_link(name,nd) \
 @@ -60,7 +99,6 @@
  
@@ -735,11 +783,11 @@ Index: linux-2.6.4-51.0/include/linux/namei.h
  extern int follow_down(struct vfsmount **, struct dentry **);
  extern int follow_up(struct vfsmount **, struct dentry **);
  
-Index: linux-2.6.4-51.0/kernel/exit.c
+Index: linux-2.6.5-12.1/kernel/exit.c
 ===================================================================
---- linux-2.6.4-51.0.orig/kernel/exit.c        2004-04-05 12:42:08.000000000 -0400
-+++ linux-2.6.4-51.0/kernel/exit.c     2004-04-05 17:36:43.000000000 -0400
-@@ -259,6 +259,8 @@
+--- linux-2.6.5-12.1.orig/kernel/exit.c        2004-05-10 19:21:56.000000000 +0300
++++ linux-2.6.5-12.1/kernel/exit.c     2004-05-25 17:32:14.047492832 +0300
+@@ -260,6 +260,8 @@
        write_unlock_irq(&tasklist_lock);
  }
  
@@ -748,7 +796,7 @@ Index: linux-2.6.4-51.0/kernel/exit.c
  void __set_special_pids(pid_t session, pid_t pgrp)
  {
        struct task_struct *curr = current;
-@@ -428,6 +430,8 @@
+@@ -429,6 +431,8 @@
        __exit_files(tsk);
  }
  
@@ -757,3 +805,57 @@ Index: linux-2.6.4-51.0/kernel/exit.c
  static inline void __put_fs_struct(struct fs_struct *fs)
  {
        /* No need to hold fs->lock if we are killing it */
+Index: linux-2.6.5-12.1/include/linux/fshooks.h
+===================================================================
+--- linux-2.6.5-12.1.orig/include/linux/fshooks.h      2004-05-10 19:21:56.000000000 +0300
++++ linux-2.6.5-12.1/include/linux/fshooks.h   2004-05-25 17:32:14.048492680 +0300
+@@ -90,12 +90,18 @@
+ #define FSHOOK_BEGIN_USER_WALK(type, err, path, flags, nd, field, args...) \
+               FSHOOK_BEGIN_USER_WALK_COMMON(type, err, __user_walk(path, flags, &nd, &info.field), nd, args)
++#define FSHOOK_BEGIN_USER_WALK_IT(type, err, path, flags, nd, field, args...) \
++              FSHOOK_BEGIN_USER_WALK_COMMON(type, err, __user_walk_it(path, flags, &nd, &info.field), nd, args)
+ #define FSHOOK_BEGIN_USER_PATH_WALK(type, err, path, nd, field, args...) \
+               FSHOOK_BEGIN_USER_WALK_COMMON(type, err, __user_walk(path, LOOKUP_FOLLOW, &nd, &info.field), nd, args)
++#define FSHOOK_BEGIN_USER_PATH_WALK_IT(type, err, path, nd, field, args...) \
++              FSHOOK_BEGIN_USER_WALK_COMMON(type, err, __user_walk_it(path, LOOKUP_FOLLOW, &nd, &info.field), nd, args)
+ #define FSHOOK_BEGIN_USER_PATH_WALK_LINK(type, err, path, nd, field, args...) \
+               FSHOOK_BEGIN_USER_WALK_COMMON(type, err, __user_walk(path, 0, &nd, &info.field), nd, args)
++#define FSHOOK_BEGIN_USER_PATH_WALK_LINK_IT(type, err, path, nd, field, args...) \
++              FSHOOK_BEGIN_USER_WALK_COMMON(type, err, __user_walk_it(path, 0, &nd, &info.field), nd, args)
+ #define FSHOOK_END_USER_WALK(type, err, field) \
+                               (void)(&info != (struct fshook_##type##_info *)-1L); \
+@@ -126,12 +132,18 @@
+ #define FSHOOK_BEGIN_USER_WALK(type, err, path, flags, nd, field, args...) \
+       if (!(err = __user_walk(path, flags, &nd, 0))) {
++#define FSHOOK_BEGIN_USER_WALK_IT(type, err, path, flags, nd, field, args...) \
++      if (!(err = __user_walk_it(path, flags, &nd, 0))) {
+ #define FSHOOK_BEGIN_USER_PATH_WALK(type, err, path, nd, field, args...) \
+       if (!(err = user_path_walk(path, &nd))) {
++#define FSHOOK_BEGIN_USER_PATH_WALK_IT(type, err, path, nd, field, args...) \
++      if (!(err = user_path_walk_it(path, &nd))) {
+ #define FSHOOK_BEGIN_USER_PATH_WALK_LINK(type, err, path, nd, field, args...) \
+       if (!(err = user_path_walk_link(path, &nd))) {
++#define FSHOOK_BEGIN_USER_PATH_WALK_LINK_IT(type, err, path, nd, field, args...) \
++      if (!(err = user_path_walk_link_it(path, &nd))) {
+ #define FSHOOK_END_USER_WALK(type, err, field) ((void)0);}
+Index: linux-2.6.5-12.1/fs/block_dev.c
+===================================================================
+--- linux-2.6.5-12.1.orig/fs/block_dev.c       2004-05-10 19:21:55.000000000 +0300
++++ linux-2.6.5-12.1/fs/block_dev.c    2004-05-25 17:32:39.517620784 +0300
+@@ -834,6 +834,7 @@
+       if (!path || !*path)
+               return ERR_PTR(-EINVAL);
++      intent_init(&nd.intent, IT_LOOKUP);
+       error = path_lookup(path, LOOKUP_FOLLOW, &nd);
+       if (error)
+               return ERR_PTR(error);
index 2bd3c6d..934dd77 100644 (file)
@@ -2,11 +2,11 @@
 
 .old..........pc/vfs_nointent_2.6.0-suse/fs/namei.c
 .new.........fs/namei.c
-Index: linux-2.6.4-51.0/fs/namei.c
+Index: linux-2.6.5-12.1/fs/namei.c
 ===================================================================
---- linux-2.6.4-51.0.orig/fs/namei.c   2004-04-05 17:36:42.000000000 -0400
-+++ linux-2.6.4-51.0/fs/namei.c        2004-04-05 17:36:43.000000000 -0400
-@@ -1276,7 +1276,7 @@
+--- linux-2.6.5-12.1.orig/fs/namei.c   2004-05-11 15:41:54.000000000 -0400
++++ linux-2.6.5-12.1/fs/namei.c        2004-05-11 15:42:00.000000000 -0400
+@@ -1292,7 +1292,7 @@
                if (!error) {
                        DQUOT_INIT(inode);
                        
@@ -15,7 +15,7 @@ Index: linux-2.6.4-51.0/fs/namei.c
                }
                put_write_access(inode);
                if (error)
-@@ -1526,6 +1526,7 @@
+@@ -1542,6 +1542,7 @@
        char * tmp;
        struct dentry * dentry;
        struct nameidata nd;
@@ -23,7 +23,7 @@ Index: linux-2.6.4-51.0/fs/namei.c
  
        if (S_ISDIR(mode))
                return -EPERM;
-@@ -1536,6 +1537,15 @@
+@@ -1554,6 +1555,15 @@
        error = path_lookup(tmp, LOOKUP_PARENT, &nd);
        if (error)
                goto out;
@@ -39,7 +39,7 @@ Index: linux-2.6.4-51.0/fs/namei.c
        dentry = lookup_create(&nd, 0);
        error = PTR_ERR(dentry);
  
-@@ -1562,6 +1572,7 @@
+@@ -1580,6 +1590,7 @@
                dput(dentry);
        }
        up(&nd.dentry->d_inode->i_sem);
@@ -47,8 +47,8 @@ Index: linux-2.6.4-51.0/fs/namei.c
        path_release(&nd);
  out:
        putname(tmp);
-@@ -1603,10 +1614,18 @@
-       if (!IS_ERR(tmp)) {
+@@ -1626,10 +1637,18 @@
                struct dentry *dentry;
                struct nameidata nd;
 +                intent_init(&nd.intent, IT_LOOKUP);
@@ -66,15 +66,15 @@ Index: linux-2.6.4-51.0/fs/namei.c
                dentry = lookup_create(&nd, 1);
                error = PTR_ERR(dentry);
                if (!IS_ERR(dentry)) {
-@@ -1616,6 +1635,7 @@
+@@ -1639,6 +1658,7 @@
                        dput(dentry);
                }
                up(&nd.dentry->d_inode->i_sem);
 +out2:
                path_release(&nd);
  out:
-               putname(tmp);
-@@ -1696,6 +1716,7 @@
+@@ -1722,6 +1742,7 @@
        char * name;
        struct dentry *dentry;
        struct nameidata nd;
@@ -82,7 +82,7 @@ Index: linux-2.6.4-51.0/fs/namei.c
  
        name = getname(pathname);
        if(IS_ERR(name))
-@@ -1716,6 +1737,14 @@
+@@ -1744,6 +1765,14 @@
                        error = -EBUSY;
                        goto exit1;
        }
@@ -97,7 +97,7 @@ Index: linux-2.6.4-51.0/fs/namei.c
        down(&nd.dentry->d_inode->i_sem);
        dentry = lookup_hash(&nd.last, nd.dentry);
        error = PTR_ERR(dentry);
-@@ -1774,6 +1805,7 @@
+@@ -1805,6 +1834,7 @@
        struct dentry *dentry;
        struct nameidata nd;
        struct inode *inode = NULL;
@@ -105,7 +105,7 @@ Index: linux-2.6.4-51.0/fs/namei.c
  
        name = getname(pathname);
        if(IS_ERR(name))
-@@ -1785,6 +1817,13 @@
+@@ -1818,6 +1848,13 @@
        error = -EISDIR;
        if (nd.last_type != LAST_NORM)
                goto exit1;
@@ -119,8 +119,8 @@ Index: linux-2.6.4-51.0/fs/namei.c
        down(&nd.dentry->d_inode->i_sem);
        dentry = lookup_hash(&nd.last, nd.dentry);
        error = PTR_ERR(dentry);
-@@ -1852,10 +1891,18 @@
-       if (!IS_ERR(to)) {
+@@ -1891,10 +1928,18 @@
                struct dentry *dentry;
                struct nameidata nd;
 +                intent_init(&nd.intent, IT_LOOKUP);
@@ -138,15 +138,15 @@ Index: linux-2.6.4-51.0/fs/namei.c
                dentry = lookup_create(&nd, 0);
                error = PTR_ERR(dentry);
                if (!IS_ERR(dentry)) {
-@@ -1863,6 +1910,7 @@
+@@ -1902,6 +1947,7 @@
                        dput(dentry);
                }
                up(&nd.dentry->d_inode->i_sem);
 +out2:
                path_release(&nd);
  out:
-               putname(to);
-@@ -1926,6 +1974,8 @@
+@@ -1968,6 +2014,8 @@
        struct nameidata nd, old_nd;
        int error;
        char * to;
@@ -155,7 +155,7 @@ Index: linux-2.6.4-51.0/fs/namei.c
  
        to = getname(newname);
        if (IS_ERR(to))
-@@ -1940,6 +1990,13 @@
+@@ -1986,6 +2034,13 @@
        error = -EXDEV;
        if (old_nd.mnt != nd.mnt)
                goto out_release;
@@ -169,7 +169,7 @@ Index: linux-2.6.4-51.0/fs/namei.c
        new_dentry = lookup_create(&nd, 0);
        error = PTR_ERR(new_dentry);
        if (!IS_ERR(new_dentry)) {
-@@ -1990,7 +2047,7 @@
+@@ -2038,7 +2093,7 @@
   *       locking].
   */
  int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
@@ -178,7 +178,7 @@ Index: linux-2.6.4-51.0/fs/namei.c
  {
        int error = 0;
        struct inode *target;
-@@ -2035,7 +2092,7 @@
+@@ -2083,7 +2138,7 @@
  }
  
  int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
@@ -187,7 +187,7 @@ Index: linux-2.6.4-51.0/fs/namei.c
  {
        struct inode *target;
        int error;
-@@ -2112,6 +2169,8 @@
+@@ -2160,6 +2215,8 @@
        struct dentry * old_dentry, *new_dentry;
        struct dentry * trap;
        struct nameidata oldnd, newnd;
@@ -196,7 +196,7 @@ Index: linux-2.6.4-51.0/fs/namei.c
  
        error = path_lookup(oldname, LOOKUP_PARENT, &oldnd);
        if (error)
-@@ -2134,6 +2193,13 @@
+@@ -2182,6 +2239,13 @@
        if (newnd.last_type != LAST_NORM)
                goto exit2;
  
@@ -210,7 +210,7 @@ Index: linux-2.6.4-51.0/fs/namei.c
        trap = lock_rename(new_dir, old_dir);
  
        old_dentry = lookup_hash(&oldnd.last, old_dir);
-@@ -2165,8 +2231,7 @@
+@@ -2213,8 +2277,7 @@
        if (new_dentry == trap)
                goto exit5;
  
@@ -220,11 +220,11 @@ Index: linux-2.6.4-51.0/fs/namei.c
  exit5:
        dput(new_dentry);
  exit4:
-Index: linux-2.6.4-51.0/fs/open.c
+Index: linux-2.6.5-12.1/fs/open.c
 ===================================================================
---- linux-2.6.4-51.0.orig/fs/open.c    2004-04-05 17:36:42.000000000 -0400
-+++ linux-2.6.4-51.0/fs/open.c 2004-04-06 01:37:39.000000000 -0400
-@@ -187,9 +187,10 @@
+--- linux-2.6.5-12.1.orig/fs/open.c    2004-05-11 15:41:54.000000000 -0400
++++ linux-2.6.5-12.1/fs/open.c 2004-05-11 16:07:02.000000000 -0400
+@@ -203,9 +203,10 @@
        return error;
  }
  
@@ -236,7 +236,7 @@ Index: linux-2.6.4-51.0/fs/open.c
        struct iattr newattrs;
  
        /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
-@@ -200,7 +201,14 @@
+@@ -216,7 +217,14 @@
        newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
        down(&dentry->d_inode->i_sem);
        down_write(&dentry->d_inode->i_alloc_sem);
@@ -252,7 +252,7 @@ Index: linux-2.6.4-51.0/fs/open.c
        up_write(&dentry->d_inode->i_alloc_sem);
        up(&dentry->d_inode->i_sem);
        return err;
-@@ -256,7 +264,7 @@
+@@ -271,7 +279,7 @@
        error = locks_verify_truncate(inode, NULL, length);
        if (!error) {
                DQUOT_INIT(inode);
@@ -261,7 +261,7 @@ Index: linux-2.6.4-51.0/fs/open.c
        }
        put_write_access(inode);
  
-@@ -308,7 +316,7 @@
+@@ -328,7 +336,7 @@
  
        error = locks_verify_truncate(inode, file, length);
        if (!error)
@@ -270,30 +270,7 @@ Index: linux-2.6.4-51.0/fs/open.c
  out_putf:
        fput(file);
  out:
-@@ -387,9 +395,19 @@
-                   (error = permission(inode,MAY_WRITE,&nd)) != 0)
-                       goto dput_and_out;
-       }
--      down(&inode->i_sem);
--      error = notify_change(nd.dentry, &newattrs);
--      up(&inode->i_sem);
-+      if (inode->i_op->setattr_raw) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+
-+              newattrs.ia_valid |= ATTR_RAW;
-+              error = op->setattr_raw(inode, &newattrs);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto dput_and_out;
-+      } else {
-+                down(&inode->i_sem);
-+                error = notify_change(nd.dentry, &newattrs);
-+                up(&inode->i_sem);
-+        }
- dput_and_out:
-       path_release(&nd);
- out:
-@@ -440,9 +458,19 @@
+@@ -402,9 +410,19 @@
                    (error = permission(inode,MAY_WRITE,&nd)) != 0)
                        goto dput_and_out;
        }
@@ -315,8 +292,8 @@ Index: linux-2.6.4-51.0/fs/open.c
 +        }
  dput_and_out:
        path_release(&nd);
- out:
-@@ -592,36 +620,52 @@
+@@ -613,39 +631,55 @@
        return error;
  }
  
@@ -326,11 +303,14 @@ Index: linux-2.6.4-51.0/fs/open.c
 -      struct inode * inode;
 -      struct dentry * dentry;
 -      struct file * file;
--      int err = -EBADF;
+-      int err;
 +      struct inode * inode = dentry->d_inode;
        struct iattr newattrs;
 +      int error = -EROFS;
  
+-      FSHOOK_BEGIN(fchmod, err, .fd = fd, .mode = mode)
+-
+-      err = -EBADF;
 -      file = fget(fd);
 -      if (!file)
 +      if (IS_RDONLY(inode))
@@ -338,18 +318,19 @@ Index: linux-2.6.4-51.0/fs/open.c
 +      
 +      if (inode->i_op->setattr_raw) {
 +              struct inode_operations *op = dentry->d_inode->i_op;
--      dentry = file->f_dentry;
--      inode = dentry->d_inode;
++              
 +              newattrs.ia_mode = mode;
 +              newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
 +              newattrs.ia_valid |= ATTR_RAW;
 +              error = op->setattr_raw(inode, &newattrs);
-+              /* the file system wants to use normal vfs path now */
++              /* the file system wants to use the normal vfs path now */
 +              if (error != -EOPNOTSUPP)
 +                      goto out;
 +      }
  
+-      dentry = file->f_dentry;
+-      inode = dentry->d_inode;
+-
 -      err = -EROFS;
 -      if (IS_RDONLY(inode))
 -              goto out_putf;
@@ -367,16 +348,19 @@ Index: linux-2.6.4-51.0/fs/open.c
 -      err = notify_change(dentry, &newattrs);
 +      error = notify_change(dentry, &newattrs);
        up(&inode->i_sem);
+-out_putf:
 +out:
 +      return error;
 +}
--out_putf:
++
 +asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
 +{
 +      struct file * file;
 +      int err = -EBADF;
 +
++      FSHOOK_BEGIN(fchmod, err, .fd = fd, .mode = mode)
++
 +      file = fget(fd);
 +      if (!file)
 +              goto out;
@@ -384,8 +368,8 @@ Index: linux-2.6.4-51.0/fs/open.c
 +      err = chmod_common(file->f_dentry, mode);
        fput(file);
  out:
-       return err;
-@@ -630,32 +674,13 @@
+@@ -657,9 +691,7 @@
  asmlinkage long sys_chmod(const char __user * filename, mode_t mode)
  {
        struct nameidata nd;
@@ -393,9 +377,12 @@ Index: linux-2.6.4-51.0/fs/open.c
        int error;
 -      struct iattr newattrs;
  
-       error = user_path_walk(filename, &nd);
-       if (error)
-               goto out;
+       FSHOOK_BEGIN_USER_PATH_WALK(chmod,
+               error,
+@@ -669,25 +701,7 @@
+               .mode = mode,
+               .link = false)
 -      inode = nd.dentry->d_inode;
 -
 -      error = -EROFS;
@@ -413,13 +400,13 @@ Index: linux-2.6.4-51.0/fs/open.c
 -      newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
 -      error = notify_change(nd.dentry, &newattrs);
 -      up(&inode->i_sem);
+-
 -dput_and_out:
 +      error = chmod_common(nd.dentry, mode);
        path_release(&nd);
- out:
-       return error;
-@@ -676,6 +701,18 @@
+       FSHOOK_END_USER_WALK(chmod, error, path)
+@@ -710,6 +724,18 @@
        if (IS_RDONLY(inode))
                goto out;
        error = -EPERM;
@@ -438,7 +425,7 @@ Index: linux-2.6.4-51.0/fs/open.c
        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
                goto out;
        newattrs.ia_valid =  ATTR_CTIME;
-@@ -689,6 +726,7 @@
+@@ -723,6 +749,7 @@
        }
        if (!S_ISDIR(inode->i_mode))
                newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID;
@@ -446,11 +433,11 @@ Index: linux-2.6.4-51.0/fs/open.c
        down(&inode->i_sem);
        error = notify_change(dentry, &newattrs);
        up(&inode->i_sem);
-Index: linux-2.6.4-51.0/fs/exec.c
+Index: linux-2.6.5-12.1/fs/exec.c
 ===================================================================
---- linux-2.6.4-51.0.orig/fs/exec.c    2004-04-05 17:36:42.000000000 -0400
-+++ linux-2.6.4-51.0/fs/exec.c 2004-04-05 17:36:43.000000000 -0400
-@@ -1418,7 +1418,7 @@
+--- linux-2.6.5-12.1.orig/fs/exec.c    2004-05-11 15:41:54.000000000 -0400
++++ linux-2.6.5-12.1/fs/exec.c 2004-05-11 15:42:00.000000000 -0400
+@@ -1435,7 +1435,7 @@
                goto close_fail;
        if (!file->f_op->write)
                goto close_fail;
@@ -459,11 +446,11 @@ Index: linux-2.6.4-51.0/fs/exec.c
                goto close_fail;
  
        retval = binfmt->core_dump(signr, regs, file);
-Index: linux-2.6.4-51.0/include/linux/fs.h
+Index: linux-2.6.5-12.1/include/linux/fs.h
 ===================================================================
---- linux-2.6.4-51.0.orig/include/linux/fs.h   2004-04-05 17:36:43.000000000 -0400
-+++ linux-2.6.4-51.0/include/linux/fs.h        2004-04-05 17:36:43.000000000 -0400
-@@ -866,13 +866,20 @@
+--- linux-2.6.5-12.1.orig/include/linux/fs.h   2004-05-11 15:41:54.000000000 -0400
++++ linux-2.6.5-12.1/include/linux/fs.h        2004-05-11 15:42:00.000000000 -0400
+@@ -878,13 +878,20 @@
        int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
        struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *);
        int (*link) (struct dentry *,struct inode *,struct dentry *);
@@ -484,7 +471,7 @@ Index: linux-2.6.4-51.0/include/linux/fs.h
        int (*readlink) (struct dentry *, char __user *,int);
        int (*follow_link) (struct dentry *, struct nameidata *);
        void (*truncate) (struct inode *);
-@@ -1169,7 +1176,7 @@
+@@ -1182,7 +1189,7 @@
  
  /* fs/open.c */
  
@@ -493,10 +480,10 @@ Index: linux-2.6.4-51.0/include/linux/fs.h
  extern struct file *filp_open(const char *, int, int);
  extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
  extern struct file * dentry_open_it(struct dentry *, struct vfsmount *, int, struct lookup_intent *);
-Index: linux-2.6.4-51.0/net/unix/af_unix.c
+Index: linux-2.6.5-12.1/net/unix/af_unix.c
 ===================================================================
---- linux-2.6.4-51.0.orig/net/unix/af_unix.c   2004-04-05 12:42:07.000000000 -0400
-+++ linux-2.6.4-51.0/net/unix/af_unix.c        2004-04-05 17:36:43.000000000 -0400
+--- linux-2.6.5-12.1.orig/net/unix/af_unix.c   2004-04-03 22:37:36.000000000 -0500
++++ linux-2.6.5-12.1/net/unix/af_unix.c        2004-05-11 15:42:00.000000000 -0400
 @@ -676,6 +676,7 @@
        int err = 0;
        
index 15af341..c2c88f6 100644 (file)
@@ -11,3 +11,4 @@ removepage-2.6-suse.patch
 dev_read_only-2.6-suse.patch 
 export-2.6-suse.patch
 header-guards-2.6-suse.patch
+md_path_lookup-2.6-suse.patch
index cff99dd..d27088e 100644 (file)
@@ -7,3 +7,4 @@ ext3-init-generation-2.6-suse.patch
 ext3-ea-in-inode-2.6-suse.patch
 export-ext3-2.6-suse.patch
 ext3-include-fixes-2.6-suse.patch
+ext3-htree-rename_fix.patch 
index f4e7175..03e0db2 100644 (file)
@@ -30,3 +30,4 @@ ext3-xattr-ptr-arith-fix.patch
 kernel_text_address-2.4.20-vanilla.patch 
 procfs-ndynamic-2.4.21-suse2.patch 
 ext3-truncate-buffer-head.patch
+loop-sync-2.4.21-suse.patch
index ef3b1ae..d8b192b 100644 (file)
@@ -1,7 +1,7 @@
-KERNEL=linux-2.6.4-51.8.tar.gz
+KERNEL=linux-2.6.5-12.1.tar.gz
 SERIES=2.6-suse
-VERSION=2.6.4
-EXTRA_VERSION=51.8_lustre
+VERSION=2.6.5
+EXTRA_VERSION=12.1_lustre
 RHBUILD=0
 
 BASE_ARCHS=""
index b24081e..11838d6 100644 (file)
@@ -1,6 +1,8 @@
+if MODULES
 if LDISKFS
 modulefs_DATA = ldiskfs$(KMODEXT)
 endif
+endif
 
 ldiskfs_linux_headers := $(addprefix linux/,$(subst ext3,ldiskfs,$(notdir $(linux_headers))))
 
index cfd1c8c..cfaefc5 100644 (file)
@@ -892,8 +892,13 @@ static void ldlm_handle_gl_callback(struct ptlrpc_request *req,
         if (lock->l_granted_mode == LCK_PW &&
             !lock->l_readers && !lock->l_writers &&
             time_after(jiffies, lock->l_last_used + 10 * HZ)) {
+#ifdef __KERNEL__
+                ldlm_bl_to_thread(ns, NULL, lock);
+                l_unlock(&ns->ns_lock);
+#else
                 l_unlock(&ns->ns_lock);
                 ldlm_handle_bl_callback(ns, NULL, lock);
+#endif
                 EXIT;
                 return;
         }
index 7e75089..787d921 100644 (file)
@@ -284,6 +284,9 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
                 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
                 LDLM_LOCK_GET(lock);
 
+                /* Set CBPENDING so nothing in the cancellation path
+                 * can match this lock */
+                lock->l_flags |= LDLM_FL_CBPENDING;
                 lock->l_flags |= LDLM_FL_FAILED;
                 lock->l_flags |= flags;
 
@@ -292,7 +295,6 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
                          * alternative: pretend that we got a blocking AST from
                          * the server, so that when the lock is decref'd, it
                          * will go away ... */
-                        lock->l_flags |= LDLM_FL_CBPENDING;
                         /* ... without sending a CANCEL message. */
                         lock->l_flags |= LDLM_FL_LOCAL_ONLY;
                         LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY");
index ff73edf..278567e 100644 (file)
@@ -46,8 +46,8 @@ replay_ost_single_DEPENDENCIES = $(top_builddir)/liblustre/liblustre.a
 
 if MPITESTS
 test_lock_cancel_SOURCES = test_lock_cancel.c
-test_lock_cancel_CFLAGS = $(LL_CFLAGS) -I/opt/lam/include -L/opt/lam/lib
-test_lock_cancel_LDADD :=  $(LLIB_EXEC) -lmpi -llam
+test_lock_cancel_CFLAGS = $(LL_CFLAGS) -I/opt/lam/include
+test_lock_cancel_LDADD :=  $(LLIB_EXEC)  -L/opt/lam/lib -lmpi -llam
 endif
 
 
index aa00caf..84b8f2f 100644 (file)
@@ -426,11 +426,11 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm,
 
                 tmpex.l_extent.end = tmpex.l_extent.start + PAGE_CACHE_SIZE - 1;
                 /* check to see if another DLM lock covers this page */
-                ldlm_lock2handle(lock, &lockh);
-                rc2 = ldlm_lock_match(NULL, 
+                rc2 = ldlm_lock_match(lock->l_resource->lr_namespace,
                                       LDLM_FL_BLOCK_GRANTED|LDLM_FL_CBPENDING |
                                       LDLM_FL_TEST_LOCK,
-                                      NULL, 0, &tmpex, 0, &lockh);
+                                      &lock->l_resource->lr_name, LDLM_EXTENT,
+                                      &tmpex, LCK_PR | LCK_PW, &lockh);
                 if (rc2 == 0 && page->mapping != NULL) {
                         // checking again to account for writeback's lock_page()
                         LL_CDEBUG_PAGE(D_PAGE, page, "truncating\n");
index 2c29286..1190e4c 100644 (file)
@@ -338,24 +338,20 @@ static int lov_notify(struct obd_device *obd, struct obd_device *watched,
         }
         uuid = &watched->u.cli.cl_import->imp_target_uuid;
 
-        /*
-         * Must notify (MDS) before we mark the OSC as active, so that
-         * the orphan deletion happens without interference from racing
-         * creates.
+        /* Set OSC as active before notifying the observer, so the
+         * observer can use the OSC normally.  
          */
-        if (obd->obd_observer) {
-                /* Pass the notification up the chain. */
-                rc = obd_notify(obd->obd_observer, watched, active);
-                if (rc)
-                        RETURN(rc);
-        }
-
         rc = lov_set_osc_active(&obd->u.lov, uuid, active);
-
         if (rc) {
                 CERROR("%sactivation of %s failed: %d\n",
                        active ? "" : "de", uuid->uuid, rc);
+                RETURN(rc);
         }
+
+        if (obd->obd_observer)
+                /* Pass the notification up the chain. */
+                rc = obd_notify(obd->obd_observer, watched, active);
+
         RETURN(rc);
 }
 
@@ -936,7 +932,7 @@ static int lov_destroy(struct obd_export *exp, struct obdo *oa,
                 err = obd_destroy(lov->tgts[loi->loi_ost_idx].ltd_exp, &tmp,
                                   NULL, oti);
                 if (err && lov->tgts[loi->loi_ost_idx].active) {
-                        CERROR("error: destroying objid "LPX64" subobj "
+                        CDEBUG(D_INODE, "error: destroying objid "LPX64" subobj "
                                LPX64" on OST idx %d: rc = %d\n",
                                oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
                         if (!rc)
index 045daa3..f552198 100644 (file)
@@ -2110,33 +2110,59 @@ static int mds_postsetup(struct obd_device *obd)
 err_cleanup:
         mds_lov_clean(obd);
 err_llog:
-        obd_llog_cleanup(llog_get_context(&obd->obd_llogs, LLOG_CONFIG_ORIG_CTXT));
+        obd_llog_cleanup(llog_get_context(&obd->obd_llogs,
+                                          LLOG_CONFIG_ORIG_CTXT));
         RETURN(rc);
 }
 
-static int mds_postrecov(struct obd_device *obd)
-
+int mds_postrecov(struct obd_device *obd)
 {
+        struct mds_obd *mds = &obd->u.mds;
         struct llog_ctxt *ctxt;
-        int rc, rc2;
+        int rc, item = 0;
         ENTRY;
 
         LASSERT(!obd->obd_recovering);
         ctxt = llog_get_context(&obd->obd_llogs, LLOG_UNLINK_ORIG_CTXT);
         LASSERT(ctxt != NULL);
 
+        /* set nextid first, so we are sure it happens */
+        rc = mds_lov_set_nextid(obd);
+        if (rc) {
+                CERROR("%s: mds_lov_set_nextid failed\n", obd->obd_name);
+                GOTO(out, rc);
+        }
+
+        /* clean PENDING dir */
+        rc = mds_cleanup_orphans(obd);
+        if (rc < 0)
+                GOTO(out, rc);
+        item = rc;
+
         rc = llog_connect(ctxt, obd->u.mds.mds_lov_desc.ld_tgt_count,
                           NULL, NULL, NULL);
-        if (rc != 0) {
-                CERROR("faild at llog_origin_connect: %d\n", rc);
+        if (rc) {
+                CERROR("%s: failed at llog_origin_connect: %d\n", 
+                       obd->obd_name, rc);
+                GOTO(out, rc);
         }
 
-        rc = mds_cleanup_orphans(obd);
+        /* remove the orphaned precreated objects */
+        rc = mds_lov_clearorphans(mds, NULL /* all OSTs */);
+        if (rc)
+                GOTO(err_llog, rc);
+
+out:
+        RETURN(rc < 0 ? rc : item);
 
-        rc2 = mds_lov_set_nextid(obd);
-        if (rc2 == 0)
-                rc2 = rc;
-        RETURN(rc2);
+err_llog:
+        /* cleanup all llogging subsystems */
+        rc = obd_llog_finish(obd, &obd->obd_llogs,
+                             mds->mds_lov_desc.ld_tgt_count);
+        if (rc)
+                CERROR("%s: failed to cleanup llogging subsystems\n",
+                        obd->obd_name);
+        goto out;
 }
 
 int mds_lov_clean(struct obd_device *obd)
index 253ab59..dddd484 100644 (file)
@@ -77,6 +77,7 @@ int mds_lov_write_objids(struct obd_device *obd);
 void mds_lov_update_objids(struct obd_device *obd, obd_id *ids);
 int mds_lov_set_growth(struct mds_obd *mds, int count);
 int mds_lov_set_nextid(struct obd_device *obd);
+int mds_lov_clearorphans(struct mds_obd *mds, struct obd_uuid *ost_uuid);
 int mds_post_mds_lovconf(struct obd_device *obd);
 int mds_notify(struct obd_device *obd, struct obd_device *watched, int active);
 int mds_convert_lov_ea(struct obd_device *obd, struct inode *inode,
@@ -108,6 +109,7 @@ int mds_lov_clean(struct obd_device *obd);
 extern int mds_iocontrol(unsigned int cmd, struct obd_export *exp,
                          int len, void *karg, void *uarg);
 extern int mds_lock_mode_for_dir(struct obd_device *, struct dentry *, int);
+int mds_postrecov(struct obd_device *obd);
 
 #ifdef __KERNEL__
 int mds_get_md(struct obd_device *, struct inode *, void *md, int *size, 
index e4ab36d..82e1b05 100644 (file)
@@ -117,7 +117,7 @@ int mds_lov_write_objids(struct obd_device *obd)
         RETURN(rc);
 }
 
-static int mds_lov_clearorphans(struct mds_obd *mds, struct obd_uuid *ost_uuid)
+int mds_lov_clearorphans(struct mds_obd *mds, struct obd_uuid *ost_uuid)
 {
         int rc;
         struct obdo oa;
@@ -156,12 +156,6 @@ int mds_lov_set_nextid(struct obd_device *obd)
 
         rc = obd_set_info(mds->mds_osc_exp, strlen("next_id"), "next_id",
                           mds->mds_lov_desc.ld_tgt_count, mds->mds_lov_objids);
-        if (rc < 0)
-                GOTO(out, rc);
-
-        rc = mds_lov_clearorphans(mds, NULL /* all OSTs */);
-
-out:
         RETURN(rc);
 }
 
@@ -272,30 +266,10 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name)
          * set_nextid().  The class driver can help us here, because
          * it can use the obd_recovering flag to determine when the
          * the OBD is full available. */
-        if (!obd->obd_recovering) {
-                struct llog_ctxt *ctxt;
-                ctxt = llog_get_context(&obd->obd_llogs, LLOG_UNLINK_ORIG_CTXT);
-                rc = llog_connect(ctxt, obd->u.mds.mds_lov_desc.ld_tgt_count,
-                                  NULL, NULL, NULL);
-                if (rc != 0)
-                        CERROR("faild at llog_origin_connect: %d\n", rc);
-
-                rc = mds_cleanup_orphans(obd);
-                if (rc > 0)
-                        CERROR("Cleanup %d orphans while MDS isn't recovering\n", rc);
-
-                rc = mds_lov_set_nextid(obd);
-                if (rc)
-                        GOTO(err_llog, rc);
-        }
+        if (!obd->obd_recovering)
+                rc = mds_postrecov(obd);
         RETURN(rc);
 
-err_llog:
-        /* cleanup all llogging subsystems */
-        rc = obd_llog_finish(obd, &obd->obd_llogs,
-                             mds->mds_lov_desc.ld_tgt_count);
-        if (rc)
-                CERROR("failed to cleanup llogging subsystems\n");
 err_reg:
         obd_register_observer(mds->mds_osc_obd, NULL);
 err_discon:
@@ -520,6 +494,92 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                 RETURN(-EINVAL);
         }
         RETURN(0);
+
+}
+
+struct mds_lov_sync_info {
+        struct obd_device *mlsi_obd; /* the MDS device whose LOV is synced */
+        struct obd_uuid   *mlsi_uuid;  /* target to sync */
+};
+
+int mds_lov_synchronize(void *data)
+{
+        struct mds_lov_sync_info *mlsi = data;
+        struct llog_ctxt *ctxt;
+        struct obd_device *obd;
+        struct obd_uuid *uuid;
+        unsigned long flags;
+        int rc;
+
+        lock_kernel();
+        ptlrpc_daemonize();
+
+        SIGNAL_MASK_LOCK(current, flags);
+        sigfillset(&current->blocked);
+        RECALC_SIGPENDING;
+        SIGNAL_MASK_UNLOCK(current, flags);
+
+        obd = mlsi->mlsi_obd;
+        uuid = mlsi->mlsi_uuid;
+
+        OBD_FREE(mlsi, sizeof(*mlsi));
+
+        LASSERT(obd != NULL);
+        LASSERT(uuid != NULL);
+
+        rc = obd_set_info(obd->u.mds.mds_osc_exp, strlen("mds_conn"), 
+                          "mds_conn", 0, uuid);
+        if (rc != 0)
+                RETURN(rc);
+        
+        ctxt = llog_get_context(&obd->obd_llogs, LLOG_UNLINK_ORIG_CTXT);
+        LASSERT(ctxt != NULL);
+
+        rc = llog_connect(ctxt, obd->u.mds.mds_lov_desc.ld_tgt_count,
+                          NULL, NULL, uuid);
+        if (rc != 0) {
+                CERROR("%s: failed at llog_origin_connect: %d\n", 
+                       obd->obd_name, rc);
+                RETURN(rc);
+        }
+        
+        CWARN("MDS %s: %s now active, resetting orphans\n",
+              obd->obd_name, uuid->uuid);
+        rc = mds_lov_clearorphans(&obd->u.mds, uuid);
+        if (rc != 0) {
+                CERROR("%s: failed at mds_lov_clearorphans: %d\n", 
+                       obd->obd_name, rc);
+                RETURN(rc);
+        }
+
+        RETURN(0);
+}
+
+int mds_lov_start_synchronize(struct obd_device *obd, struct obd_uuid *uuid)
+{
+        struct mds_lov_sync_info *mlsi;
+        int rc;
+        
+        ENTRY;
+
+        OBD_ALLOC(mlsi, sizeof(*mlsi));
+        if (mlsi == NULL)
+                RETURN(-ENOMEM);
+
+        mlsi->mlsi_obd = obd;
+        mlsi->mlsi_uuid = uuid;
+
+        rc = kernel_thread(mds_lov_synchronize, mlsi, CLONE_VM | CLONE_FILES);
+        if (rc < 0)
+                CERROR("%s: error starting mds_lov_synchronize: %d\n", 
+                       obd->obd_name, rc);
+        else {
+                CDEBUG(D_HA, "%s: mds_lov_synchronize thread: %d\n", 
+                       obd->obd_name, rc);
+                rc = 0;
+        }
+
+        RETURN(rc);
 }
 
 int mds_notify(struct obd_device *obd, struct obd_device *watched, int active)
@@ -542,27 +602,7 @@ int mds_notify(struct obd_device *obd, struct obd_device *watched, int active)
                 CWARN("MDS %s: in recovery, not resetting orphans on %s\n",
                       obd->obd_name, uuid->uuid);
         } else {
-                struct llog_ctxt *ctxt;
-
-                ctxt = llog_get_context(&obd->obd_llogs, LLOG_UNLINK_ORIG_CTXT);
-                LASSERT(ctxt != NULL);
-
-                rc = obd_set_info(obd->u.mds.mds_osc_exp, strlen("mds_conn"),
-                                  "mds_conn", 0, uuid);
-                if (rc != 0)
-                        RETURN(rc);
-
-                ctxt = llog_get_context(&obd->obd_llogs, LLOG_UNLINK_ORIG_CTXT);
-                rc = llog_connect(ctxt, obd->u.mds.mds_lov_desc.ld_tgt_count,
-                                  NULL, NULL, uuid);
-                if (rc != 0) {
-                        CERROR("faild at llog_origin_connect: %d\n", rc);
-                        RETURN(rc);
-                }
-
-                CWARN("MDS %s: %s now active, resetting orphans\n",
-                      obd->obd_name, uuid->uuid);
-                rc = mds_lov_clearorphans(&obd->u.mds, uuid);
+                rc = mds_lov_start_synchronize(obd, uuid);
         }
         RETURN(rc);
 }
index 709f67c..bc82a93 100644 (file)
@@ -1178,6 +1178,12 @@ int mds_mfd_close(struct ptlrpc_request *req, struct obd_device *obd,
                 int stripe_count = 0;
                 LASSERT(rc == 0); /* mds_put_write_access must have succeeded */
 
+                if (obd->obd_recovering) {
+                        CDEBUG(D_HA, "not remove orphan %s until recovery"
+                               " is over\n", fidname);
+                        GOTO(out, rc);
+                }
+
                 CDEBUG(D_HA, "destroying orphan object %s\n", fidname);
 
                 /* Sadly, there is no easy way to save pending_child from
index b0cc7ec..951c09f 100644 (file)
@@ -149,7 +149,7 @@ static int mds_osc_destroy_orphan(struct mds_obd *mds,
         rc = obd_destroy(mds->mds_osc_exp, oa, lsm, &oti);
         obdo_free(oa);
         if (rc)
-                CERROR("destroy orphan objid 0x"LPX64" on ost error "
+                CDEBUG(D_INODE, "destroy orphan objid 0x"LPX64" on ost error "
                        "%d\n", lsm->lsm_object_id, rc);
 out_free_memmd:
         obd_free_memmd(mds->mds_osc_exp, &lsm);
@@ -320,7 +320,7 @@ int mds_cleanup_orphans(struct obd_device *obd)
                         item ++;
                         CWARN("removed orphan %s from MDS and OST\n", d_name);
                 } else {
-                        CERROR("removed orphan %s from MDS and OST failed,"
+                        CDEBUG(D_INODE, "removed orphan %s from MDS/OST failed,"
                                " rc = %d\n", d_name, rc);
                         rc = 0;
                 }
index bb2b2c4..f6a0667 100644 (file)
@@ -659,7 +659,7 @@ static void cleanup_obdclass(void)
  * kernel patch */
 #include <linux/lustre_version.h>
 #define LUSTRE_MIN_VERSION 32
-#define LUSTRE_MAX_VERSION 36
+#define LUSTRE_MAX_VERSION 37
 #if (LUSTRE_KERNEL_VERSION < LUSTRE_MIN_VERSION)
 # error Cannot continue: Your Lustre kernel patch is older than the sources
 #elif (LUSTRE_KERNEL_VERSION > LUSTRE_MAX_VERSION)
diff --git a/lustre/obdclass/simple.c b/lustre/obdclass/simple.c
deleted file mode 100644 (file)
index 48cf4d2..0000000
+++ /dev/null
@@ -1,266 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
- *  Author: Peter Braam <braam@clusterfs.com>
- *  Aurhot: Andreas Dilger <adilger@clusterfs.com>
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
-#include <linux/version.h>
-#include <linux/fs.h>
-#include <asm/unistd.h>
-
-#define DEBUG_SUBSYSTEM S_FILTER
-
-#include <linux/obd.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_compat25.h>
-
-/* Debugging check only needed during development */
-#ifdef OBD_CTXT_DEBUG
-# define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC)
-# define ASSERT_NOT_KERNEL_CTXT(msg) LASSERT(!segment_eq(get_fs(), get_ds()))
-# define ASSERT_KERNEL_CTXT(msg) LASSERT(segment_eq(get_fs(), get_ds()))
-#else
-# define ASSERT_CTXT_MAGIC(magic) do {} while(0)
-# define ASSERT_NOT_KERNEL_CTXT(msg) do {} while(0)
-# define ASSERT_KERNEL_CTXT(msg) do {} while(0)
-#endif
-
-/* push / pop to root of obd store */
-void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx,
-               struct lvfs_ucred *uc)
-{
-        //ASSERT_NOT_KERNEL_CTXT("already in kernel context!\n");
-        ASSERT_CTXT_MAGIC(new_ctx->magic);
-        OBD_SET_CTXT_MAGIC(save);
-
-        /*
-        CDEBUG(D_INFO,
-               "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
-               save, current, current->fs, current->fs->pwd,
-               atomic_read(&current->fs->pwd->d_count),
-               atomic_read(&current->fs->pwd->d_inode->i_count),
-               current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
-               current->fs->pwdmnt,
-               atomic_read(&current->fs->pwdmnt->mnt_count));
-        */
-
-        save->fs = get_fs();
-        LASSERT(atomic_read(&current->fs->pwd->d_count));
-        LASSERT(atomic_read(&new_ctx->pwd->d_count));
-        save->pwd = dget(current->fs->pwd);
-        save->pwdmnt = mntget(current->fs->pwdmnt);
-        save->ngroups = current->ngroups;
-
-        LASSERT(save->pwd);
-        LASSERT(save->pwdmnt);
-        LASSERT(new_ctx->pwd);
-        LASSERT(new_ctx->pwdmnt);
-
-        if (uc) {
-                save->luc.luc_fsuid = current->fsuid;
-                save->luc.luc_fsgid = current->fsgid;
-                save->luc.luc_cap = current->cap_effective;
-                save->luc.luc_suppgid1 = current->groups[0];
-                save->luc.luc_suppgid2 = current->groups[1];
-
-                current->fsuid = uc->luc_fsuid;
-                current->fsgid = uc->luc_fsgid;
-                current->cap_effective = uc->luc_cap;
-                current->ngroups = 0;
-
-                if (uc->luc_suppgid1 != -1)
-                        current->groups[current->ngroups++] = uc->luc_suppgid1;
-                if (uc->luc_suppgid2 != -1)
-                        current->groups[current->ngroups++] = uc->luc_suppgid2;
-        }
-        set_fs(new_ctx->fs);
-        set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd);
-
-        /*
-        CDEBUG(D_INFO,
-               "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
-               new_ctx, current, current->fs, current->fs->pwd,
-               atomic_read(&current->fs->pwd->d_count),
-               atomic_read(&current->fs->pwd->d_inode->i_count),
-               current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
-               current->fs->pwdmnt,
-               atomic_read(&current->fs->pwdmnt->mnt_count));
-        */
-}
-EXPORT_SYMBOL(push_ctxt);
-
-void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx,
-              struct lvfs_ucred *uc)
-{
-        //printk("pc0");
-        ASSERT_CTXT_MAGIC(saved->magic);
-        //printk("pc1");
-        ASSERT_KERNEL_CTXT("popping non-kernel context!\n");
-
-        /*
-        CDEBUG(D_INFO,
-               " = pop  %p==%p = cur %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
-               new_ctx, current, current->fs, current->fs->pwd,
-               atomic_read(&current->fs->pwd->d_count),
-               atomic_read(&current->fs->pwd->d_inode->i_count),
-               current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
-               current->fs->pwdmnt,
-               atomic_read(&current->fs->pwdmnt->mnt_count));
-        */
-
-        LASSERT(current->fs->pwd == new_ctx->pwd);
-        LASSERT(current->fs->pwdmnt == new_ctx->pwdmnt);
-
-        set_fs(saved->fs);
-        set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
-
-        dput(saved->pwd);
-        mntput(saved->pwdmnt);
-        if (uc) {
-                current->fsuid = saved->luc.luc_fsuid;
-                current->fsgid = saved->luc.luc_fsgid;
-                current->cap_effective = saved->luc.luc_cap;
-                current->ngroups = saved->ngroups;
-                current->groups[0] = saved->luc.luc_suppgid1;
-                current->groups[1] = saved->luc.luc_suppgid2;
-        }
-
-        /*
-        CDEBUG(D_INFO,
-               "= pop  %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
-               saved, current, current->fs, current->fs->pwd,
-               atomic_read(&current->fs->pwd->d_count),
-               atomic_read(&current->fs->pwd->d_inode->i_count),
-               current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
-               current->fs->pwdmnt,
-               atomic_read(&current->fs->pwdmnt->mnt_count));
-        */
-}
-EXPORT_SYMBOL(pop_ctxt);
-
-/* utility to make a file */
-struct dentry *simple_mknod(struct dentry *dir, char *name, int mode)
-{
-        struct dentry *dchild;
-        int err = 0;
-        ENTRY;
-
-        ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n");
-        CDEBUG(D_INODE, "creating file %*s\n", (int)strlen(name), name);
-
-        dchild = ll_lookup_one_len(name, dir, strlen(name));
-        if (IS_ERR(dchild))
-                GOTO(out_up, dchild);
-
-        if (dchild->d_inode) {
-                if (!S_ISREG(dchild->d_inode->i_mode))
-                        GOTO(out_err, err = -EEXIST);
-
-                GOTO(out_up, dchild);
-        }
-
-        err = ll_vfs_create(dir->d_inode, dchild, (mode & ~S_IFMT) | S_IFREG, NULL);
-        if (err)
-                GOTO(out_err, err);
-
-        RETURN(dchild);
-
-out_err:
-        dput(dchild);
-        dchild = ERR_PTR(err);
-out_up:
-        return dchild;
-}
-EXPORT_SYMBOL(simple_mknod);
-
-/* utility to make a directory */
-struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode)
-{
-        struct dentry *dchild;
-        int err = 0;
-        ENTRY;
-
-        ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
-        CDEBUG(D_INODE, "creating directory %*s\n", (int)strlen(name), name);
-        dchild = ll_lookup_one_len(name, dir, strlen(name));
-        if (IS_ERR(dchild))
-                GOTO(out_up, dchild);
-
-        if (dchild->d_inode) {
-                if (!S_ISDIR(dchild->d_inode->i_mode))
-                        GOTO(out_err, err = -ENOTDIR);
-
-                GOTO(out_up, dchild);
-        }
-
-        err = vfs_mkdir(dir->d_inode, dchild, mode);
-        if (err)
-                GOTO(out_err, err);
-
-        RETURN(dchild);
-
-out_err:
-        dput(dchild);
-        dchild = ERR_PTR(err);
-out_up:
-        return dchild;
-}
-EXPORT_SYMBOL(simple_mkdir);
-
-/*
- * Read a file from within kernel context.  Prior to calling this
- * function we should already have done a push_ctxt().
- */
-int lustre_fread(struct file *file, void *buf, int len, loff_t *off)
-{
-        ASSERT_KERNEL_CTXT("kernel doing read outside kernel context\n");
-        if (!file || !file->f_op || !file->f_op->read || !off)
-                RETURN(-ENOSYS);
-
-        return file->f_op->read(file, buf, len, off);
-}
-EXPORT_SYMBOL(lustre_fread);
-
-/*
- * Write a file from within kernel context.  Prior to calling this
- * function we should already have done a push_ctxt().
- */
-int lustre_fwrite(struct file *file, const void *buf, int len, loff_t *off)
-{
-        ENTRY;
-        ASSERT_KERNEL_CTXT("kernel doing write outside kernel context\n");
-        if (!file)
-                RETURN(-ENOENT);
-        if (!file->f_op)
-                RETURN(-ENOSYS);
-        if (!off)
-                RETURN(-EINVAL);
-
-        if (!file->f_op->write)
-                RETURN(-EROFS);
-
-        RETURN(file->f_op->write(file, buf, len, off));
-}
-EXPORT_SYMBOL(lustre_fwrite);
-
index 8014526..95f8263 100644 (file)
@@ -143,6 +143,8 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
         push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
         cleanup_phase = 2;
 
+        generic_osync_inode(inode, inode->i_mapping, OSYNC_DATA|OSYNC_METADATA);
+
         oti->oti_handle = fsfilt_brw_start(obd, objcount, &fso,
                                            niocount, res, oti);
         
@@ -185,7 +187,7 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
                         offs = k * inode->i_sb->s_blocksize;
 
                         if (!bio || !can_be_merged(bio, sector) ||
-                            !bio_add_page(bio, lnb->page, lnb->len, offs)) {
+                            !bio_add_page(bio, lnb->page, PAGE_SIZE, offs)) {
                                 if (bio) {
                                         atomic_inc(&dreq->numreqs);
                                         submit_bio(WRITE, bio);
@@ -198,7 +200,8 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
                                 bio->bi_end_io = dio_complete_routine;
                                 bio->bi_private = dreq;
 
-                                if (!bio_add_page(bio, lnb->page, lnb->len, 0))
+                                if (!bio_add_page(bio, lnb->page, PAGE_SIZE, 
+                                                  offs))
                                         LBUG();
                         }
                 }
@@ -210,7 +213,6 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
                         iattr.ia_size = this_size;
         }
 
-#warning This probably needs filemap_fdatasync() like filter_io_24 (bug 2366)
         if (bio) {
                 atomic_inc(&dreq->numreqs);
                 fsfilt_send_bio(obd, inode, bio);
index ed8ae35..a3ebcc5 100644 (file)
@@ -238,30 +238,43 @@ int osc_create(struct obd_export *exp, struct obdo *oa,
        /* this is the special case where create removes orphans */
        if ((oa->o_valid & OBD_MD_FLFLAGS) &&
            oa->o_flags == OBD_FL_DELORPHAN) {
-                CDEBUG(D_HA, "%p: oscc recovery started\n", oscc);
+                CDEBUG(D_HA, "%s; oscc recovery started\n", 
+                       exp->exp_obd->obd_name);
+                LASSERT(oscc->oscc_flags & OSCC_FLAG_RECOVERING);
+
                 /* delete from next_id on up */
                 oa->o_valid |= OBD_MD_FLID;
                 oa->o_id = oscc->oscc_next_id - 1;
 
                 CDEBUG(D_HA, "%s: deleting to next_id: "LPU64"\n", 
-                       oscc->oscc_obd->u.cli.cl_import->imp_target_uuid.uuid, 
-                       oa->o_id);
+                       exp->exp_obd->obd_name, oa->o_id);
 
                 rc = osc_real_create(exp, oa, ea, NULL);
+                if (oscc->oscc_obd == NULL) {
+                        CWARN("the obd for oscc %p has been freed\n", oscc);
+                        RETURN(rc);
+                }
 
                 spin_lock(&oscc->oscc_lock);
-                if (rc == -ENOSPC)
-                        oscc->oscc_flags |= OSCC_FLAG_NOSPC;
-                oscc->oscc_flags &= ~OSCC_FLAG_RECOVERING;
-                oscc->oscc_last_id = oa->o_id;
-                wake_up(&oscc->oscc_waitq);
+                if (rc == 0 || rc == -ENOSPC) {
+                        if (rc == -ENOSPC)
+                                oscc->oscc_flags |= OSCC_FLAG_NOSPC;
+                        oscc->oscc_flags &= ~OSCC_FLAG_RECOVERING;
+                        oscc->oscc_last_id = oa->o_id;
+
+                        /* recovery happens in mds_setup, before cobd_setup,
+                         * so reset oscc_gr = 0 here; it should do no harm
+                         * to CMD */
+                        oscc->oscc_gr = 0;
+
+                        CDEBUG(D_HA, "%s: oscc recovery finished: %d\n", 
+                               exp->exp_obd->obd_name, rc);
+                        wake_up(&oscc->oscc_waitq);
+                        
+                } else {
+                        CDEBUG(D_ERROR, "%s: oscc recovery failed: %d\n", 
+                               exp->exp_obd->obd_name, rc);
+                }
                 spin_unlock(&oscc->oscc_lock);
-                
-                /*recover happen in mds_setup, before cobd_setup, so
-                 *reset oscc_gr = 0 here, it sould be no harm to CMD
-                 */ 
-                oscc->oscc_gr = 0;
-                CDEBUG(D_HA, "%p: oscc recovery finished\n", oscc);
 
                RETURN(rc);
        }
@@ -272,20 +285,20 @@ int osc_create(struct obd_export *exp, struct obdo *oa,
                 if (oscc_recovering(oscc)) {
                         struct l_wait_info lwi;
                         
-                        CDEBUG(D_HA,"%p: oscc recovery in progress, waiting\n", 
-                               oscc);
+                        CDEBUG(D_HA,"%s: oscc sync in progress, waiting\n", 
+                               exp->exp_obd->obd_name);
                         
                         lwi = LWI_TIMEOUT(MAX(obd_timeout * HZ, 1), NULL, NULL);
                         rc = l_wait_event(oscc->oscc_waitq, 
                                           !oscc_recovering(oscc), &lwi);
                         LASSERT(rc == 0 || rc == -ETIMEDOUT);
                         if (rc == -ETIMEDOUT) {
-                                CDEBUG(D_HA, "%p: timed out waiting for "
-                                       "recovery\n", oscc);
+                                CDEBUG(D_HA, "%s: timed out waiting for sync\n",
+                                       exp->exp_obd->obd_name);
                                 RETURN(rc);
                         }
-                        CDEBUG(D_HA, "%p: oscc recovery over, waking up\n", 
-                               oscc);
+                        CDEBUG(D_HA, "%s: oscc sync over, waking up\n", 
+                               exp->exp_obd->obd_name);
                 }
                 
                 spin_lock(&oscc->oscc_lock);
index 3a8aefe..6010c07 100644 (file)
@@ -317,6 +317,8 @@ int osc_real_create(struct obd_export *exp, struct obdo *oa,
                         oa->o_flags == OBD_FL_DELORPHAN);
                 DEBUG_REQ(D_HA, request,
                           "delorphan from OST integration");
+                /* Don't resend the delorphan request */
+                request->rq_no_resend = request->rq_no_delay = 1;
         }
 
         rc = ptlrpc_queue_wait(request);
@@ -2481,7 +2483,8 @@ static int osc_match(struct obd_export *exp, struct lov_stripe_md *lsm,
         rc = ldlm_lock_match(obd->obd_namespace, *flags, &res_id, type,
                              policy, mode, lockh);
         if (rc) {
-                osc_set_data_with_check(lockh, data);
+                if (!(*flags & LDLM_FL_TEST_LOCK))
+                        osc_set_data_with_check(lockh, data);
                 RETURN(rc);
         }
         /* If we're trying to read, we also search for an existing PW lock.  The
index 636ee1d..cb6e0a2 100644 (file)
@@ -218,12 +218,13 @@ if test x$enable_modules != xno ; then
        fi
        LUSTRE_MODULE_TRY_MAKE(
                [#include <linux/version.h>],
-               [LINUXRELEASE=UTS_RELEASE],
+               [char *LINUXRELEASE;
+                LINUXRELEASE=UTS_RELEASE;],
                [$makerule LUSTRE_KERNEL_TEST=conftest.i],
                [test -s kernel-tests/conftest.i],
                [
                        # LINUXRELEASE="UTS_RELEASE"
-                       eval $(grep LINUXRELEASE kernel-tests/conftest.i)
+                       eval $(grep "LINUXRELEASE=" kernel-tests/conftest.i)
                ],[
                        AC_MSG_RESULT([unknown])
                        AC_MSG_ERROR([Could not preprocess test program.  Consult config.log for details.])
index c55dd37..6ef28a8 100644 (file)
@@ -7,12 +7,6 @@
 #include <linux/libcfs.h>
 #define PORTAL_DEBUG
 
-#ifndef offsetof
-# define offsetof(typ,memb)     ((unsigned long)((char *)&(((typ *)0)->memb)))
-#endif
-
-#define LOWEST_BIT_SET(x)       ((x) & ~((x) - 1))
-
 #ifdef __KERNEL__
 # include <linux/vmalloc.h>
 # include <linux/time.h>
@@ -647,7 +641,6 @@ enum {
         TCPNAL    = 5,
         ROUTER    = 6,
         IBNAL     = 7,
-        CRAY_KB_ERNAL = 8,
         NAL_ENUM_END_MARKER
 };
 
index 51d2d2f..1127698 100644 (file)
@@ -4,7 +4,7 @@
 #ifndef _KPR_H
 #define _KPR_H
 
-# include <portals/lib-nal.h> /* for ptl_hdr_t */
+# include <portals/lib-types.h> /* for ptl_hdr_t */
 
 /******************************************************************************/
 /* Kernel Portals Router interface */
index c2a15f4..a205163 100644 (file)
@@ -79,9 +79,11 @@ extern unsigned int portal_cerror;
 #define S_PTLROUTER   0x00100000
 #define S_COBD        0x00200000
 #define S_IBNAL       0x00400000
-#define S_LMV         0x00800000
-#define S_SM          0x01000000
-#define S_CMOBD       0x02000000
+#define S_SM          0x00800000
+#define S_ASOBD       0x01000000
+#define S_LMV         0x02000000
+#define S_CMOBD       0x04000000
+
 /* If you change these values, please keep portals/utils/debug.c
  * up to date! */
 
index 609290d..b4741cc 100644 (file)
@@ -77,8 +77,10 @@ static inline char *strdup(const char *str)
 #endif
 
 #ifdef __KERNEL__
+# define NTOH__u16(var) le16_to_cpu(var)
 # define NTOH__u32(var) le32_to_cpu(var)
 # define NTOH__u64(var) le64_to_cpu(var)
+# define HTON__u16(var) cpu_to_le16(var)
 # define HTON__u32(var) cpu_to_le32(var)
 # define HTON__u64(var) cpu_to_le64(var)
 #else
@@ -92,8 +94,10 @@ static inline char *strdup(const char *str)
        };       \
        (ret);     \
     })
+# define NTOH__u16(var) (var)
 # define NTOH__u32(var) (var)
 # define NTOH__u64(var) (expansion_u64(var))
+# define HTON__u16(var) (var)
 # define HTON__u32(var) (var)
 # define HTON__u64(var) (expansion_u64(var))
 #endif
index cfae78c..c5994c6 100644 (file)
@@ -19,9 +19,4 @@
 
 #include <portals/internal.h>
 #include <portals/nal.h>
-#include <portals/arg-blocks.h>
 
-/* Hack for 2.4.18 macro name collision */
-#ifdef yield
-#undef yield
-#endif
index 6d382bb..c7aaced 100644 (file)
@@ -5,7 +5,6 @@
 
 #include <portals/types.h>
 
-#ifndef PTL_NO_WRAP
 int PtlInit(int *);
 void PtlFini(void);
 
@@ -17,8 +16,6 @@ int PtlNIInitialized(ptl_interface_t);
 
 int PtlNIFini(ptl_handle_ni_t interface_in);
 
-#endif
-
 int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id);
 
 
@@ -32,9 +29,7 @@ int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in,
 int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in,
               unsigned long *distance_out);
 
-#ifndef PTL_NO_WRAP
 int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * interface_out);
-#endif
 
 
 /* 
@@ -74,16 +69,12 @@ int PtlMEUnlink(ptl_handle_me_t current_in);
 
 int PtlMEUnlinkList(ptl_handle_me_t current_in);
 
-int PtlTblDump(ptl_handle_ni_t ni, int index_in);
-int PtlMEDump(ptl_handle_me_t current_in);
-
 
 
 /*
  * Memory descriptors
  */
 
-#ifndef PTL_NO_WRAP
 int PtlMDAttach(ptl_handle_me_t current_in, ptl_md_t md_in,
                 ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out);
 
@@ -95,7 +86,6 @@ int PtlMDUnlink(ptl_handle_md_t md_in);
 int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t * old_inout,
                 ptl_md_t * new_inout, ptl_handle_eq_t testq_in);
 
-#endif
 
 /* These should not be called by users */
 int PtlMDUpdate_internal(ptl_handle_md_t md_in, ptl_md_t * old_inout,
@@ -108,16 +98,11 @@ int PtlMDUpdate_internal(ptl_handle_md_t md_in, ptl_md_t * old_inout,
 /*
  * Event queues
  */
-#ifndef PTL_NO_WRAP
-
-/* These should be called by users */
 int PtlEQAlloc(ptl_handle_ni_t ni_in, ptl_size_t count_in,
                ptl_eq_handler_t handler,
                ptl_handle_eq_t *handle_out);
 int PtlEQFree(ptl_handle_eq_t eventq_in);
 
-int PtlEQCount(ptl_handle_eq_t eventq_in, ptl_size_t * count_out);
-
 int PtlEQGet(ptl_handle_eq_t eventq_in, ptl_event_t * event_out);
 
 
@@ -125,7 +110,6 @@ int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t * event_out);
 
 int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout,
              ptl_event_t *event_out, int *which_out);
-#endif
 
 /*
  * Access Control Table
diff --git a/lustre/portals/include/portals/arg-blocks.h b/lustre/portals/include/portals/arg-blocks.h
deleted file mode 100644 (file)
index 21e30d5..0000000
+++ /dev/null
@@ -1,268 +0,0 @@
-#ifndef PTL_BLOCKS_H
-#define PTL_BLOCKS_H
-
-#include "build_check.h"
-
-/*
- * blocks.h
- *
- * Argument block types for the Portals 3.0 library
- * Generated by idl
- *
- */
-
-#include <portals/types.h>
-
-/* put LIB_MAX_DISPATCH last here  -- these must match the
-   assignements to the dispatch table in lib-p30/dispatch.c */
-#define PTL_GETID     1
-#define PTL_NISTATUS  2
-#define PTL_NIDIST    3
-// #define PTL_NIDEBUG   4
-#define PTL_MEATTACH  5
-#define PTL_MEINSERT  6
-// #define PTL_MEPREPEND 7
-#define PTL_MEUNLINK  8
-#define PTL_TBLDUMP   9 
-#define PTL_MEDUMP   10
-#define PTL_MDATTACH 11
-// #define PTL_MDINSERT 12
-#define PTL_MDBIND   13
-#define PTL_MDUPDATE 14
-#define PTL_MDUNLINK 15
-#define PTL_EQALLOC  16
-#define PTL_EQFREE   17
-#define PTL_ACENTRY  18
-#define PTL_PUT      19 
-#define PTL_GET      20
-#define PTL_FAILNID  21
-#define LIB_MAX_DISPATCH 21
-
-typedef struct PtlFailNid_in {
-       ptl_handle_ni_t interface;
-       ptl_nid_t       nid;
-       unsigned int    threshold;
-} PtlFailNid_in;
-
-typedef struct PtlFailNid_out {
-       int             rc;
-} PtlFailNid_out;
-
-typedef struct PtlGetId_in {
-        ptl_handle_ni_t handle_in;
-} PtlGetId_in;
-
-typedef struct PtlGetId_out {
-        int rc;
-        ptl_process_id_t id_out;
-} PtlGetId_out;
-
-typedef struct PtlNIStatus_in {
-        ptl_handle_ni_t interface_in;
-        ptl_sr_index_t register_in;
-} PtlNIStatus_in;
-
-typedef struct PtlNIStatus_out {
-        int rc;
-        ptl_sr_value_t status_out;
-} PtlNIStatus_out;
-
-
-typedef struct PtlNIDist_in {
-        ptl_handle_ni_t interface_in;
-        ptl_process_id_t process_in;
-} PtlNIDist_in;
-
-typedef struct PtlNIDist_out {
-        int rc;
-        unsigned long distance_out;
-} PtlNIDist_out;
-
-
-typedef struct PtlNIDebug_in {
-        unsigned int mask_in;
-} PtlNIDebug_in;
-
-typedef struct PtlNIDebug_out {
-        unsigned int rc;
-} PtlNIDebug_out;
-
-
-typedef struct PtlMEAttach_in {
-        ptl_handle_ni_t interface_in;
-        ptl_pt_index_t index_in;
-        ptl_ins_pos_t position_in;
-        ptl_process_id_t match_id_in;
-        ptl_match_bits_t match_bits_in;
-        ptl_match_bits_t ignore_bits_in;
-        ptl_unlink_t unlink_in;
-} PtlMEAttach_in;
-
-typedef struct PtlMEAttach_out {
-        int rc;
-        ptl_handle_me_t handle_out;
-} PtlMEAttach_out;
-
-
-typedef struct PtlMEInsert_in {
-        ptl_handle_me_t current_in;
-        ptl_process_id_t match_id_in;
-        ptl_match_bits_t match_bits_in;
-        ptl_match_bits_t ignore_bits_in;
-        ptl_unlink_t unlink_in;
-        ptl_ins_pos_t position_in;
-} PtlMEInsert_in;
-
-typedef struct PtlMEInsert_out {
-        int rc;
-        ptl_handle_me_t handle_out;
-} PtlMEInsert_out;
-
-typedef struct PtlMEUnlink_in {
-        ptl_handle_me_t current_in;
-        ptl_unlink_t unlink_in;
-} PtlMEUnlink_in;
-
-typedef struct PtlMEUnlink_out {
-        int rc;
-} PtlMEUnlink_out;
-
-
-typedef struct PtlTblDump_in {
-        int index_in;
-} PtlTblDump_in;
-
-typedef struct PtlTblDump_out {
-        int rc;
-} PtlTblDump_out;
-
-
-typedef struct PtlMEDump_in {
-        ptl_handle_me_t current_in;
-} PtlMEDump_in;
-
-typedef struct PtlMEDump_out {
-        int rc;
-} PtlMEDump_out;
-
-
-typedef struct PtlMDAttach_in {
-        ptl_handle_me_t me_in;
-        ptl_handle_eq_t eq_in;
-        ptl_md_t md_in;
-        ptl_unlink_t unlink_in;
-} PtlMDAttach_in;
-
-typedef struct PtlMDAttach_out {
-        int rc;
-        ptl_handle_md_t handle_out;
-} PtlMDAttach_out;
-
-
-typedef struct PtlMDBind_in {
-        ptl_handle_ni_t ni_in;
-        ptl_handle_eq_t eq_in;
-        ptl_md_t md_in;
-       ptl_unlink_t unlink_in;
-} PtlMDBind_in;
-
-typedef struct PtlMDBind_out {
-        int rc;
-        ptl_handle_md_t handle_out;
-} PtlMDBind_out;
-
-
-typedef struct PtlMDUpdate_internal_in {
-        ptl_handle_md_t md_in;
-        ptl_handle_eq_t testq_in;
-        ptl_seq_t sequence_in;
-
-        ptl_md_t old_inout;
-        int old_inout_valid;
-        ptl_md_t new_inout;
-        int new_inout_valid;
-} PtlMDUpdate_internal_in;
-
-typedef struct PtlMDUpdate_internal_out {
-        int rc;
-        ptl_md_t old_inout;
-        ptl_md_t new_inout;
-} PtlMDUpdate_internal_out;
-
-
-typedef struct PtlMDUnlink_in {
-        ptl_handle_md_t md_in;
-} PtlMDUnlink_in;
-
-typedef struct PtlMDUnlink_out {
-        int rc;
-        ptl_md_t status_out;
-} PtlMDUnlink_out;
-
-
-typedef struct PtlEQAlloc_in {
-        ptl_handle_ni_t ni_in;
-        ptl_size_t count_in;
-        void *base_in;
-        int len_in;
-        ptl_eq_handler_t callback_in;
-} PtlEQAlloc_in;
-
-typedef struct PtlEQAlloc_out {
-        int rc;
-        ptl_handle_eq_t handle_out;
-} PtlEQAlloc_out;
-
-
-typedef struct PtlEQFree_in {
-        ptl_handle_eq_t eventq_in;
-} PtlEQFree_in;
-
-typedef struct PtlEQFree_out {
-        int rc;
-} PtlEQFree_out;
-
-
-typedef struct PtlACEntry_in {
-        ptl_handle_ni_t ni_in;
-        ptl_ac_index_t index_in;
-        ptl_process_id_t match_id_in;
-        ptl_pt_index_t portal_in;
-} PtlACEntry_in;
-
-typedef struct PtlACEntry_out {
-        int rc;
-} PtlACEntry_out;
-
-
-typedef struct PtlPut_in {
-        ptl_handle_md_t md_in;
-        ptl_ack_req_t ack_req_in;
-        ptl_process_id_t target_in;
-        ptl_pt_index_t portal_in;
-        ptl_ac_index_t cookie_in;
-        ptl_match_bits_t match_bits_in;
-        ptl_size_t offset_in;
-        ptl_hdr_data_t hdr_data_in;
-} PtlPut_in;
-
-typedef struct PtlPut_out {
-        int rc;
-} PtlPut_out;
-
-
-typedef struct PtlGet_in {
-        ptl_handle_md_t md_in;
-        ptl_process_id_t target_in;
-        ptl_pt_index_t portal_in;
-        ptl_ac_index_t cookie_in;
-        ptl_match_bits_t match_bits_in;
-        ptl_size_t offset_in;
-} PtlGet_in;
-
-typedef struct PtlGet_out {
-        int rc;
-} PtlGet_out;
-
-
-#endif
index a98bfd9..42f2626 100644 (file)
@@ -41,7 +41,10 @@ typedef enum {
 
        PTL_EQ_IN_USE           = 21,
 
-        PTL_MAX_ERRNO          = 22
+       PTL_NI_INVALID          = 22,
+       PTL_MD_ILLEGAL          = 23,
+       
+        PTL_MAX_ERRNO          = 24
 } ptl_err_t;
 /* If you change these, you must update the string table in api-errno.c */
 
diff --git a/lustre/portals/include/portals/lib-dispatch.h b/lustre/portals/include/portals/lib-dispatch.h
deleted file mode 100644 (file)
index 610c776..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef PTL_DISPATCH_H
-#define PTL_DISPATCH_H
-
-#include "build_check.h"
-/*
- * include/dispatch.h
- *
- * Dispatch table header and externs for remote side
- * operations
- *
- * Generated by idl
- *
- */
-
-#include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
-
-extern int do_PtlGetId(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlNIStatus(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlNIDist(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlMEAttach(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlMEInsert(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlMEPrepend(nal_cb_t * nal, void *private, void *args,
-                           void *ret);
-extern int do_PtlMEUnlink(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlTblDump(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlMEDump(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlMDAttach(nal_cb_t * nal, void *private, void *args,
-                                   void *ret);
-extern int do_PtlMDBind(nal_cb_t * nal, void *private, void *args,
-                                 void *ret);
-extern int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *args,
-                                   void *ret);
-extern int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *args,
-                                   void *ret);
-extern int do_PtlEQAlloc_internal(nal_cb_t * nal, void *private, void *args,
-                                  void *ret);
-extern int do_PtlEQFree_internal(nal_cb_t * nal, void *private, void *args,
-                                 void *ret);
-extern int do_PtlPut(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlGet(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlFailNid (nal_cb_t *nal, void *private, void *args, void *ret);
-
-extern char *dispatch_name(int index);
-#endif
diff --git a/lustre/portals/include/portals/lib-nal.h b/lustre/portals/include/portals/lib-nal.h
deleted file mode 100644 (file)
index d1d0495..0000000
+++ /dev/null
@@ -1,116 +0,0 @@
-#ifndef _LIB_NAL_H_
-#define _LIB_NAL_H_
-
-#include "build_check.h"
-/*
- * nal.h
- *
- * Library side headers that define the abstraction layer's
- * responsibilities and interfaces
- */
-
-#include <portals/lib-types.h>
-
-struct nal_cb_t {
-       /*
-        * Per interface portal table, access control table
-        * and NAL private data field;
-        */
-       lib_ni_t ni;
-       void *nal_data;
-       /*
-        * send: Sends a preformatted header and payload data to a
-        * specified remote process. The payload is scattered over 'niov'
-        * fragments described by iov, starting at 'offset' for 'mlen'
-        * bytes.  
-        * NB the NAL may NOT overwrite iov.  
-        * PTL_OK on success => NAL has committed to send and will call
-        * lib_finalize on completion
-        */
-       ptl_err_t (*cb_send) (nal_cb_t * nal, void *private, lib_msg_t * cookie, 
-                             ptl_hdr_t * hdr, int type, ptl_nid_t nid, ptl_pid_t pid, 
-                             unsigned int niov, struct iovec *iov, 
-                             size_t offset, size_t mlen);
-
-       /* as send, but with a set of page fragments (NULL if not supported) */
-       ptl_err_t (*cb_send_pages) (nal_cb_t * nal, void *private, lib_msg_t * cookie, 
-                                   ptl_hdr_t * hdr, int type, ptl_nid_t nid, ptl_pid_t pid, 
-                                   unsigned int niov, ptl_kiov_t *iov, 
-                                   size_t offset, size_t mlen);
-       /*
-        * recv: Receives an incoming message from a remote process.  The
-        * payload is to be received into the scattered buffer of 'niov'
-        * fragments described by iov, starting at 'offset' for 'mlen'
-        * bytes.  Payload bytes after 'mlen' up to 'rlen' are to be
-        * discarded.  
-        * NB the NAL may NOT overwrite iov.
-        * PTL_OK on success => NAL has committed to receive and will call
-        * lib_finalize on completion
-        */
-       ptl_err_t (*cb_recv) (nal_cb_t * nal, void *private, lib_msg_t * cookie,
-                             unsigned int niov, struct iovec *iov, 
-                             size_t offset, size_t mlen, size_t rlen);
-
-       /* as recv, but with a set of page fragments (NULL if not supported) */
-       ptl_err_t (*cb_recv_pages) (nal_cb_t * nal, void *private, lib_msg_t * cookie,
-                                   unsigned int niov, ptl_kiov_t *iov, 
-                                   size_t offset, size_t mlen, size_t rlen);
-       /*
-        * read: Reads a block of data from a specified user address
-        */
-       ptl_err_t (*cb_read) (nal_cb_t * nal, void *private, void *dst_addr,
-                             user_ptr src_addr, size_t len);
-
-       /*
-        * write: Writes a block of data into a specified user address
-        */
-       ptl_err_t (*cb_write) (nal_cb_t * nal, void *private, user_ptr dsr_addr,
-                              void *src_addr, size_t len);
-
-       /*
-        * callback: Calls an event callback
-        * NULL => lib calls eq's callback (if any) directly.
-        */
-       void (*cb_callback) (nal_cb_t * nal, void *private, lib_eq_t *eq,
-                            ptl_event_t *ev);
-
-       /*
-        *  malloc: Acquire a block of memory in a system independent
-        * fashion.
-        */
-       void *(*cb_malloc) (nal_cb_t * nal, size_t len);
-
-       void (*cb_free) (nal_cb_t * nal, void *buf, size_t len);
-
-       /*
-        * (un)map: Tell the NAL about some memory it will access.
-        * *addrkey passed to cb_unmap() is what cb_map() set it to.
-        * type of *iov depends on options.
-        * Set to NULL if not required.
-        */
-       ptl_err_t (*cb_map) (nal_cb_t * nal, unsigned int niov, struct iovec *iov, 
-                            void **addrkey);
-       void (*cb_unmap) (nal_cb_t * nal, unsigned int niov, struct iovec *iov, 
-                         void **addrkey);
-
-       /* as (un)map, but with a set of page fragments */
-       ptl_err_t (*cb_map_pages) (nal_cb_t * nal, unsigned int niov, ptl_kiov_t *iov, 
-                                  void **addrkey);
-       void (*cb_unmap_pages) (nal_cb_t * nal, unsigned int niov, ptl_kiov_t *iov, 
-                         void **addrkey);
-
-       void (*cb_printf) (nal_cb_t * nal, const char *fmt, ...);
-
-       /* Turn interrupts off (begin of protected area) */
-       void (*cb_cli) (nal_cb_t * nal, unsigned long *flags);
-
-       /* Turn interrupts on (end of protected area) */
-       void (*cb_sti) (nal_cb_t * nal, unsigned long *flags);
-
-       /*
-        * Calculate a network "distance" to given node
-        */
-       int (*cb_dist) (nal_cb_t * nal, ptl_nid_t nid, unsigned long *dist);
-};
-
-#endif
index efa929c..4daf219 100644 (file)
 #else
 # include <portals/list.h>
 # include <string.h>
+# include <pthread.h>
 #endif
 #include <portals/types.h>
 #include <linux/kp30.h>
 #include <portals/p30.h>
+#include <portals/nal.h>
 #include <portals/lib-types.h>
-#include <portals/lib-nal.h>
-#include <portals/lib-dispatch.h>
 
 static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh)
 {
@@ -31,17 +31,18 @@ static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh)
                 wh->wh_object_cookie == PTL_WIRE_HANDLE_NONE.wh_object_cookie);
 }
 
-#define state_lock(nal,flagsp)                          \
-do {                                                    \
-        CDEBUG(D_PORTALS, "taking state lock\n");       \
-        nal->cb_cli(nal, flagsp);                       \
-} while (0)
+#ifdef __KERNEL__
+#define LIB_LOCK(nal,flags)                                     \
+        spin_lock_irqsave(&(nal)->libnal_ni.ni_lock, flags)
+#define LIB_UNLOCK(nal,flags)                                   \
+        spin_unlock_irqrestore(&(nal)->libnal_ni.ni_lock, flags)
+#else
+#define LIB_LOCK(nal,flags)                                             \
+        (pthread_mutex_lock(&(nal)->libnal_ni.ni_mutex), (flags) = 0)
+#define LIB_UNLOCK(nal,flags)                                   \
+        pthread_mutex_unlock(&(nal)->libnal_ni.ni_mutex)
+#endif
 
-#define state_unlock(nal,flagsp)                        \
-{                                                       \
-        CDEBUG(D_PORTALS, "releasing state lock\n");    \
-        nal->cb_sti(nal, flagsp);                       \
-}
 
 #ifdef PTL_USE_LIB_FREELIST
 
@@ -50,13 +51,13 @@ do {                                                    \
 #define MAX_MSGS        2048    /* Outstanding messages */
 #define MAX_EQS         512
 
-extern int lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int nobj, int objsize);
-extern void lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl);
+extern int lib_freelist_init (lib_nal_t *nal, lib_freelist_t *fl, int nobj, int objsize);
+extern void lib_freelist_fini (lib_nal_t *nal, lib_freelist_t *fl);
 
 static inline void *
 lib_freelist_alloc (lib_freelist_t *fl)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         lib_freeobj_t *o;
 
         if (list_empty (&fl->fl_list))
@@ -70,7 +71,7 @@ lib_freelist_alloc (lib_freelist_t *fl)
 static inline void
 lib_freelist_free (lib_freelist_t *fl, void *obj)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         lib_freeobj_t *o = list_entry (obj, lib_freeobj_t, fo_contents);
         
         list_add (&o->fo_list, &fl->fl_list);
@@ -78,78 +79,78 @@ lib_freelist_free (lib_freelist_t *fl, void *obj)
 
 
 static inline lib_eq_t *
-lib_eq_alloc (nal_cb_t *nal)
+lib_eq_alloc (lib_nal_t *nal)
 {
-        /* NEVER called with statelock held */
+        /* NEVER called with liblock held */
         unsigned long  flags;
         lib_eq_t      *eq;
         
-        state_lock (nal, &flags);
-        eq = (lib_eq_t *)lib_freelist_alloc (&nal->ni.ni_free_eqs);
-        state_unlock (nal, &flags);
+        LIB_LOCK (nal, flags);
+        eq = (lib_eq_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_eqs);
+        LIB_UNLOCK (nal, flags);
 
         return (eq);
 }
 
 static inline void
-lib_eq_free (nal_cb_t *nal, lib_eq_t *eq)
+lib_eq_free (lib_nal_t *nal, lib_eq_t *eq)
 {
-        /* ALWAYS called with statelock held */
-        lib_freelist_free (&nal->ni.ni_free_eqs, eq);
+        /* ALWAYS called with liblock held */
+        lib_freelist_free (&nal->libnal_ni.ni_free_eqs, eq);
 }
 
 static inline lib_md_t *
-lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd)
+lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd)
 {
-        /* NEVER called with statelock held */
+        /* NEVER called with liblock held */
         unsigned long  flags;
         lib_md_t      *md;
         
-        state_lock (nal, &flags);
-        md = (lib_md_t *)lib_freelist_alloc (&nal->ni.ni_free_mds);
-        state_unlock (nal, &flags);
+        LIB_LOCK (nal, flags);
+        md = (lib_md_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_mds);
+        LIB_UNLOCK (nal, flags);
 
         return (md);
 }
 
 static inline void
-lib_md_free (nal_cb_t *nal, lib_md_t *md)
+lib_md_free (lib_nal_t *nal, lib_md_t *md)
 {
-        /* ALWAYS called with statelock held */
-        lib_freelist_free (&nal->ni.ni_free_mds, md);
+        /* ALWAYS called with liblock held */
+        lib_freelist_free (&nal->libnal_ni.ni_free_mds, md);
 }
 
 static inline lib_me_t *
-lib_me_alloc (nal_cb_t *nal)
+lib_me_alloc (lib_nal_t *nal)
 {
-        /* NEVER called with statelock held */
+        /* NEVER called with liblock held */
         unsigned long  flags;
         lib_me_t      *me;
         
-        state_lock (nal, &flags);
-        me = (lib_me_t *)lib_freelist_alloc (&nal->ni.ni_free_mes);
-        state_unlock (nal, &flags);
+        LIB_LOCK (nal, flags);
+        me = (lib_me_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_mes);
+        LIB_UNLOCK (nal, flags);
         
         return (me);
 }
 
 static inline void
-lib_me_free (nal_cb_t *nal, lib_me_t *me)
+lib_me_free (lib_nal_t *nal, lib_me_t *me)
 {
-        /* ALWAYS called with statelock held */
-        lib_freelist_free (&nal->ni.ni_free_mes, me);
+        /* ALWAYS called with liblock held */
+        lib_freelist_free (&nal->libnal_ni.ni_free_mes, me);
 }
 
 static inline lib_msg_t *
-lib_msg_alloc (nal_cb_t *nal)
+lib_msg_alloc (lib_nal_t *nal)
 {
-        /* NEVER called with statelock held */
+        /* NEVER called with liblock held */
         unsigned long  flags;
         lib_msg_t     *msg;
         
-        state_lock (nal, &flags);
-        msg = (lib_msg_t *)lib_freelist_alloc (&nal->ni.ni_free_msgs);
-        state_unlock (nal, &flags);
+        LIB_LOCK (nal, flags);
+        msg = (lib_msg_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_msgs);
+        LIB_UNLOCK (nal, flags);
 
         if (msg != NULL) {
                 /* NULL pointers, clear flags etc */
@@ -160,18 +161,18 @@ lib_msg_alloc (nal_cb_t *nal)
 }
 
 static inline void
-lib_msg_free (nal_cb_t *nal, lib_msg_t *msg)
+lib_msg_free (lib_nal_t *nal, lib_msg_t *msg)
 {
-        /* ALWAYS called with statelock held */
-        lib_freelist_free (&nal->ni.ni_free_msgs, msg);
+        /* ALWAYS called with liblock held */
+        lib_freelist_free (&nal->libnal_ni.ni_free_msgs, msg);
 }
 
 #else
 
 static inline lib_eq_t *
-lib_eq_alloc (nal_cb_t *nal)
+lib_eq_alloc (lib_nal_t *nal)
 {
-        /* NEVER called with statelock held */
+        /* NEVER called with liblock held */
         lib_eq_t *eq;
 
         PORTAL_ALLOC(eq, sizeof(*eq));
@@ -179,16 +180,16 @@ lib_eq_alloc (nal_cb_t *nal)
 }
 
 static inline void
-lib_eq_free (nal_cb_t *nal, lib_eq_t *eq)
+lib_eq_free (lib_nal_t *nal, lib_eq_t *eq)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         PORTAL_FREE(eq, sizeof(*eq));
 }
 
 static inline lib_md_t *
-lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd)
+lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd)
 {
-        /* NEVER called with statelock held */
+        /* NEVER called with liblock held */
         lib_md_t *md;
         int       size;
         int       niov;
@@ -214,9 +215,9 @@ lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd)
 }
 
 static inline void 
-lib_md_free (nal_cb_t *nal, lib_md_t *md)
+lib_md_free (lib_nal_t *nal, lib_md_t *md)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         int       size;
 
         if ((md->options & PTL_MD_KIOV) != 0)
@@ -228,9 +229,9 @@ lib_md_free (nal_cb_t *nal, lib_md_t *md)
 }
 
 static inline lib_me_t *
-lib_me_alloc (nal_cb_t *nal)
+lib_me_alloc (lib_nal_t *nal)
 {
-        /* NEVER called with statelock held */
+        /* NEVER called with liblock held */
         lib_me_t *me;
 
         PORTAL_ALLOC(me, sizeof(*me));
@@ -238,16 +239,16 @@ lib_me_alloc (nal_cb_t *nal)
 }
 
 static inline void 
-lib_me_free(nal_cb_t *nal, lib_me_t *me)
+lib_me_free(lib_nal_t *nal, lib_me_t *me)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         PORTAL_FREE(me, sizeof(*me));
 }
 
 static inline lib_msg_t *
-lib_msg_alloc(nal_cb_t *nal)
+lib_msg_alloc(lib_nal_t *nal)
 {
-        /* NEVER called with statelock held; may be in interrupt... */
+        /* NEVER called with liblock held; may be in interrupt... */
         lib_msg_t *msg;
 
         if (in_interrupt())
@@ -264,27 +265,28 @@ lib_msg_alloc(nal_cb_t *nal)
 }
 
 static inline void 
-lib_msg_free(nal_cb_t *nal, lib_msg_t *msg)
+lib_msg_free(lib_nal_t *nal, lib_msg_t *msg)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         PORTAL_FREE(msg, sizeof(*msg));
 }
 #endif
 
-extern lib_handle_t *lib_lookup_cookie (nal_cb_t *nal, __u64 cookie, int type);
-extern void lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh, int type);
-extern void lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh);
+extern lib_handle_t *lib_lookup_cookie (lib_nal_t *nal, __u64 cookie, int type);
+extern void lib_initialise_handle (lib_nal_t *nal, lib_handle_t *lh, int type);
+extern void lib_invalidate_handle (lib_nal_t *nal, lib_handle_t *lh);
 
 static inline void
-ptl_eq2handle (ptl_handle_eq_t *handle, lib_eq_t *eq)
+ptl_eq2handle (ptl_handle_eq_t *handle, lib_nal_t *nal, lib_eq_t *eq)
 {
+        handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
         handle->cookie = eq->eq_lh.lh_cookie;
 }
 
 static inline lib_eq_t *
-ptl_handle2eq (ptl_handle_eq_t *handle, nal_cb_t *nal)
+ptl_handle2eq (ptl_handle_eq_t *handle, lib_nal_t *nal)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie, 
                                               PTL_COOKIE_TYPE_EQ);
         if (lh == NULL)
@@ -294,15 +296,16 @@ ptl_handle2eq (ptl_handle_eq_t *handle, nal_cb_t *nal)
 }
 
 static inline void
-ptl_md2handle (ptl_handle_md_t *handle, lib_md_t *md)
+ptl_md2handle (ptl_handle_md_t *handle, lib_nal_t *nal, lib_md_t *md)
 {
+        handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
         handle->cookie = md->md_lh.lh_cookie;
 }
 
 static inline lib_md_t *
-ptl_handle2md (ptl_handle_md_t *handle, nal_cb_t *nal)
+ptl_handle2md (ptl_handle_md_t *handle, lib_nal_t *nal)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie,
                                               PTL_COOKIE_TYPE_MD);
         if (lh == NULL)
@@ -312,12 +315,12 @@ ptl_handle2md (ptl_handle_md_t *handle, nal_cb_t *nal)
 }
 
 static inline lib_md_t *
-ptl_wire_handle2md (ptl_handle_wire_t *wh, nal_cb_t *nal)
+ptl_wire_handle2md (ptl_handle_wire_t *wh, lib_nal_t *nal)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         lib_handle_t *lh;
         
-        if (wh->wh_interface_cookie != nal->ni.ni_interface_cookie)
+        if (wh->wh_interface_cookie != nal->libnal_ni.ni_interface_cookie)
                 return (NULL);
         
         lh = lib_lookup_cookie (nal, wh->wh_object_cookie,
@@ -329,15 +332,16 @@ ptl_wire_handle2md (ptl_handle_wire_t *wh, nal_cb_t *nal)
 }
 
 static inline void
-ptl_me2handle (ptl_handle_me_t *handle, lib_me_t *me)
+ptl_me2handle (ptl_handle_me_t *handle, lib_nal_t *nal, lib_me_t *me)
 {
+        handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
         handle->cookie = me->me_lh.lh_cookie;
 }
 
 static inline lib_me_t *
-ptl_handle2me (ptl_handle_me_t *handle, nal_cb_t *nal)
+ptl_handle2me (ptl_handle_me_t *handle, lib_nal_t *nal)
 {
-        /* ALWAYS called with statelock held */
+        /* ALWAYS called with liblock held */
         lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie,
                                               PTL_COOKIE_TYPE_ME);
         if (lh == NULL)
@@ -346,35 +350,30 @@ ptl_handle2me (ptl_handle_me_t *handle, nal_cb_t *nal)
         return (lh_entry (lh, lib_me_t, me_lh));
 }
 
-extern int lib_init(nal_cb_t *cb, ptl_process_id_t pid,
+extern int lib_init(lib_nal_t *libnal, nal_t *apinal,
+                    ptl_process_id_t pid,
                     ptl_ni_limits_t *desired_limits, 
                     ptl_ni_limits_t *actual_limits);
-extern int lib_fini(nal_cb_t * cb);
-extern void lib_dispatch(nal_cb_t * cb, void *private, int index,
-                         void *arg_block, void *ret_block);
-extern char *dispatch_name(int index);
+extern int lib_fini(lib_nal_t *libnal);
 
 /*
- * When the NAL detects an incoming message, it should call
- * lib_parse() decode it.  The NAL callbacks will be handed
- * the private cookie as a way for the NAL to maintain state
- * about which transaction is being processed.  An extra parameter,
- * lib_cookie will contain the necessary information for
- * finalizing the message.
- *
- * After it has finished the handling the message, it should
- * call lib_finalize() with the lib_cookie parameter.
- * Call backs will be made to write events, send acks or
- * replies and so on.
+ * When the NAL detects an incoming message header, it should call
+ * lib_parse() to decode it.  If the message header is garbage, lib_parse()
+ * returns immediately with failure; otherwise the NAL callbacks will be
+ * called to receive the message body.  They are handed the private cookie
+ * as a way for the NAL to maintain state about which transaction is being
+ * processed.  An extra parameter, lib_msg, contains the lib-level message
+ * state for passing to lib_finalize() when the message body has been
+ * received.
  */
-extern void lib_enq_event_locked (nal_cb_t *nal, void *private,
+extern void lib_enq_event_locked (lib_nal_t *nal, void *private,
                                   lib_eq_t *eq, ptl_event_t *ev);
-extern void lib_finalize (nal_cb_t *nal, void *private, lib_msg_t *msg, 
+extern void lib_finalize (lib_nal_t *nal, void *private, lib_msg_t *msg, 
                           ptl_ni_fail_t ni_fail_type);
-extern void lib_parse (nal_cb_t *nal, ptl_hdr_t *hdr, void *private);
-extern lib_msg_t *lib_create_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, 
+extern ptl_err_t lib_parse (lib_nal_t *nal, ptl_hdr_t *hdr, void *private);
+extern lib_msg_t *lib_create_reply_msg (lib_nal_t *nal, ptl_nid_t peer_nid, 
                                         lib_msg_t *get_msg);
-extern void print_hdr (nal_cb_t * nal, ptl_hdr_t * hdr);
+extern void print_hdr (lib_nal_t * nal, ptl_hdr_t * hdr);
 
 
 extern ptl_size_t lib_iov_nob (int niov, struct iovec *iov);
@@ -397,14 +396,65 @@ extern int lib_extract_kiov (int dst_niov, ptl_kiov_t *dst,
 
 extern void lib_assert_wire_constants (void);
 
-extern ptl_err_t lib_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
+extern ptl_err_t lib_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
                            ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen);
-extern ptl_err_t lib_send (nal_cb_t *nal, void *private, lib_msg_t *msg,
+extern ptl_err_t lib_send (lib_nal_t *nal, void *private, lib_msg_t *msg,
                            ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
                            lib_md_t *md, ptl_size_t offset, ptl_size_t len);
 
-extern void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md_in,
-                               ptl_md_t * md_out);
-extern void lib_md_unlink(nal_cb_t * nal, lib_md_t * md_in);
-extern void lib_me_unlink(nal_cb_t * nal, lib_me_t * me_in);
+extern int lib_api_ni_status (nal_t *nal, ptl_sr_index_t sr_idx,
+                              ptl_sr_value_t *status);
+extern int lib_api_ni_dist (nal_t *nal, ptl_process_id_t *pid, 
+                            unsigned long *dist);
+
+extern int lib_api_eq_alloc (nal_t *nal, ptl_size_t count,
+                             ptl_eq_handler_t callback, 
+                             ptl_handle_eq_t *handle);
+extern int lib_api_eq_free(nal_t *nal, ptl_handle_eq_t *eqh);
+extern int lib_api_eq_poll (nal_t *nal, 
+                            ptl_handle_eq_t *eventqs, int neq, int timeout_ms,
+                            ptl_event_t *event, int *which);
+
+extern int lib_api_me_attach(nal_t *nal,
+                             ptl_pt_index_t portal,
+                             ptl_process_id_t match_id, 
+                             ptl_match_bits_t match_bits, 
+                             ptl_match_bits_t ignore_bits,
+                             ptl_unlink_t unlink, ptl_ins_pos_t pos,
+                             ptl_handle_me_t *handle);
+extern int lib_api_me_insert(nal_t *nal,
+                             ptl_handle_me_t *current_meh,
+                             ptl_process_id_t match_id, 
+                             ptl_match_bits_t match_bits, 
+                             ptl_match_bits_t ignore_bits,
+                             ptl_unlink_t unlink, ptl_ins_pos_t pos,
+                             ptl_handle_me_t *handle);
+extern int lib_api_me_unlink (nal_t *nal, ptl_handle_me_t *meh);
+extern void lib_me_unlink(lib_nal_t *nal, lib_me_t *me);
+
+extern int lib_api_get_id(nal_t *nal, ptl_process_id_t *pid);
+
+extern void lib_md_unlink(lib_nal_t *nal, lib_md_t *md);
+extern void lib_md_deconstruct(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd);
+extern int lib_api_md_attach(nal_t *nal, ptl_handle_me_t *meh,
+                             ptl_md_t *umd, ptl_unlink_t unlink, 
+                             ptl_handle_md_t *handle);
+extern int lib_api_md_bind(nal_t *nal, ptl_md_t *umd, ptl_unlink_t unlink,
+                           ptl_handle_md_t *handle);
+extern int lib_api_md_unlink (nal_t *nal, ptl_handle_md_t *mdh);
+extern int lib_api_md_update (nal_t *nal, ptl_handle_md_t *mdh,
+                              ptl_md_t *oldumd, ptl_md_t *newumd,
+                              ptl_handle_eq_t *testqh);
+
+extern int lib_api_get(nal_t *apinal, ptl_handle_md_t *mdh, 
+                       ptl_process_id_t *id,
+                       ptl_pt_index_t portal, ptl_ac_index_t ac,
+                       ptl_match_bits_t match_bits, ptl_size_t offset);
+extern int lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh, 
+                       ptl_ack_req_t ack, ptl_process_id_t *id,
+                       ptl_pt_index_t portal, ptl_ac_index_t ac,
+                       ptl_match_bits_t match_bits, 
+                       ptl_size_t offset, ptl_hdr_data_t hdr_data);
+extern int lib_api_fail_nid(nal_t *apinal, ptl_nid_t nid, unsigned int threshold);
+
 #endif
index ef618c7..6549988 100644 (file)
@@ -13,6 +13,7 @@
 #include "build_check.h"
 
 #include <portals/types.h>
+#include <portals/nal.h>
 #ifdef __KERNEL__
 # include <linux/uio.h>
 # include <linux/smp_lock.h>
@@ -22,9 +23,6 @@
 # include <sys/types.h>
 #endif
 
-/* struct nal_cb_t is defined in lib-nal.h */
-typedef struct nal_cb_t nal_cb_t;
-
 typedef char *user_ptr;
 typedef struct lib_msg_t lib_msg_t;
 typedef struct lib_ptl_t lib_ptl_t;
@@ -165,11 +163,12 @@ typedef struct {
 struct lib_eq_t {
         struct list_head  eq_list;
         lib_handle_t      eq_lh;
-        ptl_seq_t         sequence;
-        ptl_size_t        size;
-        ptl_event_t      *base;
+        ptl_seq_t         eq_enq_seq;
+        ptl_seq_t         eq_deq_seq;
+        ptl_size_t        eq_size;
+        ptl_event_t      *eq_events;
         int               eq_refcount;
-        ptl_eq_handler_t  event_callback;
+        ptl_eq_handler_t  eq_callback;
         void             *eq_addrkey;
 };
 
@@ -244,29 +243,117 @@ typedef struct {
 /* PTL_COOKIE_TYPES must be a power of 2, so the cookie type can be
  * extracted by masking with (PTL_COOKIE_TYPES - 1) */
 
-typedef struct {
-        ptl_nid_t nid;
-        ptl_pid_t pid;
-        lib_ptl_t tbl;
-        lib_counters_t counters;
-        ptl_ni_limits_t actual_limits;
+typedef struct lib_ni 
+{
+        nal_t            *ni_api;
+        ptl_process_id_t  ni_pid;
+        lib_ptl_t         ni_portals;
+        lib_counters_t    ni_counters;
+        ptl_ni_limits_t   ni_actual_limits;
 
         int               ni_lh_hash_size;      /* size of lib handle hash table */
         struct list_head *ni_lh_hash_table;     /* all extant lib handles, this interface */
         __u64             ni_next_object_cookie; /* cookie generator */
         __u64             ni_interface_cookie;  /* uniquely identifies this ni in this epoch */
         
-        struct list_head ni_test_peers;
+        struct list_head  ni_test_peers;
         
 #ifdef PTL_USE_LIB_FREELIST
-        lib_freelist_t   ni_free_mes;
-        lib_freelist_t   ni_free_msgs;
-        lib_freelist_t   ni_free_mds;
-        lib_freelist_t   ni_free_eqs;
+        lib_freelist_t    ni_free_mes;
+        lib_freelist_t    ni_free_msgs;
+        lib_freelist_t    ni_free_mds;
+        lib_freelist_t    ni_free_eqs;
+#endif
+
+        struct list_head  ni_active_msgs;
+        struct list_head  ni_active_mds;
+        struct list_head  ni_active_eqs;
+
+#ifdef __KERNEL__
+        spinlock_t        ni_lock;
+        wait_queue_head_t ni_waitq;
+#else
+        pthread_mutex_t   ni_mutex;
+        pthread_cond_t    ni_cond;
 #endif
-        struct list_head ni_active_msgs;
-        struct list_head ni_active_mds;
-        struct list_head ni_active_eqs;
 } lib_ni_t;
 
+
+typedef struct lib_nal
+{
+       /* lib-level interface state */
+       lib_ni_t libnal_ni;
+
+       /* NAL-private data */
+       void *libnal_data;
+
+       /*
+        * send: Sends a preformatted header and payload data to a
+        * specified remote process. The payload is scattered over 'niov'
+        * fragments described by iov, starting at 'offset' for 'mlen'
+        * bytes.  
+        * NB the NAL may NOT overwrite iov.  
+        * PTL_OK on success => NAL has committed to send and will call
+        * lib_finalize on completion
+        */
+       ptl_err_t (*libnal_send) 
+                (struct lib_nal *nal, void *private, lib_msg_t *cookie, 
+                 ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, 
+                 unsigned int niov, struct iovec *iov, 
+                 size_t offset, size_t mlen);
+        
+       /* as send, but with a set of page fragments (NULL if not supported) */
+       ptl_err_t (*libnal_send_pages)
+                (struct lib_nal *nal, void *private, lib_msg_t * cookie, 
+                 ptl_hdr_t * hdr, int type, ptl_nid_t nid, ptl_pid_t pid, 
+                 unsigned int niov, ptl_kiov_t *iov, 
+                 size_t offset, size_t mlen);
+       /*
+        * recv: Receives an incoming message from a remote process.  The
+        * payload is to be received into the scattered buffer of 'niov'
+        * fragments described by iov, starting at 'offset' for 'mlen'
+        * bytes.  Payload bytes after 'mlen' up to 'rlen' are to be
+        * discarded.  
+        * NB the NAL may NOT overwrite iov.
+        * PTL_OK on success => NAL has committed to receive and will call
+        * lib_finalize on completion
+        */
+       ptl_err_t (*libnal_recv) 
+                (struct lib_nal *nal, void *private, lib_msg_t * cookie,
+                 unsigned int niov, struct iovec *iov, 
+                 size_t offset, size_t mlen, size_t rlen);
+
+       /* as recv, but with a set of page fragments (NULL if not supported) */
+       ptl_err_t (*libnal_recv_pages) 
+                (struct lib_nal *nal, void *private, lib_msg_t * cookie,
+                 unsigned int niov, ptl_kiov_t *iov, 
+                 size_t offset, size_t mlen, size_t rlen);
+
+       /*
+        * (un)map: Tell the NAL about some memory it will access.
+        * *addrkey passed to libnal_unmap() is what libnal_map() set it to.
+        * type of *iov depends on options.
+        * Set to NULL if not required.
+        */
+       ptl_err_t (*libnal_map)
+                (struct lib_nal *nal, unsigned int niov, struct iovec *iov, 
+                 void **addrkey);
+       void (*libnal_unmap)
+                (struct lib_nal *nal, unsigned int niov, struct iovec *iov, 
+                 void **addrkey);
+
+       /* as (un)map, but with a set of page fragments */
+       ptl_err_t (*libnal_map_pages)
+                (struct lib_nal *nal, unsigned int niov, ptl_kiov_t *iov, 
+                 void **addrkey);
+       void (*libnal_unmap_pages)
+                (struct lib_nal *nal, unsigned int niov, ptl_kiov_t *iov, 
+                 void **addrkey);
+
+       void (*libnal_printf)(struct lib_nal *nal, const char *fmt, ...);
+
+       /* Calculate a network "distance" to given node */
+       int (*libnal_dist) (struct lib_nal *nal, ptl_nid_t nid, unsigned long *dist);
+} lib_nal_t;
+
 #endif
index 1f925c1..bf86569 100644 (file)
 
 #include <portals/types.h>
 
-#ifdef yield
-#undef yield
-#endif
-
 typedef struct nal_t nal_t;
 
 struct nal_t {
+       /* common interface state */
        int              nal_refct;
+        ptl_handle_ni_t  nal_handle;
+
+       /* NAL-private data */
        void            *nal_data;
 
-       int (*startup) (nal_t *nal, ptl_pid_t requested_pid,
-                       ptl_ni_limits_t *req, ptl_ni_limits_t *actual);
+       /* NAL API implementation 
+        * NB only nal_ni_init needs to be set when the NAL registers itself */
+       int (*nal_ni_init) (nal_t *nal, ptl_pid_t requested_pid,
+                           ptl_ni_limits_t *req, ptl_ni_limits_t *actual);
        
-       void (*shutdown) (nal_t *nal);
+       void (*nal_ni_fini) (nal_t *nal);
 
-       int (*forward) (nal_t *nal, int index,  /* Function ID */
-                       void *args, size_t arg_len, void *ret, size_t ret_len);
+       int (*nal_get_id) (nal_t *nal, ptl_process_id_t *id);
+       int (*nal_ni_status) (nal_t *nal, ptl_sr_index_t register, ptl_sr_value_t *status);
+       int (*nal_ni_dist) (nal_t *nal, ptl_process_id_t *id, unsigned long *distance);
+       int (*nal_fail_nid) (nal_t *nal, ptl_nid_t nid, unsigned int threshold);
 
-       int (*yield) (nal_t *nal, unsigned long *flags, int milliseconds);
+       int (*nal_me_attach) (nal_t *nal, ptl_pt_index_t portal,
+                             ptl_process_id_t match_id, 
+                             ptl_match_bits_t match_bits, ptl_match_bits_t ignore_bits,
+                             ptl_unlink_t unlink, ptl_ins_pos_t pos, 
+                             ptl_handle_me_t *handle);
+       int (*nal_me_insert) (nal_t *nal, ptl_handle_me_t *me,
+                             ptl_process_id_t match_id, 
+                             ptl_match_bits_t match_bits, ptl_match_bits_t ignore_bits,
+                             ptl_unlink_t unlink, ptl_ins_pos_t pos, 
+                             ptl_handle_me_t *handle);
+       int (*nal_me_unlink) (nal_t *nal, ptl_handle_me_t *me);
+       
+       int (*nal_md_attach) (nal_t *nal, ptl_handle_me_t *me,
+                             ptl_md_t *md, ptl_unlink_t unlink, 
+                             ptl_handle_md_t *handle);
+       int (*nal_md_bind) (nal_t *nal, 
+                           ptl_md_t *md, ptl_unlink_t unlink, 
+                           ptl_handle_md_t *handle);
+       int (*nal_md_unlink) (nal_t *nal, ptl_handle_md_t *md);
+       int (*nal_md_update) (nal_t *nal, ptl_handle_md_t *md,
+                             ptl_md_t *old_md, ptl_md_t *new_md,
+                             ptl_handle_eq_t *testq);
 
-       void (*lock) (nal_t *nal, unsigned long *flags);
+       int (*nal_eq_alloc) (nal_t *nal, ptl_size_t count,
+                            ptl_eq_handler_t handler,
+                            ptl_handle_eq_t *handle);
+       int (*nal_eq_free) (nal_t *nal, ptl_handle_eq_t *eq);
+       int (*nal_eq_poll) (nal_t *nal, 
+                           ptl_handle_eq_t *eqs, int neqs, int timeout,
+                           ptl_event_t *event, int *which);
 
-       void (*unlock) (nal_t *nal, unsigned long *flags);
+       int (*nal_ace_entry) (nal_t *nal, ptl_ac_index_t index,
+                             ptl_process_id_t match_id, ptl_pt_index_t portal);
+       
+       int (*nal_put) (nal_t *nal, ptl_handle_md_t *md, ptl_ack_req_t ack,
+                       ptl_process_id_t *target, ptl_pt_index_t portal,
+                       ptl_ac_index_t ac, ptl_match_bits_t match,
+                       ptl_size_t offset, ptl_hdr_data_t hdr_data);
+       int (*nal_get) (nal_t *nal, ptl_handle_md_t *md,
+                       ptl_process_id_t *target, ptl_pt_index_t portal,
+                       ptl_ac_index_t ac, ptl_match_bits_t match,
+                       ptl_size_t offset);
 };
 
-extern nal_t *ptl_hndl2nal(ptl_handle_any_t * any);
+extern nal_t *ptl_hndl2nal(ptl_handle_any_t *any);
 
 #ifdef __KERNEL__
 extern int ptl_register_nal(ptl_interface_t interface, nal_t *nal);
index ef2712b..250b954 100644 (file)
@@ -153,17 +153,6 @@ typedef void (*ptl_eq_handler_t)(ptl_event_t *event);
 #define PTL_EQ_HANDLER_NONE NULL
 
 typedef struct {
-        volatile ptl_seq_t sequence;
-        ptl_size_t size;
-        ptl_event_t *base;
-        ptl_handle_any_t cb_eq_handle;
-} ptl_eq_t;
-
-typedef struct {
-        ptl_eq_t *eq;
-} ptl_ni_t;
-
-typedef struct {
        int max_mes;
        int max_mds;
        int max_eqs;
index e48552e..ca98f84 100644 (file)
@@ -190,7 +190,6 @@ typedef struct _gmnal_rxtwe {
 #define NRXTHREADS 10 /* max number of receiver threads */
 
 typedef struct _gmnal_data_t {
-       spinlock_t      cb_lock;
        spinlock_t      stxd_lock;
        struct semaphore stxd_token;
        gmnal_stxd_t    *stxd;
@@ -205,7 +204,7 @@ typedef struct _gmnal_data_t {
        gmnal_srxd_t    *srxd;
        struct gm_hash  *srxd_hash;
        nal_t           *nal;   
-       nal_cb_t        *nal_cb;
+       lib_nal_t       *libnal;
        struct gm_port  *gm_port;
        unsigned int    gm_local_nid;
        unsigned int    gm_global_nid;
@@ -298,7 +297,6 @@ extern gmnal_data_t *global_nal_data;
 #define GMNAL_GM_LOCK_INIT(a)          spin_lock_init(&a->gm_lock);
 #define GMNAL_GM_LOCK(a)               spin_lock(&a->gm_lock);
 #define GMNAL_GM_UNLOCK(a)             spin_unlock(&a->gm_lock);
-#define GMNAL_CB_LOCK_INIT(a)          spin_lock_init(&a->cb_lock);
 
 
 /*
@@ -340,39 +338,19 @@ void gmnal_api_unlock(nal_t *, unsigned long *);
  *     CB NAL
  */
 
-int gmnal_cb_send(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *,
+int gmnal_cb_send(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
        int, ptl_nid_t, ptl_pid_t, unsigned int, struct iovec *, size_t);
 
-int gmnal_cb_send_pages(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *,
+int gmnal_cb_send_pages(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
        int, ptl_nid_t, ptl_pid_t, unsigned int, ptl_kiov_t *, size_t);
 
-int gmnal_cb_recv(nal_cb_t *, void *, lib_msg_t *, 
+int gmnal_cb_recv(lib_nal_t *, void *, lib_msg_t *, 
        unsigned int, struct iovec *, size_t, size_t);
 
-int gmnal_cb_recv_pages(nal_cb_t *, void *, lib_msg_t *, 
+int gmnal_cb_recv_pages(lib_nal_t *, void *, lib_msg_t *, 
        unsigned int, ptl_kiov_t *, size_t, size_t);
 
-int gmnal_cb_read(nal_cb_t *, void *private, void *, user_ptr, size_t);
-
-int gmnal_cb_write(nal_cb_t *, void *private, user_ptr, void *, size_t);
-
-int gmnal_cb_callback(nal_cb_t *, void *, lib_eq_t *, ptl_event_t *);
-
-void *gmnal_cb_malloc(nal_cb_t *, size_t);
-
-void gmnal_cb_free(nal_cb_t *, void *, size_t);
-
-void gmnal_cb_unmap(nal_cb_t *, unsigned int, struct iovec*, void **);
-
-int  gmnal_cb_map(nal_cb_t *, unsigned int, struct iovec*, void **); 
-
-void gmnal_cb_printf(nal_cb_t *, const char *fmt, ...);
-
-void gmnal_cb_cli(nal_cb_t *, unsigned long *);
-
-void gmnal_cb_sti(nal_cb_t *, unsigned long *);
-
-int gmnal_cb_dist(nal_cb_t *, ptl_nid_t, unsigned long *);
+int gmnal_cb_dist(lib_nal_t *, ptl_nid_t, unsigned long *);
 
 int gmnal_init(void);
 
@@ -381,22 +359,14 @@ void  gmnal_fini(void);
 
 
 #define GMNAL_INIT_NAL_CB(a)   do {    \
-                               a->cb_send = gmnal_cb_send; \
-                               a->cb_send_pages = gmnal_cb_send_pages; \
-                               a->cb_recv = gmnal_cb_recv; \
-                               a->cb_recv_pages = gmnal_cb_recv_pages; \
-                               a->cb_read = gmnal_cb_read; \
-                               a->cb_write = gmnal_cb_write; \
-                               a->cb_callback = gmnal_cb_callback; \
-                               a->cb_malloc = gmnal_cb_malloc; \
-                               a->cb_free = gmnal_cb_free; \
-                               a->cb_map = NULL; \
-                               a->cb_unmap = NULL; \
-                               a->cb_printf = gmnal_cb_printf; \
-                               a->cb_cli = gmnal_cb_cli; \
-                               a->cb_sti = gmnal_cb_sti; \
-                               a->cb_dist = gmnal_cb_dist; \
-                               a->nal_data = NULL; \
+                               a->libnal_send = gmnal_cb_send; \
+                               a->libnal_send_pages = gmnal_cb_send_pages; \
+                               a->libnal_recv = gmnal_cb_recv; \
+                               a->libnal_recv_pages = gmnal_cb_recv_pages; \
+                               a->libnal_map = NULL; \
+                               a->libnal_unmap = NULL; \
+                               a->libnal_dist = gmnal_cb_dist; \
+                               a->libnal_data = NULL; \
                                } while (0)
 
 
@@ -451,9 +421,9 @@ void                gmnal_remove_rxtwe(gmnal_data_t *);
 /*
  *     Small messages
  */
-int            gmnal_small_rx(nal_cb_t *, void *, lib_msg_t *, unsigned int, 
+int            gmnal_small_rx(lib_nal_t *, void *, lib_msg_t *, unsigned int, 
                                struct iovec *, size_t, size_t);
-int            gmnal_small_tx(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, 
+int            gmnal_small_tx(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *, 
                                int, ptl_nid_t, ptl_pid_t, 
                                unsigned int, struct iovec*, int);
 void           gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t);
@@ -463,10 +433,10 @@ void              gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t);
 /*
  *     Large messages
  */
-int            gmnal_large_rx(nal_cb_t *, void *, lib_msg_t *, unsigned int, 
+int            gmnal_large_rx(lib_nal_t *, void *, lib_msg_t *, unsigned int, 
                                struct iovec *, size_t, size_t);
 
-int            gmnal_large_tx(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, 
+int            gmnal_large_tx(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *, 
                                int, ptl_nid_t, ptl_pid_t, unsigned int, 
                                struct iovec*, int);
 
index 7c94f93..002587d 100644 (file)
@@ -50,77 +50,6 @@ static ctl_table gmnalnal_top_sysctl_table[] = {
         { 0 }
 };
 
-
-
-
-
-
-/*
- *     gmnal_api_forward
- *     This function takes a packed block of arguments from the NAL API
- *     module and passes them to the NAL CB module. The CB module unpacks
- *     the args and calls the appropriate function indicated by index.
- *     Typically this function is used to pass args between kernel and user
- *     space.
- *     As gmnal exists entirely in the kernel, just pass the arg block directly
- *     to the NAL CB, by passing the args to lib_dispatch
- *     Arguments are
- *     nal_t   nal     Our nal
- *     int     index   the api function that initiated this call 
- *     void    *args   packed block of function args
- *     size_t  arg_len length of args block
- *     void    *ret    A return value for the API NAL
- *     size_t  ret_len Size of the return value
- *     
- */
-
-int
-gmnal_api_forward(nal_t *nal, int index, void *args, size_t arg_len,
-               void *ret, size_t ret_len)
-{
-
-       nal_cb_t        *nal_cb = NULL;
-       gmnal_data_t    *nal_data = NULL;
-
-
-
-
-
-       if (!nal || !args || (index < 0) || (arg_len < 0)) {
-                       CDEBUG(D_ERROR, "Bad args to gmnal_api_forward\n");
-               return (PTL_FAIL);
-       }
-
-       if (ret && (ret_len <= 0)) {
-               CDEBUG(D_ERROR, "Bad args to gmnal_api_forward\n");
-               return (PTL_FAIL);
-       }
-
-
-       if (!nal->nal_data) {
-               CDEBUG(D_ERROR, "bad nal, no nal data\n");      
-               return (PTL_FAIL);
-       }
-       
-       nal_data = nal->nal_data;
-       CDEBUG(D_INFO, "nal_data is [%p]\n", nal_data); 
-
-       if (!nal_data->nal_cb) {
-               CDEBUG(D_ERROR, "bad nal_data, no nal_cb\n");   
-               return (PTL_FAIL);
-       }
-       
-       nal_cb = nal_data->nal_cb;
-       CDEBUG(D_INFO, "nal_cb is [%p]\n", nal_cb);     
-       
-       CDEBUG(D_PORTALS, "gmnal_api_forward calling lib_dispatch\n");
-       lib_dispatch(nal_cb, NULL, index, args, ret);
-       CDEBUG(D_PORTALS, "gmnal_api_forward returns from lib_dispatch\n");
-
-       return(PTL_OK);
-}
-
-
 /*
  *     gmnal_api_shutdown
  *      nal_refct == 0 => called on last matching PtlNIFini()
@@ -131,7 +60,7 @@ void
 gmnal_api_shutdown(nal_t *nal, int interface)
 {
        gmnal_data_t    *nal_data;
-       nal_cb_t        *nal_cb;
+       lib_nal_t       *libnal;
 
         if (nal->nal_refct != 0)
                 return;
@@ -139,9 +68,9 @@ gmnal_api_shutdown(nal_t *nal, int interface)
        CDEBUG(D_TRACE, "gmnal_api_shutdown: nal_data [%p]\n", nal_data);
 
         LASSERT(nal == global_nal_data->nal);
-        nal_data = nal->nal_data;
+        libnal = (lib_nal_t *)nal->nal_data;
+        nal_data = (gmnal_data_t *)libnal->libnal_data;
         LASSERT(nal_data == global_nal_data);
-        nal_cb = nal_data->nal_cb;
 
         /* Stop portals calling our ioctl handler */
         libcfs_nal_cmd_unregister(GMNAL);
@@ -150,7 +79,7 @@ gmnal_api_shutdown(nal_t *nal, int interface)
          * flag so when lib calls us we fail immediately and don't queue any
          * more work but our threads can still call into lib OK.  THEN
          * shutdown our threads, THEN lib_fini() */
-        lib_fini(nal_cb);
+        lib_fini(libnal);
 
        gmnal_stop_rxthread(nal_data);
        gmnal_stop_ctthread(nal_data);
@@ -162,94 +91,22 @@ gmnal_api_shutdown(nal_t *nal, int interface)
        GMNAL_GM_UNLOCK(nal_data);
         if (nal_data->sysctl)
                 unregister_sysctl_table (nal_data->sysctl);
-       PORTAL_FREE(nal, sizeof(nal_t));        
+        /* Don't free 'nal'; it's a static struct */
        PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
-       PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+       PORTAL_FREE(libnal, sizeof(lib_nal_t));
 
         global_nal_data = NULL;
         PORTAL_MODULE_UNUSE;
 }
 
 
-/*
- *     gmnal_api_validate
- *     validate a user address for use in communications
- *     There's nothing to be done here
- */
-int
-gmnal_api_validate(nal_t *nal, void *base, size_t extent)
-{
-
-       return(PTL_OK);
-}
-
-
-
-/*
- *     gmnal_api_yield
- *     Give up the processor
- */
-void
-gmnal_api_yield(nal_t *nal, unsigned long *flags, int milliseconds)
-{
-       CDEBUG(D_TRACE, "gmnal_api_yield : nal [%p]\n", nal);
-
-        if (milliseconds != 0) {
-                CERROR("Blocking yield not implemented yet\n");
-                LBUG();
-        }
-
-        our_cond_resched();
-       return;
-}
-
-
-
-/*
- *     gmnal_api_lock
- *     Take a threadsafe lock
- */
-void
-gmnal_api_lock(nal_t *nal, unsigned long *flags)
-{
-
-       gmnal_data_t    *nal_data;
-       nal_cb_t        *nal_cb;
-
-       nal_data = nal->nal_data;
-       nal_cb = nal_data->nal_cb;
-
-       nal_cb->cb_cli(nal_cb, flags);
-
-       return;
-}
-
-/*
- *     gmnal_api_unlock
- *     Release a threadsafe lock
- */
-void
-gmnal_api_unlock(nal_t *nal, unsigned long *flags)
-{
-       gmnal_data_t    *nal_data;
-       nal_cb_t        *nal_cb;
-
-       nal_data = nal->nal_data;
-       nal_cb = nal_data->nal_cb;
-
-       nal_cb->cb_sti(nal_cb, flags);
-
-       return;
-}
-
-
 int
 gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
                   ptl_ni_limits_t *requested_limits,
                   ptl_ni_limits_t *actual_limits)
 {
 
-       nal_cb_t        *nal_cb = NULL;
+       lib_nal_t       *libnal = NULL;
        gmnal_data_t    *nal_data = NULL;
        gmnal_srxd_t    *srxd = NULL;
        gm_status_t     gm_status;
@@ -258,9 +115,8 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
 
         if (nal->nal_refct != 0) {
                 if (actual_limits != NULL) {
-                        nal_data = (gmnal_data_t *)nal->nal_data;
-                        nal_cb = nal_data->nal_cb;
-                        *actual_limits = nal->_cb->ni.actual_limits;
+                        libnal = (lib_nal_t *)nal->nal_data;
+                        *actual_limits = nal->libnal_ni.ni_actual_limits;
                 return (PTL_OK);
         }
 
@@ -283,24 +139,22 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
        CDEBUG(D_INFO, "Allocd and reset nal_data[%p]\n", nal_data);
        CDEBUG(D_INFO, "small_msg_size is [%d]\n", nal_data->small_msg_size);
 
-       PORTAL_ALLOC(nal_cb, sizeof(nal_cb_t));
-       if (!nal_cb) {
+       PORTAL_ALLOC(libnal, sizeof(lib_nal_t));
+       if (!libnal) {
                PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
                return(PTL_NO_SPACE);
        }
-       memset(nal_cb, 0, sizeof(nal_cb_t));
-       CDEBUG(D_INFO, "Allocd and reset nal_cb[%p]\n", nal_cb);
+       memset(libnal, 0, sizeof(lib_nal_t));
+       CDEBUG(D_INFO, "Allocd and reset libnal[%p]\n", libnal);
 
-       GMNAL_INIT_NAL_CB(nal_cb);
+       GMNAL_INIT_NAL_CB(libnal);
        /*
         *      String them all together
         */
-       nal->nal_data = (void*)nal_data;
-       nal_cb->nal_data = (void*)nal_data;
+       libnal->libnal_data = (void*)nal_data;
        nal_data->nal = nal;
-       nal_data->nal_cb = nal_cb;
+       nal_data->libnal = libnal;
 
-       GMNAL_CB_LOCK_INIT(nal_data);
        GMNAL_GM_LOCK_INIT(nal_data);
 
 
@@ -311,7 +165,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
        if (gm_init() != GM_SUCCESS) {
                CDEBUG(D_ERROR, "call to gm_init failed\n");
                PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
-               PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+               PORTAL_FREE(libnal, sizeof(lib_nal_t));
                return(PTL_FAIL);
        }
 
@@ -356,7 +210,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
                gm_finalize();
                GMNAL_GM_UNLOCK(nal_data);
                PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
-               PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+               PORTAL_FREE(libnal, sizeof(lib_nal_t));
                return(PTL_FAIL);
        }
 
@@ -373,7 +227,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
                gm_finalize();
                GMNAL_GM_UNLOCK(nal_data);
                PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
-               PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+               PORTAL_FREE(libnal, sizeof(lib_nal_t));
                return(PTL_FAIL);
        }
 
@@ -402,7 +256,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
                gm_finalize();
                GMNAL_GM_UNLOCK(nal_data);
                PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
-               PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+               PORTAL_FREE(libnal, sizeof(lib_nal_t));
                return(PTL_FAIL);
        }
 
@@ -434,7 +288,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
                gm_finalize();
                GMNAL_GM_UNLOCK(nal_data);
                PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
-               PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+               PORTAL_FREE(libnal, sizeof(lib_nal_t));
                return(PTL_FAIL);
        }
        nal_data->gm_local_nid = local_nid;
@@ -454,7 +308,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
                gm_finalize();
                GMNAL_GM_UNLOCK(nal_data);
                PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
-               PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+               PORTAL_FREE(libnal, sizeof(lib_nal_t));
                return(PTL_FAIL);
        }
        CDEBUG(D_INFO, "Global node id is [%u]\n", global_nid);
@@ -471,7 +325,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
        CDEBUG(D_INFO, "portals_nid is ["LPU64"]\n", process_id.nid);
        
        CDEBUG(D_PORTALS, "calling lib_init\n");
-       if (lib_init(nal_cb, process_id, 
+       if (lib_init(libnal, nal, process_id, 
                      requested_limits, actual_limits) != PTL_OK) {
                CDEBUG(D_ERROR, "lib_init failed\n");
                gmnal_stop_rxthread(nal_data);
@@ -483,7 +337,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
                gm_finalize();
                GMNAL_GM_UNLOCK(nal_data);
                PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
-               PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+               PORTAL_FREE(libnal, sizeof(lib_nal_t));
                return(PTL_FAIL);
                
        }
@@ -493,7 +347,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
 
                 /* XXX these cleanup cases should be restructured to
                  * minimise duplication... */
-                lib_fini(nal_cb);
+                lib_fini(libnal);
                 
                gmnal_stop_rxthread(nal_data);
                gmnal_stop_ctthread(nal_data);
@@ -504,7 +358,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
                gm_finalize();
                GMNAL_GM_UNLOCK(nal_data);
                PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
-               PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+               PORTAL_FREE(libnal, sizeof(lib_nal_t));
                return(PTL_FAIL);
         }
 
@@ -550,10 +404,6 @@ int gmnal_init(void)
  */
 void gmnal_fini()
 {
-       gmnal_data_t    *nal_data = global_nal_data;
-       nal_t           *nal = nal_data->nal;
-       nal_cb_t        *nal_cb = nal_data->nal_cb;
-
        CDEBUG(D_TRACE, "gmnal_fini\n");
 
         LASSERT(global_nal_data == NULL);
index ece1380..e99d3ec 100644 (file)
@@ -27,7 +27,7 @@
 
 #include "gmnal.h"
 
-int gmnal_cb_recv(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, 
+int gmnal_cb_recv(lib_nal_t *libnal, void *private, lib_msg_t *cookie, 
                   unsigned int niov, struct iovec *iov, size_t mlen, 
                   size_t rlen)
 {
@@ -35,19 +35,19 @@ int gmnal_cb_recv(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        int             status = PTL_OK;
 
 
-       CDEBUG(D_TRACE, "gmnal_cb_recv nal_cb [%p], private[%p], cookie[%p], "
+       CDEBUG(D_TRACE, "gmnal_cb_recv libnal [%p], private[%p], cookie[%p], "
               "niov[%d], iov [%p], mlen["LPSZ"], rlen["LPSZ"]\n", 
-              nal_cb, private, cookie, niov, iov, mlen, rlen);
+              libnal, private, cookie, niov, iov, mlen, rlen);
 
        switch(srxd->type) {
        case(GMNAL_SMALL_MESSAGE):
                CDEBUG(D_INFO, "gmnal_cb_recv got small message\n");
-               status = gmnal_small_rx(nal_cb, private, cookie, niov, 
+               status = gmnal_small_rx(libnal, private, cookie, niov, 
                                         iov, mlen, rlen);
        break;
        case(GMNAL_LARGE_MESSAGE_INIT):
                CDEBUG(D_INFO, "gmnal_cb_recv got large message init\n");
-               status = gmnal_large_rx(nal_cb, private, cookie, niov, 
+               status = gmnal_large_rx(libnal, private, cookie, niov, 
                                         iov, mlen, rlen);
        }
                
@@ -56,7 +56,7 @@ int gmnal_cb_recv(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        return(status);
 }
 
-int gmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, 
+int gmnal_cb_recv_pages(lib_nal_t *libnal, void *private, lib_msg_t *cookie, 
                         unsigned int kniov, ptl_kiov_t *kiov, size_t mlen, 
                         size_t rlen)
 {
@@ -67,9 +67,9 @@ int gmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        ptl_kiov_t      *kiov_dup = kiov;;
 
 
-       CDEBUG(D_TRACE, "gmnal_cb_recv_pages nal_cb [%p],private[%p], "
+       CDEBUG(D_TRACE, "gmnal_cb_recv_pages libnal [%p],private[%p], "
               "cookie[%p], kniov[%d], kiov [%p], mlen["LPSZ"], rlen["LPSZ"]\n",
-              nal_cb, private, cookie, kniov, kiov, mlen, rlen);
+              libnal, private, cookie, kniov, kiov, mlen, rlen);
 
        if (srxd->type == GMNAL_SMALL_MESSAGE) {
                PORTAL_ALLOC(iovec, sizeof(struct iovec)*kniov);
@@ -98,7 +98,7 @@ int gmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
                         kiov++;
                }
                CDEBUG(D_INFO, "calling gmnal_small_rx\n");
-               status = gmnal_small_rx(nal_cb, private, cookie, kniov, 
+               status = gmnal_small_rx(libnal, private, cookie, kniov, 
                                         iovec_dup, mlen, rlen);
                for (i=0; i<kniov; i++) {
                        kunmap(kiov_dup->kiov_page);
@@ -113,7 +113,7 @@ int gmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
 }
 
 
-int gmnal_cb_send(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, 
+int gmnal_cb_send(lib_nal_t *libnal, void *private, lib_msg_t *cookie, 
                   ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, 
                   unsigned int niov, struct iovec *iov, size_t len)
 {
@@ -123,24 +123,25 @@ int gmnal_cb_send(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
 
        CDEBUG(D_TRACE, "gmnal_cb_send niov[%d] len["LPSZ"] nid["LPU64"]\n", 
               niov, len, nid);
-       nal_data = nal_cb->nal_data;
+       nal_data = libnal->libnal_data;
        
        if (GMNAL_IS_SMALL_MESSAGE(nal_data, niov, iov, len)) {
                CDEBUG(D_INFO, "This is a small message send\n");
-               gmnal_small_tx(nal_cb, private, cookie, hdr, type, nid, pid, 
+               gmnal_small_tx(libnal, private, cookie, hdr, type, nid, pid, 
                                niov, iov, len);
        } else {
                CDEBUG(D_ERROR, "Large message send it is not supported\n");
-               lib_finalize(nal_cb, private, cookie, PTL_FAIL);
+               lib_finalize(libnal, private, cookie, PTL_FAIL);
                return(PTL_FAIL);
-               gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, pid, 
+               gmnal_large_tx(libnal, private, cookie, hdr, type, nid, pid, 
                                niov, iov, len);
        }
        return(PTL_OK);
 }
 
-int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, 
-                        ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,                         unsigned int kniov, ptl_kiov_t *kiov, size_t len)
+int gmnal_cb_send_pages(lib_nal_t *libnal, void *private, lib_msg_t *cookie, 
+                        ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+                         unsigned int kniov, ptl_kiov_t *kiov, size_t len)
 {
 
        int     i = 0;
@@ -149,7 +150,7 @@ int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        ptl_kiov_t      *kiov_dup = kiov;
 
        CDEBUG(D_TRACE, "gmnal_cb_send_pages nid ["LPU64"] niov[%d] len["LPSZ"]\n", nid, kniov, len);
-       nal_data = nal_cb->nal_data;
+       nal_data = libnal->libnal_data;
        PORTAL_ALLOC(iovec, kniov*sizeof(struct iovec));
         iovec_dup = iovec;
        if (GMNAL_IS_SMALL_MESSAGE(nal_data, 0, NULL, len)) {
@@ -168,7 +169,7 @@ int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
                         iovec++;
                         kiov++;
                }
-               gmnal_small_tx(nal_cb, private, cookie, hdr, type, nid, 
+               gmnal_small_tx(libnal, private, cookie, hdr, type, nid, 
                                pid, kniov, iovec_dup, len);
        } else {
                CDEBUG(D_ERROR, "Large message send it is not supported yet\n");
@@ -185,7 +186,7 @@ int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
                         iovec++;
                         kiov++;
                }
-               gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, 
+               gmnal_large_tx(libnal, private, cookie, hdr, type, nid, 
                                pid, kniov, iovec, len);
        }
        for (i=0; i<kniov; i++) {
@@ -196,94 +197,7 @@ int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        return(PTL_OK);
 }
 
-int gmnal_cb_read(nal_cb_t *nal_cb, void *private, void *dst, 
-                  user_ptr src, size_t len)
-{
-       gm_bcopy(src, dst, len);
-       return(PTL_OK);
-}
-
-int gmnal_cb_write(nal_cb_t *nal_cb, void *private, user_ptr dst, 
-                   void *src, size_t len)
-{
-       gm_bcopy(src, dst, len);
-       return(PTL_OK);
-}
-
-int gmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq, 
-                      ptl_event_t *ev)
-{
-
-       if (eq->event_callback != NULL) {
-               CDEBUG(D_INFO, "found callback\n");
-               eq->event_callback(ev);
-       }
-       
-       return(PTL_OK);
-}
-
-void *gmnal_cb_malloc(nal_cb_t *nal_cb, size_t len)
-{
-       void *ptr = NULL;
-       CDEBUG(D_TRACE, "gmnal_cb_malloc len["LPSZ"]\n", len);
-       PORTAL_ALLOC(ptr, len);
-       return(ptr);
-}
-
-void gmnal_cb_free(nal_cb_t *nal_cb, void *buf, size_t len)
-{
-       CDEBUG(D_TRACE, "gmnal_cb_free :: buf[%p] len["LPSZ"]\n", buf, len);
-       PORTAL_FREE(buf, len);
-       return;
-}
-
-void gmnal_cb_unmap(nal_cb_t *nal_cb, unsigned int niov, struct iovec *iov, 
-                    void **addrkey)
-{
-       return;
-}
-
-int  gmnal_cb_map(nal_cb_t *nal_cb, unsigned int niov, struct iovec *iov, 
-                  void**addrkey)
-{
-       return(PTL_OK);
-}
-
-void gmnal_cb_printf(nal_cb_t *nal_cb, const char *fmt, ...)
-{
-       CDEBUG(D_TRACE, "gmnal_cb_printf\n");
-       printk(fmt);
-       return;
-}
-
-void gmnal_cb_cli(nal_cb_t *nal_cb, unsigned long *flags)
-{
-       gmnal_data_t    *nal_data = (gmnal_data_t*)nal_cb->nal_data;
-
-       spin_lock_irqsave(&nal_data->cb_lock, *flags);
-       return;
-}
-
-void gmnal_cb_sti(nal_cb_t *nal_cb, unsigned long *flags)
-{
-       gmnal_data_t    *nal_data = (gmnal_data_t*)nal_cb->nal_data;
-
-       spin_unlock_irqrestore(&nal_data->cb_lock, *flags);
-       return;
-}
-
-void gmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq, ptl_event_t *ev)
-{
-        /* holding cb_lock */
-
-        if (eq->event_callback != NULL)
-                eq->event_callback(ev);
-
-        /* We will wake threads sleeping in yield() here, AFTER the
-         * callback, when we implement blocking yield */
-}
-
-int gmnal_cb_dist(nal_cb_t *nal_cb, ptl_nid_t nid, unsigned long *dist)
+int gmnal_cb_dist(lib_nal_t *libnal, ptl_nid_t nid, unsigned long *dist)
 {
        CDEBUG(D_TRACE, "gmnal_cb_dist\n");
        if (dist)
index 1bcd9bd..4af7186 100644 (file)
@@ -189,6 +189,7 @@ gmnal_pre_receive(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, int gmnal_type)
        unsigned int snode, sport, type, length;
        gmnal_msghdr_t  *gmnal_msghdr;
        ptl_hdr_t       *portals_hdr;
+        int              rc;
 
        CDEBUG(D_INFO, "nal_data [%p], we[%p] type [%d]\n", 
               nal_data, we, gmnal_type);
@@ -219,10 +220,12 @@ gmnal_pre_receive(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, int gmnal_type)
         */
        srxd = gmnal_rxbuffer_to_srxd(nal_data, buffer);
        CDEBUG(D_INFO, "Back from gmnal_rxbuffer_to_srxd\n");
-       srxd->nal_data = nal_data;
        if (!srxd) {
                CDEBUG(D_ERROR, "Failed to get receive descriptor\n");
-               lib_parse(nal_data->nal_cb, portals_hdr, srxd);
+                /* I think passing a NULL srxd to lib_parse will crash
+                 * gmnal_recv() */
+                LBUG();
+               lib_parse(nal_data->libnal, portals_hdr, srxd);
                return(GMNAL_STATUS_FAIL);
        }
 
@@ -234,6 +237,7 @@ gmnal_pre_receive(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, int gmnal_type)
                return(GMNAL_STATUS_OK);
        }
 
+       srxd->nal_data = nal_data;
        srxd->type = gmnal_type;
        srxd->nsiov = gmnal_msghdr->niov;
        srxd->gm_source_node = gmnal_msghdr->sender_node_id;
@@ -245,7 +249,12 @@ gmnal_pre_receive(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, int gmnal_type)
         *      cb_recv is responsible for returning the buffer 
         *      for future receive
         */
-       lib_parse(nal_data->nal_cb, portals_hdr, srxd);
+       rc = lib_parse(nal_data->libnal, portals_hdr, srxd);
+
+        if (rc != PTL_OK) {
+                /* I just received garbage; take appropriate action... */
+                LBUG();
+        }
 
        return(GMNAL_STATUS_OK);
 }
@@ -309,19 +318,19 @@ gmnal_rx_bad(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, gmnal_srxd_t *srxd)
  *     Call lib_finalize
  */
 int
-gmnal_small_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, 
+gmnal_small_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie, 
                unsigned int niov, struct iovec *iov, size_t mlen, size_t rlen)
 {
        gmnal_srxd_t    *srxd = NULL;
        void    *buffer = NULL;
-       gmnal_data_t    *nal_data = (gmnal_data_t*)nal_cb->nal_data;
+       gmnal_data_t    *nal_data = (gmnal_data_t*)libnal->nal_data;
 
 
        CDEBUG(D_TRACE, "niov [%d] mlen["LPSZ"]\n", niov, mlen);
 
        if (!private) {
                CDEBUG(D_ERROR, "gmnal_small_rx no context\n");
-               lib_finalize(nal_cb, private, cookie, PTL_FAIL);
+               lib_finalize(libnal, private, cookie, PTL_FAIL);
                return(PTL_FAIL);
        }
 
@@ -343,7 +352,7 @@ gmnal_small_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
         *      let portals library know receive is complete
         */
        CDEBUG(D_PORTALS, "calling lib_finalize\n");
-       lib_finalize(nal_cb, private, cookie, PTL_OK);
+       lib_finalize(libnal, private, cookie, PTL_OK);
        /*
         *      return buffer so it can be used again
         */
@@ -365,11 +374,11 @@ gmnal_small_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
  *     The callback function informs when the send is complete.
  */
 int
-gmnal_small_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, 
+gmnal_small_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie, 
                ptl_hdr_t *hdr, int type, ptl_nid_t global_nid, ptl_pid_t pid, 
                unsigned int niov, struct iovec *iov, int size)
 {
-       gmnal_data_t    *nal_data = (gmnal_data_t*)nal_cb->nal_data;
+       gmnal_data_t    *nal_data = (gmnal_data_t*)libnal->nal_data;
        gmnal_stxd_t    *stxd = NULL;
        void            *buffer = NULL;
        gmnal_msghdr_t  *msghdr = NULL;
@@ -377,9 +386,9 @@ gmnal_small_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        unsigned int    local_nid;
        gm_status_t     gm_status = GM_SUCCESS;
 
-       CDEBUG(D_TRACE, "gmnal_small_tx nal_cb [%p] private [%p] cookie [%p] "
+       CDEBUG(D_TRACE, "gmnal_small_tx libnal [%p] private [%p] cookie [%p] "
               "hdr [%p] type [%d] global_nid ["LPU64"] pid [%d] niov [%d] "
-              "iov [%p] size [%d]\n", nal_cb, private, cookie, hdr, type, 
+              "iov [%p] size [%d]\n", libnal, private, cookie, hdr, type, 
               global_nid, pid, niov, iov, size);
 
        CDEBUG(D_INFO, "portals_hdr:: dest_nid ["LPU64"], src_nid ["LPU64"]\n",
@@ -472,7 +481,7 @@ gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
        gmnal_stxd_t    *stxd = (gmnal_stxd_t*)context;
        lib_msg_t       *cookie = stxd->cookie;
        gmnal_data_t    *nal_data = (gmnal_data_t*)stxd->nal_data;
-       nal_cb_t        *nal_cb = nal_data->nal_cb;
+       lib_nal_t       *libnal = nal_data->libnal;
 
        if (!stxd) {
                CDEBUG(D_TRACE, "send completion event for unknown stxd\n");
@@ -592,7 +601,7 @@ gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
                return;
        }
        gmnal_return_stxd(nal_data, stxd);
-       lib_finalize(nal_cb, stxd, cookie, PTL_OK);
+       lib_finalize(libnal, stxd, cookie, PTL_OK);
        return;
 }
 
@@ -645,7 +654,7 @@ void gmnal_drop_sends_callback(struct gm_port *gm_port, void *context,
  *     this ack, deregister the memory. Only 1 send token is required here.
  */
 int
-gmnal_large_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, 
+gmnal_large_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie, 
                ptl_hdr_t *hdr, int type, ptl_nid_t global_nid, ptl_pid_t pid, 
                unsigned int niov, struct iovec *iov, int size)
 {
@@ -661,15 +670,15 @@ gmnal_large_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        int             niov_dup;
 
 
-       CDEBUG(D_TRACE, "gmnal_large_tx nal_cb [%p] private [%p], cookie [%p] "
+       CDEBUG(D_TRACE, "gmnal_large_tx libnal [%p] private [%p], cookie [%p] "
               "hdr [%p], type [%d] global_nid ["LPU64"], pid [%d], niov [%d], "
-              "iov [%p], size [%d]\n", nal_cb, private, cookie, hdr, type, 
+              "iov [%p], size [%d]\n", libnal, private, cookie, hdr, type, 
               global_nid, pid, niov, iov, size);
 
-       if (nal_cb)
-               nal_data = (gmnal_data_t*)nal_cb->nal_data;
+       if (libnal)
+               nal_data = (gmnal_data_t*)libnal->nal_data;
        else  {
-               CDEBUG(D_ERROR, "no nal_cb.\n");
+               CDEBUG(D_ERROR, "no libnal.\n");
                return(GMNAL_STATUS_FAIL);
        }
        
@@ -811,11 +820,11 @@ gmnal_large_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
  *     data from the sender.
  */
 int
-gmnal_large_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, 
+gmnal_large_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie, 
                unsigned int nriov, struct iovec *riov, size_t mlen, 
                size_t rlen)
 {
-       gmnal_data_t    *nal_data = nal_cb->nal_data;
+       gmnal_data_t    *nal_data = libnal->nal_data;
        gmnal_srxd_t    *srxd = (gmnal_srxd_t*)private;
        void            *buffer = NULL;
        struct  iovec   *riov_dup;
@@ -823,13 +832,13 @@ gmnal_large_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        gmnal_msghdr_t  *msghdr = NULL;
        gm_status_t     gm_status;
 
-       CDEBUG(D_TRACE, "gmnal_large_rx :: nal_cb[%p], private[%p], "
+       CDEBUG(D_TRACE, "gmnal_large_rx :: libnal[%p], private[%p], "
               "cookie[%p], niov[%d], iov[%p], mlen["LPSZ"], rlen["LPSZ"]\n",
-               nal_cb, private, cookie, nriov, riov, mlen, rlen);
+               libnal, private, cookie, nriov, riov, mlen, rlen);
 
        if (!srxd) {
                CDEBUG(D_ERROR, "gmnal_large_rx no context\n");
-               lib_finalize(nal_cb, private, cookie, PTL_FAIL);
+               lib_finalize(libnal, private, cookie, PTL_FAIL);
                return(PTL_FAIL);
        }
 
@@ -1092,7 +1101,7 @@ gmnal_remote_get_callback(gm_port_t *gm_port, void *context,
 
        gmnal_ltxd_t    *ltxd = (gmnal_ltxd_t*)context;
        gmnal_srxd_t    *srxd = ltxd->srxd;
-       nal_cb_t        *nal_cb = srxd->nal_data->nal_cb;
+       lib_nal_t       *libnal = srxd->nal_data->libnal;
        int             lastone;
        struct  iovec   *riov;
        int             nriov;
@@ -1126,7 +1135,7 @@ gmnal_remote_get_callback(gm_port_t *gm_port, void *context,
         *      Let our client application proceed
         */     
        CDEBUG(D_ERROR, "final callback context[%p]\n", srxd);
-       lib_finalize(nal_cb, srxd, srxd->cookie, PTL_OK);
+       lib_finalize(libnal, srxd, srxd->cookie, PTL_OK);
 
        /*
         *      send an ack to the sender to let him know we got the data
@@ -1276,7 +1285,7 @@ gmnal_large_tx_ack_callback(gm_port_t *gm_port, void *context,
 void 
 gmnal_large_tx_ack_received(gmnal_data_t *nal_data, gmnal_srxd_t *srxd)
 {
-       nal_cb_t        *nal_cb = nal_data->nal_cb;
+       lib_nal_t       *libnal = nal_data->libnal;
        gmnal_stxd_t    *stxd = NULL;
        gmnal_msghdr_t  *msghdr = NULL;
        void            *buffer = NULL;
@@ -1291,7 +1300,7 @@ gmnal_large_tx_ack_received(gmnal_data_t *nal_data, gmnal_srxd_t *srxd)
 
        CDEBUG(D_INFO, "gmnal_large_tx_ack_received stxd [%p]\n", stxd);
 
-       lib_finalize(nal_cb, stxd, stxd->cookie, PTL_OK);
+       lib_finalize(libnal, stxd, stxd->cookie, PTL_OK);
 
        /*
         *      extract the iovec from the stxd, deregister the memory.
index f4005de..c595450 100644 (file)
@@ -43,6 +43,9 @@ kpr_nal_interface_t kqswnal_router_interface = {
 #define QSWNAL_SYSCTL_COPY_SMALL_FWD     2
 
 static ctl_table kqswnal_ctl_table[] = {
+       {QSWNAL_SYSCTL_OPTIMIZED_GETS, "optimized_puts",
+        &kqswnal_tunables.kqn_optimized_puts, sizeof (int),
+        0644, NULL, &proc_dointvec},
        {QSWNAL_SYSCTL_OPTIMIZED_GETS, "optimized_gets",
         &kqswnal_tunables.kqn_optimized_gets, sizeof (int),
         0644, NULL, &proc_dointvec},
@@ -55,88 +58,6 @@ static ctl_table kqswnal_top_ctl_table[] = {
 };
 #endif
 
-static int
-kqswnal_forward(nal_t   *nal,
-               int     id,
-               void    *args,  size_t args_len,
-               void    *ret,   size_t ret_len)
-{
-       kqswnal_data_t *k = nal->nal_data;
-       nal_cb_t       *nal_cb = k->kqn_cb;
-
-       LASSERT (nal == &kqswnal_api);
-       LASSERT (k == &kqswnal_data);
-       LASSERT (nal_cb == &kqswnal_lib);
-
-       lib_dispatch(nal_cb, k, id, args, ret); /* nal needs k */
-       return (PTL_OK);
-}
-
-static void
-kqswnal_lock (nal_t *nal, unsigned long *flags)
-{
-       kqswnal_data_t *k = nal->nal_data;
-       nal_cb_t       *nal_cb = k->kqn_cb;
-
-       LASSERT (nal == &kqswnal_api);
-       LASSERT (k == &kqswnal_data);
-       LASSERT (nal_cb == &kqswnal_lib);
-
-       nal_cb->cb_cli(nal_cb,flags);
-}
-
-static void
-kqswnal_unlock(nal_t *nal, unsigned long *flags)
-{
-       kqswnal_data_t *k = nal->nal_data;
-       nal_cb_t       *nal_cb = k->kqn_cb;
-
-       LASSERT (nal == &kqswnal_api);
-       LASSERT (k == &kqswnal_data);
-       LASSERT (nal_cb == &kqswnal_lib);
-
-       nal_cb->cb_sti(nal_cb,flags);
-}
-
-static int
-kqswnal_yield(nal_t *nal, unsigned long *flags, int milliseconds)
-{
-       /* NB called holding statelock */
-        wait_queue_t       wait;
-       unsigned long      now = jiffies;
-
-       CDEBUG (D_NET, "yield\n");
-
-       if (milliseconds == 0) {
-               if (need_resched())
-                       schedule();
-               return 0;
-       }
-
-       init_waitqueue_entry(&wait, current);
-       set_current_state(TASK_INTERRUPTIBLE);
-       add_wait_queue(&kqswnal_data.kqn_yield_waitq, &wait);
-
-       kqswnal_unlock(nal, flags);
-
-       if (milliseconds < 0)
-               schedule ();
-       else
-               schedule_timeout((milliseconds * HZ) / 1000);
-       
-       kqswnal_lock(nal, flags);
-
-       remove_wait_queue(&kqswnal_data.kqn_yield_waitq, &wait);
-
-       if (milliseconds > 0) {
-               milliseconds -= ((jiffies - now) * 1000) / HZ;
-               if (milliseconds < 0)
-                       milliseconds = 0;
-       }
-       
-       return (milliseconds);
-}
-
 int
 kqswnal_get_tx_desc (struct portals_cfg *pcfg)
 {
@@ -186,7 +107,7 @@ kqswnal_cmd (struct portals_cfg *pcfg, void *private)
                        kqswnal_data.kqn_nid_offset);
                kqswnal_data.kqn_nid_offset =
                        pcfg->pcfg_nid - kqswnal_data.kqn_elanid;
-               kqswnal_lib.ni.nid = pcfg->pcfg_nid;
+               kqswnal_lib.libnal_ni.ni_pid.nid = pcfg->pcfg_nid;
                return (0);
                
        default:
@@ -469,9 +390,11 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
        ptl_process_id_t  my_process_id;
        int               pkmem = atomic_read(&portal_kmemory);
 
+       LASSERT (nal == &kqswnal_api);
+
        if (nal->nal_refct != 0) {
                if (actual_limits != NULL)
-                       *actual_limits = kqswnal_lib.ni.actual_limits;
+                       *actual_limits = kqswnal_lib.libnal_ni.ni_actual_limits;
                /* This module got the first ref */
                PORTAL_MODULE_USE;
                return (PTL_OK);
@@ -481,18 +404,9 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
 
        CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&portal_kmemory));
 
-       memset(&kqswnal_rpc_success, 0, sizeof(kqswnal_rpc_success));
-       memset(&kqswnal_rpc_failed, 0, sizeof(kqswnal_rpc_failed));
-#if MULTIRAIL_EKC
-       kqswnal_rpc_failed.Data[0] = -ECONNREFUSED;
-#else
-       kqswnal_rpc_failed.Status = -ECONNREFUSED;
-#endif
        /* ensure all pointers NULL etc */
        memset (&kqswnal_data, 0, sizeof (kqswnal_data));
 
-       kqswnal_data.kqn_cb = &kqswnal_lib;
-
        INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds);
        INIT_LIST_HEAD (&kqswnal_data.kqn_nblk_idletxds);
        INIT_LIST_HEAD (&kqswnal_data.kqn_activetxds);
@@ -507,8 +421,12 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
        spin_lock_init (&kqswnal_data.kqn_sched_lock);
        init_waitqueue_head (&kqswnal_data.kqn_sched_waitq);
 
-       spin_lock_init (&kqswnal_data.kqn_statelock);
-       init_waitqueue_head (&kqswnal_data.kqn_yield_waitq);
+       /* Leave kqn_rpc_success zeroed */
+#if MULTIRAIL_EKC
+       kqswnal_data.kqn_rpc_failed.Data[0] = -ECONNREFUSED;
+#else
+       kqswnal_data.kqn_rpc_failed.Status = -ECONNREFUSED;
+#endif
 
        /* pointers/lists/locks initialised */
        kqswnal_data.kqn_init = KQN_INIT_DATA;
@@ -517,13 +435,13 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
        kqswnal_data.kqn_ep = ep_system();
        if (kqswnal_data.kqn_ep == NULL) {
                CERROR("Can't initialise EKC\n");
-               kqswnal_shutdown(&kqswnal_api);
+               kqswnal_shutdown(nal);
                return (PTL_IFACE_INVALID);
        }
 
        if (ep_waitfor_nodeid(kqswnal_data.kqn_ep) == ELAN_INVALID_NODE) {
                CERROR("Can't get elan ID\n");
-               kqswnal_shutdown(&kqswnal_api);
+               kqswnal_shutdown(nal);
                return (PTL_IFACE_INVALID);
        }
 #else
@@ -534,7 +452,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
        if (kqswnal_data.kqn_ep == NULL)
        {
                CERROR ("Can't get elan device 0\n");
-               kqswnal_shutdown(&kqswnal_api);
+               kqswnal_shutdown(nal);
                return (PTL_IFACE_INVALID);
        }
 #endif
@@ -550,7 +468,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
        if (kqswnal_data.kqn_eptx == NULL)
        {
                CERROR ("Can't allocate transmitter\n");
-               kqswnal_shutdown (&kqswnal_api);
+               kqswnal_shutdown (nal);
                return (PTL_NO_SPACE);
        }
 
@@ -563,7 +481,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
        if (kqswnal_data.kqn_eprx_small == NULL)
        {
                CERROR ("Can't install small msg receiver\n");
-               kqswnal_shutdown (&kqswnal_api);
+               kqswnal_shutdown (nal);
                return (PTL_NO_SPACE);
        }
 
@@ -573,7 +491,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
        if (kqswnal_data.kqn_eprx_large == NULL)
        {
                CERROR ("Can't install large msg receiver\n");
-               kqswnal_shutdown (&kqswnal_api);
+               kqswnal_shutdown (nal);
                return (PTL_NO_SPACE);
        }
 
@@ -588,7 +506,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
                                EP_PERM_WRITE);
        if (kqswnal_data.kqn_ep_tx_nmh == NULL) {
                CERROR("Can't reserve tx dma space\n");
-               kqswnal_shutdown(&kqswnal_api);
+               kqswnal_shutdown(nal);
                return (PTL_NO_SPACE);
        }
 #else
@@ -603,7 +521,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
        if (rc != DDI_SUCCESS)
        {
                CERROR ("Can't reserve rx dma space\n");
-               kqswnal_shutdown (&kqswnal_api);
+               kqswnal_shutdown (nal);
                return (PTL_NO_SPACE);
        }
 #endif
@@ -617,7 +535,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
                                EP_PERM_WRITE);
        if (kqswnal_data.kqn_ep_tx_nmh == NULL) {
                CERROR("Can't reserve rx dma space\n");
-               kqswnal_shutdown(&kqswnal_api);
+               kqswnal_shutdown(nal);
                return (PTL_NO_SPACE);
        }
 #else
@@ -633,7 +551,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
        if (rc != DDI_SUCCESS)
        {
                CERROR ("Can't reserve rx dma space\n");
-               kqswnal_shutdown (&kqswnal_api);
+               kqswnal_shutdown (nal);
                return (PTL_NO_SPACE);
        }
 #endif
@@ -644,7 +562,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
                     sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS));
        if (kqswnal_data.kqn_txds == NULL)
        {
-               kqswnal_shutdown (&kqswnal_api);
+               kqswnal_shutdown (nal);
                return (PTL_NO_SPACE);
        }
 
@@ -660,7 +578,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
                PORTAL_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE);
                if (ktx->ktx_buffer == NULL)
                {
-                       kqswnal_shutdown (&kqswnal_api);
+                       kqswnal_shutdown (nal);
                        return (PTL_NO_SPACE);
                }
 
@@ -697,7 +615,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
                      sizeof (kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE));
        if (kqswnal_data.kqn_rxds == NULL)
        {
-               kqswnal_shutdown (&kqswnal_api);
+               kqswnal_shutdown (nal);
                return (PTL_NO_SPACE);
        }
 
@@ -732,7 +650,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
                        struct page *page = alloc_page(GFP_KERNEL);
                        
                        if (page == NULL) {
-                               kqswnal_shutdown (&kqswnal_api);
+                               kqswnal_shutdown (nal);
                                return (PTL_NO_SPACE);
                        }
 
@@ -780,12 +698,12 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
        my_process_id.nid = kqswnal_elanid2nid(kqswnal_data.kqn_elanid);
        my_process_id.pid = 0;
 
-       rc = lib_init(&kqswnal_lib, my_process_id,
+       rc = lib_init(&kqswnal_lib, nal, my_process_id,
                      requested_limits, actual_limits);
         if (rc != PTL_OK)
        {
                CERROR ("lib_init failed %d\n", rc);
-               kqswnal_shutdown (&kqswnal_api);
+               kqswnal_shutdown (nal);
                return (rc);
        }
 
@@ -799,6 +717,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
                kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
 
                /* NB this enqueue can allocate/sleep (attr == 0) */
+               krx->krx_state = KRX_POSTED;
 #if MULTIRAIL_EKC
                rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
                                      &krx->krx_elanbuffer, 0);
@@ -810,7 +729,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
                if (rc != EP_SUCCESS)
                {
                        CERROR ("failed ep_queue_receive %d\n", rc);
-                       kqswnal_shutdown (&kqswnal_api);
+                       kqswnal_shutdown (nal);
                        return (PTL_FAIL);
                }
        }
@@ -822,7 +741,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
                if (rc != 0)
                {
                        CERROR ("failed to spawn scheduling thread: %d\n", rc);
-                       kqswnal_shutdown (&kqswnal_api);
+                       kqswnal_shutdown (nal);
                        return (PTL_FAIL);
                }
        }
@@ -835,7 +754,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
        rc = libcfs_nal_cmd_register (QSWNAL, &kqswnal_cmd, NULL);
        if (rc != 0) {
                CERROR ("Can't initialise command interface (rc = %d)\n", rc);
-               kqswnal_shutdown (&kqswnal_api);
+               kqswnal_shutdown (nal);
                return (PTL_FAIL);
        }
 
@@ -867,17 +786,11 @@ kqswnal_initialise (void)
 {
        int   rc;
 
-       kqswnal_api.startup  = kqswnal_startup;
-       kqswnal_api.shutdown = kqswnal_shutdown;
-       kqswnal_api.forward  = kqswnal_forward;
-       kqswnal_api.yield    = kqswnal_yield;
-       kqswnal_api.lock     = kqswnal_lock;
-       kqswnal_api.unlock   = kqswnal_unlock;
-       kqswnal_api.nal_data = &kqswnal_data;
-
-       kqswnal_lib.nal_data = &kqswnal_data;
+       kqswnal_api.nal_ni_init = kqswnal_startup;
+       kqswnal_api.nal_ni_fini = kqswnal_shutdown;
 
        /* Initialise dynamic tunables to defaults once only */
+       kqswnal_tunables.kqn_optimized_puts = KQSW_OPTIMIZED_PUTS;
        kqswnal_tunables.kqn_optimized_gets = KQSW_OPTIMIZED_GETS;
        
        rc = ptl_register_nal(QSWNAL, &kqswnal_api);
index 6978aa0..b085caa 100644 (file)
@@ -109,7 +109,8 @@ typedef unsigned long kqsw_csum_t;
 
 #define KQSW_RESCHED                    100     /* # busy loops that forces scheduler to yield */
 
-#define KQSW_OPTIMIZED_GETS             1       /* optimized gets? */
+#define KQSW_OPTIMIZED_GETS             1       /* optimize gets >= this size */
+#define KQSW_OPTIMIZED_PUTS            (32<<10) /* optimize puts >= this size */
 #define KQSW_COPY_SMALL_FWD             0       /* copy small fwd messages to pre-mapped buffer? */
 
 /*
@@ -156,12 +157,18 @@ typedef struct
         int              krx_npages;            /* # pages in receive buffer */
         int              krx_nob;               /* Number Of Bytes received into buffer */
         int              krx_rpc_reply_needed;  /* peer waiting for EKC RPC reply */
-        int              krx_rpc_reply_sent;    /* rpc reply sent */
+        int              krx_rpc_reply_status;  /* what status to send */
+        int              krx_state;             /* what this RX is doing */
         atomic_t         krx_refcount;          /* how to tell when rpc is done */
         kpr_fwd_desc_t   krx_fwd;               /* embedded forwarding descriptor */
         ptl_kiov_t       krx_kiov[KQSW_NRXMSGPAGES_LARGE]; /* buffer frags */
 }  kqswnal_rx_t;
 
+#define KRX_POSTED       1                      /* receiving */
+#define KRX_PARSE        2                      /* ready to be parsed */
+#define KRX_COMPLETING   3                      /* waiting to be completed */
+
+
 typedef struct
 {
         struct list_head  ktx_list;             /* enqueue idle/active */
@@ -174,7 +181,7 @@ typedef struct
         int               ktx_nmappedpages;     /* # pages mapped for current message */
         int               ktx_port;             /* destination ep port */
         ptl_nid_t         ktx_nid;              /* destination node */
-        void             *ktx_args[2];          /* completion passthru */
+        void             *ktx_args[3];          /* completion passthru */
         char             *ktx_buffer;           /* pre-allocated contiguous buffer for hdr + small payloads */
         unsigned long     ktx_launchtime;       /* when (in jiffies) the transmit was launched */
 
@@ -193,13 +200,16 @@ typedef struct
 } kqswnal_tx_t;
 
 #define KTX_IDLE        0                       /* on kqn_(nblk_)idletxds */
-#define KTX_SENDING     1                       /* local send */
-#define KTX_FORWARDING  2                       /* routing a packet */
-#define KTX_GETTING     3                       /* local optimised get */
+#define KTX_FORWARDING  1                       /* sending a forwarded packet */
+#define KTX_SENDING     2                       /* normal send */
+#define KTX_GETTING     3                       /* sending optimised get */
+#define KTX_PUTTING     4                       /* sending optimised put */
+#define KTX_RDMAING     5                       /* handling optimised put/get */
 
 typedef struct
 {
         /* dynamic tunables... */
+        int                      kqn_optimized_puts;  /* optimize PUTs >= this size */
         int                      kqn_optimized_gets;  /* optimized GETs? */
 #if CONFIG_SYSCTL
         struct ctl_table_header *kqn_sysctl;          /* sysctl interface */
@@ -230,9 +240,6 @@ typedef struct
         struct list_head   kqn_delayedfwds;     /* delayed forwards */
         struct list_head   kqn_delayedtxds;     /* delayed transmits */
 
-        spinlock_t         kqn_statelock;       /* cb_cli/cb_sti */
-        wait_queue_head_t  kqn_yield_waitq;     /* where yield waits */
-        nal_cb_t          *kqn_cb;              /* -> kqswnal_lib */
 #if MULTIRAIL_EKC
         EP_SYS            *kqn_ep;              /* elan system */
         EP_NMH            *kqn_ep_tx_nmh;       /* elan reserved tx vaddrs */
@@ -250,6 +257,9 @@ typedef struct
         ptl_nid_t          kqn_nid_offset;      /* this cluster's NID offset */
         int                kqn_nnodes;          /* this cluster's size */
         int                kqn_elanid;          /* this node's elan ID */
+
+        EP_STATUSBLK       kqn_rpc_success;     /* preset RPC reply status blocks */
+        EP_STATUSBLK       kqn_rpc_failed;
 }  kqswnal_data_t;
 
 /* kqn_init state */
@@ -258,21 +268,16 @@ typedef struct
 #define KQN_INIT_LIB            2
 #define KQN_INIT_ALL            3
 
-extern nal_cb_t            kqswnal_lib;
+extern lib_nal_t           kqswnal_lib;
 extern nal_t               kqswnal_api;
 extern kqswnal_tunables_t  kqswnal_tunables;
 extern kqswnal_data_t      kqswnal_data;
 
-/* global pre-prepared replies to keep off the stack */
-extern EP_STATUSBLK    kqswnal_rpc_success;
-extern EP_STATUSBLK    kqswnal_rpc_failed;
-
 extern int kqswnal_thread_start (int (*fn)(void *arg), void *arg);
 extern void kqswnal_rxhandler(EP_RXD *rxd);
 extern int kqswnal_scheduler (void *);
 extern void kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd);
-extern void kqswnal_dma_reply_complete (EP_RXD *rxd);
-extern void kqswnal_requeue_rx (kqswnal_rx_t *krx);
+extern void kqswnal_rx_done (kqswnal_rx_t *krx);
 
 static inline ptl_nid_t
 kqswnal_elanid2nid (int elanid) 
@@ -291,6 +296,12 @@ kqswnal_nid2elanid (ptl_nid_t nid)
         return (nid - kqswnal_data.kqn_nid_offset);
 }
 
+static inline ptl_nid_t
+kqswnal_rx_nid(kqswnal_rx_t *krx) 
+{
+        return (kqswnal_elanid2nid(ep_rxd_node(krx->krx_rxd)));
+}
+
 static inline int
 kqswnal_pages_spanned (void *base, int nob)
 {
@@ -313,11 +324,11 @@ static inline kqsw_csum_t kqsw_csum (kqsw_csum_t sum, void *base, int nob)
 }
 #endif
 
-static inline void kqswnal_rx_done (kqswnal_rx_t *krx)
+static inline void kqswnal_rx_decref (kqswnal_rx_t *krx)
 {
         LASSERT (atomic_read (&krx->krx_refcount) > 0);
         if (atomic_dec_and_test (&krx->krx_refcount))
-                kqswnal_requeue_rx(krx);
+                kqswnal_rx_done(krx);
 }
 
 #if MULTIRAIL_EKC
index 2bcb853..e1237a8 100644 (file)
 
 #include "qswnal.h"
 
-EP_STATUSBLK  kqswnal_rpc_success;
-EP_STATUSBLK  kqswnal_rpc_failed;
-
 /*
  *  LIB functions follow
  *
  */
-static ptl_err_t
-kqswnal_read(nal_cb_t *nal, void *private, void *dst_addr, user_ptr src_addr,
-             size_t len)
-{
-        CDEBUG (D_NET, LPX64": reading "LPSZ" bytes from %p -> %p\n",
-                nal->ni.nid, len, src_addr, dst_addr );
-        memcpy( dst_addr, src_addr, len );
-
-        return (PTL_OK);
-}
-
-static ptl_err_t
-kqswnal_write(nal_cb_t *nal, void *private, user_ptr dst_addr, void *src_addr,
-              size_t len)
-{
-        CDEBUG (D_NET, LPX64": writing "LPSZ" bytes from %p -> %p\n",
-                nal->ni.nid, len, src_addr, dst_addr );
-        memcpy( dst_addr, src_addr, len );
-
-        return (PTL_OK);
-}
-
-static void *
-kqswnal_malloc(nal_cb_t *nal, size_t len)
-{
-        void *buf;
-
-        PORTAL_ALLOC(buf, len);
-        return (buf);
-}
-
-static void
-kqswnal_free(nal_cb_t *nal, void *buf, size_t len)
-{
-        PORTAL_FREE(buf, len);
-}
-
-static void
-kqswnal_printf (nal_cb_t * nal, const char *fmt, ...)
-{
-        va_list ap;
-        char msg[256];
-
-        va_start (ap, fmt);
-        vsnprintf (msg, sizeof (msg), fmt, ap);        /* sprint safely */
-        va_end (ap);
-
-        msg[sizeof (msg) - 1] = 0;                /* ensure terminated */
-
-        CDEBUG (D_NET, "%s", msg);
-}
-
-#if (defined(CONFIG_SPARC32) || defined(CONFIG_SPARC64))
-# error "Can't save/restore irq contexts in different procedures"
-#endif
-
-static void
-kqswnal_cli(nal_cb_t *nal, unsigned long *flags)
-{
-        kqswnal_data_t *data= nal->nal_data;
-
-        spin_lock_irqsave(&data->kqn_statelock, *flags);
-}
-
-
-static void
-kqswnal_sti(nal_cb_t *nal, unsigned long *flags)
-{
-        kqswnal_data_t *data= nal->nal_data;
-
-        spin_unlock_irqrestore(&data->kqn_statelock, *flags);
-}
-
-static void
-kqswnal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev)
-{
-        /* holding kqn_statelock */
-
-        if (eq->event_callback != NULL)
-                eq->event_callback(ev);
-
-        if (waitqueue_active(&kqswnal_data.kqn_yield_waitq))
-                wake_up_all(&kqswnal_data.kqn_yield_waitq);
-}
-
 static int
-kqswnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
+kqswnal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist)
 {
-        if (nid == nal->ni.nid)
+        if (nid == nal->libnal_ni.ni_pid.nid)
                 *dist = 0;                      /* it's me */
         else if (kqswnal_nid2elanid (nid) >= 0)
                 *dist = 1;                      /* it's my peer */
@@ -212,11 +124,12 @@ kqswnal_map_tx_kiov (kqswnal_tx_t *ktx, int offset, int nob, int niov, ptl_kiov_
         do {
                 int  fraglen = kiov->kiov_len - offset;
 
-                /* nob exactly spans the iovs */
-                LASSERT (fraglen <= nob);
-                /* each frag fits in a page */
+                /* each page frag is contained in one page */
                 LASSERT (kiov->kiov_offset + kiov->kiov_len <= PAGE_SIZE);
 
+                if (fraglen > nob)
+                        fraglen = nob;
+
                 nmapped++;
                 if (nmapped > maxmapped) {
                         CERROR("Can't map message in %d pages (max %d)\n",
@@ -328,11 +241,12 @@ kqswnal_map_tx_iov (kqswnal_tx_t *ktx, int offset, int nob,
         
         do {
                 int  fraglen = iov->iov_len - offset;
-                long npages  = kqswnal_pages_spanned (iov->iov_base, fraglen);
-
-                /* nob exactly spans the iovs */
-                LASSERT (fraglen <= nob);
+                long npages;
                 
+                if (fraglen > nob)
+                        fraglen = nob;
+                npages = kqswnal_pages_spanned (iov->iov_base, fraglen);
+
                 nmapped += npages;
                 if (nmapped > maxmapped) {
                         CERROR("Can't map message in %d pages (max %d)\n",
@@ -519,40 +433,29 @@ kqswnal_get_idle_tx (kpr_fwd_desc_t *fwd, int may_block)
 void
 kqswnal_tx_done (kqswnal_tx_t *ktx, int error)
 {
-        lib_msg_t     *msg;
-        lib_msg_t     *repmsg = NULL;
-
         switch (ktx->ktx_state) {
         case KTX_FORWARDING:       /* router asked me to forward this packet */
                 kpr_fwd_done (&kqswnal_data.kqn_router,
                               (kpr_fwd_desc_t *)ktx->ktx_args[0], error);
                 break;
 
-        case KTX_SENDING:          /* packet sourced locally */
-                lib_finalize (&kqswnal_lib, ktx->ktx_args[0],
+        case KTX_RDMAING:          /* optimized GET/PUT handled */
+        case KTX_PUTTING:          /* optimized PUT sent */
+        case KTX_SENDING:          /* normal send */
+                lib_finalize (&kqswnal_lib, NULL,
                               (lib_msg_t *)ktx->ktx_args[1],
-                              (error == 0) ? PTL_OK : 
-                              (error == -ENOMEM) ? PTL_NO_SPACE : PTL_FAIL);
+                              (error == 0) ? PTL_OK : PTL_FAIL);
                 break;
 
-        case KTX_GETTING:          /* Peer has DMA-ed direct? */
-                msg = (lib_msg_t *)ktx->ktx_args[1];
-
-                if (error == 0) {
-                        repmsg = lib_create_reply_msg (&kqswnal_lib, 
-                                                       ktx->ktx_nid, msg);
-                        if (repmsg == NULL)
-                                error = -ENOMEM;
-                }
-                
-                if (error == 0) {
-                        lib_finalize (&kqswnal_lib, ktx->ktx_args[0], 
-                                      msg, PTL_OK);
-                        lib_finalize (&kqswnal_lib, NULL, repmsg, PTL_OK);
-                } else {
-                        lib_finalize (&kqswnal_lib, ktx->ktx_args[0], msg,
-                                      (error == -ENOMEM) ? PTL_NO_SPACE : PTL_FAIL);
-                }
+        case KTX_GETTING:          /* optimized GET sent & REPLY received */
+                /* Complete the GET with success since we can't avoid
+                 * delivering a REPLY event; we committed to it when we
+                 * launched the GET */
+                lib_finalize (&kqswnal_lib, NULL, 
+                              (lib_msg_t *)ktx->ktx_args[1], PTL_OK);
+                lib_finalize (&kqswnal_lib, NULL,
+                              (lib_msg_t *)ktx->ktx_args[2],
+                              (error == 0) ? PTL_OK : PTL_FAIL);
                 break;
 
         default:
@@ -580,16 +483,27 @@ kqswnal_txhandler(EP_TXD *txd, void *arg, int status)
                 kqswnal_notify_peer_down(ktx);
                 status = -EHOSTDOWN;
 
-        } else if (ktx->ktx_state == KTX_GETTING) {
-                /* RPC completed OK; what did our peer put in the status
+        } else switch (ktx->ktx_state) {
+
+        case KTX_GETTING:
+        case KTX_PUTTING:
+                /* RPC completed OK; but what did our peer put in the status
                  * block? */
 #if MULTIRAIL_EKC
                 status = ep_txd_statusblk(txd)->Data[0];
 #else
                 status = ep_txd_statusblk(txd)->Status;
 #endif
-        } else {
+                break;
+                
+        case KTX_FORWARDING:
+        case KTX_SENDING:
                 status = 0;
+                break;
+                
+        default:
+                LBUG();
+                break;
         }
 
         kqswnal_tx_done (ktx, status);
@@ -610,21 +524,20 @@ kqswnal_launch (kqswnal_tx_t *ktx)
                 return (-ESHUTDOWN);
 
         LASSERT (dest >= 0);                    /* must be a peer */
-        if (ktx->ktx_state == KTX_GETTING) {
-                /* NB ktx_frag[0] is the GET hdr + kqswnal_remotemd_t.  The
-                 * other frags are the GET sink which we obviously don't
-                 * send here :) */
-#if MULTIRAIL_EKC
+
+        switch (ktx->ktx_state) {
+        case KTX_GETTING:
+        case KTX_PUTTING:
+                /* NB ktx_frag[0] is the GET/PUT hdr + kqswnal_remotemd_t.
+                 * The other frags are the payload, awaiting RDMA */
                 rc = ep_transmit_rpc(kqswnal_data.kqn_eptx, dest,
                                      ktx->ktx_port, attr,
                                      kqswnal_txhandler, ktx,
                                      NULL, ktx->ktx_frags, 1);
-#else
-                rc = ep_transmit_rpc(kqswnal_data.kqn_eptx, dest,
-                                     ktx->ktx_port, attr, kqswnal_txhandler,
-                                     ktx, NULL, ktx->ktx_frags, 1);
-#endif
-        } else {
+                break;
+
+        case KTX_FORWARDING:
+        case KTX_SENDING:
 #if MULTIRAIL_EKC
                 rc = ep_transmit_message(kqswnal_data.kqn_eptx, dest,
                                          ktx->ktx_port, attr,
@@ -636,6 +549,12 @@ kqswnal_launch (kqswnal_tx_t *ktx)
                                        kqswnal_txhandler, ktx, 
                                        ktx->ktx_frags, ktx->ktx_nfrag);
 #endif
+                break;
+                
+        default:
+                LBUG();
+                rc = -EINVAL;                   /* no compiler warning please */
+                break;
         }
 
         switch (rc) {
@@ -658,6 +577,7 @@ kqswnal_launch (kqswnal_tx_t *ktx)
         }
 }
 
+#if 0
 static char *
 hdr_type_string (ptl_hdr_t *hdr)
 {
@@ -726,6 +646,7 @@ kqswnal_cerror_hdr(ptl_hdr_t * hdr)
         }
 
 }                               /* end of print_hdr() */
+#endif
 
 #if !MULTIRAIL_EKC
 void
@@ -787,114 +708,291 @@ kqswnal_eiovs2datav (int ndv, EP_DATAVEC *dv,
         CERROR ("DATAVEC too small\n");
         return (-E2BIG);
 }
+#else
+/* Verify that a local and a remote RDMA descriptor list can be paired up
+ * one-to-one: same number of fragments, and equal lengths fragment by
+ * fragment.  Returns 0 if compatible, -EINVAL (after logging) otherwise. */
+int
+kqswnal_check_rdma (int nlfrag, EP_NMD *lfrag,
+                    int nrfrag, EP_NMD *rfrag)
+{
+        int  idx = 0;
+
+        if (nlfrag != nrfrag) {
+                CERROR("Can't cope with unequal # frags: %d local %d remote\n",
+                       nlfrag, nrfrag);
+                return (-EINVAL);
+        }
+
+        while (idx < nlfrag) {
+                if (lfrag[idx].nmd_len == rfrag[idx].nmd_len) {
+                        /* this pair matches; look at the next one */
+                        idx++;
+                        continue;
+                }
+
+                CERROR("Can't cope with unequal frags %d(%d):"
+                       " %d local %d remote\n",
+                       idx, nlfrag, lfrag[idx].nmd_len, rfrag[idx].nmd_len);
+                return (-EINVAL);
+        }
+
+        return (0);
+}
 #endif
 
-int
-kqswnal_dma_reply (kqswnal_tx_t *ktx, int nfrag, 
-                   struct iovec *iov, ptl_kiov_t *kiov, 
-                   int offset, int nob)
+/* Validate the optimized GET/PUT request received in 'krx' and locate the
+ * remote memory descriptor (RMD) that follows the portals header.
+ *
+ * 'type' is the header type the caller expects; 'expected_nid' is the peer
+ * the caller believes sent it (asserted against the rx's source NID).
+ * Returns a pointer to the RMD inside krx's first receive page, or NULL
+ * (after logging) if the header type or source NID is wrong, or if the
+ * message is too short to contain the whole RMD. */
+kqswnal_remotemd_t *
+kqswnal_parse_rmd (kqswnal_rx_t *krx, int type, ptl_nid_t expected_nid)
 {
-        kqswnal_rx_t       *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
         char               *buffer = (char *)page_address(krx->krx_kiov[0].kiov_page);
+        ptl_hdr_t          *hdr = (ptl_hdr_t *)buffer;
         kqswnal_remotemd_t *rmd = (kqswnal_remotemd_t *)(buffer + KQSW_HDR_SIZE);
-        int                 rc;
-#if MULTIRAIL_EKC
-        int                 i;
-#else
-        EP_DATAVEC          datav[EP_MAXFRAG];
-        int                 ndatav;
-#endif
-        LASSERT (krx->krx_rpc_reply_needed);
-        LASSERT ((iov == NULL) != (kiov == NULL));
+        ptl_nid_t           nid = kqswnal_rx_nid(krx);
+
+        /* Note (1) lib_parse has already flipped hdr.
+         *      (2) RDMA addresses are sent in native endian-ness.  When
+         *      EKC copes with different endian nodes, I'll fix this (and
+         *      eat my hat :) */
+
+        LASSERT (krx->krx_nob >= sizeof(*hdr));
+
+        if (hdr->type != type) {
+                CERROR ("Unexpected optimized get/put type %d (%d expected)"
+                        " from "LPX64"\n", hdr->type, type, nid);
+                return (NULL);
+        }
+        
+        if (hdr->src_nid != nid) {
+                CERROR ("Unexpected optimized get/put source NID "
+                        LPX64" from "LPX64"\n", hdr->src_nid, nid);
+                return (NULL);
+        }
+
+        LASSERT (nid == expected_nid);
 
-        /* see kqswnal_sendmsg comment regarding endian-ness */
         if (buffer + krx->krx_nob < (char *)(rmd + 1)) {
                 /* msg too small to discover rmd size */
                 CERROR ("Incoming message [%d] too small for RMD (%d needed)\n",
                         krx->krx_nob, (int)(((char *)(rmd + 1)) - buffer));
-                return (-EINVAL);
+                return (NULL);
         }
-        
+
         if (buffer + krx->krx_nob < (char *)&rmd->kqrmd_frag[rmd->kqrmd_nfrag]) {
                 /* rmd doesn't fit in the incoming message */
                 CERROR ("Incoming message [%d] too small for RMD[%d] (%d needed)\n",
                         krx->krx_nob, rmd->kqrmd_nfrag,
                         (int)(((char *)&rmd->kqrmd_frag[rmd->kqrmd_nfrag]) - buffer));
-                return (-EINVAL);
+                return (NULL);
         }
 
-        /* Map the source data... */
+        return (rmd);
+}
+
+/* EKC callback run when the RDMA store started for an optimized GET has
+ * finished: the peer's RPC needs no further completion, so drop the
+ * callback's ref on the rx and finish off the tx descriptor. */
+void
+kqswnal_rdma_store_complete (EP_RXD *rxd) 
+{
+        int           eprc = ep_rxd_status(rxd);
+        kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd);
+        kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
+        int           error;
+
+        CDEBUG((eprc == EP_SUCCESS) ? D_NET : D_ERROR,
+               "rxd %p, ktx %p, status %d\n", rxd, ktx, eprc);
+
+        LASSERT (ktx->ktx_state == KTX_RDMAING);
+        LASSERT (krx->krx_rxd == rxd);
+        LASSERT (krx->krx_rpc_reply_needed);
+
+        /* Nothing more to tell the peer about this RPC */
+        krx->krx_rpc_reply_needed = 0;
+        kqswnal_rx_decref (krx);
+
+        if (eprc == EP_SUCCESS)
+                error = 0;
+        else
+                error = -ECONNABORTED;
+
+        /* free ktx & finalize() its lib_msg_t */
+        kqswnal_tx_done(ktx, error);
+}
+
+/* EKC callback run when the RDMA fetch of optimized PUT data has finished.
+ * Records the status the peer's RPC should complete with, finishes off the
+ * tx descriptor, then either drops the rx ref directly or, when called in
+ * interrupt context, queues the rx for the scheduler thread. */
+void
+kqswnal_rdma_fetch_complete (EP_RXD *rxd) 
+{
+        int           eprc = ep_rxd_status(rxd);
+        kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd);
+        kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
+        unsigned long irqflags;
+        int           error;
+
+        CDEBUG((eprc == EP_SUCCESS) ? D_NET : D_ERROR,
+               "rxd %p, ktx %p, status %d\n", rxd, ktx, eprc);
+
+        LASSERT (ktx->ktx_state == KTX_RDMAING);
+        LASSERT (krx->krx_rxd == rxd);
+        LASSERT (krx->krx_rpc_reply_needed);
+
+        /* Set the RPC completion status */
+        error = (eprc == EP_SUCCESS) ? 0 : -ECONNABORTED;
+        krx->krx_rpc_reply_status = error;
+
+        /* free ktx & finalize() its lib_msg_t */
+        kqswnal_tx_done(ktx, error);
+
+        if (in_interrupt()) {
+                /* Complete the RPC in thread context */
+                LASSERT (krx->krx_state == KRX_PARSE);
+                krx->krx_state = KRX_COMPLETING;
+
+                spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, irqflags);
+
+                list_add_tail (&krx->krx_list, &kqswnal_data.kqn_readyrxds);
+                wake_up (&kqswnal_data.kqn_sched_waitq);
+
+                spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, irqflags);
+                return;
+        }
+
+        /* OK to complete the RPC now (iff I had the last ref) */
+        kqswnal_rx_decref (krx);
+}
+
+/* Perform the RDMA half of an optimized GET or PUT received in 'krx'.
+ *
+ * For PTL_MSG_GET the local payload is stored to the peer by completing its
+ * RPC with data (ep_complete_rpc); for PTL_MSG_PUT the payload is fetched
+ * from the peer (ep_rpc_get).  The payload is described by either 'iov' or
+ * 'kiov' (never both), 'niov' entries, starting at 'offset' for 'len' bytes.
+ *
+ * Returns 0 if the RDMA was started (or len == 0, in which case 'libmsg' is
+ * finalized here and the RPC is left to complete with success), otherwise a
+ * negative errno: -EPROTO for a bad request, -ENOMEM if no tx descriptor is
+ * available, -ECONNABORTED if EKC refuses the transfer, or the error from
+ * mapping/matching the buffers.  When the RDMA is started, completion is
+ * reported through kqswnal_rdma_store_complete() or
+ * kqswnal_rdma_fetch_complete(). */
+int
+kqswnal_rdma (kqswnal_rx_t *krx, lib_msg_t *libmsg, int type,
+              int niov, struct iovec *iov, ptl_kiov_t *kiov,
+              size_t offset, size_t len)
+{
+        kqswnal_remotemd_t *rmd;
+        kqswnal_tx_t       *ktx;
+        int                 eprc;
+        int                 rc;
+#if !MULTIRAIL_EKC
+        EP_DATAVEC          datav[EP_MAXFRAG];
+        int                 ndatav;
+#endif
+
+        LASSERT (type == PTL_MSG_GET || type == PTL_MSG_PUT);
+        /* Not both mapped and paged payload */
+        LASSERT (iov == NULL || kiov == NULL);
+        /* RPC completes with failure by default */
+        LASSERT (krx->krx_rpc_reply_needed);
+        LASSERT (krx->krx_rpc_reply_status != 0);
+
+        rmd = kqswnal_parse_rmd(krx, type, libmsg->ev.initiator.nid);
+        if (rmd == NULL)
+                return (-EPROTO);
+
+        if (len == 0) {
+                /* data got truncated to nothing. */
+                lib_finalize(&kqswnal_lib, krx, libmsg, PTL_OK);
+                /* Let kqswnal_rx_done() complete the RPC with success */
+                krx->krx_rpc_reply_status = 0;
+                return (0);
+        }
+        
+        /* NB I'm using 'ktx' just to map the local RDMA buffers; I'm not
+           actually sending a portals message with it */
+        ktx = kqswnal_get_idle_tx(NULL, 0);
+        if (ktx == NULL) {
+                CERROR ("Can't get txd for RDMA with "LPX64"\n",
+                        libmsg->ev.initiator.nid);
+                return (-ENOMEM);
+        }
+
+        ktx->ktx_state   = KTX_RDMAING;
+        ktx->ktx_nid     = libmsg->ev.initiator.nid;
+        ktx->ktx_args[0] = krx;
+        ktx->ktx_args[1] = libmsg;
+
+        /* Take the completion callback's ref on krx now, before anything
+         * can 'goto out': the error path there always drops this ref, so
+         * it must already be held on every path that reaches it */
+        LASSERT (atomic_read(&krx->krx_refcount) > 0);
+        atomic_inc(&krx->krx_refcount);
+
+        /* Start mapping at offset 0 (we're not mapping any headers) */
         ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 0;
+        
         if (kiov != NULL)
-                rc = kqswnal_map_tx_kiov (ktx, offset, nob, nfrag, kiov);
+                rc = kqswnal_map_tx_kiov(ktx, offset, len, niov, kiov);
         else
-                rc = kqswnal_map_tx_iov (ktx, offset, nob, nfrag, iov);
+                rc = kqswnal_map_tx_iov(ktx, offset, len, niov, iov);
 
         if (rc != 0) {
-                CERROR ("Can't map source data: %d\n", rc);
-                return (rc);
+                CERROR ("Can't map local RDMA data: %d\n", rc);
+                goto out;
         }
 
 #if MULTIRAIL_EKC
-        if (ktx->ktx_nfrag != rmd->kqrmd_nfrag) {
-                CERROR("Can't cope with unequal # frags: %d local %d remote\n",
-                       ktx->ktx_nfrag, rmd->kqrmd_nfrag);
-                return (-EINVAL);
+        rc = kqswnal_check_rdma (ktx->ktx_nfrag, ktx->ktx_frags,
+                                 rmd->kqrmd_nfrag, rmd->kqrmd_frag);
+        if (rc != 0) {
+                CERROR ("Incompatible RDMA descriptors\n");
+                goto out;
         }
-        
-        for (i = 0; i < rmd->kqrmd_nfrag; i++)
-                if (ktx->ktx_frags[i].nmd_len != rmd->kqrmd_frag[i].nmd_len) {
-                        CERROR("Can't cope with unequal frags %d(%d):"
-                               " %d local %d remote\n",
-                               i, rmd->kqrmd_nfrag, 
-                               ktx->ktx_frags[i].nmd_len, 
-                               rmd->kqrmd_frag[i].nmd_len);
-                        return (-EINVAL);
-                }
 #else
-        ndatav = kqswnal_eiovs2datav (EP_MAXFRAG, datav,
-                                      ktx->ktx_nfrag, ktx->ktx_frags,
-                                      rmd->kqrmd_nfrag, rmd->kqrmd_frag);
+        switch (type) {
+        default:
+                LBUG();
+
+        case PTL_MSG_GET:
+                ndatav = kqswnal_eiovs2datav(EP_MAXFRAG, datav,
+                                             ktx->ktx_nfrag, ktx->ktx_frags,
+                                             rmd->kqrmd_nfrag, rmd->kqrmd_frag);
+                break;
+
+        case PTL_MSG_PUT:
+                ndatav = kqswnal_eiovs2datav(EP_MAXFRAG, datav,
+                                             rmd->kqrmd_nfrag, rmd->kqrmd_frag,
+                                             ktx->ktx_nfrag, ktx->ktx_frags);
+                break;
+        }
+                
         if (ndatav < 0) {
                 CERROR ("Can't create datavec: %d\n", ndatav);
-                return (ndatav);
+                rc = ndatav;
+                goto out;
         }
 #endif
 
-        /* Our caller will start to race with kqswnal_dma_reply_complete... */
-        LASSERT (atomic_read (&krx->krx_refcount) == 1);
-        atomic_set (&krx->krx_refcount, 2);
+        /* NB the completion callback already holds its ref on krx (taken
+         * when ktx was set up), so it is safe to start the RDMA now */
 
-#if MULTIRAIL_EKC
-        rc = ep_complete_rpc(krx->krx_rxd, kqswnal_dma_reply_complete, ktx, 
-                             &kqswnal_rpc_success,
-                             ktx->ktx_frags, rmd->kqrmd_frag, rmd->kqrmd_nfrag);
-        if (rc == EP_SUCCESS)
-                return (0);
+        switch (type) {
+        default:
+                LBUG();
 
-        /* Well we tried... */
-        krx->krx_rpc_reply_needed = 0;
+        case PTL_MSG_GET:
+#if MULTIRAIL_EKC
+                eprc = ep_complete_rpc(krx->krx_rxd, 
+                                       kqswnal_rdma_store_complete, ktx, 
+                                       &kqswnal_data.kqn_rpc_success,
+                                       ktx->ktx_frags, rmd->kqrmd_frag, rmd->kqrmd_nfrag);
 #else
-        rc = ep_complete_rpc (krx->krx_rxd, kqswnal_dma_reply_complete, ktx,
-                              &kqswnal_rpc_success, datav, ndatav);
-        if (rc == EP_SUCCESS)
-                return (0);
-
-        /* "old" EKC destroys rxd on failed completion */
-        krx->krx_rxd = NULL;
+                eprc = ep_complete_rpc (krx->krx_rxd, 
+                                        kqswnal_rdma_store_complete, ktx,
+                                        &kqswnal_data.kqn_rpc_success, 
+                                        datav, ndatav);
+                if (eprc != EP_SUCCESS) /* "old" EKC destroys rxd on failed completion */
+                        krx->krx_rxd = NULL;
 #endif
+                if (eprc != EP_SUCCESS) {
+                        CERROR("can't complete RPC: %d\n", eprc);
+                        /* don't re-attempt RPC completion */
+                        krx->krx_rpc_reply_needed = 0;
+                        rc = -ECONNABORTED;
+                }
+                break;
+                
+        case PTL_MSG_PUT:
+#if MULTIRAIL_EKC
+                eprc = ep_rpc_get (krx->krx_rxd, 
+                                   kqswnal_rdma_fetch_complete, ktx,
+                                   rmd->kqrmd_frag, ktx->ktx_frags, ktx->ktx_nfrag);
+#else
+                eprc = ep_rpc_get (krx->krx_rxd,
+                                   kqswnal_rdma_fetch_complete, ktx,
+                                   datav, ndatav);
+#endif
+                if (eprc != EP_SUCCESS) {
+                        CERROR("ep_rpc_get failed: %d\n", eprc);
+                        rc = -ECONNABORTED;
+                }
+                break;
+        }
 
-        CERROR("can't complete RPC: %d\n", rc);
-
-        /* reset refcount back to 1: we're not going to be racing with
-         * kqswnal_dma_reply_complete. */
-        atomic_set (&krx->krx_refcount, 1);
+ out:
+        if (rc != 0) {
+                kqswnal_rx_decref(krx);                 /* drop callback's ref */
+                kqswnal_put_idle_tx (ktx);
+        }
 
-        return (-ECONNABORTED);
+        atomic_dec(&kqswnal_data.kqn_pending_txs);
+        return (rc);
 }
 
 static ptl_err_t
-kqswnal_sendmsg (nal_cb_t     *nal,
+kqswnal_sendmsg (lib_nal_t    *nal,
                  void         *private,
                  lib_msg_t    *libmsg,
                  ptl_hdr_t    *hdr,
@@ -916,6 +1014,8 @@ kqswnal_sendmsg (nal_cb_t     *nal,
         int                sumoff;
         int                sumnob;
 #endif
+        /* NB 1. hdr is in network byte order */
+        /*    2. 'private' depends on the message type */
         
         CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid: "LPX64
                " pid %u\n", payload_nob, payload_niov, nid, pid);
@@ -934,6 +1034,15 @@ kqswnal_sendmsg (nal_cb_t     *nal,
                 return (PTL_FAIL);
         }
 
+        if (type == PTL_MSG_REPLY &&            /* can I look in 'private' */
+            ((kqswnal_rx_t *)private)->krx_rpc_reply_needed) { /* is it an RPC */
+                /* Must be a REPLY for an optimized GET */
+                rc = kqswnal_rdma ((kqswnal_rx_t *)private, libmsg, PTL_MSG_GET,
+                                   payload_niov, payload_iov, payload_kiov, 
+                                   payload_offset, payload_nob);
+                return ((rc == 0) ? PTL_OK : PTL_FAIL);
+        }
+
         targetnid = nid;
         if (kqswnal_nid2elanid (nid) < 0) {     /* Can't send direct: find gateway? */
                 rc = kpr_lookup (&kqswnal_data.kqn_router, nid, 
@@ -956,35 +1065,16 @@ kqswnal_sendmsg (nal_cb_t     *nal,
                                           type == PTL_MSG_REPLY ||
                                           in_interrupt()));
         if (ktx == NULL) {
-                kqswnal_cerror_hdr (hdr);
+                CERROR ("Can't get txd for msg type %d for "LPX64"\n",
+                        type, libmsg->ev.initiator.nid);
                 return (PTL_NO_SPACE);
         }
 
+        ktx->ktx_state   = KTX_SENDING;
         ktx->ktx_nid     = targetnid;
         ktx->ktx_args[0] = private;
         ktx->ktx_args[1] = libmsg;
-
-        if (type == PTL_MSG_REPLY &&
-            ((kqswnal_rx_t *)private)->krx_rpc_reply_needed) {
-                if (nid != targetnid ||
-                    kqswnal_nid2elanid(nid) != 
-                    ep_rxd_node(((kqswnal_rx_t *)private)->krx_rxd)) {
-                        CERROR("Optimized reply nid conflict: "
-                               "nid "LPX64" via "LPX64" elanID %d\n",
-                               nid, targetnid,
-                               ep_rxd_node(((kqswnal_rx_t *)private)->krx_rxd));
-                        rc = -EINVAL;
-                        goto out;
-                }
-
-                /* peer expects RPC completion with GET data */
-                rc = kqswnal_dma_reply (ktx, payload_niov, 
-                                        payload_iov, payload_kiov, 
-                                        payload_offset, payload_nob);
-                if (rc != 0)
-                        CERROR ("Can't DMA reply to "LPX64": %d\n", nid, rc);
-                goto out;
-        }
+        ktx->ktx_args[2] = NULL;    /* set when a GET commits to REPLY */
 
         memcpy (ktx->ktx_buffer, hdr, sizeof (*hdr)); /* copy hdr from caller's stack */
         ktx->ktx_wire_hdr = (ptl_hdr_t *)ktx->ktx_buffer;
@@ -1027,28 +1117,31 @@ kqswnal_sendmsg (nal_cb_t     *nal,
         memcpy(ktx->ktx_buffer + sizeof(*hdr) + sizeof(csum), &csum, sizeof(csum));
 #endif
 
-        if (kqswnal_tunables.kqn_optimized_gets &&
-            type == PTL_MSG_GET &&              /* doing a GET */
-            nid == targetnid) {                 /* not forwarding */
+        /* The first frag will be the pre-mapped buffer for (at least) the
+         * portals header. */
+        ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
+
+        if (nid == targetnid &&                 /* not forwarding */
+            ((type == PTL_MSG_GET &&            /* optimize GET? */
+              kqswnal_tunables.kqn_optimized_gets != 0 &&
+              NTOH__u32(hdr->msg.get.sink_length) >= kqswnal_tunables.kqn_optimized_gets) ||
+             (type == PTL_MSG_PUT &&            /* optimize PUT? */
+              kqswnal_tunables.kqn_optimized_puts != 0 &&
+              payload_nob >= kqswnal_tunables.kqn_optimized_puts))) {
                 lib_md_t           *md = libmsg->md;
                 kqswnal_remotemd_t *rmd = (kqswnal_remotemd_t *)(ktx->ktx_buffer + KQSW_HDR_SIZE);
                 
-                /* Optimised path: I send over the Elan vaddrs of the get
-                 * sink buffers, and my peer DMAs directly into them.
+                /* Optimised path: I send over the Elan vaddrs of the local
+                 * buffers, and my peer DMAs directly to/from them.
                  *
                  * First I set up ktx as if it was going to send this
                  * payload, (it needs to map it anyway).  This fills
                  * ktx_frags[1] and onward with the network addresses
                  * of the GET sink frags.  I copy these into ktx_buffer,
-                 * immediately after the header, and send that as my GET
-                 * message.
-                 *
-                 * Note that the addresses are sent in native endian-ness.
-                 * When EKC copes with different endian nodes, I'll fix
-                 * this (and eat my hat :) */
+                 * immediately after the header, and send that as my
+                 * message. */
 
-                ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
-                ktx->ktx_state = KTX_GETTING;
+                ktx->ktx_state = (type == PTL_MSG_PUT) ? KTX_PUTTING : KTX_GETTING;
 
                 if ((libmsg->md->options & PTL_MD_KIOV) != 0) 
                         rc = kqswnal_map_tx_kiov (ktx, 0, md->length,
@@ -1078,12 +1171,21 @@ kqswnal_sendmsg (nal_cb_t     *nal,
                 ktx->ktx_frags[0].Base = ktx->ktx_ebuffer;
                 ktx->ktx_frags[0].Len = KQSW_HDR_SIZE + payload_nob;
 #endif
+                if (type == PTL_MSG_GET) {
+                        /* Allocate reply message now while I'm in thread context */
+                        ktx->ktx_args[2] = lib_create_reply_msg (&kqswnal_lib,
+                                                                 nid, libmsg);
+                        if (ktx->ktx_args[2] == NULL)
+                                goto out;
+
+                        /* NB finalizing the REPLY message is my
+                         * responsibility now, whatever happens. */
+                }
+                
         } else if (payload_nob <= KQSW_TX_MAXCONTIG) {
 
                 /* small message: single frag copied into the pre-mapped buffer */
 
-                ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
-                ktx->ktx_state = KTX_SENDING;
 #if MULTIRAIL_EKC
                 ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer,
                               0, KQSW_HDR_SIZE + payload_nob);
@@ -1105,8 +1207,6 @@ kqswnal_sendmsg (nal_cb_t     *nal,
 
                 /* large message: multiple frags: first is hdr in pre-mapped buffer */
 
-                ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
-                ktx->ktx_state = KTX_SENDING;
 #if MULTIRAIL_EKC
                 ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer,
                               0, KQSW_HDR_SIZE);
@@ -1135,15 +1235,29 @@ kqswnal_sendmsg (nal_cb_t     *nal,
                rc == 0 ? "Sent" : "Failed to send",
                payload_nob, nid, targetnid, rc);
 
-        if (rc != 0)
+        if (rc != 0) {
+                if (ktx->ktx_state == KTX_GETTING &&
+                    ktx->ktx_args[2] != NULL) {
+                        /* We committed to reply, but there was a problem
+                         * launching the GET.  We can't avoid delivering a
+                         * REPLY event since we committed above, so we
+                         * pretend the GET succeeded but the REPLY
+                         * failed. */
+                        rc = 0;
+                        lib_finalize (&kqswnal_lib, private, libmsg, PTL_OK);
+                        lib_finalize (&kqswnal_lib, private,
+                                      (lib_msg_t *)ktx->ktx_args[2], PTL_FAIL);
+                }
+                
                 kqswnal_put_idle_tx (ktx);
-
+        }
+        
         atomic_dec(&kqswnal_data.kqn_pending_txs);
         return (rc == 0 ? PTL_OK : PTL_FAIL);
 }
 
 static ptl_err_t
-kqswnal_send (nal_cb_t     *nal,
+kqswnal_send (lib_nal_t    *nal,
               void         *private,
               lib_msg_t    *libmsg,
               ptl_hdr_t    *hdr,
@@ -1161,7 +1275,7 @@ kqswnal_send (nal_cb_t     *nal,
 }
 
 static ptl_err_t
-kqswnal_send_pages (nal_cb_t     *nal,
+kqswnal_send_pages (lib_nal_t    *nal,
                     void         *private,
                     lib_msg_t    *libmsg,
                     ptl_hdr_t    *hdr,
@@ -1200,7 +1314,7 @@ kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
         if (ktx == NULL)        /* can't get txd right now */
                 return;         /* fwd will be scheduled when tx desc freed */
 
-        if (nid == kqswnal_lib.ni.nid)          /* gateway is me */
+        if (nid == kqswnal_lib.libnal_ni.ni_pid.nid) /* gateway is me */
                 nid = fwd->kprfd_target_nid;    /* target is final dest */
 
         if (kqswnal_nid2elanid (nid) < 0) {
@@ -1254,9 +1368,8 @@ kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
         if (rc != 0) {
                 CERROR ("Failed to forward [%p] to "LPX64": %d\n", fwd, nid, rc);
 
-                kqswnal_put_idle_tx (ktx);
                 /* complete now (with failure) */
-                kpr_fwd_done (&kqswnal_data.kqn_router, fwd, rc);
+                kqswnal_tx_done (ktx, rc);
         }
 
         atomic_dec(&kqswnal_data.kqn_pending_txs);
@@ -1277,29 +1390,48 @@ kqswnal_fwd_callback (void *arg, int error)
                        NTOH__u64(hdr->src_nid), NTOH__u64(hdr->dest_nid),error);
         }
 
-        kqswnal_requeue_rx (krx);
+        LASSERT (atomic_read(&krx->krx_refcount) == 1);
+        kqswnal_rx_decref (krx);
 }
 
 void
-kqswnal_dma_reply_complete (EP_RXD *rxd) 
+kqswnal_requeue_rx (kqswnal_rx_t *krx)  /* repost krx's receive (or retire it on shutdown) */
 {
-        int           status = ep_rxd_status(rxd);
-        kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd);
-        kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
-        lib_msg_t    *msg = (lib_msg_t *)ktx->ktx_args[1];
-        
-        CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR,
-               "rxd %p, ktx %p, status %d\n", rxd, ktx, status);
+        LASSERT (atomic_read(&krx->krx_refcount) == 0);  /* nobody still holds a ref */
+        LASSERT (!krx->krx_rpc_reply_needed);            /* no RPC completion outstanding */
 
-        LASSERT (krx->krx_rxd == rxd);
-        LASSERT (krx->krx_rpc_reply_needed);
+        krx->krx_state = KRX_POSTED;
 
-        krx->krx_rpc_reply_needed = 0;
-        kqswnal_rx_done (krx);
+#if MULTIRAIL_EKC
+        if (kqswnal_data.kqn_shuttingdown) {
+                /* free EKC rxd on shutdown */
+                ep_complete_receive(krx->krx_rxd);
+        } else {
+                /* repost receive */
+                ep_requeue_receive(krx->krx_rxd, 
+                                   kqswnal_rxhandler, krx,
+                                   &krx->krx_elanbuffer, 0);
+        }
+#else                /* !MULTIRAIL_EKC, i.e. "old" EKC */
+        if (kqswnal_data.kqn_shuttingdown)
+                return;         /* shutting down: don't repost */
 
-        lib_finalize (&kqswnal_lib, NULL, msg,
-                      (status == EP_SUCCESS) ? PTL_OK : PTL_FAIL);
-        kqswnal_put_idle_tx (ktx);
+        if (krx->krx_rxd == NULL) {
+                /* We had a failed ep_complete_rpc() which nukes the
+                 * descriptor in "old" EKC */
+                int eprc = ep_queue_receive(krx->krx_eprx, 
+                                            kqswnal_rxhandler, krx,
+                                            krx->krx_elanbuffer, 
+                                            krx->krx_npages * PAGE_SIZE, 0);
+                LASSERT (eprc == EP_SUCCESS);
+                /* We don't handle failure here; it's incredibly rare
+                 * (never reported?) and only happens with "old" EKC */
+        } else {
+                /* descriptor still valid: requeue it in place */
+                ep_requeue_receive(krx->krx_rxd, kqswnal_rxhandler, krx,
+                                   krx->krx_elanbuffer, 
+                                   krx->krx_npages * PAGE_SIZE);
+        }
+#endif
 }
 
 void
@@ -1319,71 +1451,45 @@ kqswnal_rpc_complete (EP_RXD *rxd)
 }
 
 void
-kqswnal_requeue_rx (kqswnal_rx_t *krx) 
+kqswnal_rx_done (kqswnal_rx_t *krx) 
 {
-        int   rc;
+        int           rc;
+        EP_STATUSBLK *sblk;
 
         LASSERT (atomic_read(&krx->krx_refcount) == 0);
 
         if (krx->krx_rpc_reply_needed) {
+                /* We've not completed the peer's RPC yet... */
+                sblk = (krx->krx_rpc_reply_status == 0) ? 
+                       &kqswnal_data.kqn_rpc_success : 
+                       &kqswnal_data.kqn_rpc_failed;
 
-                /* We failed to complete the peer's optimized GET (e.g. we
-                 * couldn't map the source buffers).  We complete the
-                 * peer's EKC rpc now with failure. */
+                LASSERT (!in_interrupt());
 #if MULTIRAIL_EKC
-                rc = ep_complete_rpc(krx->krx_rxd, kqswnal_rpc_complete, krx,
-                                     &kqswnal_rpc_failed, NULL, NULL, 0);
+                rc = ep_complete_rpc(krx->krx_rxd, 
+                                     kqswnal_rpc_complete, krx,
+                                     sblk, NULL, NULL, 0);
                 if (rc == EP_SUCCESS)
                         return;
-                
-                CERROR("can't complete RPC: %d\n", rc);
 #else
-                if (krx->krx_rxd != NULL) {
-                        /* We didn't try (and fail) to complete earlier... */
-                        rc = ep_complete_rpc(krx->krx_rxd, 
-                                             kqswnal_rpc_complete, krx,
-                                             &kqswnal_rpc_failed, NULL, 0);
-                        if (rc == EP_SUCCESS)
-                                return;
-
-                        CERROR("can't complete RPC: %d\n", rc);
-                }
-                
-                /* NB the old ep_complete_rpc() frees rxd on failure, so we
-                 * have to requeue from scratch here, unless we're shutting
-                 * down */
-                if (kqswnal_data.kqn_shuttingdown)
+                rc = ep_complete_rpc(krx->krx_rxd, 
+                                     kqswnal_rpc_complete, krx,
+                                     sblk, NULL, 0);
+                if (rc == EP_SUCCESS)
                         return;
 
-                rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
-                                      krx->krx_elanbuffer, 
-                                      krx->krx_npages * PAGE_SIZE, 0);
-                LASSERT (rc == EP_SUCCESS);
-                /* We don't handle failure here; it's incredibly rare
-                 * (never reported?) and only happens with "old" EKC */
-                return;
+                /* "old" EKC destroys rxd on failed completion */
+                krx->krx_rxd = NULL;
 #endif
+                CERROR("can't complete RPC: %d\n", rc);
+                krx->krx_rpc_reply_needed = 0;
         }
 
-#if MULTIRAIL_EKC
-        if (kqswnal_data.kqn_shuttingdown) {
-                /* free EKC rxd on shutdown */
-                ep_complete_receive(krx->krx_rxd);
-        } else {
-                /* repost receive */
-                ep_requeue_receive(krx->krx_rxd, kqswnal_rxhandler, krx,
-                                   &krx->krx_elanbuffer, 0);
-        }
-#else                
-        /* don't actually requeue on shutdown */
-        if (!kqswnal_data.kqn_shuttingdown) 
-                ep_requeue_receive(krx->krx_rxd, kqswnal_rxhandler, krx,
-                                   krx->krx_elanbuffer, krx->krx_npages * PAGE_SIZE);
-#endif
+        kqswnal_requeue_rx(krx);
 }
         
 void
-kqswnal_rx (kqswnal_rx_t *krx)
+kqswnal_parse (kqswnal_rx_t *krx)
 {
         ptl_hdr_t      *hdr = (ptl_hdr_t *) page_address(krx->krx_kiov[0].kiov_page);
         ptl_nid_t       dest_nid = NTOH__u64 (hdr->dest_nid);
@@ -1391,25 +1497,28 @@ kqswnal_rx (kqswnal_rx_t *krx)
         int             nob;
         int             niov;
 
-        LASSERT (atomic_read(&krx->krx_refcount) == 0);
+        LASSERT (atomic_read(&krx->krx_refcount) == 1);
+
+        if (dest_nid == kqswnal_lib.libnal_ni.ni_pid.nid) { /* It's for me :) */
+                /* I ignore parse errors since I'm not consuming a byte
+                 * stream */
+                (void)lib_parse (&kqswnal_lib, hdr, krx);
 
-        if (dest_nid == kqswnal_lib.ni.nid) { /* It's for me :) */
-                atomic_set(&krx->krx_refcount, 1);
-                lib_parse (&kqswnal_lib, hdr, krx);
-                kqswnal_rx_done(krx);
+                /* Drop my ref; any RDMA activity takes an additional ref */
+                kqswnal_rx_decref(krx);
                 return;
         }
 
 #if KQSW_CHECKSUM
-        CERROR ("checksums for forwarded packets not implemented\n");
-        LBUG ();
+        LASSERTF (0, "checksums for forwarded packets not implemented\n");
 #endif
+
         if (kqswnal_nid2elanid (dest_nid) >= 0)  /* should have gone direct to peer */
         {
                 CERROR("dropping packet from "LPX64" for "LPX64
                        ": target is peer\n", NTOH__u64(hdr->src_nid), dest_nid);
 
-                kqswnal_requeue_rx (krx);
+                kqswnal_rx_decref (krx);
                 return;
         }
 
@@ -1451,7 +1560,9 @@ kqswnal_rxhandler(EP_RXD *rxd)
                rxd, krx, nob, status);
 
         LASSERT (krx != NULL);
-
+        LASSERT (krx->krx_state == KRX_POSTED); /* rx must be in the posted state here */
+        
+        krx->krx_state = KRX_PARSE;
         krx->krx_rxd = rxd;
         krx->krx_nob = nob;
 #if MULTIRAIL_EKC
@@ -1459,7 +1570,10 @@ kqswnal_rxhandler(EP_RXD *rxd)
 #else
         krx->krx_rpc_reply_needed = ep_rxd_isrpc(rxd);
 #endif
-        
+        /* Default to failure if an RPC reply is requested but not handled */
+        krx->krx_rpc_reply_status = -EPROTO;
+        atomic_set (&krx->krx_refcount, 1);
+
         /* must receive a whole header to be able to parse */
         if (status != EP_SUCCESS || nob < sizeof (ptl_hdr_t))
         {
@@ -1475,12 +1589,12 @@ kqswnal_rxhandler(EP_RXD *rxd)
                         CERROR("receive status failed with status %d nob %d\n",
                                ep_rxd_status(rxd), nob);
 #endif
-                kqswnal_requeue_rx (krx);
+                kqswnal_rx_decref(krx);
                 return;
         }
 
         if (!in_interrupt()) {
-                kqswnal_rx (krx);
+                kqswnal_parse(krx);
                 return;
         }
 
@@ -1540,7 +1654,7 @@ kqswnal_csum_error (kqswnal_rx_t *krx, int ishdr)
 #endif
 
 static ptl_err_t
-kqswnal_recvmsg (nal_cb_t     *nal,
+kqswnal_recvmsg (lib_nal_t    *nal,
                  void         *private,
                  lib_msg_t    *libmsg,
                  unsigned int  niov,
@@ -1552,16 +1666,18 @@ kqswnal_recvmsg (nal_cb_t     *nal,
 {
         kqswnal_rx_t *krx = (kqswnal_rx_t *)private;
         char         *buffer = page_address(krx->krx_kiov[0].kiov_page);
+        ptl_hdr_t    *hdr = (ptl_hdr_t *)buffer;
         int           page;
         char         *page_ptr;
         int           page_nob;
         char         *iov_ptr;
         int           iov_nob;
         int           frag;
+        int           rc;
 #if KQSW_CHECKSUM
         kqsw_csum_t   senders_csum;
         kqsw_csum_t   payload_csum = 0;
-        kqsw_csum_t   hdr_csum = kqsw_csum(0, buffer, sizeof(ptl_hdr_t));
+        kqsw_csum_t   hdr_csum = kqsw_csum(0, hdr, sizeof(*hdr));
         size_t        csum_len = mlen;
         int           csum_frags = 0;
         int           csum_nob = 0;
@@ -1574,8 +1690,18 @@ kqswnal_recvmsg (nal_cb_t     *nal,
         if (senders_csum != hdr_csum)
                 kqswnal_csum_error (krx, 1);
 #endif
+        /* NB lib_parse() has already flipped *hdr */
+
         CDEBUG(D_NET,"kqswnal_recv, mlen="LPSZ", rlen="LPSZ"\n", mlen, rlen);
 
+        if (krx->krx_rpc_reply_needed &&
+            hdr->type == PTL_MSG_PUT) {
+                /* This must be an optimized PUT */
+                rc = kqswnal_rdma (krx, libmsg, PTL_MSG_PUT,
+                                   niov, iov, kiov, offset, mlen);
+                return (rc == 0 ? PTL_OK : PTL_FAIL);
+        }
+
         /* What was actually received must be >= payload. */
         LASSERT (mlen <= rlen);
         if (krx->krx_nob < KQSW_HDR_SIZE + mlen) {
@@ -1691,7 +1817,7 @@ kqswnal_recvmsg (nal_cb_t     *nal,
 }
 
 static ptl_err_t
-kqswnal_recv(nal_cb_t     *nal,
+kqswnal_recv(lib_nal_t    *nal,
              void         *private,
              lib_msg_t    *libmsg,
              unsigned int  niov,
@@ -1706,7 +1832,7 @@ kqswnal_recv(nal_cb_t     *nal,
 }
 
 static ptl_err_t
-kqswnal_recv_pages (nal_cb_t     *nal,
+kqswnal_recv_pages (lib_nal_t    *nal,
                     void         *private,
                     lib_msg_t    *libmsg,
                     unsigned int  niov,
@@ -1766,7 +1892,18 @@ kqswnal_scheduler (void *arg)
                         spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
                                                flags);
 
-                        kqswnal_rx (krx);
+                        switch (krx->krx_state) {
+                        case KRX_PARSE:
+                                kqswnal_parse (krx);
+                                break;
+                        case KRX_COMPLETING:
+                                /* Drop last ref to reply to RPC and requeue */
+                                LASSERT (krx->krx_rpc_reply_needed);
+                                kqswnal_rx_decref (krx);
+                                break;
+                        default:
+                                LBUG();
+                        }
 
                         did_something = 1;
                         spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags);
@@ -1835,20 +1972,12 @@ kqswnal_scheduler (void *arg)
         return (0);
 }
 
-nal_cb_t kqswnal_lib =
+lib_nal_t kqswnal_lib =
 {
-        nal_data:       &kqswnal_data,         /* NAL private data */
-        cb_send:        kqswnal_send,
-        cb_send_pages:  kqswnal_send_pages,
-        cb_recv:        kqswnal_recv,
-        cb_recv_pages:  kqswnal_recv_pages,
-        cb_read:        kqswnal_read,
-        cb_write:       kqswnal_write,
-        cb_malloc:      kqswnal_malloc,
-        cb_free:        kqswnal_free,
-        cb_printf:      kqswnal_printf,
-        cb_cli:         kqswnal_cli,
-        cb_sti:         kqswnal_sti,
-        cb_callback:    kqswnal_callback,
-        cb_dist:        kqswnal_dist
+        libnal_data:       &kqswnal_data,         /* NAL private data */
+        libnal_send:        kqswnal_send,
+        libnal_send_pages:  kqswnal_send_pages,
+        libnal_recv:        kqswnal_recv,
+        libnal_recv_pages:  kqswnal_recv_pages,
+        libnal_dist:        kqswnal_dist
 };
index 32bbbec..9d39cb1 100644 (file)
@@ -74,83 +74,9 @@ static ctl_table ksocknal_top_ctl_table[] = {
 #endif
 
 int
-ksocknal_api_forward(nal_t *nal, int id, void *args, size_t args_len,
-                       void *ret, size_t ret_len)
-{
-        ksock_nal_data_t *k;
-        nal_cb_t *nal_cb;
-
-        k = nal->nal_data;
-        nal_cb = k->ksnd_nal_cb;
-
-        lib_dispatch(nal_cb, k, id, args, ret); /* ksocknal_send needs k */
-        return PTL_OK;
-}
-
-void
-ksocknal_api_lock(nal_t *nal, unsigned long *flags)
-{
-        ksock_nal_data_t *k;
-        nal_cb_t *nal_cb;
-
-        k = nal->nal_data;
-        nal_cb = k->ksnd_nal_cb;
-        nal_cb->cb_cli(nal_cb,flags);
-}
-
-void
-ksocknal_api_unlock(nal_t *nal, unsigned long *flags)
-{
-        ksock_nal_data_t *k;
-        nal_cb_t *nal_cb;
-
-        k = nal->nal_data;
-        nal_cb = k->ksnd_nal_cb;
-        nal_cb->cb_sti(nal_cb,flags);
-}
-
-int
-ksocknal_api_yield(nal_t *nal, unsigned long *flags, int milliseconds)
-{
-       /* NB called holding statelock */
-        wait_queue_t       wait;
-       unsigned long      now = jiffies;
-
-       CDEBUG (D_NET, "yield\n");
-
-       if (milliseconds == 0) {
-                our_cond_resched();
-               return 0;
-       }
-
-       init_waitqueue_entry(&wait, current);
-       set_current_state (TASK_INTERRUPTIBLE);
-       add_wait_queue (&ksocknal_data.ksnd_yield_waitq, &wait);
-
-       ksocknal_api_unlock(nal, flags);
-
-       if (milliseconds < 0)
-               schedule ();
-       else
-               schedule_timeout((milliseconds * HZ) / 1000);
-       
-       ksocknal_api_lock(nal, flags);
-
-       remove_wait_queue (&ksocknal_data.ksnd_yield_waitq, &wait);
-
-       if (milliseconds > 0) {
-               milliseconds -= ((jiffies - now) * 1000) / HZ;
-               if (milliseconds < 0)
-                       milliseconds = 0;
-       }
-       
-       return (milliseconds);
-}
-
-int
 ksocknal_set_mynid(ptl_nid_t nid)
 {
-        lib_ni_t *ni = &ksocknal_lib.ni;
+        lib_ni_t *ni = &ksocknal_lib.libnal_ni;
 
         /* FIXME: we have to do this because we call lib_init() at module
          * insertion time, which is before we have 'mynid' available.  lib_init
@@ -159,9 +85,9 @@ ksocknal_set_mynid(ptl_nid_t nid)
          * problem. */
 
         CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n",
-               nid, ni->nid);
+               nid, ni->ni_pid.nid);
 
-        ni->nid = nid;
+        ni->ni_pid.nid = nid;
         return (0);
 }
 
@@ -1527,14 +1453,18 @@ ksocknal_api_shutdown (nal_t *nal)
 
                 /* flag threads to terminate; wake and wait for them to die */
                 ksocknal_data.ksnd_shuttingdown = 1;
+                mb();
                 wake_up_all (&ksocknal_data.ksnd_autoconnectd_waitq);
                 wake_up_all (&ksocknal_data.ksnd_reaper_waitq);
 
                 for (i = 0; i < SOCKNAL_N_SCHED; i++)
                        wake_up_all(&ksocknal_data.ksnd_schedulers[i].kss_waitq);
 
+                i = 4;
                 while (atomic_read (&ksocknal_data.ksnd_nthreads) != 0) {
-                        CDEBUG (D_NET, "waitinf for %d threads to terminate\n",
+                        i++;
+                        CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
+                               "waiting for %d threads to terminate\n",
                                 atomic_read (&ksocknal_data.ksnd_nthreads));
                         set_current_state (TASK_UNINTERRUPTIBLE);
                         schedule_timeout (HZ);
@@ -1590,7 +1520,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
 
         if (nal->nal_refct != 0) {
                 if (actual_limits != NULL)
-                        *actual_limits = ksocknal_lib.ni.actual_limits;
+                        *actual_limits = ksocknal_lib.libnal_ni.ni_actual_limits;
                 /* This module got the first ref */
                 PORTAL_MODULE_USE;
                 return (PTL_OK);
@@ -1613,10 +1543,6 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
 
         rwlock_init(&ksocknal_data.ksnd_global_lock);
 
-        ksocknal_data.ksnd_nal_cb = &ksocknal_lib;
-        spin_lock_init (&ksocknal_data.ksnd_nal_cb_lock);
-        init_waitqueue_head(&ksocknal_data.ksnd_yield_waitq);
-        
         spin_lock_init(&ksocknal_data.ksnd_small_fmp.fmp_lock);
         INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_idle_fmbs);
         INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns);
@@ -1646,7 +1572,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         PORTAL_ALLOC(ksocknal_data.ksnd_schedulers,
                      sizeof(ksock_sched_t) * SOCKNAL_N_SCHED);
         if (ksocknal_data.ksnd_schedulers == NULL) {
-                ksocknal_api_shutdown (&ksocknal_api);
+                ksocknal_api_shutdown (nal);
                 return (-ENOMEM);
         }
 
@@ -1666,11 +1592,11 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         process_id.pid = 0;
         process_id.nid = 0;
         
-        rc = lib_init(&ksocknal_lib, process_id,
+        rc = lib_init(&ksocknal_lib, nal, process_id,
                       requested_limits, actual_limits);
         if (rc != PTL_OK) {
                 CERROR("lib_init failed: error %d\n", rc);
-                ksocknal_api_shutdown (&ksocknal_api);
+                ksocknal_api_shutdown (nal);
                 return (rc);
         }
 
@@ -1682,7 +1608,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
                 if (rc != 0) {
                         CERROR("Can't spawn socknal scheduler[%d]: %d\n",
                                i, rc);
-                        ksocknal_api_shutdown (&ksocknal_api);
+                        ksocknal_api_shutdown (nal);
                         return (rc);
                 }
         }
@@ -1691,7 +1617,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
                 rc = ksocknal_thread_start (ksocknal_autoconnectd, (void *)((long)i));
                 if (rc != 0) {
                         CERROR("Can't spawn socknal autoconnectd: %d\n", rc);
-                        ksocknal_api_shutdown (&ksocknal_api);
+                        ksocknal_api_shutdown (nal);
                         return (rc);
                 }
         }
@@ -1699,7 +1625,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         rc = ksocknal_thread_start (ksocknal_reaper, NULL);
         if (rc != 0) {
                 CERROR ("Can't spawn socknal reaper: %d\n", rc);
-                ksocknal_api_shutdown (&ksocknal_api);
+                ksocknal_api_shutdown (nal);
                 return (rc);
         }
 
@@ -1725,7 +1651,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
                         PORTAL_ALLOC(fmb, offsetof(ksock_fmb_t, 
                                                    fmb_kiov[pool->fmp_buff_pages]));
                         if (fmb == NULL) {
-                                ksocknal_api_shutdown(&ksocknal_api);
+                                ksocknal_api_shutdown(nal);
                                 return (-ENOMEM);
                         }
 
@@ -1735,7 +1661,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
                                 fmb->fmb_kiov[j].kiov_page = alloc_page(GFP_KERNEL);
 
                                 if (fmb->fmb_kiov[j].kiov_page == NULL) {
-                                        ksocknal_api_shutdown (&ksocknal_api);
+                                        ksocknal_api_shutdown (nal);
                                         return (-ENOMEM);
                                 }
 
@@ -1749,7 +1675,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
         rc = libcfs_nal_cmd_register(SOCKNAL, &ksocknal_cmd, NULL);
         if (rc != 0) {
                 CERROR ("Can't initialise command interface (rc = %d)\n", rc);
-                ksocknal_api_shutdown (&ksocknal_api);
+                ksocknal_api_shutdown (nal);
                 return (rc);
         }
 
@@ -1794,14 +1720,8 @@ ksocknal_module_init (void)
         /* check ksnr_connected/connecting field large enough */
         LASSERT(SOCKNAL_CONN_NTYPES <= 4);
         
-        ksocknal_api.startup  = ksocknal_api_startup;
-        ksocknal_api.forward  = ksocknal_api_forward;
-        ksocknal_api.shutdown = ksocknal_api_shutdown;
-        ksocknal_api.lock     = ksocknal_api_lock;
-        ksocknal_api.unlock   = ksocknal_api_unlock;
-        ksocknal_api.nal_data = &ksocknal_data;
-
-        ksocknal_lib.nal_data = &ksocknal_data;
+        ksocknal_api.nal_ni_init = ksocknal_api_startup;
+        ksocknal_api.nal_ni_fini = ksocknal_api_shutdown;
 
         /* Initialise dynamic tunables to defaults once only */
         ksocknal_tunables.ksnd_io_timeout = SOCKNAL_IO_TIMEOUT;
index 87b23dc..ff73f71 100644 (file)
@@ -160,10 +160,6 @@ typedef struct {
         struct list_head *ksnd_peers;           /* hash table of all my known peers */
         int               ksnd_peer_hash_size;  /* size of ksnd_peers */
 
-        nal_cb_t         *ksnd_nal_cb;
-        spinlock_t        ksnd_nal_cb_lock;     /* lib cli/sti lock */
-        wait_queue_head_t ksnd_yield_waitq;     /* where yield waits */
-
         atomic_t          ksnd_nthreads;        /* # live threads */
         int               ksnd_shuttingdown;    /* tell threads to exit */
         ksock_sched_t    *ksnd_schedulers;      /* scheduler state */
@@ -364,7 +360,7 @@ typedef struct ksock_peer
 } ksock_peer_t;
 
 
-extern nal_cb_t         ksocknal_lib;
+extern lib_nal_t        ksocknal_lib;
 extern ksock_nal_data_t ksocknal_data;
 extern ksock_tunables_t ksocknal_tunables;
 
index 21e0abe..5815d16 100644 (file)
  *  LIB functions follow
  *
  */
-ptl_err_t
-ksocknal_read(nal_cb_t *nal, void *private, void *dst_addr,
-              user_ptr src_addr, size_t len)
-{
-        CDEBUG(D_NET, LPX64": reading %ld bytes from %p -> %p\n",
-               nal->ni.nid, (long)len, src_addr, dst_addr);
-
-        memcpy( dst_addr, src_addr, len );
-        return PTL_OK;
-}
-
-ptl_err_t
-ksocknal_write(nal_cb_t *nal, void *private, user_ptr dst_addr,
-               void *src_addr, size_t len)
-{
-        CDEBUG(D_NET, LPX64": writing %ld bytes from %p -> %p\n",
-               nal->ni.nid, (long)len, src_addr, dst_addr);
-
-        memcpy( dst_addr, src_addr, len );
-        return PTL_OK;
-}
-
-void *
-ksocknal_malloc(nal_cb_t *nal, size_t len)
-{
-        void *buf;
-
-        PORTAL_ALLOC(buf, len);
-
-        if (buf != NULL)
-                memset(buf, 0, len);
-
-        return (buf);
-}
-
-void
-ksocknal_free(nal_cb_t *nal, void *buf, size_t len)
-{
-        PORTAL_FREE(buf, len);
-}
-
-void
-ksocknal_printf(nal_cb_t *nal, const char *fmt, ...)
-{
-        va_list ap;
-        char msg[256];
-
-        va_start (ap, fmt);
-        vsnprintf (msg, sizeof (msg), fmt, ap); /* sprint safely */
-        va_end (ap);
-
-        msg[sizeof (msg) - 1] = 0;              /* ensure terminated */
-
-        CDEBUG (D_NET, "%s", msg);
-}
-
-void
-ksocknal_cli(nal_cb_t *nal, unsigned long *flags)
-{
-        ksock_nal_data_t *data = nal->nal_data;
-
-        /* OK to ignore 'flags'; we're only ever serialise threads and
-         * never need to lock out interrupts */
-        spin_lock(&data->ksnd_nal_cb_lock);
-}
-
-void
-ksocknal_sti(nal_cb_t *nal, unsigned long *flags)
-{
-        ksock_nal_data_t *data;
-        data = nal->nal_data;
-
-        /* OK to ignore 'flags'; we're only ever serialise threads and
-         * never need to lock out interrupts */
-        spin_unlock(&data->ksnd_nal_cb_lock);
-}
-
-void
-ksocknal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev)
-{
-        /* holding ksnd_nal_cb_lock */
-
-        if (eq->event_callback != NULL)
-                eq->event_callback(ev);
-        
-        if (waitqueue_active(&ksocknal_data.ksnd_yield_waitq))
-                wake_up_all(&ksocknal_data.ksnd_yield_waitq);
-}
-
 int
-ksocknal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
+ksocknal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist)
 {
         /* I would guess that if ksocknal_get_peer (nid) == NULL,
            and we're not routing, then 'nid' is very distant :) */
-        if ( nal->ni.nid == nid ) {
+        if (nal->libnal_ni.ni_pid.nid == nid) {
                 *dist = 0;
         } else {
                 *dist = 1;
@@ -882,8 +793,8 @@ ksocknal_find_connectable_route_locked (ksock_peer_t *peer)
 {
         struct list_head  *tmp;
         ksock_route_t     *route;
-        ksock_route_t     *candidate = NULL;
-        int                found = 0;
+        ksock_route_t     *first_lazy = NULL;
+        int                found_connecting_or_connected = 0;
         int                bits;
         
         list_for_each (tmp, &peer->ksnp_routes) {
@@ -896,7 +807,7 @@ ksocknal_find_connectable_route_locked (ksock_peer_t *peer)
                         /* All typed connections have been established, or
                          * an untyped connection has been established, or
                          * connections are currently being established */
-                        found = 1;
+                        found_connecting_or_connected = 1;
                         continue;
                 }
 
@@ -904,20 +815,24 @@ ksocknal_find_connectable_route_locked (ksock_peer_t *peer)
                 if (!time_after_eq (jiffies, route->ksnr_timeout))
                         continue;
                 
-                /* always do eager routes */
+                /* eager routes always want to be connected */
                 if (route->ksnr_eager)
                         return (route);
 
-                if (candidate == NULL) {
-                        /* If we don't find any other route that is fully
-                         * connected or connecting, the first connectable
-                         * route is returned.  If it fails to connect, it
-                         * will get placed at the end of the list */
-                        candidate = route;
-                }
+                if (first_lazy == NULL)
+                        first_lazy = route;
         }
-        return (found ? NULL : candidate);
+        
+        /* No eager routes need to be connected.  If some connection has
+         * already been established, or is being established there's nothing to
+         * do.  Otherwise we return the first lazy route we found.  If it fails
+         * to connect, it will go to the end of the list. */
+
+        if (!list_empty (&peer->ksnp_conns) ||
+            found_connecting_or_connected)
+                return (NULL);
+        
+        return (first_lazy);
 }
 
 ksock_route_t *
@@ -1028,7 +943,7 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid)
 }
 
 ptl_err_t
-ksocknal_sendmsg(nal_cb_t     *nal, 
+ksocknal_sendmsg(lib_nal_t     *nal, 
                  void         *private, 
                  lib_msg_t    *cookie,
                  ptl_hdr_t    *hdr, 
@@ -1125,7 +1040,7 @@ ksocknal_sendmsg(nal_cb_t     *nal,
 }
 
 ptl_err_t
-ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie,
+ksocknal_send (lib_nal_t *nal, void *private, lib_msg_t *cookie,
                ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
                unsigned int payload_niov, struct iovec *payload_iov,
                size_t payload_offset, size_t payload_len)
@@ -1137,7 +1052,7 @@ ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie,
 }
 
 ptl_err_t
-ksocknal_send_pages (nal_cb_t *nal, void *private, lib_msg_t *cookie, 
+ksocknal_send_pages (lib_nal_t *nal, void *private, lib_msg_t *cookie, 
                      ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
                      unsigned int payload_niov, ptl_kiov_t *payload_kiov, 
                      size_t payload_offset, size_t payload_len)
@@ -1159,7 +1074,7 @@ ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
                 fwd->kprfd_gateway_nid, fwd->kprfd_target_nid);
 
         /* I'm the gateway; must be the last hop */
-        if (nid == ksocknal_lib.ni.nid)
+        if (nid == ksocknal_lib.libnal_ni.ni_pid.nid)
                 nid = fwd->kprfd_target_nid;
 
         /* setup iov for hdr */
@@ -1544,7 +1459,8 @@ ksocknal_process_receive (ksock_conn_t *conn)
         switch (conn->ksnc_rx_state) {
         case SOCKNAL_RX_HEADER:
                 if (conn->ksnc_hdr.type != HTON__u32(PTL_MSG_HELLO) &&
-                    NTOH__u64(conn->ksnc_hdr.dest_nid) != ksocknal_lib.ni.nid) {
+                    NTOH__u64(conn->ksnc_hdr.dest_nid) != 
+                    ksocknal_lib.libnal_ni.ni_pid.nid) {
                         /* This packet isn't for me */
                         ksocknal_fwd_parse (conn);
                         switch (conn->ksnc_rx_state) {
@@ -1561,7 +1477,13 @@ ksocknal_process_receive (ksock_conn_t *conn)
                 }
 
                 /* sets wanted_len, iovs etc */
-                lib_parse(&ksocknal_lib, &conn->ksnc_hdr, conn);
+                rc = lib_parse(&ksocknal_lib, &conn->ksnc_hdr, conn);
+
+                if (rc != PTL_OK) {
+                        /* I just received garbage: give up on this conn */
+                        ksocknal_close_conn_and_siblings (conn, rc);
+                        return (-EPROTO);
+                }
 
                 if (conn->ksnc_rx_nob_wanted != 0) { /* need to get payload? */
                         conn->ksnc_rx_state = SOCKNAL_RX_BODY;
@@ -1608,7 +1530,7 @@ ksocknal_process_receive (ksock_conn_t *conn)
 }
 
 ptl_err_t
-ksocknal_recv (nal_cb_t *nal, void *private, lib_msg_t *msg,
+ksocknal_recv (lib_nal_t *nal, void *private, lib_msg_t *msg,
                unsigned int niov, struct iovec *iov, 
                size_t offset, size_t mlen, size_t rlen)
 {
@@ -1636,7 +1558,7 @@ ksocknal_recv (nal_cb_t *nal, void *private, lib_msg_t *msg,
 }
 
 ptl_err_t
-ksocknal_recv_pages (nal_cb_t *nal, void *private, lib_msg_t *msg,
+ksocknal_recv_pages (lib_nal_t *nal, void *private, lib_msg_t *msg,
                      unsigned int niov, ptl_kiov_t *kiov, 
                      size_t offset, size_t mlen, size_t rlen)
 {
@@ -2029,7 +1951,7 @@ ksocknal_hello (struct socket *sock, ptl_nid_t *nid, int *type,
         hmv->version_major = __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR);
         hmv->version_minor = __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR);
 
-        hdr.src_nid = __cpu_to_le64 (ksocknal_lib.ni.nid);
+        hdr.src_nid = __cpu_to_le64 (ksocknal_lib.libnal_ni.ni_pid.nid);
         hdr.type    = __cpu_to_le32 (PTL_MSG_HELLO);
 
         hdr.msg.hello.type = __cpu_to_le32 (*type);
@@ -2698,19 +2620,11 @@ ksocknal_reaper (void *arg)
         return (0);
 }
 
-nal_cb_t ksocknal_lib = {
-        nal_data:       &ksocknal_data,                /* NAL private data */
-        cb_send:         ksocknal_send,
-        cb_send_pages:   ksocknal_send_pages,
-        cb_recv:         ksocknal_recv,
-        cb_recv_pages:   ksocknal_recv_pages,
-        cb_read:         ksocknal_read,
-        cb_write:        ksocknal_write,
-        cb_malloc:       ksocknal_malloc,
-        cb_free:         ksocknal_free,
-        cb_printf:       ksocknal_printf,
-        cb_cli:          ksocknal_cli,
-        cb_sti:          ksocknal_sti,
-        cb_callback:     ksocknal_callback,
-        cb_dist:         ksocknal_dist
+lib_nal_t ksocknal_lib = {
+        libnal_data:       &ksocknal_data,      /* NAL private data */
+        libnal_send:        ksocknal_send,
+        libnal_send_pages:  ksocknal_send_pages,
+        libnal_recv:        ksocknal_recv,
+        libnal_recv_pages:  ksocknal_recv_pages,
+        libnal_dist:        ksocknal_dist
 };
index 4e63c86..06f1578 100644 (file)
 #define PORTAL_MINOR 240
 
 struct nal_cmd_handler {
+        int                  nch_number;
         nal_cmd_handler_fn  *nch_handler;
         void                *nch_private;
 };
 
-static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1];
+static struct nal_cmd_handler nal_cmd[16];
 static DECLARE_MUTEX(nal_cmd_sem);
 
 #ifdef PORTAL_DEBUG
@@ -245,23 +246,53 @@ static inline void freedata(void *data, int len)
         PORTAL_FREE(data, len);
 }
 
+struct nal_cmd_handler *
+libcfs_find_nal_cmd_handler(int nal)
+{
+        int    i;
+
+        for (i = 0; i < sizeof(nal_cmd)/sizeof(nal_cmd[0]); i++)
+                if (nal_cmd[i].nch_handler != NULL &&
+                    nal_cmd[i].nch_number == nal)
+                        return (&nal_cmd[i]);
+
+        return (NULL);
+}
+
 int
 libcfs_nal_cmd_register(int nal, nal_cmd_handler_fn *handler, void *private)
 {
-        int rc = 0;
+        struct nal_cmd_handler *cmd;
+        int                     i;
+        int                     rc;
 
         CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler);
 
-        if (nal > 0  && nal <= NAL_MAX_NR) {
-                down(&nal_cmd_sem);
-                if (nal_cmd[nal].nch_handler != NULL)
-                        rc = -EBUSY;
-                else {
-                        nal_cmd[nal].nch_handler = handler;
-                        nal_cmd[nal].nch_private = private;
+        down(&nal_cmd_sem);
+
+        if (libcfs_find_nal_cmd_handler(nal) != NULL) {
+                up (&nal_cmd_sem);
+                return (-EBUSY);
+        }
+
+        cmd = NULL;
+        for (i = 0; i < sizeof(nal_cmd)/sizeof(nal_cmd[0]); i++)
+                if (nal_cmd[i].nch_handler == NULL) {
+                        cmd = &nal_cmd[i];
+                        break;
                 }
-                up(&nal_cmd_sem);
+        
+        if (cmd == NULL) {
+                rc = -EBUSY;
+        } else {
+                rc = 0;
+                cmd->nch_number = nal;
+                cmd->nch_handler = handler;
+                cmd->nch_private = private;
         }
+
+        up(&nal_cmd_sem);
+
         return rc;
 }
 EXPORT_SYMBOL(libcfs_nal_cmd_register);
@@ -269,14 +300,15 @@ EXPORT_SYMBOL(libcfs_nal_cmd_register);
 void
 libcfs_nal_cmd_unregister(int nal)
 {
-        CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal);
+        struct nal_cmd_handler *cmd;
 
-        LASSERT(nal > 0 && nal <= NAL_MAX_NR);
-        LASSERT(nal_cmd[nal].nch_handler != NULL);
+        CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal);
 
         down(&nal_cmd_sem);
-        nal_cmd[nal].nch_handler = NULL;
-        nal_cmd[nal].nch_private = NULL;
+        cmd = libcfs_find_nal_cmd_handler(nal);
+        LASSERT (cmd != NULL);
+        cmd->nch_handler = NULL;
+        cmd->nch_private = NULL;
         up(&nal_cmd_sem);
 }
 EXPORT_SYMBOL(libcfs_nal_cmd_unregister);
@@ -284,16 +316,17 @@ EXPORT_SYMBOL(libcfs_nal_cmd_unregister);
 int
 libcfs_nal_cmd(struct portals_cfg *pcfg)
 {
+        struct nal_cmd_handler *cmd;
         __u32 nal = pcfg->pcfg_nal;
         int   rc = -EINVAL;
         ENTRY;
 
         down(&nal_cmd_sem);
-        if (nal > 0 && nal <= NAL_MAX_NR && 
-            nal_cmd[nal].nch_handler != NULL) {
+        cmd = libcfs_find_nal_cmd_handler(nal);
+        if (cmd != NULL) {
                 CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal, 
                        pcfg->pcfg_command);
-                rc = nal_cmd[nal].nch_handler(pcfg, nal_cmd[nal].nch_private);
+                rc = cmd->nch_handler(pcfg, cmd->nch_private);
         }
         up(&nal_cmd_sem);
 
index 6ce334b..c0f2e71 100644 (file)
@@ -1,6 +1,6 @@
 MODULES := portals
-portals-objs := api-eq.o api-init.o api-me.o api-errno.o api-ni.o api-wrap.o
-portals-objs += lib-dispatch.o lib-init.o lib-me.o lib-msg.o lib-eq.o lib-md.o
+portals-objs := api-errno.o api-ni.o api-wrap.o
+portals-objs += lib-init.o lib-me.o lib-msg.o lib-eq.o lib-md.o
 portals-objs += lib-move.o lib-ni.o lib-pid.o module.o
 
 @INCLUDE_RULES@
index de01765..088902a 100644 (file)
@@ -6,7 +6,7 @@
 include $(src)/../Kernelenv
 
 obj-y += portals.o
-portals-objs    :=     lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o \
+portals-objs    :=     lib-eq.o lib-init.o lib-md.o lib-me.o \
                        lib-move.o lib-msg.o lib-ni.o lib-pid.o \
-                       api-eq.o api-errno.o api-init.o api-me.o api-ni.o \
-                       api-wrap.o module.o
+                       api-errno.o api-ni.o api-wrap.o \
+                       module.o
diff --git a/lustre/portals/portals/api-eq.c b/lustre/portals/portals/api-eq.c
deleted file mode 100644 (file)
index 0306043..0000000
+++ /dev/null
@@ -1,120 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * api/api-eq.c
- * User-level event queue management routines
- *
- *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *  Copyright (c) 2001-2002 Sandia National Laboratories
- *
- *   This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_PORTALS
-#include <portals/api-support.h>
-
-int ptl_get_event (ptl_eq_t *eq, ptl_event_t *ev)
-{
-        int          new_index = eq->sequence & (eq->size - 1);
-        ptl_event_t *new_event = &eq->base[new_index];
-        ENTRY;
-
-        CDEBUG(D_INFO, "new_event: %p, sequence: %lu, eq->size: %u\n",
-               new_event, eq->sequence, eq->size);
-
-        if (PTL_SEQ_GT (eq->sequence, new_event->sequence)) {
-                RETURN(PTL_EQ_EMPTY);
-        }
-
-        *ev = *new_event;
-
-        /* ensure event is delivered correctly despite possible 
-           races with lib_finalize */
-        if (eq->sequence != new_event->sequence) {
-                CERROR("DROPPING EVENT: eq seq %lu ev seq %lu\n",
-                       eq->sequence, new_event->sequence);
-                RETURN(PTL_EQ_DROPPED);
-        }
-
-        eq->sequence = new_event->sequence + 1;
-        RETURN(PTL_OK);
-}
-
-int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t * ev)
-{
-        int which;
-        
-        return (PtlEQPoll (&eventq, 1, 0, ev, &which));
-}
-
-int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out)
-{
-        int which;
-        
-        return (PtlEQPoll (&eventq_in, 1, PTL_TIME_FOREVER, 
-                           event_out, &which));
-}
-
-int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout,
-              ptl_event_t *event_out, int *which_out)
-{
-        nal_t        *nal;
-        int           i;
-        int           rc;
-        unsigned long flags;
-        
-        if (!ptl_init)
-                RETURN(PTL_NO_INIT);
-
-        if (neq_in < 1)
-                RETURN(PTL_EQ_INVALID);
-        
-        nal = ptl_hndl2nal(&eventqs_in[0]);
-        if (nal == NULL)
-                RETURN(PTL_EQ_INVALID);
-
-        nal->lock(nal, &flags);
-
-        for (;;) {
-                for (i = 0; i < neq_in; i++) {
-                        ptl_eq_t *eq = ptl_handle2usereq(&eventqs_in[i]);
-
-                        if (i > 0 &&
-                            ptl_hndl2nal(&eventqs_in[i]) != nal) {
-                                nal->unlock(nal, &flags);
-                                RETURN (PTL_EQ_INVALID);
-                        }
-
-                        /* size must be a power of 2 to handle a wrapped sequence # */
-                        LASSERT (eq->size != 0 &&
-                                 eq->size == LOWEST_BIT_SET (eq->size));
-
-                        rc = ptl_get_event (eq, event_out);
-                        if (rc != PTL_EQ_EMPTY) {
-                                nal->unlock(nal, &flags);
-                                *which_out = i;
-                                RETURN(rc);
-                        }
-                }
-                
-                if (timeout == 0) {
-                        nal->unlock(nal, &flags);
-                        RETURN (PTL_EQ_EMPTY);
-                }
-                        
-                timeout = nal->yield(nal, &flags, timeout);
-        }
-}
index 1c01c88..9a4e5ac 100644 (file)
@@ -40,6 +40,9 @@ const char *ptl_err_str[] = {
 
         "PTL_EQ_IN_USE",
 
+        "PTL_NI_INVALID",
+        "PTL_MD_ILLEGAL",
+
         "PTL_MAX_ERRNO"
 };
 /* If you change these, you must update the number table in portals/errno.h */
diff --git a/lustre/portals/portals/api-init.c b/lustre/portals/portals/api-init.c
deleted file mode 100644 (file)
index 9a98714..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * api/api-init.c
- * Initialization and global data for the p30 user side library
- *
- *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *  Copyright (c) 2001-2002 Sandia National Laboratories
- *
- *   This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_PORTALS
-#include <portals/api-support.h>
-
-int PtlInit(int *max_interfaces)
-{
-        if (max_interfaces != NULL)
-                *max_interfaces = NAL_MAX_NR;
-
-        LASSERT(!strcmp(ptl_err_str[PTL_MAX_ERRNO], "PTL_MAX_ERRNO"));
-
-        return ptl_ni_init();
-}
-
-
-void PtlFini(void)
-{
-        ptl_ni_fini();
-}
-
-
-void PtlSnprintHandle(char *str, int len, ptl_handle_any_t h)
-{
-        snprintf(str, len, "0x%lx."LPX64, h.nal_idx, h.cookie);
-}
diff --git a/lustre/portals/portals/api-me.c b/lustre/portals/portals/api-me.c
deleted file mode 100644 (file)
index 37f0150..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * api/api-me.c
- * Match Entry local operations.
- *
- *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *  Copyright (c) 2001-2002 Sandia National Laboratories
- *
- *   This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_PORTALS
-#include <portals/api-support.h>
-
index 4f37d13..56afd45 100644 (file)
@@ -66,6 +66,8 @@ nal_t *ptl_hndl2nal(ptl_handle_any_t *handle)
          * invalidated out from under her (or worse, swapped for a
          * completely different interface!) */
 
+        LASSERT (ptl_init);
+
         if (((idx ^ NI_HANDLE_MAGIC) & ~NI_HANDLE_MASK) != 0)
                 return NULL;
 
@@ -112,12 +114,17 @@ void ptl_unregister_nal (ptl_interface_t interface)
         ptl_mutex_exit();
 }
 
-int ptl_ni_init(void)
+int PtlInit(int *max_interfaces)
 {
+        LASSERT(!strcmp(ptl_err_str[PTL_MAX_ERRNO], "PTL_MAX_ERRNO"));
+
         /* If this assertion fails, we need more bits in NI_HANDLE_MASK and
          * to shift NI_HANDLE_MAGIC left appropriately */
         LASSERT (NAL_MAX_NR <= (NI_HANDLE_MASK + 1));
         
+        if (max_interfaces != NULL)
+                *max_interfaces = NAL_MAX_NR;
+
         ptl_mutex_enter();
 
         if (!ptl_init) {
@@ -143,7 +150,7 @@ int ptl_ni_init(void)
         return PTL_OK;
 }
 
-void ptl_ni_fini(void)
+void PtlFini(void)
 {
         nal_t  *nal;
         int     i;
@@ -160,7 +167,7 @@ void ptl_ni_fini(void)
                         if (nal->nal_refct != 0) {
                                 CWARN("NAL %d has outstanding refcount %d\n",
                                       i, nal->nal_refct);
-                                nal->shutdown(nal);
+                                nal->nal_ni_fini(nal);
                         }
                         
                         ptl_nal_table[i] = NULL;
@@ -202,9 +209,11 @@ int PtlNIInit(ptl_interface_t interface, ptl_pid_t requested_pid,
         }
 
         nal = ptl_nal_table[interface];
-
+        nal->nal_handle.nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | interface;
+        nal->nal_handle.cookie = 0;
+        
         CDEBUG(D_OTHER, "Starting up NAL (%d) refs %d\n", interface, nal->nal_refct);
-        rc = nal->startup(nal, requested_pid, desired_limits, actual_limits);
+        rc = nal->nal_ni_init(nal, requested_pid, desired_limits, actual_limits);
 
         if (rc != PTL_OK) {
                 CERROR("Error %d starting up NAL %d, refs %d\n", rc,
@@ -218,10 +227,11 @@ int PtlNIInit(ptl_interface_t interface, ptl_pid_t requested_pid,
         }
         
         nal->nal_refct++;
-        handle->nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | interface;
+        *handle = nal->nal_handle;
 
  out:
         ptl_mutex_exit ();
+
         return rc;
 }
 
@@ -248,15 +258,8 @@ int PtlNIFini(ptl_handle_ni_t ni)
         nal->nal_refct--;
 
         /* nal_refct == 0 tells nal->shutdown to really shut down */
-        nal->shutdown(nal);
+        nal->nal_ni_fini(nal);
 
         ptl_mutex_exit ();
         return PTL_OK;
 }
-
-int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * ni_out)
-{
-        *ni_out = handle_in;
-
-        return PTL_OK;
-}
index 3e6f9ce..d7ff020 100644 (file)
 # define DEBUG_SUBSYSTEM S_PORTALS
 #include <portals/api-support.h>
 
-static int do_forward(ptl_handle_any_t any_h, int cmd, void *argbuf,
-                      int argsize, void *retbuf, int retsize)
+void PtlSnprintHandle(char *str, int len, ptl_handle_any_t h)
 {
-        nal_t *nal;
+        snprintf(str, len, "0x%lx."LPX64, h.nal_idx, h.cookie);
+}
 
-        if (!ptl_init) {
-                CERROR("Not initialized\n");
+int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t *ni_out)
+{
+        if (!ptl_init)
                 return PTL_NO_INIT;
-        }
-
-        nal = ptl_hndl2nal(&any_h);
-        if (!nal)
+        
+        if (ptl_hndl2nal(&handle_in) == NULL)
                 return PTL_HANDLE_INVALID;
-
-        nal->forward(nal, cmd, argbuf, argsize, retbuf, retsize);
-
+        
+        *ni_out = handle_in;
         return PTL_OK;
 }
 
 int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id)
 {
-        PtlGetId_in args;
-        PtlGetId_out ret;
-        int rc;
-
-        args.handle_in = ni_handle;
+        nal_t     *nal;
 
-        rc = do_forward(ni_handle, PTL_GETID, &args, sizeof(args), &ret,
-                        sizeof(ret));
-        if (rc != PTL_OK)
-                return rc;
+        if (!ptl_init)
+                return PTL_NO_INIT;
         
-        if (id)
-                *id = ret.id_out;
+        nal = ptl_hndl2nal(&ni_handle);
+        if (nal == NULL)
+                return PTL_NI_INVALID;
 
-        return ret.rc;
+        return nal->nal_get_id(nal, id);
 }
 
 int PtlFailNid (ptl_handle_ni_t interface, ptl_nid_t nid, unsigned int threshold) 
 {
-        PtlFailNid_in  args;
-        PtlFailNid_out ret;
-        int            rc;
-        
-        args.interface = interface;
-        args.nid       = nid;
-        args.threshold = threshold;
+        nal_t     *nal;
+
+        if (!ptl_init)
+                return PTL_NO_INIT;
         
-        rc = do_forward (interface, PTL_FAILNID, 
-                         &args, sizeof(args), &ret, sizeof (ret));
+        nal = ptl_hndl2nal(&interface);
+        if (nal == NULL)
+                return PTL_NI_INVALID;
 
-        return ((rc != PTL_OK) ? rc : ret.rc);
+        return nal->nal_fail_nid(nal, nid, threshold);
 }
 
 int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in,
-                ptl_sr_value_t * status_out)
+                ptl_sr_value_t *status_out)
 {
-        PtlNIStatus_in args;
-        PtlNIStatus_out ret;
-        int rc;
+        nal_t     *nal;
 
-        args.interface_in = interface_in;
-        args.register_in = register_in;
-
-        rc = do_forward(interface_in, PTL_NISTATUS, &args, sizeof(args), &ret,
-                        sizeof(ret));
-
-        if (rc != PTL_OK)
-                return rc;
-
-        if (status_out)
-                *status_out = ret.status_out;
+        if (!ptl_init)
+                return PTL_NO_INIT;
+        
+        nal = ptl_hndl2nal(&interface_in);
+        if (nal == NULL)
+                return PTL_NI_INVALID;
 
-        return ret.rc;
+        return nal->nal_ni_status(nal, register_in, status_out);
 }
 
 int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in,
               unsigned long *distance_out)
 {
-        PtlNIDist_in args;
-        PtlNIDist_out ret;
-        int rc;
-
-        args.interface_in = interface_in;
-        args.process_in = process_in;
-
-        rc = do_forward(interface_in, PTL_NIDIST, &args, sizeof(args), &ret,
-                        sizeof(ret));
+        nal_t     *nal;
 
-        if (rc != PTL_OK)
-                return rc;
-
-        if (distance_out)
-                *distance_out = ret.distance_out;
+        if (!ptl_init)
+                return PTL_NO_INIT;
+        
+        nal = ptl_hndl2nal(&interface_in);
+        if (nal == NULL)
+                return PTL_NI_INVALID;
 
-        return ret.rc;
+        return nal->nal_ni_dist(nal, &process_in, distance_out);
 }
 
 int PtlMEAttach(ptl_handle_ni_t interface_in, ptl_pt_index_t index_in,
                 ptl_process_id_t match_id_in, ptl_match_bits_t match_bits_in,
                 ptl_match_bits_t ignore_bits_in, ptl_unlink_t unlink_in,
-                ptl_ins_pos_t pos_in, ptl_handle_me_t * handle_out)
+                ptl_ins_pos_t pos_in, ptl_handle_me_t *handle_out)
 {
-        PtlMEAttach_in args;
-        PtlMEAttach_out ret;
-        int rc;
-
-        args.interface_in = interface_in;
-        args.index_in = index_in;
-        args.match_id_in = match_id_in;
-        args.match_bits_in = match_bits_in;
-        args.ignore_bits_in = ignore_bits_in;
-        args.unlink_in = unlink_in;
-        args.position_in = pos_in;
-
-        rc = do_forward(interface_in, PTL_MEATTACH, &args, sizeof(args), &ret,
-                        sizeof(ret));
-
-        if (rc != PTL_OK)
-                return rc;
-
-        if (handle_out) {
-                handle_out->nal_idx = interface_in.nal_idx;
-                handle_out->cookie = ret.handle_out.cookie;
-        }
-
-        return ret.rc;
+        nal_t     *nal;
+
+        if (!ptl_init)
+                return PTL_NO_INIT;
+        
+        nal = ptl_hndl2nal(&interface_in);
+        if (nal == NULL)
+                return PTL_NI_INVALID;
+
+        return nal->nal_me_attach(nal, index_in, match_id_in, 
+                                  match_bits_in, ignore_bits_in,
+                                  unlink_in, pos_in, handle_out);
 }
 
 int PtlMEInsert(ptl_handle_me_t current_in, ptl_process_id_t match_id_in,
@@ -160,367 +125,226 @@ int PtlMEInsert(ptl_handle_me_t current_in, ptl_process_id_t match_id_in,
                 ptl_unlink_t unlink_in, ptl_ins_pos_t position_in,
                 ptl_handle_me_t * handle_out)
 {
-        PtlMEInsert_in args;
-        PtlMEInsert_out ret;
-        int rc;
-
-        args.current_in = current_in;
-        args.match_id_in = match_id_in;
-        args.match_bits_in = match_bits_in;
-        args.ignore_bits_in = ignore_bits_in;
-        args.unlink_in = unlink_in;
-        args.position_in = position_in;
-
-        rc = do_forward(current_in, PTL_MEINSERT, &args, sizeof(args), &ret,
-                        sizeof(ret));
-
-        if (rc != PTL_OK)
-                return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
-
-        if (handle_out) {
-                handle_out->nal_idx = current_in.nal_idx;
-                handle_out->cookie = ret.handle_out.cookie;
-        }
-        return ret.rc;
+        nal_t     *nal;
+
+        if (!ptl_init)
+                return PTL_NO_INIT;
+        
+        nal = ptl_hndl2nal(&current_in);
+        if (nal == NULL)
+                return PTL_ME_INVALID;
+
+        return nal->nal_me_insert(nal, &current_in, match_id_in,
+                                  match_bits_in, ignore_bits_in,
+                                  unlink_in, position_in, handle_out);
 }
 
 int PtlMEUnlink(ptl_handle_me_t current_in)
 {
-        PtlMEUnlink_in args;
-        PtlMEUnlink_out ret;
-        int rc;
+        nal_t     *nal;
 
-        args.current_in = current_in;
-        args.unlink_in = PTL_RETAIN;
-
-        rc = do_forward(current_in, PTL_MEUNLINK, &args, sizeof(args), &ret,
-                        sizeof(ret));
-
-        if (rc != PTL_OK)
-                return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
+        if (!ptl_init)
+                return PTL_NO_INIT;
+        
+        nal = ptl_hndl2nal(&current_in);
+        if (nal == NULL)
+                return PTL_ME_INVALID;
 
-        return ret.rc;
+        return nal->nal_me_unlink(nal, &current_in);
 }
 
-int PtlTblDump(ptl_handle_ni_t ni, int index_in)
+int PtlMDAttach(ptl_handle_me_t me_in, ptl_md_t md_in,
+                ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out)
 {
-        PtlTblDump_in args;
-        PtlTblDump_out ret;
-        int rc;
+        nal_t     *nal;
 
-        args.index_in = index_in;
-
-        rc = do_forward(ni, PTL_TBLDUMP, &args, sizeof(args), &ret,
-                        sizeof(ret));
+        if (!ptl_init)
+                return PTL_NO_INIT;
+        
+        nal = ptl_hndl2nal(&me_in);
+        if (nal == NULL)
+                return PTL_ME_INVALID;
 
-        if (rc != PTL_OK)
-                return rc;
+        if (!PtlHandleIsEqual(md_in.eventq, PTL_EQ_NONE) &&
+            ptl_hndl2nal(&md_in.eventq) != nal)
+                return PTL_MD_ILLEGAL;
 
-        return ret.rc;
+        return (nal->nal_md_attach)(nal, &me_in, &md_in, 
+                                    unlink_in, handle_out);
 }
 
-int PtlMEDump(ptl_handle_me_t current_in)
+int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in,
+              ptl_unlink_t unlink_in, ptl_handle_md_t *handle_out)
 {
-        PtlMEDump_in args;
-        PtlMEDump_out ret;
-        int rc;
+        nal_t     *nal;
 
-        args.current_in = current_in;
-
-        rc = do_forward(current_in, PTL_MEDUMP, &args, sizeof(args), &ret,
-                        sizeof(ret));
+        if (!ptl_init)
+                return PTL_NO_INIT;
+        
+        nal = ptl_hndl2nal(&ni_in);
+        if (nal == NULL)
+                return PTL_NI_INVALID;
 
-        if (rc != PTL_OK)
-                return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
+        if (!PtlHandleIsEqual(md_in.eventq, PTL_EQ_NONE) &&
+            ptl_hndl2nal(&md_in.eventq) != nal)
+                return PTL_MD_ILLEGAL;
 
-        return ret.rc;
+        return (nal->nal_md_bind)(nal, &md_in, unlink_in, handle_out);
 }
 
-static ptl_handle_eq_t md2eq (ptl_md_t *md)
+int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t *old_inout,
+                ptl_md_t *new_inout, ptl_handle_eq_t testq_in)
 {
-        if (PtlHandleIsEqual (md->eventq, PTL_EQ_NONE))
-                return (PTL_EQ_NONE);
+        nal_t    *nal;
         
-        return (ptl_handle2usereq (&md->eventq)->cb_eq_handle);
-}
-
-
-int PtlMDAttach(ptl_handle_me_t me_in, ptl_md_t md_in,
-                ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out)
-{
-        PtlMDAttach_in args;
-        PtlMDAttach_out ret;
-        int rc;
-
-        args.eq_in = md2eq(&md_in);
-        args.me_in = me_in;
-        args.md_in = md_in;
-        args.unlink_in = unlink_in;
-                
-        rc = do_forward(me_in, PTL_MDATTACH, 
-                        &args, sizeof(args), &ret, sizeof(ret));
-
-        if (rc != PTL_OK)
-                return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
-
-        if (handle_out) {
-                handle_out->nal_idx = me_in.nal_idx;
-                handle_out->cookie = ret.handle_out.cookie;
-        }
-        return ret.rc;
-}
-
+        if (!ptl_init)
+                return PTL_NO_INIT;
+        
+        nal = ptl_hndl2nal(&md_in);
+        if (nal == NULL)
+                return PTL_MD_INVALID;
 
+        if (!PtlHandleIsEqual(testq_in, PTL_EQ_NONE) &&
+            ptl_hndl2nal(&testq_in) != nal)
+                return PTL_EQ_INVALID;
 
-int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in,
-              ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out)
-{
-        PtlMDBind_in args;
-        PtlMDBind_out ret;
-        int rc;
-
-        args.eq_in = md2eq(&md_in);
-        args.ni_in = ni_in;
-        args.md_in = md_in;
-        args.unlink_in = unlink_in;
-
-        rc = do_forward(ni_in, PTL_MDBIND, 
-                        &args, sizeof(args), &ret, sizeof(ret));
-
-        if (rc != PTL_OK)
-                return rc;
-
-        if (handle_out) {
-                handle_out->nal_idx = ni_in.nal_idx;
-                handle_out->cookie = ret.handle_out.cookie;
-        }
-        return ret.rc;
+        return (nal->nal_md_update)(nal, &md_in, 
+                                    old_inout, new_inout, &testq_in);
 }
 
-int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t *old_inout,
-                ptl_md_t *new_inout, ptl_handle_eq_t testq_in)
+int PtlMDUnlink(ptl_handle_md_t md_in)
 {
-        PtlMDUpdate_internal_in args;
-        PtlMDUpdate_internal_out ret;
-        int rc;
-
-        args.md_in = md_in;
-
-        if (old_inout) {
-                args.old_inout = *old_inout;
-                args.old_inout_valid = 1;
-        } else
-                args.old_inout_valid = 0;
-
-        if (new_inout) {
-                args.new_inout = *new_inout;
-                args.new_inout_valid = 1;
-        } else
-                args.new_inout_valid = 0;
-
-        if (PtlHandleIsEqual (testq_in, PTL_EQ_NONE)) {
-                args.testq_in = PTL_EQ_NONE;
-                args.sequence_in = -1;
-        } else {
-                ptl_eq_t *eq = ptl_handle2usereq (&testq_in);
-                
-                args.testq_in = eq->cb_eq_handle;
-                args.sequence_in = eq->sequence;
-        }
-
-        rc = do_forward(md_in, PTL_MDUPDATE, &args, sizeof(args), &ret,
-                        sizeof(ret));
-        if (rc != PTL_OK)
-                return (rc == PTL_HANDLE_INVALID) ? PTL_MD_INVALID : rc;
-
-        if (old_inout)
-                *old_inout = ret.old_inout;
-
-        return ret.rc;
+        nal_t    *nal;
+        
+        if (!ptl_init)
+                return PTL_NO_INIT;
+        
+        nal = ptl_hndl2nal(&md_in);
+        if (nal == NULL)
+                return PTL_MD_INVALID;
+        
+        return (nal->nal_md_unlink)(nal, &md_in);
 }
 
-int PtlMDUnlink(ptl_handle_md_t md_in)
+int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count,
+               ptl_eq_handler_t callback,
+               ptl_handle_eq_t *handle_out)
 {
-        PtlMDUnlink_in args;
-        PtlMDUnlink_out ret;
-        int rc;
-
-        args.md_in = md_in;
-        rc = do_forward(md_in, PTL_MDUNLINK, &args, sizeof(args), &ret,
-                        sizeof(ret));
-        if (rc != PTL_OK)
-                return (rc == PTL_HANDLE_INVALID) ? PTL_MD_INVALID : rc;
+        nal_t    *nal;
+        
+        if (!ptl_init)
+                return PTL_NO_INIT;
+        
+        nal = ptl_hndl2nal(&interface);
+        if (nal == NULL)
+                return PTL_NI_INVALID;
 
-        return ret.rc;
+        return (nal->nal_eq_alloc)(nal, count, callback, handle_out);
 }
 
-int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count,
-               ptl_eq_handler_t callback,
-               ptl_handle_eq_t * handle_out)
+int PtlEQFree(ptl_handle_eq_t eventq)
 {
-        ptl_eq_t *eq = NULL;
-        ptl_event_t *ev = NULL;
-        PtlEQAlloc_in args;
-        PtlEQAlloc_out ret;
-        int rc, i;
-        nal_t *nal;
+        nal_t       *nal;
 
         if (!ptl_init)
                 return PTL_NO_INIT;
         
-        nal = ptl_hndl2nal (&interface);
+        nal = ptl_hndl2nal(&eventq);
         if (nal == NULL)
-                return PTL_HANDLE_INVALID;
+                return PTL_EQ_INVALID;
 
-        if (count != LOWEST_BIT_SET(count)) {   /* not a power of 2 already */
-                do {                    /* knock off all but the top bit... */
-                        count &= ~LOWEST_BIT_SET (count);
-                } while (count != LOWEST_BIT_SET(count));
-
-                count <<= 1;                             /* ...and round up */
-        }
-
-        if (count == 0)        /* catch bad parameter / overflow on roundup */
-                return (PTL_VAL_FAILED);
-
-        PORTAL_ALLOC(ev, count * sizeof(ptl_event_t));
-        if (!ev)
-                return PTL_NO_SPACE;
-
-        for (i = 0; i < count; i++)
-                ev[i].sequence = 0;
-
-        args.ni_in = interface;
-        args.count_in = count;
-        args.base_in = ev;
-        args.len_in = count * sizeof(*ev);
-        args.callback_in = callback;
-
-        rc = do_forward(interface, PTL_EQALLOC, &args, sizeof(args), &ret,
-                        sizeof(ret));
-        if (rc != PTL_OK)
-                goto fail;
-        if (ret.rc)
-                GOTO(fail, rc = ret.rc);
-
-        PORTAL_ALLOC(eq, sizeof(*eq));
-        if (!eq) {
-                rc = PTL_NO_SPACE;
-                goto fail;
-        }
-
-        eq->sequence = 1;
-        eq->size = count;
-        eq->base = ev;
-
-        /* EQ handles are a little wierd.  PtlEQGet() just looks at the
-         * queued events in shared memory.  It doesn't want to do_forward()
-         * at all, so the cookie in the EQ handle we pass out of here is
-         * simply a pointer to the event queue we just set up.  We stash
-         * the handle returned by do_forward(), so we can pass it back via
-         * do_forward() when we need to. */
-
-        eq->cb_eq_handle.nal_idx = interface.nal_idx;
-        eq->cb_eq_handle.cookie = ret.handle_out.cookie;
-
-        handle_out->nal_idx = interface.nal_idx;
-        handle_out->cookie = (__u64)((unsigned long)eq);
-        return PTL_OK;
+        return (nal->nal_eq_free)(nal, &eventq);
+}
 
-fail:
-        PORTAL_FREE(ev, count * sizeof(ptl_event_t));
-        return rc;
+int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t *ev)
+{
+        int which;
+        
+        return (PtlEQPoll (&eventq, 1, 0, ev, &which));
 }
 
-int PtlEQFree(ptl_handle_eq_t eventq)
+/* Wait for one event on a single event queue.
+ * Thin wrapper: polls just 'eventq_in' through PtlEQPoll() with a timeout
+ * of PTL_TIME_FOREVER and returns PtlEQPoll()'s result unchanged.  The
+ * queue index that PtlEQPoll() reports is discarded.
+ * NOTE(review): PTL_TIME_FOREVER is presumably negative, which is what
+ * lib_api_eq_poll() treats as "no timeout" -- confirm. */
+int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out)
 {
-        PtlEQFree_in args;
-        PtlEQFree_out ret;
-        ptl_eq_t *eq;
-        int rc;
+        int which;
+        
+        return (PtlEQPoll (&eventq_in, 1, PTL_TIME_FOREVER, 
+                           event_out, &which));
+}
 
-        eq = ptl_handle2usereq (&eventq);
-        args.eventq_in = eq->cb_eq_handle;
+/* Poll a set of event queues through the NAL that owns them.
+ *
+ * eventqs_in/neq_in: array of EQ handles and its length
+ * timeout:           passed through untouched to the NAL
+ * event_out:         passed through to the NAL to receive the event
+ * which_out:         passed through to the NAL to receive the queue index
+ *
+ * Returns PTL_NO_INIT if ptl_init is not set, PTL_EQ_INVALID if neq_in < 1,
+ * if the first handle maps to no NAL, or if any other handle maps to a
+ * different NAL; otherwise whatever the NAL's nal_eq_poll method returns. */
+int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout,
+              ptl_event_t *event_out, int *which_out)
+{
+        int           i;
+        nal_t        *nal;
 
-        rc = do_forward(eq->cb_eq_handle, PTL_EQFREE, &args,
-                        sizeof(args), &ret, sizeof(ret));
+        if (!ptl_init)
+                return PTL_NO_INIT;
+
+        if (neq_in < 1)
+                return PTL_EQ_INVALID;
+
+        /* the first handle picks the NAL that will service the poll */
+        nal = ptl_hndl2nal(&eventqs_in[0]);
+        if (nal == NULL)
+                return PTL_EQ_INVALID;
 
-        /* XXX we're betting rc == PTL_OK here */
-        PORTAL_FREE(eq->base, eq->size * sizeof(ptl_event_t));
-        PORTAL_FREE(eq, sizeof(*eq));
+        /* a single poll is dispatched to one NAL only, so every remaining
+         * handle must resolve to that same NAL */
+        for (i = 1; i < neq_in; i++)
+                if (ptl_hndl2nal(&eventqs_in[i]) != nal)
+                        return PTL_EQ_INVALID;
 
-        return rc;
+        return (nal->nal_eq_poll)(nal, eventqs_in, neq_in, timeout,
+                                  event_out, which_out);
 }
 
+
+/* Hand an access-control entry request to the NAL that owns 'ni_in'.
+ * Returns PTL_NO_INIT if ptl_init is not set, PTL_NI_INVALID if the handle
+ * maps to no NAL, otherwise whatever the NAL's nal_ace_entry method returns.
+ * NOTE(review): lib_init() in this patch does not assign nal_ace_entry;
+ * confirm it is set elsewhere, otherwise this call goes through a NULL
+ * function pointer. */
 int PtlACEntry(ptl_handle_ni_t ni_in, ptl_ac_index_t index_in,
                ptl_process_id_t match_id_in, ptl_pt_index_t portal_in)
 {
-        PtlACEntry_in args;
-        PtlACEntry_out ret;
-        int rc;
-
-        /*
-         * Copy arguments into the argument block to
-         * hand to the forwarding object
-         */
-        args.ni_in = ni_in;
-        args.index_in = index_in;
-        args.match_id_in = match_id_in;
-        args.portal_in = portal_in;
-
-        rc = do_forward(ni_in, PTL_ACENTRY, &args, sizeof(args), &ret,
-                        sizeof(ret));
-
-        return (rc != PTL_OK) ? rc : ret.rc;
+        nal_t    *nal;
+
+        if (!ptl_init)
+                return PTL_NO_INIT;
+        
+        nal = ptl_hndl2nal(&ni_in);
+        if (nal == NULL)
+                return PTL_NI_INVALID;
+        
+        return (nal->nal_ace_entry)(nal, index_in, match_id_in, portal_in);
 }
 
+/* Start a put on the NAL that owns 'md_in'.
+ * Returns PTL_NO_INIT if ptl_init is not set, PTL_MD_INVALID if the MD
+ * handle maps to no NAL, otherwise whatever the NAL's nal_put method
+ * returns.  The MD handle and the target id are handed to the NAL by
+ * address (pointers to this function's by-value copies); every other
+ * argument is passed through by value. */
 int PtlPut(ptl_handle_md_t md_in, ptl_ack_req_t ack_req_in,
            ptl_process_id_t target_in, ptl_pt_index_t portal_in,
-           ptl_ac_index_t cookie_in, ptl_match_bits_t match_bits_in,
+           ptl_ac_index_t ac_in, ptl_match_bits_t match_bits_in,
            ptl_size_t offset_in, ptl_hdr_data_t hdr_data_in)
 {
-        PtlPut_in args;
-        PtlPut_out ret;
-        int rc;
-
-        /*
-         * Copy arguments into the argument block to
-         * hand to the forwarding object
-         */
-        args.md_in = md_in;
-        args.ack_req_in = ack_req_in;
-        args.target_in = target_in;
-        args.portal_in = portal_in;
-        args.cookie_in = cookie_in;
-        args.match_bits_in = match_bits_in;
-        args.offset_in = offset_in;
-        args.hdr_data_in = hdr_data_in;
-
-        rc = do_forward(md_in, PTL_PUT, &args, sizeof(args), &ret, sizeof(ret));
-
-        return (rc != PTL_OK) ? rc : ret.rc;
+        nal_t    *nal;
+
+        if (!ptl_init)
+                return PTL_NO_INIT;
+        
+        nal = ptl_hndl2nal(&md_in);
+        if (nal == NULL)
+                return PTL_MD_INVALID;
+
+        return (nal->nal_put)(nal, &md_in, ack_req_in,
+                              &target_in, portal_in, ac_in,
+                              match_bits_in, offset_in, hdr_data_in);
 }
 
+/* Start a get on the NAL that owns 'md_in'.
+ * Returns PTL_NO_INIT if ptl_init is not set, PTL_MD_INVALID if the MD
+ * handle maps to no NAL, otherwise whatever the NAL's nal_get method
+ * returns.  The MD handle and the target id are handed to the NAL by
+ * address (pointers to this function's by-value copies); every other
+ * argument is passed through by value. */
 int PtlGet(ptl_handle_md_t md_in, ptl_process_id_t target_in,
-           ptl_pt_index_t portal_in, ptl_ac_index_t cookie_in,
+           ptl_pt_index_t portal_in, ptl_ac_index_t ac_in,
            ptl_match_bits_t match_bits_in, ptl_size_t offset_in)
 {
-        PtlGet_in args;
-        PtlGet_out ret;
-        int rc;
-
-        /*
-         * Copy arguments into the argument block to
-         * hand to the forwarding object
-         */
-        args.md_in = md_in;
-        args.target_in = target_in;
-        args.portal_in = portal_in;
-        args.cookie_in = cookie_in;
-        args.match_bits_in = match_bits_in;
-        args.offset_in = offset_in;
-
-        rc = do_forward(md_in, PTL_GET, &args, sizeof(args), &ret, sizeof(ret));
-
-        return (rc != PTL_OK) ? rc : ret.rc;
+        nal_t  *nal;
+
+        if (!ptl_init)
+                return PTL_NO_INIT;
+
+        nal = ptl_hndl2nal(&md_in);
+        if (nal == NULL)
+                return PTL_MD_INVALID;
+
+        return (nal->nal_get)(nal, &md_in, 
+                              &target_in, portal_in, ac_in,
+                              match_bits_in, offset_in);
 }
+
index bf7a107..285f8fe 100644 (file)
@@ -3,8 +3,8 @@
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
-my_sources = api-eq.c api-init.c api-me.c api-errno.c api-ni.c api-wrap.c \
-               lib-dispatch.c lib-init.c lib-me.c lib-msg.c lib-eq.c \
+my_sources =    api-errno.c api-ni.c api-wrap.c \
+               lib-init.c lib-me.c lib-msg.c lib-eq.c \
                lib-md.c lib-move.c lib-ni.c lib-pid.c
 
 if !CRAY_PORTALS
diff --git a/lustre/portals/portals/lib-dispatch.c b/lustre/portals/portals/lib-dispatch.c
deleted file mode 100644 (file)
index 798e117..0000000
+++ /dev/null
@@ -1,79 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * lib/lib-dispatch.c
- *
- *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *  Copyright (c) 2001-2002 Sandia National Laboratories
- *
- *   This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_PORTALS
-#include <portals/lib-p30.h>
-#include <portals/lib-dispatch.h>
-
-typedef struct {
-        int (*fun) (nal_cb_t * nal, void *private, void *in, void *out);
-        char *name;
-} dispatch_table_t;
-
-static dispatch_table_t dispatch_table[] = {
-        [PTL_GETID] {do_PtlGetId, "PtlGetId"},
-        [PTL_NISTATUS] {do_PtlNIStatus, "PtlNIStatus"},
-        [PTL_NIDIST] {do_PtlNIDist, "PtlNIDist"},
-        [PTL_MEATTACH] {do_PtlMEAttach, "PtlMEAttach"},
-        [PTL_MEINSERT] {do_PtlMEInsert, "PtlMEInsert"},
-        [PTL_MEUNLINK] {do_PtlMEUnlink, "PtlMEUnlink"},
-        [PTL_TBLDUMP] {do_PtlTblDump, "PtlTblDump"},
-        [PTL_MEDUMP] {do_PtlMEDump, "PtlMEDump"},
-        [PTL_MDATTACH] {do_PtlMDAttach, "PtlMDAttach"},
-        [PTL_MDBIND] {do_PtlMDBind, "PtlMDBind"},
-        [PTL_MDUPDATE] {do_PtlMDUpdate_internal, "PtlMDUpdate_internal"},
-        [PTL_MDUNLINK] {do_PtlMDUnlink, "PtlMDUnlink"},
-        [PTL_EQALLOC] {do_PtlEQAlloc_internal, "PtlEQAlloc_internal"},
-        [PTL_EQFREE] {do_PtlEQFree_internal, "PtlEQFree_internal"},
-        [PTL_PUT] {do_PtlPut, "PtlPut"},
-        [PTL_GET] {do_PtlGet, "PtlGet"},
-        [PTL_FAILNID] {do_PtlFailNid, "PtlFailNid"},
-        /*    */ {0, ""}
-};
-
-/*
- * This really should be elsewhere, but lib-p30/dispatch.c is
- * an automatically generated file.
- */
-void lib_dispatch(nal_cb_t * nal, void *private, int index, void *arg_block,
-                  void *ret_block)
-{
-        lib_ni_t *ni = &nal->ni;
-
-        if (index < 0 || index > LIB_MAX_DISPATCH ||
-            !dispatch_table[index].fun) {
-                CDEBUG(D_NET, LPU64": Invalid API call %d\n", ni->nid, index);
-                return;
-        }
-
-        CDEBUG(D_NET, LPU64": API call %s (%d)\n", ni->nid,
-               dispatch_table[index].name, index);
-
-        dispatch_table[index].fun(nal, private, arg_block, ret_block);
-}
-
-char *dispatch_name(int index)
-{
-        return dispatch_table[index].name;
-}
index 8a91860..8ea6fdd 100644 (file)
 
 #define DEBUG_SUBSYSTEM S_PORTALS
 #include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
 
-int do_PtlEQAlloc_internal(nal_cb_t * nal, void *private, void *v_args,
-                           void *v_ret)
+/* Allocate an event queue on the library NAL behind 'apinal'.
+ *
+ * count:    requested number of event slots; rounded up to a power of 2
+ * callback: stored in the EQ as eq_callback
+ * handle:   receives the handle of the new EQ on success
+ *
+ * Returns PTL_OK on success, PTL_VAL_FAILED if count is 0 or overflows
+ * when rounded up, PTL_NO_SPACE if the EQ descriptor or its event array
+ * cannot be allocated, or the error returned by the NAL's libnal_map
+ * method.  On every failure path both the descriptor and the event array
+ * are released before returning. */
+int 
+lib_api_eq_alloc (nal_t *apinal, ptl_size_t count,
+                  ptl_eq_handler_t callback, 
+                  ptl_handle_eq_t *handle)
 {
-        /*
-         * Incoming:
-         *      ptl_handle_ni_t ni_in
-         *      ptl_size_t count_in
-         *      void                    * base_in
-         *
-         * Outgoing:
-         *      ptl_handle_eq_t         * handle_out
-         */
-
-        PtlEQAlloc_in *args = v_args;
-        PtlEQAlloc_out *ret = v_ret;
-
-        lib_eq_t *eq;
-        unsigned long flags;
-
-        /* api should have rounded up */
-        if (args->count_in != LOWEST_BIT_SET (args->count_in))
-                return ret->rc = PTL_VAL_FAILED;
+        lib_nal_t     *nal = apinal->nal_data;
+        lib_eq_t      *eq;
+        unsigned long  flags;
+        int            rc;
 
+        /* We need count to be a power of 2 so that when eq_{enq,deq}_seq
+         * overflow, they don't skip entries, so the queue has the same
+         * apparent capacity at all times */
+
+        if (count != LOWEST_BIT_SET(count)) {   /* not a power of 2 already */
+                do {                    /* knock off all but the top bit... */
+                        count &= ~LOWEST_BIT_SET (count);
+                } while (count != LOWEST_BIT_SET(count));
+
+                count <<= 1;                             /* ...and round up */
+        }
+
+        if (count == 0)        /* catch bad parameter / overflow on roundup */
+                return (PTL_VAL_FAILED);
+        
         eq = lib_eq_alloc (nal);
         if (eq == NULL)
-                return (ret->rc = PTL_NO_SPACE);
+                return (PTL_NO_SPACE);
 
-        state_lock(nal, &flags);
+        PORTAL_ALLOC(eq->eq_events, count * sizeof(ptl_event_t));
+        if (eq->eq_events == NULL) {
+                LIB_LOCK(nal, flags);
+                lib_eq_free (nal, eq);
+                LIB_UNLOCK(nal, flags);
+                /* must bail out here: 'eq' has just been freed and there
+                 * is no event array to map or clear */
+                return (PTL_NO_SPACE);
+        }
 
-        if (nal->cb_map != NULL) {
+        if (nal->libnal_map != NULL) {
                 struct iovec iov = {
-                        .iov_base = args->base_in,
-                        .iov_len = args->count_in * sizeof (ptl_event_t) };
+                        .iov_base = eq->eq_events,
+                        .iov_len = count * sizeof(ptl_event_t)};
 
-                ret->rc = nal->cb_map (nal, 1, &iov, &eq->eq_addrkey);
-                if (ret->rc != PTL_OK) {
+                rc = nal->libnal_map(nal, 1, &iov, &eq->eq_addrkey);
+                if (rc != PTL_OK) {
+                        /* don't leak the event array; release it before
+                         * the descriptor that points at it goes away */
+                        PORTAL_FREE(eq->eq_events,
+                                    count * sizeof(ptl_event_t));
+
+                        LIB_LOCK(nal, flags);
                         lib_eq_free (nal, eq);
-                        
-                        state_unlock (nal, &flags);
-                        return (ret->rc);
+                        LIB_UNLOCK(nal, flags);
+                        return (rc);
                 }
         }
 
-        eq->sequence = 1;
-        eq->base = args->base_in;
-        eq->size = args->count_in;
+        /* NB this resets all event sequence numbers to 0, to be earlier
+         * than eq_deq_seq */
+        memset(eq->eq_events, 0, count * sizeof(ptl_event_t));
+
+        eq->eq_deq_seq = 1;
+        eq->eq_enq_seq = 1;
+        eq->eq_size = count;
         eq->eq_refcount = 0;
-        eq->event_callback = args->callback_in;
+        eq->eq_callback = callback;
+
+        LIB_LOCK(nal, flags);
 
         lib_initialise_handle (nal, &eq->eq_lh, PTL_COOKIE_TYPE_EQ);
-        list_add (&eq->eq_list, &nal->ni.ni_active_eqs);
+        list_add (&eq->eq_list, &nal->libnal_ni.ni_active_eqs);
 
-        state_unlock(nal, &flags);
+        LIB_UNLOCK(nal, flags);
 
-        ptl_eq2handle(&ret->handle_out, eq);
-        return (ret->rc = PTL_OK);
+        ptl_eq2handle(handle, nal, eq);
+        return (PTL_OK);
 }
 
-int do_PtlEQFree_internal(nal_cb_t * nal, void *private, void *v_args,
-                          void *v_ret)
+/* Free the event queue named by 'eqh' on the library NAL behind 'apinal'.
+ *
+ * Returns PTL_EQ_INVALID if the handle does not resolve to an EQ,
+ * PTL_EQ_IN_USE if the EQ's eq_refcount is non-zero, PTL_OK otherwise.
+ *
+ * The descriptor is invalidated, unlinked and freed while the lock is
+ * held; the event array is unmapped (when the NAL provides libnal_unmap)
+ * and freed only after the lock has been dropped, using values copied
+ * out of the descriptor beforehand. */
+int 
+lib_api_eq_free(nal_t *apinal, ptl_handle_eq_t *eqh)
 {
-        /*
-         * Incoming:
-         *      ptl_handle_eq_t eventq_in
-         *
-         * Outgoing:
-         */
-
-        PtlEQFree_in *args = v_args;
-        PtlEQFree_out *ret = v_ret;
-        lib_eq_t *eq;
-        long flags;
+        lib_nal_t     *nal = apinal->nal_data;
+        lib_eq_t      *eq;
+        int            size;
+        ptl_event_t   *events;
+        void          *addrkey;
+        unsigned long  flags;
 
-        state_lock (nal, &flags);
+        LIB_LOCK(nal, flags);
 
-        eq = ptl_handle2eq(&args->eventq_in, nal);
+        eq = ptl_handle2eq(eqh, nal);
         if (eq == NULL) {
-                ret->rc = PTL_EQ_INVALID;
-        } else if (eq->eq_refcount != 0) {
-                ret->rc = PTL_EQ_IN_USE;
+                LIB_UNLOCK(nal, flags);
+                return (PTL_EQ_INVALID);
+        }
+
+        if (eq->eq_refcount != 0) {
+                LIB_UNLOCK(nal, flags);
+                return (PTL_EQ_IN_USE);
+        }
+
+        /* stash for free after lock dropped */
+        events  = eq->eq_events;
+        size    = eq->eq_size;
+        addrkey = eq->eq_addrkey;
+
+        lib_invalidate_handle (nal, &eq->eq_lh);
+        list_del (&eq->eq_list);
+        lib_eq_free (nal, eq);
+
+        LIB_UNLOCK(nal, flags);
+
+        /* 'eq' must not be touched from here on; only the stashed copies */
+        if (nal->libnal_unmap != NULL) {
+                struct iovec iov = {
+                        .iov_base = events,
+                        .iov_len = size * sizeof(ptl_event_t)};
+
+                nal->libnal_unmap(nal, 1, &iov, &addrkey);
+        }
+
+        PORTAL_FREE(events, size * sizeof (ptl_event_t));
+
+        return (PTL_OK);
+}
+
+/* Try to dequeue the next event from 'eq' into '*ev'.
+ *
+ * The slot examined is eq_deq_seq masked by (eq_size - 1), which relies
+ * on eq_size being a power of 2 (lib_api_eq_alloc() rounds it up).
+ *
+ * Returns PTL_EQ_EMPTY (and leaves '*ev' untouched) if the slot's
+ * sequence number is behind eq_deq_seq according to PTL_SEQ_GT,
+ * PTL_OK if the slot holds exactly the expected sequence number, or
+ * PTL_EQ_DROPPED if it holds a different one.  In the latter two cases
+ * the event is copied out and eq_deq_seq moves to the copied event's
+ * sequence number + 1.
+ * NOTE(review): no locking is done here; lib_api_eq_poll() calls this
+ * with LIB_LOCK held -- confirm that holds for any other caller. */
+int
+lib_get_event (lib_eq_t *eq, ptl_event_t *ev)
+{
+        int          new_index = eq->eq_deq_seq & (eq->eq_size - 1);
+        ptl_event_t *new_event = &eq->eq_events[new_index];
+        int          rc;
+        ENTRY;
+
+        CDEBUG(D_INFO, "event: %p, sequence: %lu, eq->size: %u\n",
+               new_event, eq->eq_deq_seq, eq->eq_size);
+
+        if (PTL_SEQ_GT (eq->eq_deq_seq, new_event->sequence)) {
+                RETURN(PTL_EQ_EMPTY);
+        }
+
+        /* We've got a new event... */
+        *ev = *new_event;
+
+        /* ...but did it overwrite an event we've not seen yet? */
+        if (eq->eq_deq_seq == new_event->sequence) {
+                rc = PTL_OK;
         } else {
-                if (nal->cb_unmap != NULL) {
-                        struct iovec iov = {
-                                .iov_base = eq->base,
-                                .iov_len = eq->size * sizeof (ptl_event_t) };
-                        
-                        nal->cb_unmap(nal, 1, &iov, &eq->eq_addrkey);
+                CERROR("Event Queue Overflow: eq seq %lu ev seq %lu\n",
+                       eq->eq_deq_seq, new_event->sequence);
+                rc = PTL_EQ_DROPPED;
+        }
+
+        /* resynchronise on the event just returned */
+        eq->eq_deq_seq = new_event->sequence + 1;
+        RETURN(rc);
+}
+
+
+/* Poll 'neq' event queues for the next event, optionally blocking.
+ *
+ * eventqs/neq: handles of the queues to scan, in priority order
+ * timeout_ms:  0 = scan once and return; < 0 = wait with no time limit;
+ *              > 0 = wait at most this many milliseconds in total
+ * event:       receives the dequeued event
+ * which:       receives the index in 'eventqs' of the queue that
+ *              produced the event
+ *
+ * Returns PTL_EQ_INVALID if a handle does not resolve to an EQ,
+ * PTL_EQ_EMPTY if nothing arrived before the timeout ran out, otherwise
+ * the non-empty result of lib_get_event() (PTL_OK or PTL_EQ_DROPPED).
+ * 'which' is only written when an event is returned. */
+int
+lib_api_eq_poll (nal_t *apinal, 
+                 ptl_handle_eq_t *eventqs, int neq, int timeout_ms,
+                 ptl_event_t *event, int *which)
+{
+        lib_nal_t       *nal = apinal->nal_data;
+        lib_ni_t        *ni = &nal->libnal_ni;
+        unsigned long    flags;
+        int              i;
+        int              rc;
+#ifdef __KERNEL__
+        wait_queue_t     wq;
+        unsigned long    now;
+#else
+        struct timeval   then;
+        struct timeval   now;
+        struct timespec  ts;
+#endif
+        ENTRY;
+
+        LIB_LOCK(nal, flags);
+
+        for (;;) {
+                for (i = 0; i < neq; i++) {
+                        lib_eq_t *eq = ptl_handle2eq(&eventqs[i], nal);
+
+                        /* a stale or bogus handle resolves to NULL; don't
+                         * let lib_get_event() dereference it */
+                        if (eq == NULL) {
+                                LIB_UNLOCK(nal, flags);
+                                RETURN(PTL_EQ_INVALID);
+                        }
+
+                        rc = lib_get_event (eq, event);
+                        if (rc != PTL_EQ_EMPTY) {
+                                LIB_UNLOCK(nal, flags);
+                                *which = i;
+                                RETURN(rc);
+                        }
+                }
+                
+                if (timeout_ms == 0) {
+                        LIB_UNLOCK (nal, flags);
+                        RETURN (PTL_EQ_EMPTY);
                 }
 
-                lib_invalidate_handle (nal, &eq->eq_lh);
-                list_del (&eq->eq_list);
-                lib_eq_free (nal, eq);
-                ret->rc = PTL_OK;
-        }
+                /* Some architectures force us to do spin locking/unlocking
+                 * in the same stack frame, means we can abstract the
+                 * locking here */
+#ifdef __KERNEL__
+                init_waitqueue_entry(&wq, current);
+                set_current_state(TASK_INTERRUPTIBLE);
+                add_wait_queue(&ni->ni_waitq, &wq);
 
-        state_unlock (nal, &flags);
+                LIB_UNLOCK(nal, flags);
 
-        return (ret->rc);
+                if (timeout_ms < 0) {
+                        schedule ();
+                } else {
+                        now = jiffies;
+                        schedule_timeout((timeout_ms * HZ)/1000);
+                        timeout_ms -= ((jiffies - now) * 1000)/HZ;
+                        if (timeout_ms < 0)
+                                timeout_ms = 0;
+                }
+                
+                LIB_LOCK(nal, flags);
+                /* 'wq' lives on this stack frame: take it off the wait
+                 * queue before we either return or queue it again */
+                remove_wait_queue(&ni->ni_waitq, &wq);
+#else
+                if (timeout_ms < 0) {
+                        pthread_cond_wait(&ni->ni_cond, &ni->ni_mutex);
+                } else {
+                        gettimeofday(&then, NULL);
+                        
+                        /* absolute deadline = now + timeout_ms, with the
+                         * nanosecond field normalised below */
+                        ts.tv_sec = then.tv_sec + timeout_ms/1000;
+                        ts.tv_nsec = then.tv_usec * 1000 + 
+                                     (timeout_ms%1000) * 1000000;
+                        if (ts.tv_nsec >= 1000000000) {
+                                ts.tv_sec++;
+                                ts.tv_nsec -= 1000000000;
+                        }
+                        
+                        pthread_cond_timedwait(&ni->ni_cond,
+                                               &ni->ni_mutex, &ts);
+                        
+                        /* charge the time actually spent waiting against
+                         * the remaining timeout */
+                        gettimeofday(&now, NULL);
+                        timeout_ms -= (now.tv_sec - then.tv_sec) * 1000 +
+                                      (now.tv_usec - then.tv_usec) / 1000;
+                        
+                        if (timeout_ms < 0)
+                                timeout_ms = 0;
+                }
+#endif
+        }
 }
index c62dbc2..9d97bc1 100644 (file)
@@ -41,7 +41,7 @@
 #ifndef PTL_USE_LIB_FREELIST
 
 int
-kportal_descriptor_setup (nal_cb_t *nal,
+kportal_descriptor_setup (lib_nal_t *nal,
                           ptl_ni_limits_t *requested_limits,
                           ptl_ni_limits_t *actual_limits)
 {
@@ -54,13 +54,13 @@ kportal_descriptor_setup (nal_cb_t *nal,
 }
 
 void
-kportal_descriptor_cleanup (nal_cb_t *nal)
+kportal_descriptor_cleanup (lib_nal_t *nal)
 {
 }
 #else
 
 int
-lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int n, int size)
+lib_freelist_init (lib_nal_t *nal, lib_freelist_t *fl, int n, int size)
 {
         char *space;
 
@@ -68,7 +68,7 @@ lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int n, int size)
 
         size += offsetof (lib_freeobj_t, fo_contents);
 
-        space = nal->cb_malloc (nal, n * size);
+        PORTAL_ALLOC(space, n * size);
         if (space == NULL)
                 return (PTL_NO_SPACE);
 
@@ -88,7 +88,7 @@ lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int n, int size)
 }
 
 void
-lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl)
+lib_freelist_fini (lib_nal_t *nal, lib_freelist_t *fl)
 {
         struct list_head *el;
         int               count;
@@ -102,23 +102,24 @@ lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl)
 
         LASSERT (count == fl->fl_nobjs);
 
-        nal->cb_free (nal, fl->fl_objs, fl->fl_nobjs * fl->fl_objsize);
+        PORTAL_FREE(fl->fl_objs, fl->fl_nobjs * fl->fl_objsize);
         memset (fl, 0, sizeof (fl));
 }
 
 int
-kportal_descriptor_setup (nal_cb_t *nal,
+kportal_descriptor_setup (lib_nal_t *nal,
                           ptl_ni_limits_t *requested_limits,
                           ptl_ni_limits_t *actual_limits)
 {
         /* NB on failure caller must still call kportal_descriptor_cleanup */
         /*               ******                                            */
-        int rc;
+        lib_ni_t  *ni = &nal->libnal_ni;
+        int        rc;
 
-        memset (&nal->ni.ni_free_mes,  0, sizeof (nal->ni.ni_free_mes));
-        memset (&nal->ni.ni_free_msgs, 0, sizeof (nal->ni.ni_free_msgs));
-        memset (&nal->ni.ni_free_mds,  0, sizeof (nal->ni.ni_free_mds));
-        memset (&nal->ni.ni_free_eqs,  0, sizeof (nal->ni.ni_free_eqs));
+        memset (&ni->ni_free_mes,  0, sizeof (ni->ni_free_mes));
+        memset (&ni->ni_free_msgs, 0, sizeof (ni->ni_free_msgs));
+        memset (&ni->ni_free_mds,  0, sizeof (ni->ni_free_mds));
+        memset (&ni->ni_free_eqs,  0, sizeof (ni->ni_free_eqs));
 
         /* Ignore requested limits! */
         actual_limits->max_mes = MAX_MES;
@@ -127,39 +128,41 @@ kportal_descriptor_setup (nal_cb_t *nal,
         /* Hahahah what a load of bollocks.  There's nowhere to
          * specify the max # messages in-flight */
 
-        rc = lib_freelist_init (nal, &nal->ni.ni_free_mes,
+        rc = lib_freelist_init (nal, &ni->ni_free_mes,
                                 MAX_MES, sizeof (lib_me_t));
         if (rc != PTL_OK)
                 return (rc);
 
-        rc = lib_freelist_init (nal, &nal->ni.ni_free_msgs,
+        rc = lib_freelist_init (nal, &ni->ni_free_msgs,
                                 MAX_MSGS, sizeof (lib_msg_t));
         if (rc != PTL_OK)
                 return (rc);
 
-        rc = lib_freelist_init (nal, &nal->ni.ni_free_mds,
+        rc = lib_freelist_init (nal, &ni->ni_free_mds,
                                 MAX_MDS, sizeof (lib_md_t));
         if (rc != PTL_OK)
                 return (rc);
 
-        rc = lib_freelist_init (nal, &nal->ni.ni_free_eqs,
+        rc = lib_freelist_init (nal, &ni->ni_free_eqs,
                                 MAX_EQS, sizeof (lib_eq_t));
         return (rc);
 }
 
 void
-kportal_descriptor_cleanup (nal_cb_t *nal)
+kportal_descriptor_cleanup (lib_nal_t *nal)
 {
-        lib_freelist_fini (nal, &nal->ni.ni_free_mes);
-        lib_freelist_fini (nal, &nal->ni.ni_free_msgs);
-        lib_freelist_fini (nal, &nal->ni.ni_free_mds);
-        lib_freelist_fini (nal, &nal->ni.ni_free_eqs);
+        lib_ni_t   *ni = &nal->libnal_ni;
+        
+        lib_freelist_fini (nal, &ni->ni_free_mes);
+        lib_freelist_fini (nal, &ni->ni_free_msgs);
+        lib_freelist_fini (nal, &ni->ni_free_mds);
+        lib_freelist_fini (nal, &ni->ni_free_eqs);
 }
 
 #endif
 
 __u64
-lib_create_interface_cookie (nal_cb_t *nal)
+lib_create_interface_cookie (lib_nal_t *nal)
 {
         /* NB the interface cookie in wire handles guards against delayed
          * replies and ACKs appearing valid in a new instance of the same
@@ -180,9 +183,9 @@ lib_create_interface_cookie (nal_cb_t *nal)
 }
 
 int
-lib_setup_handle_hash (nal_cb_t *nal) 
+lib_setup_handle_hash (lib_nal_t *nal) 
 {
-        lib_ni_t *ni = &nal->ni;
+        lib_ni_t *ni = &nal->libnal_ni;
         int       i;
         
         /* Arbitrary choice of hash table size */
@@ -191,9 +194,8 @@ lib_setup_handle_hash (nal_cb_t *nal)
 #else
         ni->ni_lh_hash_size = (MAX_MES + MAX_MDS + MAX_EQS)/4;
 #endif
-        ni->ni_lh_hash_table = 
-                (struct list_head *)nal->cb_malloc (nal, ni->ni_lh_hash_size
-                                                    * sizeof (struct list_head));
+        PORTAL_ALLOC(ni->ni_lh_hash_table,
+                     ni->ni_lh_hash_size * sizeof (struct list_head));
         if (ni->ni_lh_hash_table == NULL)
                 return (PTL_NO_SPACE);
         
@@ -206,22 +208,22 @@ lib_setup_handle_hash (nal_cb_t *nal)
 }
 
 void
-lib_cleanup_handle_hash (nal_cb_t *nal)
+lib_cleanup_handle_hash (lib_nal_t *nal)
 {
-        lib_ni_t *ni = &nal->ni;
+        lib_ni_t *ni = &nal->libnal_ni;
 
         if (ni->ni_lh_hash_table == NULL)
                 return;
         
-        nal->cb_free (nal, ni->ni_lh_hash_table,
-                      ni->ni_lh_hash_size * sizeof (struct list_head));
+        PORTAL_FREE(ni->ni_lh_hash_table,
+                    ni->ni_lh_hash_size * sizeof (struct list_head));
 }
 
 lib_handle_t *
-lib_lookup_cookie (nal_cb_t *nal, __u64 cookie, int type) 
+lib_lookup_cookie (lib_nal_t *nal, __u64 cookie, int type) 
 {
         /* ALWAYS called with statelock held */
-        lib_ni_t            *ni = &nal->ni;
+        lib_ni_t            *ni = &nal->libnal_ni;
         struct list_head    *list;
         struct list_head    *el;
         unsigned int         hash;
@@ -243,10 +245,10 @@ lib_lookup_cookie (nal_cb_t *nal, __u64 cookie, int type)
 }
 
 void
-lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh, int type) 
+lib_initialise_handle (lib_nal_t *nal, lib_handle_t *lh, int type) 
 {
         /* ALWAYS called with statelock held */
-        lib_ni_t       *ni = &nal->ni;
+        lib_ni_t       *ni = &nal->libnal_ni;
         unsigned int    hash;
 
         LASSERT (type >= 0 && type < PTL_COOKIE_TYPES);
@@ -258,95 +260,120 @@ lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh, int type)
 }
 
 void
-lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh)
+lib_invalidate_handle (lib_nal_t *nal, lib_handle_t *lh)
 {
         list_del (&lh->lh_hash_chain);
 }
 
 int
-lib_init(nal_cb_t *nal, ptl_process_id_t process_id,
+lib_init(lib_nal_t *libnal, nal_t *apinal, 
+         ptl_process_id_t process_id,
          ptl_ni_limits_t *requested_limits,
          ptl_ni_limits_t *actual_limits)
 {
         int       rc = PTL_OK;
-        lib_ni_t *ni = &nal->ni;
-        int ptl_size;
-        int i;
+        lib_ni_t *ni = &libnal->libnal_ni;
+        int       ptl_size;
+        int       i;
         ENTRY;
 
         /* NB serialised in PtlNIInit() */
 
         lib_assert_wire_constants ();
-        
-        /*
-         * Allocate the portal table for this interface
-         * and all per-interface objects.
-         */
-        memset(&ni->counters, 0, sizeof(lib_counters_t));
 
-        rc = kportal_descriptor_setup (nal, requested_limits, 
-                                       &ni->actual_limits);
+        /* Setup the API nal with the lib API handling functions */
+        apinal->nal_get_id    = lib_api_get_id;
+        apinal->nal_ni_status = lib_api_ni_status;
+        apinal->nal_ni_dist   = lib_api_ni_dist;
+        apinal->nal_fail_nid  = lib_api_fail_nid;
+        apinal->nal_me_attach = lib_api_me_attach;
+        apinal->nal_me_insert = lib_api_me_insert;
+        apinal->nal_me_unlink = lib_api_me_unlink;
+        apinal->nal_md_attach = lib_api_md_attach;
+        apinal->nal_md_bind   = lib_api_md_bind;
+        apinal->nal_md_unlink = lib_api_md_unlink;
+        apinal->nal_md_update = lib_api_md_update;
+        apinal->nal_eq_alloc  = lib_api_eq_alloc;
+        apinal->nal_eq_free   = lib_api_eq_free;
+        apinal->nal_eq_poll   = lib_api_eq_poll;
+        apinal->nal_put       = lib_api_put;
+        apinal->nal_get       = lib_api_get;
+
+        apinal->nal_data      = libnal;
+        ni->ni_api            = apinal;
+
+        rc = kportal_descriptor_setup (libnal, requested_limits, 
+                                       &ni->ni_actual_limits);
         if (rc != PTL_OK)
                 goto out;
 
+        memset(&ni->ni_counters, 0, sizeof(lib_counters_t));
+
         INIT_LIST_HEAD (&ni->ni_active_msgs);
         INIT_LIST_HEAD (&ni->ni_active_mds);
         INIT_LIST_HEAD (&ni->ni_active_eqs);
-
         INIT_LIST_HEAD (&ni->ni_test_peers);
 
-        ni->ni_interface_cookie = lib_create_interface_cookie (nal);
+#ifdef __KERNEL__
+        spin_lock_init (&ni->ni_lock);
+        init_waitqueue_head (&ni->ni_waitq);
+#else
+        pthread_mutex_init(&ni->ni_mutex, NULL);
+        pthread_cond_init(&ni->ni_cond, NULL);
+#endif
+
+        ni->ni_interface_cookie = lib_create_interface_cookie (libnal);
         ni->ni_next_object_cookie = 0;
-        rc = lib_setup_handle_hash (nal);
+        rc = lib_setup_handle_hash (libnal);
         if (rc != PTL_OK)
                 goto out;
         
-        ni->nid = process_id.nid;
-        ni->pid = process_id.pid;
+        ni->ni_pid = process_id;
 
         if (requested_limits != NULL)
                 ptl_size = requested_limits->max_pt_index + 1;
         else
                 ptl_size = 64;
 
-        ni->tbl.size = ptl_size;
-        ni->tbl.tbl = nal->cb_malloc(nal, sizeof(struct list_head) * ptl_size);
-        if (ni->tbl.tbl == NULL) {
+        ni->ni_portals.size = ptl_size;
+        PORTAL_ALLOC(ni->ni_portals.tbl,
+                     ptl_size * sizeof(struct list_head));
+        if (ni->ni_portals.tbl == NULL) {
                 rc = PTL_NO_SPACE;
                 goto out;
         }
 
         for (i = 0; i < ptl_size; i++)
-                INIT_LIST_HEAD(&(ni->tbl.tbl[i]));
+                INIT_LIST_HEAD(&(ni->ni_portals.tbl[i]));
 
         /* max_{mes,mds,eqs} set in kportal_descriptor_setup */
 
         /* We don't have an access control table! */
-        ni->actual_limits.max_ac_index = -1;
+        ni->ni_actual_limits.max_ac_index = -1;
 
-        ni->actual_limits.max_pt_index = ptl_size - 1;
-        ni->actual_limits.max_md_iovecs = PTL_MD_MAX_IOV;
-        ni->actual_limits.max_me_list = INT_MAX;
+        ni->ni_actual_limits.max_pt_index = ptl_size - 1;
+        ni->ni_actual_limits.max_md_iovecs = PTL_MD_MAX_IOV;
+        ni->ni_actual_limits.max_me_list = INT_MAX;
 
         /* We don't support PtlGetPut! */
-        ni->actual_limits.max_getput_md = 0;
+        ni->ni_actual_limits.max_getput_md = 0;
 
         if (actual_limits != NULL)
-                *actual_limits = ni->actual_limits;
+                *actual_limits = ni->ni_actual_limits;
 
  out:
         if (rc != PTL_OK) {
-                lib_cleanup_handle_hash (nal);
-                kportal_descriptor_cleanup (nal);
+                lib_cleanup_handle_hash (libnal);
+                kportal_descriptor_cleanup (libnal);
         }
 
         RETURN (rc);
 }
 
 int
-lib_fini(nal_cb_t * nal)
+lib_fini(lib_nal_t *nal)
 {
-        lib_ni_t *ni = &nal->ni;
+        lib_ni_t *ni = &nal->libnal_ni;
         int       idx;
 
         /* NB no state_lock() since this is the last reference.  The NAL
@@ -355,9 +382,9 @@ lib_fini(nal_cb_t * nal)
          * network op (eg MD with non-zero pending count)
          */
 
-        for (idx = 0; idx < ni->tbl.size; idx++)
-                while (!list_empty (&ni->tbl.tbl[idx])) {
-                        lib_me_t *me = list_entry (ni->tbl.tbl[idx].next,
+        for (idx = 0; idx < ni->ni_portals.size; idx++)
+                while (!list_empty (&ni->ni_portals.tbl[idx])) {
+                        lib_me_t *me = list_entry (ni->ni_portals.tbl[idx].next,
                                                    lib_me_t, me_list);
 
                         CERROR ("Active me %p on exit\n", me);
@@ -392,10 +419,16 @@ lib_fini(nal_cb_t * nal)
                 lib_msg_free (nal, msg);
         }
 
-        nal->cb_free(nal, ni->tbl.tbl, sizeof(struct list_head) * ni->tbl.size);
+        PORTAL_FREE(ni->ni_portals.tbl,  
+                    ni->ni_portals.size * sizeof(struct list_head));
 
         lib_cleanup_handle_hash (nal);
         kportal_descriptor_cleanup (nal);
 
+#ifndef __KERNEL__
+        pthread_mutex_destroy(&ni->ni_mutex);
+        pthread_cond_destroy(&ni->ni_cond);
+#endif
+
         return (PTL_OK);
 }
index 64a55b9..a4df791 100644 (file)
 #endif
 
 #include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
 
 /* must be called with state lock held */
-void lib_md_unlink(nal_cb_t * nal, lib_md_t * md)
+void
+lib_md_unlink(lib_nal_t *nal, lib_md_t *md)
 {
         if ((md->md_flags & PTL_MD_FLAG_ZOMBIE) == 0) {
                 /* first unlink attempt... */
@@ -62,12 +62,15 @@ void lib_md_unlink(nal_cb_t * nal, lib_md_t * md)
         CDEBUG(D_NET, "Unlinking md %p\n", md);
 
         if ((md->options & PTL_MD_KIOV) != 0) {
-                if (nal->cb_unmap_pages != NULL)
-                        nal->cb_unmap_pages (nal, md->md_niov, md->md_iov.kiov, 
-                                             &md->md_addrkey);
-        } else if (nal->cb_unmap != NULL) {
-                nal->cb_unmap (nal, md->md_niov, md->md_iov.iov, 
-                               &md->md_addrkey);
+                if (nal->libnal_unmap_pages != NULL)
+                        nal->libnal_unmap_pages (nal, 
+                                                 md->md_niov, 
+                                                 md->md_iov.kiov, 
+                                                 &md->md_addrkey);
+        } else if (nal->libnal_unmap != NULL) {
+                nal->libnal_unmap (nal, 
+                                   md->md_niov, md->md_iov.iov, 
+                                   &md->md_addrkey);
         }
 
         if (md->eq != NULL) {
@@ -80,124 +83,124 @@ void lib_md_unlink(nal_cb_t * nal, lib_md_t * md)
 }
 
 /* must be called with state lock held */
-static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private,
-                        ptl_md_t *md, ptl_handle_eq_t *eqh, int unlink)
+static int
+lib_md_build(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd, int unlink)
 {
         lib_eq_t     *eq = NULL;
         int           rc;
         int           i;
         int           niov;
+        int           total_length = 0;
 
         /* NB we are passed an allocated, but uninitialised/active md.
          * if we return success, caller may lib_md_unlink() it.
          * otherwise caller may only lib_md_free() it.
          */
 
-        if (!PtlHandleIsEqual (*eqh, PTL_EQ_NONE)) {
-                eq = ptl_handle2eq(eqh, nal);
+        if (!PtlHandleIsEqual (umd->eventq, PTL_EQ_NONE)) {
+                eq = ptl_handle2eq(&umd->eventq, nal);
                 if (eq == NULL)
                         return PTL_EQ_INVALID;
         }
 
-        /* Must check this _before_ allocation.  Also, note that non-iov
-         * MDs must set md_niov to 0. */
-        LASSERT((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0 ||
-                md->length <= PTL_MD_MAX_IOV);
-
         /* This implementation doesn't know how to create START events or
          * disable END events.  Best to LASSERT our caller is compliant so
          * we find out quickly...  */
-        LASSERT (PtlHandleIsEqual (*eqh, PTL_EQ_NONE) ||
-                 ((md->options & PTL_MD_EVENT_START_DISABLE) != 0 &&
-                  (md->options & PTL_MD_EVENT_END_DISABLE) == 0));
-
-        if ((md->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
-            (md->max_size < 0 || md->max_size > md->length)) // illegal max_size
-                return PTL_MD_INVALID;
-
-        new->me = NULL;
-        new->start = md->start;
-        new->offset = 0;
-        new->max_size = md->max_size;
-        new->options = md->options;
-        new->user_ptr = md->user_ptr;
-        new->eq = eq;
-        new->threshold = md->threshold;
-        new->pending = 0;
-        new->md_flags = (unlink == PTL_UNLINK) ? PTL_MD_FLAG_AUTO_UNLINK : 0;
-
-        if ((md->options & PTL_MD_IOVEC) != 0) {
-                int total_length = 0;
-
-                if ((md->options & PTL_MD_KIOV) != 0) /* Can't specify both */
-                        return PTL_MD_INVALID; 
-
-                new->md_niov = niov = md->length;
-                
-                if (nal->cb_read (nal, private, new->md_iov.iov, md->start,
-                                  niov * sizeof (new->md_iov.iov[0])))
-                        return PTL_SEGV;
+        LASSERT (eq == NULL ||
+                 ((umd->options & PTL_MD_EVENT_START_DISABLE) != 0 &&
+                  (umd->options & PTL_MD_EVENT_END_DISABLE) == 0));
+
+        lmd->me = NULL;
+        lmd->start = umd->start;
+        lmd->offset = 0;
+        lmd->max_size = umd->max_size;
+        lmd->options = umd->options;
+        lmd->user_ptr = umd->user_ptr;
+        lmd->eq = eq;
+        lmd->threshold = umd->threshold;
+        lmd->pending = 0;
+        lmd->md_flags = (unlink == PTL_UNLINK) ? PTL_MD_FLAG_AUTO_UNLINK : 0;
+
+        if ((umd->options & PTL_MD_IOVEC) != 0) {
+
+                if ((umd->options & PTL_MD_KIOV) != 0) /* Can't specify both */
+                        return PTL_MD_ILLEGAL; 
+
+                lmd->md_niov = niov = umd->length;
+                memcpy(lmd->md_iov.iov, umd->start,
+                       niov * sizeof (lmd->md_iov.iov[0]));
 
                 for (i = 0; i < niov; i++) {
                         /* We take the base address on trust */
-                        if (new->md_iov.iov[i].iov_len <= 0) /* invalid length */
-                                return PTL_VAL_FAILED;
+                        if (lmd->md_iov.iov[i].iov_len <= 0) /* invalid length */
+                                return PTL_MD_ILLEGAL;
 
-                        total_length += new->md_iov.iov[i].iov_len;
+                        total_length += lmd->md_iov.iov[i].iov_len;
                 }
 
-                new->length = total_length;
+                lmd->length = total_length;
 
-                if (nal->cb_map != NULL) {
-                        rc = nal->cb_map (nal, niov, new->md_iov.iov, 
-                                          &new->md_addrkey);
+                if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
+                    (umd->max_size < 0 || 
+                     umd->max_size > total_length)) // illegal max_size
+                        return PTL_MD_ILLEGAL;
+
+                if (nal->libnal_map != NULL) {
+                        rc = nal->libnal_map (nal, niov, lmd->md_iov.iov, 
+                                              &lmd->md_addrkey);
                         if (rc != PTL_OK)
                                 return (rc);
                 }
-        } else if ((md->options & PTL_MD_KIOV) != 0) {
+        } else if ((umd->options & PTL_MD_KIOV) != 0) {
 #ifndef __KERNEL__
-                return PTL_MD_INVALID;
-#else
-                int total_length = 0;
-                
+                return PTL_MD_ILLEGAL;
+#else                
                 /* Trap attempt to use paged I/O if unsupported early. */
-                if (nal->cb_send_pages == NULL ||
-                    nal->cb_recv_pages == NULL)
+                if (nal->libnal_send_pages == NULL ||
+                    nal->libnal_recv_pages == NULL)
                         return PTL_MD_INVALID;
 
-                new->md_niov = niov = md->length;
+                lmd->md_niov = niov = umd->length;
+                memcpy(lmd->md_iov.kiov, umd->start,
+                       niov * sizeof (lmd->md_iov.kiov[0]));
 
-                if (nal->cb_read (nal, private, new->md_iov.kiov, md->start,
-                                  niov * sizeof (new->md_iov.kiov[0])))
-                        return PTL_SEGV;
-                
                 for (i = 0; i < niov; i++) {
                         /* We take the page pointer on trust */
-                        if (new->md_iov.kiov[i].kiov_offset + 
-                            new->md_iov.kiov[i].kiov_len > PAGE_SIZE )
+                        if (lmd->md_iov.kiov[i].kiov_offset + 
+                            lmd->md_iov.kiov[i].kiov_len > PAGE_SIZE )
                                 return PTL_VAL_FAILED; /* invalid length */
 
-                        total_length += new->md_iov.kiov[i].kiov_len;
+                        total_length += lmd->md_iov.kiov[i].kiov_len;
                 }
 
-                new->length = total_length;
+                lmd->length = total_length;
+
+                if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
+                    (umd->max_size < 0 || 
+                     umd->max_size > total_length)) // illegal max_size
+                        return PTL_MD_ILLEGAL;
 
-                if (nal->cb_map_pages != NULL) {
-                        rc = nal->cb_map_pages (nal, niov, new->md_iov.kiov, 
-                                                &new->md_addrkey);
+                if (nal->libnal_map_pages != NULL) {
+                        rc = nal->libnal_map_pages (nal, niov, lmd->md_iov.kiov, 
+                                                    &lmd->md_addrkey);
                         if (rc != PTL_OK)
                                 return (rc);
                 }
 #endif
         } else {   /* contiguous */
-                new->length = md->length;
-                new->md_niov = niov = 1;
-                new->md_iov.iov[0].iov_base = md->start;
-                new->md_iov.iov[0].iov_len = md->length;
-
-                if (nal->cb_map != NULL) {
-                        rc = nal->cb_map (nal, niov, new->md_iov.iov, 
-                                          &new->md_addrkey);
+                lmd->length = umd->length;
+                lmd->md_niov = niov = 1;
+                lmd->md_iov.iov[0].iov_base = umd->start;
+                lmd->md_iov.iov[0].iov_len = umd->length;
+
+                if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
+                    (umd->max_size < 0 || 
+                     umd->max_size > umd->length)) // illegal max_size
+                        return PTL_MD_ILLEGAL;
+
+                if (nal->libnal_map != NULL) {
+                        rc = nal->libnal_map (nal, niov, lmd->md_iov.iov, 
+                                              &lmd->md_addrkey);
                         if (rc != PTL_OK)
                                 return (rc);
                 }
@@ -207,140 +210,125 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private,
                 eq->eq_refcount++;
 
         /* It's good; let handle2md succeed and add to active mds */
-        lib_initialise_handle (nal, &new->md_lh, PTL_COOKIE_TYPE_MD);
-        list_add (&new->md_list, &nal->ni.ni_active_mds);
+        lib_initialise_handle (nal, &lmd->md_lh, PTL_COOKIE_TYPE_MD);
+        list_add (&lmd->md_list, &nal->libnal_ni.ni_active_mds);
 
         return PTL_OK;
 }
 
 /* must be called with state lock held */
-void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md, ptl_md_t * new)
+void
+lib_md_deconstruct(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd)
 {
         /* NB this doesn't copy out all the iov entries so when a
          * discontiguous MD is copied out, the target gets to know the
          * original iov pointer (in start) and the number of entries it had
          * and that's all.
          */
-        new->start = md->start;
-        new->length = ((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0) ?
-                      md->length : md->md_niov;
-        new->threshold = md->threshold;
-        new->max_size = md->max_size;
-        new->options = md->options;
-        new->user_ptr = md->user_ptr;
-        ptl_eq2handle(&new->eventq, md->eq);
+        umd->start = lmd->start;
+        umd->length = ((lmd->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0) ?
+                      lmd->length : lmd->md_niov;
+        umd->threshold = lmd->threshold;
+        umd->max_size = lmd->max_size;
+        umd->options = lmd->options;
+        umd->user_ptr = lmd->user_ptr;
+        ptl_eq2handle(&umd->eventq, nal, lmd->eq);
 }
 
-int do_PtlMDAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int 
+lib_api_md_attach(nal_t *apinal, ptl_handle_me_t *meh,
+                  ptl_md_t *umd, ptl_unlink_t unlink, 
+                  ptl_handle_md_t *handle)
 {
-        /*
-         * Incoming:
-         *      ptl_handle_me_t current_in
-         *      ptl_md_t md_in
-         *      ptl_unlink_t unlink_in
-         *
-         * Outgoing:
-         *      ptl_handle_md_t         * handle_out
-         */
-
-        PtlMDAttach_in *args = v_args;
-        PtlMDAttach_out *ret = v_ret;
-        lib_me_t *me;
-        lib_md_t *md;
+        lib_nal_t    *nal = apinal->nal_data;
+        lib_me_t     *me;
+        lib_md_t     *md;
         unsigned long flags;
+        int           rc;
 
-        if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 &&
-            args->md_in.length > PTL_MD_MAX_IOV) /* too many fragments */
-                return (ret->rc = PTL_IOV_INVALID);
+        if ((umd->options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 &&
+            umd->length > PTL_MD_MAX_IOV) /* too many fragments */
+                return PTL_IOV_INVALID;
 
-        md = lib_md_alloc(nal, &args->md_in);
+        md = lib_md_alloc(nal, umd);
         if (md == NULL)
-                return (ret->rc = PTL_NO_SPACE);
+                return PTL_NO_SPACE;
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
-        me = ptl_handle2me(&args->me_in, nal);
+        me = ptl_handle2me(meh, nal);
         if (me == NULL) {
-                ret->rc = PTL_ME_INVALID;
+                rc = PTL_ME_INVALID;
         } else if (me->md != NULL) {
-                ret->rc = PTL_ME_IN_USE;
+                rc = PTL_ME_IN_USE;
         } else {
-                ret->rc = lib_md_build(nal, md, private, &args->md_in,
-                                       &args->eq_in, args->unlink_in);
-
-                if (ret->rc == PTL_OK) {
+                rc = lib_md_build(nal, md, umd, unlink);
+                if (rc == PTL_OK) {
                         me->md = md;
                         md->me = me;
 
-                        ptl_md2handle(&ret->handle_out, md);
+                        ptl_md2handle(handle, nal, md);
 
-                        state_unlock (nal, &flags);
+                        LIB_UNLOCK(nal, flags);
                         return (PTL_OK);
                 }
         }
 
         lib_md_free (nal, md);
 
-        state_unlock (nal, &flags);
-        return (ret->rc);
+        LIB_UNLOCK(nal, flags);
+        return (rc);
 }
 
-int do_PtlMDBind(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int
+lib_api_md_bind(nal_t *apinal, 
+                ptl_md_t *umd, ptl_unlink_t unlink,
+                ptl_handle_md_t *handle)
 {
-        /*
-         * Incoming:
-         *      ptl_handle_ni_t ni_in
-         *      ptl_md_t md_in
-         *
-         * Outgoing:
-         *      ptl_handle_md_t         * handle_out
-         */
-
-        PtlMDBind_in *args = v_args;
-        PtlMDBind_out *ret = v_ret;
-        lib_md_t *md;
+        lib_nal_t    *nal = apinal->nal_data;
+        lib_md_t     *md;
         unsigned long flags;
+        int           rc;
 
-        if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 &&
-            args->md_in.length > PTL_MD_MAX_IOV) /* too many fragments */
-                return (ret->rc = PTL_IOV_INVALID);
+        if ((umd->options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 &&
+            umd->length > PTL_MD_MAX_IOV) /* too many fragments */
+                return PTL_IOV_INVALID;
 
-        md = lib_md_alloc(nal, &args->md_in);
+        md = lib_md_alloc(nal, umd);
         if (md == NULL)
-                return (ret->rc = PTL_NO_SPACE);
+                return PTL_NO_SPACE;
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
-        ret->rc = lib_md_build(nal, md, private, &args->md_in, 
-                               &args->eq_in, args->unlink_in);
+        rc = lib_md_build(nal, md, umd, unlink);
 
-        if (ret->rc == PTL_OK) {
-                ptl_md2handle(&ret->handle_out, md);
+        if (rc == PTL_OK) {
+                ptl_md2handle(handle, nal, md);
 
-                state_unlock(nal, &flags);
+                LIB_UNLOCK(nal, flags);
                 return (PTL_OK);
         }
 
         lib_md_free (nal, md);
 
-        state_unlock(nal, &flags);
-        return (ret->rc);
+        LIB_UNLOCK(nal, flags);
+        return (rc);
 }
 
-int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int
+lib_api_md_unlink (nal_t *apinal, ptl_handle_md_t *mdh)
 {
-        PtlMDUnlink_in  *args = v_args;
-        PtlMDUnlink_out *ret = v_ret;
+        lib_nal_t       *nal = apinal->nal_data;
         ptl_event_t      ev;
         lib_md_t        *md;
         unsigned long    flags;
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
-        md = ptl_handle2md(&args->md_in, nal);
+        md = ptl_handle2md(mdh, nal);
         if (md == NULL) {
-                state_unlock(nal, &flags);
-                return (ret->rc = PTL_MD_INVALID);
+                LIB_UNLOCK(nal, flags);
+                return PTL_MD_INVALID;
         }
 
         /* If the MD is busy, lib_md_unlink just marks it for deletion, and
@@ -356,95 +344,82 @@ int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
                 ev.unlinked = 1;
                 lib_md_deconstruct(nal, md, &ev.mem_desc);
                 
-                lib_enq_event_locked(nal, private, md->eq, &ev);
+                lib_enq_event_locked(nal, NULL, md->eq, &ev);
         }
 
-        lib_md_deconstruct(nal, md, &ret->status_out);
         lib_md_unlink(nal, md);
-        ret->rc = PTL_OK;
 
-        state_unlock(nal, &flags);
-
-        return (PTL_OK);
+        LIB_UNLOCK(nal, flags);
+        return PTL_OK;
 }
 
-int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *v_args,
-                            void *v_ret)
+int
+lib_api_md_update (nal_t *apinal,
+                   ptl_handle_md_t *mdh,
+                   ptl_md_t *oldumd, ptl_md_t *newumd,
+                   ptl_handle_eq_t *testqh)
 {
-        /*
-         * Incoming:
-         *      ptl_handle_md_t md_in
-         *      ptl_md_t                * old_inout
-         *      ptl_md_t                * new_inout
-         *      ptl_handle_eq_t testq_in
-         *      ptl_seq_t               sequence_in
-         *
-         * Outgoing:
-         *      ptl_md_t                * old_inout
-         *      ptl_md_t                * new_inout
-         */
-        PtlMDUpdate_internal_in *args = v_args;
-        PtlMDUpdate_internal_out *ret = v_ret;
-        lib_md_t *md;
-        lib_eq_t *test_eq = NULL;
-        ptl_md_t *new = &args->new_inout;
+        lib_nal_t    *nal = apinal->nal_data;
+        lib_md_t     *md;
+        lib_eq_t     *test_eq = NULL;
         unsigned long flags;
+        int           rc;
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
-        md = ptl_handle2md(&args->md_in, nal);
+        md = ptl_handle2md(mdh, nal);
         if (md == NULL) {
-                 ret->rc = PTL_MD_INVALID;
+                 rc = PTL_MD_INVALID;
                  goto out;
         }
 
-        if (args->old_inout_valid)
-                lib_md_deconstruct(nal, md, &ret->old_inout);
+        if (oldumd != NULL)
+                lib_md_deconstruct(nal, md, oldumd);
 
-        if (!args->new_inout_valid) {
-                ret->rc = PTL_OK;
+        if (newumd == NULL) {
+                rc = PTL_OK;
                 goto out;
         }
 
         /* XXX fttb, the new MD must be the same "shape" wrt fragmentation,
          * since we simply overwrite the old lib-md */
-        if ((((new->options ^ md->options) & 
+        if ((((newumd->options ^ md->options) & 
               (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0) ||
-            ((new->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0 && 
-             new->length != md->md_niov)) {
-                ret->rc = PTL_IOV_INVALID;
+            ((newumd->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0 && 
+             newumd->length != md->md_niov)) {
+                rc = PTL_IOV_INVALID;
                 goto out;
         } 
 
-        if (!PtlHandleIsEqual (args->testq_in, PTL_EQ_NONE)) {
-                test_eq = ptl_handle2eq(&args->testq_in, nal);
+        if (!PtlHandleIsEqual (*testqh, PTL_EQ_NONE)) {
+                test_eq = ptl_handle2eq(testqh, nal);
                 if (test_eq == NULL) {
-                        ret->rc = PTL_EQ_INVALID;
+                        rc = PTL_EQ_INVALID;
                         goto out;
                 }
         }
 
         if (md->pending != 0) {
-                        ret->rc = PTL_MD_NO_UPDATE;
-                        goto out;
+                rc = PTL_MD_NO_UPDATE;
+                goto out;
         }
 
         if (test_eq == NULL ||
-            test_eq->sequence == args->sequence_in) {
+            test_eq->eq_deq_seq == test_eq->eq_enq_seq) {
                 lib_me_t *me = md->me;
                 int       unlink = (md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) ?
                                    PTL_UNLINK : PTL_RETAIN;
 
                 // #warning this does not track eq refcounts properly 
-                ret->rc = lib_md_build(nal, md, private,
-                                       new, &new->eventq, unlink);
+                rc = lib_md_build(nal, md, newumd, unlink);
 
                 md->me = me;
         } else {
-                ret->rc = PTL_MD_NO_UPDATE;
+                rc = PTL_MD_NO_UPDATE;
         }
 
  out:
-        state_unlock(nal, &flags);
-        return (ret->rc);
+        LIB_UNLOCK(nal, flags);
+
+        return rc;
 }
index 271fc82..9665b4f 100644 (file)
 #endif
 
 #include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
 
-static void lib_me_dump(nal_cb_t * nal, lib_me_t * me);
-
-int do_PtlMEAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int
+lib_api_me_attach(nal_t *apinal,
+                  ptl_pt_index_t portal,
+                  ptl_process_id_t match_id, 
+                  ptl_match_bits_t match_bits, 
+                  ptl_match_bits_t ignore_bits,
+                  ptl_unlink_t unlink, ptl_ins_pos_t pos,
+                  ptl_handle_me_t *handle)
 {
-        PtlMEAttach_in *args = v_args;
-        PtlMEAttach_out *ret = v_ret;
-        lib_ni_t *ni = &nal->ni;
-        lib_ptl_t *tbl = &ni->tbl;
+        lib_nal_t    *nal = apinal->nal_data;
+        lib_ni_t     *ni = &nal->libnal_ni;
+        lib_ptl_t    *tbl = &ni->ni_portals;
+        lib_me_t     *me;
         unsigned long flags;
-        lib_me_t *me;
 
-        if (args->index_in >= tbl->size)
-                return ret->rc = PTL_PT_INDEX_INVALID;
+        if (portal >= tbl->size)
+                return PTL_PT_INDEX_INVALID;
 
         /* Should check for valid matchid, but not yet */
-        if (0)
-                return ret->rc = PTL_PROCESS_INVALID;
 
         me = lib_me_alloc (nal);
         if (me == NULL)
-                return (ret->rc = PTL_NO_SPACE);
+                return PTL_NO_SPACE;
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
-        me->match_id = args->match_id_in;
-        me->match_bits = args->match_bits_in;
-        me->ignore_bits = args->ignore_bits_in;
-        me->unlink = args->unlink_in;
+        me->match_id = match_id;
+        me->match_bits = match_bits;
+        me->ignore_bits = ignore_bits;
+        me->unlink = unlink;
         me->md = NULL;
 
         lib_initialise_handle (nal, &me->me_lh, PTL_COOKIE_TYPE_ME);
 
-        if (args->position_in == PTL_INS_AFTER)
-                list_add_tail(&me->me_list, &(tbl->tbl[args->index_in]));
+        if (pos == PTL_INS_AFTER)
+                list_add_tail(&me->me_list, &(tbl->tbl[portal]));
         else
-                list_add(&me->me_list, &(tbl->tbl[args->index_in]));
+                list_add(&me->me_list, &(tbl->tbl[portal]));
 
-        ptl_me2handle(&ret->handle_out, me);
+        ptl_me2handle(handle, nal, me);
 
-        state_unlock(nal, &flags);
+        LIB_UNLOCK(nal, flags);
 
-        return ret->rc = PTL_OK;
+        return PTL_OK;
 }
 
-int do_PtlMEInsert(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int
+lib_api_me_insert(nal_t *apinal,
+                  ptl_handle_me_t *current_meh,
+                  ptl_process_id_t match_id, 
+                  ptl_match_bits_t match_bits, 
+                  ptl_match_bits_t ignore_bits,
+                  ptl_unlink_t unlink, ptl_ins_pos_t pos,
+                  ptl_handle_me_t *handle)
 {
-        PtlMEInsert_in *args = v_args;
-        PtlMEInsert_out *ret = v_ret;
+        lib_nal_t    *nal = apinal->nal_data;
+        lib_me_t     *current_me;
+        lib_me_t     *new_me;
         unsigned long flags;
-        lib_me_t *me;
-        lib_me_t *new;
 
-        new = lib_me_alloc (nal);
-        if (new == NULL)
-                return (ret->rc = PTL_NO_SPACE);
+        new_me = lib_me_alloc (nal);
+        if (new_me == NULL)
+                return PTL_NO_SPACE;
 
         /* Should check for valid matchid, but not yet */
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
-        me = ptl_handle2me(&args->current_in, nal);
-        if (me == NULL) {
-                lib_me_free (nal, new);
+        current_me = ptl_handle2me(current_meh, nal);
+        if (current_me == NULL) {
+                lib_me_free (nal, new_me);
 
-                state_unlock (nal, &flags);
-                return (ret->rc = PTL_ME_INVALID);
+                LIB_UNLOCK(nal, flags);
+                return PTL_ME_INVALID;
         }
 
-        new->match_id = args->match_id_in;
-        new->match_bits = args->match_bits_in;
-        new->ignore_bits = args->ignore_bits_in;
-        new->unlink = args->unlink_in;
-        new->md = NULL;
+        new_me->match_id = match_id;
+        new_me->match_bits = match_bits;
+        new_me->ignore_bits = ignore_bits;
+        new_me->unlink = unlink;
+        new_me->md = NULL;
 
-        lib_initialise_handle (nal, &new->me_lh, PTL_COOKIE_TYPE_ME);
+        lib_initialise_handle (nal, &new_me->me_lh, PTL_COOKIE_TYPE_ME);
 
-        if (args->position_in == PTL_INS_AFTER)
-                list_add_tail(&new->me_list, &me->me_list);
+        if (pos == PTL_INS_AFTER)
+                list_add_tail(&new_me->me_list, &current_me->me_list);
         else
-                list_add(&new->me_list, &me->me_list);
+                list_add(&new_me->me_list, &current_me->me_list);
 
-        ptl_me2handle(&ret->handle_out, new);
+        ptl_me2handle(handle, nal, new_me);
 
-        state_unlock(nal, &flags);
+        LIB_UNLOCK(nal, flags);
 
-        return ret->rc = PTL_OK;
+        return PTL_OK;
 }
 
-int do_PtlMEUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int
+lib_api_me_unlink (nal_t *apinal, ptl_handle_me_t *meh)
 {
-        PtlMEUnlink_in *args = v_args;
-        PtlMEUnlink_out *ret = v_ret;
+        lib_nal_t    *nal = apinal->nal_data;
         unsigned long flags;
-        lib_me_t *me;
+        lib_me_t     *me;
+        int           rc;
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
-        me = ptl_handle2me(&args->current_in, nal);
+        me = ptl_handle2me(meh, nal);
         if (me == NULL) {
-                ret->rc = PTL_ME_INVALID;
+                rc = PTL_ME_INVALID;
         } else {
                 lib_me_unlink(nal, me);
-                ret->rc = PTL_OK;
+                rc = PTL_OK;
         }
 
-        state_unlock(nal, &flags);
+        LIB_UNLOCK(nal, flags);
 
-        return (ret->rc);
+        return (rc);
 }
 
 /* call with state_lock please */
-void lib_me_unlink(nal_cb_t *nal, lib_me_t *me)
+void 
+lib_me_unlink(lib_nal_t *nal, lib_me_t *me)
 {
         list_del (&me->me_list);
 
@@ -157,64 +166,20 @@ void lib_me_unlink(nal_cb_t *nal, lib_me_t *me)
         lib_me_free(nal, me);
 }
 
-int do_PtlTblDump(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+#if 0
+static void 
+lib_me_dump(lib_nal_t *nal, lib_me_t * me)
 {
-        PtlTblDump_in *args = v_args;
-        PtlTblDump_out *ret = v_ret;
-        lib_ptl_t *tbl = &nal->ni.tbl;
-        ptl_handle_any_t handle;
-        struct list_head *tmp;
-        unsigned long flags;
+        CWARN("Match Entry %p ("LPX64")\n", me, 
+              me->me_lh.lh_cookie);
 
-        if (args->index_in < 0 || args->index_in >= tbl->size)
-                return ret->rc = PTL_PT_INDEX_INVALID;
-
-        nal->cb_printf(nal, "Portal table index %d\n", args->index_in);
-
-        state_lock(nal, &flags);
-        list_for_each(tmp, &(tbl->tbl[args->index_in])) {
-                lib_me_t *me = list_entry(tmp, lib_me_t, me_list);
-                ptl_me2handle(&handle, me);
-                lib_me_dump(nal, me);
-        }
-        state_unlock(nal, &flags);
+        CWARN("\tMatch/Ignore\t= %016lx / %016lx\n",
+              me->match_bits, me->ignore_bits);
 
-        return ret->rc = PTL_OK;
-}
-
-int do_PtlMEDump(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
-{
-        PtlMEDump_in *args = v_args;
-        PtlMEDump_out *ret = v_ret;
-        lib_me_t *me;
-        unsigned long flags;
-
-        state_lock(nal, &flags);
-
-        me = ptl_handle2me(&args->current_in, nal);
-        if (me == NULL) {
-                ret->rc = PTL_ME_INVALID;
-        } else {
-                lib_me_dump(nal, me);
-                ret->rc = PTL_OK;
-        }
-
-        state_unlock(nal, &flags);
-
-        return ret->rc;
-}
-
-static void lib_me_dump(nal_cb_t * nal, lib_me_t * me)
-{
-        nal->cb_printf(nal, "Match Entry %p ("LPX64")\n", me, 
-                       me->me_lh.lh_cookie);
-
-        nal->cb_printf(nal, "\tMatch/Ignore\t= %016lx / %016lx\n",
-                       me->match_bits, me->ignore_bits);
-
-        nal->cb_printf(nal, "\tMD\t= %p\n", me->md);
-        nal->cb_printf(nal, "\tprev\t= %p\n",
-                       list_entry(me->me_list.prev, lib_me_t, me_list));
-        nal->cb_printf(nal, "\tnext\t= %p\n",
-                       list_entry(me->me_list.next, lib_me_t, me_list));
+        CWARN("\tMD\t= %p\n", me->md);
+        CWARN("\tprev\t= %p\n",
+              list_entry(me->me_list.prev, lib_me_t, me_list));
+        CWARN("\tnext\t= %p\n",
+              list_entry(me->me_list.next, lib_me_t, me_list));
 }
+#endif
index 477ddf8..9dcc06e 100644 (file)
 #endif
 #include <portals/p30.h>
 #include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
 
 /* forward ref */
-static void lib_commit_md (nal_cb_t *nal, lib_md_t *md, lib_msg_t *msg);
+static void lib_commit_md (lib_nal_t *nal, lib_md_t *md, lib_msg_t *msg);
 
 static lib_md_t *
-lib_match_md(nal_cb_t *nal, int index, int op_mask, 
+lib_match_md(lib_nal_t *nal, int index, int op_mask, 
              ptl_nid_t src_nid, ptl_pid_t src_pid, 
              ptl_size_t rlength, ptl_size_t roffset,
              ptl_match_bits_t match_bits, lib_msg_t *msg,
              ptl_size_t *mlength_out, ptl_size_t *offset_out)
 {
-        lib_ni_t         *ni = &nal->ni;
-        struct list_head *match_list = &ni->tbl.tbl[index];
+        lib_ni_t         *ni = &nal->libnal_ni;
+        struct list_head *match_list = &ni->ni_portals.tbl[index];
         struct list_head *tmp;
         lib_me_t         *me;
         lib_md_t         *md;
@@ -55,9 +54,9 @@ lib_match_md(nal_cb_t *nal, int index, int op_mask,
         CDEBUG (D_NET, "Request from "LPU64".%d of length %d into portal %d "
                 "MB="LPX64"\n", src_nid, src_pid, rlength, index, match_bits);
 
-        if (index < 0 || index >= ni->tbl.size) {
+        if (index < 0 || index >= ni->ni_portals.size) {
                 CERROR("Invalid portal %d not in [0-%d]\n",
-                       index, ni->tbl.size);
+                       index, ni->ni_portals.size);
                 goto failed;
         }
 
@@ -153,66 +152,65 @@ lib_match_md(nal_cb_t *nal, int index, int op_mask,
  failed:
         CERROR (LPU64": Dropping %s from "LPU64".%d portal %d match "LPX64
                 " offset %d length %d: no match\n",
-                ni->nid, (op_mask == PTL_MD_OP_GET) ? "GET" : "PUT",
+                ni->ni_pid.nid, (op_mask == PTL_MD_OP_GET) ? "GET" : "PUT",
                 src_nid, src_pid, index, match_bits, roffset, rlength);
         RETURN(NULL);
 }
 
-int do_PtlFailNid (nal_cb_t *nal, void *private, void *v_args, void *v_ret)
+int lib_api_fail_nid (nal_t *apinal, ptl_nid_t nid, unsigned int threshold)
 {
-        PtlFailNid_in     *args = v_args;
-        PtlFailNid_out    *ret  = v_ret;
+        lib_nal_t         *nal = apinal->nal_data;
         lib_test_peer_t   *tp;
         unsigned long      flags;
         struct list_head  *el;
         struct list_head  *next;
         struct list_head   cull;
         
-        if (args->threshold != 0) {
+        if (threshold != 0) {
                 /* Adding a new entry */
-                tp = (lib_test_peer_t *)nal->cb_malloc (nal, sizeof (*tp));
+                PORTAL_ALLOC(tp, sizeof(*tp));
                 if (tp == NULL)
-                        return (ret->rc = PTL_FAIL);
+                        return PTL_NO_SPACE;
                 
-                tp->tp_nid = args->nid;
-                tp->tp_threshold = args->threshold;
+                tp->tp_nid = nid;
+                tp->tp_threshold = threshold;
                 
-                state_lock (nal, &flags);
-                list_add (&tp->tp_list, &nal->ni.ni_test_peers);
-                state_unlock (nal, &flags);
-                return (ret->rc = PTL_OK);
+                LIB_LOCK(nal, flags);
+                list_add_tail (&tp->tp_list, &nal->libnal_ni.ni_test_peers);
+                LIB_UNLOCK(nal, flags);
+                return PTL_OK;
         }
         
         /* removing entries */
         INIT_LIST_HEAD (&cull);
         
-        state_lock (nal, &flags);
+        LIB_LOCK(nal, flags);
 
-        list_for_each_safe (el, next, &nal->ni.ni_test_peers) {
+        list_for_each_safe (el, next, &nal->libnal_ni.ni_test_peers) {
                 tp = list_entry (el, lib_test_peer_t, tp_list);
                 
                 if (tp->tp_threshold == 0 ||    /* needs culling anyway */
-                    args->nid == PTL_NID_ANY || /* removing all entries */
-                    tp->tp_nid == args->nid)    /* matched this one */
+                    nid == PTL_NID_ANY ||       /* removing all entries */
+                    tp->tp_nid == nid)          /* matched this one */
                 {
                         list_del (&tp->tp_list);
                         list_add (&tp->tp_list, &cull);
                 }
         }
         
-        state_unlock (nal, &flags);
+        LIB_UNLOCK(nal, flags);
                 
         while (!list_empty (&cull)) {
                 tp = list_entry (cull.next, lib_test_peer_t, tp_list);
 
                 list_del (&tp->tp_list);
-                nal->cb_free (nal, tp, sizeof (*tp));
+                PORTAL_FREE(tp, sizeof (*tp));
         }
-        return (ret->rc = PTL_OK);
+        return PTL_OK;
 }
 
 static int
-fail_peer (nal_cb_t *nal, ptl_nid_t nid, int outgoing) 
+fail_peer (lib_nal_t *nal, ptl_nid_t nid, int outgoing) 
 {
         lib_test_peer_t  *tp;
         struct list_head *el;
@@ -223,9 +221,9 @@ fail_peer (nal_cb_t *nal, ptl_nid_t nid, int outgoing)
 
         INIT_LIST_HEAD (&cull);
         
-        state_lock (nal, &flags);
+        LIB_LOCK (nal, flags);
 
-        list_for_each_safe (el, next, &nal->ni.ni_test_peers) {
+        list_for_each_safe (el, next, &nal->libnal_ni.ni_test_peers) {
                 tp = list_entry (el, lib_test_peer_t, tp_list);
 
                 if (tp->tp_threshold == 0) {
@@ -257,13 +255,13 @@ fail_peer (nal_cb_t *nal, ptl_nid_t nid, int outgoing)
                 }
         }
         
-        state_unlock (nal, &flags);
+        LIB_UNLOCK (nal, flags);
 
         while (!list_empty (&cull)) {
                 tp = list_entry (cull.next, lib_test_peer_t, tp_list);
                 list_del (&tp->tp_list);
                 
-                nal->cb_free (nal, tp, sizeof (*tp));
+                PORTAL_FREE(tp, sizeof (*tp));
         }
 
         return (fail);
@@ -554,52 +552,52 @@ lib_extract_kiov (int dst_niov, ptl_kiov_t *dst,
 #endif
 
 ptl_err_t
-lib_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
+lib_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
           ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen)
 {
         if (mlen == 0)
-                return (nal->cb_recv(nal, private, msg,
-                                     0, NULL,
-                                     offset, mlen, rlen));
+                return (nal->libnal_recv(nal, private, msg,
+                                         0, NULL,
+                                         offset, mlen, rlen));
 
         if ((md->options & PTL_MD_KIOV) == 0)
-                return (nal->cb_recv(nal, private, msg,
-                                     md->md_niov, md->md_iov.iov, 
-                                     offset, mlen, rlen));
+                return (nal->libnal_recv(nal, private, msg,
+                                         md->md_niov, md->md_iov.iov, 
+                                         offset, mlen, rlen));
 
-        return (nal->cb_recv_pages(nal, private, msg, 
-                                   md->md_niov, md->md_iov.kiov,
-                                   offset, mlen, rlen));
+        return (nal->libnal_recv_pages(nal, private, msg, 
+                                       md->md_niov, md->md_iov.kiov,
+                                       offset, mlen, rlen));
 }
 
 ptl_err_t
-lib_send (nal_cb_t *nal, void *private, lib_msg_t *msg,
+lib_send (lib_nal_t *nal, void *private, lib_msg_t *msg,
           ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
           lib_md_t *md, ptl_size_t offset, ptl_size_t len) 
 {
         if (len == 0)
-                return (nal->cb_send(nal, private, msg,
-                                     hdr, type, nid, pid,
-                                     0, NULL,
-                                     offset, len));
+                return (nal->libnal_send(nal, private, msg,
+                                         hdr, type, nid, pid,
+                                         0, NULL,
+                                         offset, len));
         
         if ((md->options & PTL_MD_KIOV) == 0)
-                return (nal->cb_send(nal, private, msg, 
-                                     hdr, type, nid, pid,
-                                     md->md_niov, md->md_iov.iov,
-                                     offset, len));
-
-        return (nal->cb_send_pages(nal, private, msg, 
-                                   hdr, type, nid, pid,
-                                   md->md_niov, md->md_iov.kiov,
-                                   offset, len));
+                return (nal->libnal_send(nal, private, msg, 
+                                         hdr, type, nid, pid,
+                                         md->md_niov, md->md_iov.iov,
+                                         offset, len));
+
+        return (nal->libnal_send_pages(nal, private, msg, 
+                                       hdr, type, nid, pid,
+                                       md->md_niov, md->md_iov.kiov,
+                                       offset, len));
 }
 
 static void
-lib_commit_md (nal_cb_t *nal, lib_md_t *md, lib_msg_t *msg)
+lib_commit_md (lib_nal_t *nal, lib_md_t *md, lib_msg_t *msg)
 {
-        /* ALWAYS called holding the state_lock */
-        lib_counters_t *counters = &nal->ni.counters;
+        /* ALWAYS called holding the LIB_LOCK */
+        lib_counters_t *counters = &nal->libnal_ni.ni_counters;
 
         /* Here, we commit the MD to a network OP by marking it busy and
          * decrementing its threshold.  Come what may, the network "owns"
@@ -616,11 +614,11 @@ lib_commit_md (nal_cb_t *nal, lib_md_t *md, lib_msg_t *msg)
         if (counters->msgs_alloc > counters->msgs_max)
                 counters->msgs_max = counters->msgs_alloc;
 
-        list_add (&msg->msg_list, &nal->ni.ni_active_msgs);
+        list_add (&msg->msg_list, &nal->libnal_ni.ni_active_msgs);
 }
 
 static void
-lib_drop_message (nal_cb_t *nal, void *private, ptl_hdr_t *hdr)
+lib_drop_message (lib_nal_t *nal, void *private, ptl_hdr_t *hdr)
 {
         unsigned long flags;
 
@@ -628,10 +626,10 @@ lib_drop_message (nal_cb_t *nal, void *private, ptl_hdr_t *hdr)
          * to receive (init_msg() not called) and therefore can't cause an
          * event. */
         
-        state_lock(nal, &flags);
-        nal->ni.counters.drop_count++;
-        nal->ni.counters.drop_length += hdr->payload_length;
-        state_unlock(nal, &flags);
+        LIB_LOCK(nal, flags);
+        nal->libnal_ni.ni_counters.drop_count++;
+        nal->libnal_ni.ni_counters.drop_length += hdr->payload_length;
+        LIB_UNLOCK(nal, flags);
 
         /* NULL msg => if NAL calls lib_finalize it will be a noop */
         (void) lib_recv(nal, private, NULL, NULL, 0, 0, hdr->payload_length);
@@ -645,9 +643,9 @@ lib_drop_message (nal_cb_t *nal, void *private, ptl_hdr_t *hdr)
  *
  */
 static ptl_err_t
-parse_put(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
+parse_put(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
 {
-        lib_ni_t        *ni = &nal->ni;
+        lib_ni_t        *ni = &nal->libnal_ni;
         ptl_size_t       mlength = 0;
         ptl_size_t       offset = 0;
         ptl_err_t        rc;
@@ -659,7 +657,7 @@ parse_put(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
         hdr->msg.put.ptl_index = NTOH__u32 (hdr->msg.put.ptl_index);
         hdr->msg.put.offset = NTOH__u32 (hdr->msg.put.offset);
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
         md = lib_match_md(nal, hdr->msg.put.ptl_index, PTL_MD_OP_PUT,
                           hdr->src_nid, hdr->src_pid,
@@ -667,7 +665,7 @@ parse_put(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
                           hdr->msg.put.match_bits, msg,
                           &mlength, &offset);
         if (md == NULL) {
-                state_unlock(nal, &flags);
+                LIB_UNLOCK(nal, flags);
                 return (PTL_FAIL);
         }
 
@@ -679,24 +677,24 @@ parse_put(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
                 msg->ack_wmd = hdr->msg.put.ack_wmd;
         }
 
-        ni->counters.recv_count++;
-        ni->counters.recv_length += mlength;
+        ni->ni_counters.recv_count++;
+        ni->ni_counters.recv_length += mlength;
 
-        state_unlock(nal, &flags);
+        LIB_UNLOCK(nal, flags);
 
         rc = lib_recv(nal, private, msg, md, offset, mlength,
                       hdr->payload_length);
         if (rc != PTL_OK)
                 CERROR(LPU64": error on receiving PUT from "LPU64": %d\n",
-                       ni->nid, hdr->src_nid, rc);
+                       ni->ni_pid.nid, hdr->src_nid, rc);
 
         return (rc);
 }
 
 static ptl_err_t
-parse_get(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
+parse_get(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
 {
-        lib_ni_t        *ni = &nal->ni;
+        lib_ni_t        *ni = &nal->libnal_ni;
         ptl_size_t       mlength = 0;
         ptl_size_t       offset = 0;
         lib_md_t        *md;
@@ -710,7 +708,7 @@ parse_get(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
         hdr->msg.get.sink_length = NTOH__u32 (hdr->msg.get.sink_length);
         hdr->msg.get.src_offset = NTOH__u32 (hdr->msg.get.src_offset);
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
         md = lib_match_md(nal, hdr->msg.get.ptl_index, PTL_MD_OP_GET,
                           hdr->src_nid, hdr->src_pid,
@@ -718,24 +716,24 @@ parse_get(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
                           hdr->msg.get.match_bits, msg,
                           &mlength, &offset);
         if (md == NULL) {
-                state_unlock(nal, &flags);
+                LIB_UNLOCK(nal, flags);
                 return (PTL_FAIL);
         }
 
         msg->ev.type = PTL_EVENT_GET_END;
         msg->ev.hdr_data = 0;
 
-        ni->counters.send_count++;
-        ni->counters.send_length += mlength;
+        ni->ni_counters.send_count++;
+        ni->ni_counters.send_length += mlength;
 
-        state_unlock(nal, &flags);
+        LIB_UNLOCK(nal, flags);
 
         memset (&reply, 0, sizeof (reply));
         reply.type     = HTON__u32 (PTL_MSG_REPLY);
         reply.dest_nid = HTON__u64 (hdr->src_nid);
-        reply.src_nid  = HTON__u64 (ni->nid);
         reply.dest_pid = HTON__u32 (hdr->src_pid);
-        reply.src_pid  = HTON__u32 (ni->pid);
+        reply.src_nid  = HTON__u64 (ni->ni_pid.nid);
+        reply.src_pid  = HTON__u32 (ni->ni_pid.pid);
         reply.payload_length = HTON__u32 (mlength);
 
         reply.msg.reply.dst_wmd = hdr->msg.get.return_wmd;
@@ -747,7 +745,7 @@ parse_get(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
                        hdr->src_nid, hdr->src_pid, md, offset, mlength);
         if (rc != PTL_OK)
                 CERROR(LPU64": Unable to send REPLY for GET from "LPU64": %d\n",
-                       ni->nid, hdr->src_nid, rc);
+                       ni->ni_pid.nid, hdr->src_nid, rc);
 
         /* Discard any junk after the hdr */
         (void) lib_recv(nal, private, NULL, NULL, 0, 0, hdr->payload_length);
@@ -756,27 +754,27 @@ parse_get(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
 }
 
 static ptl_err_t
-parse_reply(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
+parse_reply(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
 {
-        lib_ni_t        *ni = &nal->ni;
+        lib_ni_t        *ni = &nal->libnal_ni;
         lib_md_t        *md;
         int              rlength;
         int              length;
         unsigned long    flags;
         ptl_err_t        rc;
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
         /* NB handles only looked up by creator (no flips) */
         md = ptl_wire_handle2md(&hdr->msg.reply.dst_wmd, nal);
         if (md == NULL || md->threshold == 0) {
                 CERROR (LPU64": Dropping REPLY from "LPU64" for %s MD "LPX64"."LPX64"\n",
-                        ni->nid, hdr->src_nid,
+                        ni->ni_pid.nid, hdr->src_nid,
                         md == NULL ? "invalid" : "inactive",
                         hdr->msg.reply.dst_wmd.wh_interface_cookie,
                         hdr->msg.reply.dst_wmd.wh_object_cookie);
 
-                state_unlock(nal, &flags);
+                LIB_UNLOCK(nal, flags);
                 return (PTL_FAIL);
         }
 
@@ -788,10 +786,10 @@ parse_reply(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
                 if ((md->options & PTL_MD_TRUNCATE) == 0) {
                         CERROR (LPU64": Dropping REPLY from "LPU64
                                 " length %d for MD "LPX64" would overflow (%d)\n",
-                                ni->nid, hdr->src_nid, length,
+                                ni->ni_pid.nid, hdr->src_nid, length,
                                 hdr->msg.reply.dst_wmd.wh_object_cookie,
                                 md->length);
-                        state_unlock(nal, &flags);
+                        LIB_UNLOCK(nal, flags);
                         return (PTL_FAIL);
                 }
                 length = md->length;
@@ -812,23 +810,23 @@ parse_reply(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
 
         lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
 
-        ni->counters.recv_count++;
-        ni->counters.recv_length += length;
+        ni->ni_counters.recv_count++;
+        ni->ni_counters.recv_length += length;
 
-        state_unlock(nal, &flags);
+        LIB_UNLOCK(nal, flags);
 
         rc = lib_recv(nal, private, msg, md, 0, length, rlength);
         if (rc != PTL_OK)
                 CERROR(LPU64": error on receiving REPLY from "LPU64": %d\n",
-                       ni->nid, hdr->src_nid, rc);
+                       ni->ni_pid.nid, hdr->src_nid, rc);
 
         return (rc);
 }
 
 static ptl_err_t
-parse_ack(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
+parse_ack(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
 {
-        lib_ni_t      *ni = &nal->ni;
+        lib_ni_t      *ni = &nal->libnal_ni;
         lib_md_t      *md;
         unsigned long  flags;
 
@@ -836,23 +834,23 @@ parse_ack(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
         hdr->msg.ack.match_bits = NTOH__u64 (hdr->msg.ack.match_bits);
         hdr->msg.ack.mlength = NTOH__u32 (hdr->msg.ack.mlength);
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
         /* NB handles only looked up by creator (no flips) */
         md = ptl_wire_handle2md(&hdr->msg.ack.dst_wmd, nal);
         if (md == NULL || md->threshold == 0) {
                 CDEBUG(D_INFO, LPU64": Dropping ACK from "LPU64" to %s MD "
-                       LPX64"."LPX64"\n", ni->nid, hdr->src_nid, 
+                       LPX64"."LPX64"\n", ni->ni_pid.nid, hdr->src_nid, 
                        (md == NULL) ? "invalid" : "inactive",
                        hdr->msg.ack.dst_wmd.wh_interface_cookie,
                        hdr->msg.ack.dst_wmd.wh_object_cookie);
 
-                state_unlock(nal, &flags);
+                LIB_UNLOCK(nal, flags);
                 return (PTL_FAIL);
         }
 
         CDEBUG(D_NET, LPU64": ACK from "LPU64" into md "LPX64"\n",
-               ni->nid, hdr->src_nid, 
+               ni->ni_pid.nid, hdr->src_nid, 
                hdr->msg.ack.dst_wmd.wh_object_cookie);
 
         lib_commit_md(nal, md, msg);
@@ -865,9 +863,9 @@ parse_ack(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
 
         lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
 
-        ni->counters.recv_count++;
+        ni->ni_counters.recv_count++;
 
-        state_unlock(nal, &flags);
+        LIB_UNLOCK(nal, flags);
         
         /* We have received and matched up the ack OK, create the
          * completion event now... */
@@ -898,125 +896,152 @@ hdr_type_string (ptl_hdr_t *hdr)
         }
 }
 
-void print_hdr(nal_cb_t * nal, ptl_hdr_t * hdr)
+void print_hdr(lib_nal_t *nal, ptl_hdr_t * hdr)
 {
         char *type_str = hdr_type_string (hdr);
 
-        nal->cb_printf(nal, "P3 Header at %p of type %s\n", hdr, type_str);
-        nal->cb_printf(nal, "    From nid/pid %Lu/%Lu", hdr->src_nid,
-                       hdr->src_pid);
-        nal->cb_printf(nal, "    To nid/pid %Lu/%Lu\n", hdr->dest_nid,
-                       hdr->dest_pid);
+        CWARN("P3 Header at %p of type %s\n", hdr, type_str);
+        CWARN("    From nid/pid "LPX64"/%u\n", hdr->src_nid, hdr->src_pid);
+        CWARN("    To nid/pid "LPX64"/%u\n", hdr->dest_nid, hdr->dest_pid);
 
         switch (hdr->type) {
         default:
                 break;
 
         case PTL_MSG_PUT:
-                nal->cb_printf(nal,
-                               "    Ptl index %d, ack md "LPX64"."LPX64", "
-                               "match bits "LPX64"\n",
-                               hdr->msg.put.ptl_index,
-                               hdr->msg.put.ack_wmd.wh_interface_cookie,
-                               hdr->msg.put.ack_wmd.wh_object_cookie,
-                               hdr->msg.put.match_bits);
-                nal->cb_printf(nal,
-                               "    Length %d, offset %d, hdr data "LPX64"\n",
-                               hdr->payload_length, hdr->msg.put.offset,
-                               hdr->msg.put.hdr_data);
+                CWARN("    Ptl index %d, ack md "LPX64"."LPX64", "
+                      "match bits "LPX64"\n",
+                      hdr->msg.put.ptl_index,
+                      hdr->msg.put.ack_wmd.wh_interface_cookie,
+                      hdr->msg.put.ack_wmd.wh_object_cookie,
+                      hdr->msg.put.match_bits);
+                CWARN("    Length %d, offset %d, hdr data "LPX64"\n",
+                      hdr->payload_length, hdr->msg.put.offset,
+                      hdr->msg.put.hdr_data);
                 break;
 
         case PTL_MSG_GET:
-                nal->cb_printf(nal,
-                               "    Ptl index %d, return md "LPX64"."LPX64", "
-                               "match bits "LPX64"\n", hdr->msg.get.ptl_index,
-                               hdr->msg.get.return_wmd.wh_interface_cookie,
-                               hdr->msg.get.return_wmd.wh_object_cookie,
-                               hdr->msg.get.match_bits);
-                nal->cb_printf(nal,
-                               "    Length %d, src offset %d\n",
-                               hdr->msg.get.sink_length,
-                               hdr->msg.get.src_offset);
+                CWARN("    Ptl index %d, return md "LPX64"."LPX64", "
+                      "match bits "LPX64"\n", hdr->msg.get.ptl_index,
+                      hdr->msg.get.return_wmd.wh_interface_cookie,
+                      hdr->msg.get.return_wmd.wh_object_cookie,
+                      hdr->msg.get.match_bits);
+                CWARN("    Length %d, src offset %d\n",
+                      hdr->msg.get.sink_length,
+                      hdr->msg.get.src_offset);
                 break;
 
         case PTL_MSG_ACK:
-                nal->cb_printf(nal, "    dst md "LPX64"."LPX64", "
-                               "manipulated length %d\n",
-                               hdr->msg.ack.dst_wmd.wh_interface_cookie,
-                               hdr->msg.ack.dst_wmd.wh_object_cookie,
-                               hdr->msg.ack.mlength);
+                CWARN("    dst md "LPX64"."LPX64", "
+                      "manipulated length %d\n",
+                      hdr->msg.ack.dst_wmd.wh_interface_cookie,
+                      hdr->msg.ack.dst_wmd.wh_object_cookie,
+                      hdr->msg.ack.mlength);
                 break;
 
         case PTL_MSG_REPLY:
-                nal->cb_printf(nal, "    dst md "LPX64"."LPX64", "
-                               "length %d\n",
-                               hdr->msg.reply.dst_wmd.wh_interface_cookie,
-                               hdr->msg.reply.dst_wmd.wh_object_cookie,
-                               hdr->payload_length);
+                CWARN("    dst md "LPX64"."LPX64", "
+                      "length %d\n",
+                      hdr->msg.reply.dst_wmd.wh_interface_cookie,
+                      hdr->msg.reply.dst_wmd.wh_object_cookie,
+                      hdr->payload_length);
         }
 
 }                               /* end of print_hdr() */
 
 
-void 
-lib_parse(nal_cb_t *nal, ptl_hdr_t *hdr, void *private)
+ptl_err_t
+lib_parse(lib_nal_t *nal, ptl_hdr_t *hdr, void *private)
 {
         unsigned long  flags;
         ptl_err_t      rc;
         lib_msg_t     *msg;
+
+        /* NB we return PTL_OK if we manage to parse the header and believe
+         * it looks OK.  Anything that goes wrong with receiving the
+         * message after that point is the responsibility of the NAL */
         
         /* convert common fields to host byte order */
-        hdr->dest_nid = NTOH__u64 (hdr->dest_nid);
+        hdr->type = NTOH__u32 (hdr->type);
         hdr->src_nid = NTOH__u64 (hdr->src_nid);
-        hdr->dest_pid = NTOH__u32 (hdr->dest_pid);
         hdr->src_pid = NTOH__u32 (hdr->src_pid);
-        hdr->type = NTOH__u32 (hdr->type);
+        hdr->dest_pid = NTOH__u32 (hdr->dest_pid);
         hdr->payload_length = NTOH__u32(hdr->payload_length);
-#if 0
-        nal->cb_printf(nal, "%d: lib_parse: nal=%p hdr=%p type=%d\n",
-                       nal->ni.nid, nal, hdr, hdr->type);
-        print_hdr(nal, hdr);
-#endif
-        if (hdr->type == PTL_MSG_HELLO) {
+
+        switch (hdr->type) {
+        case PTL_MSG_HELLO: {
                 /* dest_nid is really ptl_magicversion_t */
                 ptl_magicversion_t *mv = (ptl_magicversion_t *)&hdr->dest_nid;
 
-                CERROR (LPU64": Dropping unexpected HELLO message: "
+                mv->magic = NTOH__u32(mv->magic);
+                mv->version_major = NTOH__u16(mv->version_major);
+                mv->version_minor = NTOH__u16(mv->version_minor);
+
+                if (mv->magic == PORTALS_PROTO_MAGIC &&
+                    mv->version_major == PORTALS_PROTO_VERSION_MAJOR &&
+                    mv->version_minor == PORTALS_PROTO_VERSION_MINOR) {
+                        CWARN (LPU64": Dropping unexpected HELLO message: "
+                               "magic %d, version %d.%d from "LPD64"\n",
+                               nal->libnal_ni.ni_pid.nid, mv->magic, 
+                               mv->version_major, mv->version_minor,
+                               hdr->src_nid);
+
+                        /* it's good but we don't want it */
+                        lib_drop_message(nal, private, hdr);
+                        return PTL_OK;
+                }
+
+                /* we got garbage */
+                CERROR (LPU64": Bad HELLO message: "
                         "magic %d, version %d.%d from "LPD64"\n",
-                        nal->ni.nid, mv->magic, 
+                        nal->libnal_ni.ni_pid.nid, mv->magic, 
                         mv->version_major, mv->version_minor,
                         hdr->src_nid);
-                lib_drop_message(nal, private, hdr);
-                return;
+                return PTL_FAIL;
         }
-        
-        if (hdr->dest_nid != nal->ni.nid) {
-                CERROR(LPU64": Dropping %s message from "LPU64" to "LPU64
-                       " (not me)\n", nal->ni.nid, hdr_type_string (hdr),
-                       hdr->src_nid, hdr->dest_nid);
-                lib_drop_message(nal, private, hdr);
-                return;
+
+        case PTL_MSG_ACK:
+        case PTL_MSG_PUT:
+        case PTL_MSG_GET:
+        case PTL_MSG_REPLY:
+                hdr->dest_nid = NTOH__u64 (hdr->dest_nid);
+                if (hdr->dest_nid != nal->libnal_ni.ni_pid.nid) {
+                        CERROR(LPU64": BAD dest NID in %s message from "
+                               LPU64" to "LPU64" (not me)\n", 
+                               nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr),
+                               hdr->src_nid, hdr->dest_nid);
+                        return PTL_FAIL;
+                }
+                break;
+
+        default:
+                CERROR(LPU64": Bad message type 0x%x from "LPU64"\n",
+                       nal->libnal_ni.ni_pid.nid, hdr->type, hdr->src_nid);
+                return PTL_FAIL;
         }
 
-        if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */
+        /* We've decided we're not receiving garbage since we can parse the
+         * header.  We will return PTL_OK come what may... */
+
+        if (!list_empty (&nal->libnal_ni.ni_test_peers) && /* normally we don't */
             fail_peer (nal, hdr->src_nid, 0))      /* shall we now? */
         {
                 CERROR(LPU64": Dropping incoming %s from "LPU64
                        ": simulated failure\n",
-                       nal->ni.nid, hdr_type_string (hdr), 
+                       nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr), 
                        hdr->src_nid);
                 lib_drop_message(nal, private, hdr);
-                return;
+                return PTL_OK;
         }
 
         msg = lib_msg_alloc(nal);
         if (msg == NULL) {
                 CERROR(LPU64": Dropping incoming %s from "LPU64
                        ": can't allocate a lib_msg_t\n",
-                       nal->ni.nid, hdr_type_string (hdr), 
+                       nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr), 
                        hdr->src_nid);
                 lib_drop_message(nal, private, hdr);
-                return;
+                return PTL_OK;
         }
 
         switch (hdr->type) {
@@ -1033,10 +1058,8 @@ lib_parse(nal_cb_t *nal, ptl_hdr_t *hdr, void *private)
                 rc = parse_reply(nal, hdr, private, msg);
                 break;
         default:
-                CERROR(LPU64": Dropping <unknown> message from "LPU64
-                       ": Bad type=0x%x\n",  nal->ni.nid, hdr->src_nid,
-                       hdr->type);
-                rc = PTL_FAIL;
+                LASSERT(0);
+                rc = PTL_FAIL;                  /* no compiler warning please */
                 break;
         }
                 
@@ -1045,123 +1068,114 @@ lib_parse(nal_cb_t *nal, ptl_hdr_t *hdr, void *private)
                         /* committed... */
                         lib_finalize(nal, private, msg, rc);
                 } else {
-                        state_lock(nal, &flags);
-                        lib_msg_free(nal, msg); /* expects state_lock held */
-                        state_unlock(nal, &flags);
+                        LIB_LOCK(nal, flags);
+                        lib_msg_free(nal, msg); /* expects LIB_LOCK held */
+                        LIB_UNLOCK(nal, flags);
 
                         lib_drop_message(nal, private, hdr);
                 }
         }
+
+        return PTL_OK;
+        /* That's "OK I can parse it", not "OK I like it" :) */
 }
 
 int 
-do_PtlPut(nal_cb_t *nal, void *private, void *v_args, void *v_ret)
+lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh, 
+            ptl_ack_req_t ack, ptl_process_id_t *id,
+            ptl_pt_index_t portal, ptl_ac_index_t ac,
+            ptl_match_bits_t match_bits, 
+            ptl_size_t offset, ptl_hdr_data_t hdr_data)
 {
-        /*
-         * Incoming:
-         *      ptl_handle_md_t md_in
-         *      ptl_ack_req_t ack_req_in
-         *      ptl_process_id_t target_in
-         *      ptl_pt_index_t portal_in
-         *      ptl_ac_index_t cookie_in
-         *      ptl_match_bits_t match_bits_in
-         *      ptl_size_t offset_in
-         *
-         * Outgoing:
-         */
-
-        PtlPut_in        *args = v_args;
-        ptl_process_id_t *id = &args->target_in;
-        PtlPut_out       *ret = v_ret;
-        lib_ni_t         *ni = &nal->ni;
+        lib_nal_t        *nal = apinal->nal_data;
+        lib_ni_t         *ni = &nal->libnal_ni;
         lib_msg_t        *msg;
         ptl_hdr_t         hdr;
         lib_md_t         *md;
         unsigned long     flags;
         int               rc;
         
-        if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */
+        if (!list_empty (&ni->ni_test_peers) && /* normally we don't */
             fail_peer (nal, id->nid, 1))           /* shall we now? */
         {
-                CERROR(LPU64": Dropping PUT to "LPU64": simulated failure\n",
-                       nal->ni.nid, id->nid);
-                return (ret->rc = PTL_PROCESS_INVALID);
+                CERROR("Dropping PUT to "LPU64": simulated failure\n",
+                       id->nid);
+                return PTL_PROCESS_INVALID;
         }
 
         msg = lib_msg_alloc(nal);
         if (msg == NULL) {
                 CERROR(LPU64": Dropping PUT to "LPU64": ENOMEM on lib_msg_t\n",
-                       ni->nid, id->nid);
-                return (ret->rc = PTL_NO_SPACE);
+                       ni->ni_pid.nid, id->nid);
+                return PTL_NO_SPACE;
         }
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
-        md = ptl_handle2md(&args->md_in, nal);
+        md = ptl_handle2md(mdh, nal);
         if (md == NULL || md->threshold == 0) {
                 lib_msg_free(nal, msg);
-                state_unlock(nal, &flags);
+                LIB_UNLOCK(nal, flags);
         
-                return (ret->rc = PTL_MD_INVALID);
+                return PTL_MD_INVALID;
         }
 
-        CDEBUG(D_NET, "PtlPut -> %Lu: %lu\n", (unsigned long long)id->nid,
-               (unsigned long)id->pid);
+        CDEBUG(D_NET, "PtlPut -> "LPX64"\n", id->nid);
 
         memset (&hdr, 0, sizeof (hdr));
         hdr.type     = HTON__u32 (PTL_MSG_PUT);
         hdr.dest_nid = HTON__u64 (id->nid);
-        hdr.src_nid  = HTON__u64 (ni->nid);
         hdr.dest_pid = HTON__u32 (id->pid);
-        hdr.src_pid  = HTON__u32 (ni->pid);
+        hdr.src_nid  = HTON__u64 (ni->ni_pid.nid);
+        hdr.src_pid  = HTON__u32 (ni->ni_pid.pid);
         hdr.payload_length = HTON__u32 (md->length);
 
         /* NB handles only looked up by creator (no flips) */
-        if (args->ack_req_in == PTL_ACK_REQ) {
+        if (ack == PTL_ACK_REQ) {
                 hdr.msg.put.ack_wmd.wh_interface_cookie = ni->ni_interface_cookie;
                 hdr.msg.put.ack_wmd.wh_object_cookie = md->md_lh.lh_cookie;
         } else {
                 hdr.msg.put.ack_wmd = PTL_WIRE_HANDLE_NONE;
         }
 
-        hdr.msg.put.match_bits = HTON__u64 (args->match_bits_in);
-        hdr.msg.put.ptl_index = HTON__u32 (args->portal_in);
-        hdr.msg.put.offset = HTON__u32 (args->offset_in);
-        hdr.msg.put.hdr_data = args->hdr_data_in;
+        hdr.msg.put.match_bits = HTON__u64 (match_bits);
+        hdr.msg.put.ptl_index = HTON__u32 (portal);
+        hdr.msg.put.offset = HTON__u32 (offset);
+        hdr.msg.put.hdr_data = hdr_data;
 
         lib_commit_md(nal, md, msg);
         
         msg->ev.type = PTL_EVENT_SEND_END;
-        msg->ev.initiator.nid = ni->nid;
-        msg->ev.initiator.pid = ni->pid;
-        msg->ev.portal = args->portal_in;
-        msg->ev.match_bits = args->match_bits_in;
+        msg->ev.initiator.nid = ni->ni_pid.nid;
+        msg->ev.initiator.pid = ni->ni_pid.pid;
+        msg->ev.portal = portal;
+        msg->ev.match_bits = match_bits;
         msg->ev.rlength = md->length;
         msg->ev.mlength = md->length;
-        msg->ev.offset = args->offset_in;
-        msg->ev.hdr_data = args->hdr_data_in;
+        msg->ev.offset = offset;
+        msg->ev.hdr_data = hdr_data;
 
         lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
 
-        ni->counters.send_count++;
-        ni->counters.send_length += md->length;
+        ni->ni_counters.send_count++;
+        ni->ni_counters.send_length += md->length;
 
-        state_unlock(nal, &flags);
+        LIB_UNLOCK(nal, flags);
         
-        rc = lib_send (nal, private, msg, &hdr, PTL_MSG_PUT,
+        rc = lib_send (nal, NULL, msg, &hdr, PTL_MSG_PUT,
                        id->nid, id->pid, md, 0, md->length);
         if (rc != PTL_OK) {
-                CERROR(LPU64": error sending PUT to "LPU64": %d\n",
-                       ni->nid, id->nid, rc);
-                lib_finalize (nal, private, msg, rc);
+                CERROR("Error sending PUT to "LPX64": %d\n",
+                       id->nid, rc);
+                lib_finalize (nal, NULL, msg, rc);
         }
         
         /* completion will be signalled by an event */
-        return ret->rc = PTL_OK;
+        return PTL_OK;
 }
 
 lib_msg_t * 
-lib_create_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, lib_msg_t *getmsg)
+lib_create_reply_msg (lib_nal_t *nal, ptl_nid_t peer_nid, lib_msg_t *getmsg)
 {
         /* The NAL can DMA direct to the GET md (i.e. no REPLY msg).  This
          * returns a msg for the NAL to pass to lib_finalize() when the sink
@@ -1170,12 +1184,12 @@ lib_create_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, lib_msg_t *getmsg)
          * CAVEAT EMPTOR: 'getmsg' is the original GET, which is freed when
          * lib_finalize() is called on it, so the NAL must call this first */
 
-        lib_ni_t        *ni = &nal->ni;
+        lib_ni_t        *ni = &nal->libnal_ni;
         lib_msg_t       *msg = lib_msg_alloc(nal);
         lib_md_t        *getmd = getmsg->md;
         unsigned long    flags;
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
         LASSERT (getmd->pending > 0);
 
@@ -1205,72 +1219,60 @@ lib_create_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, lib_msg_t *getmsg)
 
         lib_md_deconstruct(nal, getmd, &msg->ev.mem_desc);
 
-        ni->counters.recv_count++;
-        ni->counters.recv_length += getmd->length;
+        ni->ni_counters.recv_count++;
+        ni->ni_counters.recv_length += getmd->length;
 
-        state_unlock(nal, &flags);
+        LIB_UNLOCK(nal, flags);
 
         return msg;
 
  drop_msg:
         lib_msg_free(nal, msg);
  drop:
-        nal->ni.counters.drop_count++;
-        nal->ni.counters.drop_length += getmd->length;
+        nal->libnal_ni.ni_counters.drop_count++;
+        nal->libnal_ni.ni_counters.drop_length += getmd->length;
 
-        state_unlock (nal, &flags);
+        LIB_UNLOCK (nal, flags);
 
         return NULL;
 }
 
 int 
-do_PtlGet(nal_cb_t *nal, void *private, void *v_args, void *v_ret)
+lib_api_get(nal_t *apinal, ptl_handle_md_t *mdh, ptl_process_id_t *id,
+            ptl_pt_index_t portal, ptl_ac_index_t ac,
+            ptl_match_bits_t match_bits, ptl_size_t offset)
 {
-        /*
-         * Incoming:
-         *      ptl_handle_md_t md_in
-         *      ptl_process_id_t target_in
-         *      ptl_pt_index_t portal_in
-         *      ptl_ac_index_t cookie_in
-         *      ptl_match_bits_t match_bits_in
-         *      ptl_size_t offset_in
-         *
-         * Outgoing:
-         */
-
-        PtlGet_in        *args = v_args;
-        ptl_process_id_t *id = &args->target_in;
-        PtlGet_out       *ret = v_ret;
-        lib_ni_t         *ni = &nal->ni;
+        lib_nal_t        *nal = apinal->nal_data;
+        lib_ni_t         *ni = &nal->libnal_ni;
         lib_msg_t        *msg;
         ptl_hdr_t         hdr;
         lib_md_t         *md;
         unsigned long     flags;
         int               rc;
         
-        if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */
+        if (!list_empty (&ni->ni_test_peers) && /* normally we don't */
             fail_peer (nal, id->nid, 1))           /* shall we now? */
         {
-                CERROR(LPU64": Dropping PUT to "LPU64": simulated failure\n",
-                       nal->ni.nid, id->nid);
-                return (ret->rc = PTL_PROCESS_INVALID);
+                CERROR("Dropping GET to "LPX64": simulated failure\n",
+                       id->nid);
+                return PTL_PROCESS_INVALID;
         }
 
         msg = lib_msg_alloc(nal);
         if (msg == NULL) {
-                CERROR(LPU64": Dropping GET to "LPU64": ENOMEM on lib_msg_t\n",
-                       ni->nid, id->nid);
-                return (ret->rc = PTL_NO_SPACE);
+                CERROR("Dropping GET to "LPU64": ENOMEM on lib_msg_t\n",
+                       id->nid);
+                return PTL_NO_SPACE;
         }
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
-        md = ptl_handle2md(&args->md_in, nal);
+        md = ptl_handle2md(mdh, nal);
         if (md == NULL || !md->threshold) {
                 lib_msg_free(nal, msg);
-                state_unlock(nal, &flags);
+                LIB_UNLOCK(nal, flags);
 
-                return ret->rc = PTL_MD_INVALID;
+                return PTL_MD_INVALID;
         }
 
         CDEBUG(D_NET, "PtlGet -> %Lu: %lu\n", (unsigned long long)id->nid,
@@ -1279,48 +1281,47 @@ do_PtlGet(nal_cb_t *nal, void *private, void *v_args, void *v_ret)
         memset (&hdr, 0, sizeof (hdr));
         hdr.type     = HTON__u32 (PTL_MSG_GET);
         hdr.dest_nid = HTON__u64 (id->nid);
-        hdr.src_nid  = HTON__u64 (ni->nid);
         hdr.dest_pid = HTON__u32 (id->pid);
-        hdr.src_pid  = HTON__u32 (ni->pid);
+        hdr.src_nid  = HTON__u64 (ni->ni_pid.nid);
+        hdr.src_pid  = HTON__u32 (ni->ni_pid.pid);
         hdr.payload_length = 0;
 
         /* NB handles only looked up by creator (no flips) */
         hdr.msg.get.return_wmd.wh_interface_cookie = ni->ni_interface_cookie;
         hdr.msg.get.return_wmd.wh_object_cookie = md->md_lh.lh_cookie;
 
-        hdr.msg.get.match_bits = HTON__u64 (args->match_bits_in);
-        hdr.msg.get.ptl_index = HTON__u32 (args->portal_in);
-        hdr.msg.get.src_offset = HTON__u32 (args->offset_in);
+        hdr.msg.get.match_bits = HTON__u64 (match_bits);
+        hdr.msg.get.ptl_index = HTON__u32 (portal);
+        hdr.msg.get.src_offset = HTON__u32 (offset);
         hdr.msg.get.sink_length = HTON__u32 (md->length);
 
         lib_commit_md(nal, md, msg);
 
         msg->ev.type = PTL_EVENT_SEND_END;
-        msg->ev.initiator.nid = ni->nid;
-        msg->ev.initiator.pid = ni->pid;
-        msg->ev.portal = args->portal_in;
-        msg->ev.match_bits = args->match_bits_in;
+        msg->ev.initiator = ni->ni_pid;
+        msg->ev.portal = portal;
+        msg->ev.match_bits = match_bits;
         msg->ev.rlength = md->length;
         msg->ev.mlength = md->length;
-        msg->ev.offset = args->offset_in;
+        msg->ev.offset = offset;
         msg->ev.hdr_data = 0;
 
         lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
 
-        ni->counters.send_count++;
+        ni->ni_counters.send_count++;
 
-        state_unlock(nal, &flags);
+        LIB_UNLOCK(nal, flags);
 
-        rc = lib_send (nal, private, msg, &hdr, PTL_MSG_GET,
+        rc = lib_send (nal, NULL, msg, &hdr, PTL_MSG_GET,
                        id->nid, id->pid, NULL, 0, 0);
         if (rc != PTL_OK) {
                 CERROR(LPU64": error sending GET to "LPU64": %d\n",
-                       ni->nid, id->nid, rc);
-                lib_finalize (nal, private, msg, rc);
+                       ni->ni_pid.nid, id->nid, rc);
+                lib_finalize (nal, NULL, msg, rc);
         }
         
         /* completion will be signalled by an event */
-        return ret->rc = PTL_OK;
+        return PTL_OK;
 }
 
 void lib_assert_wire_constants (void)
index 1b69533..328b8d8 100644 (file)
 #include <portals/lib-p30.h>
 
 void
-lib_enq_event_locked (nal_cb_t *nal, void *private, 
+lib_enq_event_locked (lib_nal_t *nal, void *private, 
                       lib_eq_t *eq, ptl_event_t *ev)
 {
         ptl_event_t  *eq_slot;
-        int           rc;
         
-        ev->sequence = eq->sequence++; /* Allocate the next queue slot */
-
-        /* size must be a power of 2 to handle a wrapped sequence # */
-        LASSERT (eq->size != 0 &&
-                 eq->size == LOWEST_BIT_SET (eq->size));
-        eq_slot = eq->base + (ev->sequence & (eq->size - 1));
+        ev->sequence = eq->eq_enq_seq++; /* Allocate the next queue slot */
 
-        /* Copy the event into the allocated slot, ensuring all the rest of
-         * the event's contents have been copied _before_ the sequence
-         * number gets updated.  A processes 'getting' an event waits on
-         * the next queue slot's sequence to be 'new'.  When it is, _all_
-         * other event fields had better be consistent.  I assert
-         * 'sequence' is the last member, so I only need a 2 stage copy. */
+        /* size must be a power of 2 to handle sequence # overflow */
+        LASSERT (eq->eq_size != 0 &&
+                 eq->eq_size == LOWEST_BIT_SET (eq->eq_size));
+        eq_slot = eq->eq_events + (ev->sequence & (eq->eq_size - 1));
 
-        LASSERT(sizeof (ptl_event_t) ==
-                offsetof(ptl_event_t, sequence) + sizeof(ev->sequence));
+        /* There is no race since both event consumers and event producers
+         * take the LIB_LOCK(), so we don't screw around with memory
+         * barriers, setting the sequence number last or weird structure
+         * layout assertions. */
+        *eq_slot = *ev;
 
-        rc = nal->cb_write (nal, private, (user_ptr)eq_slot, ev,
-                            offsetof (ptl_event_t, sequence));
-        LASSERT (rc == PTL_OK);
+        /* Call the callback handler (if any) */
+        if (eq->eq_callback != NULL)
+                eq->eq_callback (eq_slot);
 
+        /* Wake anyone sleeping for an event (see lib-eq.c) */
 #ifdef __KERNEL__
-        barrier();
-#endif
-        /* Updating the sequence number is what makes the event 'new' NB if
-         * the cb_write below isn't atomic, this could cause a race with
-         * PtlEQGet */
-        rc = nal->cb_write(nal, private, (user_ptr)&eq_slot->sequence,
-                           (void *)&ev->sequence,sizeof (ev->sequence));
-        LASSERT (rc == PTL_OK);
-
-#ifdef __KERNEL__
-        barrier();
+        if (waitqueue_active(&nal->libnal_ni.ni_waitq))
+                wake_up_all(&nal->libnal_ni.ni_waitq);
+#else
+        pthread_cond_broadcast(&nal->libnal_ni.ni_cond);
 #endif
-
-        if (nal->cb_callback != NULL)
-                nal->cb_callback(nal, private, eq, ev);
-        else if (eq->event_callback != NULL)
-                eq->event_callback(ev);
 }
 
 void 
-lib_finalize(nal_cb_t *nal, void *private, lib_msg_t *msg, ptl_err_t status)
+lib_finalize (lib_nal_t *nal, void *private, lib_msg_t *msg, ptl_err_t status)
 {
         lib_md_t     *md;
         int           unlink;
@@ -101,9 +85,9 @@ lib_finalize(nal_cb_t *nal, void *private, lib_msg_t *msg, ptl_err_t status)
                 memset (&ack, 0, sizeof (ack));
                 ack.type     = HTON__u32 (PTL_MSG_ACK);
                 ack.dest_nid = HTON__u64 (msg->ev.initiator.nid);
-                ack.src_nid  = HTON__u64 (nal->ni.nid);
                 ack.dest_pid = HTON__u32 (msg->ev.initiator.pid);
-                ack.src_pid  = HTON__u32 (nal->ni.pid);
+                ack.src_nid  = HTON__u64 (nal->libnal_ni.ni_pid.nid);
+                ack.src_pid  = HTON__u32 (nal->libnal_ni.ni_pid.pid);
                 ack.payload_length = 0;
 
                 ack.msg.ack.dst_wmd = msg->ack_wmd;
@@ -122,7 +106,7 @@ lib_finalize(nal_cb_t *nal, void *private, lib_msg_t *msg, ptl_err_t status)
 
         md = msg->md;
 
-        state_lock(nal, &flags);
+        LIB_LOCK(nal, flags);
 
         /* Now it's safe to drop my caller's ref */
         md->pending--;
@@ -148,8 +132,8 @@ lib_finalize(nal_cb_t *nal, void *private, lib_msg_t *msg, ptl_err_t status)
                 lib_md_unlink(nal, md);
 
         list_del (&msg->msg_list);
-        nal->ni.counters.msgs_alloc--;
+        nal->libnal_ni.ni_counters.msgs_alloc--;
         lib_msg_free(nal, msg);
 
-        state_unlock(nal, &flags);
+        LIB_UNLOCK(nal, flags);
 }
index aa959fc..0f298a0 100644 (file)
 
 #define DEBUG_SUBSYSTEM S_PORTALS
 #include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
 
 #define MAX_DIST 18446744073709551615ULL
 
-int do_PtlNIStatus(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int lib_api_ni_status (nal_t *apinal, ptl_sr_index_t sr_idx,
+                       ptl_sr_value_t *status)
 {
-        /*
-         * Incoming:
-         *      ptl_handle_ni_t interface_in
-         *      ptl_sr_index_t register_in
-         *
-         * Outgoing:
-         *      ptl_sr_value_t          * status_out
-         */
-
-        PtlNIStatus_in *args = v_args;
-        PtlNIStatus_out *ret = v_ret;
-        lib_ni_t *ni = &nal->ni;
-        lib_counters_t *count = &ni->counters;
-
-        if (!args)
-                return ret->rc = PTL_SEGV;
-
-        ret->rc = PTL_OK;
-        ret->status_out = 0;
-
-        /*
-         * I hate this sort of code....  Hash tables, offset lists?
-         * Treat the counters as an array of ints?
-         */
-        if (args->register_in == PTL_SR_DROP_COUNT)
-                ret->status_out = count->drop_count;
-
-        else if (args->register_in == PTL_SR_DROP_LENGTH)
-                ret->status_out = count->drop_length;
-
-        else if (args->register_in == PTL_SR_RECV_COUNT)
-                ret->status_out = count->recv_count;
-
-        else if (args->register_in == PTL_SR_RECV_LENGTH)
-                ret->status_out = count->recv_length;
-
-        else if (args->register_in == PTL_SR_SEND_COUNT)
-                ret->status_out = count->send_count;
-
-        else if (args->register_in == PTL_SR_SEND_LENGTH)
-                ret->status_out = count->send_length;
-
-        else if (args->register_in == PTL_SR_MSGS_MAX)
-                ret->status_out = count->msgs_max;
-        else
-                ret->rc = PTL_SR_INDEX_INVALID;
-
-        return ret->rc;
+        lib_nal_t      *nal = apinal->nal_data;
+        lib_ni_t       *ni = &nal->libnal_ni;
+        lib_counters_t *count = &ni->ni_counters;
+
+        switch (sr_idx) {
+        case PTL_SR_DROP_COUNT:
+                *status = count->drop_count;
+                return PTL_OK;
+        case PTL_SR_DROP_LENGTH:
+                *status = count->drop_length;
+                return PTL_OK;
+        case PTL_SR_RECV_COUNT:
+                *status = count->recv_count;
+                return PTL_OK;
+        case PTL_SR_RECV_LENGTH:
+                *status = count->recv_length;
+                return PTL_OK;
+        case PTL_SR_SEND_COUNT:
+                *status = count->send_count;
+                return PTL_OK;
+        case PTL_SR_SEND_LENGTH:
+                *status = count->send_length;
+                return PTL_OK;
+        case PTL_SR_MSGS_MAX:
+                *status = count->msgs_max;
+                return PTL_OK;
+        default:
+                *status = 0;
+                return PTL_SR_INDEX_INVALID;
+        }
 }
 
 
-int do_PtlNIDist(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int lib_api_ni_dist (nal_t *apinal, ptl_process_id_t *pid, unsigned long *dist)
 {
-        /*
-         * Incoming:
-         *      ptl_handle_ni_t interface_in
-         *      ptl_process_id_t process_in
-
-         *
-         * Outgoing:
-         *      unsigned long   * distance_out
-
-         */
-
-        PtlNIDist_in *args = v_args;
-        PtlNIDist_out *ret = v_ret;
-
-        unsigned long dist;
-        ptl_process_id_t id_in = args->process_in;
-        ptl_nid_t nid;
-        int rc;
-
-        nid = id_in.nid;
-
-        if ((rc = nal->cb_dist(nal, nid, &dist)) != 0) {
-                ret->distance_out = (unsigned long) MAX_DIST;
-                return PTL_PROCESS_INVALID;
-        }
-
-        ret->distance_out = dist;
+        lib_nal_t *nal = apinal->nal_data;
 
-        return ret->rc = PTL_OK;
+        return (nal->libnal_dist(nal, pid->nid, dist));
 }
index 12eebb5..ff2a601 100644 (file)
@@ -35,24 +35,12 @@ extern int getpid(void);
 #       include <unistd.h>
 #endif
 #include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
 
-int do_PtlGetId(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int
+lib_api_get_id(nal_t *apinal, ptl_process_id_t *pid)
 {
-        /*
-         * Incoming:
-         *      ptl_handle_ni_t handle_in
-         *
-         * Outgoing:
-         *      ptl_process_id_t        * id_out
-         *      ptl_id_t                * gsize_out
-         */
-
-        PtlGetId_out *ret = v_ret;
-        lib_ni_t *ni = &nal->ni;
-
-        ret->id_out.nid = ni->nid;
-        ret->id_out.pid = ni->pid;
-
-        return ret->rc = PTL_OK;
+        lib_nal_t *nal = apinal->nal_data;
+        
+        *pid = nal->libnal_ni.ni_pid;
+        return PTL_OK;
 }
index 40e9da4..5615a72 100644 (file)
@@ -160,7 +160,6 @@ EXPORT_SYMBOL(ptl_register_nal);
 EXPORT_SYMBOL(ptl_unregister_nal);
 
 EXPORT_SYMBOL(ptl_err_str);
-EXPORT_SYMBOL(lib_dispatch);
 EXPORT_SYMBOL(PtlMEAttach);
 EXPORT_SYMBOL(PtlMEInsert);
 EXPORT_SYMBOL(PtlMEUnlink);
@@ -192,7 +191,6 @@ EXPORT_SYMBOL(lib_parse);
 EXPORT_SYMBOL(lib_create_reply_msg);
 EXPORT_SYMBOL(lib_init);
 EXPORT_SYMBOL(lib_fini);
-EXPORT_SYMBOL(dispatch_name);
 
 MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
 MODULE_DESCRIPTION("Portals v3.1");
index 6507924..f329e2a 100644 (file)
@@ -91,8 +91,8 @@ void set_address(bridge t,ptl_pid_t pidrequest)
     int port;
     if (pidrequest==(unsigned short)PTL_PID_ANY) port = 0;
     else port=pidrequest;
-    t->nal_cb->ni.nid=get_node_id();
-    t->nal_cb->ni.pid=port;
+    t->lib_nal->libnal_ni.ni_pid.nid=get_node_id();
+    t->lib_nal->libnal_ni.ni_pid.pid=port;
 }
 #else
 
@@ -120,10 +120,9 @@ void set_address(bridge t,ptl_pid_t pidrequest)
     in_addr = get_node_id();
 
     t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */
-    t->nal_cb->ni.nid = ((in_addr & PNAL_HOSTID_MASK) 
-                            << PNAL_VNODE_SHIFT)
-        + virtnode;
-
+    t->lib_nal->libnal_ni.ni_pid.nid = ((in_addr & PNAL_HOSTID_MASK) 
+                                        << PNAL_VNODE_SHIFT)
+                                       + virtnode;
     pid=pidrequest;
     /* TODO: Support of pid PTL_ID_ANY with virtual nodes needs more work. */
 #ifdef notyet
@@ -141,6 +140,6 @@ void set_address(bridge t,ptl_pid_t pidrequest)
             return;
         }
     else port = ((virtnode << PNAL_VNODE_SHIFT) + pid) + PNAL_BASE_PORT;
-    t->nal_cb->ni.pid=pid;
+    t->lib_nal->libnal_ni.ni_pid.pid=pid;
 }
 #endif
index 90ce324..d2f0f2c 100644 (file)
@@ -19,7 +19,7 @@
 
 typedef struct bridge {
     int alive;
-    nal_cb_t *nal_cb;
+    lib_nal_t *lib_nal;
     void *lower;
     void *local;
     void (*shutdown)(struct bridge *);
index e40c4b9..f3843d7 100644 (file)
@@ -60,34 +60,6 @@ void procbridge_wakeup_nal(procbridge p)
     syscall(SYS_write, p->notifier[0], buf, sizeof(buf));
 }
 
-/* Function: forward
- * Arguments: nal_t *nal: pointer to my top-side nal structure
- *            id: the command to pass to the lower layer
- *            args, args_len:pointer to and length of the request
- *            ret, ret_len:  pointer to and size of the result
- * Returns: a portals status code
- *
- * forwards a packaged api call from the 'api' side to the 'library'
- *   side, and collects the result
- */
-static int procbridge_forward(nal_t *n, int id, void *args, size_t args_len,
-                             void *ret, size_t ret_len)
-{
-    bridge b = (bridge) n->nal_data;
-
-    if (id == PTL_FINI) {
-            lib_fini(b->nal_cb);
-
-            if (b->shutdown)
-                (*b->shutdown)(b);
-    }
-
-    lib_dispatch(b->nal_cb, NULL, id, args, ret);
-
-    return (PTL_OK);
-}
-
-
 /* Function: shutdown
  * Arguments: nal: a pointer to my top side nal structure
  *            ni: my network interface index
@@ -97,7 +69,8 @@ static int procbridge_forward(nal_t *n, int id, void *args, size_t args_len,
  */
 static void procbridge_shutdown(nal_t *n)
 {
-    bridge b=(bridge)n->nal_data;
+    lib_nal_t *nal = n->nal_data;
+    bridge b=(bridge)nal->libnal_data;
     procbridge p=(procbridge)b->local;
 
     p->nal_flags |= NAL_FLAG_STOPPING;
@@ -117,83 +90,19 @@ static void procbridge_shutdown(nal_t *n)
 }
 
 
-static void procbridge_lock(nal_t * n, unsigned long *flags)
-{
-    bridge b=(bridge)n->nal_data;
-    procbridge p=(procbridge)b->local;
-
-    pthread_mutex_lock(&p->mutex);
-}
-
-static void procbridge_unlock(nal_t * n, unsigned long *flags)
-{
-    bridge b=(bridge)n->nal_data;
-    procbridge p=(procbridge)b->local;
-
-    pthread_mutex_unlock(&p->mutex);
-}
-
-/* Function: yield
- * Arguments:  pid:
- *
- *  this function was originally intended to allow the
- *   lower half thread to be scheduled to allow progress. we
- *   overload it to explicitly block until signalled by the
- *   lower half.
- */
-static int procbridge_yield(nal_t *n, unsigned long *flags, int milliseconds)
-{
-    bridge b=(bridge)n->nal_data;
-    procbridge p=(procbridge)b->local;
-
-    if (milliseconds == 0)
-            return 0;
-            
-    if (milliseconds < 0) {
-        pthread_cond_wait(&p->cond,&p->mutex);
-    } else {
-        struct timeval then;
-        struct timeval now;
-        struct timespec timeout;
-
-        gettimeofday(&then, NULL);
-        timeout.tv_sec = then.tv_sec + milliseconds/1000;
-        timeout.tv_nsec = then.tv_usec * 1000 + milliseconds % 1000 * 1000000;
-        if (timeout.tv_nsec >= 1000000000) {
-                timeout.tv_sec++;
-                timeout.tv_nsec -= 1000000000;
-        }
-
-        pthread_cond_timedwait(&p->cond, &p->mutex, &timeout);
-
-        gettimeofday(&now, NULL);
-        milliseconds -= (now.tv_sec - then.tv_sec) * 1000 + 
-                        (now.tv_usec - then.tv_usec) / 1000;
-        
-        if (milliseconds < 0)
-                milliseconds = 0;
-    }
-
-    return (milliseconds);
-}
-
 /* forward decl */
 extern int procbridge_startup (nal_t *, ptl_pid_t,
                                ptl_ni_limits_t *, ptl_ni_limits_t *);
 
 /* api_nal
  *  the interface vector to allow the generic code to access
- *  this nal. this is seperate from the library side nal_cb.
+ *  this nal. this is separate from the library side lib_nal.
  *  TODO: should be dyanmically allocated
  */
 nal_t procapi_nal = {
     nal_data: NULL,
-    startup:  procbridge_startup,
-    shutdown: procbridge_shutdown,
-    forward:  procbridge_forward,
-    yield:    procbridge_yield,
-    lock:     procbridge_lock,
-    unlock:   procbridge_unlock
+    nal_ni_init: procbridge_startup,
+    nal_ni_fini: procbridge_shutdown,
 };
 
 ptl_nid_t tcpnal_mynid;
@@ -228,7 +137,6 @@ int procbridge_startup (nal_t *nal, ptl_pid_t requested_pid,
 
     b=(bridge)malloc(sizeof(struct bridge));
     p=(procbridge)malloc(sizeof(struct procbridge));
-    nal->nal_data=b;
     b->local=p;
 
     args.nia_requested_pid = requested_pid;
@@ -236,6 +144,7 @@ int procbridge_startup (nal_t *nal, ptl_pid_t requested_pid,
     args.nia_actual_limits = actual_limits;
     args.nia_nal_type = nal_type;
     args.nia_bridge = b;
+    args.nia_apinal = nal;
 
     /* init procbridge */
     pthread_mutex_init(&p->mutex,0);
@@ -273,7 +182,7 @@ int procbridge_startup (nal_t *nal, ptl_pid_t requested_pid,
     if (p->nal_flags & NAL_FLAG_STOPPED)
         return PTL_FAIL;
 
-    b->nal_cb->ni.nid = tcpnal_mynid;
+    b->lib_nal->libnal_ni.ni_pid.nid = tcpnal_mynid;
 
     return PTL_OK;
 }
index 1c8e7dd..1f91ced 100644 (file)
@@ -30,7 +30,6 @@ typedef struct procbridge {
 
     int nal_flags;
 
-    pthread_mutex_t nal_cb_lock;
 } *procbridge;
 
 typedef struct nal_init_args {
@@ -39,6 +38,7 @@ typedef struct nal_init_args {
     ptl_ni_limits_t *nia_actual_limits;
     int              nia_nal_type;
     bridge           nia_bridge;
+    nal_t           *nia_apinal;
 } nal_init_args_t;
 
 extern void *nal_thread(void *);
index af0745b..7ee7c71 100644 (file)
 /* the following functions are stubs to satisfy the nal definition
    without doing anything particularily useful*/
 
-static ptl_err_t nal_write(nal_cb_t *nal,
-                           void *private,
-                           user_ptr dst_addr,
-                           void *src_addr,
-                           size_t len)
-{
-    memcpy(dst_addr, src_addr, len);
-    return PTL_OK;
-}
-
-static ptl_err_t nal_read(nal_cb_t * nal,
-                          void *private,
-                          void *dst_addr,
-                          user_ptr src_addr,
-                          size_t len)
-{
-       memcpy(dst_addr, src_addr, len);
-       return PTL_OK;
-}
-
-static void *nal_malloc(nal_cb_t *nal,
-                        size_t len)
-{
-    void *buf =  malloc(len);
-    return buf;
-}
-
-static void nal_free(nal_cb_t *nal,
-                     void *buf,
-                     size_t len)
-{
-    free(buf);
-}
-
-static void nal_printf(nal_cb_t *nal,
-                       const char *fmt,
-                       ...)
-{
-    va_list        ap;
-
-    va_start(ap, fmt);
-    vprintf(fmt, ap);
-    va_end(ap);
-}
-
-
-static void nal_cli(nal_cb_t *nal,
-                    unsigned long *flags)
-{
-    bridge b = (bridge) nal->nal_data;
-    procbridge p = (procbridge) b->local;
-
-    pthread_mutex_lock(&p->mutex);
-}
-
-
-static void nal_sti(nal_cb_t *nal,
-                    unsigned long *flags)
-{
-    bridge b = (bridge)nal->nal_data;
-    procbridge p = (procbridge) b->local;
-
-    pthread_mutex_unlock(&p->mutex);
-}
-
-static void nal_callback(nal_cb_t *nal, void *private,
-                         lib_eq_t *eq, ptl_event_t *ev)
-{
-        bridge b = (bridge)nal->nal_data;
-        procbridge p = (procbridge) b->local;
-
-        /* holding p->mutex */
-        if (eq->event_callback != NULL)
-                eq->event_callback(ev);
-        
-        pthread_cond_broadcast(&p->cond);
-}
-
-static int nal_dist(nal_cb_t *nal,
+static int nal_dist(lib_nal_t *nal,
                     ptl_nid_t nid,
                     unsigned long *dist)
 {
@@ -170,33 +92,25 @@ void *nal_thread(void *z)
     ptl_process_id_t process_id;
     int nal_type;
     
-    b->nal_cb=(nal_cb_t *)malloc(sizeof(nal_cb_t));
-    b->nal_cb->nal_data=b;
-    b->nal_cb->cb_read=nal_read;
-    b->nal_cb->cb_write=nal_write;
-    b->nal_cb->cb_malloc=nal_malloc;
-    b->nal_cb->cb_free=nal_free;
-    b->nal_cb->cb_map=NULL;
-    b->nal_cb->cb_unmap=NULL;
-    b->nal_cb->cb_printf=nal_printf;
-    b->nal_cb->cb_cli=nal_cli;
-    b->nal_cb->cb_sti=nal_sti;
-    b->nal_cb->cb_callback=nal_callback;
-    b->nal_cb->cb_dist=nal_dist;
+    b->lib_nal=(lib_nal_t *)malloc(sizeof(lib_nal_t));
+    b->lib_nal->libnal_data=b;
+    b->lib_nal->libnal_map=NULL;
+    b->lib_nal->libnal_unmap=NULL;
+    b->lib_nal->libnal_dist=nal_dist;
 
     nal_type = args->nia_nal_type;
 
-    /* Wierd, but this sets b->nal_cb->ni.{nid,pid}, which lib_init() is
-     * about to do from the process_id passed to it...*/
+    /* Wierd, but this sets b->lib_nal->libnal_ni.ni_pid.{nid,pid}, which
+     * lib_init() is about to do from the process_id passed to it...*/
     set_address(b,args->nia_requested_pid);
 
-    process_id.pid = b->nal_cb->ni.pid;
-    process_id.nid = b->nal_cb->ni.nid;
+    process_id = b->lib_nal->libnal_ni.ni_pid;
     
     if (nal_table[nal_type]) rc=(*nal_table[nal_type])(b);
     /* initialize the generic 'library' level code */
 
-    rc = lib_init(b->nal_cb, process_id, 
+    rc = lib_init(b->lib_nal, args->nia_apinal, 
+                  process_id, 
                   args->nia_requested_limits, 
                   args->nia_actual_limits);
 
index 0c47f42..34a9c9d 100644 (file)
@@ -55,7 +55,7 @@
  *
  * sends a packet to the peer, after insuring that a connection exists
  */
-ptl_err_t tcpnal_send(nal_cb_t *n,
+ptl_err_t tcpnal_send(lib_nal_t *n,
                       void *private,
                       lib_msg_t *cookie,
                       ptl_hdr_t *hdr,
@@ -68,7 +68,7 @@ ptl_err_t tcpnal_send(nal_cb_t *n,
                       size_t len)
 {
     connection c;
-    bridge b=(bridge)n->nal_data;
+    bridge b=(bridge)n->libnal_data;
     struct iovec tiov[257];
     static pthread_mutex_t send_lock = PTHREAD_MUTEX_INITIALIZER;
     ptl_err_t rc = PTL_OK;
@@ -142,7 +142,7 @@ ptl_err_t tcpnal_send(nal_cb_t *n,
 
 
 /* Function:  tcpnal_recv
- * Arguments: nal_cb_t *nal:     pointer to my nal control block
+ * Arguments: lib_nal_t *nal:    pointer to my nal control block
  *            void *private:     connection pointer passed through
  *                               lib_parse()
  *            lib_msg_t *cookie: passed back to portals library
@@ -154,7 +154,7 @@ ptl_err_t tcpnal_send(nal_cb_t *n,
  * blocking read of the requested data. must drain out the
  * difference of mainpulated and requested lengths from the network
  */
-ptl_err_t tcpnal_recv(nal_cb_t *n,
+ptl_err_t tcpnal_recv(lib_nal_t *n,
                       void *private,
                       lib_msg_t *cookie,
                       unsigned int niov,
@@ -217,7 +217,8 @@ static int from_connection(void *a, void *d)
     ptl_hdr_t hdr;
 
     if (read_connection(c, (unsigned char *)&hdr, sizeof(hdr))){
-        lib_parse(b->nal_cb, &hdr, c);
+        lib_parse(b->lib_nal, &hdr, c);
+        /*TODO: check error status*/
         return(1);
     }
     return(0);
@@ -239,19 +240,19 @@ int tcpnal_init(bridge b)
 {
     manager m;
         
-    b->nal_cb->cb_send=tcpnal_send;
-    b->nal_cb->cb_recv=tcpnal_recv;
+    b->lib_nal->libnal_send=tcpnal_send;
+    b->lib_nal->libnal_recv=tcpnal_recv;
     b->shutdown=tcpnal_shutdown;
     
-    if (!(m=init_connections(PNAL_PORT(b->nal_cb->ni.nid,
-                                       b->nal_cb->ni.pid),
+    if (!(m=init_connections(PNAL_PORT(b->lib_nal->libnal_ni.ni_pid.nid,
+                                       b->lib_nal->libnal_ni.ni_pid.pid),
                              from_connection,b))){
         /* TODO: this needs to shut down the
            newly created junk */
         return(PTL_NAL_FAILED);
     }
     /* XXX cfs hack */
-    b->nal_cb->ni.pid=0;
+    b->lib_nal->libnal_ni.ni_pid.pid=0;
     b->lower=m;
     return(PTL_OK);
 }
index 0446dc8..6b76199 100644 (file)
@@ -3,9 +3,11 @@
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
+if MODULES
 if !LINUX25
 modulefs_DATA = ptlbd$(KMODEXT)
 endif
+endif
 
 MOSTLYCLEANFILES = *.o *.ko *.mod.c
 DIST_SOURCES = $(ptlbd-objs:%.o=%.c)
index d865e1f..254ae30 100644 (file)
@@ -37,7 +37,7 @@ static void cray_portals_callback(ptl_event_t *ev);
 #endif
 
 
-struct ptlrpc_ni  ptlrpc_interfaces[NAL_MAX_NR];
+struct ptlrpc_ni  ptlrpc_interfaces[8];
 int               ptlrpc_ninterfaces;
 
 /*  
@@ -597,12 +597,16 @@ int ptlrpc_init_portals(void)
                 int   number;
                 char *name;
         } ptl_nis[] = {
+#ifndef CRAY_PORTALS
                 {QSWNAL,  "qswnal"},
                 {SOCKNAL, "socknal"},
                 {GMNAL,   "gmnal"},
                 {IBNAL,   "ibnal"},
                 {TCPNAL,  "tcpnal"},
-                {CRAY_KB_ERNAL, "cray_kb_ernal"}};
+#else
+                {CRAY_KB_ERNAL, "cray_kb_ernal"},
+#endif
+        };
         int   rc;
         int   i;
 
index 81f46dc..3fca883 100644 (file)
@@ -32,8 +32,6 @@ struct obd_import;
 struct ldlm_res_id;
 struct ptlrpc_request_set;
 
-void ptlrpc_daemonize(void);
-
 void ptlrpc_request_handle_notconn(struct ptlrpc_request *);
 void lustre_assert_wire_constants(void);
 int ptlrpc_import_in_recovery(struct obd_import *imp);
index 6f3ce27..26ae032 100644 (file)
@@ -134,6 +134,7 @@ EXPORT_SYMBOL(ptlrpc_stop_all_threads);
 EXPORT_SYMBOL(ptlrpc_start_n_threads);
 EXPORT_SYMBOL(ptlrpc_start_thread);
 EXPORT_SYMBOL(ptlrpc_unregister_service);
+EXPORT_SYMBOL(ptlrpc_daemonize);
 
 /* pack_generic.c */
 EXPORT_SYMBOL(lustre_msg_swabbed);
index dab1e90..45325c7 100755 (executable)
@@ -1,12 +1,27 @@
 #!/bin/bash
+# Put this script and cvs-modified-files.pl into your PATH (~/bin is good) and
+#
+# export CVSEDITOR=cvsdiffclient
+#
+# in your .bashrc and you will get a nice bunch of CVS commit reminders:
+# <merge/land tag information>
+# b=<bug_number>
+# r=<reviewed by>
+#
+# Remember to remove the leading "CVS: " part of the comment before saving
+# your commit comment if you want those entries to be saved.
 
 [ -f .mergeinfo ] && . ./.mergeinfo
 
 FILES=`cvs-modified-files.pl $1`
 TMP=`mktemp /tmp/cvslog-XXXXXXXX`
 if [ -f $TMP ]; then
-       [ -f .mergeinfo ] && \
-               echo "CVS: Update $child from $parent ($date)" >> $TMP
+       if [ -f .mergeinfo ]; then
+               . .mergeinfo
+               [ "$OPERATION" ] || OPERATION=Update
+               [ "$OPERWHERE" ] || OPERWHERE=from
+               echo "CVS: $OPERATION $child $OPERWHERE $parent ($date)" >> $TMP
+       fi
        echo "CVS: did you update the ChangeLog for a bug fix?" >> $TMP
        echo "CVS: b=" >> $TMP
        echo "CVS: r=" >> $TMP
index c3a0468..08f559a 100755 (executable)
@@ -27,20 +27,32 @@ module=lustre
 
 case $parent in
   HEAD) : ;;
-  b_*|b1*) : ;;
+  b_*|b[1-4]*) : ;;
   *) parent="b_$parent" ;;
 esac
 case $child in
   HEAD) : ;;
-  b_*|b1*) : ;;
+  b_*|b[1-4]*) : ;;
   *) child="b_$child"
 esac
 
 if [ "$parent" != "HEAD" -a "`cat CVS/Tag 2> /dev/null`" != "T$parent" ]; then
-        echo "This script must be run within the $parent branch"
+        echo "$0: this script must be run within the $parent branch"
        exit 1
 fi
 
+TEST_FILE=${TEST_FILE:-ChangeLog} # does this need to be smarter?
+# check_tag TAG: verify that TAG appears in the "cvs log" output of
+# $TEST_FILE.  HEAD is always accepted.  Exits the script with status 3
+# when called without an argument and with status 2 when the tag is
+# not found.
+check_tag() {
+       [ -z "$1" ] && echo "check_tag() missing arg" && exit 3
+       [ "$1" = "HEAD" ] && return
+       $CVS log $TEST_FILE | grep -q " $1: " && return
+       echo "$0: tag $1 not found in $TEST_FILE"
+       exit 2
+}
+
+check_tag $child
+check_tag ${CHILD}_BASE
+
 dir=$3
 
 cat << EOF > .mergeinfo
@@ -52,6 +64,8 @@ date=$date
 module=$module
 dir=$dir
 CONFLICTS=$CONFLICTS
+OPERATION=Land
+OPERWHERE=onto
 EOF
 
 echo PARENT $PARENT parent $parent CHILD $CHILD child $child date $date
index 9f92230..2e93089 100755 (executable)
@@ -358,35 +358,40 @@ install_kernel()
     install -m 644 "$CONFIG_FILE" "$DESTDIR/boot/config-${FULL_VERSION}"
 
     mkdir -p "$DESTDIR/dev/shm"
+    mkdir -p "$DESTDIR/lib/modules/${FULL_VERSION}"
+
+    make CC="$CC" INSTALL_MOD_PATH="$DESTDIR" KERNELRELEASE="$FULL_VERSION" \
+       -s modules_install || \
+       fatal 1 "Error installing modules."
 
     case "$TARGET_ARCH" in
        i386 | i586 | i686 | athlon)
            cp arch/i386/boot/bzImage "$DESTDIR/boot/vmlinuz-${FULL_VERSION}"
-           cp vmlinux "$DESTDIR/boot/vmlinux-${FULL_VERSION}"
+           cp vmlinux "$DESTDIR/lib/modules/${FULL_VERSION}/"
+           ln -sf "../lib/modules/${FULL_VERSION}/vmlinux" "$DESTDIR/boot/vmlinux-${FULL_VERSION}"
            ;;
        x86_64)
            cp arch/x86_64/boot/bzImage "$DESTDIR/boot/vmlinuz-${FULL_VERSION}"
-           cp vmlinux "$DESTDIR/boot/vmlinux-${FULL_VERSION}"
+           cp vmlinux "$DESTDIR/lib/modules/${FULL_VERSION}/"
+           ln -sf "../lib/modules/${FULL_VERSION}/vmlinux" "$DESTDIR/boot/vmlinux-${FULL_VERSION}"
            ;;
        ia64)
            gzip -cfv vmlinux > vmlinuz
            mkdir -p "$DESTDIR/boot/efi/redhat"
-           install -m 755 vmlinux "$DESTDIR/boot/efi/redhat/vmlinux-${FULL_VERSION}"
+           install -m 755 vmlinux "$DESTDIR/lib/modules/${FULL_VERSION}/"
            install -m 755 vmlinuz "$DESTDIR/boot/efi/redhat/vmlinuz-${FULL_VERSION}"
+           ln -sf "../lib/modules/${FULL_VERSION}/vmlinux" "$DESTDIR/boot/efi/redhat/vmlinux-${FULL_VERSION}"
            ln -sf "efi/redhat/vmlinux-${FULL_VERSION}" "$DESTDIR/boot/vmlinux-${FULL_VERSION}"
            ln -sf "efi/redhat/vmlinuz-${FULL_VERSION}" "$DESTDIR/boot/vmlinuz-${FULL_VERSION}"
            ;;
        *)
            cp vmlinuz "$DESTDIR/boot/vmlinuz-${FULL_VERSION}"
-           cp vmlinux "$DESTDIR/boot/vmlinux-${FULL_VERSION}"
+           cp vmlinux "$DESTDIR/lib/modules/${FULL_VERSION}/vmlinux-${FULL_VERSION}"
+           ln -sf "../lib/modules/${FULL_VERSION}/vmlinux-${FULL_VERSION}" "$DESTDIR/boot/vmlinux-${FULL_VERSION}"
+
            ;;
     esac
 
-    mkdir -p "$DESTDIR/lib/modules/${FULL_VERSION}"
-    make CC="$CC" INSTALL_MOD_PATH="$DESTDIR" KERNELRELEASE="$FULL_VERSION" \
-       -s modules_install || \
-       fatal 1 "Error installing modules."
-
     popd >/dev/null
 }
 
index 9ae6368..f177c17 100644 (file)
@@ -21,6 +21,7 @@ Summary: The Linux kernel (the core of the Linux operating system)
 %define nptlarchs %{all_x86}
 #define nptlarchs noarch
 %define rhbuild @RHBUILD@
+%define linux26 @LINUX26@
 
 # disable build root strip policy
 %define __spec_install_post /usr/lib/rpm/brp-compress || :
@@ -444,7 +445,8 @@ ln -sf linux-%{KVERREL} $RPM_BUILD_ROOT/usr/src/linux
 
 #clean up the destination
 make -s mrproper -C $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}
-rm -rf $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/configs/*
+rm -rf $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/configs
+mkdir -p $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/configs
 cp ../kernel_patches/kernel_configs/kernel-%{kversion}-@LUSTRE_TARGET@*.config $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/configs
 cp ../kernel_patches/kernel_configs/kernel-%{kversion}-@LUSTRE_TARGET@-%{_target_cpu}%{dashtargetboard}.config $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/.config
 if grep -q oldconfig_nonint $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/Makefile ; then
@@ -453,7 +455,11 @@ else
        OLDCONFIG='oldconfig'
 fi
 make -s $OLDCONFIG -C $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}
+%if %{linux26}
+make -s include/asm -C $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}
+%else
 make -s symlinks -C $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}
+%endif
 make -s include/linux/version.h -C $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}
 
 #this generates modversions info which we want to include and we may as
@@ -753,7 +759,6 @@ exit 0
 /usr/src/linux-%{KVERREL}/Makefile
 /usr/src/linux-%{KVERREL}/README
 /usr/src/linux-%{KVERREL}/REPORTING-BUGS
-/usr/src/linux-%{KVERREL}/Rules.make
 /usr/src/linux-%{KVERREL}/arch
 %ifarch sparc
 /usr/src/linux-%{KVERREL}/arch/sparc64
@@ -795,6 +800,14 @@ exit 0
 %ifarch alpha sparc
 /usr/src/linux-%{KVERREL}/include/math-emu
 %endif
+%if %{linux26}
+%dir /usr/src/linux-%{KVERREL}/crypto
+%dir /usr/src/linux-%{KVERREL}/kdb
+%dir /usr/src/linux-%{KVERREL}/rpmify
+%dir /usr/src/linux-%{KVERREL}/security
+%else
+/usr/src/linux-%{KVERREL}/Rules.make
+%endif
 %endif
 
 %files doc
index ac074d7..5fefc71 100755 (executable)
@@ -27,12 +27,12 @@ module=lustre
 
 case $parent in
   HEAD) : ;;
-  b_*|b1*) : ;;
+  b_*|b[1-4]*) : ;;
   *) parent="b_$parent" ;;
 esac
 case $child in
   HEAD) : ;;
-  b_*|b1*) : ;;
+  b_*|b[1-4]*) : ;;
   *) child="b_$child"
 esac
 
@@ -41,6 +41,18 @@ if [ "$child" != "HEAD" -a "`cat CVS/Tag 2> /dev/null`" != "T$child" ]; then
        exit 1
 fi
 
+TEST_FILE=${TEST_FILE:-ChangeLog} # does this need to be smarter?
+# check_tag TAG: verify that TAG appears in the "cvs log" output of
+# $TEST_FILE.  HEAD is always accepted.  Exits the script with status 3
+# when called without an argument and with status 2 when the tag is
+# not found.
+check_tag() {
+       [ -z "$1" ] && echo "check_tag() missing arg" && exit 3
+       [ "$1" = "HEAD" ] && return
+       $CVS log $TEST_FILE | grep -q " $1: " && return
+       echo "$0: tag $1 not found in $TEST_FILE"
+       exit 2
+}
+
+check_tag $parent
+check_tag ${CHILD}_BASE
+
 cat << EOF > .mergeinfo
 parent=$parent
 PARENT=$PARENT
@@ -49,6 +61,8 @@ CHILD=$CHILD
 date=$date
 module=$module
 CONFLICTS=$CONFLICTS
+OPERATION=Merge
+OPERWHERE=from
 EOF
 
 echo PARENT: $PARENT parent: $parent CHILD: $CHILD child: $child date: $date
index 0b00c70..bc148be 100644 (file)
@@ -64,3 +64,4 @@ ostactive
 ll_dirstripe_verify
 openfilleddirunlink
 copy_attr
+rename_many
index 1430099..a74483c 100644 (file)
@@ -20,7 +20,7 @@ noinst_PROGRAMS += opendirunlink opendevunlink unlinkmany fchdir_test checkstat
 noinst_PROGRAMS += wantedi statone runas openfile getdents mkdirdeep o_directory
 noinst_PROGRAMS += small_write multiop sleeptest ll_sparseness_verify cmknod
 noinst_PROGRAMS += ll_sparseness_write mrename ll_dirstripe_verify copy_attr
-noinst_PROGRAMS += openfilleddirunlink
+noinst_PROGRAMS += openfilleddirunlink rename_many
 # noinst_PROGRAMS += ldaptest
 bin_PROGRAMS = mcreate munlink mkdirmany iopentest1 iopentest2
 endif # TESTS
@@ -68,6 +68,7 @@ mkdirdeep_SOURCES = mkdirdeep.c
 mkdirdeep_LDADD=-L$(top_builddir)/portals/utils -lptlctl $(LIBREADLINE)
 small_write_SOURCES = small_write.c
 sleeptest_SOURCES = sleeptest.c
+rename_many_SOURCES = rename_many.c
 #write_append_truncate_SOURCES=write_append_truncate.c
 #write_append_truncate_CC=mpicc
 #createmany_mpi_SOURCES=createmany_mpi.c
index 38effed..9d7ca4b 100644 (file)
@@ -24,7 +24,7 @@ MDSSIZE=${MDSSIZE:-10000}
 OSTDEV=${OSTDEV:-$ROOT/tmp/ost1-`hostname`}
 OSTSIZE=${OSTSIZE:-50000}
 FSTYPE=${FSTYPE:-ext3}
-TIMEOUT=${TIMEOUT:-10}
+TIMEOUT=${TIMEOUT:-20}
 UPCALL=${UPCALL:-$PWD/replay-single-upcall.sh}
 
 STRIPE_BYTES=${STRIPE_BYTES:-524288}
index 3c6763f..9c310e5 100755 (executable)
@@ -221,9 +221,22 @@ test_15() {
 }
 run_test 15 "failed open (-ENOMEM)"
 
+# Write 0 to every llite read_ahead file under /proc/fs/lustre/llite.
+stop_read_ahead() {
+   for f in /proc/fs/lustre/llite/*/read_ahead; do 
+      echo 0 > $f
+   done
+}
+
+# Write 1 to every llite read_ahead file under /proc/fs/lustre/llite.
+start_read_ahead() {
+   for f in /proc/fs/lustre/llite/*/read_ahead; do 
+      echo 1 > $f
+   done
+}
+
 test_16() {
     do_facet client cp /etc/termcap $MOUNT
     sync
+    stop_read_ahead
 
 #define OBD_FAIL_PTLRPC_BULK_PUT_NET 0x504 | OBD_FAIL_ONCE
     sysctl -w lustre.fail_loc=0x80000504
@@ -234,6 +247,7 @@ test_16() {
     # give recovery a chance to finish (shouldn't take long)
     sleep $TIMEOUT
     do_facet client "cmp /etc/termcap $MOUNT/termcap"  || return 2
+    start_read_ahead
 }
 run_test 16 "timeout bulk put, evict client (2732)"
 
diff --git a/lustre/tests/rename_many.c b/lustre/tests/rename_many.c
new file mode 100644 (file)
index 0000000..faf5085
--- /dev/null
@@ -0,0 +1,263 @@
+#define PATH_LENGTH 35
+#include <math.h>
+#include <signal.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <getopt.h>
+
+/*
+ * One entry per test file: the name it is created under ("from") and
+ * the name it is renamed to ("to").  The array is allocated in main()
+ * with file_count entries.
+ */
+struct names {
+       char from[PATH_LENGTH];
+       char to[PATH_LENGTH];
+} *names;
+
+/* Run parameters and state shared between main() and handler(). */
+unsigned int loop_count = 500;  /* number of test loops (-n) */
+int file_count = 1000;          /* files handled per loop (-f) */
+int seed;                       /* base seed (-s); main() adds the loop number */
+int loops;                      /* loops completed so far */
+int stop;                       /* set by handler() to end the main loop */
+long start;                     /* time(0) taken when the test starts */
+
+/*
+ * Option flags and error counters.  Despite its name, a non-zero
+ * opt_exit_on_err (-x) makes main() carry on after an error; when it
+ * is zero the first error makes main() return.
+ */
+int opt_exit_on_err;
+int opt_verbose;
+int opt_create_only;
+int opt_rename_only;
+int creat_errors;
+int rename_errors;
+int unlink_errors;
+
+/*
+ * Print the command-line synopsis and the meaning of each option to
+ * stderr.  progname is the name shown in the synopsis line.
+ */
+void usage(const char *progname)
+{
+       fprintf(stderr, "usage: %s [-c] [-f numfiles] [-n numloops] [-r] "
+               "[-s seed] [-v] [-x] [dir]\n"
+               "\t-c: only do the create step of first loop\n"
+               "\t-f: number of files to create/rename/unlink per loop\n"
+               "\t-n: number of test loops (0 to run forever)\n"
+               "\t-r: only do the rename step of first loop\n"
+               "\t-s: starting seed (equals loop number by default)\n"
+               "\t-v: verbose\n"
+               "\t-x: don't exit on error\n", progname);
+}
+
+/*
+ * Signal handler and progress reporter.  Prints the elapsed time, the
+ * loop count and the create/rename/unlink error counters, then re-arms
+ * a 60 second alarm.  main() also calls it directly with sig == 0 to
+ * print a plain status line.
+ *
+ * SIGQUIT requests a stop at once; SIGINT requests a stop only when it
+ * arrives less than two seconds after the previous SIGINT.
+ */
+void handler(int sig)
+{
+       static long prev_sigint;        /* time of the previous SIGINT */
+       const long current = time(0);
+       const long elapsed = current - start;
+
+       signal(SIGINT, handler);
+       signal(SIGALRM, handler);
+
+       printf("%6lds %8d iterations %d/%d/%d errors",
+              elapsed, loops, creat_errors, rename_errors, unlink_errors);
+       if (sig == 0)
+               printf("\n");
+       else
+               printf(" - use SIGQUIT (^\\) or ^C^C to kill\n");
+
+       switch (sig) {
+       case SIGQUIT:
+               stop = 1;
+               break;
+       case SIGINT:
+               if (current - prev_sigint < 2)
+                       stop = 1;
+               prev_sigint = current;
+               break;
+       default:
+               break;
+       }
+
+       alarm(60);
+}
+
+extern char *optarg;
+extern int optind;
+
+/*
+ * rename_many: in each loop create a scratch directory "tmp" (inside
+ * the optional [dir] argument, else the current directory), create
+ * file_count randomly named files in it, rename every file, unlink
+ * every file and remove the directory again.
+ *
+ * Returns 0 on success, 1 on a setup/option failure, 2 when the target
+ * directory cannot be entered and 4 on the first create/rename/unlink
+ * error unless -x was given.
+ */
+int main(int argc, char *argv[])
+{
+       unsigned long n;
+       char msg[100], *end = NULL;
+       int c;  /* getopt() returns int; a plain char may never equal EOF */
+       int h1, h2;
+       int i;
+
+       while ((c = getopt(argc, argv, "cf:n:rs:vx")) != EOF) {
+               switch(c) {
+               case 'c':
+                       ++opt_create_only;
+                       break;
+               case 'f':
+                       i = strtoul(optarg, &end, 0);
+                       /* reject zero, wrapped/negative values and junk */
+                       if (i > 0 && *end == '\0') {
+                               file_count = i;
+                       } else {
+                               fprintf(stderr, "bad file count '%s'\n",optarg);
+                               usage(argv[0]);
+                               return 1;
+                       }
+                       break;
+               case 'n':
+                       i = strtoul(optarg, &end, 0);
+                       /* 0 is valid here: it means "run forever" */
+                       if (i >= 0 && end != optarg && *end == '\0') {
+                               loop_count = i;
+                       } else {
+                               fprintf(stderr, "bad loop count '%s'\n",optarg);
+                               usage(argv[0]);
+                               return 1;
+                       }
+                       break;
+               case 'r':
+                       ++opt_rename_only;
+                       break;
+               case 's':
+                       i = strtoul(optarg, &end, 0);
+                       if (end && *end == '\0') {
+                               seed = i;
+                       } else {
+                               fprintf(stderr, "bad seed '%s'\n", optarg);
+                               usage(argv[0]);
+                               return 1;
+                       }
+                       break;
+               case 'v':
+                       ++opt_verbose;
+                       break;
+               case 'x':
+                       ++opt_exit_on_err;
+                       break;
+               default:
+                       usage(argv[0]);
+                       return 1;
+               }
+       }
+
+       names = malloc(sizeof(*names) * file_count);
+       if (names == NULL) {
+               perror("malloc");
+               return(1);
+       }
+
+       /*
+        * Each name is four random hex fields of h1 digits, one marker
+        * digit (0 = from, 1 = to) and the file index in h2 hex digits;
+        * h1 is sized so the result fits in PATH_LENGTH including the NUL.
+        */
+       h2 = sprintf(msg, "%x", file_count); /* just to figure length */
+       h1 = (PATH_LENGTH - h2 - 2) / 4;
+
+       /* mask that keeps a random value within h1 hex digits */
+       n = (1ULL << h1 * 4) - 1;
+
+       //printf("h1 = %d, h2 = %d n = %lu\n", h1, h2, n);
+
+       start = time(0);
+
+       signal(SIGQUIT, handler);
+       signal(SIGINT, handler);
+       signal(SIGALRM, handler);
+       signal(SIGUSR1, handler);
+       alarm(60);
+
+       if (argc > optind + 1) {
+               fprintf(stderr, "too many extra args %d\n", argc - optind);
+               usage(argv[0]);
+               return 1;
+       } else if (argv[optind] != NULL) {
+               if (chdir(argv[optind]) < 0) {
+                       /* bounded: the path comes from the command line */
+                       snprintf(msg, sizeof(msg), "chdir '%s'", argv[optind]);
+                       perror(msg);
+                       return 2;
+               }
+       }
+
+       /* loop_count == 0 means "run until told to stop" */
+       while (!stop &&
+              (loop_count == 0 || (unsigned int)loops < loop_count)) {
+               int j,k,l,m;
+
+               /* random() is seeded by srandom(), not srand() */
+               srandom(seed + loops);
+               if (mkdir("tmp", S_IRWXU) == -1) {
+                       perror("mkdir tmp");
+                       return(1);
+               }
+               if (chdir("tmp") == -1) {
+                       perror("chdir tmp");
+                       return(1);
+               }
+
+               for (i = 0; i < file_count ; i++) {
+                       j = random() & n;
+                       k = random() & n;
+                       l = random() & n;
+                       m = random() & n;
+                       sprintf(names[i].from, "%0*x%0*x%0*x%0*x0%0*x",
+                               h1, j, h1, k, h1, l, h1, m, h2, i);
+                       sprintf(names[i].to, "%0*x%0*x%0*x%0*x1%0*x",
+                               h1, j, h1, k, h1, l, h1, m, h2, i);
+
+               }
+
+               for (i = 0; i < file_count; i++) {
+                       if (mknod(names[i].from, S_IFREG | S_IRWXU, 0) == -1) {
+                               snprintf(msg, sizeof(msg),
+                                        "loop %d.%d: creat %s",
+                                        loops, i, names[i].from);
+                               perror(msg);
+                               creat_errors++;
+                               if (!opt_exit_on_err)
+                                       return 4;
+                       }
+               }
+
+               if (opt_create_only)
+                       return 0;
+
+               for (i = 0; i < file_count; i++) {
+                       if (rename(names[i].from, names[i].to) == -1) {
+                               snprintf(msg, sizeof(msg),
+                                        "loop %d.%d: rename %s to %s",
+                                        loops, i, names[i].from, names[i].to);
+                               perror(msg);
+                               rename_errors++;
+                               if (!opt_exit_on_err)
+                                       return 4;
+                       }
+               }
+
+               if (opt_rename_only)
+                       return 0;
+
+               for (i = 0; i < file_count; i++) {
+                       if (unlink(names[i].to) == -1) {
+                               snprintf(msg, sizeof(msg),
+                                        "loop %d.%d: unlink %s",
+                                        loops, i, names[i].to);
+                               perror(msg);
+                               unlink_errors++;
+                               if (!opt_exit_on_err)
+                                       return 4;
+                       }
+               }
+
+               if (chdir("..") == -1) {
+                       perror("chdir ..");
+                       return(1);
+               }
+
+               if (rmdir("tmp") == -1) {
+                       /*
+                        * The directory is not empty: look for "from"
+                        * names that should have been renamed away.
+                        */
+                       if (chdir("tmp") == -1) {
+                               perror("chdir tmp 2");
+                               return(1);
+                       }
+                       for (i = 0; i < file_count; i++) {
+                               if (unlink(names[i].from) != -1) {
+                                       /* report the name actually found */
+                                       fprintf(stderr, "loop %d.%d: "
+                                               "unexpected file %s\n",
+                                               loops, i, names[i].from);
+                                       unlink_errors++;
+                                       if (!opt_exit_on_err)
+                                               return 4;
+                               }
+                       }
+                       if (chdir("..") == -1) {
+                               perror("chdir .. 2");
+                               return(1);
+                       }
+                       if (rmdir("tmp") == -1) {
+                               perror("rmdir tmp");
+                               return(1);
+                       }
+               }
+
+               loops++;
+               if (opt_verbose)
+                       handler(0);
+       }
+
+       /* verbose mode already printed a status line after every loop */
+       if (!opt_verbose)
+               handler(0);
+       return(0);
+}
index 3c84d8d..c91837b 100755 (executable)
@@ -175,6 +175,27 @@ test_6() {
 }
 run_test 6 "open1, open2, unlink |X| close1 [fail mds] close2"
 
+# open1, open2, unlink |X| close2 [fail mds] close1:
+# one file is held open through both mount points, unlinked, and then
+# closed on either side of an MDS failure.
+# Returns 1 if either multiop exits non-zero, 2 if the file is still
+# visible through $MOUNT2 afterwards, 0 otherwise.
+test_7() {
+    mcreate $MOUNT1/a
+    # pid1 holds the file via $MOUNT2, pid2 via $MOUNT1
+    multiop $MOUNT2/a o_c &
+    pid1=$!
+    multiop $MOUNT1/a o_c &
+    pid2=$!
+    # give multiop a chance to open
+    sleep 1
+    rm -f $MOUNT1/a
+    replay_barrier mds
+    # presumably USR1 makes multiop go on to its close step -- confirm
+    kill -USR1 $pid2
+    wait $pid2 || return 1
+
+    fail mds
+    kill -USR1 $pid1
+    wait $pid1 || return 1
+    # the unlinked file must not be visible any more
+    [ -e $MOUNT2/a ] && return 2
+    return 0
+}
+run_test 7 "open1, open2, unlink |X| close2 [fail mds] close1"
+
 if [ "$ONLY" != "setup" ]; then
        equals_msg test complete, cleaning up
        cleanup
index c0d023b..a65ba63 100755 (executable)
@@ -876,7 +876,7 @@ test_42() {
 run_test 42 "recovery after ost failure"
 
 # b=2530
-# directory orphans can't be unlinked from PENDING directory
+# timeout in MDS/OST recovery RPC will LBUG MDS
 test_43() {
     replay_barrier mds
 
@@ -936,6 +936,31 @@ test_46() {
 }
 run_test 46 "Don't leak file handle after open resend (3325)"
 
+# b=2824
+# MDS->OSC failure during precreate cleanup (bug 2824).
+# Returns 1-4 to identify the step that failed, 0 on success.
+# NOTE(review): fail_loc is only reset on the success path.
+test_47() {
+
+    # create some files to make sure precreate has been done on all 
+    # OSTs. (just in case this test is run independently)
+    createmany -o $DIR/$tfile 20  || return 1
+
+    # OBD_FAIL_OST_CREATE_NET 0x204
+    fail ost
+    do_facet ost "sysctl -w lustre.fail_loc=0x80000204"
+    df $MOUNT || return 2
+
+    # let the MDS discover the OST failure, attempt to recover, fail
+    # and recover again.  
+    sleep $((3 * TIMEOUT))
+
+    # Without 2824, this createmany would hang 
+    createmany -o $DIR/$tfile 20 || return 3
+    unlinkmany $DIR/$tfile 20 || return 4
+
+    # clear the injected failure again
+    do_facet ost "sysctl -w lustre.fail_loc=0"
+    return 0
+}
+run_test 47 "MDS->OSC failure during precreate cleanup (2824)"
+
 equals_msg test complete, cleaning up
 $CLEANUP
 
index 451030a..073b1e5 100644 (file)
@@ -10,6 +10,9 @@ ONLY=${ONLY:-"$*"}
 # bug number for skipped test: 2108
 ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"24j 48c 48d 58"}
 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
+case `uname -r` in
+2.6.*) ALWAYS_EXCEPT="$ALWAYS_EXCEPT 54c 55" # bug 3117
+esac
 
 [ "$ALWAYS_EXCEPT$EXCEPT" ] && echo "Skipping tests: $ALWAYS_EXCEPT $EXCEPT"
 
@@ -17,6 +20,7 @@ SRCDIR=`dirname $0`
 export PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
 
 TMP=${TMP:-/tmp}
+FSTYPE=${FSTYPE:-ext3}
 
 CHECKSTAT=${CHECKSTAT:-"checkstat -v"}
 CREATETEST=${CREATETEST:-createtest}
@@ -190,11 +194,7 @@ build_test_filter
 echo preparing for tests involving mounts
 EXT2_DEV=${EXT2_DEV:-/tmp/SANITY.LOOP}
 touch $EXT2_DEV
-mke2fs -F $EXT2_DEV 1000 > /dev/null
-
-EXT3_DEV=${EXT3_DEV:-/tmp/SANITY_EXT3_DEV.LOOP}
-touch $EXT3_DEV
-mkfs.ext3 -F $EXT3_DEV 10000 > /dev/null
+mke2fs -j -F $EXT2_DEV 8000 > /dev/null
 
 test_0() {
        touch $DIR/f
@@ -663,6 +663,12 @@ test_24n() {
 }
 run_test 24n "Statting the old file after renameing (Posix rename 2)"
 
+# Run rename_many in $DIR with seed 3287, verbose output and 10 loops.
+# The test is skipped (returns 0) when check_kernel_version 37 fails.
+test_24o() {
+       check_kernel_version 37 || return 0
+       rename_many -s 3287 -v -n 10 $DIR
+}
+run_test 24o "rename of files during htree split ==============="
+
 test_25a() {
        echo '== symlink sanity ============================================='
        mkdir $DIR/d25
@@ -1576,7 +1582,7 @@ test_48c() { # bug 2350
        #set -vx
        mkdir -p $DIR/d48c/dir
        cd $DIR/d48c/dir
-       rmdir $DIR/d48c/dir || error "remove cwd $DIR/d48c/dir failed"
+       $TRACE rmdir $DIR/d48c/dir || error "remove cwd $DIR/d48c/dir failed"
        $TRACE touch foo && error "'touch foo' worked after removing cwd"
        $TRACE mkdir foo && error "'mkdir foo' worked after removing cwd"
        $TRACE ls . && error "'ls .' worked after removing cwd"
@@ -1585,7 +1591,7 @@ test_48c() { # bug 2350
        $TRACE mkdir . && error "'mkdir .' worked after removing cwd"
        $TRACE rmdir . && error "'rmdir .' worked after removing cwd"
        $TRACE ln -s . foo && error "'ln -s .' worked after removing cwd" ||true
+       # a failing "cd .." is only reported, together with the current directory
+       $TRACE cd .. || echo "'cd ..' failed after removing cwd (`pwd`)"
+       $TRACE cd .. || echo "'cd ..' failed after removing cwd (`pwd)`"
 }
 run_test 48c "Access removed working subdir (should return errors)"
 
@@ -1595,11 +1601,13 @@ test_48d() { # bug 2350
        #set -vx
        mkdir -p $DIR/d48d/dir
        cd $DIR/d48d/dir
-       rm -r $DIR/d48d || error "remove cwd and parent $DIR/d48d failed"
+       pwd
+       ls .
+       $TRACE rm -vr $DIR/d48d || error "remove cwd+parent $DIR/d48d failed"
        $TRACE touch foo && error "'touch foo' worked after removing cwd"
        $TRACE mkdir foo && error "'mkdir foo' worked after removing cwd"
        $TRACE ls . && error "'ls .' worked after removing cwd"
-       $TRACE ls .. && error "'ls ..' worked after removing cwd"
+       $TRACE ls .. && echo "'ls ..' worked after removing cwd" # bug 3415
        $TRACE cd . && error "'cd .' worked after recreate cwd"
        $TRACE mkdir . && error "'mkdir .' worked after removing cwd"
        $TRACE rmdir . && error "'rmdir .' worked after removing cwd"
@@ -1735,11 +1743,11 @@ run_test 54d "fifo device works in lustre ======================"
 test_55() {
         rm -rf $DIR/d55
         mkdir $DIR/d55
-        mount -t ext3 -o loop,iopen $EXT3_DEV $DIR/d55 || error
+        mount -t $FSTYPE -o loop,iopen $EXT2_DEV $DIR/d55 || error
         touch $DIR/d55/foo
         $IOPENTEST1 $DIR/d55/foo $DIR/d55 || error
         $IOPENTEST2 $DIR/d55 || error
-        echo "check for $EXT3_DEV. Please wait..."
+        echo "check for $EXT2_DEV. Please wait..."
         rm -rf $DIR/d55/*
         umount $DIR/d55 || error
 }
index 687ea05..455ab96 100755 (executable)
@@ -2852,7 +2852,7 @@ def sys_optimize_elan ():
                  "/proc/qsnet/elan4/config/elan4_mainint_punt_loops"]
     for p in procfiles:
         if os.access(p, os.R_OK):
-            run ("echo 0 > " + p)
+            run ("echo 1 > " + p)
 
 def sys_set_ptldebug(ptldebug):
     if config.ptldebug:
index 980f9fe..8e75f5c 100644 (file)
 #include <string.h>
 #include <sys/mount.h>
 #include <mntent.h>
+#define _GNU_SOURCE
+#include <getopt.h>
 
 #include "obdctl.h"
 #include <portals/ptlctl.h>
 
-int debug = 0;
-int verbose = 0;
-int nomtab = 0;
+int debug;
+int verbose;
+int nomtab;
+int force;
 static char *progname = NULL;
 
+typedef struct {
+        ptl_nid_t gw;
+        ptl_nid_t lo;
+        ptl_nid_t hi;
+} llmount_route_t;
+
+#define MAX_ROUTES  1024
+int route_index;
+ptl_nid_t lmd_cluster_id = 0;
+llmount_route_t routes[MAX_ROUTES];
+
+/*
+ * Look in the mount table (MOUNTED) for an entry whose device, mount
+ * point and filesystem type all match spec, mtpt and type.
+ *
+ * Returns 1, after printing a message to stderr, when such an entry
+ * exists.  Returns 0 when there is none, when the table cannot be
+ * opened, or when the global "force" flag is set (the check is then
+ * skipped entirely).
+ */
+static int check_mtab_entry(char *spec, char *mtpt, char *type)
+{
+        FILE *fp;
+        struct mntent *mnt;
+        int found = 0;
+
+        if (force)
+                return(0);
+
+        fp = setmntent(MOUNTED, "r");
+        if (fp == NULL)
+                return(0);
+
+        while ((mnt = getmntent(fp)) != NULL) {
+                if (strcmp(mnt->mnt_fsname, spec) == 0 &&
+                    strcmp(mnt->mnt_dir, mtpt) == 0 &&
+                    strcmp(mnt->mnt_type, type) == 0) {
+                        fprintf(stderr, "%s: according to %s %s is "
+                                "already mounted on %s\n",
+                                progname, MOUNTED, spec, mtpt);
+                        found = 1; /* or should we return an error? */
+                        break;
+                }
+        }
+        /* close the table on every path, including the match case */
+        endmntent(fp);
+        return(found);
+}
+
 static void
-update_mtab_entry(char *spec, char *node, char *type, char *opts,
-                 int flags, int freq, int pass)
+update_mtab_entry(char *spec, char *mtpt, char *type, char *opts,
+                  int flags, int freq, int pass)
 {
         FILE *fp;
         struct mntent mnt;
 
         mnt.mnt_fsname = spec;
-        mnt.mnt_dir = node;
+        mnt.mnt_dir = mtpt;
         mnt.mnt_type = type;
         mnt.mnt_opts = opts ? opts : "";
         mnt.mnt_freq = freq;
@@ -55,7 +94,7 @@ update_mtab_entry(char *spec, char *node, char *type, char *opts,
         if (!nomtab) {
                 fp = setmntent(MOUNTED, "a+");
                 if (fp == NULL) {
-                        fprintf(stderr, "%s: setmntent(%s): %s:", 
+                        fprintf(stderr, "%s: setmntent(%s): %s:",
                                 progname, MOUNTED, strerror (errno));
                 } else {
                         if ((addmntent (fp, &mnt)) == 1) {
@@ -82,6 +121,8 @@ init_options(struct lustre_mount_data *lmd)
 int
 print_options(struct lustre_mount_data *lmd)
 {
+        int i;
+
         printf("mds:             %s\n", lmd->lmd_mds);
         printf("profile:         %s\n", lmd->lmd_profile);
         printf("server_nid:      "LPX64"\n", lmd->lmd_server_nid);
@@ -90,16 +131,77 @@ print_options(struct lustre_mount_data *lmd)
         printf("server_ipaddr:   0x%x\n", lmd->lmd_server_ipaddr);
         printf("port:            %d\n", lmd->lmd_port);
 
+        for (i = 0; i < route_index; i++)
+                printf("route:           0x%llx : 0x%llx - 0x%llx\n",
+                       routes[i].gw, routes[i].lo, routes[i].hi);
+
         return 0;
 }
 
-int
-parse_options(char * options, struct lustre_mount_data *lmd)
+/*
+ * Parse one "route=<gw>[-<gw>]:<low>[-<high>]" mount option and append one
+ * routes[] entry per gateway NID in the gateway range, each covering the
+ * target range low..high.
+ *
+ * The gateway spec starts at opteq + 1 and the target spec at opttgts + 1
+ * (opttgts points at the ':').  The string is modified in place: the ':'
+ * and any '-' range separators are overwritten with NULs.
+ *
+ * Returns 0 on success, -1 if a NID cannot be parsed or routes[] is full.
+ * Entries appended before a failure are left in routes[].
+ */
+static int parse_route(char *opteq, char *opttgts)
 {
-        ptl_nid_t nid = 0;
+        char *gw_lo_ptr, *gw_hi_ptr, *tgt_lo_ptr, *tgt_hi_ptr;
+        ptl_nid_t gw_lo, gw_hi, tgt_lo, tgt_hi;
+
+        /* Cut the gateway spec off from the target spec. */
+        opttgts[0] = '\0';
+        gw_lo_ptr = opteq + 1;
+        if (!(gw_hi_ptr = strchr(gw_lo_ptr, '-'))) {
+                /* Single gateway: the range is lo..lo. */
+                gw_hi_ptr = gw_lo_ptr;
+        } else {
+                gw_hi_ptr[0] = '\0';
+                gw_hi_ptr++;
+        }
+
+        if (ptl_parse_nid(&gw_lo, gw_lo_ptr) != 0) {
+                fprintf(stderr, "%s: can't parse NID %s\n", progname,gw_lo_ptr);
+                return(-1);
+        }
+
+        if (ptl_parse_nid(&gw_hi, gw_hi_ptr) != 0) {
+                fprintf(stderr, "%s: can't parse NID %s\n", progname,gw_hi_ptr);
+                return(-1);
+        }
+
+        tgt_lo_ptr = opttgts + 1;
+        if (!(tgt_hi_ptr = strchr(tgt_lo_ptr, '-'))) {
+                /* Single target: the range is lo..lo. */
+                tgt_hi_ptr = tgt_lo_ptr;
+        } else {
+                tgt_hi_ptr[0] = '\0';
+                tgt_hi_ptr++;
+        }
+
+        if (ptl_parse_nid(&tgt_lo, tgt_lo_ptr) != 0) {
+                fprintf(stderr, "%s: can't parse NID %s\n",progname,tgt_lo_ptr);
+                return(-1);
+        }
+
+        if (ptl_parse_nid(&tgt_hi, tgt_hi_ptr) != 0) {
+                fprintf(stderr, "%s: can't parse NID %s\n",progname,tgt_hi_ptr);
+                return(-1);
+        }
+
+        /* An inverted gateway range adds nothing. */
+        if (gw_lo > gw_hi)
+                return(0);
+
+        /* Stop on equality instead of testing "gw_lo <= gw_hi" after the
+         * increment, so a range ending at the largest NID value is never
+         * incremented past its end (which would keep adding gateways until
+         * the table overflowed). */
+        for (;;) {
+                if (route_index >= MAX_ROUTES) {
+                        fprintf(stderr, "%s: too many routes %d\n",
+                                progname, MAX_ROUTES);
+                        return(-1);
+                }
+
+                routes[route_index].gw = gw_lo;
+                routes[route_index].lo = tgt_lo;
+                routes[route_index].hi = tgt_hi;
+                route_index++;
+
+                if (gw_lo == gw_hi)
+                        break;
+                gw_lo++;
+        }
+
+        return(0);
+}
+
+int parse_options(char * options, struct lustre_mount_data *lmd)
+{
+        ptl_nid_t nid = 0, cluster_id = 0;
         int val;
-        char *opt;
-        char * opteq;
+        char *opt, *opteq, *opttgts;
 
         /* parsing ideas here taken from util-linux/mount/nfsmount.c */
         for (opt = strtok(options, ","); opt; opt = strtok(NULL, ",")) {
@@ -107,9 +209,25 @@ parse_options(char * options, struct lustre_mount_data *lmd)
                         val = atoi(opteq + 1);
                         *opteq = '\0';
                         if (!strcmp(opt, "nettype")) {
-                                lmd->lmd_nal = ptl_name2nal(opteq+1);
-                        } else if(!strcmp(opt, "local_nid")) {
-                                if (ptl_parse_nid(&nid, opteq+1) != 0) {
+                                lmd->lmd_nal = ptl_name2nal(opteq + 1);
+                        } else if(!strcmp(opt, "cluster_id")) {
+                                if (ptl_parse_nid(&cluster_id, opteq+1) != 0) {
+                                        fprintf (stderr, "%s: can't parse NID "
+                                                 "%s\n", progname, opteq+1);
+                                        return (-1);
+                                }
+                                lmd_cluster_id = cluster_id;
+                        } else if(!strcmp(opt, "route")) {
+                                if (!(opttgts = strchr(opteq + 1, ':'))) {
+                                        fprintf(stderr, "%s: Route must be "
+                                                "of the form: route="
+                                                "<gw>[-<gw>]:<low>[-<high>]\n",
+                                                progname);
+                                        return(-1);
+                                }
+                                parse_route(opteq, opttgts);
+                        } else if (!strcmp(opt, "local_nid")) {
+                                if (ptl_parse_nid(&nid, opteq + 1) != 0) {
                                         fprintf (stderr, "%s: "
                                                  "can't parse NID %s\n",
                                                  progname,
@@ -117,11 +235,11 @@ parse_options(char * options, struct lustre_mount_data *lmd)
                                         return (-1);
                                 }
                                 lmd->lmd_local_nid = nid;
-                        } else if(!strcmp(opt, "server_nid")) {
-                                if (ptl_parse_nid(&nid, opteq+1) != 0) {
+                        } else if (!strcmp(opt, "server_nid")) {
+                                if (ptl_parse_nid(&nid, opteq + 1) != 0) {
                                         fprintf (stderr, "%s: "
                                                  "can't parse NID %s\n",
-                                                 progname, opteq+1);
+                                                 progname, opteq + 1);
                                         return (-1);
                                 }
                                 lmd->lmd_server_nid = nid;
@@ -204,7 +322,7 @@ set_local(struct lustre_mount_data *lmd)
                 return (-1);
         }
 
-        lmd->lmd_local_nid = nid;
+        lmd->lmd_local_nid = nid + lmd_cluster_id;
         return 0;
 }
 
@@ -252,25 +370,21 @@ set_peer(char *hostname, struct lustre_mount_data *lmd)
 int
 build_data(char *source, char *options, struct lustre_mount_data *lmd)
 {
-        char target[1024];
-        char *hostname = NULL;
-        char *mds = NULL;
-        char *profile = NULL;
-        char *s;
+        char buf[1024];
+        char *hostname = NULL, *mds = NULL, *profile = NULL, *s;
         int rc;
 
         if (lmd_bad_magic(lmd))
                 return -EINVAL;
 
-        if (strlen(source) > sizeof(target) + 1) {
-                fprintf(stderr, "%s: "
-                        "exessively long host:/mds/profile argument\n",
+        if (strlen(source) > sizeof(buf) + 1) {
+                fprintf(stderr, "%s: host:/mds/profile argument too long\n",
                         progname);
                 return -EINVAL;
         }
-        strcpy(target, source);
-        if ((s = strchr(target, ':'))) {
-                hostname = target;
+        strcpy(buf, source);
+        if ((s = strchr(buf, ':'))) {
+                hostname = buf;
                 *s = '\0';
 
                 while (*++s == '/')
@@ -280,8 +394,7 @@ build_data(char *source, char *options, struct lustre_mount_data *lmd)
                         *s = '\0';
                         profile = s + 1;
                 } else {
-                        fprintf(stderr, "%s: "
-                                "directory to mount not in "
+                        fprintf(stderr, "%s: directory to mount not in "
                                 "host:/mds/profile format\n",
                                 progname);
                         return(-1);
@@ -292,9 +405,6 @@ build_data(char *source, char *options, struct lustre_mount_data *lmd)
                         progname);
                 return(-1);
         }
-        if (verbose)
-                printf("host: %s\nmds: %s\nprofile: %s\n", hostname, mds,
-                       profile);
 
         rc = parse_options(options, lmd);
         if (rc)
@@ -324,55 +434,143 @@ build_data(char *source, char *options, struct lustre_mount_data *lmd)
         return 0;
 }
 
-int
-main(int argc, char * const argv[])
+/*
+ * Install every route collected in routes[] through the portals ioctl
+ * device, skipping routes the router already reports.
+ *
+ * Returns 0 on success, or -1 as soon as one NAL_CMD_ADD_ROUTE request
+ * fails (the remaining routes are then not attempted).
+ */
+static int set_routes(struct lustre_mount_data *lmd)
+{
+        struct portals_cfg pcfg;
+        struct portal_ioctl_data data;
+        ptl_nid_t lo, hi;
+        int i, j, route_exists, rc, err = 0;
+
+        register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH);
+
+        for (i = 0; i < route_index; i++) {
+                /* Normalize the bounds once so the duplicate check below
+                 * compares exactly what is handed to NAL_CMD_ADD_ROUTE;
+                 * otherwise a range given as high-low is never recognised
+                 * as already present. */
+                lo = MIN(routes[i].lo, routes[i].hi);
+                hi = MAX(routes[i].lo, routes[i].hi);
+
+                /* Check for existing routes so as not to add duplicates */
+                route_exists = 0;
+                for (j = 0; ; j++) {
+                        PCFG_INIT(pcfg, NAL_CMD_GET_ROUTE);
+                        pcfg.pcfg_nal = ROUTER;
+                        pcfg.pcfg_count = j;
+
+                        PORTAL_IOC_INIT(data);
+                        data.ioc_pbuf1 = (char*)&pcfg;
+                        data.ioc_plen1 = sizeof(pcfg);
+                        data.ioc_nid = pcfg.pcfg_nid;
+
+                        rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data);
+                        /* A failing lookup ends the scan; presumably it
+                         * means there is no route at index j. */
+                        if (rc != 0)
+                                break;
+
+                        if ((pcfg.pcfg_gw_nal == lmd->lmd_nal) &&
+                            (pcfg.pcfg_nid    == routes[i].gw) &&
+                            (pcfg.pcfg_nid2   == lo) &&
+                            (pcfg.pcfg_nid3   == hi)) {
+                                route_exists = 1;
+                                break;
+                        }
+                }
+
+                if (route_exists)
+                        continue;
+
+                PCFG_INIT(pcfg, NAL_CMD_ADD_ROUTE);
+                pcfg.pcfg_nid = routes[i].gw;
+                pcfg.pcfg_nal = ROUTER;
+                pcfg.pcfg_gw_nal = lmd->lmd_nal;
+                pcfg.pcfg_nid2 = lo;
+                pcfg.pcfg_nid3 = hi;
+
+                PORTAL_IOC_INIT(data);
+                data.ioc_pbuf1 = (char*)&pcfg;
+                data.ioc_plen1 = sizeof(pcfg);
+                data.ioc_nid = pcfg.pcfg_nid;
+
+                rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data);
+                if (rc != 0) {
+                        fprintf(stderr, "%s: Unable to add route "
+                                "0x%llx : 0x%llx - 0x%llx\n[%d] %s\n",
+                                progname, routes[i].gw, routes[i].lo,
+                                routes[i].hi, errno, strerror(errno));
+                        err = -1;
+                        break;
+                }
+        }
+
+        unregister_ioc_dev(PORTALS_DEV_ID);
+        return err;
+}
+
+/* Print the command-line synopsis to out and terminate the program; the
+ * exit status is 0 when out is stdout and 1 otherwise. */
+void usage(FILE *out)
 {
-        char * source = argv[1];
-        char * target = argv[2];
-        char * options = "";
-        int opt;
-        int i = 3;
-        struct lustre_mount_data lmd;
+        fprintf(out, "usage: %s <source> <target> [-f] [-v] [-n] [-o mntopt]\n",
+                progname);
+        exit(out != stdout);
+}
 
-        int rc;
+int main(int argc, char *const argv[])
+{
+        char *source, *target, *options = "";
+        int i, nargs = 3, opt, rc;
+        struct lustre_mount_data lmd;
+        static struct option long_opt[] = {
+                {"force", 0, 0, 'f'},
+                {"help", 0, 0, 'h'},
+                {"nomtab", 0, 0, 'n'},
+                {"options", 1, 0, 'o'},
+                {"verbose", 0, 0, 'v'},
+                {0, 0, 0, 0}
+        };
 
         progname = strrchr(argv[0], '/');
         progname = progname ? progname + 1 : argv[0];
 
-        while ((opt = getopt(argc, argv, "vno:")) != EOF) {
+        while ((opt = getopt_long(argc, argv, "fno:v", long_opt, NULL)) != EOF){
                 switch (opt) {
-                case 'v':
-                        verbose = 1;
-                        printf("verbose: %d\n", verbose);
-                        i++;
+                case 'f':
+                        ++force;
+                        printf("force: %d\n", force);
+                        nargs++;
+                        break;
+                case 'h':
+                        usage(stdout);
                         break;
                 case 'n':
-                        nomtab = 1;
+                        ++nomtab;
                         printf("nomtab: %d\n", nomtab);
-                        i++;
+                        nargs++;
                         break;
                 case 'o':
                         options = optarg;
-                        i++;
+                        nargs++;
+                        break;
+                case 'v':
+                        ++verbose;
+                        printf("verbose: %d\n", verbose);
+                        nargs++;
                         break;
                 default:
-                        i++;
+                        fprintf(stderr, "%s: unknown option '%c'\n",
+                                progname, opt);
+                        usage(stderr);
                         break;
                 }
         }
 
-        if (argc < i) {
-                fprintf(stderr, 
-                        "%s: too few arguments\n"
-                        "Usage: %s <source> <target> [-v] [-n] [-o ...]\n",
-                        progname, progname);
-                exit(1);
+        if (optind + 2 > argc) {
+                fprintf(stderr, "%s: too few arguments\n", progname);
+                usage(stderr);
         }
 
-        if (verbose)
-                for (i = 0; i < argc; i++) {
+        source = argv[optind];
+        target = argv[optind + 1];
+
+        if (verbose) {
+                for (i = 0; i < argc; i++)
                         printf("arg[%d] = %s\n", i, argv[i]);
-                }
+                printf("source = %s, target = %s\n", source, target);
+        }
+
+        if (check_mtab_entry(source, target, "lustre"))
+                exit(32);
 
         init_options(&lmd);
         rc = build_data(source, options, &lmd);
@@ -380,6 +578,11 @@ main(int argc, char * const argv[])
                 exit(1);
         }
 
+        rc = set_routes(&lmd);
+        if (rc) {
+                exit(1);
+        }
+
         if (debug) {
                 printf("%s: debug mode, not mounting\n", progname);
                 exit(0);