Index: linux-stage/fs/ext3/Makefile
===================================================================
---- linux-stage.orig/fs/ext3/Makefile 2004-05-07 16:00:16.000000000 -0400
-+++ linux-stage/fs/ext3/Makefile 2004-05-07 16:00:17.000000000 -0400
+--- linux-stage.orig/fs/ext3/Makefile 2004-05-11 17:21:20.000000000 -0400
++++ linux-stage/fs/ext3/Makefile 2004-05-11 17:21:21.000000000 -0400
@@ -4,7 +4,7 @@
obj-$(CONFIG_EXT3_FS) += ext3.o
ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
Index: linux-stage/fs/ext3/inode.c
===================================================================
---- linux-stage.orig/fs/ext3/inode.c 2004-05-07 16:00:16.000000000 -0400
-+++ linux-stage/fs/ext3/inode.c 2004-05-07 17:21:59.000000000 -0400
+--- linux-stage.orig/fs/ext3/inode.c 2004-05-11 17:21:21.000000000 -0400
++++ linux-stage/fs/ext3/inode.c 2004-05-11 17:21:21.000000000 -0400
@@ -37,6 +37,7 @@
#include <linux/mpage.h>
#include <linux/uio.h>
bh = iloc.bh;
Index: linux-stage/fs/ext3/iopen.c
===================================================================
---- linux-stage.orig/fs/ext3/iopen.c 2004-05-07 16:00:17.000000000 -0400
-+++ linux-stage/fs/ext3/iopen.c 2004-05-07 17:22:37.000000000 -0400
+--- linux-stage.orig/fs/ext3/iopen.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-stage/fs/ext3/iopen.c 2004-05-11 17:21:21.000000000 -0400
@@ -0,0 +1,272 @@
+/*
+ * linux/fs/ext3/iopen.c
+}
Index: linux-stage/fs/ext3/iopen.h
===================================================================
---- linux-stage.orig/fs/ext3/iopen.h 2004-05-07 16:00:17.000000000 -0400
-+++ linux-stage/fs/ext3/iopen.h 2004-05-07 16:00:17.000000000 -0400
+--- linux-stage.orig/fs/ext3/iopen.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-stage/fs/ext3/iopen.h 2004-05-11 17:21:21.000000000 -0400
@@ -0,0 +1,15 @@
+/*
+ * iopen.h
+ struct inode *inode, int rehash);
Index: linux-stage/fs/ext3/namei.c
===================================================================
---- linux-stage.orig/fs/ext3/namei.c 2004-05-07 16:00:16.000000000 -0400
-+++ linux-stage/fs/ext3/namei.c 2004-05-07 16:00:17.000000000 -0400
+--- linux-stage.orig/fs/ext3/namei.c 2004-05-11 17:21:20.000000000 -0400
++++ linux-stage/fs/ext3/namei.c 2004-05-11 17:21:21.000000000 -0400
@@ -37,6 +37,7 @@
#include <linux/buffer_head.h>
#include <linux/smp_lock.h>
}
Index: linux-stage/fs/ext3/super.c
===================================================================
---- linux-stage.orig/fs/ext3/super.c 2004-05-07 16:00:16.000000000 -0400
-+++ linux-stage/fs/ext3/super.c 2004-05-07 17:21:59.000000000 -0400
+--- linux-stage.orig/fs/ext3/super.c 2004-05-11 17:21:21.000000000 -0400
++++ linux-stage/fs/ext3/super.c 2004-05-11 17:44:53.000000000 -0400
@@ -536,7 +536,7 @@
Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_noload,
Opt_commit, Opt_journal_update, Opt_journal_inum,
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
-- Opt_ignore, Opt_err,
-+ Opt_ignore, Opt_err, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+- Opt_ignore, Opt_barrier,
++ Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+ Opt_err,
};
- static match_table_t tokens = {
-@@ -575,6 +575,9 @@
- {Opt_ignore, "noquota"},
+@@ -577,6 +577,9 @@
{Opt_ignore, "quota"},
{Opt_ignore, "usrquota"},
-+ {Opt_iopen, "iopen"},
-+ {Opt_noiopen, "noiopen"},
-+ {Opt_iopen_nopriv, "iopen_nopriv"},
+ {Opt_barrier, "barrier=%u"},
++ {Opt_iopen, "iopen"},
++ {Opt_noiopen, "noiopen"},
++ {Opt_iopen_nopriv, "iopen_nopriv"},
{Opt_err, NULL}
};
-@@ -762,6 +765,18 @@
- case Opt_abort:
- set_opt(sbi->s_mount_opt, ABORT);
+@@ -772,6 +775,18 @@
+ else
+ clear_opt(sbi->s_mount_opt, BARRIER);
break;
+ case Opt_iopen:
+ set_opt (sbi->s_mount_opt, IOPEN);
default:
Index: linux-stage/include/linux/ext3_fs.h
===================================================================
---- linux-stage.orig/include/linux/ext3_fs.h 2004-05-07 16:00:16.000000000 -0400
-+++ linux-stage/include/linux/ext3_fs.h 2004-05-07 16:00:17.000000000 -0400
-@@ -325,6 +325,8 @@
- #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */
+--- linux-stage.orig/include/linux/ext3_fs.h 2004-05-11 17:21:20.000000000 -0400
++++ linux-stage/include/linux/ext3_fs.h 2004-05-11 17:21:21.000000000 -0400
+@@ -326,6 +326,8 @@
#define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */
#define EXT3_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */
-+#define EXT3_MOUNT_IOPEN 0x10000 /* Allow access via iopen */
-+#define EXT3_MOUNT_IOPEN_NOPRIV 0x20000 /* Make iopen world-readable */
+ #define EXT3_MOUNT_BARRIER 0x10000 /* Use block barriers */
++#define EXT3_MOUNT_IOPEN 0x20000 /* Allow access via iopen */
++#define EXT3_MOUNT_IOPEN_NOPRIV 0x40000 /* Make iopen world-readable */
/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
#ifndef _LINUX_EXT2_FS_H
ext3-ea-in-inode-2.6-suse.patch
export-ext3-2.6-suse.patch
ext3-include-fixes-2.6-suse.patch
+ext3-htree-rename_fix.patch
+if MODULES
if LDISKFS
modulefs_DATA = ldiskfs$(KMODEXT)
endif
+endif
ldiskfs_linux_headers := $(addprefix linux/,$(subst ext3,ldiskfs,$(notdir $(linux_headers))))
fi
LUSTRE_MODULE_TRY_MAKE(
[#include <linux/version.h>],
- [LINUXRELEASE=UTS_RELEASE],
+ [char *LINUXRELEASE;
+ LINUXRELEASE=UTS_RELEASE;],
[$makerule LUSTRE_KERNEL_TEST=conftest.i],
[test -s kernel-tests/conftest.i],
[
# LINUXRELEASE="UTS_RELEASE"
- eval $(grep LINUXRELEASE kernel-tests/conftest.i)
+ eval $(grep "LINUXRELEASE=" kernel-tests/conftest.i)
],[
AC_MSG_RESULT([unknown])
AC_MSG_ERROR([Could not preprocess test program. Consult config.log for details.])
#include <linux/libcfs.h>
#define PORTAL_DEBUG
-#ifndef offsetof
-# define offsetof(typ,memb) ((unsigned long)((char *)&(((typ *)0)->memb)))
-#endif
-
-#define LOWEST_BIT_SET(x) ((x) & ~((x) - 1))
-
#ifdef __KERNEL__
# include <linux/vmalloc.h>
# include <linux/time.h>
TCPNAL = 5,
ROUTER = 6,
IBNAL = 7,
- CRAY_KB_ERNAL = 8,
NAL_ENUM_END_MARKER
};
#ifndef _KPR_H
#define _KPR_H
-# include <portals/lib-nal.h> /* for ptl_hdr_t */
+# include <portals/lib-types.h> /* for ptl_hdr_t */
/******************************************************************************/
/* Kernel Portals Router interface */
#define S_PTLROUTER 0x00100000
#define S_COBD 0x00200000
#define S_IBNAL 0x00400000
-#define S_LMV 0x00800000
-#define S_SM 0x01000000
-#define S_CMOBD 0x02000000
+#define S_SM 0x00800000
+#define S_ASOBD 0x01000000
+#define S_LMV 0x02000000
+#define S_CMOBD 0x04000000
+
/* If you change these values, please keep portals/utils/debug.c
* up to date! */
#endif
#ifdef __KERNEL__
+# define NTOH__u16(var) le16_to_cpu(var)
# define NTOH__u32(var) le32_to_cpu(var)
# define NTOH__u64(var) le64_to_cpu(var)
+# define HTON__u16(var) cpu_to_le16(var)
# define HTON__u32(var) cpu_to_le32(var)
# define HTON__u64(var) cpu_to_le64(var)
#else
}; \
(ret); \
})
+# define NTOH__u16(var) (var)
# define NTOH__u32(var) (var)
# define NTOH__u64(var) (expansion_u64(var))
+# define HTON__u16(var) (var)
# define HTON__u32(var) (var)
# define HTON__u64(var) (expansion_u64(var))
#endif
#include <portals/internal.h>
#include <portals/nal.h>
-#include <portals/arg-blocks.h>
-/* Hack for 2.4.18 macro name collision */
-#ifdef yield
-#undef yield
-#endif
#include <portals/types.h>
-#ifndef PTL_NO_WRAP
int PtlInit(int *);
void PtlFini(void);
int PtlNIFini(ptl_handle_ni_t interface_in);
-#endif
-
int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id);
int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in,
unsigned long *distance_out);
-#ifndef PTL_NO_WRAP
int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * interface_out);
-#endif
/*
int PtlMEUnlinkList(ptl_handle_me_t current_in);
-int PtlTblDump(ptl_handle_ni_t ni, int index_in);
-int PtlMEDump(ptl_handle_me_t current_in);
-
/*
* Memory descriptors
*/
-#ifndef PTL_NO_WRAP
int PtlMDAttach(ptl_handle_me_t current_in, ptl_md_t md_in,
ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out);
int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t * old_inout,
ptl_md_t * new_inout, ptl_handle_eq_t testq_in);
-#endif
/* These should not be called by users */
int PtlMDUpdate_internal(ptl_handle_md_t md_in, ptl_md_t * old_inout,
/*
* Event queues
*/
-#ifndef PTL_NO_WRAP
-
-/* These should be called by users */
int PtlEQAlloc(ptl_handle_ni_t ni_in, ptl_size_t count_in,
ptl_eq_handler_t handler,
ptl_handle_eq_t *handle_out);
int PtlEQFree(ptl_handle_eq_t eventq_in);
-int PtlEQCount(ptl_handle_eq_t eventq_in, ptl_size_t * count_out);
-
int PtlEQGet(ptl_handle_eq_t eventq_in, ptl_event_t * event_out);
int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout,
ptl_event_t *event_out, int *which_out);
-#endif
/*
* Access Control Table
+++ /dev/null
-#ifndef PTL_BLOCKS_H
-#define PTL_BLOCKS_H
-
-#include "build_check.h"
-
-/*
- * blocks.h
- *
- * Argument block types for the Portals 3.0 library
- * Generated by idl
- *
- */
-
-#include <portals/types.h>
-
-/* put LIB_MAX_DISPATCH last here -- these must match the
- assignements to the dispatch table in lib-p30/dispatch.c */
-#define PTL_GETID 1
-#define PTL_NISTATUS 2
-#define PTL_NIDIST 3
-// #define PTL_NIDEBUG 4
-#define PTL_MEATTACH 5
-#define PTL_MEINSERT 6
-// #define PTL_MEPREPEND 7
-#define PTL_MEUNLINK 8
-#define PTL_TBLDUMP 9
-#define PTL_MEDUMP 10
-#define PTL_MDATTACH 11
-// #define PTL_MDINSERT 12
-#define PTL_MDBIND 13
-#define PTL_MDUPDATE 14
-#define PTL_MDUNLINK 15
-#define PTL_EQALLOC 16
-#define PTL_EQFREE 17
-#define PTL_ACENTRY 18
-#define PTL_PUT 19
-#define PTL_GET 20
-#define PTL_FAILNID 21
-#define LIB_MAX_DISPATCH 21
-
-typedef struct PtlFailNid_in {
- ptl_handle_ni_t interface;
- ptl_nid_t nid;
- unsigned int threshold;
-} PtlFailNid_in;
-
-typedef struct PtlFailNid_out {
- int rc;
-} PtlFailNid_out;
-
-typedef struct PtlGetId_in {
- ptl_handle_ni_t handle_in;
-} PtlGetId_in;
-
-typedef struct PtlGetId_out {
- int rc;
- ptl_process_id_t id_out;
-} PtlGetId_out;
-
-typedef struct PtlNIStatus_in {
- ptl_handle_ni_t interface_in;
- ptl_sr_index_t register_in;
-} PtlNIStatus_in;
-
-typedef struct PtlNIStatus_out {
- int rc;
- ptl_sr_value_t status_out;
-} PtlNIStatus_out;
-
-
-typedef struct PtlNIDist_in {
- ptl_handle_ni_t interface_in;
- ptl_process_id_t process_in;
-} PtlNIDist_in;
-
-typedef struct PtlNIDist_out {
- int rc;
- unsigned long distance_out;
-} PtlNIDist_out;
-
-
-typedef struct PtlNIDebug_in {
- unsigned int mask_in;
-} PtlNIDebug_in;
-
-typedef struct PtlNIDebug_out {
- unsigned int rc;
-} PtlNIDebug_out;
-
-
-typedef struct PtlMEAttach_in {
- ptl_handle_ni_t interface_in;
- ptl_pt_index_t index_in;
- ptl_ins_pos_t position_in;
- ptl_process_id_t match_id_in;
- ptl_match_bits_t match_bits_in;
- ptl_match_bits_t ignore_bits_in;
- ptl_unlink_t unlink_in;
-} PtlMEAttach_in;
-
-typedef struct PtlMEAttach_out {
- int rc;
- ptl_handle_me_t handle_out;
-} PtlMEAttach_out;
-
-
-typedef struct PtlMEInsert_in {
- ptl_handle_me_t current_in;
- ptl_process_id_t match_id_in;
- ptl_match_bits_t match_bits_in;
- ptl_match_bits_t ignore_bits_in;
- ptl_unlink_t unlink_in;
- ptl_ins_pos_t position_in;
-} PtlMEInsert_in;
-
-typedef struct PtlMEInsert_out {
- int rc;
- ptl_handle_me_t handle_out;
-} PtlMEInsert_out;
-
-typedef struct PtlMEUnlink_in {
- ptl_handle_me_t current_in;
- ptl_unlink_t unlink_in;
-} PtlMEUnlink_in;
-
-typedef struct PtlMEUnlink_out {
- int rc;
-} PtlMEUnlink_out;
-
-
-typedef struct PtlTblDump_in {
- int index_in;
-} PtlTblDump_in;
-
-typedef struct PtlTblDump_out {
- int rc;
-} PtlTblDump_out;
-
-
-typedef struct PtlMEDump_in {
- ptl_handle_me_t current_in;
-} PtlMEDump_in;
-
-typedef struct PtlMEDump_out {
- int rc;
-} PtlMEDump_out;
-
-
-typedef struct PtlMDAttach_in {
- ptl_handle_me_t me_in;
- ptl_handle_eq_t eq_in;
- ptl_md_t md_in;
- ptl_unlink_t unlink_in;
-} PtlMDAttach_in;
-
-typedef struct PtlMDAttach_out {
- int rc;
- ptl_handle_md_t handle_out;
-} PtlMDAttach_out;
-
-
-typedef struct PtlMDBind_in {
- ptl_handle_ni_t ni_in;
- ptl_handle_eq_t eq_in;
- ptl_md_t md_in;
- ptl_unlink_t unlink_in;
-} PtlMDBind_in;
-
-typedef struct PtlMDBind_out {
- int rc;
- ptl_handle_md_t handle_out;
-} PtlMDBind_out;
-
-
-typedef struct PtlMDUpdate_internal_in {
- ptl_handle_md_t md_in;
- ptl_handle_eq_t testq_in;
- ptl_seq_t sequence_in;
-
- ptl_md_t old_inout;
- int old_inout_valid;
- ptl_md_t new_inout;
- int new_inout_valid;
-} PtlMDUpdate_internal_in;
-
-typedef struct PtlMDUpdate_internal_out {
- int rc;
- ptl_md_t old_inout;
- ptl_md_t new_inout;
-} PtlMDUpdate_internal_out;
-
-
-typedef struct PtlMDUnlink_in {
- ptl_handle_md_t md_in;
-} PtlMDUnlink_in;
-
-typedef struct PtlMDUnlink_out {
- int rc;
- ptl_md_t status_out;
-} PtlMDUnlink_out;
-
-
-typedef struct PtlEQAlloc_in {
- ptl_handle_ni_t ni_in;
- ptl_size_t count_in;
- void *base_in;
- int len_in;
- ptl_eq_handler_t callback_in;
-} PtlEQAlloc_in;
-
-typedef struct PtlEQAlloc_out {
- int rc;
- ptl_handle_eq_t handle_out;
-} PtlEQAlloc_out;
-
-
-typedef struct PtlEQFree_in {
- ptl_handle_eq_t eventq_in;
-} PtlEQFree_in;
-
-typedef struct PtlEQFree_out {
- int rc;
-} PtlEQFree_out;
-
-
-typedef struct PtlACEntry_in {
- ptl_handle_ni_t ni_in;
- ptl_ac_index_t index_in;
- ptl_process_id_t match_id_in;
- ptl_pt_index_t portal_in;
-} PtlACEntry_in;
-
-typedef struct PtlACEntry_out {
- int rc;
-} PtlACEntry_out;
-
-
-typedef struct PtlPut_in {
- ptl_handle_md_t md_in;
- ptl_ack_req_t ack_req_in;
- ptl_process_id_t target_in;
- ptl_pt_index_t portal_in;
- ptl_ac_index_t cookie_in;
- ptl_match_bits_t match_bits_in;
- ptl_size_t offset_in;
- ptl_hdr_data_t hdr_data_in;
-} PtlPut_in;
-
-typedef struct PtlPut_out {
- int rc;
-} PtlPut_out;
-
-
-typedef struct PtlGet_in {
- ptl_handle_md_t md_in;
- ptl_process_id_t target_in;
- ptl_pt_index_t portal_in;
- ptl_ac_index_t cookie_in;
- ptl_match_bits_t match_bits_in;
- ptl_size_t offset_in;
-} PtlGet_in;
-
-typedef struct PtlGet_out {
- int rc;
-} PtlGet_out;
-
-
-#endif
PTL_EQ_IN_USE = 21,
- PTL_MAX_ERRNO = 22
+ PTL_NI_INVALID = 22,
+ PTL_MD_ILLEGAL = 23,
+
+ PTL_MAX_ERRNO = 24
} ptl_err_t;
/* If you change these, you must update the string table in api-errno.c */
extern int ptl_init; /* Has the library been initialized */
-extern int ptl_ni_init(void);
-extern void ptl_ni_fini(void);
-
-static inline ptl_eq_t *
-ptl_handle2usereq (ptl_handle_eq_t *handle)
-{
- /* EQ handles are a little wierd. On the "user" side, the cookie
- * is just a pointer to a queue of events in shared memory. It's
- * cb_eq_handle is the "real" handle which we pass when we
- * call do_forward(). */
- return (ptl_eq_t *)((unsigned long)handle->cookie);
-}
-
#endif
+++ /dev/null
-#ifndef PTL_DISPATCH_H
-#define PTL_DISPATCH_H
-
-#include "build_check.h"
-/*
- * include/dispatch.h
- *
- * Dispatch table header and externs for remote side
- * operations
- *
- * Generated by idl
- *
- */
-
-#include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
-
-extern int do_PtlGetId(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlNIStatus(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlNIDist(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlMEAttach(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlMEInsert(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlMEPrepend(nal_cb_t * nal, void *private, void *args,
- void *ret);
-extern int do_PtlMEUnlink(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlTblDump(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlMEDump(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlMDAttach(nal_cb_t * nal, void *private, void *args,
- void *ret);
-extern int do_PtlMDBind(nal_cb_t * nal, void *private, void *args,
- void *ret);
-extern int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *args,
- void *ret);
-extern int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *args,
- void *ret);
-extern int do_PtlEQAlloc_internal(nal_cb_t * nal, void *private, void *args,
- void *ret);
-extern int do_PtlEQFree_internal(nal_cb_t * nal, void *private, void *args,
- void *ret);
-extern int do_PtlPut(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlGet(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlFailNid (nal_cb_t *nal, void *private, void *args, void *ret);
-
-extern char *dispatch_name(int index);
-#endif
#else
# include <portals/list.h>
# include <string.h>
+# include <pthread.h>
#endif
#include <portals/types.h>
#include <linux/kp30.h>
#include <portals/p30.h>
+#include <portals/nal.h>
#include <portals/lib-types.h>
-#include <portals/lib-nal.h>
-#include <portals/lib-dispatch.h>
static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh)
{
wh->wh_object_cookie == PTL_WIRE_HANDLE_NONE.wh_object_cookie);
}
-#define state_lock(nal,flagsp) \
-do { \
- CDEBUG(D_PORTALS, "taking state lock\n"); \
- nal->cb_cli(nal, flagsp); \
-} while (0)
+#ifdef __KERNEL__ /* LIB_LOCK/LIB_UNLOCK guard the per-NAL library state in
+                   * (nal)->libnal_ni.  Kernel build: the lock is the spinlock
+                   * ni_lock, taken with spin_lock_irqsave(); 'flags' is passed
+                   * by name (an lvalue, not a pointer). */
+#define LIB_LOCK(nal,flags) \
+ spin_lock_irqsave(&(nal)->libnal_ni.ni_lock, flags)
+#define LIB_UNLOCK(nal,flags) \
+ spin_unlock_irqrestore(&(nal)->libnal_ni.ni_lock, flags)
+#else /* Non-kernel build: the lock is the pthread mutex ni_mutex.  LIB_LOCK
+       * assigns 0 to 'flags' and LIB_UNLOCK never reads it -- presumably so
+       * callers can use the same call form in both builds (TODO confirm). */
+#define LIB_LOCK(nal,flags) \
+ (pthread_mutex_lock(&(nal)->libnal_ni.ni_mutex), (flags) = 0)
+#define LIB_UNLOCK(nal,flags) \
+ pthread_mutex_unlock(&(nal)->libnal_ni.ni_mutex)
+#endif
-#define state_unlock(nal,flagsp) \
-{ \
- CDEBUG(D_PORTALS, "releasing state lock\n"); \
- nal->cb_sti(nal, flagsp); \
-}
#ifdef PTL_USE_LIB_FREELIST
#define MAX_MSGS 2048 /* Outstanding messages */
#define MAX_EQS 512
-extern int lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int nobj, int objsize);
-extern void lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl);
+extern int lib_freelist_init (lib_nal_t *nal, lib_freelist_t *fl, int nobj, int objsize);
+extern void lib_freelist_fini (lib_nal_t *nal, lib_freelist_t *fl);
static inline void *
lib_freelist_alloc (lib_freelist_t *fl)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
lib_freeobj_t *o;
if (list_empty (&fl->fl_list))
static inline void
lib_freelist_free (lib_freelist_t *fl, void *obj)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
lib_freeobj_t *o = list_entry (obj, lib_freeobj_t, fo_contents);
list_add (&o->fo_list, &fl->fl_list);
static inline lib_eq_t *
-lib_eq_alloc (nal_cb_t *nal)
+lib_eq_alloc (lib_nal_t *nal)
{
- /* NEVER called with statelock held */
+ /* NEVER called with liblock held */
unsigned long flags;
lib_eq_t *eq;
- state_lock (nal, &flags);
- eq = (lib_eq_t *)lib_freelist_alloc (&nal->ni.ni_free_eqs);
- state_unlock (nal, &flags);
+ LIB_LOCK (nal, flags);
+ eq = (lib_eq_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_eqs);
+ LIB_UNLOCK (nal, flags);
return (eq);
}
static inline void
-lib_eq_free (nal_cb_t *nal, lib_eq_t *eq)
+lib_eq_free (lib_nal_t *nal, lib_eq_t *eq)
{
- /* ALWAYS called with statelock held */
- lib_freelist_free (&nal->ni.ni_free_eqs, eq);
+ /* ALWAYS called with liblock held */
+ lib_freelist_free (&nal->libnal_ni.ni_free_eqs, eq);
}
static inline lib_md_t *
-lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd)
+lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd)
{
- /* NEVER called with statelock held */
+ /* NEVER called with liblock held */
unsigned long flags;
lib_md_t *md;
- state_lock (nal, &flags);
- md = (lib_md_t *)lib_freelist_alloc (&nal->ni.ni_free_mds);
- state_unlock (nal, &flags);
+ LIB_LOCK (nal, flags);
+ md = (lib_md_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_mds);
+ LIB_UNLOCK (nal, flags);
return (md);
}
static inline void
-lib_md_free (nal_cb_t *nal, lib_md_t *md)
+lib_md_free (lib_nal_t *nal, lib_md_t *md)
{
- /* ALWAYS called with statelock held */
- lib_freelist_free (&nal->ni.ni_free_mds, md);
+ /* ALWAYS called with liblock held */
+ lib_freelist_free (&nal->libnal_ni.ni_free_mds, md);
}
static inline lib_me_t *
-lib_me_alloc (nal_cb_t *nal)
+lib_me_alloc (lib_nal_t *nal)
{
- /* NEVER called with statelock held */
+ /* NEVER called with liblock held */
unsigned long flags;
lib_me_t *me;
- state_lock (nal, &flags);
- me = (lib_me_t *)lib_freelist_alloc (&nal->ni.ni_free_mes);
- state_unlock (nal, &flags);
+ LIB_LOCK (nal, flags);
+ me = (lib_me_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_mes);
+ LIB_UNLOCK (nal, flags);
return (me);
}
static inline void
-lib_me_free (nal_cb_t *nal, lib_me_t *me)
+lib_me_free (lib_nal_t *nal, lib_me_t *me)
{
- /* ALWAYS called with statelock held */
- lib_freelist_free (&nal->ni.ni_free_mes, me);
+ /* ALWAYS called with liblock held */
+ lib_freelist_free (&nal->libnal_ni.ni_free_mes, me);
}
static inline lib_msg_t *
-lib_msg_alloc (nal_cb_t *nal)
+lib_msg_alloc (lib_nal_t *nal)
{
- /* NEVER called with statelock held */
+ /* NEVER called with liblock held */
unsigned long flags;
lib_msg_t *msg;
- state_lock (nal, &flags);
- msg = (lib_msg_t *)lib_freelist_alloc (&nal->ni.ni_free_msgs);
- state_unlock (nal, &flags);
+ LIB_LOCK (nal, flags);
+ msg = (lib_msg_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_msgs);
+ LIB_UNLOCK (nal, flags);
if (msg != NULL) {
/* NULL pointers, clear flags etc */
}
static inline void
-lib_msg_free (nal_cb_t *nal, lib_msg_t *msg)
+lib_msg_free (lib_nal_t *nal, lib_msg_t *msg)
{
- /* ALWAYS called with statelock held */
- lib_freelist_free (&nal->ni.ni_free_msgs, msg);
+ /* ALWAYS called with liblock held */
+ lib_freelist_free (&nal->libnal_ni.ni_free_msgs, msg);
}
#else
static inline lib_eq_t *
-lib_eq_alloc (nal_cb_t *nal)
+lib_eq_alloc (lib_nal_t *nal)
{
- /* NEVER called with statelock held */
+ /* NEVER called with liblock held */
lib_eq_t *eq;
PORTAL_ALLOC(eq, sizeof(*eq));
}
static inline void
-lib_eq_free (nal_cb_t *nal, lib_eq_t *eq)
+lib_eq_free (lib_nal_t *nal, lib_eq_t *eq)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
PORTAL_FREE(eq, sizeof(*eq));
}
static inline lib_md_t *
-lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd)
+lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd)
{
- /* NEVER called with statelock held */
+ /* NEVER called with liblock held */
lib_md_t *md;
int size;
int niov;
}
static inline void
-lib_md_free (nal_cb_t *nal, lib_md_t *md)
+lib_md_free (lib_nal_t *nal, lib_md_t *md)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
int size;
if ((md->options & PTL_MD_KIOV) != 0)
}
static inline lib_me_t *
-lib_me_alloc (nal_cb_t *nal)
+lib_me_alloc (lib_nal_t *nal)
{
- /* NEVER called with statelock held */
+ /* NEVER called with liblock held */
lib_me_t *me;
PORTAL_ALLOC(me, sizeof(*me));
}
static inline void
-lib_me_free(nal_cb_t *nal, lib_me_t *me)
+lib_me_free(lib_nal_t *nal, lib_me_t *me)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
PORTAL_FREE(me, sizeof(*me));
}
static inline lib_msg_t *
-lib_msg_alloc(nal_cb_t *nal)
+lib_msg_alloc(lib_nal_t *nal)
{
- /* NEVER called with statelock held; may be in interrupt... */
+ /* NEVER called with liblock held; may be in interrupt... */
lib_msg_t *msg;
if (in_interrupt())
}
static inline void
-lib_msg_free(nal_cb_t *nal, lib_msg_t *msg)
+lib_msg_free(lib_nal_t *nal, lib_msg_t *msg)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
PORTAL_FREE(msg, sizeof(*msg));
}
#endif
-extern lib_handle_t *lib_lookup_cookie (nal_cb_t *nal, __u64 cookie, int type);
-extern void lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh, int type);
-extern void lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh);
+extern lib_handle_t *lib_lookup_cookie (lib_nal_t *nal, __u64 cookie, int type);
+extern void lib_initialise_handle (lib_nal_t *nal, lib_handle_t *lh, int type);
+extern void lib_invalidate_handle (lib_nal_t *nal, lib_handle_t *lh);
static inline void
-ptl_eq2handle (ptl_handle_eq_t *handle, lib_eq_t *eq)
+ptl_eq2handle (ptl_handle_eq_t *handle, lib_nal_t *nal, lib_eq_t *eq)
{
+ handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
handle->cookie = eq->eq_lh.lh_cookie;
}
static inline lib_eq_t *
-ptl_handle2eq (ptl_handle_eq_t *handle, nal_cb_t *nal)
+ptl_handle2eq (ptl_handle_eq_t *handle, lib_nal_t *nal)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie,
PTL_COOKIE_TYPE_EQ);
if (lh == NULL)
}
static inline void
-ptl_md2handle (ptl_handle_md_t *handle, lib_md_t *md)
+ptl_md2handle (ptl_handle_md_t *handle, lib_nal_t *nal, lib_md_t *md)
{
+ handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
handle->cookie = md->md_lh.lh_cookie;
}
static inline lib_md_t *
-ptl_handle2md (ptl_handle_md_t *handle, nal_cb_t *nal)
+ptl_handle2md (ptl_handle_md_t *handle, lib_nal_t *nal)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie,
PTL_COOKIE_TYPE_MD);
if (lh == NULL)
}
static inline lib_md_t *
-ptl_wire_handle2md (ptl_handle_wire_t *wh, nal_cb_t *nal)
+ptl_wire_handle2md (ptl_handle_wire_t *wh, lib_nal_t *nal)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
lib_handle_t *lh;
- if (wh->wh_interface_cookie != nal->ni.ni_interface_cookie)
+ if (wh->wh_interface_cookie != nal->libnal_ni.ni_interface_cookie)
return (NULL);
lh = lib_lookup_cookie (nal, wh->wh_object_cookie,
}
static inline void
-ptl_me2handle (ptl_handle_me_t *handle, lib_me_t *me)
+ptl_me2handle (ptl_handle_me_t *handle, lib_nal_t *nal, lib_me_t *me)
{
+ handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
handle->cookie = me->me_lh.lh_cookie;
}
static inline lib_me_t *
-ptl_handle2me (ptl_handle_me_t *handle, nal_cb_t *nal)
+ptl_handle2me (ptl_handle_me_t *handle, lib_nal_t *nal)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie,
PTL_COOKIE_TYPE_ME);
if (lh == NULL)
return (lh_entry (lh, lib_me_t, me_lh));
}
-extern int lib_init(nal_cb_t *cb, ptl_process_id_t pid,
+extern int lib_init(lib_nal_t *libnal, nal_t *apinal,
+ ptl_process_id_t pid,
ptl_ni_limits_t *desired_limits,
ptl_ni_limits_t *actual_limits);
-extern int lib_fini(nal_cb_t * cb);
-extern void lib_dispatch(nal_cb_t * cb, void *private, int index,
- void *arg_block, void *ret_block);
-extern char *dispatch_name(int index);
+extern int lib_fini(lib_nal_t *libnal);
/*
- * When the NAL detects an incoming message, it should call
- * lib_parse() decode it. The NAL callbacks will be handed
- * the private cookie as a way for the NAL to maintain state
- * about which transaction is being processed. An extra parameter,
- * lib_cookie will contain the necessary information for
- * finalizing the message.
- *
- * After it has finished the handling the message, it should
- * call lib_finalize() with the lib_cookie parameter.
- * Call backs will be made to write events, send acks or
- * replies and so on.
+ * When the NAL detects an incoming message header, it should call
+ * lib_parse() to decode it. If the message header is garbage, lib_parse()
+ * returns immediately with failure; otherwise the NAL callbacks will be
+ * called to receive the message body. They are handed the private cookie
+ * as a way for the NAL to maintain state about which transaction is being
+ * processed. An extra parameter, lib_msg, contains the lib-level message
+ * state for passing to lib_finalize() when the message body has been
+ * received.
*/
-extern void lib_enq_event_locked (nal_cb_t *nal, void *private,
+extern void lib_enq_event_locked (lib_nal_t *nal, void *private,
lib_eq_t *eq, ptl_event_t *ev);
-extern void lib_finalize (nal_cb_t *nal, void *private, lib_msg_t *msg,
+extern void lib_finalize (lib_nal_t *nal, void *private, lib_msg_t *msg,
ptl_ni_fail_t ni_fail_type);
-extern void lib_parse (nal_cb_t *nal, ptl_hdr_t *hdr, void *private);
-extern lib_msg_t *lib_create_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid,
+extern ptl_err_t lib_parse (lib_nal_t *nal, ptl_hdr_t *hdr, void *private);
+extern lib_msg_t *lib_create_reply_msg (lib_nal_t *nal, ptl_nid_t peer_nid,
lib_msg_t *get_msg);
-extern void print_hdr (nal_cb_t * nal, ptl_hdr_t * hdr);
+extern void print_hdr (lib_nal_t * nal, ptl_hdr_t * hdr);
extern ptl_size_t lib_iov_nob (int niov, struct iovec *iov);
extern void lib_assert_wire_constants (void);
-extern ptl_err_t lib_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
+extern ptl_err_t lib_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen);
-extern ptl_err_t lib_send (nal_cb_t *nal, void *private, lib_msg_t *msg,
+extern ptl_err_t lib_send (lib_nal_t *nal, void *private, lib_msg_t *msg,
ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
lib_md_t *md, ptl_size_t offset, ptl_size_t len);
-extern void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md_in,
- ptl_md_t * md_out);
-extern void lib_md_unlink(nal_cb_t * nal, lib_md_t * md_in);
-extern void lib_me_unlink(nal_cb_t * nal, lib_me_t * me_in);
+extern int lib_api_ni_status (nal_t *nal, ptl_sr_index_t sr_idx,
+ ptl_sr_value_t *status);
+extern int lib_api_ni_dist (nal_t *nal, ptl_process_id_t *pid,
+ unsigned long *dist);
+
+extern int lib_api_eq_alloc (nal_t *nal, ptl_size_t count,
+ ptl_eq_handler_t callback,
+ ptl_handle_eq_t *handle);
+extern int lib_api_eq_free(nal_t *nal, ptl_handle_eq_t *eqh);
+extern int lib_api_eq_poll (nal_t *nal,
+ ptl_handle_eq_t *eventqs, int neq, int timeout_ms,
+ ptl_event_t *event, int *which);
+
+extern int lib_api_me_attach(nal_t *nal,
+ ptl_pt_index_t portal,
+ ptl_process_id_t match_id,
+ ptl_match_bits_t match_bits,
+ ptl_match_bits_t ignore_bits,
+ ptl_unlink_t unlink, ptl_ins_pos_t pos,
+ ptl_handle_me_t *handle);
+extern int lib_api_me_insert(nal_t *nal,
+ ptl_handle_me_t *current_meh,
+ ptl_process_id_t match_id,
+ ptl_match_bits_t match_bits,
+ ptl_match_bits_t ignore_bits,
+ ptl_unlink_t unlink, ptl_ins_pos_t pos,
+ ptl_handle_me_t *handle);
+extern int lib_api_me_unlink (nal_t *nal, ptl_handle_me_t *meh);
+extern void lib_me_unlink(lib_nal_t *nal, lib_me_t *me);
+
+extern int lib_api_get_id(nal_t *nal, ptl_process_id_t *pid);
+
+extern void lib_md_unlink(lib_nal_t *nal, lib_md_t *md);
+extern void lib_md_deconstruct(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd);
+extern int lib_api_md_attach(nal_t *nal, ptl_handle_me_t *meh,
+ ptl_md_t *umd, ptl_unlink_t unlink,
+ ptl_handle_md_t *handle);
+extern int lib_api_md_bind(nal_t *nal, ptl_md_t *umd, ptl_unlink_t unlink,
+ ptl_handle_md_t *handle);
+extern int lib_api_md_unlink (nal_t *nal, ptl_handle_md_t *mdh);
+extern int lib_api_md_update (nal_t *nal, ptl_handle_md_t *mdh,
+ ptl_md_t *oldumd, ptl_md_t *newumd,
+ ptl_handle_eq_t *testqh);
+
+extern int lib_api_get(nal_t *apinal, ptl_handle_md_t *mdh,
+ ptl_process_id_t *id,
+ ptl_pt_index_t portal, ptl_ac_index_t ac,
+ ptl_match_bits_t match_bits, ptl_size_t offset);
+extern int lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh,
+ ptl_ack_req_t ack, ptl_process_id_t *id,
+ ptl_pt_index_t portal, ptl_ac_index_t ac,
+ ptl_match_bits_t match_bits,
+ ptl_size_t offset, ptl_hdr_data_t hdr_data);
+extern int lib_api_fail_nid(nal_t *apinal, ptl_nid_t nid, unsigned int threshold);
+
#endif
+++ /dev/null
-#ifndef _LIB_NAL_H_
-#define _LIB_NAL_H_
-
-#include "build_check.h"
-/*
- * nal.h
- *
- * Library side headers that define the abstraction layer's
- * responsibilities and interfaces
- */
-
-#include <portals/lib-types.h>
-
-struct nal_cb_t {
- /*
- * Per interface portal table, access control table
- * and NAL private data field;
- */
- lib_ni_t ni;
- void *nal_data;
- /*
- * send: Sends a preformatted header and payload data to a
- * specified remote process. The payload is scattered over 'niov'
- * fragments described by iov, starting at 'offset' for 'mlen'
- * bytes.
- * NB the NAL may NOT overwrite iov.
- * PTL_OK on success => NAL has committed to send and will call
- * lib_finalize on completion
- */
- ptl_err_t (*cb_send) (nal_cb_t * nal, void *private, lib_msg_t * cookie,
- ptl_hdr_t * hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
- unsigned int niov, struct iovec *iov,
- size_t offset, size_t mlen);
-
- /* as send, but with a set of page fragments (NULL if not supported) */
- ptl_err_t (*cb_send_pages) (nal_cb_t * nal, void *private, lib_msg_t * cookie,
- ptl_hdr_t * hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
- unsigned int niov, ptl_kiov_t *iov,
- size_t offset, size_t mlen);
- /*
- * recv: Receives an incoming message from a remote process. The
- * payload is to be received into the scattered buffer of 'niov'
- * fragments described by iov, starting at 'offset' for 'mlen'
- * bytes. Payload bytes after 'mlen' up to 'rlen' are to be
- * discarded.
- * NB the NAL may NOT overwrite iov.
- * PTL_OK on success => NAL has committed to receive and will call
- * lib_finalize on completion
- */
- ptl_err_t (*cb_recv) (nal_cb_t * nal, void *private, lib_msg_t * cookie,
- unsigned int niov, struct iovec *iov,
- size_t offset, size_t mlen, size_t rlen);
-
- /* as recv, but with a set of page fragments (NULL if not supported) */
- ptl_err_t (*cb_recv_pages) (nal_cb_t * nal, void *private, lib_msg_t * cookie,
- unsigned int niov, ptl_kiov_t *iov,
- size_t offset, size_t mlen, size_t rlen);
- /*
- * read: Reads a block of data from a specified user address
- */
- ptl_err_t (*cb_read) (nal_cb_t * nal, void *private, void *dst_addr,
- user_ptr src_addr, size_t len);
-
- /*
- * write: Writes a block of data into a specified user address
- */
- ptl_err_t (*cb_write) (nal_cb_t * nal, void *private, user_ptr dsr_addr,
- void *src_addr, size_t len);
-
- /*
- * callback: Calls an event callback
- * NULL => lib calls eq's callback (if any) directly.
- */
- void (*cb_callback) (nal_cb_t * nal, void *private, lib_eq_t *eq,
- ptl_event_t *ev);
-
- /*
- * malloc: Acquire a block of memory in a system independent
- * fashion.
- */
- void *(*cb_malloc) (nal_cb_t * nal, size_t len);
-
- void (*cb_free) (nal_cb_t * nal, void *buf, size_t len);
-
- /*
- * (un)map: Tell the NAL about some memory it will access.
- * *addrkey passed to cb_unmap() is what cb_map() set it to.
- * type of *iov depends on options.
- * Set to NULL if not required.
- */
- ptl_err_t (*cb_map) (nal_cb_t * nal, unsigned int niov, struct iovec *iov,
- void **addrkey);
- void (*cb_unmap) (nal_cb_t * nal, unsigned int niov, struct iovec *iov,
- void **addrkey);
-
- /* as (un)map, but with a set of page fragments */
- ptl_err_t (*cb_map_pages) (nal_cb_t * nal, unsigned int niov, ptl_kiov_t *iov,
- void **addrkey);
- void (*cb_unmap_pages) (nal_cb_t * nal, unsigned int niov, ptl_kiov_t *iov,
- void **addrkey);
-
- void (*cb_printf) (nal_cb_t * nal, const char *fmt, ...);
-
- /* Turn interrupts off (begin of protected area) */
- void (*cb_cli) (nal_cb_t * nal, unsigned long *flags);
-
- /* Turn interrupts on (end of protected area) */
- void (*cb_sti) (nal_cb_t * nal, unsigned long *flags);
-
- /*
- * Calculate a network "distance" to given node
- */
- int (*cb_dist) (nal_cb_t * nal, ptl_nid_t nid, unsigned long *dist);
-};
-
-#endif
#else
# include <portals/list.h>
# include <string.h>
+# include <pthread.h>
#endif
#include <portals/types.h>
#include <linux/kp30.h>
#include <portals/p30.h>
+#include <portals/nal.h>
#include <portals/lib-types.h>
-#include <portals/lib-nal.h>
-#include <portals/lib-dispatch.h>
static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh)
{
wh->wh_object_cookie == PTL_WIRE_HANDLE_NONE.wh_object_cookie);
}
-#define state_lock(nal,flagsp) \
-do { \
- CDEBUG(D_PORTALS, "taking state lock\n"); \
- nal->cb_cli(nal, flagsp); \
-} while (0)
+#ifdef __KERNEL__ /* kernel build: lib state is guarded by the ni_lock spinlock, taken with irqsave */
+#define LIB_LOCK(nal,flags) \
+ spin_lock_irqsave(&(nal)->libnal_ni.ni_lock, flags)
+#define LIB_UNLOCK(nal,flags) \
+ spin_unlock_irqrestore(&(nal)->libnal_ni.ni_lock, flags)
+#else /* !__KERNEL__: lib state is guarded by the ni_mutex pthread mutex; LIB_LOCK just sets 'flags' to 0 */
+#define LIB_LOCK(nal,flags) \
+ (pthread_mutex_lock(&(nal)->libnal_ni.ni_mutex), (flags) = 0)
+#define LIB_UNLOCK(nal,flags) \
+ pthread_mutex_unlock(&(nal)->libnal_ni.ni_mutex)
+#endif /* __KERNEL__ */
-#define state_unlock(nal,flagsp) \
-{ \
- CDEBUG(D_PORTALS, "releasing state lock\n"); \
- nal->cb_sti(nal, flagsp); \
-}
#ifdef PTL_USE_LIB_FREELIST
#define MAX_MSGS 2048 /* Outstanding messages */
#define MAX_EQS 512
-extern int lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int nobj, int objsize);
-extern void lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl);
+extern int lib_freelist_init (lib_nal_t *nal, lib_freelist_t *fl, int nobj, int objsize);
+extern void lib_freelist_fini (lib_nal_t *nal, lib_freelist_t *fl);
static inline void *
lib_freelist_alloc (lib_freelist_t *fl)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
lib_freeobj_t *o;
if (list_empty (&fl->fl_list))
static inline void
lib_freelist_free (lib_freelist_t *fl, void *obj)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
lib_freeobj_t *o = list_entry (obj, lib_freeobj_t, fo_contents);
list_add (&o->fo_list, &fl->fl_list);
static inline lib_eq_t *
-lib_eq_alloc (nal_cb_t *nal)
+lib_eq_alloc (lib_nal_t *nal)
{
- /* NEVER called with statelock held */
+ /* NEVER called with liblock held */
unsigned long flags;
lib_eq_t *eq;
- state_lock (nal, &flags);
- eq = (lib_eq_t *)lib_freelist_alloc (&nal->ni.ni_free_eqs);
- state_unlock (nal, &flags);
+ LIB_LOCK (nal, flags);
+ eq = (lib_eq_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_eqs);
+ LIB_UNLOCK (nal, flags);
return (eq);
}
static inline void
-lib_eq_free (nal_cb_t *nal, lib_eq_t *eq)
+lib_eq_free (lib_nal_t *nal, lib_eq_t *eq)
{
- /* ALWAYS called with statelock held */
- lib_freelist_free (&nal->ni.ni_free_eqs, eq);
+ /* ALWAYS called with liblock held */
+ lib_freelist_free (&nal->libnal_ni.ni_free_eqs, eq);
}
static inline lib_md_t *
-lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd)
+lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd)
{
- /* NEVER called with statelock held */
+ /* NEVER called with liblock held */
unsigned long flags;
lib_md_t *md;
- state_lock (nal, &flags);
- md = (lib_md_t *)lib_freelist_alloc (&nal->ni.ni_free_mds);
- state_unlock (nal, &flags);
+ LIB_LOCK (nal, flags);
+ md = (lib_md_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_mds);
+ LIB_UNLOCK (nal, flags);
return (md);
}
static inline void
-lib_md_free (nal_cb_t *nal, lib_md_t *md)
+lib_md_free (lib_nal_t *nal, lib_md_t *md)
{
- /* ALWAYS called with statelock held */
- lib_freelist_free (&nal->ni.ni_free_mds, md);
+ /* ALWAYS called with liblock held */
+ lib_freelist_free (&nal->libnal_ni.ni_free_mds, md);
}
static inline lib_me_t *
-lib_me_alloc (nal_cb_t *nal)
+lib_me_alloc (lib_nal_t *nal)
{
- /* NEVER called with statelock held */
+ /* NEVER called with liblock held */
unsigned long flags;
lib_me_t *me;
- state_lock (nal, &flags);
- me = (lib_me_t *)lib_freelist_alloc (&nal->ni.ni_free_mes);
- state_unlock (nal, &flags);
+ LIB_LOCK (nal, flags);
+ me = (lib_me_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_mes);
+ LIB_UNLOCK (nal, flags);
return (me);
}
static inline void
-lib_me_free (nal_cb_t *nal, lib_me_t *me)
+lib_me_free (lib_nal_t *nal, lib_me_t *me)
{
- /* ALWAYS called with statelock held */
- lib_freelist_free (&nal->ni.ni_free_mes, me);
+ /* ALWAYS called with liblock held */
+ lib_freelist_free (&nal->libnal_ni.ni_free_mes, me);
}
static inline lib_msg_t *
-lib_msg_alloc (nal_cb_t *nal)
+lib_msg_alloc (lib_nal_t *nal)
{
- /* NEVER called with statelock held */
+ /* NEVER called with liblock held */
unsigned long flags;
lib_msg_t *msg;
- state_lock (nal, &flags);
- msg = (lib_msg_t *)lib_freelist_alloc (&nal->ni.ni_free_msgs);
- state_unlock (nal, &flags);
+ LIB_LOCK (nal, flags);
+ msg = (lib_msg_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_msgs);
+ LIB_UNLOCK (nal, flags);
if (msg != NULL) {
/* NULL pointers, clear flags etc */
}
static inline void
-lib_msg_free (nal_cb_t *nal, lib_msg_t *msg)
+lib_msg_free (lib_nal_t *nal, lib_msg_t *msg)
{
- /* ALWAYS called with statelock held */
- lib_freelist_free (&nal->ni.ni_free_msgs, msg);
+ /* ALWAYS called with liblock held */
+ lib_freelist_free (&nal->libnal_ni.ni_free_msgs, msg);
}
#else
static inline lib_eq_t *
-lib_eq_alloc (nal_cb_t *nal)
+lib_eq_alloc (lib_nal_t *nal)
{
- /* NEVER called with statelock held */
+ /* NEVER called with liblock held */
lib_eq_t *eq;
PORTAL_ALLOC(eq, sizeof(*eq));
}
static inline void
-lib_eq_free (nal_cb_t *nal, lib_eq_t *eq)
+lib_eq_free (lib_nal_t *nal, lib_eq_t *eq)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
PORTAL_FREE(eq, sizeof(*eq));
}
static inline lib_md_t *
-lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd)
+lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd)
{
- /* NEVER called with statelock held */
+ /* NEVER called with liblock held */
lib_md_t *md;
int size;
int niov;
}
static inline void
-lib_md_free (nal_cb_t *nal, lib_md_t *md)
+lib_md_free (lib_nal_t *nal, lib_md_t *md)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
int size;
if ((md->options & PTL_MD_KIOV) != 0)
}
static inline lib_me_t *
-lib_me_alloc (nal_cb_t *nal)
+lib_me_alloc (lib_nal_t *nal)
{
- /* NEVER called with statelock held */
+ /* NEVER called with liblock held */
lib_me_t *me;
PORTAL_ALLOC(me, sizeof(*me));
}
static inline void
-lib_me_free(nal_cb_t *nal, lib_me_t *me)
+lib_me_free(lib_nal_t *nal, lib_me_t *me)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
PORTAL_FREE(me, sizeof(*me));
}
static inline lib_msg_t *
-lib_msg_alloc(nal_cb_t *nal)
+lib_msg_alloc(lib_nal_t *nal)
{
- /* NEVER called with statelock held; may be in interrupt... */
+ /* NEVER called with liblock held; may be in interrupt... */
lib_msg_t *msg;
if (in_interrupt())
}
static inline void
-lib_msg_free(nal_cb_t *nal, lib_msg_t *msg)
+lib_msg_free(lib_nal_t *nal, lib_msg_t *msg)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
PORTAL_FREE(msg, sizeof(*msg));
}
#endif
-extern lib_handle_t *lib_lookup_cookie (nal_cb_t *nal, __u64 cookie, int type);
-extern void lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh, int type);
-extern void lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh);
+extern lib_handle_t *lib_lookup_cookie (lib_nal_t *nal, __u64 cookie, int type);
+extern void lib_initialise_handle (lib_nal_t *nal, lib_handle_t *lh, int type);
+extern void lib_invalidate_handle (lib_nal_t *nal, lib_handle_t *lh);
static inline void
-ptl_eq2handle (ptl_handle_eq_t *handle, lib_eq_t *eq)
+ptl_eq2handle (ptl_handle_eq_t *handle, lib_nal_t *nal, lib_eq_t *eq)
{
+ handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
handle->cookie = eq->eq_lh.lh_cookie;
}
static inline lib_eq_t *
-ptl_handle2eq (ptl_handle_eq_t *handle, nal_cb_t *nal)
+ptl_handle2eq (ptl_handle_eq_t *handle, lib_nal_t *nal)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie,
PTL_COOKIE_TYPE_EQ);
if (lh == NULL)
}
static inline void
-ptl_md2handle (ptl_handle_md_t *handle, lib_md_t *md)
+ptl_md2handle (ptl_handle_md_t *handle, lib_nal_t *nal, lib_md_t *md)
{
+ handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
handle->cookie = md->md_lh.lh_cookie;
}
static inline lib_md_t *
-ptl_handle2md (ptl_handle_md_t *handle, nal_cb_t *nal)
+ptl_handle2md (ptl_handle_md_t *handle, lib_nal_t *nal)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie,
PTL_COOKIE_TYPE_MD);
if (lh == NULL)
}
static inline lib_md_t *
-ptl_wire_handle2md (ptl_handle_wire_t *wh, nal_cb_t *nal)
+ptl_wire_handle2md (ptl_handle_wire_t *wh, lib_nal_t *nal)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
lib_handle_t *lh;
- if (wh->wh_interface_cookie != nal->ni.ni_interface_cookie)
+ if (wh->wh_interface_cookie != nal->libnal_ni.ni_interface_cookie)
return (NULL);
lh = lib_lookup_cookie (nal, wh->wh_object_cookie,
}
static inline void
-ptl_me2handle (ptl_handle_me_t *handle, lib_me_t *me)
+ptl_me2handle (ptl_handle_me_t *handle, lib_nal_t *nal, lib_me_t *me)
{
+ handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
handle->cookie = me->me_lh.lh_cookie;
}
static inline lib_me_t *
-ptl_handle2me (ptl_handle_me_t *handle, nal_cb_t *nal)
+ptl_handle2me (ptl_handle_me_t *handle, lib_nal_t *nal)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie,
PTL_COOKIE_TYPE_ME);
if (lh == NULL)
return (lh_entry (lh, lib_me_t, me_lh));
}
-extern int lib_init(nal_cb_t *cb, ptl_process_id_t pid,
+extern int lib_init(lib_nal_t *libnal, nal_t *apinal,
+ ptl_process_id_t pid,
ptl_ni_limits_t *desired_limits,
ptl_ni_limits_t *actual_limits);
-extern int lib_fini(nal_cb_t * cb);
-extern void lib_dispatch(nal_cb_t * cb, void *private, int index,
- void *arg_block, void *ret_block);
-extern char *dispatch_name(int index);
+extern int lib_fini(lib_nal_t *libnal);
/*
- * When the NAL detects an incoming message, it should call
- * lib_parse() decode it. The NAL callbacks will be handed
- * the private cookie as a way for the NAL to maintain state
- * about which transaction is being processed. An extra parameter,
- * lib_cookie will contain the necessary information for
- * finalizing the message.
- *
- * After it has finished the handling the message, it should
- * call lib_finalize() with the lib_cookie parameter.
- * Call backs will be made to write events, send acks or
- * replies and so on.
+ * When the NAL detects an incoming message header, it should call
+ * lib_parse() to decode it. If the message header is garbage, lib_parse()
+ * returns immediately with failure; otherwise the NAL callbacks will be
+ * called to receive the message body. They are handed the private cookie
+ * as a way for the NAL to maintain state about which transaction is being
+ * processed. An extra parameter, lib_msg, contains the lib-level message
+ * state for passing to lib_finalize() when the message body has been
+ * received.
*/
-extern void lib_enq_event_locked (nal_cb_t *nal, void *private,
+extern void lib_enq_event_locked (lib_nal_t *nal, void *private,
lib_eq_t *eq, ptl_event_t *ev);
-extern void lib_finalize (nal_cb_t *nal, void *private, lib_msg_t *msg,
+extern void lib_finalize (lib_nal_t *nal, void *private, lib_msg_t *msg,
ptl_ni_fail_t ni_fail_type);
-extern void lib_parse (nal_cb_t *nal, ptl_hdr_t *hdr, void *private);
-extern lib_msg_t *lib_create_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid,
+extern ptl_err_t lib_parse (lib_nal_t *nal, ptl_hdr_t *hdr, void *private);
+extern lib_msg_t *lib_create_reply_msg (lib_nal_t *nal, ptl_nid_t peer_nid,
lib_msg_t *get_msg);
-extern void print_hdr (nal_cb_t * nal, ptl_hdr_t * hdr);
+extern void print_hdr (lib_nal_t * nal, ptl_hdr_t * hdr);
extern ptl_size_t lib_iov_nob (int niov, struct iovec *iov);
extern void lib_assert_wire_constants (void);
-extern ptl_err_t lib_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
+extern ptl_err_t lib_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen);
-extern ptl_err_t lib_send (nal_cb_t *nal, void *private, lib_msg_t *msg,
+extern ptl_err_t lib_send (lib_nal_t *nal, void *private, lib_msg_t *msg,
ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
lib_md_t *md, ptl_size_t offset, ptl_size_t len);
-extern void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md_in,
- ptl_md_t * md_out);
-extern void lib_md_unlink(nal_cb_t * nal, lib_md_t * md_in);
-extern void lib_me_unlink(nal_cb_t * nal, lib_me_t * me_in);
+extern int lib_api_ni_status (nal_t *nal, ptl_sr_index_t sr_idx,
+ ptl_sr_value_t *status);
+extern int lib_api_ni_dist (nal_t *nal, ptl_process_id_t *pid,
+ unsigned long *dist);
+
+extern int lib_api_eq_alloc (nal_t *nal, ptl_size_t count,
+ ptl_eq_handler_t callback,
+ ptl_handle_eq_t *handle);
+extern int lib_api_eq_free(nal_t *nal, ptl_handle_eq_t *eqh);
+extern int lib_api_eq_poll (nal_t *nal,
+ ptl_handle_eq_t *eventqs, int neq, int timeout_ms,
+ ptl_event_t *event, int *which);
+
+extern int lib_api_me_attach(nal_t *nal,
+ ptl_pt_index_t portal,
+ ptl_process_id_t match_id,
+ ptl_match_bits_t match_bits,
+ ptl_match_bits_t ignore_bits,
+ ptl_unlink_t unlink, ptl_ins_pos_t pos,
+ ptl_handle_me_t *handle);
+extern int lib_api_me_insert(nal_t *nal,
+ ptl_handle_me_t *current_meh,
+ ptl_process_id_t match_id,
+ ptl_match_bits_t match_bits,
+ ptl_match_bits_t ignore_bits,
+ ptl_unlink_t unlink, ptl_ins_pos_t pos,
+ ptl_handle_me_t *handle);
+extern int lib_api_me_unlink (nal_t *nal, ptl_handle_me_t *meh);
+extern void lib_me_unlink(lib_nal_t *nal, lib_me_t *me);
+
+extern int lib_api_get_id(nal_t *nal, ptl_process_id_t *pid);
+
+extern void lib_md_unlink(lib_nal_t *nal, lib_md_t *md);
+extern void lib_md_deconstruct(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd);
+extern int lib_api_md_attach(nal_t *nal, ptl_handle_me_t *meh,
+ ptl_md_t *umd, ptl_unlink_t unlink,
+ ptl_handle_md_t *handle);
+extern int lib_api_md_bind(nal_t *nal, ptl_md_t *umd, ptl_unlink_t unlink,
+ ptl_handle_md_t *handle);
+extern int lib_api_md_unlink (nal_t *nal, ptl_handle_md_t *mdh);
+extern int lib_api_md_update (nal_t *nal, ptl_handle_md_t *mdh,
+ ptl_md_t *oldumd, ptl_md_t *newumd,
+ ptl_handle_eq_t *testqh);
+
+extern int lib_api_get(nal_t *apinal, ptl_handle_md_t *mdh,
+ ptl_process_id_t *id,
+ ptl_pt_index_t portal, ptl_ac_index_t ac,
+ ptl_match_bits_t match_bits, ptl_size_t offset);
+extern int lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh,
+ ptl_ack_req_t ack, ptl_process_id_t *id,
+ ptl_pt_index_t portal, ptl_ac_index_t ac,
+ ptl_match_bits_t match_bits,
+ ptl_size_t offset, ptl_hdr_data_t hdr_data);
+extern int lib_api_fail_nid(nal_t *apinal, ptl_nid_t nid, unsigned int threshold);
+
#endif
#include "build_check.h"
#include <portals/types.h>
+#include <portals/nal.h>
#ifdef __KERNEL__
# include <linux/uio.h>
# include <linux/smp_lock.h>
# include <sys/types.h>
#endif
-/* struct nal_cb_t is defined in lib-nal.h */
-typedef struct nal_cb_t nal_cb_t;
-
typedef char *user_ptr;
typedef struct lib_msg_t lib_msg_t;
typedef struct lib_ptl_t lib_ptl_t;
struct lib_eq_t {
struct list_head eq_list;
lib_handle_t eq_lh;
- ptl_seq_t sequence;
- ptl_size_t size;
- ptl_event_t *base;
+ ptl_seq_t eq_enq_seq;
+ ptl_seq_t eq_deq_seq;
+ ptl_size_t eq_size;
+ ptl_event_t *eq_events;
int eq_refcount;
- ptl_eq_handler_t event_callback;
+ ptl_eq_handler_t eq_callback;
void *eq_addrkey;
};
/* PTL_COOKIE_TYPES must be a power of 2, so the cookie type can be
* extracted by masking with (PTL_COOKIE_TYPES - 1) */
-typedef struct {
- ptl_nid_t nid;
- ptl_pid_t pid;
- lib_ptl_t tbl;
- lib_counters_t counters;
- ptl_ni_limits_t actual_limits;
+typedef struct lib_ni
+{
+ nal_t *ni_api;
+ ptl_process_id_t ni_pid;
+ lib_ptl_t ni_portals;
+ lib_counters_t ni_counters;
+ ptl_ni_limits_t ni_actual_limits;
int ni_lh_hash_size; /* size of lib handle hash table */
struct list_head *ni_lh_hash_table; /* all extant lib handles, this interface */
__u64 ni_next_object_cookie; /* cookie generator */
__u64 ni_interface_cookie; /* uniquely identifies this ni in this epoch */
- struct list_head ni_test_peers;
+ struct list_head ni_test_peers;
#ifdef PTL_USE_LIB_FREELIST
- lib_freelist_t ni_free_mes;
- lib_freelist_t ni_free_msgs;
- lib_freelist_t ni_free_mds;
- lib_freelist_t ni_free_eqs;
+ lib_freelist_t ni_free_mes;
+ lib_freelist_t ni_free_msgs;
+ lib_freelist_t ni_free_mds;
+ lib_freelist_t ni_free_eqs;
+#endif
+
+ struct list_head ni_active_msgs;
+ struct list_head ni_active_mds;
+ struct list_head ni_active_eqs;
+
+#ifdef __KERNEL__
+ spinlock_t ni_lock;
+ wait_queue_head_t ni_waitq;
+#else
+ pthread_mutex_t ni_mutex;
+ pthread_cond_t ni_cond;
#endif
- struct list_head ni_active_msgs;
- struct list_head ni_active_mds;
- struct list_head ni_active_eqs;
} lib_ni_t;
+
+typedef struct lib_nal
+{ /* library-side NAL descriptor: lib state plus the callbacks a NAL fills in */
+ /* lib-level interface state (portal table, counters, handle hash, lock) */
+ lib_ni_t libnal_ni;
+
+ /* NAL-private data */
+ void *libnal_data;
+
+ /*
+ * send: Sends a preformatted header and payload data to a
+ * specified remote process. The payload is scattered over 'niov'
+ * fragments described by iov, starting at 'offset' for 'mlen'
+ * bytes.
+ * NB the NAL may NOT overwrite iov.
+ * PTL_OK on success => NAL has committed to send and will call
+ * lib_finalize on completion
+ */
+ ptl_err_t (*libnal_send)
+ (struct lib_nal *nal, void *private, lib_msg_t *cookie,
+ ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+ unsigned int niov, struct iovec *iov,
+ size_t offset, size_t mlen);
+
+ /* as send, but the payload is a set of page fragments (ptl_kiov_t);
+ * NULL if not supported */
+ ptl_err_t (*libnal_send_pages)
+ (struct lib_nal *nal, void *private, lib_msg_t * cookie,
+ ptl_hdr_t * hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+ unsigned int niov, ptl_kiov_t *iov,
+ size_t offset, size_t mlen);
+ /*
+ * recv: Receives an incoming message from a remote process. The
+ * payload is to be received into the scattered buffer of 'niov'
+ * fragments described by iov, starting at 'offset' for 'mlen'
+ * bytes. Payload bytes after 'mlen' up to 'rlen' are to be
+ * discarded.
+ * NB the NAL may NOT overwrite iov.
+ * PTL_OK on success => NAL has committed to receive and will call
+ * lib_finalize on completion
+ */
+ ptl_err_t (*libnal_recv)
+ (struct lib_nal *nal, void *private, lib_msg_t * cookie,
+ unsigned int niov, struct iovec *iov,
+ size_t offset, size_t mlen, size_t rlen);
+
+ /* as recv, but the buffer is a set of page fragments (ptl_kiov_t);
+ * NULL if not supported */
+ ptl_err_t (*libnal_recv_pages)
+ (struct lib_nal *nal, void *private, lib_msg_t * cookie,
+ unsigned int niov, ptl_kiov_t *iov,
+ size_t offset, size_t mlen, size_t rlen);
+
+ /*
+ * (un)map: Tell the NAL about some memory it will access.
+ * *addrkey passed to libnal_unmap() is what libnal_map() set it to.
+ * type of *iov depends on options.
+ * Set to NULL if not required.
+ */
+ ptl_err_t (*libnal_map)
+ (struct lib_nal *nal, unsigned int niov, struct iovec *iov,
+ void **addrkey);
+ void (*libnal_unmap)
+ (struct lib_nal *nal, unsigned int niov, struct iovec *iov,
+ void **addrkey);
+
+ /* as (un)map, but with a set of page fragments (ptl_kiov_t) */
+ ptl_err_t (*libnal_map_pages)
+ (struct lib_nal *nal, unsigned int niov, ptl_kiov_t *iov,
+ void **addrkey);
+ void (*libnal_unmap_pages)
+ (struct lib_nal *nal, unsigned int niov, ptl_kiov_t *iov,
+ void **addrkey);
+
+ void (*libnal_printf)(struct lib_nal *nal, const char *fmt, ...); /* takes a format string plus varargs */
+
+ /* Calculate a network "distance" to the given node 'nid'
+ * (presumably reported through *dist -- confirm against a NAL) */
+ int (*libnal_dist) (struct lib_nal *nal, ptl_nid_t nid, unsigned long *dist);
+} lib_nal_t;
+
#endif
#include <portals/types.h>
-#ifdef yield
-#undef yield
-#endif
-
typedef struct nal_t nal_t;
struct nal_t {
+ /* common interface state */
int nal_refct;
+ ptl_handle_ni_t nal_handle;
+
+ /* NAL-private data */
void *nal_data;
- int (*startup) (nal_t *nal, ptl_pid_t requested_pid,
- ptl_ni_limits_t *req, ptl_ni_limits_t *actual);
+ /* NAL API implementation
+ * NB only nal_ni_init needs to be set when the NAL registers itself */
+ int (*nal_ni_init) (nal_t *nal, ptl_pid_t requested_pid,
+ ptl_ni_limits_t *req, ptl_ni_limits_t *actual);
- void (*shutdown) (nal_t *nal);
+ void (*nal_ni_fini) (nal_t *nal);
- int (*forward) (nal_t *nal, int index, /* Function ID */
- void *args, size_t arg_len, void *ret, size_t ret_len);
+ /* interface identity and status queries */
+ int (*nal_get_id) (nal_t *nal, ptl_process_id_t *id);
+ int (*nal_ni_status) (nal_t *nal, ptl_sr_index_t sr_idx, ptl_sr_value_t *status);
+ int (*nal_ni_dist) (nal_t *nal, ptl_process_id_t *id, unsigned long *distance);
+ int (*nal_fail_nid) (nal_t *nal, ptl_nid_t nid, unsigned int threshold);
- int (*yield) (nal_t *nal, unsigned long *flags, int milliseconds);
+ /* ME operations: new handle is returned through 'handle' */
+ int (*nal_me_attach) (nal_t *nal, ptl_pt_index_t portal,
+ ptl_process_id_t match_id,
+ ptl_match_bits_t match_bits, ptl_match_bits_t ignore_bits,
+ ptl_unlink_t unlink, ptl_ins_pos_t pos,
+ ptl_handle_me_t *handle);
+ int (*nal_me_insert) (nal_t *nal, ptl_handle_me_t *me,
+ ptl_process_id_t match_id,
+ ptl_match_bits_t match_bits, ptl_match_bits_t ignore_bits,
+ ptl_unlink_t unlink, ptl_ins_pos_t pos,
+ ptl_handle_me_t *handle);
+ int (*nal_me_unlink) (nal_t *nal, ptl_handle_me_t *me);
+
+ /* MD operations */
+ int (*nal_md_attach) (nal_t *nal, ptl_handle_me_t *me,
+ ptl_md_t *md, ptl_unlink_t unlink,
+ ptl_handle_md_t *handle);
+ int (*nal_md_bind) (nal_t *nal,
+ ptl_md_t *md, ptl_unlink_t unlink,
+ ptl_handle_md_t *handle);
+ int (*nal_md_unlink) (nal_t *nal, ptl_handle_md_t *md);
+ int (*nal_md_update) (nal_t *nal, ptl_handle_md_t *md,
+ ptl_md_t *old_md, ptl_md_t *new_md,
+ ptl_handle_eq_t *testq);
- void (*lock) (nal_t *nal, unsigned long *flags);
+ /* EQ operations */
+ int (*nal_eq_alloc) (nal_t *nal, ptl_size_t count,
+ ptl_eq_handler_t handler,
+ ptl_handle_eq_t *handle);
+ int (*nal_eq_free) (nal_t *nal, ptl_handle_eq_t *eq);
+ int (*nal_eq_poll) (nal_t *nal,
+ ptl_handle_eq_t *eqs, int neqs, int timeout,
+ ptl_event_t *event, int *which);
- void (*unlock) (nal_t *nal, unsigned long *flags);
+ int (*nal_ace_entry) (nal_t *nal, ptl_ac_index_t index,
+ ptl_process_id_t match_id, ptl_pt_index_t portal);
+
+ /* data movement: put and get on a bound MD */
+ int (*nal_put) (nal_t *nal, ptl_handle_md_t *md, ptl_ack_req_t ack,
+ ptl_process_id_t *target, ptl_pt_index_t portal,
+ ptl_ac_index_t ac, ptl_match_bits_t match,
+ ptl_size_t offset, ptl_hdr_data_t hdr_data);
+ int (*nal_get) (nal_t *nal, ptl_handle_md_t *md,
+ ptl_process_id_t *target, ptl_pt_index_t portal,
+ ptl_ac_index_t ac, ptl_match_bits_t match,
+ ptl_size_t offset);
};
-extern nal_t *ptl_hndl2nal(ptl_handle_any_t * any);
+extern nal_t *ptl_hndl2nal(ptl_handle_any_t *any);
#ifdef __KERNEL__
extern int ptl_register_nal(ptl_interface_t interface, nal_t *nal);
#define PTL_EQ_HANDLER_NONE NULL
typedef struct {
- volatile ptl_seq_t sequence;
- ptl_size_t size;
- ptl_event_t *base;
- ptl_handle_any_t cb_eq_handle;
-} ptl_eq_t;
-
-typedef struct {
- ptl_eq_t *eq;
-} ptl_ni_t;
-
-typedef struct {
int max_mes;
int max_mds;
int max_eqs;
#define NRXTHREADS 10 /* max number of receiver threads */
typedef struct _gmnal_data_t {
- spinlock_t cb_lock;
spinlock_t stxd_lock;
struct semaphore stxd_token;
gmnal_stxd_t *stxd;
gmnal_srxd_t *srxd;
struct gm_hash *srxd_hash;
nal_t *nal;
- nal_cb_t *nal_cb;
+ lib_nal_t *libnal;
struct gm_port *gm_port;
unsigned int gm_local_nid;
unsigned int gm_global_nid;
#define GMNAL_GM_LOCK_INIT(a) spin_lock_init(&a->gm_lock);
#define GMNAL_GM_LOCK(a) spin_lock(&a->gm_lock);
#define GMNAL_GM_UNLOCK(a) spin_unlock(&a->gm_lock);
-#define GMNAL_CB_LOCK_INIT(a) spin_lock_init(&a->cb_lock);
/*
* CB NAL
*/
-int gmnal_cb_send(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *,
+int gmnal_cb_send(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
int, ptl_nid_t, ptl_pid_t, unsigned int, struct iovec *, size_t);
-int gmnal_cb_send_pages(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *,
+int gmnal_cb_send_pages(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
int, ptl_nid_t, ptl_pid_t, unsigned int, ptl_kiov_t *, size_t);
-int gmnal_cb_recv(nal_cb_t *, void *, lib_msg_t *,
+int gmnal_cb_recv(lib_nal_t *, void *, lib_msg_t *,
unsigned int, struct iovec *, size_t, size_t);
-int gmnal_cb_recv_pages(nal_cb_t *, void *, lib_msg_t *,
+int gmnal_cb_recv_pages(lib_nal_t *, void *, lib_msg_t *,
unsigned int, ptl_kiov_t *, size_t, size_t);
-int gmnal_cb_read(nal_cb_t *, void *private, void *, user_ptr, size_t);
-
-int gmnal_cb_write(nal_cb_t *, void *private, user_ptr, void *, size_t);
-
-int gmnal_cb_callback(nal_cb_t *, void *, lib_eq_t *, ptl_event_t *);
-
-void *gmnal_cb_malloc(nal_cb_t *, size_t);
-
-void gmnal_cb_free(nal_cb_t *, void *, size_t);
-
-void gmnal_cb_unmap(nal_cb_t *, unsigned int, struct iovec*, void **);
-
-int gmnal_cb_map(nal_cb_t *, unsigned int, struct iovec*, void **);
-
-void gmnal_cb_printf(nal_cb_t *, const char *fmt, ...);
-
-void gmnal_cb_cli(nal_cb_t *, unsigned long *);
-
-void gmnal_cb_sti(nal_cb_t *, unsigned long *);
-
-int gmnal_cb_dist(nal_cb_t *, ptl_nid_t, unsigned long *);
+int gmnal_cb_dist(lib_nal_t *, ptl_nid_t, unsigned long *);
int gmnal_init(void);
#define GMNAL_INIT_NAL_CB(a) do { \
- a->cb_send = gmnal_cb_send; \
- a->cb_send_pages = gmnal_cb_send_pages; \
- a->cb_recv = gmnal_cb_recv; \
- a->cb_recv_pages = gmnal_cb_recv_pages; \
- a->cb_read = gmnal_cb_read; \
- a->cb_write = gmnal_cb_write; \
- a->cb_callback = gmnal_cb_callback; \
- a->cb_malloc = gmnal_cb_malloc; \
- a->cb_free = gmnal_cb_free; \
- a->cb_map = NULL; \
- a->cb_unmap = NULL; \
- a->cb_printf = gmnal_cb_printf; \
- a->cb_cli = gmnal_cb_cli; \
- a->cb_sti = gmnal_cb_sti; \
- a->cb_dist = gmnal_cb_dist; \
- a->nal_data = NULL; \
+ a->libnal_send = gmnal_cb_send; \
+ a->libnal_send_pages = gmnal_cb_send_pages; \
+ a->libnal_recv = gmnal_cb_recv; \
+ a->libnal_recv_pages = gmnal_cb_recv_pages; \
+ a->libnal_map = NULL; \
+ a->libnal_unmap = NULL; \
+ a->libnal_dist = gmnal_cb_dist; \
+ a->libnal_data = NULL; \
} while (0)
/*
* Small messages
*/
-int gmnal_small_rx(nal_cb_t *, void *, lib_msg_t *, unsigned int,
+int gmnal_small_rx(lib_nal_t *, void *, lib_msg_t *, unsigned int,
struct iovec *, size_t, size_t);
-int gmnal_small_tx(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *,
+int gmnal_small_tx(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
int, ptl_nid_t, ptl_pid_t,
unsigned int, struct iovec*, int);
void gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t);
/*
* Large messages
*/
-int gmnal_large_rx(nal_cb_t *, void *, lib_msg_t *, unsigned int,
+int gmnal_large_rx(lib_nal_t *, void *, lib_msg_t *, unsigned int,
struct iovec *, size_t, size_t);
-int gmnal_large_tx(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *,
+int gmnal_large_tx(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
int, ptl_nid_t, ptl_pid_t, unsigned int,
struct iovec*, int);
{ 0 }
};
-
-
-
-
-
-/*
- * gmnal_api_forward
- * This function takes a pack block of arguments from the NAL API
- * module and passes them to the NAL CB module. The CB module unpacks
- * the args and calls the appropriate function indicated by index.
- * Typically this function is used to pass args between kernel and use
- * space.
- * As lgmanl exists entirely in kernel, just pass the arg block directly
- * to the NAL CB, buy passing the args to lib_dispatch
- * Arguments are
- * nal_t nal Our nal
- * int index the api function that initiated this call
- * void *args packed block of function args
- * size_t arg_len length of args block
- * void *ret A return value for the API NAL
- * size_t ret_len Size of the return value
- *
- */
-
-int
-gmnal_api_forward(nal_t *nal, int index, void *args, size_t arg_len,
- void *ret, size_t ret_len)
-{
-
- nal_cb_t *nal_cb = NULL;
- gmnal_data_t *nal_data = NULL;
-
-
-
-
-
- if (!nal || !args || (index < 0) || (arg_len < 0)) {
- CDEBUG(D_ERROR, "Bad args to gmnal_api_forward\n");
- return (PTL_FAIL);
- }
-
- if (ret && (ret_len <= 0)) {
- CDEBUG(D_ERROR, "Bad args to gmnal_api_forward\n");
- return (PTL_FAIL);
- }
-
-
- if (!nal->nal_data) {
- CDEBUG(D_ERROR, "bad nal, no nal data\n");
- return (PTL_FAIL);
- }
-
- nal_data = nal->nal_data;
- CDEBUG(D_INFO, "nal_data is [%p]\n", nal_data);
-
- if (!nal_data->nal_cb) {
- CDEBUG(D_ERROR, "bad nal_data, no nal_cb\n");
- return (PTL_FAIL);
- }
-
- nal_cb = nal_data->nal_cb;
- CDEBUG(D_INFO, "nal_cb is [%p]\n", nal_cb);
-
- CDEBUG(D_PORTALS, "gmnal_api_forward calling lib_dispatch\n");
- lib_dispatch(nal_cb, NULL, index, args, ret);
- CDEBUG(D_PORTALS, "gmnal_api_forward returns from lib_dispatch\n");
-
- return(PTL_OK);
-}
-
-
/*
* gmnal_api_shutdown
* nal_refct == 0 => called on last matching PtlNIFini()
gmnal_api_shutdown(nal_t *nal, int interface)
{
gmnal_data_t *nal_data;
- nal_cb_t *nal_cb;
+ lib_nal_t *libnal;
if (nal->nal_refct != 0)
return;
CDEBUG(D_TRACE, "gmnal_api_shutdown: nal_data [%p]\n", nal_data);
LASSERT(nal == global_nal_data->nal);
- nal_data = nal->nal_data;
+ libnal = (lib_nal_t *)nal->nal_data;
+ nal_data = (gmnal_data_t *)libnal->libnal_data;
LASSERT(nal_data == global_nal_data);
- nal_cb = nal_data->nal_cb;
/* Stop portals calling our ioctl handler */
libcfs_nal_cmd_unregister(GMNAL);
* flag so when lib calls us we fail immediately and dont queue any
* more work but our threads can still call into lib OK. THEN
* shutdown our threads, THEN lib_fini() */
- lib_fini(nal_cb);
+ lib_fini(libnal);
gmnal_stop_rxthread(nal_data);
gmnal_stop_ctthread(nal_data);
GMNAL_GM_UNLOCK(nal_data);
if (nal_data->sysctl)
unregister_sysctl_table (nal_data->sysctl);
- PORTAL_FREE(nal, sizeof(nal_t));
+ /* Don't free 'nal'; it's a static struct */
PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+ PORTAL_FREE(libnal, sizeof(lib_nal_t));
global_nal_data = NULL;
PORTAL_MODULE_UNUSE;
}
-/*
- * gmnal_api_validate
- * validate a user address for use in communications
- * There's nothing to be done here
- */
-int
-gmnal_api_validate(nal_t *nal, void *base, size_t extent)
-{
-
- return(PTL_OK);
-}
-
-
-
-/*
- * gmnal_api_yield
- * Give up the processor
- */
-void
-gmnal_api_yield(nal_t *nal, unsigned long *flags, int milliseconds)
-{
- CDEBUG(D_TRACE, "gmnal_api_yield : nal [%p]\n", nal);
-
- if (milliseconds != 0) {
- CERROR("Blocking yield not implemented yet\n");
- LBUG();
- }
-
- our_cond_resched();
- return;
-}
-
-
-
-/*
- * gmnal_api_lock
- * Take a threadsafe lock
- */
-void
-gmnal_api_lock(nal_t *nal, unsigned long *flags)
-{
-
- gmnal_data_t *nal_data;
- nal_cb_t *nal_cb;
-
- nal_data = nal->nal_data;
- nal_cb = nal_data->nal_cb;
-
- nal_cb->cb_cli(nal_cb, flags);
-
- return;
-}
-
-/*
- * gmnal_api_unlock
- * Release a threadsafe lock
- */
-void
-gmnal_api_unlock(nal_t *nal, unsigned long *flags)
-{
- gmnal_data_t *nal_data;
- nal_cb_t *nal_cb;
-
- nal_data = nal->nal_data;
- nal_cb = nal_data->nal_cb;
-
- nal_cb->cb_sti(nal_cb, flags);
-
- return;
-}
-
-
int
gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
ptl_ni_limits_t *requested_limits,
ptl_ni_limits_t *actual_limits)
{
- nal_cb_t *nal_cb = NULL;
+ lib_nal_t *libnal = NULL;
gmnal_data_t *nal_data = NULL;
gmnal_srxd_t *srxd = NULL;
gm_status_t gm_status;
if (nal->nal_refct != 0) {
if (actual_limits != NULL) {
- nal_data = (gmnal_data_t *)nal->nal_data;
- nal_cb = nal_data->nal_cb;
- *actual_limits = nal->_cb->ni.actual_limits;
+ /* Already initialised: report the limits recorded in the
+ * lib_nal_t that nal->nal_data points at */
+ libnal = (lib_nal_t *)nal->nal_data;
+ *actual_limits = libnal->libnal_ni.ni_actual_limits;
return (PTL_OK);
}
CDEBUG(D_INFO, "Allocd and reset nal_data[%p]\n", nal_data);
CDEBUG(D_INFO, "small_msg_size is [%d]\n", nal_data->small_msg_size);
- PORTAL_ALLOC(nal_cb, sizeof(nal_cb_t));
- if (!nal_cb) {
+ PORTAL_ALLOC(libnal, sizeof(lib_nal_t));
+ if (!libnal) {
PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
return(PTL_NO_SPACE);
}
- memset(nal_cb, 0, sizeof(nal_cb_t));
- CDEBUG(D_INFO, "Allocd and reset nal_cb[%p]\n", nal_cb);
+ memset(libnal, 0, sizeof(lib_nal_t));
+ CDEBUG(D_INFO, "Allocd and reset libnal[%p]\n", libnal);
- GMNAL_INIT_NAL_CB(nal_cb);
+ GMNAL_INIT_NAL_CB(libnal);
/*
* String them all together
*/
- nal->nal_data = (void*)nal_data;
- nal_cb->nal_data = (void*)nal_data;
+ libnal->libnal_data = (void*)nal_data;
nal_data->nal = nal;
- nal_data->nal_cb = nal_cb;
+ nal_data->libnal = libnal;
- GMNAL_CB_LOCK_INIT(nal_data);
GMNAL_GM_LOCK_INIT(nal_data);
if (gm_init() != GM_SUCCESS) {
CDEBUG(D_ERROR, "call to gm_init failed\n");
PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+ PORTAL_FREE(libnal, sizeof(lib_nal_t));
return(PTL_FAIL);
}
gm_finalize();
GMNAL_GM_UNLOCK(nal_data);
PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+ PORTAL_FREE(libnal, sizeof(lib_nal_t));
return(PTL_FAIL);
}
gm_finalize();
GMNAL_GM_UNLOCK(nal_data);
PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+ PORTAL_FREE(libnal, sizeof(lib_nal_t));
return(PTL_FAIL);
}
gm_finalize();
GMNAL_GM_UNLOCK(nal_data);
PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+ PORTAL_FREE(libnal, sizeof(lib_nal_t));
return(PTL_FAIL);
}
gm_finalize();
GMNAL_GM_UNLOCK(nal_data);
PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+ PORTAL_FREE(libnal, sizeof(lib_nal_t));
return(PTL_FAIL);
}
nal_data->gm_local_nid = local_nid;
gm_finalize();
GMNAL_GM_UNLOCK(nal_data);
PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+ PORTAL_FREE(libnal, sizeof(lib_nal_t));
return(PTL_FAIL);
}
CDEBUG(D_INFO, "Global node id is [%u]\n", global_nid);
CDEBUG(D_INFO, "portals_nid is ["LPU64"]\n", process_id.nid);
CDEBUG(D_PORTALS, "calling lib_init\n");
- if (lib_init(nal_cb, process_id,
+ if (lib_init(libnal, nal, process_id,
requested_limits, actual_limits) != PTL_OK) {
CDEBUG(D_ERROR, "lib_init failed\n");
gmnal_stop_rxthread(nal_data);
gm_finalize();
GMNAL_GM_UNLOCK(nal_data);
PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+ PORTAL_FREE(libnal, sizeof(lib_nal_t));
return(PTL_FAIL);
}
/* XXX these cleanup cases should be restructured to
* minimise duplication... */
- lib_fini(nal_cb);
+ lib_fini(libnal);
gmnal_stop_rxthread(nal_data);
gmnal_stop_ctthread(nal_data);
gm_finalize();
GMNAL_GM_UNLOCK(nal_data);
PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+ PORTAL_FREE(libnal, sizeof(lib_nal_t));
return(PTL_FAIL);
}
*/
void gmnal_fini()
{
- gmnal_data_t *nal_data = global_nal_data;
- nal_t *nal = nal_data->nal;
- nal_cb_t *nal_cb = nal_data->nal_cb;
-
CDEBUG(D_TRACE, "gmnal_fini\n");
LASSERT(global_nal_data == NULL);
#include "gmnal.h"
-int gmnal_cb_recv(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
+int gmnal_cb_recv(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
unsigned int niov, struct iovec *iov, size_t mlen,
size_t rlen)
{
int status = PTL_OK;
- CDEBUG(D_TRACE, "gmnal_cb_recv nal_cb [%p], private[%p], cookie[%p], "
+ CDEBUG(D_TRACE, "gmnal_cb_recv libnal [%p], private[%p], cookie[%p], "
"niov[%d], iov [%p], mlen["LPSZ"], rlen["LPSZ"]\n",
- nal_cb, private, cookie, niov, iov, mlen, rlen);
+ libnal, private, cookie, niov, iov, mlen, rlen);
switch(srxd->type) {
case(GMNAL_SMALL_MESSAGE):
CDEBUG(D_INFO, "gmnal_cb_recv got small message\n");
- status = gmnal_small_rx(nal_cb, private, cookie, niov,
+ status = gmnal_small_rx(libnal, private, cookie, niov,
iov, mlen, rlen);
break;
case(GMNAL_LARGE_MESSAGE_INIT):
CDEBUG(D_INFO, "gmnal_cb_recv got large message init\n");
- status = gmnal_large_rx(nal_cb, private, cookie, niov,
+ status = gmnal_large_rx(libnal, private, cookie, niov,
iov, mlen, rlen);
}
return(status);
}
-int gmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
+int gmnal_cb_recv_pages(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
unsigned int kniov, ptl_kiov_t *kiov, size_t mlen,
size_t rlen)
{
ptl_kiov_t *kiov_dup = kiov;;
- CDEBUG(D_TRACE, "gmnal_cb_recv_pages nal_cb [%p],private[%p], "
+ CDEBUG(D_TRACE, "gmnal_cb_recv_pages libnal [%p],private[%p], "
"cookie[%p], kniov[%d], kiov [%p], mlen["LPSZ"], rlen["LPSZ"]\n",
- nal_cb, private, cookie, kniov, kiov, mlen, rlen);
+ libnal, private, cookie, kniov, kiov, mlen, rlen);
if (srxd->type == GMNAL_SMALL_MESSAGE) {
PORTAL_ALLOC(iovec, sizeof(struct iovec)*kniov);
kiov++;
}
CDEBUG(D_INFO, "calling gmnal_small_rx\n");
- status = gmnal_small_rx(nal_cb, private, cookie, kniov,
+ status = gmnal_small_rx(libnal, private, cookie, kniov,
iovec_dup, mlen, rlen);
for (i=0; i<kniov; i++) {
kunmap(kiov_dup->kiov_page);
}
-int gmnal_cb_send(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
+int gmnal_cb_send(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
unsigned int niov, struct iovec *iov, size_t len)
{
CDEBUG(D_TRACE, "gmnal_cb_send niov[%d] len["LPSZ"] nid["LPU64"]\n",
niov, len, nid);
- nal_data = nal_cb->nal_data;
+ nal_data = libnal->libnal_data;
if (GMNAL_IS_SMALL_MESSAGE(nal_data, niov, iov, len)) {
CDEBUG(D_INFO, "This is a small message send\n");
- gmnal_small_tx(nal_cb, private, cookie, hdr, type, nid, pid,
+ gmnal_small_tx(libnal, private, cookie, hdr, type, nid, pid,
niov, iov, len);
} else {
CDEBUG(D_ERROR, "Large message send it is not supported\n");
- lib_finalize(nal_cb, private, cookie, PTL_FAIL);
+ lib_finalize(libnal, private, cookie, PTL_FAIL);
return(PTL_FAIL);
- gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, pid,
+ gmnal_large_tx(libnal, private, cookie, hdr, type, nid, pid,
niov, iov, len);
}
return(PTL_OK);
}
-int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
- ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, unsigned int kniov, ptl_kiov_t *kiov, size_t len)
+int gmnal_cb_send_pages(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
+ ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+ unsigned int kniov, ptl_kiov_t *kiov, size_t len)
{
int i = 0;
ptl_kiov_t *kiov_dup = kiov;
CDEBUG(D_TRACE, "gmnal_cb_send_pages nid ["LPU64"] niov[%d] len["LPSZ"]\n", nid, kniov, len);
- nal_data = nal_cb->nal_data;
+ nal_data = libnal->libnal_data;
PORTAL_ALLOC(iovec, kniov*sizeof(struct iovec));
iovec_dup = iovec;
if (GMNAL_IS_SMALL_MESSAGE(nal_data, 0, NULL, len)) {
iovec++;
kiov++;
}
- gmnal_small_tx(nal_cb, private, cookie, hdr, type, nid,
+ gmnal_small_tx(libnal, private, cookie, hdr, type, nid,
pid, kniov, iovec_dup, len);
} else {
CDEBUG(D_ERROR, "Large message send it is not supported yet\n");
iovec++;
kiov++;
}
- gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid,
+ gmnal_large_tx(libnal, private, cookie, hdr, type, nid,
pid, kniov, iovec, len);
}
for (i=0; i<kniov; i++) {
return(PTL_OK);
}
-int gmnal_cb_read(nal_cb_t *nal_cb, void *private, void *dst,
- user_ptr src, size_t len)
-{
- gm_bcopy(src, dst, len);
- return(PTL_OK);
-}
-
-int gmnal_cb_write(nal_cb_t *nal_cb, void *private, user_ptr dst,
- void *src, size_t len)
-{
- gm_bcopy(src, dst, len);
- return(PTL_OK);
-}
-
-int gmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq,
- ptl_event_t *ev)
-{
-
- if (eq->event_callback != NULL) {
- CDEBUG(D_INFO, "found callback\n");
- eq->event_callback(ev);
- }
-
- return(PTL_OK);
-}
-
-void *gmnal_cb_malloc(nal_cb_t *nal_cb, size_t len)
-{
- void *ptr = NULL;
- CDEBUG(D_TRACE, "gmnal_cb_malloc len["LPSZ"]\n", len);
- PORTAL_ALLOC(ptr, len);
- return(ptr);
-}
-
-void gmnal_cb_free(nal_cb_t *nal_cb, void *buf, size_t len)
-{
- CDEBUG(D_TRACE, "gmnal_cb_free :: buf[%p] len["LPSZ"]\n", buf, len);
- PORTAL_FREE(buf, len);
- return;
-}
-
-void gmnal_cb_unmap(nal_cb_t *nal_cb, unsigned int niov, struct iovec *iov,
- void **addrkey)
-{
- return;
-}
-
-int gmnal_cb_map(nal_cb_t *nal_cb, unsigned int niov, struct iovec *iov,
- void**addrkey)
-{
- return(PTL_OK);
-}
-
-void gmnal_cb_printf(nal_cb_t *nal_cb, const char *fmt, ...)
-{
- CDEBUG(D_TRACE, "gmnal_cb_printf\n");
- printk(fmt);
- return;
-}
-
-void gmnal_cb_cli(nal_cb_t *nal_cb, unsigned long *flags)
-{
- gmnal_data_t *nal_data = (gmnal_data_t*)nal_cb->nal_data;
-
- spin_lock_irqsave(&nal_data->cb_lock, *flags);
- return;
-}
-
-void gmnal_cb_sti(nal_cb_t *nal_cb, unsigned long *flags)
-{
- gmnal_data_t *nal_data = (gmnal_data_t*)nal_cb->nal_data;
-
- spin_unlock_irqrestore(&nal_data->cb_lock, *flags);
- return;
-}
-
-void gmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq, ptl_event_t *ev)
-{
- /* holding cb_lock */
-
- if (eq->event_callback != NULL)
- eq->event_callback(ev);
-
- /* We will wake theads sleeping in yield() here, AFTER the
- * callback, when we implement blocking yield */
-}
-
-int gmnal_cb_dist(nal_cb_t *nal_cb, ptl_nid_t nid, unsigned long *dist)
+int gmnal_cb_dist(lib_nal_t *libnal, ptl_nid_t nid, unsigned long *dist)
{
CDEBUG(D_TRACE, "gmnal_cb_dist\n");
if (dist)
unsigned int snode, sport, type, length;
gmnal_msghdr_t *gmnal_msghdr;
ptl_hdr_t *portals_hdr;
+ int rc;
CDEBUG(D_INFO, "nal_data [%p], we[%p] type [%d]\n",
nal_data, we, gmnal_type);
*/
srxd = gmnal_rxbuffer_to_srxd(nal_data, buffer);
CDEBUG(D_INFO, "Back from gmnal_rxbuffer_to_srxd\n");
- srxd->nal_data = nal_data;
if (!srxd) {
CDEBUG(D_ERROR, "Failed to get receive descriptor\n");
- lib_parse(nal_data->nal_cb, portals_hdr, srxd);
+ /* I think passing a NULL srxd to lib_parse will crash
+ * gmnal_recv() */
+ LBUG();
+ lib_parse(nal_data->libnal, portals_hdr, srxd);
return(GMNAL_STATUS_FAIL);
}
return(GMNAL_STATUS_OK);
}
+ srxd->nal_data = nal_data;
srxd->type = gmnal_type;
srxd->nsiov = gmnal_msghdr->niov;
srxd->gm_source_node = gmnal_msghdr->sender_node_id;
* cb_recv is responsible for returning the buffer
* for future receive
*/
- lib_parse(nal_data->nal_cb, portals_hdr, srxd);
+ rc = lib_parse(nal_data->libnal, portals_hdr, srxd);
+
+ if (rc != PTL_OK) {
+ /* I just received garbage; take appropriate action... */
+ LBUG();
+ }
return(GMNAL_STATUS_OK);
}
* Call lib_finalize
*/
int
-gmnal_small_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
+gmnal_small_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
unsigned int niov, struct iovec *iov, size_t mlen, size_t rlen)
{
gmnal_srxd_t *srxd = NULL;
void *buffer = NULL;
- gmnal_data_t *nal_data = (gmnal_data_t*)nal_cb->nal_data;
+ /* NAL-private state is stored in libnal_data (set in gmnal_api_startup) */
+ gmnal_data_t *nal_data = (gmnal_data_t*)libnal->libnal_data;
CDEBUG(D_TRACE, "niov [%d] mlen["LPSZ"]\n", niov, mlen);
if (!private) {
CDEBUG(D_ERROR, "gmnal_small_rx no context\n");
- lib_finalize(nal_cb, private, cookie, PTL_FAIL);
+ lib_finalize(libnal, private, cookie, PTL_FAIL);
return(PTL_FAIL);
}
* let portals library know receive is complete
*/
CDEBUG(D_PORTALS, "calling lib_finalize\n");
- lib_finalize(nal_cb, private, cookie, PTL_OK);
+ lib_finalize(libnal, private, cookie, PTL_OK);
/*
* return buffer so it can be used again
*/
* The callback function informs when the send is complete.
*/
int
-gmnal_small_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
+gmnal_small_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
ptl_hdr_t *hdr, int type, ptl_nid_t global_nid, ptl_pid_t pid,
unsigned int niov, struct iovec *iov, int size)
{
- gmnal_data_t *nal_data = (gmnal_data_t*)nal_cb->nal_data;
+ /* NAL-private state is stored in libnal_data (set in gmnal_api_startup) */
+ gmnal_data_t *nal_data = (gmnal_data_t*)libnal->libnal_data;
gmnal_stxd_t *stxd = NULL;
void *buffer = NULL;
gmnal_msghdr_t *msghdr = NULL;
unsigned int local_nid;
gm_status_t gm_status = GM_SUCCESS;
- CDEBUG(D_TRACE, "gmnal_small_tx nal_cb [%p] private [%p] cookie [%p] "
+ CDEBUG(D_TRACE, "gmnal_small_tx libnal [%p] private [%p] cookie [%p] "
"hdr [%p] type [%d] global_nid ["LPU64"] pid [%d] niov [%d] "
- "iov [%p] size [%d]\n", nal_cb, private, cookie, hdr, type,
+ "iov [%p] size [%d]\n", libnal, private, cookie, hdr, type,
global_nid, pid, niov, iov, size);
CDEBUG(D_INFO, "portals_hdr:: dest_nid ["LPU64"], src_nid ["LPU64"]\n",
gmnal_stxd_t *stxd = (gmnal_stxd_t*)context;
lib_msg_t *cookie = stxd->cookie;
gmnal_data_t *nal_data = (gmnal_data_t*)stxd->nal_data;
- nal_cb_t *nal_cb = nal_data->nal_cb;
+ lib_nal_t *libnal = nal_data->libnal;
if (!stxd) {
CDEBUG(D_TRACE, "send completion event for unknown stxd\n");
return;
}
gmnal_return_stxd(nal_data, stxd);
- lib_finalize(nal_cb, stxd, cookie, PTL_OK);
+ lib_finalize(libnal, stxd, cookie, PTL_OK);
return;
}
* this ack, deregister the memory. Only 1 send token is required here.
*/
int
-gmnal_large_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
+gmnal_large_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
ptl_hdr_t *hdr, int type, ptl_nid_t global_nid, ptl_pid_t pid,
unsigned int niov, struct iovec *iov, int size)
{
int niov_dup;
- CDEBUG(D_TRACE, "gmnal_large_tx nal_cb [%p] private [%p], cookie [%p] "
+ CDEBUG(D_TRACE, "gmnal_large_tx libnal [%p] private [%p], cookie [%p] "
"hdr [%p], type [%d] global_nid ["LPU64"], pid [%d], niov [%d], "
- "iov [%p], size [%d]\n", nal_cb, private, cookie, hdr, type,
+ "iov [%p], size [%d]\n", libnal, private, cookie, hdr, type,
global_nid, pid, niov, iov, size);
- if (nal_cb)
- nal_data = (gmnal_data_t*)nal_cb->nal_data;
+ /* NAL-private state is stored in libnal_data (set in gmnal_api_startup) */
+ if (libnal)
+ nal_data = (gmnal_data_t*)libnal->libnal_data;
else {
- CDEBUG(D_ERROR, "no nal_cb.\n");
+ CDEBUG(D_ERROR, "no libnal.\n");
return(GMNAL_STATUS_FAIL);
}
* data from the sender.
*/
int
-gmnal_large_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
+gmnal_large_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
unsigned int nriov, struct iovec *riov, size_t mlen,
size_t rlen)
{
- gmnal_data_t *nal_data = nal_cb->nal_data;
+ /* NAL-private state is stored in libnal_data (set in gmnal_api_startup) */
+ gmnal_data_t *nal_data = libnal->libnal_data;
gmnal_srxd_t *srxd = (gmnal_srxd_t*)private;
void *buffer = NULL;
struct iovec *riov_dup;
gmnal_msghdr_t *msghdr = NULL;
gm_status_t gm_status;
- CDEBUG(D_TRACE, "gmnal_large_rx :: nal_cb[%p], private[%p], "
+ CDEBUG(D_TRACE, "gmnal_large_rx :: libnal[%p], private[%p], "
"cookie[%p], niov[%d], iov[%p], mlen["LPSZ"], rlen["LPSZ"]\n",
- nal_cb, private, cookie, nriov, riov, mlen, rlen);
+ libnal, private, cookie, nriov, riov, mlen, rlen);
if (!srxd) {
CDEBUG(D_ERROR, "gmnal_large_rx no context\n");
- lib_finalize(nal_cb, private, cookie, PTL_FAIL);
+ lib_finalize(libnal, private, cookie, PTL_FAIL);
return(PTL_FAIL);
}
gmnal_ltxd_t *ltxd = (gmnal_ltxd_t*)context;
gmnal_srxd_t *srxd = ltxd->srxd;
- nal_cb_t *nal_cb = srxd->nal_data->nal_cb;
+ lib_nal_t *libnal = srxd->nal_data->libnal;
int lastone;
struct iovec *riov;
int nriov;
* Let our client application proceed
*/
CDEBUG(D_ERROR, "final callback context[%p]\n", srxd);
- lib_finalize(nal_cb, srxd, srxd->cookie, PTL_OK);
+ lib_finalize(libnal, srxd, srxd->cookie, PTL_OK);
/*
* send an ack to the sender to let him know we got the data
void
gmnal_large_tx_ack_received(gmnal_data_t *nal_data, gmnal_srxd_t *srxd)
{
- nal_cb_t *nal_cb = nal_data->nal_cb;
+ lib_nal_t *libnal = nal_data->libnal;
gmnal_stxd_t *stxd = NULL;
gmnal_msghdr_t *msghdr = NULL;
void *buffer = NULL;
CDEBUG(D_INFO, "gmnal_large_tx_ack_received stxd [%p]\n", stxd);
- lib_finalize(nal_cb, stxd, stxd->cookie, PTL_OK);
+ lib_finalize(libnal, stxd, stxd->cookie, PTL_OK);
/*
* extract the iovec from the stxd, deregister the memory.
#define QSWNAL_SYSCTL_COPY_SMALL_FWD 2
+/* NOTE(review): 3 is assumed not to clash with any other QSWNAL_SYSCTL_* id - confirm */
+#define QSWNAL_SYSCTL_OPTIMIZED_PUTS 3
static ctl_table kqswnal_ctl_table[] = {
+ /* give optimized_puts its own ctl_name instead of sharing the
+ * OPTIMIZED_GETS id with the entry below */
+ {QSWNAL_SYSCTL_OPTIMIZED_PUTS, "optimized_puts",
+ &kqswnal_tunables.kqn_optimized_puts, sizeof (int),
+ 0644, NULL, &proc_dointvec},
{QSWNAL_SYSCTL_OPTIMIZED_GETS, "optimized_gets",
&kqswnal_tunables.kqn_optimized_gets, sizeof (int),
0644, NULL, &proc_dointvec},
};
#endif
-static int
-kqswnal_forward(nal_t *nal,
- int id,
- void *args, size_t args_len,
- void *ret, size_t ret_len)
-{
- kqswnal_data_t *k = nal->nal_data;
- nal_cb_t *nal_cb = k->kqn_cb;
-
- LASSERT (nal == &kqswnal_api);
- LASSERT (k == &kqswnal_data);
- LASSERT (nal_cb == &kqswnal_lib);
-
- lib_dispatch(nal_cb, k, id, args, ret); /* nal needs k */
- return (PTL_OK);
-}
-
-static void
-kqswnal_lock (nal_t *nal, unsigned long *flags)
-{
- kqswnal_data_t *k = nal->nal_data;
- nal_cb_t *nal_cb = k->kqn_cb;
-
- LASSERT (nal == &kqswnal_api);
- LASSERT (k == &kqswnal_data);
- LASSERT (nal_cb == &kqswnal_lib);
-
- nal_cb->cb_cli(nal_cb,flags);
-}
-
-static void
-kqswnal_unlock(nal_t *nal, unsigned long *flags)
-{
- kqswnal_data_t *k = nal->nal_data;
- nal_cb_t *nal_cb = k->kqn_cb;
-
- LASSERT (nal == &kqswnal_api);
- LASSERT (k == &kqswnal_data);
- LASSERT (nal_cb == &kqswnal_lib);
-
- nal_cb->cb_sti(nal_cb,flags);
-}
-
-static int
-kqswnal_yield(nal_t *nal, unsigned long *flags, int milliseconds)
-{
- /* NB called holding statelock */
- wait_queue_t wait;
- unsigned long now = jiffies;
-
- CDEBUG (D_NET, "yield\n");
-
- if (milliseconds == 0) {
- if (need_resched())
- schedule();
- return 0;
- }
-
- init_waitqueue_entry(&wait, current);
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&kqswnal_data.kqn_yield_waitq, &wait);
-
- kqswnal_unlock(nal, flags);
-
- if (milliseconds < 0)
- schedule ();
- else
- schedule_timeout((milliseconds * HZ) / 1000);
-
- kqswnal_lock(nal, flags);
-
- remove_wait_queue(&kqswnal_data.kqn_yield_waitq, &wait);
-
- if (milliseconds > 0) {
- milliseconds -= ((jiffies - now) * 1000) / HZ;
- if (milliseconds < 0)
- milliseconds = 0;
- }
-
- return (milliseconds);
-}
-
int
kqswnal_get_tx_desc (struct portals_cfg *pcfg)
{
kqswnal_data.kqn_nid_offset);
kqswnal_data.kqn_nid_offset =
pcfg->pcfg_nid - kqswnal_data.kqn_elanid;
- kqswnal_lib.ni.nid = pcfg->pcfg_nid;
+ kqswnal_lib.libnal_ni.ni_pid.nid = pcfg->pcfg_nid;
return (0);
default:
ptl_process_id_t my_process_id;
int pkmem = atomic_read(&portal_kmemory);
+ LASSERT (nal == &kqswnal_api);
+
if (nal->nal_refct != 0) {
if (actual_limits != NULL)
- *actual_limits = kqswnal_lib.ni.actual_limits;
+ *actual_limits = kqswnal_lib.libnal_ni.ni_actual_limits;
/* This module got the first ref */
PORTAL_MODULE_USE;
return (PTL_OK);
CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&portal_kmemory));
- memset(&kqswnal_rpc_success, 0, sizeof(kqswnal_rpc_success));
- memset(&kqswnal_rpc_failed, 0, sizeof(kqswnal_rpc_failed));
-#if MULTIRAIL_EKC
- kqswnal_rpc_failed.Data[0] = -ECONNREFUSED;
-#else
- kqswnal_rpc_failed.Status = -ECONNREFUSED;
-#endif
/* ensure all pointers NULL etc */
memset (&kqswnal_data, 0, sizeof (kqswnal_data));
- kqswnal_data.kqn_cb = &kqswnal_lib;
-
INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds);
INIT_LIST_HEAD (&kqswnal_data.kqn_nblk_idletxds);
INIT_LIST_HEAD (&kqswnal_data.kqn_activetxds);
spin_lock_init (&kqswnal_data.kqn_sched_lock);
init_waitqueue_head (&kqswnal_data.kqn_sched_waitq);
- spin_lock_init (&kqswnal_data.kqn_statelock);
- init_waitqueue_head (&kqswnal_data.kqn_yield_waitq);
+ /* Leave kqn_rpc_success zeroed */
+#if MULTIRAIL_EKC
+ kqswnal_data.kqn_rpc_failed.Data[0] = -ECONNREFUSED;
+#else
+ kqswnal_data.kqn_rpc_failed.Status = -ECONNREFUSED;
+#endif
/* pointers/lists/locks initialised */
kqswnal_data.kqn_init = KQN_INIT_DATA;
kqswnal_data.kqn_ep = ep_system();
if (kqswnal_data.kqn_ep == NULL) {
CERROR("Can't initialise EKC\n");
- kqswnal_shutdown(&kqswnal_api);
+ kqswnal_shutdown(nal);
return (PTL_IFACE_INVALID);
}
if (ep_waitfor_nodeid(kqswnal_data.kqn_ep) == ELAN_INVALID_NODE) {
CERROR("Can't get elan ID\n");
- kqswnal_shutdown(&kqswnal_api);
+ kqswnal_shutdown(nal);
return (PTL_IFACE_INVALID);
}
#else
if (kqswnal_data.kqn_ep == NULL)
{
CERROR ("Can't get elan device 0\n");
- kqswnal_shutdown(&kqswnal_api);
+ kqswnal_shutdown(nal);
return (PTL_IFACE_INVALID);
}
#endif
if (kqswnal_data.kqn_eptx == NULL)
{
CERROR ("Can't allocate transmitter\n");
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_NO_SPACE);
}
if (kqswnal_data.kqn_eprx_small == NULL)
{
CERROR ("Can't install small msg receiver\n");
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_NO_SPACE);
}
if (kqswnal_data.kqn_eprx_large == NULL)
{
CERROR ("Can't install large msg receiver\n");
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_NO_SPACE);
}
EP_PERM_WRITE);
if (kqswnal_data.kqn_ep_tx_nmh == NULL) {
CERROR("Can't reserve tx dma space\n");
- kqswnal_shutdown(&kqswnal_api);
+ kqswnal_shutdown(nal);
return (PTL_NO_SPACE);
}
#else
if (rc != DDI_SUCCESS)
{
CERROR ("Can't reserve rx dma space\n");
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_NO_SPACE);
}
#endif
EP_PERM_WRITE);
if (kqswnal_data.kqn_ep_tx_nmh == NULL) {
CERROR("Can't reserve rx dma space\n");
- kqswnal_shutdown(&kqswnal_api);
+ kqswnal_shutdown(nal);
return (PTL_NO_SPACE);
}
#else
if (rc != DDI_SUCCESS)
{
CERROR ("Can't reserve rx dma space\n");
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_NO_SPACE);
}
#endif
sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS));
if (kqswnal_data.kqn_txds == NULL)
{
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_NO_SPACE);
}
PORTAL_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE);
if (ktx->ktx_buffer == NULL)
{
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_NO_SPACE);
}
sizeof (kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE));
if (kqswnal_data.kqn_rxds == NULL)
{
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_NO_SPACE);
}
struct page *page = alloc_page(GFP_KERNEL);
if (page == NULL) {
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_NO_SPACE);
}
my_process_id.nid = kqswnal_elanid2nid(kqswnal_data.kqn_elanid);
my_process_id.pid = 0;
- rc = lib_init(&kqswnal_lib, my_process_id,
+ rc = lib_init(&kqswnal_lib, nal, my_process_id,
requested_limits, actual_limits);
if (rc != PTL_OK)
{
CERROR ("lib_init failed %d\n", rc);
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (rc);
}
kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
/* NB this enqueue can allocate/sleep (attr == 0) */
+ krx->krx_state = KRX_POSTED;
#if MULTIRAIL_EKC
rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
&krx->krx_elanbuffer, 0);
if (rc != EP_SUCCESS)
{
CERROR ("failed ep_queue_receive %d\n", rc);
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_FAIL);
}
}
if (rc != 0)
{
CERROR ("failed to spawn scheduling thread: %d\n", rc);
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_FAIL);
}
}
rc = libcfs_nal_cmd_register (QSWNAL, &kqswnal_cmd, NULL);
if (rc != 0) {
CERROR ("Can't initialise command interface (rc = %d)\n", rc);
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_FAIL);
}
{
int rc;
- kqswnal_api.startup = kqswnal_startup;
- kqswnal_api.shutdown = kqswnal_shutdown;
- kqswnal_api.forward = kqswnal_forward;
- kqswnal_api.yield = kqswnal_yield;
- kqswnal_api.lock = kqswnal_lock;
- kqswnal_api.unlock = kqswnal_unlock;
- kqswnal_api.nal_data = &kqswnal_data;
-
- kqswnal_lib.nal_data = &kqswnal_data;
+ kqswnal_api.nal_ni_init = kqswnal_startup;
+ kqswnal_api.nal_ni_fini = kqswnal_shutdown;
/* Initialise dynamic tunables to defaults once only */
+ kqswnal_tunables.kqn_optimized_puts = KQSW_OPTIMIZED_PUTS;
kqswnal_tunables.kqn_optimized_gets = KQSW_OPTIMIZED_GETS;
rc = ptl_register_nal(QSWNAL, &kqswnal_api);
#define KQSW_RESCHED 100 /* # busy loops that forces scheduler to yield */
-#define KQSW_OPTIMIZED_GETS 1 /* optimized gets? */
+#define KQSW_OPTIMIZED_GETS 1 /* optimize gets >= this size */
+#define KQSW_OPTIMIZED_PUTS (32<<10) /* optimize puts >= this size */
#define KQSW_COPY_SMALL_FWD 0 /* copy small fwd messages to pre-mapped buffer? */
/*
int krx_npages; /* # pages in receive buffer */
int krx_nob; /* Number Of Bytes received into buffer */
int krx_rpc_reply_needed; /* peer waiting for EKC RPC reply */
- int krx_rpc_reply_sent; /* rpc reply sent */
+ int krx_rpc_reply_status; /* what status to send */
+ int krx_state; /* what this RX is doing */
atomic_t krx_refcount; /* how to tell when rpc is done */
kpr_fwd_desc_t krx_fwd; /* embedded forwarding descriptor */
ptl_kiov_t krx_kiov[KQSW_NRXMSGPAGES_LARGE]; /* buffer frags */
} kqswnal_rx_t;
+#define KRX_POSTED 1 /* receiving */
+#define KRX_PARSE 2 /* ready to be parsed */
+#define KRX_COMPLETING 3 /* waiting to be completed */
+
+
typedef struct
{
struct list_head ktx_list; /* enqueue idle/active */
int ktx_nmappedpages; /* # pages mapped for current message */
int ktx_port; /* destination ep port */
ptl_nid_t ktx_nid; /* destination node */
- void *ktx_args[2]; /* completion passthru */
+ void *ktx_args[3]; /* completion passthru */
char *ktx_buffer; /* pre-allocated contiguous buffer for hdr + small payloads */
unsigned long ktx_launchtime; /* when (in jiffies) the transmit was launched */
} kqswnal_tx_t;
#define KTX_IDLE 0 /* on kqn_(nblk_)idletxds */
-#define KTX_SENDING 1 /* local send */
-#define KTX_FORWARDING 2 /* routing a packet */
-#define KTX_GETTING 3 /* local optimised get */
+#define KTX_FORWARDING 1 /* sending a forwarded packet */
+#define KTX_SENDING 2 /* normal send */
+#define KTX_GETTING 3 /* sending optimised get */
+#define KTX_PUTTING 4 /* sending optimised put */
+#define KTX_RDMAING 5 /* handling optimised put/get */
typedef struct
{
/* dynamic tunables... */
+ int kqn_optimized_puts; /* optimize PUTs >= this size (default KQSW_OPTIMIZED_PUTS) */
int kqn_optimized_gets; /* optimized GETs? */
#if CONFIG_SYSCTL
struct ctl_table_header *kqn_sysctl; /* sysctl interface */
struct list_head kqn_delayedfwds; /* delayed forwards */
struct list_head kqn_delayedtxds; /* delayed transmits */
- spinlock_t kqn_statelock; /* cb_cli/cb_sti */
- wait_queue_head_t kqn_yield_waitq; /* where yield waits */
- nal_cb_t *kqn_cb; /* -> kqswnal_lib */
#if MULTIRAIL_EKC
EP_SYS *kqn_ep; /* elan system */
EP_NMH *kqn_ep_tx_nmh; /* elan reserved tx vaddrs */
ptl_nid_t kqn_nid_offset; /* this cluster's NID offset */
int kqn_nnodes; /* this cluster's size */
int kqn_elanid; /* this nodes's elan ID */
+
+ EP_STATUSBLK kqn_rpc_success; /* preset RPC reply status blocks */
+ EP_STATUSBLK kqn_rpc_failed;
} kqswnal_data_t;
/* kqn_init state */
#define KQN_INIT_LIB 2
#define KQN_INIT_ALL 3
-extern nal_cb_t kqswnal_lib;
+extern lib_nal_t kqswnal_lib;
extern nal_t kqswnal_api;
extern kqswnal_tunables_t kqswnal_tunables;
extern kqswnal_data_t kqswnal_data;
-/* global pre-prepared replies to keep off the stack */
-extern EP_STATUSBLK kqswnal_rpc_success;
-extern EP_STATUSBLK kqswnal_rpc_failed;
-
extern int kqswnal_thread_start (int (*fn)(void *arg), void *arg);
extern void kqswnal_rxhandler(EP_RXD *rxd);
extern int kqswnal_scheduler (void *);
extern void kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd);
-extern void kqswnal_dma_reply_complete (EP_RXD *rxd);
-extern void kqswnal_requeue_rx (kqswnal_rx_t *krx);
+extern void kqswnal_rx_done (kqswnal_rx_t *krx);
static inline ptl_nid_t
kqswnal_elanid2nid (int elanid)
return (nid - kqswnal_data.kqn_nid_offset);
}
+static inline ptl_nid_t
+kqswnal_rx_nid(kqswnal_rx_t *krx)
+{
+ return (kqswnal_elanid2nid(ep_rxd_node(krx->krx_rxd)));
+}
+
static inline int
kqswnal_pages_spanned (void *base, int nob)
{
}
#endif
-static inline void kqswnal_rx_done (kqswnal_rx_t *krx)
+static inline void kqswnal_rx_decref (kqswnal_rx_t *krx) /* drop one reference on krx */
{
LASSERT (atomic_read (&krx->krx_refcount) > 0);
if (atomic_dec_and_test (&krx->krx_refcount))
- kqswnal_requeue_rx(krx);
+ kqswnal_rx_done(krx); /* that was the last ref: hand krx over for completion */
}
#if MULTIRAIL_EKC
#include "qswnal.h"
-EP_STATUSBLK kqswnal_rpc_success;
-EP_STATUSBLK kqswnal_rpc_failed;
-
/*
* LIB functions follow
*
*/
-static ptl_err_t
-kqswnal_read(nal_cb_t *nal, void *private, void *dst_addr, user_ptr src_addr,
- size_t len)
-{
- CDEBUG (D_NET, LPX64": reading "LPSZ" bytes from %p -> %p\n",
- nal->ni.nid, len, src_addr, dst_addr );
- memcpy( dst_addr, src_addr, len );
-
- return (PTL_OK);
-}
-
-static ptl_err_t
-kqswnal_write(nal_cb_t *nal, void *private, user_ptr dst_addr, void *src_addr,
- size_t len)
-{
- CDEBUG (D_NET, LPX64": writing "LPSZ" bytes from %p -> %p\n",
- nal->ni.nid, len, src_addr, dst_addr );
- memcpy( dst_addr, src_addr, len );
-
- return (PTL_OK);
-}
-
-static void *
-kqswnal_malloc(nal_cb_t *nal, size_t len)
-{
- void *buf;
-
- PORTAL_ALLOC(buf, len);
- return (buf);
-}
-
-static void
-kqswnal_free(nal_cb_t *nal, void *buf, size_t len)
-{
- PORTAL_FREE(buf, len);
-}
-
-static void
-kqswnal_printf (nal_cb_t * nal, const char *fmt, ...)
-{
- va_list ap;
- char msg[256];
-
- va_start (ap, fmt);
- vsnprintf (msg, sizeof (msg), fmt, ap); /* sprint safely */
- va_end (ap);
-
- msg[sizeof (msg) - 1] = 0; /* ensure terminated */
-
- CDEBUG (D_NET, "%s", msg);
-}
-
-#if (defined(CONFIG_SPARC32) || defined(CONFIG_SPARC64))
-# error "Can't save/restore irq contexts in different procedures"
-#endif
-
-static void
-kqswnal_cli(nal_cb_t *nal, unsigned long *flags)
-{
- kqswnal_data_t *data= nal->nal_data;
-
- spin_lock_irqsave(&data->kqn_statelock, *flags);
-}
-
-
-static void
-kqswnal_sti(nal_cb_t *nal, unsigned long *flags)
-{
- kqswnal_data_t *data= nal->nal_data;
-
- spin_unlock_irqrestore(&data->kqn_statelock, *flags);
-}
-
-static void
-kqswnal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev)
-{
- /* holding kqn_statelock */
-
- if (eq->event_callback != NULL)
- eq->event_callback(ev);
-
- if (waitqueue_active(&kqswnal_data.kqn_yield_waitq))
- wake_up_all(&kqswnal_data.kqn_yield_waitq);
-}
-
static int
-kqswnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
+kqswnal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist)
{
- if (nid == nal->ni.nid)
+ if (nid == nal->libnal_ni.ni_pid.nid)
*dist = 0; /* it's me */
else if (kqswnal_nid2elanid (nid) >= 0)
*dist = 1; /* it's my peer */
do {
int fraglen = kiov->kiov_len - offset;
- /* nob exactly spans the iovs */
- LASSERT (fraglen <= nob);
- /* each frag fits in a page */
+ /* each page frag is contained in one page */
LASSERT (kiov->kiov_offset + kiov->kiov_len <= PAGE_SIZE);
+ if (fraglen > nob)
+ fraglen = nob;
+
nmapped++;
if (nmapped > maxmapped) {
CERROR("Can't map message in %d pages (max %d)\n",
do {
int fraglen = iov->iov_len - offset;
- long npages = kqswnal_pages_spanned (iov->iov_base, fraglen);
-
- /* nob exactly spans the iovs */
- LASSERT (fraglen <= nob);
+ long npages;
+ if (fraglen > nob)
+ fraglen = nob;
+ npages = kqswnal_pages_spanned (iov->iov_base, fraglen);
+
nmapped += npages;
if (nmapped > maxmapped) {
CERROR("Can't map message in %d pages (max %d)\n",
void
kqswnal_tx_done (kqswnal_tx_t *ktx, int error)
{
- lib_msg_t *msg;
- lib_msg_t *repmsg = NULL;
-
switch (ktx->ktx_state) {
case KTX_FORWARDING: /* router asked me to forward this packet */
kpr_fwd_done (&kqswnal_data.kqn_router,
(kpr_fwd_desc_t *)ktx->ktx_args[0], error);
break;
- case KTX_SENDING: /* packet sourced locally */
- lib_finalize (&kqswnal_lib, ktx->ktx_args[0],
+ case KTX_RDMAING: /* optimized GET/PUT handled */
+ case KTX_PUTTING: /* optimized PUT sent */
+ case KTX_SENDING: /* normal send */
+ lib_finalize (&kqswnal_lib, NULL,
(lib_msg_t *)ktx->ktx_args[1],
- (error == 0) ? PTL_OK :
- (error == -ENOMEM) ? PTL_NO_SPACE : PTL_FAIL);
+ (error == 0) ? PTL_OK : PTL_FAIL);
break;
- case KTX_GETTING: /* Peer has DMA-ed direct? */
- msg = (lib_msg_t *)ktx->ktx_args[1];
-
- if (error == 0) {
- repmsg = lib_create_reply_msg (&kqswnal_lib,
- ktx->ktx_nid, msg);
- if (repmsg == NULL)
- error = -ENOMEM;
- }
-
- if (error == 0) {
- lib_finalize (&kqswnal_lib, ktx->ktx_args[0],
- msg, PTL_OK);
- lib_finalize (&kqswnal_lib, NULL, repmsg, PTL_OK);
- } else {
- lib_finalize (&kqswnal_lib, ktx->ktx_args[0], msg,
- (error == -ENOMEM) ? PTL_NO_SPACE : PTL_FAIL);
- }
+ case KTX_GETTING: /* optimized GET sent & REPLY received */
+ /* Complete the GET with success since we can't avoid
+ * delivering a REPLY event; we committed to it when we
+ * launched the GET */
+ lib_finalize (&kqswnal_lib, NULL,
+ (lib_msg_t *)ktx->ktx_args[1], PTL_OK);
+ lib_finalize (&kqswnal_lib, NULL,
+ (lib_msg_t *)ktx->ktx_args[2],
+ (error == 0) ? PTL_OK : PTL_FAIL);
break;
default:
kqswnal_notify_peer_down(ktx);
status = -EHOSTDOWN;
- } else if (ktx->ktx_state == KTX_GETTING) {
- /* RPC completed OK; what did our peer put in the status
+ } else switch (ktx->ktx_state) {
+
+ case KTX_GETTING:
+ case KTX_PUTTING:
+ /* RPC completed OK; but what did our peer put in the status
* block? */
#if MULTIRAIL_EKC
status = ep_txd_statusblk(txd)->Data[0];
#else
status = ep_txd_statusblk(txd)->Status;
#endif
- } else {
+ break;
+
+ case KTX_FORWARDING:
+ case KTX_SENDING:
status = 0;
+ break;
+
+ default:
+ LBUG();
+ break;
}
kqswnal_tx_done (ktx, status);
return (-ESHUTDOWN);
LASSERT (dest >= 0); /* must be a peer */
- if (ktx->ktx_state == KTX_GETTING) {
- /* NB ktx_frag[0] is the GET hdr + kqswnal_remotemd_t. The
- * other frags are the GET sink which we obviously don't
- * send here :) */
-#if MULTIRAIL_EKC
+
+ switch (ktx->ktx_state) {
+ case KTX_GETTING:
+ case KTX_PUTTING:
+ /* NB ktx_frag[0] is the GET/PUT hdr + kqswnal_remotemd_t.
+ * The other frags are the payload, awaiting RDMA */
rc = ep_transmit_rpc(kqswnal_data.kqn_eptx, dest,
ktx->ktx_port, attr,
kqswnal_txhandler, ktx,
NULL, ktx->ktx_frags, 1);
-#else
- rc = ep_transmit_rpc(kqswnal_data.kqn_eptx, dest,
- ktx->ktx_port, attr, kqswnal_txhandler,
- ktx, NULL, ktx->ktx_frags, 1);
-#endif
- } else {
+ break;
+
+ case KTX_FORWARDING:
+ case KTX_SENDING:
#if MULTIRAIL_EKC
rc = ep_transmit_message(kqswnal_data.kqn_eptx, dest,
ktx->ktx_port, attr,
kqswnal_txhandler, ktx,
ktx->ktx_frags, ktx->ktx_nfrag);
#endif
+ break;
+
+ default:
+ LBUG();
+ rc = -EINVAL; /* no compiler warning please */
+ break;
}
switch (rc) {
}
}
+#if 0
static char *
hdr_type_string (ptl_hdr_t *hdr)
{
}
} /* end of print_hdr() */
+#endif
#if !MULTIRAIL_EKC
void
CERROR ("DATAVEC too small\n");
return (-E2BIG);
}
+#else
+/* Verify that the local and remote RDMA fragment lists are compatible:
+ * they must hold the same number of fragments and each pair of
+ * corresponding fragments must have the same length.
+ * Returns 0 when they match, -EINVAL (after logging) otherwise. */
+int
+kqswnal_check_rdma (int nlfrag, EP_NMD *lfrag,
+ int nrfrag, EP_NMD *rfrag)
+{
+ int idx = 0;
+
+ if (nlfrag != nrfrag) {
+ CERROR("Can't cope with unequal # frags: %d local %d remote\n",
+ nlfrag, nrfrag);
+ return (-EINVAL);
+ }
+
+ /* walk both lists in step, stopping at the first length mismatch */
+ while (idx < nlfrag) {
+ if (lfrag[idx].nmd_len != rfrag[idx].nmd_len) {
+ CERROR("Can't cope with unequal frags %d(%d):"
+ " %d local %d remote\n",
+ idx, nlfrag, lfrag[idx].nmd_len, rfrag[idx].nmd_len);
+ return (-EINVAL);
+ }
+ idx++;
+ }
+
+ return (0);
+}
#endif
-int
-kqswnal_dma_reply (kqswnal_tx_t *ktx, int nfrag,
- struct iovec *iov, ptl_kiov_t *kiov,
- int offset, int nob)
+/* Validate the header of an incoming optimized GET/PUT and locate the
+ * remote memory descriptor (RMD) that follows it in the first receive page.
+ *
+ * krx the receive being parsed
+ * type the portals message type the caller expects in the header
+ * expected_nid the NID the caller believes sent the message (asserted)
+ *
+ * Returns a pointer to the RMD inside the receive buffer, or NULL (after
+ * logging) if the header type or source NID is wrong, or if the message
+ * is too short to contain the RMD and all of its fragments. */
+kqswnal_remotemd_t *
+kqswnal_parse_rmd (kqswnal_rx_t *krx, int type, ptl_nid_t expected_nid)
{
- kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
char *buffer = (char *)page_address(krx->krx_kiov[0].kiov_page);
+ ptl_hdr_t *hdr = (ptl_hdr_t *)buffer;
kqswnal_remotemd_t *rmd = (kqswnal_remotemd_t *)(buffer + KQSW_HDR_SIZE);
- int rc;
-#if MULTIRAIL_EKC
- int i;
-#else
- EP_DATAVEC datav[EP_MAXFRAG];
- int ndatav;
-#endif
- LASSERT (krx->krx_rpc_reply_needed);
- LASSERT ((iov == NULL) != (kiov == NULL));
+ ptl_nid_t nid = kqswnal_rx_nid(krx);
+
+ /* Note (1) lib_parse has already flipped hdr.
+ * (2) RDMA addresses are sent in native endian-ness. When
+ * EKC copes with different endian nodes, I'll fix this (and
+ * eat my hat :) */
+
+ LASSERT (krx->krx_nob >= sizeof(*hdr));
+
+ if (hdr->type != type) {
+ CERROR ("Unexpected optimized get/put type %d (%d expected)"
+ " from "LPX64"\n", hdr->type, type, nid);
+ return (NULL);
+ }
+
+ if (hdr->src_nid != nid) {
+ CERROR ("Unexpected optimized get/put source NID "
+ LPX64" from "LPX64"\n", hdr->src_nid, nid);
+ return (NULL);
+ }
+
+ LASSERT (nid == expected_nid);
- /* see kqswnal_sendmsg comment regarding endian-ness */
if (buffer + krx->krx_nob < (char *)(rmd + 1)) {
/* msg too small to discover rmd size */
CERROR ("Incoming message [%d] too small for RMD (%d needed)\n",
krx->krx_nob, (int)(((char *)(rmd + 1)) - buffer));
- return (-EINVAL);
+ return (NULL);
}
-
+
if (buffer + krx->krx_nob < (char *)&rmd->kqrmd_frag[rmd->kqrmd_nfrag]) {
/* rmd doesn't fit in the incoming message */
CERROR ("Incoming message [%d] too small for RMD[%d] (%d needed)\n",
krx->krx_nob, rmd->kqrmd_nfrag,
(int)(((char *)&rmd->kqrmd_frag[rmd->kqrmd_nfrag]) - buffer));
- return (-EINVAL);
+ return (NULL);
}
- /* Map the source data... */
+ return (rmd);
+}
+
+/* EKC completion callback for the RDMA that stores GET reply data into the
+ * peer (registered via ep_complete_rpc() in kqswnal_rdma()). The RPC has
+ * been completed by that call, so no further reply is needed: drop the
+ * callback's reference on the receive and finalize the tx. */
+void
+kqswnal_rdma_store_complete (EP_RXD *rxd)
+{
+ kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd);
+ kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
+ int status = ep_rxd_status(rxd);
+ int error;
+
+ CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR,
+ "rxd %p, ktx %p, status %d\n", rxd, ktx, status);
+
+ LASSERT (ktx->ktx_state == KTX_RDMAING);
+ LASSERT (krx->krx_rxd == rxd);
+ LASSERT (krx->krx_rpc_reply_needed);
+
+ /* the RPC is done with; release the ref taken for this callback */
+ krx->krx_rpc_reply_needed = 0;
+ kqswnal_rx_decref (krx);
+
+ if (status == EP_SUCCESS)
+ error = 0;
+ else
+ error = -ECONNABORTED;
+
+ /* free ktx & finalize() its lib_msg_t */
+ kqswnal_tx_done(ktx, error);
+}
+
+/* EKC completion callback for the RDMA that fetches optimized PUT data
+ * from the peer (registered via ep_rpc_get() in kqswnal_rdma()). Records
+ * the outcome as the RPC reply status, finalizes the tx, and then either
+ * drops the callback's ref directly (thread context) or queues the receive
+ * for the scheduler to do so (interrupt context). */
+void
+kqswnal_rdma_fetch_complete (EP_RXD *rxd)
+{
+ kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd);
+ kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
+ int status = ep_rxd_status(rxd);
+ unsigned long flags;
+
+ CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR,
+ "rxd %p, ktx %p, status %d\n", rxd, ktx, status);
+
+ LASSERT (ktx->ktx_state == KTX_RDMAING);
+ LASSERT (krx->krx_rxd == rxd);
+ LASSERT (krx->krx_rpc_reply_needed);
+
+ /* Set the RPC completion status */
+ if (status == EP_SUCCESS)
+ status = 0;
+ else
+ status = -ECONNABORTED;
+ krx->krx_rpc_reply_status = status;
+
+ /* free ktx & finalize() its lib_msg_t */
+ kqswnal_tx_done(ktx, status);
+
+ if (in_interrupt()) {
+ /* Complete the RPC in thread context: mark the rx and pass
+ * it to the scheduler */
+ LASSERT (krx->krx_state == KRX_PARSE);
+ krx->krx_state = KRX_COMPLETING;
+
+ spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
+
+ list_add_tail (&krx->krx_list, &kqswnal_data.kqn_readyrxds);
+ wake_up (&kqswnal_data.kqn_sched_waitq);
+
+ spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
+ return;
+ }
+
+ /* OK to complete the RPC now (iff I had the last ref) */
+ kqswnal_rx_decref (krx);
+}
+
+/* Perform the RDMA half of an optimized GET or PUT for the RPC received in
+ * 'krx'.
+ *
+ * krx receive holding the peer's RPC (header + remote MD)
+ * libmsg portals message to finalize when the RDMA completes
+ * type PTL_MSG_GET: store local data into the peer and complete
+ * the RPC; PTL_MSG_PUT: fetch the peer's data into local
+ * buffers (the RPC is completed later)
+ * niov/iov/kiov local payload fragments (at most one of iov/kiov set)
+ * offset/len portion of the local payload to transfer
+ *
+ * Returns 0 if the RDMA was launched (or there was nothing to move), else
+ * a negative errno. The completion callback owns one extra reference on
+ * krx; that reference is taken before any failure path can reach 'out',
+ * so 'out' never drops the caller's own reference. */
+int
+kqswnal_rdma (kqswnal_rx_t *krx, lib_msg_t *libmsg, int type,
+ int niov, struct iovec *iov, ptl_kiov_t *kiov,
+ size_t offset, size_t len)
+{
+ kqswnal_remotemd_t *rmd;
+ kqswnal_tx_t *ktx;
+ int eprc;
+ int rc;
+#if !MULTIRAIL_EKC
+ EP_DATAVEC datav[EP_MAXFRAG];
+ int ndatav;
+#endif
+
+ LASSERT (type == PTL_MSG_GET || type == PTL_MSG_PUT);
+ /* Not both mapped and paged payload */
+ LASSERT (iov == NULL || kiov == NULL);
+ /* RPC completes with failure by default */
+ LASSERT (krx->krx_rpc_reply_needed);
+ LASSERT (krx->krx_rpc_reply_status != 0);
+
+ rmd = kqswnal_parse_rmd(krx, type, libmsg->ev.initiator.nid);
+ if (rmd == NULL)
+ return (-EPROTO);
+
+ if (len == 0) {
+ /* data got truncated to nothing. */
+ lib_finalize(&kqswnal_lib, krx, libmsg, PTL_OK);
+ /* Let kqswnal_rx_done() complete the RPC with success */
+ krx->krx_rpc_reply_status = 0;
+ return (0);
+ }
+
+ /* NB I'm using 'ktx' just to map the local RDMA buffers; I'm not
+ actually sending a portals message with it */
+ ktx = kqswnal_get_idle_tx(NULL, 0);
+ if (ktx == NULL) {
+ CERROR ("Can't get txd for RDMA with "LPX64"\n",
+ libmsg->ev.initiator.nid);
+ return (-ENOMEM);
+ }
+
+ ktx->ktx_state = KTX_RDMAING;
+ ktx->ktx_nid = libmsg->ev.initiator.nid;
+ ktx->ktx_args[0] = krx;
+ ktx->ktx_args[1] = libmsg;
+
+ LASSERT (atomic_read(&krx->krx_refcount) > 0);
+ /* Take an extra ref for the completion callback. This must happen
+ * before the first 'goto out', because the error path below drops
+ * this ref unconditionally; taking it later would make early
+ * failures release the caller's ref instead. */
+ atomic_inc(&krx->krx_refcount);
+
+ /* Start mapping at offset 0 (we're not mapping any headers) */
ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 0;
+
if (kiov != NULL)
- rc = kqswnal_map_tx_kiov (ktx, offset, nob, nfrag, kiov);
+ rc = kqswnal_map_tx_kiov(ktx, offset, len, niov, kiov);
else
- rc = kqswnal_map_tx_iov (ktx, offset, nob, nfrag, iov);
+ rc = kqswnal_map_tx_iov(ktx, offset, len, niov, iov);
if (rc != 0) {
- CERROR ("Can't map source data: %d\n", rc);
- return (rc);
+ CERROR ("Can't map local RDMA data: %d\n", rc);
+ goto out;
}
#if MULTIRAIL_EKC
- if (ktx->ktx_nfrag != rmd->kqrmd_nfrag) {
- CERROR("Can't cope with unequal # frags: %d local %d remote\n",
- ktx->ktx_nfrag, rmd->kqrmd_nfrag);
- return (-EINVAL);
+ rc = kqswnal_check_rdma (ktx->ktx_nfrag, ktx->ktx_frags,
+ rmd->kqrmd_nfrag, rmd->kqrmd_frag);
+ if (rc != 0) {
+ CERROR ("Incompatible RDMA descriptors\n");
+ goto out;
}
-
- for (i = 0; i < rmd->kqrmd_nfrag; i++)
- if (ktx->ktx_frags[i].nmd_len != rmd->kqrmd_frag[i].nmd_len) {
- CERROR("Can't cope with unequal frags %d(%d):"
- " %d local %d remote\n",
- i, rmd->kqrmd_nfrag,
- ktx->ktx_frags[i].nmd_len,
- rmd->kqrmd_frag[i].nmd_len);
- return (-EINVAL);
- }
#else
- ndatav = kqswnal_eiovs2datav (EP_MAXFRAG, datav,
- ktx->ktx_nfrag, ktx->ktx_frags,
- rmd->kqrmd_nfrag, rmd->kqrmd_frag);
+ switch (type) {
+ default:
+ LBUG();
+
+ case PTL_MSG_GET:
+ ndatav = kqswnal_eiovs2datav(EP_MAXFRAG, datav,
+ ktx->ktx_nfrag, ktx->ktx_frags,
+ rmd->kqrmd_nfrag, rmd->kqrmd_frag);
+ break;
+
+ case PTL_MSG_PUT:
+ ndatav = kqswnal_eiovs2datav(EP_MAXFRAG, datav,
+ rmd->kqrmd_nfrag, rmd->kqrmd_frag,
+ ktx->ktx_nfrag, ktx->ktx_frags);
+ break;
+ }
+
if (ndatav < 0) {
CERROR ("Can't create datavec: %d\n", ndatav);
- return (ndatav);
+ rc = ndatav;
+ goto out;
}
#endif
- /* Our caller will start to race with kqswnal_dma_reply_complete... */
- LASSERT (atomic_read (&krx->krx_refcount) == 1);
- atomic_set (&krx->krx_refcount, 2);
-#if MULTIRAIL_EKC
- rc = ep_complete_rpc(krx->krx_rxd, kqswnal_dma_reply_complete, ktx,
- &kqswnal_rpc_success,
- ktx->ktx_frags, rmd->kqrmd_frag, rmd->kqrmd_nfrag);
- if (rc == EP_SUCCESS)
- return (0);
+ switch (type) {
+ default:
+ LBUG();
- /* Well we tried... */
- krx->krx_rpc_reply_needed = 0;
+ case PTL_MSG_GET:
+#if MULTIRAIL_EKC
+ eprc = ep_complete_rpc(krx->krx_rxd,
+ kqswnal_rdma_store_complete, ktx,
+ &kqswnal_data.kqn_rpc_success,
+ ktx->ktx_frags, rmd->kqrmd_frag, rmd->kqrmd_nfrag);
#else
- rc = ep_complete_rpc (krx->krx_rxd, kqswnal_dma_reply_complete, ktx,
- &kqswnal_rpc_success, datav, ndatav);
- if (rc == EP_SUCCESS)
- return (0);
-
- /* "old" EKC destroys rxd on failed completion */
- krx->krx_rxd = NULL;
+ eprc = ep_complete_rpc (krx->krx_rxd,
+ kqswnal_rdma_store_complete, ktx,
+ &kqswnal_data.kqn_rpc_success,
+ datav, ndatav);
+ if (eprc != EP_SUCCESS) /* "old" EKC destroys rxd on failed completion */
+ krx->krx_rxd = NULL;
#endif
+ if (eprc != EP_SUCCESS) {
+ CERROR("can't complete RPC: %d\n", eprc);
+ /* don't re-attempt RPC completion */
+ krx->krx_rpc_reply_needed = 0;
+ rc = -ECONNABORTED;
+ }
+ break;
+
+ case PTL_MSG_PUT:
+#if MULTIRAIL_EKC
+ eprc = ep_rpc_get (krx->krx_rxd,
+ kqswnal_rdma_fetch_complete, ktx,
+ rmd->kqrmd_frag, ktx->ktx_frags, ktx->ktx_nfrag);
+#else
+ eprc = ep_rpc_get (krx->krx_rxd,
+ kqswnal_rdma_fetch_complete, ktx,
+ datav, ndatav);
+#endif
+ if (eprc != EP_SUCCESS) {
+ CERROR("ep_rpc_get failed: %d\n", eprc);
+ rc = -ECONNABORTED;
+ }
+ break;
+ }
- CERROR("can't complete RPC: %d\n", rc);
-
- /* reset refcount back to 1: we're not going to be racing with
- * kqswnal_dma_reply_complete. */
- atomic_set (&krx->krx_refcount, 1);
+ out:
+ if (rc != 0) {
+ kqswnal_rx_decref(krx); /* drop callback's ref */
+ kqswnal_put_idle_tx (ktx);
+ }
- return (-ECONNABORTED);
+ atomic_dec(&kqswnal_data.kqn_pending_txs);
+ return (rc);
}
static ptl_err_t
-kqswnal_sendmsg (nal_cb_t *nal,
+kqswnal_sendmsg (lib_nal_t *nal,
void *private,
lib_msg_t *libmsg,
ptl_hdr_t *hdr,
int sumoff;
int sumnob;
#endif
+ /* NB 1. hdr is in network byte order */
+ /* 2. 'private' depends on the message type */
CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid: "LPX64
" pid %u\n", payload_nob, payload_niov, nid, pid);
return (PTL_FAIL);
}
+ if (type == PTL_MSG_REPLY && /* can I look in 'private' */
+ ((kqswnal_rx_t *)private)->krx_rpc_reply_needed) { /* is it an RPC */
+ /* Must be a REPLY for an optimized GET */
+ rc = kqswnal_rdma ((kqswnal_rx_t *)private, libmsg, PTL_MSG_GET,
+ payload_niov, payload_iov, payload_kiov,
+ payload_offset, payload_nob);
+ return ((rc == 0) ? PTL_OK : PTL_FAIL);
+ }
+
targetnid = nid;
if (kqswnal_nid2elanid (nid) < 0) { /* Can't send direct: find gateway? */
rc = kpr_lookup (&kqswnal_data.kqn_router, nid,
type == PTL_MSG_REPLY ||
in_interrupt()));
if (ktx == NULL) {
- kqswnal_cerror_hdr (hdr);
+ CERROR ("Can't get txd for msg type %d for "LPX64"\n",
+ type, libmsg->ev.initiator.nid);
return (PTL_NO_SPACE);
}
+ ktx->ktx_state = KTX_SENDING;
ktx->ktx_nid = targetnid;
ktx->ktx_args[0] = private;
ktx->ktx_args[1] = libmsg;
-
- if (type == PTL_MSG_REPLY &&
- ((kqswnal_rx_t *)private)->krx_rpc_reply_needed) {
- if (nid != targetnid ||
- kqswnal_nid2elanid(nid) !=
- ep_rxd_node(((kqswnal_rx_t *)private)->krx_rxd)) {
- CERROR("Optimized reply nid conflict: "
- "nid "LPX64" via "LPX64" elanID %d\n",
- nid, targetnid,
- ep_rxd_node(((kqswnal_rx_t *)private)->krx_rxd));
- rc = -EINVAL;
- goto out;
- }
-
- /* peer expects RPC completion with GET data */
- rc = kqswnal_dma_reply (ktx, payload_niov,
- payload_iov, payload_kiov,
- payload_offset, payload_nob);
- if (rc != 0)
- CERROR ("Can't DMA reply to "LPX64": %d\n", nid, rc);
- goto out;
- }
+ ktx->ktx_args[2] = NULL; /* set when a GET commits to REPLY */
memcpy (ktx->ktx_buffer, hdr, sizeof (*hdr)); /* copy hdr from caller's stack */
ktx->ktx_wire_hdr = (ptl_hdr_t *)ktx->ktx_buffer;
memcpy(ktx->ktx_buffer + sizeof(*hdr) + sizeof(csum), &csum, sizeof(csum));
#endif
- if (kqswnal_tunables.kqn_optimized_gets &&
- type == PTL_MSG_GET && /* doing a GET */
- nid == targetnid) { /* not forwarding */
+ /* The first frag will be the pre-mapped buffer for (at least) the
+ * portals header. */
+ ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
+
+ if (nid == targetnid && /* not forwarding */
+ ((type == PTL_MSG_GET && /* optimize GET? */
+ kqswnal_tunables.kqn_optimized_gets != 0 &&
+ NTOH__u32(hdr->msg.get.sink_length) >= kqswnal_tunables.kqn_optimized_gets) ||
+ (type == PTL_MSG_PUT && /* optimize PUT? */
+ kqswnal_tunables.kqn_optimized_puts != 0 &&
+ payload_nob >= kqswnal_tunables.kqn_optimized_puts))) {
lib_md_t *md = libmsg->md;
kqswnal_remotemd_t *rmd = (kqswnal_remotemd_t *)(ktx->ktx_buffer + KQSW_HDR_SIZE);
- /* Optimised path: I send over the Elan vaddrs of the get
- * sink buffers, and my peer DMAs directly into them.
+ /* Optimised path: I send over the Elan vaddrs of the local
+ * buffers, and my peer DMAs directly to/from them.
*
* First I set up ktx as if it was going to send this
* payload, (it needs to map it anyway). This fills
* ktx_frags[1] and onward with the network addresses
* of the GET sink frags. I copy these into ktx_buffer,
- * immediately after the header, and send that as my GET
- * message.
- *
- * Note that the addresses are sent in native endian-ness.
- * When EKC copes with different endian nodes, I'll fix
- * this (and eat my hat :) */
+ * immediately after the header, and send that as my
+ * message. */
- ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
- ktx->ktx_state = KTX_GETTING;
+ ktx->ktx_state = (type == PTL_MSG_PUT) ? KTX_PUTTING : KTX_GETTING;
if ((libmsg->md->options & PTL_MD_KIOV) != 0)
rc = kqswnal_map_tx_kiov (ktx, 0, md->length,
ktx->ktx_frags[0].Base = ktx->ktx_ebuffer;
ktx->ktx_frags[0].Len = KQSW_HDR_SIZE + payload_nob;
#endif
+ if (type == PTL_MSG_GET) {
+ /* Allocate reply message now while I'm in thread context */
+ ktx->ktx_args[2] = lib_create_reply_msg (&kqswnal_lib,
+ nid, libmsg);
+ if (ktx->ktx_args[2] == NULL)
+ goto out;
+
+ /* NB finalizing the REPLY message is my
+ * responsibility now, whatever happens. */
+ }
+
} else if (payload_nob <= KQSW_TX_MAXCONTIG) {
/* small message: single frag copied into the pre-mapped buffer */
- ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
- ktx->ktx_state = KTX_SENDING;
#if MULTIRAIL_EKC
ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer,
0, KQSW_HDR_SIZE + payload_nob);
/* large message: multiple frags: first is hdr in pre-mapped buffer */
- ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
- ktx->ktx_state = KTX_SENDING;
#if MULTIRAIL_EKC
ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer,
0, KQSW_HDR_SIZE);
rc == 0 ? "Sent" : "Failed to send",
payload_nob, nid, targetnid, rc);
- if (rc != 0)
+ if (rc != 0) {
+ if (ktx->ktx_state == KTX_GETTING &&
+ ktx->ktx_args[2] != NULL) {
+ /* We committed to reply, but there was a problem
+ * launching the GET. We can't avoid delivering a
+ * REPLY event since we committed above, so we
+ * pretend the GET succeeded but the REPLY
+ * failed. */
+ rc = 0;
+ lib_finalize (&kqswnal_lib, private, libmsg, PTL_OK);
+ lib_finalize (&kqswnal_lib, private,
+ (lib_msg_t *)ktx->ktx_args[2], PTL_FAIL);
+ }
+
kqswnal_put_idle_tx (ktx);
-
+ }
+
atomic_dec(&kqswnal_data.kqn_pending_txs);
return (rc == 0 ? PTL_OK : PTL_FAIL);
}
static ptl_err_t
-kqswnal_send (nal_cb_t *nal,
+kqswnal_send (lib_nal_t *nal,
void *private,
lib_msg_t *libmsg,
ptl_hdr_t *hdr,
}
static ptl_err_t
-kqswnal_send_pages (nal_cb_t *nal,
+kqswnal_send_pages (lib_nal_t *nal,
void *private,
lib_msg_t *libmsg,
ptl_hdr_t *hdr,
if (ktx == NULL) /* can't get txd right now */
return; /* fwd will be scheduled when tx desc freed */
- if (nid == kqswnal_lib.ni.nid) /* gateway is me */
+ if (nid == kqswnal_lib.libnal_ni.ni_pid.nid) /* gateway is me */
nid = fwd->kprfd_target_nid; /* target is final dest */
if (kqswnal_nid2elanid (nid) < 0) {
if (rc != 0) {
CERROR ("Failed to forward [%p] to "LPX64": %d\n", fwd, nid, rc);
- kqswnal_put_idle_tx (ktx);
/* complete now (with failure) */
- kpr_fwd_done (&kqswnal_data.kqn_router, fwd, rc);
+ kqswnal_tx_done (ktx, rc);
}
atomic_dec(&kqswnal_data.kqn_pending_txs);
NTOH__u64(hdr->src_nid), NTOH__u64(hdr->dest_nid),error);
}
- kqswnal_requeue_rx (krx);
+ LASSERT (atomic_read(&krx->krx_refcount) == 1);
+ kqswnal_rx_decref (krx);
}
void
-kqswnal_dma_reply_complete (EP_RXD *rxd)
+kqswnal_requeue_rx (kqswnal_rx_t *krx) /* give krx back to EKC: repost the receive unless shutting down */
{
- int status = ep_rxd_status(rxd);
- kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd);
- kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
- lib_msg_t *msg = (lib_msg_t *)ktx->ktx_args[1];
-
- CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR,
- "rxd %p, ktx %p, status %d\n", rxd, ktx, status);
+ LASSERT (atomic_read(&krx->krx_refcount) == 0); /* nobody may still hold a ref */
+ LASSERT (!krx->krx_rpc_reply_needed); /* any RPC must already be completed or abandoned */
- LASSERT (krx->krx_rxd == rxd);
- LASSERT (krx->krx_rpc_reply_needed);
+ krx->krx_state = KRX_POSTED;
- krx->krx_rpc_reply_needed = 0;
- kqswnal_rx_done (krx);
+#if MULTIRAIL_EKC
+ if (kqswnal_data.kqn_shuttingdown) {
+ /* free EKC rxd on shutdown */
+ ep_complete_receive(krx->krx_rxd);
+ } else {
+ /* repost receive */
+ ep_requeue_receive(krx->krx_rxd,
+ kqswnal_rxhandler, krx,
+ &krx->krx_elanbuffer, 0);
+ }
+#else
+ if (kqswnal_data.kqn_shuttingdown)
+ return;
- lib_finalize (&kqswnal_lib, NULL, msg,
- (status == EP_SUCCESS) ? PTL_OK : PTL_FAIL);
- kqswnal_put_idle_tx (ktx);
+ if (krx->krx_rxd == NULL) {
+ /* We had a failed ep_complete_rpc() which nukes the
+ * descriptor in "old" EKC */
+ int eprc = ep_queue_receive(krx->krx_eprx,
+ kqswnal_rxhandler, krx,
+ krx->krx_elanbuffer,
+ krx->krx_npages * PAGE_SIZE, 0);
+ LASSERT (eprc == EP_SUCCESS);
+ /* We don't handle failure here; it's incredibly rare
+ * (never reported?) and only happens with "old" EKC */
+ } else {
+ ep_requeue_receive(krx->krx_rxd, kqswnal_rxhandler, krx,
+ krx->krx_elanbuffer,
+ krx->krx_npages * PAGE_SIZE);
+ }
+#endif
}
void
}
void
-kqswnal_requeue_rx (kqswnal_rx_t *krx)
+kqswnal_rx_done (kqswnal_rx_t *krx) /* last ref dropped: complete any outstanding RPC, else requeue krx */
{
- int rc;
+ int rc;
+ EP_STATUSBLK *sblk;
LASSERT (atomic_read(&krx->krx_refcount) == 0);
if (krx->krx_rpc_reply_needed) {
+ /* We've not completed the peer's RPC yet... */
+ sblk = (krx->krx_rpc_reply_status == 0) ?
+ &kqswnal_data.kqn_rpc_success :
+ &kqswnal_data.kqn_rpc_failed;
- /* We failed to complete the peer's optimized GET (e.g. we
- * couldn't map the source buffers). We complete the
- * peer's EKC rpc now with failure. */
+ LASSERT (!in_interrupt());
#if MULTIRAIL_EKC
- rc = ep_complete_rpc(krx->krx_rxd, kqswnal_rpc_complete, krx,
- &kqswnal_rpc_failed, NULL, NULL, 0);
+ rc = ep_complete_rpc(krx->krx_rxd,
+ kqswnal_rpc_complete, krx,
+ sblk, NULL, NULL, 0);
if (rc == EP_SUCCESS)
return;
-
- CERROR("can't complete RPC: %d\n", rc);
#else
- if (krx->krx_rxd != NULL) {
- /* We didn't try (and fail) to complete earlier... */
- rc = ep_complete_rpc(krx->krx_rxd,
- kqswnal_rpc_complete, krx,
- &kqswnal_rpc_failed, NULL, 0);
- if (rc == EP_SUCCESS)
- return;
-
- CERROR("can't complete RPC: %d\n", rc);
- }
-
- /* NB the old ep_complete_rpc() frees rxd on failure, so we
- * have to requeue from scratch here, unless we're shutting
- * down */
- if (kqswnal_data.kqn_shuttingdown)
+ rc = ep_complete_rpc(krx->krx_rxd,
+ kqswnal_rpc_complete, krx,
+ sblk, NULL, 0);
+ if (rc == EP_SUCCESS)
return;
- rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
- krx->krx_elanbuffer,
- krx->krx_npages * PAGE_SIZE, 0);
- LASSERT (rc == EP_SUCCESS);
- /* We don't handle failure here; it's incredibly rare
- * (never reported?) and only happens with "old" EKC */
- return;
+ /* "old" EKC destroys rxd on failed completion */
+ krx->krx_rxd = NULL;
#endif
+ CERROR("can't complete RPC: %d\n", rc);
+ krx->krx_rpc_reply_needed = 0; /* give up on the reply so the requeue below may proceed */
}
-#if MULTIRAIL_EKC
- if (kqswnal_data.kqn_shuttingdown) {
- /* free EKC rxd on shutdown */
- ep_complete_receive(krx->krx_rxd);
- } else {
- /* repost receive */
- ep_requeue_receive(krx->krx_rxd, kqswnal_rxhandler, krx,
- &krx->krx_elanbuffer, 0);
- }
-#else
- /* don't actually requeue on shutdown */
- if (!kqswnal_data.kqn_shuttingdown)
- ep_requeue_receive(krx->krx_rxd, kqswnal_rxhandler, krx,
- krx->krx_elanbuffer, krx->krx_npages * PAGE_SIZE);
-#endif
+ kqswnal_requeue_rx(krx);
}
void
-kqswnal_rx (kqswnal_rx_t *krx)
+kqswnal_parse (kqswnal_rx_t *krx)
{
ptl_hdr_t *hdr = (ptl_hdr_t *) page_address(krx->krx_kiov[0].kiov_page);
ptl_nid_t dest_nid = NTOH__u64 (hdr->dest_nid);
int nob;
int niov;
- LASSERT (atomic_read(&krx->krx_refcount) == 0);
+ LASSERT (atomic_read(&krx->krx_refcount) == 1);
+
+ if (dest_nid == kqswnal_lib.libnal_ni.ni_pid.nid) { /* It's for me :) */
+ /* I ignore parse errors since I'm not consuming a byte
+ * stream */
+ (void)lib_parse (&kqswnal_lib, hdr, krx);
- if (dest_nid == kqswnal_lib.ni.nid) { /* It's for me :) */
- atomic_set(&krx->krx_refcount, 1);
- lib_parse (&kqswnal_lib, hdr, krx);
- kqswnal_rx_done(krx);
+ /* Drop my ref; any RDMA activity takes an additional ref */
+ kqswnal_rx_decref(krx);
return;
}
#if KQSW_CHECKSUM
- CERROR ("checksums for forwarded packets not implemented\n");
- LBUG ();
+ LASSERTF (0, "checksums for forwarded packets not implemented\n");
#endif
+
if (kqswnal_nid2elanid (dest_nid) >= 0) /* should have gone direct to peer */
{
CERROR("dropping packet from "LPX64" for "LPX64
": target is peer\n", NTOH__u64(hdr->src_nid), dest_nid);
- kqswnal_requeue_rx (krx);
+ kqswnal_rx_decref (krx);
return;
}
rxd, krx, nob, status);
LASSERT (krx != NULL);
-
+ LASSERT (krx->krx_state == KRX_POSTED); /* compare, don't assign: rx must still be posted here */
+
+ krx->krx_state = KRX_PARSE;
krx->krx_rxd = rxd;
krx->krx_nob = nob;
#if MULTIRAIL_EKC
#else
krx->krx_rpc_reply_needed = ep_rxd_isrpc(rxd);
#endif
-
+ /* Default to failure if an RPC reply is requested but not handled */
+ krx->krx_rpc_reply_status = -EPROTO;
+ atomic_set (&krx->krx_refcount, 1);
+
/* must receive a whole header to be able to parse */
if (status != EP_SUCCESS || nob < sizeof (ptl_hdr_t))
{
CERROR("receive status failed with status %d nob %d\n",
ep_rxd_status(rxd), nob);
#endif
- kqswnal_requeue_rx (krx);
+ kqswnal_rx_decref(krx);
return;
}
if (!in_interrupt()) {
- kqswnal_rx (krx);
+ kqswnal_parse(krx);
return;
}
#endif
static ptl_err_t
-kqswnal_recvmsg (nal_cb_t *nal,
+kqswnal_recvmsg (lib_nal_t *nal,
void *private,
lib_msg_t *libmsg,
unsigned int niov,
{
kqswnal_rx_t *krx = (kqswnal_rx_t *)private;
char *buffer = page_address(krx->krx_kiov[0].kiov_page);
+ ptl_hdr_t *hdr = (ptl_hdr_t *)buffer;
int page;
char *page_ptr;
int page_nob;
char *iov_ptr;
int iov_nob;
int frag;
+ int rc;
#if KQSW_CHECKSUM
kqsw_csum_t senders_csum;
kqsw_csum_t payload_csum = 0;
- kqsw_csum_t hdr_csum = kqsw_csum(0, buffer, sizeof(ptl_hdr_t));
+ kqsw_csum_t hdr_csum = kqsw_csum(0, hdr, sizeof(*hdr));
size_t csum_len = mlen;
int csum_frags = 0;
int csum_nob = 0;
if (senders_csum != hdr_csum)
kqswnal_csum_error (krx, 1);
#endif
+ /* NB lib_parse() has already flipped *hdr */
+
CDEBUG(D_NET,"kqswnal_recv, mlen="LPSZ", rlen="LPSZ"\n", mlen, rlen);
+ if (krx->krx_rpc_reply_needed &&
+ hdr->type == PTL_MSG_PUT) {
+ /* This must be an optimized PUT */
+ rc = kqswnal_rdma (krx, libmsg, PTL_MSG_PUT,
+ niov, iov, kiov, offset, mlen);
+ return (rc == 0 ? PTL_OK : PTL_FAIL);
+ }
+
/* What was actually received must be >= payload. */
LASSERT (mlen <= rlen);
if (krx->krx_nob < KQSW_HDR_SIZE + mlen) {
}
static ptl_err_t
-kqswnal_recv(nal_cb_t *nal,
+kqswnal_recv(lib_nal_t *nal,
void *private,
lib_msg_t *libmsg,
unsigned int niov,
}
static ptl_err_t
-kqswnal_recv_pages (nal_cb_t *nal,
+kqswnal_recv_pages (lib_nal_t *nal,
void *private,
lib_msg_t *libmsg,
unsigned int niov,
spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
flags);
- kqswnal_rx (krx);
+ switch (krx->krx_state) {
+ case KRX_PARSE:
+ kqswnal_parse (krx);
+ break;
+ case KRX_COMPLETING:
+ /* Drop last ref to reply to RPC and requeue */
+ LASSERT (krx->krx_rpc_reply_needed);
+ kqswnal_rx_decref (krx);
+ break;
+ default:
+ LBUG();
+ }
did_something = 1;
spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags);
return (0);
}
-nal_cb_t kqswnal_lib =
+lib_nal_t kqswnal_lib = /* lib_nal_t callback table for this NAL */
{
- nal_data: &kqswnal_data, /* NAL private data */
- cb_send: kqswnal_send,
- cb_send_pages: kqswnal_send_pages,
- cb_recv: kqswnal_recv,
- cb_recv_pages: kqswnal_recv_pages,
- cb_read: kqswnal_read,
- cb_write: kqswnal_write,
- cb_malloc: kqswnal_malloc,
- cb_free: kqswnal_free,
- cb_printf: kqswnal_printf,
- cb_cli: kqswnal_cli,
- cb_sti: kqswnal_sti,
- cb_callback: kqswnal_callback,
- cb_dist: kqswnal_dist
+ libnal_data: &kqswnal_data, /* NAL private data */
+ libnal_send: kqswnal_send,
+ libnal_send_pages: kqswnal_send_pages,
+ libnal_recv: kqswnal_recv,
+ libnal_recv_pages: kqswnal_recv_pages,
+ libnal_dist: kqswnal_dist /* distance query: 0 for own NID, 1 for a direct peer */
};
#endif
int
-ksocknal_api_forward(nal_t *nal, int id, void *args, size_t args_len,
- void *ret, size_t ret_len)
-{
- ksock_nal_data_t *k;
- nal_cb_t *nal_cb;
-
- k = nal->nal_data;
- nal_cb = k->ksnd_nal_cb;
-
- lib_dispatch(nal_cb, k, id, args, ret); /* ksocknal_send needs k */
- return PTL_OK;
-}
-
-void
-ksocknal_api_lock(nal_t *nal, unsigned long *flags)
-{
- ksock_nal_data_t *k;
- nal_cb_t *nal_cb;
-
- k = nal->nal_data;
- nal_cb = k->ksnd_nal_cb;
- nal_cb->cb_cli(nal_cb,flags);
-}
-
-void
-ksocknal_api_unlock(nal_t *nal, unsigned long *flags)
-{
- ksock_nal_data_t *k;
- nal_cb_t *nal_cb;
-
- k = nal->nal_data;
- nal_cb = k->ksnd_nal_cb;
- nal_cb->cb_sti(nal_cb,flags);
-}
-
-int
-ksocknal_api_yield(nal_t *nal, unsigned long *flags, int milliseconds)
-{
- /* NB called holding statelock */
- wait_queue_t wait;
- unsigned long now = jiffies;
-
- CDEBUG (D_NET, "yield\n");
-
- if (milliseconds == 0) {
- our_cond_resched();
- return 0;
- }
-
- init_waitqueue_entry(&wait, current);
- set_current_state (TASK_INTERRUPTIBLE);
- add_wait_queue (&ksocknal_data.ksnd_yield_waitq, &wait);
-
- ksocknal_api_unlock(nal, flags);
-
- if (milliseconds < 0)
- schedule ();
- else
- schedule_timeout((milliseconds * HZ) / 1000);
-
- ksocknal_api_lock(nal, flags);
-
- remove_wait_queue (&ksocknal_data.ksnd_yield_waitq, &wait);
-
- if (milliseconds > 0) {
- milliseconds -= ((jiffies - now) * 1000) / HZ;
- if (milliseconds < 0)
- milliseconds = 0;
- }
-
- return (milliseconds);
-}
-
-int
ksocknal_set_mynid(ptl_nid_t nid)
{
- lib_ni_t *ni = &ksocknal_lib.ni;
+ lib_ni_t *ni = &ksocknal_lib.libnal_ni;
/* FIXME: we have to do this because we call lib_init() at module
* insertion time, which is before we have 'mynid' available. lib_init
* problem. */
CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n",
- nid, ni->nid);
+ nid, ni->ni_pid.nid);
- ni->nid = nid;
+ ni->ni_pid.nid = nid;
return (0);
}
/* flag threads to terminate; wake and wait for them to die */
ksocknal_data.ksnd_shuttingdown = 1;
+ mb();
wake_up_all (&ksocknal_data.ksnd_autoconnectd_waitq);
wake_up_all (&ksocknal_data.ksnd_reaper_waitq);
for (i = 0; i < SOCKNAL_N_SCHED; i++)
wake_up_all(&ksocknal_data.ksnd_schedulers[i].kss_waitq);
+ i = 4;
while (atomic_read (&ksocknal_data.ksnd_nthreads) != 0) {
- CDEBUG (D_NET, "waitinf for %d threads to terminate\n",
+ i++;
+ CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
+ "waiting for %d threads to terminate\n",
atomic_read (&ksocknal_data.ksnd_nthreads));
set_current_state (TASK_UNINTERRUPTIBLE);
schedule_timeout (HZ);
if (nal->nal_refct != 0) {
if (actual_limits != NULL)
- *actual_limits = ksocknal_lib.ni.actual_limits;
+ *actual_limits = ksocknal_lib.libnal_ni.ni_actual_limits;
/* This module got the first ref */
PORTAL_MODULE_USE;
return (PTL_OK);
rwlock_init(&ksocknal_data.ksnd_global_lock);
- ksocknal_data.ksnd_nal_cb = &ksocknal_lib;
- spin_lock_init (&ksocknal_data.ksnd_nal_cb_lock);
- init_waitqueue_head(&ksocknal_data.ksnd_yield_waitq);
-
spin_lock_init(&ksocknal_data.ksnd_small_fmp.fmp_lock);
INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_idle_fmbs);
INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns);
PORTAL_ALLOC(ksocknal_data.ksnd_schedulers,
sizeof(ksock_sched_t) * SOCKNAL_N_SCHED);
if (ksocknal_data.ksnd_schedulers == NULL) {
- ksocknal_api_shutdown (&ksocknal_api);
+ ksocknal_api_shutdown (nal);
return (-ENOMEM);
}
process_id.pid = 0;
process_id.nid = 0;
- rc = lib_init(&ksocknal_lib, process_id,
+ rc = lib_init(&ksocknal_lib, nal, process_id,
requested_limits, actual_limits);
if (rc != PTL_OK) {
CERROR("lib_init failed: error %d\n", rc);
- ksocknal_api_shutdown (&ksocknal_api);
+ ksocknal_api_shutdown (nal);
return (rc);
}
if (rc != 0) {
CERROR("Can't spawn socknal scheduler[%d]: %d\n",
i, rc);
- ksocknal_api_shutdown (&ksocknal_api);
+ ksocknal_api_shutdown (nal);
return (rc);
}
}
rc = ksocknal_thread_start (ksocknal_autoconnectd, (void *)((long)i));
if (rc != 0) {
CERROR("Can't spawn socknal autoconnectd: %d\n", rc);
- ksocknal_api_shutdown (&ksocknal_api);
+ ksocknal_api_shutdown (nal);
return (rc);
}
}
rc = ksocknal_thread_start (ksocknal_reaper, NULL);
if (rc != 0) {
CERROR ("Can't spawn socknal reaper: %d\n", rc);
- ksocknal_api_shutdown (&ksocknal_api);
+ ksocknal_api_shutdown (nal);
return (rc);
}
PORTAL_ALLOC(fmb, offsetof(ksock_fmb_t,
fmb_kiov[pool->fmp_buff_pages]));
if (fmb == NULL) {
- ksocknal_api_shutdown(&ksocknal_api);
+ ksocknal_api_shutdown(nal);
return (-ENOMEM);
}
fmb->fmb_kiov[j].kiov_page = alloc_page(GFP_KERNEL);
if (fmb->fmb_kiov[j].kiov_page == NULL) {
- ksocknal_api_shutdown (&ksocknal_api);
+ ksocknal_api_shutdown (nal);
return (-ENOMEM);
}
rc = libcfs_nal_cmd_register(SOCKNAL, &ksocknal_cmd, NULL);
if (rc != 0) {
CERROR ("Can't initialise command interface (rc = %d)\n", rc);
- ksocknal_api_shutdown (&ksocknal_api);
+ ksocknal_api_shutdown (nal);
return (rc);
}
/* check ksnr_connected/connecting field large enough */
LASSERT(SOCKNAL_CONN_NTYPES <= 4);
- ksocknal_api.startup = ksocknal_api_startup;
- ksocknal_api.forward = ksocknal_api_forward;
- ksocknal_api.shutdown = ksocknal_api_shutdown;
- ksocknal_api.lock = ksocknal_api_lock;
- ksocknal_api.unlock = ksocknal_api_unlock;
- ksocknal_api.nal_data = &ksocknal_data;
-
- ksocknal_lib.nal_data = &ksocknal_data;
+ ksocknal_api.nal_ni_init = ksocknal_api_startup;
+ ksocknal_api.nal_ni_fini = ksocknal_api_shutdown;
/* Initialise dynamic tunables to defaults once only */
ksocknal_tunables.ksnd_io_timeout = SOCKNAL_IO_TIMEOUT;
struct list_head *ksnd_peers; /* hash table of all my known peers */
int ksnd_peer_hash_size; /* size of ksnd_peers */
- nal_cb_t *ksnd_nal_cb;
- spinlock_t ksnd_nal_cb_lock; /* lib cli/sti lock */
- wait_queue_head_t ksnd_yield_waitq; /* where yield waits */
-
atomic_t ksnd_nthreads; /* # live threads */
int ksnd_shuttingdown; /* tell threads to exit */
ksock_sched_t *ksnd_schedulers; /* scheduler state */
} ksock_peer_t;
-extern nal_cb_t ksocknal_lib;
+extern lib_nal_t ksocknal_lib;
extern ksock_nal_data_t ksocknal_data;
extern ksock_tunables_t ksocknal_tunables;
* LIB functions follow
*
*/
-ptl_err_t
-ksocknal_read(nal_cb_t *nal, void *private, void *dst_addr,
- user_ptr src_addr, size_t len)
-{
- CDEBUG(D_NET, LPX64": reading %ld bytes from %p -> %p\n",
- nal->ni.nid, (long)len, src_addr, dst_addr);
-
- memcpy( dst_addr, src_addr, len );
- return PTL_OK;
-}
-
-ptl_err_t
-ksocknal_write(nal_cb_t *nal, void *private, user_ptr dst_addr,
- void *src_addr, size_t len)
-{
- CDEBUG(D_NET, LPX64": writing %ld bytes from %p -> %p\n",
- nal->ni.nid, (long)len, src_addr, dst_addr);
-
- memcpy( dst_addr, src_addr, len );
- return PTL_OK;
-}
-
-void *
-ksocknal_malloc(nal_cb_t *nal, size_t len)
-{
- void *buf;
-
- PORTAL_ALLOC(buf, len);
-
- if (buf != NULL)
- memset(buf, 0, len);
-
- return (buf);
-}
-
-void
-ksocknal_free(nal_cb_t *nal, void *buf, size_t len)
-{
- PORTAL_FREE(buf, len);
-}
-
-void
-ksocknal_printf(nal_cb_t *nal, const char *fmt, ...)
-{
- va_list ap;
- char msg[256];
-
- va_start (ap, fmt);
- vsnprintf (msg, sizeof (msg), fmt, ap); /* sprint safely */
- va_end (ap);
-
- msg[sizeof (msg) - 1] = 0; /* ensure terminated */
-
- CDEBUG (D_NET, "%s", msg);
-}
-
-void
-ksocknal_cli(nal_cb_t *nal, unsigned long *flags)
-{
- ksock_nal_data_t *data = nal->nal_data;
-
- /* OK to ignore 'flags'; we're only ever serialise threads and
- * never need to lock out interrupts */
- spin_lock(&data->ksnd_nal_cb_lock);
-}
-
-void
-ksocknal_sti(nal_cb_t *nal, unsigned long *flags)
-{
- ksock_nal_data_t *data;
- data = nal->nal_data;
-
- /* OK to ignore 'flags'; we're only ever serialise threads and
- * never need to lock out interrupts */
- spin_unlock(&data->ksnd_nal_cb_lock);
-}
-
-void
-ksocknal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev)
-{
- /* holding ksnd_nal_cb_lock */
-
- if (eq->event_callback != NULL)
- eq->event_callback(ev);
-
- if (waitqueue_active(&ksocknal_data.ksnd_yield_waitq))
- wake_up_all(&ksocknal_data.ksnd_yield_waitq);
-}
-
int
-ksocknal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
+ksocknal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist)
{
/* I would guess that if ksocknal_get_peer (nid) == NULL,
and we're not routing, then 'nid' is very distant :) */
- if ( nal->ni.nid == nid ) {
+ if (nal->libnal_ni.ni_pid.nid == nid) {
*dist = 0;
} else {
*dist = 1;
{
struct list_head *tmp;
ksock_route_t *route;
- ksock_route_t *candidate = NULL;
- int found = 0;
+ ksock_route_t *first_lazy = NULL;
+ int found_connecting_or_connected = 0;
int bits;
list_for_each (tmp, &peer->ksnp_routes) {
/* All typed connections have been established, or
* an untyped connection has been established, or
* connections are currently being established */
- found = 1;
+ found_connecting_or_connected = 1;
continue;
}
if (!time_after_eq (jiffies, route->ksnr_timeout))
continue;
- /* always do eager routes */
+ /* eager routes always want to be connected */
if (route->ksnr_eager)
return (route);
- if (candidate == NULL) {
- /* If we don't find any other route that is fully
- * connected or connecting, the first connectable
- * route is returned. If it fails to connect, it
- * will get placed at the end of the list */
- candidate = route;
- }
+ if (first_lazy == NULL)
+ first_lazy = route;
}
-
- return (found ? NULL : candidate);
+
+ /* No eager routes need to be connected. If some connection has
+ * already been established, or is being established, there's nothing
+ * to do. Otherwise we return the first lazy route we found. If it
+ * fails to connect, it will go to the end of the list. */
+
+ if (!list_empty (&peer->ksnp_conns) ||
+ found_connecting_or_connected)
+ return (NULL);
+
+ return (first_lazy);
}
ksock_route_t *
}
ptl_err_t
-ksocknal_sendmsg(nal_cb_t *nal,
+ksocknal_sendmsg(lib_nal_t *nal,
void *private,
lib_msg_t *cookie,
ptl_hdr_t *hdr,
}
ptl_err_t
-ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie,
+ksocknal_send (lib_nal_t *nal, void *private, lib_msg_t *cookie,
ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
unsigned int payload_niov, struct iovec *payload_iov,
size_t payload_offset, size_t payload_len)
}
ptl_err_t
-ksocknal_send_pages (nal_cb_t *nal, void *private, lib_msg_t *cookie,
+ksocknal_send_pages (lib_nal_t *nal, void *private, lib_msg_t *cookie,
ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
unsigned int payload_niov, ptl_kiov_t *payload_kiov,
size_t payload_offset, size_t payload_len)
fwd->kprfd_gateway_nid, fwd->kprfd_target_nid);
/* I'm the gateway; must be the last hop */
- if (nid == ksocknal_lib.ni.nid)
+ if (nid == ksocknal_lib.libnal_ni.ni_pid.nid)
nid = fwd->kprfd_target_nid;
/* setup iov for hdr */
switch (conn->ksnc_rx_state) {
case SOCKNAL_RX_HEADER:
if (conn->ksnc_hdr.type != HTON__u32(PTL_MSG_HELLO) &&
- NTOH__u64(conn->ksnc_hdr.dest_nid) != ksocknal_lib.ni.nid) {
+ NTOH__u64(conn->ksnc_hdr.dest_nid) !=
+ ksocknal_lib.libnal_ni.ni_pid.nid) {
/* This packet isn't for me */
ksocknal_fwd_parse (conn);
switch (conn->ksnc_rx_state) {
}
/* sets wanted_len, iovs etc */
- lib_parse(&ksocknal_lib, &conn->ksnc_hdr, conn);
+ rc = lib_parse(&ksocknal_lib, &conn->ksnc_hdr, conn);
+
+ if (rc != PTL_OK) {
+ /* I just received garbage: give up on this conn */
+ ksocknal_close_conn_and_siblings (conn, rc);
+ return (-EPROTO);
+ }
if (conn->ksnc_rx_nob_wanted != 0) { /* need to get payload? */
conn->ksnc_rx_state = SOCKNAL_RX_BODY;
}
ptl_err_t
-ksocknal_recv (nal_cb_t *nal, void *private, lib_msg_t *msg,
+ksocknal_recv (lib_nal_t *nal, void *private, lib_msg_t *msg,
unsigned int niov, struct iovec *iov,
size_t offset, size_t mlen, size_t rlen)
{
}
ptl_err_t
-ksocknal_recv_pages (nal_cb_t *nal, void *private, lib_msg_t *msg,
+ksocknal_recv_pages (lib_nal_t *nal, void *private, lib_msg_t *msg,
unsigned int niov, ptl_kiov_t *kiov,
size_t offset, size_t mlen, size_t rlen)
{
hmv->version_major = __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR);
hmv->version_minor = __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR);
- hdr.src_nid = __cpu_to_le64 (ksocknal_lib.ni.nid);
+ hdr.src_nid = __cpu_to_le64 (ksocknal_lib.libnal_ni.ni_pid.nid);
hdr.type = __cpu_to_le32 (PTL_MSG_HELLO);
hdr.msg.hello.type = __cpu_to_le32 (*type);
return (0);
}
-nal_cb_t ksocknal_lib = {
- nal_data: &ksocknal_data, /* NAL private data */
- cb_send: ksocknal_send,
- cb_send_pages: ksocknal_send_pages,
- cb_recv: ksocknal_recv,
- cb_recv_pages: ksocknal_recv_pages,
- cb_read: ksocknal_read,
- cb_write: ksocknal_write,
- cb_malloc: ksocknal_malloc,
- cb_free: ksocknal_free,
- cb_printf: ksocknal_printf,
- cb_cli: ksocknal_cli,
- cb_sti: ksocknal_sti,
- cb_callback: ksocknal_callback,
- cb_dist: ksocknal_dist
+lib_nal_t ksocknal_lib = {
+ libnal_data: &ksocknal_data, /* NAL private data */
+ libnal_send: ksocknal_send,
+ libnal_send_pages: ksocknal_send_pages,
+ libnal_recv: ksocknal_recv,
+ libnal_recv_pages: ksocknal_recv_pages,
+ libnal_dist: ksocknal_dist
};
#define PORTAL_MINOR 240
struct nal_cmd_handler {
+ int nch_number;
nal_cmd_handler_fn *nch_handler;
void *nch_private;
};
-static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1];
+static struct nal_cmd_handler nal_cmd[16];
static DECLARE_MUTEX(nal_cmd_sem);
#ifdef PORTAL_DEBUG
PORTAL_FREE(data, len);
}
+struct nal_cmd_handler *
+libcfs_find_nal_cmd_handler(int nal)
+{
+ int i;
+
+ for (i = 0; i < sizeof(nal_cmd)/sizeof(nal_cmd[0]); i++)
+ if (nal_cmd[i].nch_handler != NULL &&
+ nal_cmd[i].nch_number == nal)
+ return (&nal_cmd[i]);
+
+ return (NULL);
+}
+
int
libcfs_nal_cmd_register(int nal, nal_cmd_handler_fn *handler, void *private)
{
- int rc = 0;
+ struct nal_cmd_handler *cmd;
+ int i;
+ int rc;
CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler);
- if (nal > 0 && nal <= NAL_MAX_NR) {
- down(&nal_cmd_sem);
- if (nal_cmd[nal].nch_handler != NULL)
- rc = -EBUSY;
- else {
- nal_cmd[nal].nch_handler = handler;
- nal_cmd[nal].nch_private = private;
+ down(&nal_cmd_sem);
+
+ if (libcfs_find_nal_cmd_handler(nal) != NULL) {
+ up (&nal_cmd_sem);
+ return (-EBUSY);
+ }
+
+ cmd = NULL;
+ for (i = 0; i < sizeof(nal_cmd)/sizeof(nal_cmd[0]); i++)
+ if (nal_cmd[i].nch_handler == NULL) {
+ cmd = &nal_cmd[i];
+ break;
}
- up(&nal_cmd_sem);
+
+ if (cmd == NULL) {
+ rc = -EBUSY;
+ } else {
+ rc = 0;
+ cmd->nch_number = nal;
+ cmd->nch_handler = handler;
+ cmd->nch_private = private;
}
+
+ up(&nal_cmd_sem);
+
return rc;
}
EXPORT_SYMBOL(libcfs_nal_cmd_register);
void
libcfs_nal_cmd_unregister(int nal)
{
- CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal);
+ struct nal_cmd_handler *cmd;
- LASSERT(nal > 0 && nal <= NAL_MAX_NR);
- LASSERT(nal_cmd[nal].nch_handler != NULL);
+ CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal);
down(&nal_cmd_sem);
- nal_cmd[nal].nch_handler = NULL;
- nal_cmd[nal].nch_private = NULL;
+ cmd = libcfs_find_nal_cmd_handler(nal);
+ LASSERT (cmd != NULL);
+ cmd->nch_handler = NULL;
+ cmd->nch_private = NULL;
up(&nal_cmd_sem);
}
EXPORT_SYMBOL(libcfs_nal_cmd_unregister);
int
libcfs_nal_cmd(struct portals_cfg *pcfg)
{
+ struct nal_cmd_handler *cmd;
__u32 nal = pcfg->pcfg_nal;
int rc = -EINVAL;
ENTRY;
down(&nal_cmd_sem);
- if (nal > 0 && nal <= NAL_MAX_NR &&
- nal_cmd[nal].nch_handler != NULL) {
+ cmd = libcfs_find_nal_cmd_handler(nal);
+ if (cmd != NULL) {
CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal,
pcfg->pcfg_command);
- rc = nal_cmd[nal].nch_handler(pcfg, nal_cmd[nal].nch_private);
+ rc = cmd->nch_handler(pcfg, cmd->nch_private);
}
up(&nal_cmd_sem);
MODULES := portals
-portals-objs := api-eq.o api-init.o api-me.o api-errno.o api-ni.o api-wrap.o
-portals-objs += lib-dispatch.o lib-init.o lib-me.o lib-msg.o lib-eq.o lib-md.o
+portals-objs := api-errno.o api-ni.o api-wrap.o
+portals-objs += lib-init.o lib-me.o lib-msg.o lib-eq.o lib-md.o
portals-objs += lib-move.o lib-ni.o lib-pid.o module.o
@INCLUDE_RULES@
include $(src)/../Kernelenv
obj-y += portals.o
-portals-objs := lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o \
+portals-objs := lib-eq.o lib-init.o lib-md.o lib-me.o \
lib-move.o lib-msg.o lib-ni.o lib-pid.o \
- api-eq.o api-errno.o api-init.o api-me.o api-ni.o \
- api-wrap.o module.o
+ api-errno.o api-ni.o api-wrap.o \
+ module.o
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * api/api-eq.c
- * User-level event queue management routines
- *
- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
- * Copyright (c) 2001-2002 Sandia National Laboratories
- *
- * This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_PORTALS
-#include <portals/api-support.h>
-
-int ptl_get_event (ptl_eq_t *eq, ptl_event_t *ev)
-{
- int new_index = eq->sequence & (eq->size - 1);
- ptl_event_t *new_event = &eq->base[new_index];
- ENTRY;
-
- CDEBUG(D_INFO, "new_event: %p, sequence: %lu, eq->size: %u\n",
- new_event, eq->sequence, eq->size);
-
- if (PTL_SEQ_GT (eq->sequence, new_event->sequence)) {
- RETURN(PTL_EQ_EMPTY);
- }
-
- *ev = *new_event;
-
- /* ensure event is delivered correctly despite possible
- races with lib_finalize */
- if (eq->sequence != new_event->sequence) {
- CERROR("DROPPING EVENT: eq seq %lu ev seq %lu\n",
- eq->sequence, new_event->sequence);
- RETURN(PTL_EQ_DROPPED);
- }
-
- eq->sequence = new_event->sequence + 1;
- RETURN(PTL_OK);
-}
-
-int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t * ev)
-{
- int which;
-
- return (PtlEQPoll (&eventq, 1, 0, ev, &which));
-}
-
-int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out)
-{
- int which;
-
- return (PtlEQPoll (&eventq_in, 1, PTL_TIME_FOREVER,
- event_out, &which));
-}
-
-int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout,
- ptl_event_t *event_out, int *which_out)
-{
- nal_t *nal;
- int i;
- int rc;
- unsigned long flags;
-
- if (!ptl_init)
- RETURN(PTL_NO_INIT);
-
- if (neq_in < 1)
- RETURN(PTL_EQ_INVALID);
-
- nal = ptl_hndl2nal(&eventqs_in[0]);
- if (nal == NULL)
- RETURN(PTL_EQ_INVALID);
-
- nal->lock(nal, &flags);
-
- for (;;) {
- for (i = 0; i < neq_in; i++) {
- ptl_eq_t *eq = ptl_handle2usereq(&eventqs_in[i]);
-
- if (i > 0 &&
- ptl_hndl2nal(&eventqs_in[i]) != nal) {
- nal->unlock(nal, &flags);
- RETURN (PTL_EQ_INVALID);
- }
-
- /* size must be a power of 2 to handle a wrapped sequence # */
- LASSERT (eq->size != 0 &&
- eq->size == LOWEST_BIT_SET (eq->size));
-
- rc = ptl_get_event (eq, event_out);
- if (rc != PTL_EQ_EMPTY) {
- nal->unlock(nal, &flags);
- *which_out = i;
- RETURN(rc);
- }
- }
-
- if (timeout == 0) {
- nal->unlock(nal, &flags);
- RETURN (PTL_EQ_EMPTY);
- }
-
- timeout = nal->yield(nal, &flags, timeout);
- }
-}
"PTL_EQ_IN_USE",
+ "PTL_NI_INVALID",
+ "PTL_MD_ILLEGAL",
+
"PTL_MAX_ERRNO"
};
/* If you change these, you must update the number table in portals/errno.h */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * api/api-init.c
- * Initialization and global data for the p30 user side library
- *
- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
- * Copyright (c) 2001-2002 Sandia National Laboratories
- *
- * This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_PORTALS
-#include <portals/api-support.h>
-
-int PtlInit(int *max_interfaces)
-{
- if (max_interfaces != NULL)
- *max_interfaces = NAL_MAX_NR;
-
- LASSERT(!strcmp(ptl_err_str[PTL_MAX_ERRNO], "PTL_MAX_ERRNO"));
-
- return ptl_ni_init();
-}
-
-
-void PtlFini(void)
-{
- ptl_ni_fini();
-}
-
-
-void PtlSnprintHandle(char *str, int len, ptl_handle_any_t h)
-{
- snprintf(str, len, "0x%lx."LPX64, h.nal_idx, h.cookie);
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * api/api-me.c
- * Match Entry local operations.
- *
- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
- * Copyright (c) 2001-2002 Sandia National Laboratories
- *
- * This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_PORTALS
-#include <portals/api-support.h>
-
* invalidated out from under her (or worse, swapped for a
* completely different interface!) */
+ LASSERT (ptl_init);
+
if (((idx ^ NI_HANDLE_MAGIC) & ~NI_HANDLE_MASK) != 0)
return NULL;
ptl_mutex_exit();
}
-int ptl_ni_init(void)
+int PtlInit(int *max_interfaces)
{
+ LASSERT(!strcmp(ptl_err_str[PTL_MAX_ERRNO], "PTL_MAX_ERRNO"));
+
/* If this assertion fails, we need more bits in NI_HANDLE_MASK and
* to shift NI_HANDLE_MAGIC left appropriately */
LASSERT (NAL_MAX_NR <= (NI_HANDLE_MASK + 1));
+ if (max_interfaces != NULL)
+ *max_interfaces = NAL_MAX_NR;
+
ptl_mutex_enter();
if (!ptl_init) {
return PTL_OK;
}
-void ptl_ni_fini(void)
+void PtlFini(void)
{
nal_t *nal;
int i;
if (nal->nal_refct != 0) {
CWARN("NAL %d has outstanding refcount %d\n",
i, nal->nal_refct);
- nal->shutdown(nal);
+ nal->nal_ni_fini(nal);
}
ptl_nal_table[i] = NULL;
}
nal = ptl_nal_table[interface];
-
+ nal->nal_handle.nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | interface;
+ nal->nal_handle.cookie = 0;
+
CDEBUG(D_OTHER, "Starting up NAL (%d) refs %d\n", interface, nal->nal_refct);
- rc = nal->startup(nal, requested_pid, desired_limits, actual_limits);
+ rc = nal->nal_ni_init(nal, requested_pid, desired_limits, actual_limits);
if (rc != PTL_OK) {
CERROR("Error %d starting up NAL %d, refs %d\n", rc,
}
nal->nal_refct++;
- handle->nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | interface;
+ *handle = nal->nal_handle;
out:
ptl_mutex_exit ();
+
return rc;
}
nal->nal_refct--;
/* nal_refct == 0 tells nal->shutdown to really shut down */
- nal->shutdown(nal);
+ nal->nal_ni_fini(nal);
ptl_mutex_exit ();
return PTL_OK;
}
-
-int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * ni_out)
-{
- *ni_out = handle_in;
-
- return PTL_OK;
-}
# define DEBUG_SUBSYSTEM S_PORTALS
#include <portals/api-support.h>
-static int do_forward(ptl_handle_any_t any_h, int cmd, void *argbuf,
- int argsize, void *retbuf, int retsize)
+void PtlSnprintHandle(char *str, int len, ptl_handle_any_t h)
{
- nal_t *nal;
+ snprintf(str, len, "0x%lx."LPX64, h.nal_idx, h.cookie);
+}
- if (!ptl_init) {
- CERROR("Not initialized\n");
+int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t *ni_out)
+{
+ if (!ptl_init)
return PTL_NO_INIT;
- }
-
- nal = ptl_hndl2nal(&any_h);
- if (!nal)
+
+ if (ptl_hndl2nal(&handle_in) == NULL)
return PTL_HANDLE_INVALID;
-
- nal->forward(nal, cmd, argbuf, argsize, retbuf, retsize);
-
+
+ *ni_out = handle_in;
return PTL_OK;
}
int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id)
{
- PtlGetId_in args;
- PtlGetId_out ret;
- int rc;
-
- args.handle_in = ni_handle;
+ nal_t *nal;
- rc = do_forward(ni_handle, PTL_GETID, &args, sizeof(args), &ret,
- sizeof(ret));
- if (rc != PTL_OK)
- return rc;
+ if (!ptl_init)
+ return PTL_NO_INIT;
- if (id)
- *id = ret.id_out;
+ nal = ptl_hndl2nal(&ni_handle);
+ if (nal == NULL)
+ return PTL_NI_INVALID;
- return ret.rc;
+ return nal->nal_get_id(nal, id);
}
int PtlFailNid (ptl_handle_ni_t interface, ptl_nid_t nid, unsigned int threshold)
{
- PtlFailNid_in args;
- PtlFailNid_out ret;
- int rc;
-
- args.interface = interface;
- args.nid = nid;
- args.threshold = threshold;
+ nal_t *nal;
+
+ if (!ptl_init)
+ return PTL_NO_INIT;
- rc = do_forward (interface, PTL_FAILNID,
- &args, sizeof(args), &ret, sizeof (ret));
+ nal = ptl_hndl2nal(&interface);
+ if (nal == NULL)
+ return PTL_NI_INVALID;
- return ((rc != PTL_OK) ? rc : ret.rc);
+ return nal->nal_fail_nid(nal, nid, threshold);
}
int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in,
- ptl_sr_value_t * status_out)
+ ptl_sr_value_t *status_out)
{
- PtlNIStatus_in args;
- PtlNIStatus_out ret;
- int rc;
+ nal_t *nal;
- args.interface_in = interface_in;
- args.register_in = register_in;
-
- rc = do_forward(interface_in, PTL_NISTATUS, &args, sizeof(args), &ret,
- sizeof(ret));
-
- if (rc != PTL_OK)
- return rc;
-
- if (status_out)
- *status_out = ret.status_out;
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ nal = ptl_hndl2nal(&interface_in);
+ if (nal == NULL)
+ return PTL_NI_INVALID;
- return ret.rc;
+ return nal->nal_ni_status(nal, register_in, status_out);
}
int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in,
unsigned long *distance_out)
{
- PtlNIDist_in args;
- PtlNIDist_out ret;
- int rc;
-
- args.interface_in = interface_in;
- args.process_in = process_in;
-
- rc = do_forward(interface_in, PTL_NIDIST, &args, sizeof(args), &ret,
- sizeof(ret));
+ nal_t *nal;
- if (rc != PTL_OK)
- return rc;
-
- if (distance_out)
- *distance_out = ret.distance_out;
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ nal = ptl_hndl2nal(&interface_in);
+ if (nal == NULL)
+ return PTL_NI_INVALID;
- return ret.rc;
+ return nal->nal_ni_dist(nal, &process_in, distance_out);
}
int PtlMEAttach(ptl_handle_ni_t interface_in, ptl_pt_index_t index_in,
ptl_process_id_t match_id_in, ptl_match_bits_t match_bits_in,
ptl_match_bits_t ignore_bits_in, ptl_unlink_t unlink_in,
- ptl_ins_pos_t pos_in, ptl_handle_me_t * handle_out)
+ ptl_ins_pos_t pos_in, ptl_handle_me_t *handle_out)
{
- PtlMEAttach_in args;
- PtlMEAttach_out ret;
- int rc;
-
- args.interface_in = interface_in;
- args.index_in = index_in;
- args.match_id_in = match_id_in;
- args.match_bits_in = match_bits_in;
- args.ignore_bits_in = ignore_bits_in;
- args.unlink_in = unlink_in;
- args.position_in = pos_in;
-
- rc = do_forward(interface_in, PTL_MEATTACH, &args, sizeof(args), &ret,
- sizeof(ret));
-
- if (rc != PTL_OK)
- return rc;
-
- if (handle_out) {
- handle_out->nal_idx = interface_in.nal_idx;
- handle_out->cookie = ret.handle_out.cookie;
- }
-
- return ret.rc;
+ nal_t *nal;
+
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ nal = ptl_hndl2nal(&interface_in);
+ if (nal == NULL)
+ return PTL_NI_INVALID;
+
+ return nal->nal_me_attach(nal, index_in, match_id_in,
+ match_bits_in, ignore_bits_in,
+ unlink_in, pos_in, handle_out);
}
int PtlMEInsert(ptl_handle_me_t current_in, ptl_process_id_t match_id_in,
ptl_unlink_t unlink_in, ptl_ins_pos_t position_in,
ptl_handle_me_t * handle_out)
{
- PtlMEInsert_in args;
- PtlMEInsert_out ret;
- int rc;
-
- args.current_in = current_in;
- args.match_id_in = match_id_in;
- args.match_bits_in = match_bits_in;
- args.ignore_bits_in = ignore_bits_in;
- args.unlink_in = unlink_in;
- args.position_in = position_in;
-
- rc = do_forward(current_in, PTL_MEINSERT, &args, sizeof(args), &ret,
- sizeof(ret));
-
- if (rc != PTL_OK)
- return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
-
- if (handle_out) {
- handle_out->nal_idx = current_in.nal_idx;
- handle_out->cookie = ret.handle_out.cookie;
- }
- return ret.rc;
+ nal_t *nal;
+
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ nal = ptl_hndl2nal(&current_in);
+ if (nal == NULL)
+ return PTL_ME_INVALID;
+
+ return nal->nal_me_insert(nal, &current_in, match_id_in,
+ match_bits_in, ignore_bits_in,
+ unlink_in, position_in, handle_out);
}
int PtlMEUnlink(ptl_handle_me_t current_in)
{
- PtlMEUnlink_in args;
- PtlMEUnlink_out ret;
- int rc;
+ nal_t *nal;
- args.current_in = current_in;
- args.unlink_in = PTL_RETAIN;
-
- rc = do_forward(current_in, PTL_MEUNLINK, &args, sizeof(args), &ret,
- sizeof(ret));
-
- if (rc != PTL_OK)
- return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ nal = ptl_hndl2nal(&current_in);
+ if (nal == NULL)
+ return PTL_ME_INVALID;
- return ret.rc;
+ return nal->nal_me_unlink(nal, &current_in);
}
-int PtlTblDump(ptl_handle_ni_t ni, int index_in)
+int PtlMDAttach(ptl_handle_me_t me_in, ptl_md_t md_in,
+ ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out)
{
- PtlTblDump_in args;
- PtlTblDump_out ret;
- int rc;
+ nal_t *nal;
- args.index_in = index_in;
-
- rc = do_forward(ni, PTL_TBLDUMP, &args, sizeof(args), &ret,
- sizeof(ret));
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ nal = ptl_hndl2nal(&me_in);
+ if (nal == NULL)
+ return PTL_ME_INVALID;
- if (rc != PTL_OK)
- return rc;
+ if (!PtlHandleIsEqual(md_in.eventq, PTL_EQ_NONE) &&
+ ptl_hndl2nal(&md_in.eventq) != nal)
+ return PTL_MD_ILLEGAL;
- return ret.rc;
+ return (nal->nal_md_attach)(nal, &me_in, &md_in,
+ unlink_in, handle_out);
}
-int PtlMEDump(ptl_handle_me_t current_in)
+int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in,
+ ptl_unlink_t unlink_in, ptl_handle_md_t *handle_out)
{
- PtlMEDump_in args;
- PtlMEDump_out ret;
- int rc;
+ nal_t *nal;
- args.current_in = current_in;
-
- rc = do_forward(current_in, PTL_MEDUMP, &args, sizeof(args), &ret,
- sizeof(ret));
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ nal = ptl_hndl2nal(&ni_in);
+ if (nal == NULL)
+ return PTL_NI_INVALID;
- if (rc != PTL_OK)
- return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
+ if (!PtlHandleIsEqual(md_in.eventq, PTL_EQ_NONE) &&
+ ptl_hndl2nal(&md_in.eventq) != nal)
+ return PTL_MD_ILLEGAL;
- return ret.rc;
+ return (nal->nal_md_bind)(nal, &md_in, unlink_in, handle_out);
}
-static ptl_handle_eq_t md2eq (ptl_md_t *md)
+int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t *old_inout,
+ ptl_md_t *new_inout, ptl_handle_eq_t testq_in)
{
- if (PtlHandleIsEqual (md->eventq, PTL_EQ_NONE))
- return (PTL_EQ_NONE);
+ nal_t *nal;
- return (ptl_handle2usereq (&md->eventq)->cb_eq_handle);
-}
-
-
-int PtlMDAttach(ptl_handle_me_t me_in, ptl_md_t md_in,
- ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out)
-{
- PtlMDAttach_in args;
- PtlMDAttach_out ret;
- int rc;
-
- args.eq_in = md2eq(&md_in);
- args.me_in = me_in;
- args.md_in = md_in;
- args.unlink_in = unlink_in;
-
- rc = do_forward(me_in, PTL_MDATTACH,
- &args, sizeof(args), &ret, sizeof(ret));
-
- if (rc != PTL_OK)
- return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
-
- if (handle_out) {
- handle_out->nal_idx = me_in.nal_idx;
- handle_out->cookie = ret.handle_out.cookie;
- }
- return ret.rc;
-}
-
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ nal = ptl_hndl2nal(&md_in);
+ if (nal == NULL)
+ return PTL_MD_INVALID;
+ if (!PtlHandleIsEqual(testq_in, PTL_EQ_NONE) &&
+ ptl_hndl2nal(&testq_in) != nal)
+ return PTL_EQ_INVALID;
-int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in,
- ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out)
-{
- PtlMDBind_in args;
- PtlMDBind_out ret;
- int rc;
-
- args.eq_in = md2eq(&md_in);
- args.ni_in = ni_in;
- args.md_in = md_in;
- args.unlink_in = unlink_in;
-
- rc = do_forward(ni_in, PTL_MDBIND,
- &args, sizeof(args), &ret, sizeof(ret));
-
- if (rc != PTL_OK)
- return rc;
-
- if (handle_out) {
- handle_out->nal_idx = ni_in.nal_idx;
- handle_out->cookie = ret.handle_out.cookie;
- }
- return ret.rc;
+ return (nal->nal_md_update)(nal, &md_in,
+ old_inout, new_inout, &testq_in);
}
-int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t *old_inout,
- ptl_md_t *new_inout, ptl_handle_eq_t testq_in)
+int PtlMDUnlink(ptl_handle_md_t md_in)
{
- PtlMDUpdate_internal_in args;
- PtlMDUpdate_internal_out ret;
- int rc;
-
- args.md_in = md_in;
-
- if (old_inout) {
- args.old_inout = *old_inout;
- args.old_inout_valid = 1;
- } else
- args.old_inout_valid = 0;
-
- if (new_inout) {
- args.new_inout = *new_inout;
- args.new_inout_valid = 1;
- } else
- args.new_inout_valid = 0;
-
- if (PtlHandleIsEqual (testq_in, PTL_EQ_NONE)) {
- args.testq_in = PTL_EQ_NONE;
- args.sequence_in = -1;
- } else {
- ptl_eq_t *eq = ptl_handle2usereq (&testq_in);
-
- args.testq_in = eq->cb_eq_handle;
- args.sequence_in = eq->sequence;
- }
-
- rc = do_forward(md_in, PTL_MDUPDATE, &args, sizeof(args), &ret,
- sizeof(ret));
- if (rc != PTL_OK)
- return (rc == PTL_HANDLE_INVALID) ? PTL_MD_INVALID : rc;
-
- if (old_inout)
- *old_inout = ret.old_inout;
-
- return ret.rc;
+ nal_t *nal;
+
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ nal = ptl_hndl2nal(&md_in);
+ if (nal == NULL)
+ return PTL_MD_INVALID;
+
+ return (nal->nal_md_unlink)(nal, &md_in);
}
-int PtlMDUnlink(ptl_handle_md_t md_in)
+int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count,
+ ptl_eq_handler_t callback,
+ ptl_handle_eq_t *handle_out)
{
- PtlMDUnlink_in args;
- PtlMDUnlink_out ret;
- int rc;
-
- args.md_in = md_in;
- rc = do_forward(md_in, PTL_MDUNLINK, &args, sizeof(args), &ret,
- sizeof(ret));
- if (rc != PTL_OK)
- return (rc == PTL_HANDLE_INVALID) ? PTL_MD_INVALID : rc;
+ nal_t *nal;
+
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ nal = ptl_hndl2nal(&interface);
+ if (nal == NULL)
+ return PTL_NI_INVALID;
- return ret.rc;
+ return (nal->nal_eq_alloc)(nal, count, callback, handle_out);
}
-int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count,
- ptl_eq_handler_t callback,
- ptl_handle_eq_t * handle_out)
+int PtlEQFree(ptl_handle_eq_t eventq)
{
- ptl_eq_t *eq = NULL;
- ptl_event_t *ev = NULL;
- PtlEQAlloc_in args;
- PtlEQAlloc_out ret;
- int rc, i;
- nal_t *nal;
+ nal_t *nal;
if (!ptl_init)
return PTL_NO_INIT;
- nal = ptl_hndl2nal (&interface);
+ nal = ptl_hndl2nal(&eventq);
if (nal == NULL)
- return PTL_HANDLE_INVALID;
+ return PTL_EQ_INVALID;
- if (count != LOWEST_BIT_SET(count)) { /* not a power of 2 already */
- do { /* knock off all but the top bit... */
- count &= ~LOWEST_BIT_SET (count);
- } while (count != LOWEST_BIT_SET(count));
-
- count <<= 1; /* ...and round up */
- }
-
- if (count == 0) /* catch bad parameter / overflow on roundup */
- return (PTL_VAL_FAILED);
-
- PORTAL_ALLOC(ev, count * sizeof(ptl_event_t));
- if (!ev)
- return PTL_NO_SPACE;
-
- for (i = 0; i < count; i++)
- ev[i].sequence = 0;
-
- args.ni_in = interface;
- args.count_in = count;
- args.base_in = ev;
- args.len_in = count * sizeof(*ev);
- args.callback_in = callback;
-
- rc = do_forward(interface, PTL_EQALLOC, &args, sizeof(args), &ret,
- sizeof(ret));
- if (rc != PTL_OK)
- goto fail;
- if (ret.rc)
- GOTO(fail, rc = ret.rc);
-
- PORTAL_ALLOC(eq, sizeof(*eq));
- if (!eq) {
- rc = PTL_NO_SPACE;
- goto fail;
- }
-
- eq->sequence = 1;
- eq->size = count;
- eq->base = ev;
-
- /* EQ handles are a little wierd. PtlEQGet() just looks at the
- * queued events in shared memory. It doesn't want to do_forward()
- * at all, so the cookie in the EQ handle we pass out of here is
- * simply a pointer to the event queue we just set up. We stash
- * the handle returned by do_forward(), so we can pass it back via
- * do_forward() when we need to. */
-
- eq->cb_eq_handle.nal_idx = interface.nal_idx;
- eq->cb_eq_handle.cookie = ret.handle_out.cookie;
-
- handle_out->nal_idx = interface.nal_idx;
- handle_out->cookie = (__u64)((unsigned long)eq);
- return PTL_OK;
+ return (nal->nal_eq_free)(nal, &eventq);
+}
-fail:
- PORTAL_FREE(ev, count * sizeof(ptl_event_t));
- return rc;
+int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t *ev)
+{
+ int which;
+
+ return (PtlEQPoll (&eventq, 1, 0, ev, &which));
}
-int PtlEQFree(ptl_handle_eq_t eventq)
+int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out)
{
- PtlEQFree_in args;
- PtlEQFree_out ret;
- ptl_eq_t *eq;
- int rc;
+ int which;
+
+ return (PtlEQPoll (&eventq_in, 1, PTL_TIME_FOREVER,
+ event_out, &which));
+}
- eq = ptl_handle2usereq (&eventq);
- args.eventq_in = eq->cb_eq_handle;
+int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout,
+ ptl_event_t *event_out, int *which_out)
+{
+ int i;
+ nal_t *nal;
- rc = do_forward(eq->cb_eq_handle, PTL_EQFREE, &args,
- sizeof(args), &ret, sizeof(ret));
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ if (neq_in < 1)
+ return PTL_EQ_INVALID;
+
+ nal = ptl_hndl2nal(&eventqs_in[0]);
+ if (nal == NULL)
+ return PTL_EQ_INVALID;
- /* XXX we're betting rc == PTL_OK here */
- PORTAL_FREE(eq->base, eq->size * sizeof(ptl_event_t));
- PORTAL_FREE(eq, sizeof(*eq));
+ for (i = 1; i < neq_in; i++)
+ if (ptl_hndl2nal(&eventqs_in[i]) != nal)
+ return PTL_EQ_INVALID;
- return rc;
+ return (nal->nal_eq_poll)(nal, eventqs_in, neq_in, timeout,
+ event_out, which_out);
}
+
int PtlACEntry(ptl_handle_ni_t ni_in, ptl_ac_index_t index_in,
ptl_process_id_t match_id_in, ptl_pt_index_t portal_in)
{
- PtlACEntry_in args;
- PtlACEntry_out ret;
- int rc;
-
- /*
- * Copy arguments into the argument block to
- * hand to the forwarding object
- */
- args.ni_in = ni_in;
- args.index_in = index_in;
- args.match_id_in = match_id_in;
- args.portal_in = portal_in;
-
- rc = do_forward(ni_in, PTL_ACENTRY, &args, sizeof(args), &ret,
- sizeof(ret));
-
- return (rc != PTL_OK) ? rc : ret.rc;
+ nal_t *nal;
+
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ nal = ptl_hndl2nal(&ni_in);
+ if (nal == NULL)
+ return PTL_NI_INVALID;
+
+ return (nal->nal_ace_entry)(nal, index_in, match_id_in, portal_in);
}
int PtlPut(ptl_handle_md_t md_in, ptl_ack_req_t ack_req_in,
ptl_process_id_t target_in, ptl_pt_index_t portal_in,
- ptl_ac_index_t cookie_in, ptl_match_bits_t match_bits_in,
+ ptl_ac_index_t ac_in, ptl_match_bits_t match_bits_in,
ptl_size_t offset_in, ptl_hdr_data_t hdr_data_in)
{
- PtlPut_in args;
- PtlPut_out ret;
- int rc;
-
- /*
- * Copy arguments into the argument block to
- * hand to the forwarding object
- */
- args.md_in = md_in;
- args.ack_req_in = ack_req_in;
- args.target_in = target_in;
- args.portal_in = portal_in;
- args.cookie_in = cookie_in;
- args.match_bits_in = match_bits_in;
- args.offset_in = offset_in;
- args.hdr_data_in = hdr_data_in;
-
- rc = do_forward(md_in, PTL_PUT, &args, sizeof(args), &ret, sizeof(ret));
-
- return (rc != PTL_OK) ? rc : ret.rc;
+ nal_t *nal;
+
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ nal = ptl_hndl2nal(&md_in);
+ if (nal == NULL)
+ return PTL_MD_INVALID;
+
+ return (nal->nal_put)(nal, &md_in, ack_req_in,
+ &target_in, portal_in, ac_in,
+ match_bits_in, offset_in, hdr_data_in);
}
int PtlGet(ptl_handle_md_t md_in, ptl_process_id_t target_in,
- ptl_pt_index_t portal_in, ptl_ac_index_t cookie_in,
+ ptl_pt_index_t portal_in, ptl_ac_index_t ac_in,
ptl_match_bits_t match_bits_in, ptl_size_t offset_in)
{
- PtlGet_in args;
- PtlGet_out ret;
- int rc;
-
- /*
- * Copy arguments into the argument block to
- * hand to the forwarding object
- */
- args.md_in = md_in;
- args.target_in = target_in;
- args.portal_in = portal_in;
- args.cookie_in = cookie_in;
- args.match_bits_in = match_bits_in;
- args.offset_in = offset_in;
-
- rc = do_forward(md_in, PTL_GET, &args, sizeof(args), &ret, sizeof(ret));
-
- return (rc != PTL_OK) ? rc : ret.rc;
+ nal_t *nal;
+
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ nal = ptl_hndl2nal(&md_in);
+ if (nal == NULL)
+ return PTL_MD_INVALID;
+
+ return (nal->nal_get)(nal, &md_in,
+ &target_in, portal_in, ac_in,
+ match_bits_in, offset_in);
}
+
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
-my_sources = api-eq.c api-init.c api-me.c api-errno.c api-ni.c api-wrap.c \
- lib-dispatch.c lib-init.c lib-me.c lib-msg.c lib-eq.c \
+my_sources = api-errno.c api-ni.c api-wrap.c \
+ lib-init.c lib-me.c lib-msg.c lib-eq.c \
lib-md.c lib-move.c lib-ni.c lib-pid.c
if !CRAY_PORTALS
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * lib/lib-dispatch.c
- *
- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
- * Copyright (c) 2001-2002 Sandia National Laboratories
- *
- * This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_PORTALS
-#include <portals/lib-p30.h>
-#include <portals/lib-dispatch.h>
-
-typedef struct {
- int (*fun) (nal_cb_t * nal, void *private, void *in, void *out);
- char *name;
-} dispatch_table_t;
-
-static dispatch_table_t dispatch_table[] = {
- [PTL_GETID] {do_PtlGetId, "PtlGetId"},
- [PTL_NISTATUS] {do_PtlNIStatus, "PtlNIStatus"},
- [PTL_NIDIST] {do_PtlNIDist, "PtlNIDist"},
- [PTL_MEATTACH] {do_PtlMEAttach, "PtlMEAttach"},
- [PTL_MEINSERT] {do_PtlMEInsert, "PtlMEInsert"},
- [PTL_MEUNLINK] {do_PtlMEUnlink, "PtlMEUnlink"},
- [PTL_TBLDUMP] {do_PtlTblDump, "PtlTblDump"},
- [PTL_MEDUMP] {do_PtlMEDump, "PtlMEDump"},
- [PTL_MDATTACH] {do_PtlMDAttach, "PtlMDAttach"},
- [PTL_MDBIND] {do_PtlMDBind, "PtlMDBind"},
- [PTL_MDUPDATE] {do_PtlMDUpdate_internal, "PtlMDUpdate_internal"},
- [PTL_MDUNLINK] {do_PtlMDUnlink, "PtlMDUnlink"},
- [PTL_EQALLOC] {do_PtlEQAlloc_internal, "PtlEQAlloc_internal"},
- [PTL_EQFREE] {do_PtlEQFree_internal, "PtlEQFree_internal"},
- [PTL_PUT] {do_PtlPut, "PtlPut"},
- [PTL_GET] {do_PtlGet, "PtlGet"},
- [PTL_FAILNID] {do_PtlFailNid, "PtlFailNid"},
- /* */ {0, ""}
-};
-
-/*
- * This really should be elsewhere, but lib-p30/dispatch.c is
- * an automatically generated file.
- */
-void lib_dispatch(nal_cb_t * nal, void *private, int index, void *arg_block,
- void *ret_block)
-{
- lib_ni_t *ni = &nal->ni;
-
- if (index < 0 || index > LIB_MAX_DISPATCH ||
- !dispatch_table[index].fun) {
- CDEBUG(D_NET, LPU64": Invalid API call %d\n", ni->nid, index);
- return;
- }
-
- CDEBUG(D_NET, LPU64": API call %s (%d)\n", ni->nid,
- dispatch_table[index].name, index);
-
- dispatch_table[index].fun(nal, private, arg_block, ret_block);
-}
-
-char *dispatch_name(int index)
-{
- return dispatch_table[index].name;
-}
#define DEBUG_SUBSYSTEM S_PORTALS
#include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
-int do_PtlEQAlloc_internal(nal_cb_t * nal, void *private, void *v_args,
- void *v_ret)
+int
+lib_api_eq_alloc (nal_t *apinal, ptl_size_t count,
+ ptl_eq_handler_t callback,
+ ptl_handle_eq_t *handle)
{
- /*
- * Incoming:
- * ptl_handle_ni_t ni_in
- * ptl_size_t count_in
- * void * base_in
- *
- * Outgoing:
- * ptl_handle_eq_t * handle_out
- */
-
- PtlEQAlloc_in *args = v_args;
- PtlEQAlloc_out *ret = v_ret;
-
- lib_eq_t *eq;
- unsigned long flags;
-
- /* api should have rounded up */
- if (args->count_in != LOWEST_BIT_SET (args->count_in))
- return ret->rc = PTL_VAL_FAILED;
+ lib_nal_t *nal = apinal->nal_data;
+ lib_eq_t *eq;
+ unsigned long flags;
+ int rc;
+ /* We need count to be a power of 2 so that when eq_{enq,deq}_seq
+ * overflow, they don't skip entries, so the queue has the same
+ * apparent capacity at all times */
+
+ if (count != LOWEST_BIT_SET(count)) { /* not a power of 2 already */
+ do { /* knock off all but the top bit... */
+ count &= ~LOWEST_BIT_SET (count);
+ } while (count != LOWEST_BIT_SET(count));
+
+ count <<= 1; /* ...and round up */
+ }
+
+ if (count == 0) /* catch bad parameter / overflow on roundup */
+ return (PTL_VAL_FAILED);
+
eq = lib_eq_alloc (nal);
if (eq == NULL)
- return (ret->rc = PTL_NO_SPACE);
+ return (PTL_NO_SPACE);
- state_lock(nal, &flags);
+ PORTAL_ALLOC(eq->eq_events, count * sizeof(ptl_event_t));
+ if (eq->eq_events == NULL) {
+ LIB_LOCK(nal, flags);
+ lib_eq_free (nal, eq);
+ LIB_UNLOCK(nal, flags);
+ }
- if (nal->cb_map != NULL) {
+ if (nal->libnal_map != NULL) {
struct iovec iov = {
- .iov_base = args->base_in,
- .iov_len = args->count_in * sizeof (ptl_event_t) };
+ .iov_base = eq->eq_events,
+ .iov_len = count * sizeof(ptl_event_t)};
- ret->rc = nal->cb_map (nal, 1, &iov, &eq->eq_addrkey);
- if (ret->rc != PTL_OK) {
+ rc = nal->libnal_map(nal, 1, &iov, &eq->eq_addrkey);
+ if (rc != PTL_OK) {
+ LIB_LOCK(nal, flags);
lib_eq_free (nal, eq);
-
- state_unlock (nal, &flags);
- return (ret->rc);
+ LIB_UNLOCK(nal, flags);
+ return (rc);
}
}
- eq->sequence = 1;
- eq->base = args->base_in;
- eq->size = args->count_in;
+ /* NB this resets all event sequence numbers to 0, to be earlier
+ * than eq_deq_seq */
+ memset(eq->eq_events, 0, count * sizeof(ptl_event_t));
+
+ eq->eq_deq_seq = 1;
+ eq->eq_enq_seq = 1;
+ eq->eq_size = count;
eq->eq_refcount = 0;
- eq->event_callback = args->callback_in;
+ eq->eq_callback = callback;
+
+ LIB_LOCK(nal, flags);
lib_initialise_handle (nal, &eq->eq_lh, PTL_COOKIE_TYPE_EQ);
- list_add (&eq->eq_list, &nal->ni.ni_active_eqs);
+ list_add (&eq->eq_list, &nal->libnal_ni.ni_active_eqs);
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
- ptl_eq2handle(&ret->handle_out, eq);
- return (ret->rc = PTL_OK);
+ ptl_eq2handle(handle, nal, eq);
+ return (PTL_OK);
}
-int do_PtlEQFree_internal(nal_cb_t * nal, void *private, void *v_args,
- void *v_ret)
+int
+lib_api_eq_free(nal_t *apinal, ptl_handle_eq_t *eqh)
{
- /*
- * Incoming:
- * ptl_handle_eq_t eventq_in
- *
- * Outgoing:
- */
-
- PtlEQFree_in *args = v_args;
- PtlEQFree_out *ret = v_ret;
- lib_eq_t *eq;
- long flags;
+ lib_nal_t *nal = apinal->nal_data;
+ lib_eq_t *eq;
+ int size;
+ ptl_event_t *events;
+ void *addrkey;
+ unsigned long flags;
- state_lock (nal, &flags);
+ LIB_LOCK(nal, flags);
- eq = ptl_handle2eq(&args->eventq_in, nal);
+ eq = ptl_handle2eq(eqh, nal);
if (eq == NULL) {
- ret->rc = PTL_EQ_INVALID;
- } else if (eq->eq_refcount != 0) {
- ret->rc = PTL_EQ_IN_USE;
+ LIB_UNLOCK(nal, flags);
+ return (PTL_EQ_INVALID);
+ }
+
+ if (eq->eq_refcount != 0) {
+ LIB_UNLOCK(nal, flags);
+ return (PTL_EQ_IN_USE);
+ }
+
+ /* stash for free after lock dropped */
+ events = eq->eq_events;
+ size = eq->eq_size;
+ addrkey = eq->eq_addrkey;
+
+ lib_invalidate_handle (nal, &eq->eq_lh);
+ list_del (&eq->eq_list);
+ lib_eq_free (nal, eq);
+
+ LIB_UNLOCK(nal, flags);
+
+ if (nal->libnal_unmap != NULL) {
+ struct iovec iov = {
+ .iov_base = events,
+ .iov_len = size * sizeof(ptl_event_t)};
+
+ nal->libnal_unmap(nal, 1, &iov, &addrkey);
+ }
+
+ PORTAL_FREE(events, size * sizeof (ptl_event_t));
+
+ return (PTL_OK);
+}
+
+int
+lib_get_event (lib_eq_t *eq, ptl_event_t *ev)
+{
+ int new_index = eq->eq_deq_seq & (eq->eq_size - 1);
+ ptl_event_t *new_event = &eq->eq_events[new_index];
+ int rc;
+ ENTRY;
+
+ CDEBUG(D_INFO, "event: %p, sequence: %lu, eq->size: %u\n",
+ new_event, eq->eq_deq_seq, eq->eq_size);
+
+ if (PTL_SEQ_GT (eq->eq_deq_seq, new_event->sequence)) {
+ RETURN(PTL_EQ_EMPTY);
+ }
+
+ /* We've got a new event... */
+ *ev = *new_event;
+
+ /* ...but did it overwrite an event we've not seen yet? */
+ if (eq->eq_deq_seq == new_event->sequence) {
+ rc = PTL_OK;
} else {
- if (nal->cb_unmap != NULL) {
- struct iovec iov = {
- .iov_base = eq->base,
- .iov_len = eq->size * sizeof (ptl_event_t) };
-
- nal->cb_unmap(nal, 1, &iov, &eq->eq_addrkey);
+ CERROR("Event Queue Overflow: eq seq %lu ev seq %lu\n",
+ eq->eq_deq_seq, new_event->sequence);
+ rc = PTL_EQ_DROPPED;
+ }
+
+ eq->eq_deq_seq = new_event->sequence + 1;
+ RETURN(rc);
+}
+
+
+int
+lib_api_eq_poll (nal_t *apinal,
+ ptl_handle_eq_t *eventqs, int neq, int timeout_ms,
+ ptl_event_t *event, int *which)
+{
+ lib_nal_t *nal = apinal->nal_data;
+ lib_ni_t *ni = &nal->libnal_ni;
+ unsigned long flags;
+ int i;
+ int rc;
+#ifdef __KERNEL__
+ wait_queue_t wq;
+ unsigned long now;
+#else
+ struct timeval then;
+ struct timeval now;
+ struct timespec ts;
+#endif
+ ENTRY;
+
+ LIB_LOCK(nal, flags);
+
+ for (;;) {
+ for (i = 0; i < neq; i++) {
+ lib_eq_t *eq = ptl_handle2eq(&eventqs[i], nal);
+
+ rc = lib_get_event (eq, event);
+ if (rc != PTL_EQ_EMPTY) {
+ LIB_UNLOCK(nal, flags);
+ *which = i;
+ RETURN(rc);
+ }
+ }
+
+ if (timeout_ms == 0) {
+ LIB_UNLOCK (nal, flags);
+ RETURN (PTL_EQ_EMPTY);
}
- lib_invalidate_handle (nal, &eq->eq_lh);
- list_del (&eq->eq_list);
- lib_eq_free (nal, eq);
- ret->rc = PTL_OK;
- }
+ /* Some architectures force us to do spin locking/unlocking
+ * in the same stack frame, means we can abstract the
+ * locking here */
+#ifdef __KERNEL__
+ init_waitqueue_entry(&wq, current);
+ set_current_state(TASK_INTERRUPTIBLE);
+ add_wait_queue(&ni->ni_waitq, &wq);
- state_unlock (nal, &flags);
+ LIB_UNLOCK(nal, flags);
- return (ret->rc);
+ if (timeout_ms < 0) {
+ schedule ();
+ } else {
+ now = jiffies;
+ schedule_timeout((timeout_ms * HZ)/1000);
+ timeout_ms -= ((jiffies - now) * 1000)/HZ;
+ if (timeout_ms < 0)
+ timeout_ms = 0;
+ }
+
+ LIB_LOCK(nal, flags);
+#else
+ if (timeout_ms < 0) {
+ pthread_cond_wait(&ni->ni_cond, &ni->ni_mutex);
+ } else {
+ gettimeofday(&then, NULL);
+
+ ts.tv_sec = then.tv_sec + timeout_ms/1000;
+ ts.tv_nsec = then.tv_usec * 1000 +
+ (timeout_ms%1000) * 1000000;
+ if (ts.tv_nsec >= 1000000000) {
+ ts.tv_sec++;
+ ts.tv_nsec -= 1000000000;
+ }
+
+ pthread_cond_timedwait(&ni->ni_cond,
+ &ni->ni_mutex, &ts);
+
+ gettimeofday(&now, NULL);
+ timeout_ms -= (now.tv_sec - then.tv_sec) * 1000 +
+ (now.tv_usec - then.tv_usec) / 1000;
+
+ if (timeout_ms < 0)
+ timeout_ms = 0;
+ }
+#endif
+ }
}
#ifndef PTL_USE_LIB_FREELIST
int
-kportal_descriptor_setup (nal_cb_t *nal,
+kportal_descriptor_setup (lib_nal_t *nal,
ptl_ni_limits_t *requested_limits,
ptl_ni_limits_t *actual_limits)
{
}
void
-kportal_descriptor_cleanup (nal_cb_t *nal)
+kportal_descriptor_cleanup (lib_nal_t *nal)
{
}
#else
int
-lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int n, int size)
+lib_freelist_init (lib_nal_t *nal, lib_freelist_t *fl, int n, int size)
{
char *space;
size += offsetof (lib_freeobj_t, fo_contents);
- space = nal->cb_malloc (nal, n * size);
+ PORTAL_ALLOC(space, n * size);
if (space == NULL)
return (PTL_NO_SPACE);
}
void
-lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl)
+lib_freelist_fini (lib_nal_t *nal, lib_freelist_t *fl)
{
struct list_head *el;
int count;
LASSERT (count == fl->fl_nobjs);
- nal->cb_free (nal, fl->fl_objs, fl->fl_nobjs * fl->fl_objsize);
+ PORTAL_FREE(fl->fl_objs, fl->fl_nobjs * fl->fl_objsize);
memset (fl, 0, sizeof (fl));
}
int
-kportal_descriptor_setup (nal_cb_t *nal,
+kportal_descriptor_setup (lib_nal_t *nal,
ptl_ni_limits_t *requested_limits,
ptl_ni_limits_t *actual_limits)
{
/* NB on failure caller must still call kportal_descriptor_cleanup */
/* ****** */
- int rc;
+ lib_ni_t *ni = &nal->libnal_ni;
+ int rc;
- memset (&nal->ni.ni_free_mes, 0, sizeof (nal->ni.ni_free_mes));
- memset (&nal->ni.ni_free_msgs, 0, sizeof (nal->ni.ni_free_msgs));
- memset (&nal->ni.ni_free_mds, 0, sizeof (nal->ni.ni_free_mds));
- memset (&nal->ni.ni_free_eqs, 0, sizeof (nal->ni.ni_free_eqs));
+ memset (&ni->ni_free_mes, 0, sizeof (ni->ni_free_mes));
+ memset (&ni->ni_free_msgs, 0, sizeof (ni->ni_free_msgs));
+ memset (&ni->ni_free_mds, 0, sizeof (ni->ni_free_mds));
+ memset (&ni->ni_free_eqs, 0, sizeof (ni->ni_free_eqs));
/* Ignore requested limits! */
actual_limits->max_mes = MAX_MES;
/* Hahahah what a load of bollocks. There's nowhere to
* specify the max # messages in-flight */
- rc = lib_freelist_init (nal, &nal->ni.ni_free_mes,
+ rc = lib_freelist_init (nal, &ni->ni_free_mes,
MAX_MES, sizeof (lib_me_t));
if (rc != PTL_OK)
return (rc);
- rc = lib_freelist_init (nal, &nal->ni.ni_free_msgs,
+ rc = lib_freelist_init (nal, &ni->ni_free_msgs,
MAX_MSGS, sizeof (lib_msg_t));
if (rc != PTL_OK)
return (rc);
- rc = lib_freelist_init (nal, &nal->ni.ni_free_mds,
+ rc = lib_freelist_init (nal, &ni->ni_free_mds,
MAX_MDS, sizeof (lib_md_t));
if (rc != PTL_OK)
return (rc);
- rc = lib_freelist_init (nal, &nal->ni.ni_free_eqs,
+ rc = lib_freelist_init (nal, &ni->ni_free_eqs,
MAX_EQS, sizeof (lib_eq_t));
return (rc);
}
void
-kportal_descriptor_cleanup (nal_cb_t *nal)
+kportal_descriptor_cleanup (lib_nal_t *nal)
{
- lib_freelist_fini (nal, &nal->ni.ni_free_mes);
- lib_freelist_fini (nal, &nal->ni.ni_free_msgs);
- lib_freelist_fini (nal, &nal->ni.ni_free_mds);
- lib_freelist_fini (nal, &nal->ni.ni_free_eqs);
+ lib_ni_t *ni = &nal->libnal_ni;
+
+ lib_freelist_fini (nal, &ni->ni_free_mes);
+ lib_freelist_fini (nal, &ni->ni_free_msgs);
+ lib_freelist_fini (nal, &ni->ni_free_mds);
+ lib_freelist_fini (nal, &ni->ni_free_eqs);
}
#endif
__u64
-lib_create_interface_cookie (nal_cb_t *nal)
+lib_create_interface_cookie (lib_nal_t *nal)
{
/* NB the interface cookie in wire handles guards against delayed
* replies and ACKs appearing valid in a new instance of the same
}
int
-lib_setup_handle_hash (nal_cb_t *nal)
+lib_setup_handle_hash (lib_nal_t *nal)
{
- lib_ni_t *ni = &nal->ni;
+ lib_ni_t *ni = &nal->libnal_ni;
int i;
/* Arbitrary choice of hash table size */
#else
ni->ni_lh_hash_size = (MAX_MES + MAX_MDS + MAX_EQS)/4;
#endif
- ni->ni_lh_hash_table =
- (struct list_head *)nal->cb_malloc (nal, ni->ni_lh_hash_size
- * sizeof (struct list_head));
+ PORTAL_ALLOC(ni->ni_lh_hash_table,
+ ni->ni_lh_hash_size * sizeof (struct list_head));
if (ni->ni_lh_hash_table == NULL)
return (PTL_NO_SPACE);
}
void
-lib_cleanup_handle_hash (nal_cb_t *nal)
+lib_cleanup_handle_hash (lib_nal_t *nal)
{
- lib_ni_t *ni = &nal->ni;
+ lib_ni_t *ni = &nal->libnal_ni;
if (ni->ni_lh_hash_table == NULL)
return;
- nal->cb_free (nal, ni->ni_lh_hash_table,
- ni->ni_lh_hash_size * sizeof (struct list_head));
+ PORTAL_FREE(ni->ni_lh_hash_table,
+ ni->ni_lh_hash_size * sizeof (struct list_head));
}
lib_handle_t *
-lib_lookup_cookie (nal_cb_t *nal, __u64 cookie, int type)
+lib_lookup_cookie (lib_nal_t *nal, __u64 cookie, int type)
{
/* ALWAYS called with statelock held */
- lib_ni_t *ni = &nal->ni;
+ lib_ni_t *ni = &nal->libnal_ni;
struct list_head *list;
struct list_head *el;
unsigned int hash;
}
void
-lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh, int type)
+lib_initialise_handle (lib_nal_t *nal, lib_handle_t *lh, int type)
{
/* ALWAYS called with statelock held */
- lib_ni_t *ni = &nal->ni;
+ lib_ni_t *ni = &nal->libnal_ni;
unsigned int hash;
LASSERT (type >= 0 && type < PTL_COOKIE_TYPES);
}
void
-lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh)
+lib_invalidate_handle (lib_nal_t *nal, lib_handle_t *lh)
{
list_del (&lh->lh_hash_chain);
}
int
-lib_init(nal_cb_t *nal, ptl_process_id_t process_id,
+lib_init(lib_nal_t *libnal, nal_t *apinal,
+ ptl_process_id_t process_id,
ptl_ni_limits_t *requested_limits,
ptl_ni_limits_t *actual_limits)
{
int rc = PTL_OK;
- lib_ni_t *ni = &nal->ni;
- int ptl_size;
- int i;
+ lib_ni_t *ni = &libnal->libnal_ni;
+ int ptl_size;
+ int i;
ENTRY;
/* NB serialised in PtlNIInit() */
lib_assert_wire_constants ();
-
- /*
- * Allocate the portal table for this interface
- * and all per-interface objects.
- */
- memset(&ni->counters, 0, sizeof(lib_counters_t));
- rc = kportal_descriptor_setup (nal, requested_limits,
- &ni->actual_limits);
+ /* Setup the API nal with the lib API handling functions */
+ apinal->nal_get_id = lib_api_get_id;
+ apinal->nal_ni_status = lib_api_ni_status;
+ apinal->nal_ni_dist = lib_api_ni_dist;
+ apinal->nal_fail_nid = lib_api_fail_nid;
+ apinal->nal_me_attach = lib_api_me_attach;
+ apinal->nal_me_insert = lib_api_me_insert;
+ apinal->nal_me_unlink = lib_api_me_unlink;
+ apinal->nal_md_attach = lib_api_md_attach;
+ apinal->nal_md_bind = lib_api_md_bind;
+ apinal->nal_md_unlink = lib_api_md_unlink;
+ apinal->nal_md_update = lib_api_md_update;
+ apinal->nal_eq_alloc = lib_api_eq_alloc;
+ apinal->nal_eq_free = lib_api_eq_free;
+ apinal->nal_eq_poll = lib_api_eq_poll;
+ apinal->nal_put = lib_api_put;
+ apinal->nal_get = lib_api_get;
+
+ apinal->nal_data = libnal;
+ ni->ni_api = apinal;
+
+ rc = kportal_descriptor_setup (libnal, requested_limits,
+ &ni->ni_actual_limits);
if (rc != PTL_OK)
goto out;
+ memset(&ni->ni_counters, 0, sizeof(lib_counters_t));
+
INIT_LIST_HEAD (&ni->ni_active_msgs);
INIT_LIST_HEAD (&ni->ni_active_mds);
INIT_LIST_HEAD (&ni->ni_active_eqs);
-
INIT_LIST_HEAD (&ni->ni_test_peers);
- ni->ni_interface_cookie = lib_create_interface_cookie (nal);
+#ifdef __KERNEL__
+ spin_lock_init (&ni->ni_lock);
+ init_waitqueue_head (&ni->ni_waitq);
+#else
+ pthread_mutex_init(&ni->ni_mutex, NULL);
+ pthread_cond_init(&ni->ni_cond, NULL);
+#endif
+
+ ni->ni_interface_cookie = lib_create_interface_cookie (libnal);
ni->ni_next_object_cookie = 0;
- rc = lib_setup_handle_hash (nal);
+ rc = lib_setup_handle_hash (libnal);
if (rc != PTL_OK)
goto out;
- ni->nid = process_id.nid;
- ni->pid = process_id.pid;
+ ni->ni_pid = process_id;
if (requested_limits != NULL)
ptl_size = requested_limits->max_pt_index + 1;
else
ptl_size = 64;
- ni->tbl.size = ptl_size;
- ni->tbl.tbl = nal->cb_malloc(nal, sizeof(struct list_head) * ptl_size);
- if (ni->tbl.tbl == NULL) {
+ ni->ni_portals.size = ptl_size;
+ PORTAL_ALLOC(ni->ni_portals.tbl,
+ ptl_size * sizeof(struct list_head));
+ if (ni->ni_portals.tbl == NULL) {
rc = PTL_NO_SPACE;
goto out;
}
for (i = 0; i < ptl_size; i++)
- INIT_LIST_HEAD(&(ni->tbl.tbl[i]));
+ INIT_LIST_HEAD(&(ni->ni_portals.tbl[i]));
/* max_{mes,mds,eqs} set in kportal_descriptor_setup */
/* We don't have an access control table! */
- ni->actual_limits.max_ac_index = -1;
+ ni->ni_actual_limits.max_ac_index = -1;
- ni->actual_limits.max_pt_index = ptl_size - 1;
- ni->actual_limits.max_md_iovecs = PTL_MD_MAX_IOV;
- ni->actual_limits.max_me_list = INT_MAX;
+ ni->ni_actual_limits.max_pt_index = ptl_size - 1;
+ ni->ni_actual_limits.max_md_iovecs = PTL_MD_MAX_IOV;
+ ni->ni_actual_limits.max_me_list = INT_MAX;
/* We don't support PtlGetPut! */
- ni->actual_limits.max_getput_md = 0;
+ ni->ni_actual_limits.max_getput_md = 0;
if (actual_limits != NULL)
- *actual_limits = ni->actual_limits;
+ *actual_limits = ni->ni_actual_limits;
out:
if (rc != PTL_OK) {
- lib_cleanup_handle_hash (nal);
- kportal_descriptor_cleanup (nal);
+ lib_cleanup_handle_hash (libnal);
+ kportal_descriptor_cleanup (libnal);
}
RETURN (rc);
}
int
-lib_fini(nal_cb_t * nal)
+lib_fini(lib_nal_t *nal)
{
- lib_ni_t *ni = &nal->ni;
+ lib_ni_t *ni = &nal->libnal_ni;
int idx;
/* NB no state_lock() since this is the last reference. The NAL
* network op (eg MD with non-zero pending count)
*/
- for (idx = 0; idx < ni->tbl.size; idx++)
- while (!list_empty (&ni->tbl.tbl[idx])) {
- lib_me_t *me = list_entry (ni->tbl.tbl[idx].next,
+ for (idx = 0; idx < ni->ni_portals.size; idx++)
+ while (!list_empty (&ni->ni_portals.tbl[idx])) {
+ lib_me_t *me = list_entry (ni->ni_portals.tbl[idx].next,
lib_me_t, me_list);
CERROR ("Active me %p on exit\n", me);
lib_msg_free (nal, msg);
}
- nal->cb_free(nal, ni->tbl.tbl, sizeof(struct list_head) * ni->tbl.size);
+ PORTAL_FREE(ni->ni_portals.tbl,
+ ni->ni_portals.size * sizeof(struct list_head));
lib_cleanup_handle_hash (nal);
kportal_descriptor_cleanup (nal);
+#ifndef __KERNEL__
+ pthread_mutex_destroy(&ni->ni_mutex);
+ pthread_cond_destroy(&ni->ni_cond);
+#endif
+
return (PTL_OK);
}
#endif
#include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
/* must be called with state lock held */
-void lib_md_unlink(nal_cb_t * nal, lib_md_t * md)
+void
+lib_md_unlink(lib_nal_t *nal, lib_md_t *md)
{
if ((md->md_flags & PTL_MD_FLAG_ZOMBIE) == 0) {
/* first unlink attempt... */
CDEBUG(D_NET, "Unlinking md %p\n", md);
if ((md->options & PTL_MD_KIOV) != 0) {
- if (nal->cb_unmap_pages != NULL)
- nal->cb_unmap_pages (nal, md->md_niov, md->md_iov.kiov,
- &md->md_addrkey);
- } else if (nal->cb_unmap != NULL) {
- nal->cb_unmap (nal, md->md_niov, md->md_iov.iov,
- &md->md_addrkey);
+ if (nal->libnal_unmap_pages != NULL)
+ nal->libnal_unmap_pages (nal,
+ md->md_niov,
+ md->md_iov.kiov,
+ &md->md_addrkey);
+ } else if (nal->libnal_unmap != NULL) {
+ nal->libnal_unmap (nal,
+ md->md_niov, md->md_iov.iov,
+ &md->md_addrkey);
}
if (md->eq != NULL) {
}
/* must be called with state lock held */
-static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private,
- ptl_md_t *md, ptl_handle_eq_t *eqh, int unlink)
+static int
+lib_md_build(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd, int unlink)
{
lib_eq_t *eq = NULL;
int rc;
int i;
int niov;
+ int total_length = 0;
/* NB we are passed an allocated, but uninitialised/active md.
* if we return success, caller may lib_md_unlink() it.
* otherwise caller may only lib_md_free() it.
*/
- if (!PtlHandleIsEqual (*eqh, PTL_EQ_NONE)) {
- eq = ptl_handle2eq(eqh, nal);
+ if (!PtlHandleIsEqual (umd->eventq, PTL_EQ_NONE)) {
+ eq = ptl_handle2eq(&umd->eventq, nal);
if (eq == NULL)
return PTL_EQ_INVALID;
}
- /* Must check this _before_ allocation. Also, note that non-iov
- * MDs must set md_niov to 0. */
- LASSERT((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0 ||
- md->length <= PTL_MD_MAX_IOV);
-
/* This implementation doesn't know how to create START events or
* disable END events. Best to LASSERT our caller is compliant so
* we find out quickly... */
- LASSERT (PtlHandleIsEqual (*eqh, PTL_EQ_NONE) ||
- ((md->options & PTL_MD_EVENT_START_DISABLE) != 0 &&
- (md->options & PTL_MD_EVENT_END_DISABLE) == 0));
-
- if ((md->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
- (md->max_size < 0 || md->max_size > md->length)) // illegal max_size
- return PTL_MD_INVALID;
-
- new->me = NULL;
- new->start = md->start;
- new->offset = 0;
- new->max_size = md->max_size;
- new->options = md->options;
- new->user_ptr = md->user_ptr;
- new->eq = eq;
- new->threshold = md->threshold;
- new->pending = 0;
- new->md_flags = (unlink == PTL_UNLINK) ? PTL_MD_FLAG_AUTO_UNLINK : 0;
-
- if ((md->options & PTL_MD_IOVEC) != 0) {
- int total_length = 0;
-
- if ((md->options & PTL_MD_KIOV) != 0) /* Can't specify both */
- return PTL_MD_INVALID;
-
- new->md_niov = niov = md->length;
-
- if (nal->cb_read (nal, private, new->md_iov.iov, md->start,
- niov * sizeof (new->md_iov.iov[0])))
- return PTL_SEGV;
+ LASSERT (eq == NULL ||
+ ((umd->options & PTL_MD_EVENT_START_DISABLE) != 0 &&
+ (umd->options & PTL_MD_EVENT_END_DISABLE) == 0));
+
+ lmd->me = NULL;
+ lmd->start = umd->start;
+ lmd->offset = 0;
+ lmd->max_size = umd->max_size;
+ lmd->options = umd->options;
+ lmd->user_ptr = umd->user_ptr;
+ lmd->eq = eq;
+ lmd->threshold = umd->threshold;
+ lmd->pending = 0;
+ lmd->md_flags = (unlink == PTL_UNLINK) ? PTL_MD_FLAG_AUTO_UNLINK : 0;
+
+ if ((umd->options & PTL_MD_IOVEC) != 0) {
+
+ if ((umd->options & PTL_MD_KIOV) != 0) /* Can't specify both */
+ return PTL_MD_ILLEGAL;
+
+ lmd->md_niov = niov = umd->length;
+ memcpy(lmd->md_iov.iov, umd->start,
+ niov * sizeof (lmd->md_iov.iov[0]));
for (i = 0; i < niov; i++) {
/* We take the base address on trust */
- if (new->md_iov.iov[i].iov_len <= 0) /* invalid length */
- return PTL_VAL_FAILED;
+ if (lmd->md_iov.iov[i].iov_len <= 0) /* invalid length */
+ return PTL_MD_ILLEGAL;
- total_length += new->md_iov.iov[i].iov_len;
+ total_length += lmd->md_iov.iov[i].iov_len;
}
- new->length = total_length;
+ lmd->length = total_length;
- if (nal->cb_map != NULL) {
- rc = nal->cb_map (nal, niov, new->md_iov.iov,
- &new->md_addrkey);
+ if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
+ (umd->max_size < 0 ||
+ umd->max_size > total_length)) // illegal max_size
+ return PTL_MD_ILLEGAL;
+
+ if (nal->libnal_map != NULL) {
+ rc = nal->libnal_map (nal, niov, lmd->md_iov.iov,
+ &lmd->md_addrkey);
if (rc != PTL_OK)
return (rc);
}
- } else if ((md->options & PTL_MD_KIOV) != 0) {
+ } else if ((umd->options & PTL_MD_KIOV) != 0) {
#ifndef __KERNEL__
- return PTL_MD_INVALID;
-#else
- int total_length = 0;
-
+ return PTL_MD_ILLEGAL;
+#else
/* Trap attempt to use paged I/O if unsupported early. */
- if (nal->cb_send_pages == NULL ||
- nal->cb_recv_pages == NULL)
+ if (nal->libnal_send_pages == NULL ||
+ nal->libnal_recv_pages == NULL)
return PTL_MD_INVALID;
- new->md_niov = niov = md->length;
+ lmd->md_niov = niov = umd->length;
+ memcpy(lmd->md_iov.kiov, umd->start,
+ niov * sizeof (lmd->md_iov.kiov[0]));
- if (nal->cb_read (nal, private, new->md_iov.kiov, md->start,
- niov * sizeof (new->md_iov.kiov[0])))
- return PTL_SEGV;
-
for (i = 0; i < niov; i++) {
/* We take the page pointer on trust */
- if (new->md_iov.kiov[i].kiov_offset +
- new->md_iov.kiov[i].kiov_len > PAGE_SIZE )
+ if (lmd->md_iov.kiov[i].kiov_offset +
+ lmd->md_iov.kiov[i].kiov_len > PAGE_SIZE )
return PTL_VAL_FAILED; /* invalid length */
- total_length += new->md_iov.kiov[i].kiov_len;
+ total_length += lmd->md_iov.kiov[i].kiov_len;
}
- new->length = total_length;
+ lmd->length = total_length;
+
+ if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
+ (umd->max_size < 0 ||
+ umd->max_size > total_length)) // illegal max_size
+ return PTL_MD_ILLEGAL;
- if (nal->cb_map_pages != NULL) {
- rc = nal->cb_map_pages (nal, niov, new->md_iov.kiov,
- &new->md_addrkey);
+ if (nal->libnal_map_pages != NULL) {
+ rc = nal->libnal_map_pages (nal, niov, lmd->md_iov.kiov,
+ &lmd->md_addrkey);
if (rc != PTL_OK)
return (rc);
}
#endif
} else { /* contiguous */
- new->length = md->length;
- new->md_niov = niov = 1;
- new->md_iov.iov[0].iov_base = md->start;
- new->md_iov.iov[0].iov_len = md->length;
-
- if (nal->cb_map != NULL) {
- rc = nal->cb_map (nal, niov, new->md_iov.iov,
- &new->md_addrkey);
+ lmd->length = umd->length;
+ lmd->md_niov = niov = 1;
+ lmd->md_iov.iov[0].iov_base = umd->start;
+ lmd->md_iov.iov[0].iov_len = umd->length;
+
+ if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
+ (umd->max_size < 0 ||
+ umd->max_size > umd->length)) // illegal max_size
+ return PTL_MD_ILLEGAL;
+
+ if (nal->libnal_map != NULL) {
+ rc = nal->libnal_map (nal, niov, lmd->md_iov.iov,
+ &lmd->md_addrkey);
if (rc != PTL_OK)
return (rc);
}
eq->eq_refcount++;
/* It's good; let handle2md succeed and add to active mds */
- lib_initialise_handle (nal, &new->md_lh, PTL_COOKIE_TYPE_MD);
- list_add (&new->md_list, &nal->ni.ni_active_mds);
+ lib_initialise_handle (nal, &lmd->md_lh, PTL_COOKIE_TYPE_MD);
+ list_add (&lmd->md_list, &nal->libnal_ni.ni_active_mds);
return PTL_OK;
}
/* must be called with state lock held */
-void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md, ptl_md_t * new)
+void
+lib_md_deconstruct(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd)
{
/* NB this doesn't copy out all the iov entries so when a
* discontiguous MD is copied out, the target gets to know the
* original iov pointer (in start) and the number of entries it had
* and that's all.
*/
- new->start = md->start;
- new->length = ((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0) ?
- md->length : md->md_niov;
- new->threshold = md->threshold;
- new->max_size = md->max_size;
- new->options = md->options;
- new->user_ptr = md->user_ptr;
- ptl_eq2handle(&new->eventq, md->eq);
+ umd->start = lmd->start;
+ umd->length = ((lmd->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0) ?
+ lmd->length : lmd->md_niov;
+ umd->threshold = lmd->threshold;
+ umd->max_size = lmd->max_size;
+ umd->options = lmd->options;
+ umd->user_ptr = lmd->user_ptr;
+ ptl_eq2handle(&umd->eventq, nal, lmd->eq);
}
-int do_PtlMDAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int
+lib_api_md_attach(nal_t *apinal, ptl_handle_me_t *meh,
+ ptl_md_t *umd, ptl_unlink_t unlink,
+ ptl_handle_md_t *handle)
{
- /*
- * Incoming:
- * ptl_handle_me_t current_in
- * ptl_md_t md_in
- * ptl_unlink_t unlink_in
- *
- * Outgoing:
- * ptl_handle_md_t * handle_out
- */
-
- PtlMDAttach_in *args = v_args;
- PtlMDAttach_out *ret = v_ret;
- lib_me_t *me;
- lib_md_t *md;
+ lib_nal_t *nal = apinal->nal_data;
+ lib_me_t *me;
+ lib_md_t *md;
unsigned long flags;
+ int rc;
- if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 &&
- args->md_in.length > PTL_MD_MAX_IOV) /* too many fragments */
- return (ret->rc = PTL_IOV_INVALID);
+ if ((umd->options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 &&
+ umd->length > PTL_MD_MAX_IOV) /* too many fragments */
+ return PTL_IOV_INVALID;
- md = lib_md_alloc(nal, &args->md_in);
+ md = lib_md_alloc(nal, umd);
if (md == NULL)
- return (ret->rc = PTL_NO_SPACE);
+ return PTL_NO_SPACE;
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
- me = ptl_handle2me(&args->me_in, nal);
+ me = ptl_handle2me(meh, nal);
if (me == NULL) {
- ret->rc = PTL_ME_INVALID;
+ rc = PTL_ME_INVALID;
} else if (me->md != NULL) {
- ret->rc = PTL_ME_IN_USE;
+ rc = PTL_ME_IN_USE;
} else {
- ret->rc = lib_md_build(nal, md, private, &args->md_in,
- &args->eq_in, args->unlink_in);
-
- if (ret->rc == PTL_OK) {
+ rc = lib_md_build(nal, md, umd, unlink);
+ if (rc == PTL_OK) {
me->md = md;
md->me = me;
- ptl_md2handle(&ret->handle_out, md);
+ ptl_md2handle(handle, nal, md);
- state_unlock (nal, &flags);
+ LIB_UNLOCK(nal, flags);
return (PTL_OK);
}
}
lib_md_free (nal, md);
- state_unlock (nal, &flags);
- return (ret->rc);
+ LIB_UNLOCK(nal, flags);
+ return (rc);
}
-int do_PtlMDBind(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int
+lib_api_md_bind(nal_t *apinal,
+ ptl_md_t *umd, ptl_unlink_t unlink,
+ ptl_handle_md_t *handle)
{
- /*
- * Incoming:
- * ptl_handle_ni_t ni_in
- * ptl_md_t md_in
- *
- * Outgoing:
- * ptl_handle_md_t * handle_out
- */
-
- PtlMDBind_in *args = v_args;
- PtlMDBind_out *ret = v_ret;
- lib_md_t *md;
+ lib_nal_t *nal = apinal->nal_data;
+ lib_md_t *md;
unsigned long flags;
+ int rc;
- if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 &&
- args->md_in.length > PTL_MD_MAX_IOV) /* too many fragments */
- return (ret->rc = PTL_IOV_INVALID);
+ if ((umd->options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 &&
+ umd->length > PTL_MD_MAX_IOV) /* too many fragments */
+ return PTL_IOV_INVALID;
- md = lib_md_alloc(nal, &args->md_in);
+ md = lib_md_alloc(nal, umd);
if (md == NULL)
- return (ret->rc = PTL_NO_SPACE);
+ return PTL_NO_SPACE;
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
- ret->rc = lib_md_build(nal, md, private, &args->md_in,
- &args->eq_in, args->unlink_in);
+ rc = lib_md_build(nal, md, umd, unlink);
- if (ret->rc == PTL_OK) {
- ptl_md2handle(&ret->handle_out, md);
+ if (rc == PTL_OK) {
+ ptl_md2handle(handle, nal, md);
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
return (PTL_OK);
}
lib_md_free (nal, md);
- state_unlock(nal, &flags);
- return (ret->rc);
+ LIB_UNLOCK(nal, flags);
+ return (rc);
}
-int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int
+lib_api_md_unlink (nal_t *apinal, ptl_handle_md_t *mdh)
{
- PtlMDUnlink_in *args = v_args;
- PtlMDUnlink_out *ret = v_ret;
+ lib_nal_t *nal = apinal->nal_data;
ptl_event_t ev;
lib_md_t *md;
unsigned long flags;
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
- md = ptl_handle2md(&args->md_in, nal);
+ md = ptl_handle2md(mdh, nal);
if (md == NULL) {
- state_unlock(nal, &flags);
- return (ret->rc = PTL_MD_INVALID);
+ LIB_UNLOCK(nal, flags);
+ return PTL_MD_INVALID;
}
/* If the MD is busy, lib_md_unlink just marks it for deletion, and
ev.unlinked = 1;
lib_md_deconstruct(nal, md, &ev.mem_desc);
- lib_enq_event_locked(nal, private, md->eq, &ev);
+ lib_enq_event_locked(nal, NULL, md->eq, &ev);
}
- lib_md_deconstruct(nal, md, &ret->status_out);
lib_md_unlink(nal, md);
- ret->rc = PTL_OK;
- state_unlock(nal, &flags);
-
- return (PTL_OK);
+ LIB_UNLOCK(nal, flags);
+ return PTL_OK;
}
-int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *v_args,
- void *v_ret)
+int
+lib_api_md_update (nal_t *apinal,
+ ptl_handle_md_t *mdh,
+ ptl_md_t *oldumd, ptl_md_t *newumd,
+ ptl_handle_eq_t *testqh)
{
- /*
- * Incoming:
- * ptl_handle_md_t md_in
- * ptl_md_t * old_inout
- * ptl_md_t * new_inout
- * ptl_handle_eq_t testq_in
- * ptl_seq_t sequence_in
- *
- * Outgoing:
- * ptl_md_t * old_inout
- * ptl_md_t * new_inout
- */
- PtlMDUpdate_internal_in *args = v_args;
- PtlMDUpdate_internal_out *ret = v_ret;
- lib_md_t *md;
- lib_eq_t *test_eq = NULL;
- ptl_md_t *new = &args->new_inout;
+ lib_nal_t *nal = apinal->nal_data;
+ lib_md_t *md;
+ lib_eq_t *test_eq = NULL;
unsigned long flags;
+ int rc;
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
- md = ptl_handle2md(&args->md_in, nal);
+ md = ptl_handle2md(mdh, nal);
if (md == NULL) {
- ret->rc = PTL_MD_INVALID;
+ rc = PTL_MD_INVALID;
goto out;
}
- if (args->old_inout_valid)
- lib_md_deconstruct(nal, md, &ret->old_inout);
+ if (oldumd != NULL)
+ lib_md_deconstruct(nal, md, oldumd);
- if (!args->new_inout_valid) {
- ret->rc = PTL_OK;
+ if (newumd == NULL) {
+ rc = PTL_OK;
goto out;
}
/* XXX fttb, the new MD must be the same "shape" wrt fragmentation,
* since we simply overwrite the old lib-md */
- if ((((new->options ^ md->options) &
+ if ((((newumd->options ^ md->options) &
(PTL_MD_IOVEC | PTL_MD_KIOV)) != 0) ||
- ((new->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0 &&
- new->length != md->md_niov)) {
- ret->rc = PTL_IOV_INVALID;
+ ((newumd->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0 &&
+ newumd->length != md->md_niov)) {
+ rc = PTL_IOV_INVALID;
goto out;
}
- if (!PtlHandleIsEqual (args->testq_in, PTL_EQ_NONE)) {
- test_eq = ptl_handle2eq(&args->testq_in, nal);
+ if (!PtlHandleIsEqual (*testqh, PTL_EQ_NONE)) {
+ test_eq = ptl_handle2eq(testqh, nal);
if (test_eq == NULL) {
- ret->rc = PTL_EQ_INVALID;
+ rc = PTL_EQ_INVALID;
goto out;
}
}
if (md->pending != 0) {
- ret->rc = PTL_MD_NO_UPDATE;
- goto out;
+ rc = PTL_MD_NO_UPDATE;
+ goto out;
}
if (test_eq == NULL ||
- test_eq->sequence == args->sequence_in) {
+ test_eq->eq_deq_seq == test_eq->eq_enq_seq) {
lib_me_t *me = md->me;
int unlink = (md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) ?
PTL_UNLINK : PTL_RETAIN;
// #warning this does not track eq refcounts properly
- ret->rc = lib_md_build(nal, md, private,
- new, &new->eventq, unlink);
+ rc = lib_md_build(nal, md, newumd, unlink);
md->me = me;
} else {
- ret->rc = PTL_MD_NO_UPDATE;
+ rc = PTL_MD_NO_UPDATE;
}
out:
- state_unlock(nal, &flags);
- return (ret->rc);
+ LIB_UNLOCK(nal, flags);
+
+ return rc;
}
#endif
#include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
-static void lib_me_dump(nal_cb_t * nal, lib_me_t * me);
-
-int do_PtlMEAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int
+lib_api_me_attach(nal_t *apinal,
+ ptl_pt_index_t portal,
+ ptl_process_id_t match_id,
+ ptl_match_bits_t match_bits,
+ ptl_match_bits_t ignore_bits,
+ ptl_unlink_t unlink, ptl_ins_pos_t pos,
+ ptl_handle_me_t *handle)
{
- PtlMEAttach_in *args = v_args;
- PtlMEAttach_out *ret = v_ret;
- lib_ni_t *ni = &nal->ni;
- lib_ptl_t *tbl = &ni->tbl;
+ lib_nal_t *nal = apinal->nal_data;
+ lib_ni_t *ni = &nal->libnal_ni;
+ lib_ptl_t *tbl = &ni->ni_portals;
+ lib_me_t *me;
unsigned long flags;
- lib_me_t *me;
- if (args->index_in >= tbl->size)
- return ret->rc = PTL_PT_INDEX_INVALID;
+ if (portal >= tbl->size)
+ return PTL_PT_INDEX_INVALID;
/* Should check for valid matchid, but not yet */
- if (0)
- return ret->rc = PTL_PROCESS_INVALID;
me = lib_me_alloc (nal);
if (me == NULL)
- return (ret->rc = PTL_NO_SPACE);
+ return PTL_NO_SPACE;
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
- me->match_id = args->match_id_in;
- me->match_bits = args->match_bits_in;
- me->ignore_bits = args->ignore_bits_in;
- me->unlink = args->unlink_in;
+ me->match_id = match_id;
+ me->match_bits = match_bits;
+ me->ignore_bits = ignore_bits;
+ me->unlink = unlink;
me->md = NULL;
lib_initialise_handle (nal, &me->me_lh, PTL_COOKIE_TYPE_ME);
- if (args->position_in == PTL_INS_AFTER)
- list_add_tail(&me->me_list, &(tbl->tbl[args->index_in]));
+ if (pos == PTL_INS_AFTER)
+ list_add_tail(&me->me_list, &(tbl->tbl[portal]));
else
- list_add(&me->me_list, &(tbl->tbl[args->index_in]));
+ list_add(&me->me_list, &(tbl->tbl[portal]));
- ptl_me2handle(&ret->handle_out, me);
+ ptl_me2handle(handle, nal, me);
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
- return ret->rc = PTL_OK;
+ return PTL_OK;
}
-int do_PtlMEInsert(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int
+lib_api_me_insert(nal_t *apinal,
+ ptl_handle_me_t *current_meh,
+ ptl_process_id_t match_id,
+ ptl_match_bits_t match_bits,
+ ptl_match_bits_t ignore_bits,
+ ptl_unlink_t unlink, ptl_ins_pos_t pos,
+ ptl_handle_me_t *handle)
{
- PtlMEInsert_in *args = v_args;
- PtlMEInsert_out *ret = v_ret;
+ lib_nal_t *nal = apinal->nal_data;
+ lib_me_t *current_me;
+ lib_me_t *new_me;
unsigned long flags;
- lib_me_t *me;
- lib_me_t *new;
- new = lib_me_alloc (nal);
- if (new == NULL)
- return (ret->rc = PTL_NO_SPACE);
+ new_me = lib_me_alloc (nal);
+ if (new_me == NULL)
+ return PTL_NO_SPACE;
/* Should check for valid matchid, but not yet */
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
- me = ptl_handle2me(&args->current_in, nal);
- if (me == NULL) {
- lib_me_free (nal, new);
+ current_me = ptl_handle2me(current_meh, nal);
+ if (current_me == NULL) {
+ lib_me_free (nal, new_me);
- state_unlock (nal, &flags);
- return (ret->rc = PTL_ME_INVALID);
+ LIB_UNLOCK(nal, flags);
+ return PTL_ME_INVALID;
}
- new->match_id = args->match_id_in;
- new->match_bits = args->match_bits_in;
- new->ignore_bits = args->ignore_bits_in;
- new->unlink = args->unlink_in;
- new->md = NULL;
+ new_me->match_id = match_id;
+ new_me->match_bits = match_bits;
+ new_me->ignore_bits = ignore_bits;
+ new_me->unlink = unlink;
+ new_me->md = NULL;
- lib_initialise_handle (nal, &new->me_lh, PTL_COOKIE_TYPE_ME);
+ lib_initialise_handle (nal, &new_me->me_lh, PTL_COOKIE_TYPE_ME);
- if (args->position_in == PTL_INS_AFTER)
- list_add_tail(&new->me_list, &me->me_list);
+ if (pos == PTL_INS_AFTER)
+ list_add_tail(&new_me->me_list, &current_me->me_list);
else
- list_add(&new->me_list, &me->me_list);
+ list_add(&new_me->me_list, &current_me->me_list);
- ptl_me2handle(&ret->handle_out, new);
+ ptl_me2handle(handle, nal, new_me);
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
- return ret->rc = PTL_OK;
+ return PTL_OK;
}
-int do_PtlMEUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int
+lib_api_me_unlink (nal_t *apinal, ptl_handle_me_t *meh)
{
- PtlMEUnlink_in *args = v_args;
- PtlMEUnlink_out *ret = v_ret;
+ lib_nal_t *nal = apinal->nal_data;
unsigned long flags;
- lib_me_t *me;
+ lib_me_t *me;
+ int rc;
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
- me = ptl_handle2me(&args->current_in, nal);
+ me = ptl_handle2me(meh, nal);
if (me == NULL) {
- ret->rc = PTL_ME_INVALID;
+ rc = PTL_ME_INVALID;
} else {
lib_me_unlink(nal, me);
- ret->rc = PTL_OK;
+ rc = PTL_OK;
}
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
- return (ret->rc);
+ return (rc);
}
/* call with state_lock please */
-void lib_me_unlink(nal_cb_t *nal, lib_me_t *me)
+void
+lib_me_unlink(lib_nal_t *nal, lib_me_t *me)
{
list_del (&me->me_list);
lib_me_free(nal, me);
}
-int do_PtlTblDump(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+#if 0
+static void
+lib_me_dump(lib_nal_t *nal, lib_me_t * me)
{
- PtlTblDump_in *args = v_args;
- PtlTblDump_out *ret = v_ret;
- lib_ptl_t *tbl = &nal->ni.tbl;
- ptl_handle_any_t handle;
- struct list_head *tmp;
- unsigned long flags;
+ CWARN("Match Entry %p ("LPX64")\n", me,
+ me->me_lh.lh_cookie);
- if (args->index_in < 0 || args->index_in >= tbl->size)
- return ret->rc = PTL_PT_INDEX_INVALID;
-
- nal->cb_printf(nal, "Portal table index %d\n", args->index_in);
-
- state_lock(nal, &flags);
- list_for_each(tmp, &(tbl->tbl[args->index_in])) {
- lib_me_t *me = list_entry(tmp, lib_me_t, me_list);
- ptl_me2handle(&handle, me);
- lib_me_dump(nal, me);
- }
- state_unlock(nal, &flags);
+ CWARN("\tMatch/Ignore\t= %016lx / %016lx\n",
+ me->match_bits, me->ignore_bits);
- return ret->rc = PTL_OK;
-}
-
-int do_PtlMEDump(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
-{
- PtlMEDump_in *args = v_args;
- PtlMEDump_out *ret = v_ret;
- lib_me_t *me;
- unsigned long flags;
-
- state_lock(nal, &flags);
-
- me = ptl_handle2me(&args->current_in, nal);
- if (me == NULL) {
- ret->rc = PTL_ME_INVALID;
- } else {
- lib_me_dump(nal, me);
- ret->rc = PTL_OK;
- }
-
- state_unlock(nal, &flags);
-
- return ret->rc;
-}
-
-static void lib_me_dump(nal_cb_t * nal, lib_me_t * me)
-{
- nal->cb_printf(nal, "Match Entry %p ("LPX64")\n", me,
- me->me_lh.lh_cookie);
-
- nal->cb_printf(nal, "\tMatch/Ignore\t= %016lx / %016lx\n",
- me->match_bits, me->ignore_bits);
-
- nal->cb_printf(nal, "\tMD\t= %p\n", me->md);
- nal->cb_printf(nal, "\tprev\t= %p\n",
- list_entry(me->me_list.prev, lib_me_t, me_list));
- nal->cb_printf(nal, "\tnext\t= %p\n",
- list_entry(me->me_list.next, lib_me_t, me_list));
+ CWARN("\tMD\t= %p\n", me->md);
+ CWARN("\tprev\t= %p\n",
+ list_entry(me->me_list.prev, lib_me_t, me_list));
+ CWARN("\tnext\t= %p\n",
+ list_entry(me->me_list.next, lib_me_t, me_list));
}
+#endif
#endif
#include <portals/p30.h>
#include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
/* forward ref */
-static void lib_commit_md (nal_cb_t *nal, lib_md_t *md, lib_msg_t *msg);
+static void lib_commit_md (lib_nal_t *nal, lib_md_t *md, lib_msg_t *msg);
static lib_md_t *
-lib_match_md(nal_cb_t *nal, int index, int op_mask,
+lib_match_md(lib_nal_t *nal, int index, int op_mask,
ptl_nid_t src_nid, ptl_pid_t src_pid,
ptl_size_t rlength, ptl_size_t roffset,
ptl_match_bits_t match_bits, lib_msg_t *msg,
ptl_size_t *mlength_out, ptl_size_t *offset_out)
{
- lib_ni_t *ni = &nal->ni;
- struct list_head *match_list = &ni->tbl.tbl[index];
+ lib_ni_t *ni = &nal->libnal_ni;
+ struct list_head *match_list = &ni->ni_portals.tbl[index];
struct list_head *tmp;
lib_me_t *me;
lib_md_t *md;
CDEBUG (D_NET, "Request from "LPU64".%d of length %d into portal %d "
"MB="LPX64"\n", src_nid, src_pid, rlength, index, match_bits);
- if (index < 0 || index >= ni->tbl.size) {
+ if (index < 0 || index >= ni->ni_portals.size) {
CERROR("Invalid portal %d not in [0-%d]\n",
- index, ni->tbl.size);
+ index, ni->ni_portals.size);
goto failed;
}
failed:
CERROR (LPU64": Dropping %s from "LPU64".%d portal %d match "LPX64
" offset %d length %d: no match\n",
- ni->nid, (op_mask == PTL_MD_OP_GET) ? "GET" : "PUT",
+ ni->ni_pid.nid, (op_mask == PTL_MD_OP_GET) ? "GET" : "PUT",
src_nid, src_pid, index, match_bits, roffset, rlength);
RETURN(NULL);
}
-int do_PtlFailNid (nal_cb_t *nal, void *private, void *v_args, void *v_ret)
+int lib_api_fail_nid (nal_t *apinal, ptl_nid_t nid, unsigned int threshold)
{
- PtlFailNid_in *args = v_args;
- PtlFailNid_out *ret = v_ret;
+ lib_nal_t *nal = apinal->nal_data;
lib_test_peer_t *tp;
unsigned long flags;
struct list_head *el;
struct list_head *next;
struct list_head cull;
- if (args->threshold != 0) {
+ if (threshold != 0) {
/* Adding a new entry */
- tp = (lib_test_peer_t *)nal->cb_malloc (nal, sizeof (*tp));
+ PORTAL_ALLOC(tp, sizeof(*tp));
if (tp == NULL)
- return (ret->rc = PTL_FAIL);
+ return PTL_NO_SPACE;
- tp->tp_nid = args->nid;
- tp->tp_threshold = args->threshold;
+ tp->tp_nid = nid;
+ tp->tp_threshold = threshold;
- state_lock (nal, &flags);
- list_add (&tp->tp_list, &nal->ni.ni_test_peers);
- state_unlock (nal, &flags);
- return (ret->rc = PTL_OK);
+ LIB_LOCK(nal, flags);
+ list_add_tail (&tp->tp_list, &nal->libnal_ni.ni_test_peers);
+ LIB_UNLOCK(nal, flags);
+ return PTL_OK;
}
/* removing entries */
INIT_LIST_HEAD (&cull);
- state_lock (nal, &flags);
+ LIB_LOCK(nal, flags);
- list_for_each_safe (el, next, &nal->ni.ni_test_peers) {
+ list_for_each_safe (el, next, &nal->libnal_ni.ni_test_peers) {
tp = list_entry (el, lib_test_peer_t, tp_list);
if (tp->tp_threshold == 0 || /* needs culling anyway */
- args->nid == PTL_NID_ANY || /* removing all entries */
- tp->tp_nid == args->nid) /* matched this one */
+ nid == PTL_NID_ANY || /* removing all entries */
+ tp->tp_nid == nid) /* matched this one */
{
list_del (&tp->tp_list);
list_add (&tp->tp_list, &cull);
}
}
- state_unlock (nal, &flags);
+ LIB_UNLOCK(nal, flags);
while (!list_empty (&cull)) {
tp = list_entry (cull.next, lib_test_peer_t, tp_list);
list_del (&tp->tp_list);
- nal->cb_free (nal, tp, sizeof (*tp));
+ PORTAL_FREE(tp, sizeof (*tp));
}
- return (ret->rc = PTL_OK);
+ return PTL_OK;
}
static int
-fail_peer (nal_cb_t *nal, ptl_nid_t nid, int outgoing)
+fail_peer (lib_nal_t *nal, ptl_nid_t nid, int outgoing)
{
lib_test_peer_t *tp;
struct list_head *el;
INIT_LIST_HEAD (&cull);
- state_lock (nal, &flags);
+ LIB_LOCK (nal, flags);
- list_for_each_safe (el, next, &nal->ni.ni_test_peers) {
+ list_for_each_safe (el, next, &nal->libnal_ni.ni_test_peers) {
tp = list_entry (el, lib_test_peer_t, tp_list);
if (tp->tp_threshold == 0) {
}
}
- state_unlock (nal, &flags);
+ LIB_UNLOCK (nal, flags);
while (!list_empty (&cull)) {
tp = list_entry (cull.next, lib_test_peer_t, tp_list);
list_del (&tp->tp_list);
- nal->cb_free (nal, tp, sizeof (*tp));
+ PORTAL_FREE(tp, sizeof (*tp));
}
return (fail);
#endif
ptl_err_t
-lib_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
+lib_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen)
{
if (mlen == 0)
- return (nal->cb_recv(nal, private, msg,
- 0, NULL,
- offset, mlen, rlen));
+ return (nal->libnal_recv(nal, private, msg,
+ 0, NULL,
+ offset, mlen, rlen));
if ((md->options & PTL_MD_KIOV) == 0)
- return (nal->cb_recv(nal, private, msg,
- md->md_niov, md->md_iov.iov,
- offset, mlen, rlen));
+ return (nal->libnal_recv(nal, private, msg,
+ md->md_niov, md->md_iov.iov,
+ offset, mlen, rlen));
- return (nal->cb_recv_pages(nal, private, msg,
- md->md_niov, md->md_iov.kiov,
- offset, mlen, rlen));
+ return (nal->libnal_recv_pages(nal, private, msg,
+ md->md_niov, md->md_iov.kiov,
+ offset, mlen, rlen));
}
ptl_err_t
-lib_send (nal_cb_t *nal, void *private, lib_msg_t *msg,
+lib_send (lib_nal_t *nal, void *private, lib_msg_t *msg,
ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
lib_md_t *md, ptl_size_t offset, ptl_size_t len)
{
if (len == 0)
- return (nal->cb_send(nal, private, msg,
- hdr, type, nid, pid,
- 0, NULL,
- offset, len));
+ return (nal->libnal_send(nal, private, msg,
+ hdr, type, nid, pid,
+ 0, NULL,
+ offset, len));
if ((md->options & PTL_MD_KIOV) == 0)
- return (nal->cb_send(nal, private, msg,
- hdr, type, nid, pid,
- md->md_niov, md->md_iov.iov,
- offset, len));
-
- return (nal->cb_send_pages(nal, private, msg,
- hdr, type, nid, pid,
- md->md_niov, md->md_iov.kiov,
- offset, len));
+ return (nal->libnal_send(nal, private, msg,
+ hdr, type, nid, pid,
+ md->md_niov, md->md_iov.iov,
+ offset, len));
+
+ return (nal->libnal_send_pages(nal, private, msg,
+ hdr, type, nid, pid,
+ md->md_niov, md->md_iov.kiov,
+ offset, len));
}
static void
-lib_commit_md (nal_cb_t *nal, lib_md_t *md, lib_msg_t *msg)
+lib_commit_md (lib_nal_t *nal, lib_md_t *md, lib_msg_t *msg)
{
- /* ALWAYS called holding the state_lock */
- lib_counters_t *counters = &nal->ni.counters;
+ /* ALWAYS called holding the LIB_LOCK */
+ lib_counters_t *counters = &nal->libnal_ni.ni_counters;
/* Here, we commit the MD to a network OP by marking it busy and
* decrementing its threshold. Come what may, the network "owns"
if (counters->msgs_alloc > counters->msgs_max)
counters->msgs_max = counters->msgs_alloc;
- list_add (&msg->msg_list, &nal->ni.ni_active_msgs);
+ list_add (&msg->msg_list, &nal->libnal_ni.ni_active_msgs);
}
static void
-lib_drop_message (nal_cb_t *nal, void *private, ptl_hdr_t *hdr)
+lib_drop_message (lib_nal_t *nal, void *private, ptl_hdr_t *hdr)
{
unsigned long flags;
* to receive (init_msg() not called) and therefore can't cause an
* event. */
- state_lock(nal, &flags);
- nal->ni.counters.drop_count++;
- nal->ni.counters.drop_length += hdr->payload_length;
- state_unlock(nal, &flags);
+ LIB_LOCK(nal, flags);
+ nal->libnal_ni.ni_counters.drop_count++;
+ nal->libnal_ni.ni_counters.drop_length += hdr->payload_length;
+ LIB_UNLOCK(nal, flags);
/* NULL msg => if NAL calls lib_finalize it will be a noop */
(void) lib_recv(nal, private, NULL, NULL, 0, 0, hdr->payload_length);
*
*/
static ptl_err_t
-parse_put(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
+parse_put(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
{
- lib_ni_t *ni = &nal->ni;
+ lib_ni_t *ni = &nal->libnal_ni;
ptl_size_t mlength = 0;
ptl_size_t offset = 0;
ptl_err_t rc;
hdr->msg.put.ptl_index = NTOH__u32 (hdr->msg.put.ptl_index);
hdr->msg.put.offset = NTOH__u32 (hdr->msg.put.offset);
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
md = lib_match_md(nal, hdr->msg.put.ptl_index, PTL_MD_OP_PUT,
hdr->src_nid, hdr->src_pid,
hdr->msg.put.match_bits, msg,
&mlength, &offset);
if (md == NULL) {
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
return (PTL_FAIL);
}
msg->ack_wmd = hdr->msg.put.ack_wmd;
}
- ni->counters.recv_count++;
- ni->counters.recv_length += mlength;
+ ni->ni_counters.recv_count++;
+ ni->ni_counters.recv_length += mlength;
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
rc = lib_recv(nal, private, msg, md, offset, mlength,
hdr->payload_length);
if (rc != PTL_OK)
CERROR(LPU64": error on receiving PUT from "LPU64": %d\n",
- ni->nid, hdr->src_nid, rc);
+ ni->ni_pid.nid, hdr->src_nid, rc);
return (rc);
}
static ptl_err_t
-parse_get(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
+parse_get(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
{
- lib_ni_t *ni = &nal->ni;
+ lib_ni_t *ni = &nal->libnal_ni;
ptl_size_t mlength = 0;
ptl_size_t offset = 0;
lib_md_t *md;
hdr->msg.get.sink_length = NTOH__u32 (hdr->msg.get.sink_length);
hdr->msg.get.src_offset = NTOH__u32 (hdr->msg.get.src_offset);
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
md = lib_match_md(nal, hdr->msg.get.ptl_index, PTL_MD_OP_GET,
hdr->src_nid, hdr->src_pid,
hdr->msg.get.match_bits, msg,
&mlength, &offset);
if (md == NULL) {
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
return (PTL_FAIL);
}
msg->ev.type = PTL_EVENT_GET_END;
msg->ev.hdr_data = 0;
- ni->counters.send_count++;
- ni->counters.send_length += mlength;
+ ni->ni_counters.send_count++;
+ ni->ni_counters.send_length += mlength;
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
memset (&reply, 0, sizeof (reply));
reply.type = HTON__u32 (PTL_MSG_REPLY);
reply.dest_nid = HTON__u64 (hdr->src_nid);
- reply.src_nid = HTON__u64 (ni->nid);
reply.dest_pid = HTON__u32 (hdr->src_pid);
- reply.src_pid = HTON__u32 (ni->pid);
+ reply.src_nid = HTON__u64 (ni->ni_pid.nid);
+ reply.src_pid = HTON__u32 (ni->ni_pid.pid);
reply.payload_length = HTON__u32 (mlength);
reply.msg.reply.dst_wmd = hdr->msg.get.return_wmd;
hdr->src_nid, hdr->src_pid, md, offset, mlength);
if (rc != PTL_OK)
CERROR(LPU64": Unable to send REPLY for GET from "LPU64": %d\n",
- ni->nid, hdr->src_nid, rc);
+ ni->ni_pid.nid, hdr->src_nid, rc);
/* Discard any junk after the hdr */
(void) lib_recv(nal, private, NULL, NULL, 0, 0, hdr->payload_length);
}
static ptl_err_t
-parse_reply(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
+parse_reply(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
{
- lib_ni_t *ni = &nal->ni;
+ lib_ni_t *ni = &nal->libnal_ni;
lib_md_t *md;
int rlength;
int length;
unsigned long flags;
ptl_err_t rc;
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
/* NB handles only looked up by creator (no flips) */
md = ptl_wire_handle2md(&hdr->msg.reply.dst_wmd, nal);
if (md == NULL || md->threshold == 0) {
CERROR (LPU64": Dropping REPLY from "LPU64" for %s MD "LPX64"."LPX64"\n",
- ni->nid, hdr->src_nid,
+ ni->ni_pid.nid, hdr->src_nid,
md == NULL ? "invalid" : "inactive",
hdr->msg.reply.dst_wmd.wh_interface_cookie,
hdr->msg.reply.dst_wmd.wh_object_cookie);
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
return (PTL_FAIL);
}
if ((md->options & PTL_MD_TRUNCATE) == 0) {
CERROR (LPU64": Dropping REPLY from "LPU64
" length %d for MD "LPX64" would overflow (%d)\n",
- ni->nid, hdr->src_nid, length,
+ ni->ni_pid.nid, hdr->src_nid, length,
hdr->msg.reply.dst_wmd.wh_object_cookie,
md->length);
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
return (PTL_FAIL);
}
length = md->length;
lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
- ni->counters.recv_count++;
- ni->counters.recv_length += length;
+ ni->ni_counters.recv_count++;
+ ni->ni_counters.recv_length += length;
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
rc = lib_recv(nal, private, msg, md, 0, length, rlength);
if (rc != PTL_OK)
CERROR(LPU64": error on receiving REPLY from "LPU64": %d\n",
- ni->nid, hdr->src_nid, rc);
+ ni->ni_pid.nid, hdr->src_nid, rc);
return (rc);
}
static ptl_err_t
-parse_ack(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
+parse_ack(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
{
- lib_ni_t *ni = &nal->ni;
+ lib_ni_t *ni = &nal->libnal_ni;
lib_md_t *md;
unsigned long flags;
hdr->msg.ack.match_bits = NTOH__u64 (hdr->msg.ack.match_bits);
hdr->msg.ack.mlength = NTOH__u32 (hdr->msg.ack.mlength);
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
/* NB handles only looked up by creator (no flips) */
md = ptl_wire_handle2md(&hdr->msg.ack.dst_wmd, nal);
if (md == NULL || md->threshold == 0) {
CDEBUG(D_INFO, LPU64": Dropping ACK from "LPU64" to %s MD "
- LPX64"."LPX64"\n", ni->nid, hdr->src_nid,
+ LPX64"."LPX64"\n", ni->ni_pid.nid, hdr->src_nid,
(md == NULL) ? "invalid" : "inactive",
hdr->msg.ack.dst_wmd.wh_interface_cookie,
hdr->msg.ack.dst_wmd.wh_object_cookie);
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
return (PTL_FAIL);
}
CDEBUG(D_NET, LPU64": ACK from "LPU64" into md "LPX64"\n",
- ni->nid, hdr->src_nid,
+ ni->ni_pid.nid, hdr->src_nid,
hdr->msg.ack.dst_wmd.wh_object_cookie);
lib_commit_md(nal, md, msg);
lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
- ni->counters.recv_count++;
+ ni->ni_counters.recv_count++;
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
/* We have received and matched up the ack OK, create the
* completion event now... */
}
}
-void print_hdr(nal_cb_t * nal, ptl_hdr_t * hdr)
+void print_hdr(lib_nal_t *nal, ptl_hdr_t * hdr)
{
char *type_str = hdr_type_string (hdr);
- nal->cb_printf(nal, "P3 Header at %p of type %s\n", hdr, type_str);
- nal->cb_printf(nal, " From nid/pid %Lu/%Lu", hdr->src_nid,
- hdr->src_pid);
- nal->cb_printf(nal, " To nid/pid %Lu/%Lu\n", hdr->dest_nid,
- hdr->dest_pid);
+ CWARN("P3 Header at %p of type %s\n", hdr, type_str);
+ CWARN(" From nid/pid "LPX64"/%u", hdr->src_nid, hdr->src_pid);
+ CWARN(" To nid/pid "LPX64"/%u\n", hdr->dest_nid, hdr->dest_pid);
switch (hdr->type) {
default:
break;
case PTL_MSG_PUT:
- nal->cb_printf(nal,
- " Ptl index %d, ack md "LPX64"."LPX64", "
- "match bits "LPX64"\n",
- hdr->msg.put.ptl_index,
- hdr->msg.put.ack_wmd.wh_interface_cookie,
- hdr->msg.put.ack_wmd.wh_object_cookie,
- hdr->msg.put.match_bits);
- nal->cb_printf(nal,
- " Length %d, offset %d, hdr data "LPX64"\n",
- hdr->payload_length, hdr->msg.put.offset,
- hdr->msg.put.hdr_data);
+ CWARN(" Ptl index %d, ack md "LPX64"."LPX64", "
+ "match bits "LPX64"\n",
+ hdr->msg.put.ptl_index,
+ hdr->msg.put.ack_wmd.wh_interface_cookie,
+ hdr->msg.put.ack_wmd.wh_object_cookie,
+ hdr->msg.put.match_bits);
+ CWARN(" Length %d, offset %d, hdr data "LPX64"\n",
+ hdr->payload_length, hdr->msg.put.offset,
+ hdr->msg.put.hdr_data);
break;
case PTL_MSG_GET:
- nal->cb_printf(nal,
- " Ptl index %d, return md "LPX64"."LPX64", "
- "match bits "LPX64"\n", hdr->msg.get.ptl_index,
- hdr->msg.get.return_wmd.wh_interface_cookie,
- hdr->msg.get.return_wmd.wh_object_cookie,
- hdr->msg.get.match_bits);
- nal->cb_printf(nal,
- " Length %d, src offset %d\n",
- hdr->msg.get.sink_length,
- hdr->msg.get.src_offset);
+ CWARN(" Ptl index %d, return md "LPX64"."LPX64", "
+ "match bits "LPX64"\n", hdr->msg.get.ptl_index,
+ hdr->msg.get.return_wmd.wh_interface_cookie,
+ hdr->msg.get.return_wmd.wh_object_cookie,
+ hdr->msg.get.match_bits);
+ CWARN(" Length %d, src offset %d\n",
+ hdr->msg.get.sink_length,
+ hdr->msg.get.src_offset);
break;
case PTL_MSG_ACK:
- nal->cb_printf(nal, " dst md "LPX64"."LPX64", "
- "manipulated length %d\n",
- hdr->msg.ack.dst_wmd.wh_interface_cookie,
- hdr->msg.ack.dst_wmd.wh_object_cookie,
- hdr->msg.ack.mlength);
+ CWARN(" dst md "LPX64"."LPX64", "
+ "manipulated length %d\n",
+ hdr->msg.ack.dst_wmd.wh_interface_cookie,
+ hdr->msg.ack.dst_wmd.wh_object_cookie,
+ hdr->msg.ack.mlength);
break;
case PTL_MSG_REPLY:
- nal->cb_printf(nal, " dst md "LPX64"."LPX64", "
- "length %d\n",
- hdr->msg.reply.dst_wmd.wh_interface_cookie,
- hdr->msg.reply.dst_wmd.wh_object_cookie,
- hdr->payload_length);
+ CWARN(" dst md "LPX64"."LPX64", "
+ "length %d\n",
+ hdr->msg.reply.dst_wmd.wh_interface_cookie,
+ hdr->msg.reply.dst_wmd.wh_object_cookie,
+ hdr->payload_length);
}
} /* end of print_hdr() */
-void
-lib_parse(nal_cb_t *nal, ptl_hdr_t *hdr, void *private)
+ptl_err_t
+lib_parse(lib_nal_t *nal, ptl_hdr_t *hdr, void *private)
{
unsigned long flags;
ptl_err_t rc;
lib_msg_t *msg;
+
+ /* NB we return PTL_OK if we manage to parse the header and believe
+ * it looks OK. Anything that goes wrong with receiving the
+ * message after that point is the responsibility of the NAL */
/* convert common fields to host byte order */
- hdr->dest_nid = NTOH__u64 (hdr->dest_nid);
+ hdr->type = NTOH__u32 (hdr->type);
hdr->src_nid = NTOH__u64 (hdr->src_nid);
- hdr->dest_pid = NTOH__u32 (hdr->dest_pid);
hdr->src_pid = NTOH__u32 (hdr->src_pid);
- hdr->type = NTOH__u32 (hdr->type);
+ hdr->dest_pid = NTOH__u32 (hdr->dest_pid);
hdr->payload_length = NTOH__u32(hdr->payload_length);
-#if 0
- nal->cb_printf(nal, "%d: lib_parse: nal=%p hdr=%p type=%d\n",
- nal->ni.nid, nal, hdr, hdr->type);
- print_hdr(nal, hdr);
-#endif
- if (hdr->type == PTL_MSG_HELLO) {
+
+ switch (hdr->type) {
+ case PTL_MSG_HELLO: {
/* dest_nid is really ptl_magicversion_t */
ptl_magicversion_t *mv = (ptl_magicversion_t *)&hdr->dest_nid;
- CERROR (LPU64": Dropping unexpected HELLO message: "
+ mv->magic = NTOH__u32(mv->magic);
+ mv->version_major = NTOH__u16(mv->version_major);
+ mv->version_minor = NTOH__u16(mv->version_minor);
+
+ if (mv->magic == PORTALS_PROTO_MAGIC &&
+ mv->version_major == PORTALS_PROTO_VERSION_MAJOR &&
+ mv->version_minor == PORTALS_PROTO_VERSION_MINOR) {
+ CWARN (LPU64": Dropping unexpected HELLO message: "
+ "magic %d, version %d.%d from "LPD64"\n",
+ nal->libnal_ni.ni_pid.nid, mv->magic,
+ mv->version_major, mv->version_minor,
+ hdr->src_nid);
+
+ /* it's good but we don't want it */
+ lib_drop_message(nal, private, hdr);
+ return PTL_OK;
+ }
+
+ /* we got garbage */
+ CERROR (LPU64": Bad HELLO message: "
"magic %d, version %d.%d from "LPD64"\n",
- nal->ni.nid, mv->magic,
+ nal->libnal_ni.ni_pid.nid, mv->magic,
mv->version_major, mv->version_minor,
hdr->src_nid);
- lib_drop_message(nal, private, hdr);
- return;
+ return PTL_FAIL;
}
-
- if (hdr->dest_nid != nal->ni.nid) {
- CERROR(LPU64": Dropping %s message from "LPU64" to "LPU64
- " (not me)\n", nal->ni.nid, hdr_type_string (hdr),
- hdr->src_nid, hdr->dest_nid);
- lib_drop_message(nal, private, hdr);
- return;
+
+ case PTL_MSG_ACK:
+ case PTL_MSG_PUT:
+ case PTL_MSG_GET:
+ case PTL_MSG_REPLY:
+ hdr->dest_nid = NTOH__u64 (hdr->dest_nid);
+ if (hdr->dest_nid != nal->libnal_ni.ni_pid.nid) {
+ CERROR(LPU64": BAD dest NID in %s message from"
+ LPU64" to "LPU64" (not me)\n",
+ nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr),
+ hdr->src_nid, hdr->dest_nid);
+ return PTL_FAIL;
+ }
+ break;
+
+ default:
+ CERROR(LPU64": Bad message type 0x%x from "LPU64"\n",
+ nal->libnal_ni.ni_pid.nid, hdr->type, hdr->src_nid);
+ return PTL_FAIL;
}
- if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */
+ /* We've decided we're not receiving garbage since we can parse the
+ * header. We will return PTL_OK come what may... */
+
+ if (!list_empty (&nal->libnal_ni.ni_test_peers) && /* normally we don't */
fail_peer (nal, hdr->src_nid, 0)) /* shall we now? */
{
CERROR(LPU64": Dropping incoming %s from "LPU64
": simulated failure\n",
- nal->ni.nid, hdr_type_string (hdr),
+ nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr),
hdr->src_nid);
lib_drop_message(nal, private, hdr);
- return;
+ return PTL_OK;
}
msg = lib_msg_alloc(nal);
if (msg == NULL) {
CERROR(LPU64": Dropping incoming %s from "LPU64
": can't allocate a lib_msg_t\n",
- nal->ni.nid, hdr_type_string (hdr),
+ nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr),
hdr->src_nid);
lib_drop_message(nal, private, hdr);
- return;
+ return PTL_OK;
}
switch (hdr->type) {
rc = parse_reply(nal, hdr, private, msg);
break;
default:
- CERROR(LPU64": Dropping <unknown> message from "LPU64
- ": Bad type=0x%x\n", nal->ni.nid, hdr->src_nid,
- hdr->type);
- rc = PTL_FAIL;
+ LASSERT(0);
+ rc = PTL_FAIL; /* no compiler warning please */
break;
}
/* committed... */
lib_finalize(nal, private, msg, rc);
} else {
- state_lock(nal, &flags);
- lib_msg_free(nal, msg); /* expects state_lock held */
- state_unlock(nal, &flags);
+ LIB_LOCK(nal, flags);
+ lib_msg_free(nal, msg); /* expects LIB_LOCK held */
+ LIB_UNLOCK(nal, flags);
lib_drop_message(nal, private, hdr);
}
}
+
+ return PTL_OK;
+ /* That's "OK I can parse it", not "OK I like it" :) */
}
int
-do_PtlPut(nal_cb_t *nal, void *private, void *v_args, void *v_ret)
+lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh,
+ ptl_ack_req_t ack, ptl_process_id_t *id,
+ ptl_pt_index_t portal, ptl_ac_index_t ac,
+ ptl_match_bits_t match_bits,
+ ptl_size_t offset, ptl_hdr_data_t hdr_data)
{
- /*
- * Incoming:
- * ptl_handle_md_t md_in
- * ptl_ack_req_t ack_req_in
- * ptl_process_id_t target_in
- * ptl_pt_index_t portal_in
- * ptl_ac_index_t cookie_in
- * ptl_match_bits_t match_bits_in
- * ptl_size_t offset_in
- *
- * Outgoing:
- */
-
- PtlPut_in *args = v_args;
- ptl_process_id_t *id = &args->target_in;
- PtlPut_out *ret = v_ret;
- lib_ni_t *ni = &nal->ni;
+ lib_nal_t *nal = apinal->nal_data;
+ lib_ni_t *ni = &nal->libnal_ni;
lib_msg_t *msg;
ptl_hdr_t hdr;
lib_md_t *md;
unsigned long flags;
int rc;
- if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */
+ if (!list_empty (&ni->ni_test_peers) && /* normally we don't */
fail_peer (nal, id->nid, 1)) /* shall we now? */
{
- CERROR(LPU64": Dropping PUT to "LPU64": simulated failure\n",
- nal->ni.nid, id->nid);
- return (ret->rc = PTL_PROCESS_INVALID);
+ CERROR("Dropping PUT to "LPU64": simulated failure\n",
+ id->nid);
+ return PTL_PROCESS_INVALID;
}
msg = lib_msg_alloc(nal);
if (msg == NULL) {
CERROR(LPU64": Dropping PUT to "LPU64": ENOMEM on lib_msg_t\n",
- ni->nid, id->nid);
- return (ret->rc = PTL_NO_SPACE);
+ ni->ni_pid.nid, id->nid);
+ return PTL_NO_SPACE;
}
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
- md = ptl_handle2md(&args->md_in, nal);
+ md = ptl_handle2md(mdh, nal);
if (md == NULL || md->threshold == 0) {
lib_msg_free(nal, msg);
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
- return (ret->rc = PTL_MD_INVALID);
+ return PTL_MD_INVALID;
}
- CDEBUG(D_NET, "PtlPut -> %Lu: %lu\n", (unsigned long long)id->nid,
- (unsigned long)id->pid);
+ CDEBUG(D_NET, "PtlPut -> "LPX64"\n", id->nid);
memset (&hdr, 0, sizeof (hdr));
hdr.type = HTON__u32 (PTL_MSG_PUT);
hdr.dest_nid = HTON__u64 (id->nid);
- hdr.src_nid = HTON__u64 (ni->nid);
hdr.dest_pid = HTON__u32 (id->pid);
- hdr.src_pid = HTON__u32 (ni->pid);
+ hdr.src_nid = HTON__u64 (ni->ni_pid.nid);
+ hdr.src_pid = HTON__u32 (ni->ni_pid.pid);
hdr.payload_length = HTON__u32 (md->length);
/* NB handles only looked up by creator (no flips) */
- if (args->ack_req_in == PTL_ACK_REQ) {
+ if (ack == PTL_ACK_REQ) {
hdr.msg.put.ack_wmd.wh_interface_cookie = ni->ni_interface_cookie;
hdr.msg.put.ack_wmd.wh_object_cookie = md->md_lh.lh_cookie;
} else {
hdr.msg.put.ack_wmd = PTL_WIRE_HANDLE_NONE;
}
- hdr.msg.put.match_bits = HTON__u64 (args->match_bits_in);
- hdr.msg.put.ptl_index = HTON__u32 (args->portal_in);
- hdr.msg.put.offset = HTON__u32 (args->offset_in);
- hdr.msg.put.hdr_data = args->hdr_data_in;
+ hdr.msg.put.match_bits = HTON__u64 (match_bits);
+ hdr.msg.put.ptl_index = HTON__u32 (portal);
+ hdr.msg.put.offset = HTON__u32 (offset);
+ hdr.msg.put.hdr_data = hdr_data;
lib_commit_md(nal, md, msg);
msg->ev.type = PTL_EVENT_SEND_END;
- msg->ev.initiator.nid = ni->nid;
- msg->ev.initiator.pid = ni->pid;
- msg->ev.portal = args->portal_in;
- msg->ev.match_bits = args->match_bits_in;
+ msg->ev.initiator.nid = ni->ni_pid.nid;
+ msg->ev.initiator.pid = ni->ni_pid.pid;
+ msg->ev.portal = portal;
+ msg->ev.match_bits = match_bits;
msg->ev.rlength = md->length;
msg->ev.mlength = md->length;
- msg->ev.offset = args->offset_in;
- msg->ev.hdr_data = args->hdr_data_in;
+ msg->ev.offset = offset;
+ msg->ev.hdr_data = hdr_data;
lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
- ni->counters.send_count++;
- ni->counters.send_length += md->length;
+ ni->ni_counters.send_count++;
+ ni->ni_counters.send_length += md->length;
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
- rc = lib_send (nal, private, msg, &hdr, PTL_MSG_PUT,
+ rc = lib_send (nal, NULL, msg, &hdr, PTL_MSG_PUT,
id->nid, id->pid, md, 0, md->length);
if (rc != PTL_OK) {
- CERROR(LPU64": error sending PUT to "LPU64": %d\n",
- ni->nid, id->nid, rc);
- lib_finalize (nal, private, msg, rc);
+ CERROR("Error sending PUT to "LPX64": %d\n",
+ id->nid, rc);
+ lib_finalize (nal, NULL, msg, rc);
}
/* completion will be signalled by an event */
- return ret->rc = PTL_OK;
+ return PTL_OK;
}
lib_msg_t *
-lib_create_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, lib_msg_t *getmsg)
+lib_create_reply_msg (lib_nal_t *nal, ptl_nid_t peer_nid, lib_msg_t *getmsg)
{
/* The NAL can DMA direct to the GET md (i.e. no REPLY msg). This
* returns a msg for the NAL to pass to lib_finalize() when the sink
* CAVEAT EMPTOR: 'getmsg' is the original GET, which is freed when
* lib_finalize() is called on it, so the NAL must call this first */
- lib_ni_t *ni = &nal->ni;
+ lib_ni_t *ni = &nal->libnal_ni;
lib_msg_t *msg = lib_msg_alloc(nal);
lib_md_t *getmd = getmsg->md;
unsigned long flags;
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
LASSERT (getmd->pending > 0);
lib_md_deconstruct(nal, getmd, &msg->ev.mem_desc);
- ni->counters.recv_count++;
- ni->counters.recv_length += getmd->length;
+ ni->ni_counters.recv_count++;
+ ni->ni_counters.recv_length += getmd->length;
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
return msg;
drop_msg:
lib_msg_free(nal, msg);
drop:
- nal->ni.counters.drop_count++;
- nal->ni.counters.drop_length += getmd->length;
+ nal->libnal_ni.ni_counters.drop_count++;
+ nal->libnal_ni.ni_counters.drop_length += getmd->length;
- state_unlock (nal, &flags);
+ LIB_UNLOCK (nal, flags);
return NULL;
}
int
-do_PtlGet(nal_cb_t *nal, void *private, void *v_args, void *v_ret)
+lib_api_get(nal_t *apinal, ptl_handle_md_t *mdh, ptl_process_id_t *id,
+ ptl_pt_index_t portal, ptl_ac_index_t ac,
+ ptl_match_bits_t match_bits, ptl_size_t offset)
{
- /*
- * Incoming:
- * ptl_handle_md_t md_in
- * ptl_process_id_t target_in
- * ptl_pt_index_t portal_in
- * ptl_ac_index_t cookie_in
- * ptl_match_bits_t match_bits_in
- * ptl_size_t offset_in
- *
- * Outgoing:
- */
-
- PtlGet_in *args = v_args;
- ptl_process_id_t *id = &args->target_in;
- PtlGet_out *ret = v_ret;
- lib_ni_t *ni = &nal->ni;
+ lib_nal_t *nal = apinal->nal_data;
+ lib_ni_t *ni = &nal->libnal_ni;
lib_msg_t *msg;
ptl_hdr_t hdr;
lib_md_t *md;
unsigned long flags;
int rc;
- if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */
+ if (!list_empty (&ni->ni_test_peers) && /* normally we don't */
fail_peer (nal, id->nid, 1)) /* shall we now? */
{
- CERROR(LPU64": Dropping PUT to "LPU64": simulated failure\n",
- nal->ni.nid, id->nid);
- return (ret->rc = PTL_PROCESS_INVALID);
+ CERROR("Dropping PUT to "LPX64": simulated failure\n",
+ id->nid);
+ return PTL_PROCESS_INVALID;
}
msg = lib_msg_alloc(nal);
if (msg == NULL) {
- CERROR(LPU64": Dropping GET to "LPU64": ENOMEM on lib_msg_t\n",
- ni->nid, id->nid);
- return (ret->rc = PTL_NO_SPACE);
+ CERROR("Dropping GET to "LPU64": ENOMEM on lib_msg_t\n",
+ id->nid);
+ return PTL_NO_SPACE;
}
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
- md = ptl_handle2md(&args->md_in, nal);
+ md = ptl_handle2md(mdh, nal);
if (md == NULL || !md->threshold) {
lib_msg_free(nal, msg);
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
- return ret->rc = PTL_MD_INVALID;
+ return PTL_MD_INVALID;
}
CDEBUG(D_NET, "PtlGet -> %Lu: %lu\n", (unsigned long long)id->nid,
memset (&hdr, 0, sizeof (hdr));
hdr.type = HTON__u32 (PTL_MSG_GET);
hdr.dest_nid = HTON__u64 (id->nid);
- hdr.src_nid = HTON__u64 (ni->nid);
hdr.dest_pid = HTON__u32 (id->pid);
- hdr.src_pid = HTON__u32 (ni->pid);
+ hdr.src_nid = HTON__u64 (ni->ni_pid.nid);
+ hdr.src_pid = HTON__u32 (ni->ni_pid.pid);
hdr.payload_length = 0;
/* NB handles only looked up by creator (no flips) */
hdr.msg.get.return_wmd.wh_interface_cookie = ni->ni_interface_cookie;
hdr.msg.get.return_wmd.wh_object_cookie = md->md_lh.lh_cookie;
- hdr.msg.get.match_bits = HTON__u64 (args->match_bits_in);
- hdr.msg.get.ptl_index = HTON__u32 (args->portal_in);
- hdr.msg.get.src_offset = HTON__u32 (args->offset_in);
+ hdr.msg.get.match_bits = HTON__u64 (match_bits);
+ hdr.msg.get.ptl_index = HTON__u32 (portal);
+ hdr.msg.get.src_offset = HTON__u32 (offset);
hdr.msg.get.sink_length = HTON__u32 (md->length);
lib_commit_md(nal, md, msg);
msg->ev.type = PTL_EVENT_SEND_END;
- msg->ev.initiator.nid = ni->nid;
- msg->ev.initiator.pid = ni->pid;
- msg->ev.portal = args->portal_in;
- msg->ev.match_bits = args->match_bits_in;
+ msg->ev.initiator = ni->ni_pid;
+ msg->ev.portal = portal;
+ msg->ev.match_bits = match_bits;
msg->ev.rlength = md->length;
msg->ev.mlength = md->length;
- msg->ev.offset = args->offset_in;
+ msg->ev.offset = offset;
msg->ev.hdr_data = 0;
lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
- ni->counters.send_count++;
+ ni->ni_counters.send_count++;
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
- rc = lib_send (nal, private, msg, &hdr, PTL_MSG_GET,
+ rc = lib_send (nal, NULL, msg, &hdr, PTL_MSG_GET,
id->nid, id->pid, NULL, 0, 0);
if (rc != PTL_OK) {
CERROR(LPU64": error sending GET to "LPU64": %d\n",
- ni->nid, id->nid, rc);
- lib_finalize (nal, private, msg, rc);
+ ni->ni_pid.nid, id->nid, rc);
+ lib_finalize (nal, NULL, msg, rc);
}
/* completion will be signalled by an event */
- return ret->rc = PTL_OK;
+ return PTL_OK;
}
void lib_assert_wire_constants (void)
#include <portals/lib-p30.h>
void
-lib_enq_event_locked (nal_cb_t *nal, void *private,
+lib_enq_event_locked (lib_nal_t *nal, void *private,
lib_eq_t *eq, ptl_event_t *ev)
{
ptl_event_t *eq_slot;
- int rc;
- ev->sequence = eq->sequence++; /* Allocate the next queue slot */
-
- /* size must be a power of 2 to handle a wrapped sequence # */
- LASSERT (eq->size != 0 &&
- eq->size == LOWEST_BIT_SET (eq->size));
- eq_slot = eq->base + (ev->sequence & (eq->size - 1));
+ ev->sequence = eq->eq_enq_seq++; /* Allocate the next queue slot */
- /* Copy the event into the allocated slot, ensuring all the rest of
- * the event's contents have been copied _before_ the sequence
- * number gets updated. A processes 'getting' an event waits on
- * the next queue slot's sequence to be 'new'. When it is, _all_
- * other event fields had better be consistent. I assert
- * 'sequence' is the last member, so I only need a 2 stage copy. */
+ /* size must be a power of 2 to handle sequence # overflow */
+ LASSERT (eq->eq_size != 0 &&
+ eq->eq_size == LOWEST_BIT_SET (eq->eq_size));
+ eq_slot = eq->eq_events + (ev->sequence & (eq->eq_size - 1));
- LASSERT(sizeof (ptl_event_t) ==
- offsetof(ptl_event_t, sequence) + sizeof(ev->sequence));
+ /* There is no race since both event consumers and event producers
+ * take the LIB_LOCK(), so we don't screw around with memory
+ * barriers, setting the sequence number last or weird structure
+ * layout assertions. */
+ *eq_slot = *ev;
- rc = nal->cb_write (nal, private, (user_ptr)eq_slot, ev,
- offsetof (ptl_event_t, sequence));
- LASSERT (rc == PTL_OK);
+ /* Call the callback handler (if any) */
+ if (eq->eq_callback != NULL)
+ eq->eq_callback (eq_slot);
+ /* Wake anyone sleeping for an event (see lib-eq.c) */
#ifdef __KERNEL__
- barrier();
-#endif
- /* Updating the sequence number is what makes the event 'new' NB if
- * the cb_write below isn't atomic, this could cause a race with
- * PtlEQGet */
- rc = nal->cb_write(nal, private, (user_ptr)&eq_slot->sequence,
- (void *)&ev->sequence,sizeof (ev->sequence));
- LASSERT (rc == PTL_OK);
-
-#ifdef __KERNEL__
- barrier();
+ if (waitqueue_active(&nal->libnal_ni.ni_waitq))
+ wake_up_all(&nal->libnal_ni.ni_waitq);
+#else
+ pthread_cond_broadcast(&nal->libnal_ni.ni_cond);
#endif
-
- if (nal->cb_callback != NULL)
- nal->cb_callback(nal, private, eq, ev);
- else if (eq->event_callback != NULL)
- eq->event_callback(ev);
}
void
-lib_finalize(nal_cb_t *nal, void *private, lib_msg_t *msg, ptl_err_t status)
+lib_finalize (lib_nal_t *nal, void *private, lib_msg_t *msg, ptl_err_t status)
{
lib_md_t *md;
int unlink;
memset (&ack, 0, sizeof (ack));
ack.type = HTON__u32 (PTL_MSG_ACK);
ack.dest_nid = HTON__u64 (msg->ev.initiator.nid);
- ack.src_nid = HTON__u64 (nal->ni.nid);
ack.dest_pid = HTON__u32 (msg->ev.initiator.pid);
- ack.src_pid = HTON__u32 (nal->ni.pid);
+ ack.src_nid = HTON__u64 (nal->libnal_ni.ni_pid.nid);
+ ack.src_pid = HTON__u32 (nal->libnal_ni.ni_pid.pid);
ack.payload_length = 0;
ack.msg.ack.dst_wmd = msg->ack_wmd;
md = msg->md;
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
/* Now it's safe to drop my caller's ref */
md->pending--;
lib_md_unlink(nal, md);
list_del (&msg->msg_list);
- nal->ni.counters.msgs_alloc--;
+ nal->libnal_ni.ni_counters.msgs_alloc--;
lib_msg_free(nal, msg);
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
}
#define DEBUG_SUBSYSTEM S_PORTALS
#include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
#define MAX_DIST 18446744073709551615ULL
-int do_PtlNIStatus(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int lib_api_ni_status (nal_t *apinal, ptl_sr_index_t sr_idx,
+ ptl_sr_value_t *status)
{
- /*
- * Incoming:
- * ptl_handle_ni_t interface_in
- * ptl_sr_index_t register_in
- *
- * Outgoing:
- * ptl_sr_value_t * status_out
- */
-
- PtlNIStatus_in *args = v_args;
- PtlNIStatus_out *ret = v_ret;
- lib_ni_t *ni = &nal->ni;
- lib_counters_t *count = &ni->counters;
-
- if (!args)
- return ret->rc = PTL_SEGV;
-
- ret->rc = PTL_OK;
- ret->status_out = 0;
-
- /*
- * I hate this sort of code.... Hash tables, offset lists?
- * Treat the counters as an array of ints?
- */
- if (args->register_in == PTL_SR_DROP_COUNT)
- ret->status_out = count->drop_count;
-
- else if (args->register_in == PTL_SR_DROP_LENGTH)
- ret->status_out = count->drop_length;
-
- else if (args->register_in == PTL_SR_RECV_COUNT)
- ret->status_out = count->recv_count;
-
- else if (args->register_in == PTL_SR_RECV_LENGTH)
- ret->status_out = count->recv_length;
-
- else if (args->register_in == PTL_SR_SEND_COUNT)
- ret->status_out = count->send_count;
-
- else if (args->register_in == PTL_SR_SEND_LENGTH)
- ret->status_out = count->send_length;
-
- else if (args->register_in == PTL_SR_MSGS_MAX)
- ret->status_out = count->msgs_max;
- else
- ret->rc = PTL_SR_INDEX_INVALID;
-
- return ret->rc;
+ lib_nal_t *nal = apinal->nal_data;
+ lib_ni_t *ni = &nal->libnal_ni;
+ lib_counters_t *count = &ni->ni_counters;
+
+ switch (sr_idx) {
+ case PTL_SR_DROP_COUNT:
+ *status = count->drop_count;
+ return PTL_OK;
+ case PTL_SR_DROP_LENGTH:
+ *status = count->drop_length;
+ return PTL_OK;
+ case PTL_SR_RECV_COUNT:
+ *status = count->recv_count;
+ return PTL_OK;
+ case PTL_SR_RECV_LENGTH:
+ *status = count->recv_length;
+ return PTL_OK;
+ case PTL_SR_SEND_COUNT:
+ *status = count->send_count;
+ return PTL_OK;
+ case PTL_SR_SEND_LENGTH:
+ *status = count->send_length;
+ return PTL_OK;
+ case PTL_SR_MSGS_MAX:
+ *status = count->msgs_max;
+ return PTL_OK;
+ default:
+ *status = 0;
+ return PTL_SR_INDEX_INVALID;
+ }
}
-int do_PtlNIDist(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int lib_api_ni_dist (nal_t *apinal, ptl_process_id_t *pid, unsigned long *dist)
{
- /*
- * Incoming:
- * ptl_handle_ni_t interface_in
- * ptl_process_id_t process_in
-
- *
- * Outgoing:
- * unsigned long * distance_out
-
- */
-
- PtlNIDist_in *args = v_args;
- PtlNIDist_out *ret = v_ret;
-
- unsigned long dist;
- ptl_process_id_t id_in = args->process_in;
- ptl_nid_t nid;
- int rc;
-
- nid = id_in.nid;
-
- if ((rc = nal->cb_dist(nal, nid, &dist)) != 0) {
- ret->distance_out = (unsigned long) MAX_DIST;
- return PTL_PROCESS_INVALID;
- }
-
- ret->distance_out = dist;
+ lib_nal_t *nal = apinal->nal_data;
- return ret->rc = PTL_OK;
+ return (nal->libnal_dist(nal, pid->nid, dist));
}
# include <unistd.h>
#endif
#include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
-int do_PtlGetId(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int
+lib_api_get_id(nal_t *apinal, ptl_process_id_t *pid)
{
- /*
- * Incoming:
- * ptl_handle_ni_t handle_in
- *
- * Outgoing:
- * ptl_process_id_t * id_out
- * ptl_id_t * gsize_out
- */
-
- PtlGetId_out *ret = v_ret;
- lib_ni_t *ni = &nal->ni;
-
- ret->id_out.nid = ni->nid;
- ret->id_out.pid = ni->pid;
-
- return ret->rc = PTL_OK;
+ lib_nal_t *nal = apinal->nal_data;
+
+ *pid = nal->libnal_ni.ni_pid;
+ return PTL_OK;
}
EXPORT_SYMBOL(ptl_unregister_nal);
EXPORT_SYMBOL(ptl_err_str);
-EXPORT_SYMBOL(lib_dispatch);
EXPORT_SYMBOL(PtlMEAttach);
EXPORT_SYMBOL(PtlMEInsert);
EXPORT_SYMBOL(PtlMEUnlink);
EXPORT_SYMBOL(lib_create_reply_msg);
EXPORT_SYMBOL(lib_init);
EXPORT_SYMBOL(lib_fini);
-EXPORT_SYMBOL(dispatch_name);
MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
MODULE_DESCRIPTION("Portals v3.1");
int port;
if (pidrequest==(unsigned short)PTL_PID_ANY) port = 0;
else port=pidrequest;
- t->nal_cb->ni.nid=get_node_id();
- t->nal_cb->ni.pid=port;
+ t->lib_nal->libnal_ni.ni_pid.nid=get_node_id();
+ t->lib_nal->libnal_ni.ni_pid.pid=port;
}
#else
in_addr = get_node_id();
t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */
- t->nal_cb->ni.nid = ((in_addr & PNAL_HOSTID_MASK)
- << PNAL_VNODE_SHIFT)
- + virtnode;
-
+ t->lib_nal->libnal_ni.ni_pid.nid = ((in_addr & PNAL_HOSTID_MASK)
+ << PNAL_VNODE_SHIFT)
+ + virtnode;
pid=pidrequest;
/* TODO: Support of pid PTL_ID_ANY with virtual nodes needs more work. */
#ifdef notyet
return;
}
else port = ((virtnode << PNAL_VNODE_SHIFT) + pid) + PNAL_BASE_PORT;
- t->nal_cb->ni.pid=pid;
+ t->lib_nal->libnal_ni.ni_pid.pid=pid;
}
#endif
typedef struct bridge {
int alive;
- nal_cb_t *nal_cb;
+ lib_nal_t *lib_nal;
void *lower;
void *local;
void (*shutdown)(struct bridge *);
syscall(SYS_write, p->notifier[0], buf, sizeof(buf));
}
-/* Function: forward
- * Arguments: nal_t *nal: pointer to my top-side nal structure
- * id: the command to pass to the lower layer
- * args, args_len:pointer to and length of the request
- * ret, ret_len: pointer to and size of the result
- * Returns: a portals status code
- *
- * forwards a packaged api call from the 'api' side to the 'library'
- * side, and collects the result
- */
-static int procbridge_forward(nal_t *n, int id, void *args, size_t args_len,
- void *ret, size_t ret_len)
-{
- bridge b = (bridge) n->nal_data;
-
- if (id == PTL_FINI) {
- lib_fini(b->nal_cb);
-
- if (b->shutdown)
- (*b->shutdown)(b);
- }
-
- lib_dispatch(b->nal_cb, NULL, id, args, ret);
-
- return (PTL_OK);
-}
-
-
/* Function: shutdown
* Arguments: nal: a pointer to my top side nal structure
* ni: my network interface index
*/
static void procbridge_shutdown(nal_t *n)
{
- bridge b=(bridge)n->nal_data;
+ lib_nal_t *nal = n->nal_data;
+ bridge b=(bridge)nal->libnal_data;
procbridge p=(procbridge)b->local;
p->nal_flags |= NAL_FLAG_STOPPING;
}
-static void procbridge_lock(nal_t * n, unsigned long *flags)
-{
- bridge b=(bridge)n->nal_data;
- procbridge p=(procbridge)b->local;
-
- pthread_mutex_lock(&p->mutex);
-}
-
-static void procbridge_unlock(nal_t * n, unsigned long *flags)
-{
- bridge b=(bridge)n->nal_data;
- procbridge p=(procbridge)b->local;
-
- pthread_mutex_unlock(&p->mutex);
-}
-
-/* Function: yield
- * Arguments: pid:
- *
- * this function was originally intended to allow the
- * lower half thread to be scheduled to allow progress. we
- * overload it to explicitly block until signalled by the
- * lower half.
- */
-static int procbridge_yield(nal_t *n, unsigned long *flags, int milliseconds)
-{
- bridge b=(bridge)n->nal_data;
- procbridge p=(procbridge)b->local;
-
- if (milliseconds == 0)
- return 0;
-
- if (milliseconds < 0) {
- pthread_cond_wait(&p->cond,&p->mutex);
- } else {
- struct timeval then;
- struct timeval now;
- struct timespec timeout;
-
- gettimeofday(&then, NULL);
- timeout.tv_sec = then.tv_sec + milliseconds/1000;
- timeout.tv_nsec = then.tv_usec * 1000 + milliseconds % 1000 * 1000000;
- if (timeout.tv_nsec >= 1000000000) {
- timeout.tv_sec++;
- timeout.tv_nsec -= 1000000000;
- }
-
- pthread_cond_timedwait(&p->cond, &p->mutex, &timeout);
-
- gettimeofday(&now, NULL);
- milliseconds -= (now.tv_sec - then.tv_sec) * 1000 +
- (now.tv_usec - then.tv_usec) / 1000;
-
- if (milliseconds < 0)
- milliseconds = 0;
- }
-
- return (milliseconds);
-}
-
/* forward decl */
extern int procbridge_startup (nal_t *, ptl_pid_t,
ptl_ni_limits_t *, ptl_ni_limits_t *);
/* api_nal
* the interface vector to allow the generic code to access
- * this nal. this is seperate from the library side nal_cb.
+ * this nal. this is separate from the library side lib_nal.
* TODO: should be dyanmically allocated
*/
nal_t procapi_nal = {
nal_data: NULL,
- startup: procbridge_startup,
- shutdown: procbridge_shutdown,
- forward: procbridge_forward,
- yield: procbridge_yield,
- lock: procbridge_lock,
- unlock: procbridge_unlock
+ nal_ni_init: procbridge_startup,
+ nal_ni_fini: procbridge_shutdown,
};
ptl_nid_t tcpnal_mynid;
b=(bridge)malloc(sizeof(struct bridge));
p=(procbridge)malloc(sizeof(struct procbridge));
- nal->nal_data=b;
b->local=p;
args.nia_requested_pid = requested_pid;
args.nia_actual_limits = actual_limits;
args.nia_nal_type = nal_type;
args.nia_bridge = b;
+ args.nia_apinal = nal;
/* init procbridge */
pthread_mutex_init(&p->mutex,0);
if (p->nal_flags & NAL_FLAG_STOPPED)
return PTL_FAIL;
- b->nal_cb->ni.nid = tcpnal_mynid;
+ b->lib_nal->libnal_ni.ni_pid.nid = tcpnal_mynid;
return PTL_OK;
}
int nal_flags;
- pthread_mutex_t nal_cb_lock;
} *procbridge;
typedef struct nal_init_args {
ptl_ni_limits_t *nia_actual_limits;
int nia_nal_type;
bridge nia_bridge;
+ nal_t *nia_apinal;
} nal_init_args_t;
extern void *nal_thread(void *);
/* the following functions are stubs to satisfy the nal definition
without doing anything particularily useful*/
-static ptl_err_t nal_write(nal_cb_t *nal,
- void *private,
- user_ptr dst_addr,
- void *src_addr,
- size_t len)
-{
- memcpy(dst_addr, src_addr, len);
- return PTL_OK;
-}
-
-static ptl_err_t nal_read(nal_cb_t * nal,
- void *private,
- void *dst_addr,
- user_ptr src_addr,
- size_t len)
-{
- memcpy(dst_addr, src_addr, len);
- return PTL_OK;
-}
-
-static void *nal_malloc(nal_cb_t *nal,
- size_t len)
-{
- void *buf = malloc(len);
- return buf;
-}
-
-static void nal_free(nal_cb_t *nal,
- void *buf,
- size_t len)
-{
- free(buf);
-}
-
-static void nal_printf(nal_cb_t *nal,
- const char *fmt,
- ...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- vprintf(fmt, ap);
- va_end(ap);
-}
-
-
-static void nal_cli(nal_cb_t *nal,
- unsigned long *flags)
-{
- bridge b = (bridge) nal->nal_data;
- procbridge p = (procbridge) b->local;
-
- pthread_mutex_lock(&p->mutex);
-}
-
-
-static void nal_sti(nal_cb_t *nal,
- unsigned long *flags)
-{
- bridge b = (bridge)nal->nal_data;
- procbridge p = (procbridge) b->local;
-
- pthread_mutex_unlock(&p->mutex);
-}
-
-static void nal_callback(nal_cb_t *nal, void *private,
- lib_eq_t *eq, ptl_event_t *ev)
-{
- bridge b = (bridge)nal->nal_data;
- procbridge p = (procbridge) b->local;
-
- /* holding p->mutex */
- if (eq->event_callback != NULL)
- eq->event_callback(ev);
-
- pthread_cond_broadcast(&p->cond);
-}
-
-static int nal_dist(nal_cb_t *nal,
+static int nal_dist(lib_nal_t *nal,
ptl_nid_t nid,
unsigned long *dist)
{
ptl_process_id_t process_id;
int nal_type;
- b->nal_cb=(nal_cb_t *)malloc(sizeof(nal_cb_t));
- b->nal_cb->nal_data=b;
- b->nal_cb->cb_read=nal_read;
- b->nal_cb->cb_write=nal_write;
- b->nal_cb->cb_malloc=nal_malloc;
- b->nal_cb->cb_free=nal_free;
- b->nal_cb->cb_map=NULL;
- b->nal_cb->cb_unmap=NULL;
- b->nal_cb->cb_printf=nal_printf;
- b->nal_cb->cb_cli=nal_cli;
- b->nal_cb->cb_sti=nal_sti;
- b->nal_cb->cb_callback=nal_callback;
- b->nal_cb->cb_dist=nal_dist;
+ b->lib_nal=(lib_nal_t *)malloc(sizeof(lib_nal_t));
+ b->lib_nal->libnal_data=b;
+ b->lib_nal->libnal_map=NULL;
+ b->lib_nal->libnal_unmap=NULL;
+ b->lib_nal->libnal_dist=nal_dist;
nal_type = args->nia_nal_type;
- /* Wierd, but this sets b->nal_cb->ni.{nid,pid}, which lib_init() is
- * about to do from the process_id passed to it...*/
+ /* Weird, but this sets b->lib_nal->libnal_ni.ni_pid.{nid,pid}, which
+ * lib_init() is about to do from the process_id passed to it...*/
set_address(b,args->nia_requested_pid);
- process_id.pid = b->nal_cb->ni.pid;
- process_id.nid = b->nal_cb->ni.nid;
+ process_id = b->lib_nal->libnal_ni.ni_pid;
if (nal_table[nal_type]) rc=(*nal_table[nal_type])(b);
/* initialize the generic 'library' level code */
- rc = lib_init(b->nal_cb, process_id,
+ rc = lib_init(b->lib_nal, args->nia_apinal,
+ process_id,
args->nia_requested_limits,
args->nia_actual_limits);
int port;
if (pidrequest==(unsigned short)PTL_PID_ANY) port = 0;
else port=pidrequest;
- t->nal_cb->ni.nid=get_node_id();
- t->nal_cb->ni.pid=port;
+ t->lib_nal->libnal_ni.ni_pid.nid=get_node_id();
+ t->lib_nal->libnal_ni.ni_pid.pid=port;
}
#else
in_addr = get_node_id();
t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */
- t->nal_cb->ni.nid = ((in_addr & PNAL_HOSTID_MASK)
- << PNAL_VNODE_SHIFT)
- + virtnode;
-
+ t->lib_nal->libnal_ni.ni_pid.nid = ((in_addr & PNAL_HOSTID_MASK)
+ << PNAL_VNODE_SHIFT)
+ + virtnode;
pid=pidrequest;
/* TODO: Support of pid PTL_ID_ANY with virtual nodes needs more work. */
#ifdef notyet
return;
}
else port = ((virtnode << PNAL_VNODE_SHIFT) + pid) + PNAL_BASE_PORT;
- t->nal_cb->ni.pid=pid;
+ t->lib_nal->libnal_ni.ni_pid.pid=pid;
}
#endif
typedef struct bridge {
int alive;
- nal_cb_t *nal_cb;
+ lib_nal_t *lib_nal;
void *lower;
void *local;
void (*shutdown)(struct bridge *);
syscall(SYS_write, p->notifier[0], buf, sizeof(buf));
}
-/* Function: forward
- * Arguments: nal_t *nal: pointer to my top-side nal structure
- * id: the command to pass to the lower layer
- * args, args_len:pointer to and length of the request
- * ret, ret_len: pointer to and size of the result
- * Returns: a portals status code
- *
- * forwards a packaged api call from the 'api' side to the 'library'
- * side, and collects the result
- */
-static int procbridge_forward(nal_t *n, int id, void *args, size_t args_len,
- void *ret, size_t ret_len)
-{
- bridge b = (bridge) n->nal_data;
-
- if (id == PTL_FINI) {
- lib_fini(b->nal_cb);
-
- if (b->shutdown)
- (*b->shutdown)(b);
- }
-
- lib_dispatch(b->nal_cb, NULL, id, args, ret);
-
- return (PTL_OK);
-}
-
-
/* Function: shutdown
* Arguments: nal: a pointer to my top side nal structure
* ni: my network interface index
*/
static void procbridge_shutdown(nal_t *n)
{
- bridge b=(bridge)n->nal_data;
+ lib_nal_t *nal = n->nal_data;
+ bridge b=(bridge)nal->libnal_data;
procbridge p=(procbridge)b->local;
p->nal_flags |= NAL_FLAG_STOPPING;
}
-static void procbridge_lock(nal_t * n, unsigned long *flags)
-{
- bridge b=(bridge)n->nal_data;
- procbridge p=(procbridge)b->local;
-
- pthread_mutex_lock(&p->mutex);
-}
-
-static void procbridge_unlock(nal_t * n, unsigned long *flags)
-{
- bridge b=(bridge)n->nal_data;
- procbridge p=(procbridge)b->local;
-
- pthread_mutex_unlock(&p->mutex);
-}
-
-/* Function: yield
- * Arguments: pid:
- *
- * this function was originally intended to allow the
- * lower half thread to be scheduled to allow progress. we
- * overload it to explicitly block until signalled by the
- * lower half.
- */
-static int procbridge_yield(nal_t *n, unsigned long *flags, int milliseconds)
-{
- bridge b=(bridge)n->nal_data;
- procbridge p=(procbridge)b->local;
-
- if (milliseconds == 0)
- return 0;
-
- if (milliseconds < 0) {
- pthread_cond_wait(&p->cond,&p->mutex);
- } else {
- struct timeval then;
- struct timeval now;
- struct timespec timeout;
-
- gettimeofday(&then, NULL);
- timeout.tv_sec = then.tv_sec + milliseconds/1000;
- timeout.tv_nsec = then.tv_usec * 1000 + milliseconds % 1000 * 1000000;
- if (timeout.tv_nsec >= 1000000000) {
- timeout.tv_sec++;
- timeout.tv_nsec -= 1000000000;
- }
-
- pthread_cond_timedwait(&p->cond, &p->mutex, &timeout);
-
- gettimeofday(&now, NULL);
- milliseconds -= (now.tv_sec - then.tv_sec) * 1000 +
- (now.tv_usec - then.tv_usec) / 1000;
-
- if (milliseconds < 0)
- milliseconds = 0;
- }
-
- return (milliseconds);
-}
-
/* forward decl */
extern int procbridge_startup (nal_t *, ptl_pid_t,
ptl_ni_limits_t *, ptl_ni_limits_t *);
/* api_nal
* the interface vector to allow the generic code to access
- * this nal. this is seperate from the library side nal_cb.
+ * this nal. this is separate from the library side lib_nal.
* TODO: should be dyanmically allocated
*/
nal_t procapi_nal = {
nal_data: NULL,
- startup: procbridge_startup,
- shutdown: procbridge_shutdown,
- forward: procbridge_forward,
- yield: procbridge_yield,
- lock: procbridge_lock,
- unlock: procbridge_unlock
+ nal_ni_init: procbridge_startup,
+ nal_ni_fini: procbridge_shutdown,
};
ptl_nid_t tcpnal_mynid;
b=(bridge)malloc(sizeof(struct bridge));
p=(procbridge)malloc(sizeof(struct procbridge));
- nal->nal_data=b;
b->local=p;
args.nia_requested_pid = requested_pid;
args.nia_actual_limits = actual_limits;
args.nia_nal_type = nal_type;
args.nia_bridge = b;
+ args.nia_apinal = nal;
/* init procbridge */
pthread_mutex_init(&p->mutex,0);
if (p->nal_flags & NAL_FLAG_STOPPED)
return PTL_FAIL;
- b->nal_cb->ni.nid = tcpnal_mynid;
+ b->lib_nal->libnal_ni.ni_pid.nid = tcpnal_mynid;
return PTL_OK;
}
int nal_flags;
- pthread_mutex_t nal_cb_lock;
} *procbridge;
typedef struct nal_init_args {
ptl_ni_limits_t *nia_actual_limits;
int nia_nal_type;
bridge nia_bridge;
+ nal_t *nia_apinal;
} nal_init_args_t;
extern void *nal_thread(void *);
/* the following functions are stubs to satisfy the nal definition
without doing anything particularily useful*/
-static ptl_err_t nal_write(nal_cb_t *nal,
- void *private,
- user_ptr dst_addr,
- void *src_addr,
- size_t len)
-{
- memcpy(dst_addr, src_addr, len);
- return PTL_OK;
-}
-
-static ptl_err_t nal_read(nal_cb_t * nal,
- void *private,
- void *dst_addr,
- user_ptr src_addr,
- size_t len)
-{
- memcpy(dst_addr, src_addr, len);
- return PTL_OK;
-}
-
-static void *nal_malloc(nal_cb_t *nal,
- size_t len)
-{
- void *buf = malloc(len);
- return buf;
-}
-
-static void nal_free(nal_cb_t *nal,
- void *buf,
- size_t len)
-{
- free(buf);
-}
-
-static void nal_printf(nal_cb_t *nal,
- const char *fmt,
- ...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- vprintf(fmt, ap);
- va_end(ap);
-}
-
-
-static void nal_cli(nal_cb_t *nal,
- unsigned long *flags)
-{
- bridge b = (bridge) nal->nal_data;
- procbridge p = (procbridge) b->local;
-
- pthread_mutex_lock(&p->mutex);
-}
-
-
-static void nal_sti(nal_cb_t *nal,
- unsigned long *flags)
-{
- bridge b = (bridge)nal->nal_data;
- procbridge p = (procbridge) b->local;
-
- pthread_mutex_unlock(&p->mutex);
-}
-
-static void nal_callback(nal_cb_t *nal, void *private,
- lib_eq_t *eq, ptl_event_t *ev)
-{
- bridge b = (bridge)nal->nal_data;
- procbridge p = (procbridge) b->local;
-
- /* holding p->mutex */
- if (eq->event_callback != NULL)
- eq->event_callback(ev);
-
- pthread_cond_broadcast(&p->cond);
-}
-
-static int nal_dist(nal_cb_t *nal,
+static int nal_dist(lib_nal_t *nal,
ptl_nid_t nid,
unsigned long *dist)
{
ptl_process_id_t process_id;
int nal_type;
- b->nal_cb=(nal_cb_t *)malloc(sizeof(nal_cb_t));
- b->nal_cb->nal_data=b;
- b->nal_cb->cb_read=nal_read;
- b->nal_cb->cb_write=nal_write;
- b->nal_cb->cb_malloc=nal_malloc;
- b->nal_cb->cb_free=nal_free;
- b->nal_cb->cb_map=NULL;
- b->nal_cb->cb_unmap=NULL;
- b->nal_cb->cb_printf=nal_printf;
- b->nal_cb->cb_cli=nal_cli;
- b->nal_cb->cb_sti=nal_sti;
- b->nal_cb->cb_callback=nal_callback;
- b->nal_cb->cb_dist=nal_dist;
+ b->lib_nal=(lib_nal_t *)malloc(sizeof(lib_nal_t));
+ b->lib_nal->libnal_data=b;
+ b->lib_nal->libnal_map=NULL;
+ b->lib_nal->libnal_unmap=NULL;
+ b->lib_nal->libnal_dist=nal_dist;
nal_type = args->nia_nal_type;
- /* Wierd, but this sets b->nal_cb->ni.{nid,pid}, which lib_init() is
- * about to do from the process_id passed to it...*/
+ /* Weird, but this sets b->lib_nal->libnal_ni.ni_pid.{nid,pid}, which
+ * lib_init() is about to do from the process_id passed to it...*/
set_address(b,args->nia_requested_pid);
- process_id.pid = b->nal_cb->ni.pid;
- process_id.nid = b->nal_cb->ni.nid;
+ process_id = b->lib_nal->libnal_ni.ni_pid;
if (nal_table[nal_type]) rc=(*nal_table[nal_type])(b);
/* initialize the generic 'library' level code */
- rc = lib_init(b->nal_cb, process_id,
+ rc = lib_init(b->lib_nal, args->nia_apinal,
+ process_id,
args->nia_requested_limits,
args->nia_actual_limits);
*
* sends a packet to the peer, after insuring that a connection exists
*/
-ptl_err_t tcpnal_send(nal_cb_t *n,
+ptl_err_t tcpnal_send(lib_nal_t *n,
void *private,
lib_msg_t *cookie,
ptl_hdr_t *hdr,
size_t len)
{
connection c;
- bridge b=(bridge)n->nal_data;
+ bridge b=(bridge)n->libnal_data;
struct iovec tiov[257];
static pthread_mutex_t send_lock = PTHREAD_MUTEX_INITIALIZER;
ptl_err_t rc = PTL_OK;
/* Function: tcpnal_recv
- * Arguments: nal_cb_t *nal: pointer to my nal control block
+ * Arguments: lib_nal_t *nal: pointer to my nal control block
* void *private: connection pointer passed through
* lib_parse()
* lib_msg_t *cookie: passed back to portals library
* blocking read of the requested data. must drain out the
* difference of mainpulated and requested lengths from the network
*/
-ptl_err_t tcpnal_recv(nal_cb_t *n,
+ptl_err_t tcpnal_recv(lib_nal_t *n,
void *private,
lib_msg_t *cookie,
unsigned int niov,
ptl_hdr_t hdr;
if (read_connection(c, (unsigned char *)&hdr, sizeof(hdr))){
- lib_parse(b->nal_cb, &hdr, c);
+ lib_parse(b->lib_nal, &hdr, c);
+ /*TODO: check error status*/
return(1);
}
return(0);
{
manager m;
- b->nal_cb->cb_send=tcpnal_send;
- b->nal_cb->cb_recv=tcpnal_recv;
+ b->lib_nal->libnal_send=tcpnal_send;
+ b->lib_nal->libnal_recv=tcpnal_recv;
b->shutdown=tcpnal_shutdown;
- if (!(m=init_connections(PNAL_PORT(b->nal_cb->ni.nid,
- b->nal_cb->ni.pid),
+ if (!(m=init_connections(PNAL_PORT(b->lib_nal->libnal_ni.ni_pid.nid,
+ b->lib_nal->libnal_ni.ni_pid.pid),
from_connection,b))){
/* TODO: this needs to shut down the
newly created junk */
return(PTL_NAL_FAILED);
}
/* XXX cfs hack */
- b->nal_cb->ni.pid=0;
+ b->lib_nal->libnal_ni.ni_pid.pid=0;
b->lower=m;
return(PTL_OK);
}
*
* sends a packet to the peer, after insuring that a connection exists
*/
-ptl_err_t tcpnal_send(nal_cb_t *n,
+ptl_err_t tcpnal_send(lib_nal_t *n,
void *private,
lib_msg_t *cookie,
ptl_hdr_t *hdr,
size_t len)
{
connection c;
- bridge b=(bridge)n->nal_data;
+ bridge b=(bridge)n->libnal_data;
struct iovec tiov[257];
static pthread_mutex_t send_lock = PTHREAD_MUTEX_INITIALIZER;
ptl_err_t rc = PTL_OK;
/* Function: tcpnal_recv
- * Arguments: nal_cb_t *nal: pointer to my nal control block
+ * Arguments: lib_nal_t *nal: pointer to my nal control block
* void *private: connection pointer passed through
* lib_parse()
* lib_msg_t *cookie: passed back to portals library
* blocking read of the requested data. must drain out the
* difference of mainpulated and requested lengths from the network
*/
-ptl_err_t tcpnal_recv(nal_cb_t *n,
+ptl_err_t tcpnal_recv(lib_nal_t *n,
void *private,
lib_msg_t *cookie,
unsigned int niov,
ptl_hdr_t hdr;
if (read_connection(c, (unsigned char *)&hdr, sizeof(hdr))){
- lib_parse(b->nal_cb, &hdr, c);
+ lib_parse(b->lib_nal, &hdr, c);
+ /*TODO: check error status*/
return(1);
}
return(0);
{
manager m;
- b->nal_cb->cb_send=tcpnal_send;
- b->nal_cb->cb_recv=tcpnal_recv;
+ b->lib_nal->libnal_send=tcpnal_send;
+ b->lib_nal->libnal_recv=tcpnal_recv;
b->shutdown=tcpnal_shutdown;
- if (!(m=init_connections(PNAL_PORT(b->nal_cb->ni.nid,
- b->nal_cb->ni.pid),
+ if (!(m=init_connections(PNAL_PORT(b->lib_nal->libnal_ni.ni_pid.nid,
+ b->lib_nal->libnal_ni.ni_pid.pid),
from_connection,b))){
/* TODO: this needs to shut down the
newly created junk */
return(PTL_NAL_FAILED);
}
/* XXX cfs hack */
- b->nal_cb->ni.pid=0;
+ b->lib_nal->libnal_ni.ni_pid.pid=0;
b->lower=m;
return(PTL_OK);
}
tbd Cluster File Systems, Inc. <info@clusterfs.com>
* version 1.2.x
* bug fixes
- - clear page cache after eviction (2766)
- don't dereference NULL peer_ni in ldlm_handle_ast_error (3258)
- don't allow unlinking open directory if it isn't empty (2904)
- handle partial page writes in filter; fix 512b direct IO (3138)
* miscellania
- drop scimac NAL (unmaintained)
-tbd Cluster File Systems, Inc. <info@clusterfs.com>
+2004-05-27 Cluster File Systems, Inc. <info@clusterfs.com>
* version 1.2.2
* bug fixes
- don't copy lvb into (possibly NULL) reply on error (2983)
- update iopen-2.6 patch with fixes from 2399,2517,2904 (3301)
- don't leak open file on MDS after open resend (3325)
- serialize filter_precreate and filter_destroy_precreated (3329)
+ - loop device shouldn't call sync_dev() for nul device (3092)
+ - clear page cache after eviction (2766)
+ - resynchronize MDS->OST in background (2824)
+ - refuse to mount the same filesystem twice on same mountpoint (3394)
+ - allow llmount to create routes for mounting behind routers (3320)
+ - push lock cancellation to blocking thread for glimpse ASTs (3409)
+ - don't call osc_set_data_with_check() for TEST_LOCK matches (3159)
+ - fix rare problem with rename on htree directories (3417)
* miscellania
- allow default OST striping configuration per directory (1414)
- fix compilation for qswnal for 2.6 kernels (3125)
- increase maximum number of MDS request buffers for large systems
- change liblustreapi to be useful for external progs like lfsck (3098)
+ - increase local configuration timeout for slow disks (3353)
2004-03-22 Cluster File Systems, Inc. <info@clusterfs.com>
* version 1.2.1
modules: lustre_build_version $(DEP) $(LDISKFS) lvfs-sources
$(MAKE) $(ARCH_UM) -C $(LINUX) -f $(PWD)/kernel-tests/Makefile LUSTRE_LINUX_CONFIG=$(LINUX_CONFIG) SUBDIRS=$(PWD) -o tmp_include_depends -o scripts -o include/config/MARKER $@
+endif # MODULES
+
+all-recursive: lustre_build_version
+
lustre_build_version:
perl $(top_builddir)/scripts/version_tag.pl $(top_srcdir) $(top_builddir) > tmpver
echo "#define LUSTRE_RELEASE @RELEASE@" >> tmpver
$(RM) tmpver || \
mv tmpver $(top_builddir)/include/linux/lustre_build_version.h
-endif # MODULES
-
dist-hook:
find $(distdir) -name .deps | xargs rm -rf
find $(distdir) -name CVS | xargs rm -rf
ftp://fr2.rpmfind.net/linux/redhat/9/en/os/i386/RedHat/RPMS/autoconf-2.57-3.noarch.rpm
EOF
- [ "$cmd" = "automake" -a "$required" = "1.7.8" ] && cat >&2 <<EOF
+ [ "$cmd" = "automake-1.7" -a "$required" = "1.7.8" ] && cat >&2 <<EOF
or for RH9 systems you can use:
}
check_version() {
+ local tool
local cmd
local required
local version
- cmd=$1
- required=$2
+ tool=$1
+ cmd=$2
+ required=$3
echo -n "checking for $cmd $required... "
if ! $cmd --version >/dev/null ; then
error_msg "missing"
fi
- version=$($cmd --version | awk "BEGIN { IGNORECASE=1 } /$cmd \(GNU $cmd\)/ { print \$4 }")
+ version=$($cmd --version | awk "BEGIN { IGNORECASE=1 } /$tool \(GNU $tool\)/ { print \$4 }")
echo "found $version"
if ! compare_versions "$required" "$version" ; then
error_msg "too old"
fi
}
-check_version automake "1.7.8"
-check_version autoconf "2.57"
+check_version automake automake-1.7 "1.7.8"
+check_version autoconf autoconf "2.57"
echo "Running aclocal..."
-aclocal
+aclocal-1.7
echo "Running autoheader..."
autoheader
echo "Running automake..."
-automake -a -c
+automake-1.7 -a -c
echo "Running autoconf..."
autoconf
AC_INIT
AC_CANONICAL_SYSTEM
-AM_INIT_AUTOMAKE(lustre, HEAD)
+AM_INIT_AUTOMAKE(lustre, b1_4)
# AM_MAINTAINER_MODE
# Four main targets: lustre kernel modules, utilities, tests, and liblustre
static inline void lustre_daemonize_helper(void)
{
LASSERT(current->signal != NULL);
- current->session = 1;
+ current->signal->session = 1;
if (current->group_leader)
- current->group_leader->__pgrp = 1;
+ current->group_leader->signal->pgrp = 1;
else
CERROR("we aren't group leader\n");
- current->tty = NULL;
+ current->signal->tty = NULL;
}
static inline int cleanup_group_info(void)
char *name);
int ptlrpc_unregister_service(struct ptlrpc_service *service);
int liblustre_check_services (void *arg);
+void ptlrpc_daemonize(void);
+
struct ptlrpc_svc_data {
char *name;
+ struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
+ rec_len = EXT3_DIR_REC_LEN(de->name_len);
+ memcpy (to, de, rec_len);
-+ ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
++ ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len);
+ de->inode = 0;
+ map++;
+ to += rec_len;
+ rec_len = EXT3_DIR_REC_LEN(de->name_len);
+ if (de > to)
+ memmove(to, de, rec_len);
-+ to->rec_len = rec_len;
++ to->rec_len = cpu_to_le16(rec_len);
+ prev = to;
-+ to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
++ to = (struct ext3_dir_entry_2 *)((char *)to + rec_len);
+ }
+ de = next;
+ }
+ data1 = bh2->b_data;
+
+ /* The 0th block becomes the root, move the dirents out */
-+ de = (struct ext3_dir_entry_2 *) &root->dotdot;
-+ de = (struct ext3_dir_entry_2 *) ((char *)de + de->rec_len);
++ de = (struct ext3_dir_entry_2 *)&root->dotdot;
++ de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len));
+ len = ((char *) root) + blocksize - (char *) de;
+ memcpy (data1, de, len);
+ de = (struct ext3_dir_entry_2 *) data1;
+ struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
+ rec_len = EXT3_DIR_REC_LEN(de->name_len);
+ memcpy (to, de, rec_len);
-+ ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
++ ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len);
+ de->inode = 0;
+ map++;
+ to += rec_len;
+ rec_len = EXT3_DIR_REC_LEN(de->name_len);
+ if (de > to)
+ memmove(to, de, rec_len);
-+ to->rec_len = rec_len;
++ to->rec_len = cpu_to_le16(rec_len);
+ prev = to;
+ to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
+ }
+ data1 = bh2->b_data;
+
+ /* The 0th block becomes the root, move the dirents out */
-+ de = (struct ext3_dir_entry_2 *) &root->dotdot;
-+ de = (struct ext3_dir_entry_2 *) ((char *)de + de->rec_len);
++ de = (struct ext3_dir_entry_2 *)&root->dotdot;
++ de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len));
+ len = ((char *) root) + blocksize - (char *) de;
+ memcpy (data1, de, len);
+ de = (struct ext3_dir_entry_2 *) data1;
+ struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
+ rec_len = EXT3_DIR_REC_LEN(de->name_len);
+ memcpy (to, de, rec_len);
-+ ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
++ ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len);
+ de->inode = 0;
+ map++;
+ to += rec_len;
+ rec_len = EXT3_DIR_REC_LEN(de->name_len);
+ if (de > to)
+ memmove(to, de, rec_len);
-+ to->rec_len = rec_len;
++ to->rec_len = cpu_to_le16(rec_len);
+ prev = to;
-+ to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
++ to = (struct ext3_dir_entry_2 *)((char *)to + rec_len);
+ }
+ de = next;
+ }
+ data1 = bh2->b_data;
+
+ /* The 0th block becomes the root, move the dirents out */
-+ de = (struct ext3_dir_entry_2 *) &root->dotdot;
-+ de = (struct ext3_dir_entry_2 *) ((char *)de + de->rec_len);
++ de = (struct ext3_dir_entry_2 *)&root->dotdot;
++ de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len));
+ len = ((char *) root) + blocksize - (char *) de;
+ memcpy (data1, de, len);
+ de = (struct ext3_dir_entry_2 *) data1;
+ struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
+ rec_len = EXT3_DIR_REC_LEN(de->name_len);
+ memcpy (to, de, rec_len);
-+ ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
++ ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len);
+ de->inode = 0;
+ map++;
+ to += rec_len;
+ rec_len = EXT3_DIR_REC_LEN(de->name_len);
+ if (de > to)
+ memmove(to, de, rec_len);
-+ to->rec_len = rec_len;
++ to->rec_len = cpu_to_le16(rec_len);
+ prev = to;
-+ to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
++ to = (struct ext3_dir_entry_2 *)((char *)to + rec_len);
+ }
+ de = next;
+ }
+ data1 = bh2->b_data;
+
+ /* The 0th block becomes the root, move the dirents out */
-+ de = (struct ext3_dir_entry_2 *) &root->dotdot;
-+ de = (struct ext3_dir_entry_2 *) ((char *)de + de->rec_len);
++ de = (struct ext3_dir_entry_2 *)&root->dotdot;
++ de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len));
+ len = ((char *) root) + blocksize - (char *) de;
+ memcpy (data1, de, len);
+ de = (struct ext3_dir_entry_2 *) data1;
diff -Nru a/fs/ext3/namei.c b/fs/ext3/namei.c
--- a/fs/ext3/namei.c Thu Nov 7 10:57:49 2002
+++ b/fs/ext3/namei.c Thu Nov 7 10:57:49 2002
-@@ -2173,7 +2173,26 @@
+@@ -2173,7 +2173,30 @@
/*
* ok, that's it
*/
- ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+ retval = ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+ if (retval == -ENOENT) {
-+ /*
-+ * old_de could have moved out from under us.
-+ */
++ if (le32_to_cpu(old_de->inode) != old_inode->i_ino ||
++ old_de->name_len != old_dentry->d_name.len ||
++ strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) ||
++ (retval = ext3_delete_entry(handle, old_dir,
++ old_de, old_bh)) == -ENOENT) {
++ /* old_de could have moved from under us during htree split, so
++ * make sure that we are deleting the right entry. We might
++ * also be pointing to a stale entry in the unused part of
++ * old_bh so just checking inum and the name isn't enough. */
+ struct buffer_head *old_bh2;
+ struct ext3_dir_entry_2 *old_de2;
-+
++
+ old_bh2 = ext3_find_entry(old_dentry, &old_de2);
+ if (old_bh2) {
+ retval = ext3_delete_entry(handle, old_dir,
+ struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
+ rec_len = EXT3_DIR_REC_LEN(de->name_len);
+ memcpy (to, de, rec_len);
-+ ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
++ ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len);
+ de->inode = 0;
+ map++;
+ to += rec_len;
+ rec_len = EXT3_DIR_REC_LEN(de->name_len);
+ if (de > to)
+ memmove(to, de, rec_len);
-+ to->rec_len = rec_len;
++ to->rec_len = cpu_to_le16(rec_len);
+ prev = to;
-+ to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
++ to = (struct ext3_dir_entry_2 *)((char *)to + rec_len);
+ }
+ de = next;
+ }
if (IS_SYNC(old_dir) || IS_SYNC(new_dir))
handle->h_sync = 1;
-@@ -1070,14 +2174,33 @@
+@@ -1070,14 +2174,37 @@
/*
* ok, that's it
*/
- ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+ retval = ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+ if (retval == -ENOENT) {
-+ /*
-+ * old_de could have moved out from under us.
-+ */
++ if (le32_to_cpu(old_de->inode) != old_inode->i_ino ||
++ old_de->name_len != old_dentry->d_name.len ||
++ strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) ||
++ (retval = ext3_delete_entry(handle, old_dir,
++ old_de, old_bh)) == -ENOENT) {
++ /* old_de could have moved from under us during htree split, so
++ * make sure that we are deleting the right entry. We might
++ * also be pointing to a stale entry in the unused part of
++ * old_bh so just checking inum and the name isn't enough. */
+ struct buffer_head *old_bh2;
+ struct ext3_dir_entry_2 *old_de2;
-+
++
+ old_bh2 = ext3_find_entry(old_dentry, &old_de2);
+ if (old_bh2) {
+ retval = ext3_delete_entry(handle, old_dir,
+ struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
+ rec_len = EXT3_DIR_REC_LEN(de->name_len);
+ memcpy (to, de, rec_len);
-+ ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
++ ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len);
+ de->inode = 0;
+ map++;
+ to += rec_len;
+ rec_len = EXT3_DIR_REC_LEN(de->name_len);
+ if (de > to)
+ memmove(to, de, rec_len);
-+ to->rec_len = rec_len;
++ to->rec_len = cpu_to_le16(rec_len);
+ prev = to;
-+ to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
++ to = (struct ext3_dir_entry_2 *)((char *) to + rec_len);
+ }
+ de = next;
+ }
if (IS_SYNC(old_dir) || IS_SYNC(new_dir))
handle->h_sync = 1;
-@@ -1070,14 +2174,33 @@
+@@ -1070,14 +2174,37 @@
/*
* ok, that's it
*/
- ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+ retval = ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+ if (retval == -ENOENT) {
-+ /*
-+ * old_de could have moved out from under us.
-+ */
++ if (le32_to_cpu(old_de->inode) != old_inode->i_ino ||
++ old_de->name_len != old_dentry->d_name.len ||
++ strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) ||
++ (retval = ext3_delete_entry(handle, old_dir,
++ old_de, old_bh)) == -ENOENT) {
++ /* old_de could have moved from under us during htree split, so
++ * make sure that we are deleting the right entry. We might
++ * also be pointing to a stale entry in the unused part of
++ * old_bh so just checking inum and the name isn't enough. */
+ struct buffer_head *old_bh2;
+ struct ext3_dir_entry_2 *old_de2;
-+
++
+ old_bh2 = ext3_find_entry(old_dentry, &old_de2);
+ if (old_bh2) {
+ retval = ext3_delete_entry(handle, old_dir,
+ struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
+ rec_len = EXT3_DIR_REC_LEN(de->name_len);
+ memcpy (to, de, rec_len);
-+ ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
++ ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len);
+ de->inode = 0;
+ map++;
+ to += rec_len;
+ rec_len = EXT3_DIR_REC_LEN(de->name_len);
+ if (de > to)
+ memmove(to, de, rec_len);
-+ to->rec_len = rec_len;
++ to->rec_len = cpu_to_le16(rec_len);
+ prev = to;
-+ to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
++ to = (struct ext3_dir_entry_2 *)((char *)to + rec_len);
+ }
+ de = next;
+ }
if (IS_SYNC(old_dir) || IS_SYNC(new_dir))
handle->h_sync = 1;
-@@ -1070,14 +2174,33 @@ static int ext3_rename (struct inode * o
+@@ -1070,14 +2174,37 @@ static int ext3_rename (struct inode * o
/*
* ok, that's it
*/
- ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+ retval = ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+ if (retval == -ENOENT) {
-+ /*
-+ * old_de could have moved out from under us.
-+ */
++ if (le32_to_cpu(old_de->inode) != old_inode->i_ino ||
++ old_de->name_len != old_dentry->d_name.len ||
++ strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) ||
++ (retval = ext3_delete_entry(handle, old_dir,
++ old_de, old_bh)) == -ENOENT) {
++ /* old_de could have moved from under us during htree split, so
++ * make sure that we are deleting the right entry. We might
++ * also be pointing to a stale entry in the unused part of
++ * old_bh so just checking inum and the name isn't enough. */
+ struct buffer_head *old_bh2;
+ struct ext3_dir_entry_2 *old_de2;
-+
++
+ old_bh2 = ext3_find_entry(old_dentry, &old_de2);
+ if (old_bh2) {
+ retval = ext3_delete_entry(handle, old_dir,
--- /dev/null
+===== fs/ext3/namei.c 1.52 vs edited =====
+--- 1.52/fs/ext3/namei.c Mon May 10 05:25:34 2004
++++ edited/fs/ext3/namei.c Thu May 20 19:57:10 2004
+@@ -2264,11 +2264,15 @@
+ /*
+ * ok, that's it
+ */
+- retval = ext3_delete_entry(handle, old_dir, old_de, old_bh);
+- if (retval == -ENOENT) {
+- /*
+- * old_de could have moved out from under us.
+- */
++ if (le32_to_cpu(old_de->inode) != old_inode->i_ino ||
++ old_de->name_len != old_dentry->d_name.len ||
++ strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) ||
++ (retval = ext3_delete_entry(handle, old_dir,
++ old_de, old_bh)) == -ENOENT) {
++ /* old_de could have moved from under us during htree split, so
++ * make sure that we are deleting the right entry. We might
++ * also be pointing to a stale entry in the unused part of
++ * old_bh so just checking inum and the name isn't enough. */
+ struct buffer_head *old_bh2;
+ struct ext3_dir_entry_2 *old_de2;
+
+ struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
+ rec_len = EXT3_DIR_REC_LEN(de->name_len);
+ memcpy (to, de, rec_len);
-+ ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
++ ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len);
+ de->inode = 0;
+ map++;
+ to += rec_len;
+ rec_len = EXT3_DIR_REC_LEN(de->name_len);
+ if (de > to)
+ memmove(to, de, rec_len);
-+ to->rec_len = rec_len;
++ to->rec_len = cpu_to_le16(rec_len);
+ prev = to;
-+ to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
++ to = (struct ext3_dir_entry_2 *)((char *)to + rec_len);
+ }
+ de = next;
+ }
if (IS_SYNC(old_dir) || IS_SYNC(new_dir))
handle->h_sync = 1;
-@@ -1069,14 +2172,33 @@
+@@ -1069,14 +2172,37 @@
/*
* ok, that's it
*/
- ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+ retval = ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+ if (retval == -ENOENT) {
-+ /*
-+ * old_de could have moved out from under us.
-+ */
++ if (le32_to_cpu(old_de->inode) != old_inode->i_ino ||
++ old_de->name_len != old_dentry->d_name.len ||
++ strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) ||
++ (retval = ext3_delete_entry(handle, old_dir,
++ old_de, old_bh)) == -ENOENT) {
++ /* old_de could have moved from under us during htree split, so
++ * make sure that we are deleting the right entry. We might
++ * also be pointing to a stale entry in the unused part of
++ * old_bh so just checking inum and the name isn't enough. */
+ struct buffer_head *old_bh2;
+ struct ext3_dir_entry_2 *old_de2;
-+
++
+ old_bh2 = ext3_find_entry(old_dentry, &old_de2);
+ if (old_bh2) {
+ retval = ext3_delete_entry(handle, old_dir,
+ struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
+ rec_len = EXT3_DIR_REC_LEN(de->name_len);
+ memcpy (to, de, rec_len);
-+ ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
++ ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len);
+ de->inode = 0;
+ map++;
+ to += rec_len;
+ rec_len = EXT3_DIR_REC_LEN(de->name_len);
+ if (de > to)
+ memmove(to, de, rec_len);
-+ to->rec_len = rec_len;
++ to->rec_len = cpu_to_le16(rec_len);
+ prev = to;
-+ to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
++ to = (struct ext3_dir_entry_2 *)((char *)to + rec_len);
+ }
+ de = next;
+ }
if (IS_SYNC(old_dir) || IS_SYNC(new_dir))
handle->h_sync = 1;
-@@ -1071,14 +2174,33 @@ static int ext3_rename (struct inode * o
+@@ -1071,14 +2174,37 @@ static int ext3_rename (struct inode * o
/*
* ok, that's it
*/
- ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+ retval = ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+ if (retval == -ENOENT) {
-+ /*
-+ * old_de could have moved out from under us.
-+ */
++ if (le32_to_cpu(old_de->inode) != old_inode->i_ino ||
++ old_de->name_len != old_dentry->d_name.len ||
++ strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) ||
++ (retval = ext3_delete_entry(handle, old_dir,
++ old_de, old_bh)) == -ENOENT) {
++ /* old_de could have moved from under us during htree split, so
++ * make sure that we are deleting the right entry. We might
++ * also be pointing to a stale entry in the unused part of
++ * old_bh so just checking inum and the name isn't enough. */
+ struct buffer_head *old_bh2;
+ struct ext3_dir_entry_2 *old_de2;
-+
++
+ old_bh2 = ext3_find_entry(old_dentry, &old_de2);
+ if (old_bh2) {
+ retval = ext3_delete_entry(handle, old_dir,
include/linux/ext3_fs_i.h | 6
6 files changed, 500 insertions(+), 109 deletions(-)
-Index: linux-2.4.24/fs/ext3/namei.c
+Index: lum/fs/ext3/namei.c
===================================================================
---- linux-2.4.24.orig/fs/ext3/namei.c 2004-05-22 12:08:41.000000000 +0800
-+++ linux-2.4.24/fs/ext3/namei.c 2004-05-22 12:11:40.000000000 +0800
+--- lum.orig/fs/ext3/namei.c 2004-06-03 16:32:28.000000000 -0400
++++ lum/fs/ext3/namei.c 2004-06-03 16:45:45.000000000 -0400
@@ -51,6 +51,9 @@
{
struct buffer_head *bh;
+ (struct ext3_dir_entry_2 *) (from + map->offs);
rec_len = EXT3_DIR_REC_LEN(de->name_len);
memcpy (to, de, rec_len);
- ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
+ ((struct ext3_dir_entry_2 *)to)->rec_len = cpu_to_le16(rec_len);
@@ -987,7 +1150,8 @@
static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
if (bh)
brelse(bh);
dx_release(frames);
-@@ -1901,6 +2220,7 @@
+@@ -1905,6 +2224,7 @@
struct buffer_head * bh;
struct ext3_dir_entry_2 * de;
handle_t *handle;
handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
if (IS_ERR(handle)) {
-@@ -1908,7 +2228,7 @@
+@@ -1912,7 +2232,7 @@
}
retval = -ENOENT;
if (!bh)
goto end_rmdir;
-@@ -1919,14 +2239,19 @@
+@@ -1923,14 +2243,19 @@
DQUOT_INIT(inode);
retval = -EIO;
if (retval)
goto end_rmdir;
if (inode->i_nlink != 2)
-@@ -1985,6 +2310,7 @@
+@@ -1989,6 +2314,7 @@
struct buffer_head * bh;
struct ext3_dir_entry_2 * de;
handle_t *handle;
handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
if (IS_ERR(handle)) {
-@@ -1995,7 +2321,7 @@
+@@ -1999,7 +2325,7 @@
handle->h_sync = 1;
retval = -ENOENT;
if (!bh)
goto end_unlink;
-@@ -2003,8 +2329,10 @@
+@@ -2007,8 +2333,10 @@
DQUOT_INIT(inode);
retval = -EIO;
if (!inode->i_nlink) {
ext3_warning (inode->i_sb, "ext3_unlink",
-@@ -2013,6 +2341,7 @@
+@@ -2017,6 +2345,7 @@
inode->i_nlink = 1;
}
retval = ext3_delete_entry(handle, dir, de, bh);
if (retval)
goto end_unlink;
dir->i_ctime = dir->i_mtime = CURRENT_TIME;
-@@ -2151,6 +2480,7 @@
+@@ -2155,6 +2484,7 @@
struct buffer_head * old_bh, * new_bh, * dir_bh;
struct ext3_dir_entry_2 * old_de, * new_de;
int retval;
old_bh = new_bh = dir_bh = NULL;
-@@ -2163,7 +2493,10 @@
+@@ -2167,7 +2497,10 @@
if (IS_SYNC(old_dir) || IS_SYNC(new_dir))
handle->h_sync = 1;
/*
* Check for inode number is _not_ due to possible IO errors.
* We might rmdir the source, keep it as pwd of some process
-@@ -2176,7 +2509,7 @@
+@@ -2180,7 +2513,7 @@
goto end_rename;
new_inode = new_dentry->d_inode;
if (new_bh) {
if (!new_inode) {
brelse (new_bh);
-@@ -2239,7 +2572,7 @@
+@@ -2247,7 +2580,7 @@
struct buffer_head *old_bh2;
struct ext3_dir_entry_2 *old_de2;
-
+
- old_bh2 = ext3_find_entry(old_dentry, &old_de2);
+ old_bh2 = ext3_find_entry(old_dentry, &old_de2, 1, &lock3 /* FIXME */);
if (old_bh2) {
retval = ext3_delete_entry(handle, old_dir,
old_de2, old_bh2);
-@@ -2282,6 +2615,14 @@
+@@ -2290,6 +2623,14 @@
retval = 0;
end_rename:
brelse (dir_bh);
brelse (old_bh);
brelse (new_bh);
-@@ -2290,6 +2631,29 @@
+@@ -2298,6 +2639,29 @@
}
/*
* directories can handle most operations...
*/
struct inode_operations ext3_dir_inode_operations = {
-Index: linux-2.4.24/fs/ext3/super.c
+Index: lum/fs/ext3/super.c
===================================================================
---- linux-2.4.24.orig/fs/ext3/super.c 2004-05-22 12:09:38.000000000 +0800
-+++ linux-2.4.24/fs/ext3/super.c 2004-05-22 12:11:40.000000000 +0800
+--- lum.orig/fs/ext3/super.c 2004-06-03 16:32:28.000000000 -0400
++++ lum/fs/ext3/super.c 2004-06-03 16:37:15.000000000 -0400
@@ -733,6 +733,9 @@
if (want_numeric(value, "sb", sb_block))
return 0;
return sb;
failed_mount3:
-Index: linux-2.4.24/fs/ext3/inode.c
+Index: lum/fs/ext3/inode.c
===================================================================
---- linux-2.4.24.orig/fs/ext3/inode.c 2004-05-22 12:09:48.000000000 +0800
-+++ linux-2.4.24/fs/ext3/inode.c 2004-05-22 12:11:40.000000000 +0800
+--- lum.orig/fs/ext3/inode.c 2004-06-03 16:32:29.000000000 -0400
++++ lum/fs/ext3/inode.c 2004-06-03 16:37:15.000000000 -0400
@@ -2251,6 +2251,9 @@
} else if (S_ISDIR(inode->i_mode)) {
inode->i_op = &ext3_dir_inode_operations;
} else if (S_ISLNK(inode->i_mode)) {
if (ext3_inode_is_fast_symlink(inode))
inode->i_op = &ext3_fast_symlink_inode_operations;
-Index: linux-2.4.24/fs/ext3/ialloc.c
+Index: lum/fs/ext3/ialloc.c
===================================================================
---- linux-2.4.24.orig/fs/ext3/ialloc.c 2004-05-22 12:09:38.000000000 +0800
-+++ linux-2.4.24/fs/ext3/ialloc.c 2004-05-22 12:11:40.000000000 +0800
+--- lum.orig/fs/ext3/ialloc.c 2004-06-03 16:32:28.000000000 -0400
++++ lum/fs/ext3/ialloc.c 2004-06-03 16:37:15.000000000 -0400
@@ -609,6 +609,9 @@
return ERR_PTR(-EDQUOT);
}
return inode;
fail:
-Index: linux-2.4.24/include/linux/ext3_fs.h
+Index: lum/include/linux/ext3_fs.h
===================================================================
---- linux-2.4.24.orig/include/linux/ext3_fs.h 2004-05-22 12:09:42.000000000 +0800
-+++ linux-2.4.24/include/linux/ext3_fs.h 2004-05-22 12:11:41.000000000 +0800
+--- lum.orig/include/linux/ext3_fs.h 2004-06-03 16:32:28.000000000 -0400
++++ lum/include/linux/ext3_fs.h 2004-06-03 16:37:15.000000000 -0400
@@ -320,6 +320,7 @@
/*
* Mount flags
#define EXT3_MOUNT_CHECK 0x0001 /* Do mount-time checks */
#define EXT3_MOUNT_GRPID 0x0004 /* Create files with directory's group */
#define EXT3_MOUNT_DEBUG 0x0008 /* Some debugging messages */
-Index: linux-2.4.24/include/linux/ext3_fs_i.h
+Index: lum/include/linux/ext3_fs_i.h
===================================================================
---- linux-2.4.24.orig/include/linux/ext3_fs_i.h 2004-05-22 12:09:38.000000000 +0800
-+++ linux-2.4.24/include/linux/ext3_fs_i.h 2004-05-22 12:13:54.000000000 +0800
+--- lum.orig/include/linux/ext3_fs_i.h 2004-06-03 16:32:28.000000000 -0400
++++ lum/include/linux/ext3_fs_i.h 2004-06-03 16:37:15.000000000 -0400
@@ -17,6 +17,7 @@
#define _LINUX_EXT3_FS_I
+ ext3_dirent *de = (ext3_dirent *) (from + map->offs);
+ rec_len = EXT3_DIR_REC_LEN(de->name_len);
+ memcpy (to, de, rec_len);
-+ ((ext3_dirent *) to)->rec_len = rec_len;
++ ((ext3_dirent *)to)->rec_len = cpu_to_le16(rec_len);
+ to += rec_len;
+ map++;
+ }
+
+ /* Fancy dance to stay within two buffers */
+ de2 = dx_copy_dirents (data1, data2, map + split, count - split);
-+ data3 = (char *) de2 + de2->rec_len;
++ data3 = (char *) de2 + le16_to_cpu(de2->rec_len);
+ de = dx_copy_dirents (data1, data3, map, split);
-+ memcpy(data1, data3, (char *) de + de->rec_len - data3);
++ memcpy(data1, data3, (char *) de + le16_to_cpu(de->rec_len) - data3);
+ de = (ext3_dirent *) ((char *) de - data3 + data1); // relocate de
+ de->rec_len = cpu_to_le16(data1 + dir->i_sb->s_blocksize - (char *)de);
+ de2->rec_len = cpu_to_le16(data2 + dir->i_sb->s_blocksize-(char *)de2);
if (IS_ERR(handle))
return PTR_ERR(handle);
-@@ -1077,7 +1844,7 @@
+@@ -1069,14 +1837,37 @@
+ /*
+ * ok, that's it
+ */
+- ext3_delete_entry(handle, old_dir, old_de, old_bh);
++ if (le32_to_cpu(old_de->inode) != old_inode->i_ino ||
++ old_de->name_len != old_dentry->d_name.len ||
++ strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) ||
++ (retval = ext3_delete_entry(handle, old_dir,
++ old_de, old_bh)) == -ENOENT) {
++ /* old_de could have moved from under us during htree split, so
++ * make sure that we are deleting the right entry. We might
++ * also be pointing to a stale entry in the unused part of
++ * old_bh so just checking inum and the name isn't enough. */
++ struct buffer_head *old_bh2;
++ struct ext3_dir_entry_2 *old_de2;
++
++ old_bh2 = ext3_find_entry(old_dentry, &old_de2);
++ if (old_bh2) {
++ retval = ext3_delete_entry(handle, old_dir,
++ old_de2, old_bh2);
++ brelse(old_bh2);
++ }
++ }
++ if (retval) {
++ ext3_warning(old_dir->i_sb, "ext3_rename",
++ "Deleting old file (%lu), %d, error=%d",
++ old_dir->i_ino, old_dir->i_nlink, retval);
++ }
+
+ if (new_inode) {
+ new_inode->i_nlink--;
new_inode->i_ctime = CURRENT_TIME;
}
old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
Index: linux-stage/fs/ext3/Makefile
===================================================================
---- linux-stage.orig/fs/ext3/Makefile 2004-05-07 16:00:16.000000000 -0400
-+++ linux-stage/fs/ext3/Makefile 2004-05-07 16:00:17.000000000 -0400
+--- linux-stage.orig/fs/ext3/Makefile 2004-05-11 17:21:20.000000000 -0400
++++ linux-stage/fs/ext3/Makefile 2004-05-11 17:21:21.000000000 -0400
@@ -4,7 +4,7 @@
obj-$(CONFIG_EXT3_FS) += ext3.o
ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
Index: linux-stage/fs/ext3/inode.c
===================================================================
---- linux-stage.orig/fs/ext3/inode.c 2004-05-07 16:00:16.000000000 -0400
-+++ linux-stage/fs/ext3/inode.c 2004-05-07 17:21:59.000000000 -0400
+--- linux-stage.orig/fs/ext3/inode.c 2004-05-11 17:21:21.000000000 -0400
++++ linux-stage/fs/ext3/inode.c 2004-05-11 17:21:21.000000000 -0400
@@ -37,6 +37,7 @@
#include <linux/mpage.h>
#include <linux/uio.h>
bh = iloc.bh;
Index: linux-stage/fs/ext3/iopen.c
===================================================================
---- linux-stage.orig/fs/ext3/iopen.c 2004-05-07 16:00:17.000000000 -0400
-+++ linux-stage/fs/ext3/iopen.c 2004-05-07 17:22:37.000000000 -0400
+--- linux-stage.orig/fs/ext3/iopen.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-stage/fs/ext3/iopen.c 2004-05-11 17:21:21.000000000 -0400
@@ -0,0 +1,272 @@
+/*
+ * linux/fs/ext3/iopen.c
+}
Index: linux-stage/fs/ext3/iopen.h
===================================================================
---- linux-stage.orig/fs/ext3/iopen.h 2004-05-07 16:00:17.000000000 -0400
-+++ linux-stage/fs/ext3/iopen.h 2004-05-07 16:00:17.000000000 -0400
+--- linux-stage.orig/fs/ext3/iopen.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-stage/fs/ext3/iopen.h 2004-05-11 17:21:21.000000000 -0400
@@ -0,0 +1,15 @@
+/*
+ * iopen.h
+ struct inode *inode, int rehash);
Index: linux-stage/fs/ext3/namei.c
===================================================================
---- linux-stage.orig/fs/ext3/namei.c 2004-05-07 16:00:16.000000000 -0400
-+++ linux-stage/fs/ext3/namei.c 2004-05-07 16:00:17.000000000 -0400
+--- linux-stage.orig/fs/ext3/namei.c 2004-05-11 17:21:20.000000000 -0400
++++ linux-stage/fs/ext3/namei.c 2004-05-11 17:21:21.000000000 -0400
@@ -37,6 +37,7 @@
#include <linux/buffer_head.h>
#include <linux/smp_lock.h>
}
Index: linux-stage/fs/ext3/super.c
===================================================================
---- linux-stage.orig/fs/ext3/super.c 2004-05-07 16:00:16.000000000 -0400
-+++ linux-stage/fs/ext3/super.c 2004-05-07 17:21:59.000000000 -0400
+--- linux-stage.orig/fs/ext3/super.c 2004-05-11 17:21:21.000000000 -0400
++++ linux-stage/fs/ext3/super.c 2004-05-11 17:44:53.000000000 -0400
@@ -536,7 +536,7 @@
Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_noload,
Opt_commit, Opt_journal_update, Opt_journal_inum,
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
-- Opt_ignore, Opt_err,
-+ Opt_ignore, Opt_err, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+- Opt_ignore, Opt_barrier,
++ Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+ Opt_err,
};
- static match_table_t tokens = {
-@@ -575,6 +575,9 @@
- {Opt_ignore, "noquota"},
+@@ -577,6 +577,9 @@
{Opt_ignore, "quota"},
{Opt_ignore, "usrquota"},
-+ {Opt_iopen, "iopen"},
-+ {Opt_noiopen, "noiopen"},
-+ {Opt_iopen_nopriv, "iopen_nopriv"},
+ {Opt_barrier, "barrier=%u"},
++ {Opt_iopen, "iopen"},
++ {Opt_noiopen, "noiopen"},
++ {Opt_iopen_nopriv, "iopen_nopriv"},
{Opt_err, NULL}
};
-@@ -762,6 +765,18 @@
- case Opt_abort:
- set_opt(sbi->s_mount_opt, ABORT);
+@@ -772,6 +775,18 @@
+ else
+ clear_opt(sbi->s_mount_opt, BARRIER);
break;
+ case Opt_iopen:
+ set_opt (sbi->s_mount_opt, IOPEN);
default:
Index: linux-stage/include/linux/ext3_fs.h
===================================================================
---- linux-stage.orig/include/linux/ext3_fs.h 2004-05-07 16:00:16.000000000 -0400
-+++ linux-stage/include/linux/ext3_fs.h 2004-05-07 16:00:17.000000000 -0400
-@@ -325,6 +325,8 @@
- #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */
+--- linux-stage.orig/include/linux/ext3_fs.h 2004-05-11 17:21:20.000000000 -0400
++++ linux-stage/include/linux/ext3_fs.h 2004-05-11 17:21:21.000000000 -0400
+@@ -326,6 +326,8 @@
#define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */
#define EXT3_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */
-+#define EXT3_MOUNT_IOPEN 0x10000 /* Allow access via iopen */
-+#define EXT3_MOUNT_IOPEN_NOPRIV 0x20000 /* Make iopen world-readable */
+ #define EXT3_MOUNT_BARRIER 0x10000 /* Use block barriers */
++#define EXT3_MOUNT_IOPEN 0x20000 /* Allow access via iopen */
++#define EXT3_MOUNT_IOPEN_NOPRIV 0x40000 /* Make iopen world-readable */
/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
#ifndef _LINUX_EXT2_FS_H
--- /dev/null
+--- drivers/block/loop.c.bu 2004-05-11 16:27:23.000000000 -0700
++++ drivers/block/loop.c 2004-05-11 16:28:50.000000000 -0700
+@@ -978,7 +978,7 @@ static int lo_release(struct inode *inod
+
+ lo = &loop_dev[dev];
+
+- if (!(lo->lo_flags & LO_FLAGS_READ_ONLY)) {
++ if (!(lo->lo_flags & LO_FLAGS_READ_ONLY) && lo->lo_device != 0) {
+ fsync_dev(lo->lo_device);
+ invalidate_buffers(lo->lo_device);
+ }
+Version 37: fix htree rename-within-same-dir (b=3417), endianness (b=2447)
Version 36: don't dput dentry after error (b=2350), zero page->private (3119)
Version 35: pass intent to real_lookup after revalidate failure (b=3285)
Version 34: fix ext3 iopen assertion failure (b=2517, b=2399)
--- /dev/null Fri Aug 30 17:31:37 2002
+++ linux-2.4.18-18.8.0-l12-braam/include/linux/lustre_version.h Thu Feb 13 07:58:33 2003
@@ -0,0 +1 @@
-+#define LUSTRE_KERNEL_VERSION 36
++#define LUSTRE_KERNEL_VERSION 37
_
--- /dev/null
+Index: linux-2.6.4-51.0/drivers/md/dm-path-selector.c
+===================================================================
+--- linux-2.6.4-51.0.orig/drivers/md/dm-path-selector.c 2004-04-18 20:10:21.000000000 -0400
++++ linux-2.6.4-51.0/drivers/md/dm-path-selector.c 2004-04-18 20:10:59.000000000 -0400
+@@ -129,7 +129,7 @@
+ struct path *path;
+ };
+
+-static struct path_info *path_lookup(struct list_head *head, struct path *p)
++static struct path_info *md_path_lookup(struct list_head *head, struct path *p)
+ {
+ struct path_info *pi;
+
+@@ -235,9 +235,9 @@
+ * mind the expense of these searches.
+ */
+ spin_lock_irqsave(&s->lock, flags);
+- pi = path_lookup(&s->valid_paths, p);
++ pi = md_path_lookup(&s->valid_paths, p);
+ if (!pi)
+- pi = path_lookup(&s->invalid_paths, p);
++ pi = md_path_lookup(&s->invalid_paths, p);
+
+ if (!pi)
+ DMWARN("asked to change the state of an unknown path");
-Index: linux-2.6.4-51.0/fs/exec.c
+Index: linux-2.6.5-12.1/fs/exec.c
===================================================================
---- linux-2.6.4-51.0.orig/fs/exec.c 2004-04-05 12:41:59.000000000 -0400
-+++ linux-2.6.4-51.0/fs/exec.c 2004-04-05 17:36:42.000000000 -0400
-@@ -122,8 +122,11 @@
- struct file * file;
+--- linux-2.6.5-12.1.orig/fs/exec.c 2004-05-10 19:21:56.000000000 +0300
++++ linux-2.6.5-12.1/fs/exec.c 2004-05-25 17:32:14.038494200 +0300
+@@ -125,9 +125,10 @@
struct nameidata nd;
int error;
-+ intent_init(&nd.intent, IT_OPEN);
- nd.intent.open.flags = FMODE_READ;
-+ error = user_path_walk_it(library, &nd);
-+
-+ nd.intent.it_flags = O_RDONLY;
- error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd);
- if (error)
++ intent_init(&nd.intent, IT_OPEN);
+
+- FSHOOK_BEGIN_USER_WALK(open,
++ nd.intent.it_flags = FMODE_READ;
++ FSHOOK_BEGIN_USER_WALK_IT(open,
+ error,
+ library,
+ LOOKUP_FOLLOW|LOOKUP_OPEN,
+@@ -144,7 +145,7 @@
goto out;
-@@ -136,7 +139,7 @@
- if (error)
- goto exit;
+ }
- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &nd.intent);
error = PTR_ERR(file);
if (IS_ERR(file))
goto out;
-@@ -485,8 +488,9 @@
- int err;
- struct file *file;
+@@ -495,8 +496,9 @@
+
+ FSHOOK_BEGIN(open, err, .filename = name, .flags = O_RDONLY)
- nd.intent.open.flags = FMODE_READ;
- err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd);
+ intent_init(&nd.intent, IT_OPEN);
-+ nd.intent.it_flags = O_RDONLY;
++ nd.intent.it_flags = FMODE_READ;
+ err = path_lookup(name, LOOKUP_FOLLOW, &nd);
file = ERR_PTR(err);
if (!err) {
-@@ -499,7 +503,7 @@
+@@ -509,7 +511,7 @@
err = -EACCES;
file = ERR_PTR(err);
if (!err) {
if (!IS_ERR(file)) {
err = deny_write_access(file);
if (err) {
-Index: linux-2.6.4-51.0/fs/namei.c
+Index: linux-2.6.5-12.1/fs/namei.c
===================================================================
---- linux-2.6.4-51.0.orig/fs/namei.c 2004-04-05 12:41:59.000000000 -0400
-+++ linux-2.6.4-51.0/fs/namei.c 2004-04-05 17:36:42.000000000 -0400
-@@ -269,8 +269,19 @@
+--- linux-2.6.5-12.1.orig/fs/namei.c 2004-05-10 19:21:56.000000000 +0300
++++ linux-2.6.5-12.1/fs/namei.c 2004-05-25 17:32:14.040493896 +0300
+@@ -270,8 +270,19 @@
return 0;
}
dput(nd->dentry);
mntput(nd->mnt);
}
-@@ -347,7 +358,10 @@
+@@ -348,7 +359,10 @@
{
struct dentry * result;
struct inode *dir = parent->d_inode;
down(&dir->i_sem);
/*
* First re-do the cached lookup just in case it was created
-@@ -386,7 +400,10 @@
+@@ -387,7 +401,10 @@
if (result->d_op && result->d_op->d_revalidate) {
if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) {
dput(result);
}
}
return result;
-@@ -563,6 +580,33 @@
+@@ -564,6 +581,33 @@
return PTR_ERR(dentry);
}
/*
* Name resolution.
*
-@@ -663,7 +705,9 @@
+@@ -664,7 +708,9 @@
if (inode->i_op->follow_link) {
mntget(next.mnt);
dput(next.dentry);
mntput(next.mnt);
if (err)
-@@ -702,14 +746,29 @@
+@@ -703,14 +749,29 @@
inode = nd->dentry->d_inode;
/* fallthrough */
case 1:
if (err)
break;
follow_mount(&next.mnt, &next.dentry);
-@@ -935,7 +994,7 @@
+@@ -936,7 +997,7 @@
}
/* SMP-safe */
{
unsigned long hash;
struct qstr this;
-@@ -955,11 +1014,16 @@
+@@ -956,11 +1017,16 @@
}
this.hash = end_name_hash(hash);
/*
* namei()
*
-@@ -971,7 +1035,7 @@
+@@ -972,7 +1038,8 @@
* that namei follows links, while lnamei does not.
* SMP-safe
*/
--int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd)
-+int fastcall __user_walk_it(const char __user *name, unsigned flags, struct nameidata *nd)
+-int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd, const char **pname)
++int fastcall __user_walk_it(const char __user *name, unsigned flags,
++ struct nameidata *nd, const char **pname)
{
char *tmp = getname(name);
int err = PTR_ERR(tmp);
-@@ -983,6 +1047,12 @@
+@@ -987,6 +1054,13 @@
return err;
}
-+int __user_walk(const char __user *name, unsigned flags, struct nameidata *nd)
++int __user_walk(const char __user *name, unsigned flags,
++ struct nameidata *nd, const char **pname)
+{
+ intent_init(&nd->intent, IT_LOOKUP);
-+ return __user_walk_it(name, flags, nd);
++ return __user_walk_it(name, flags, nd, pname);
+}
+
/*
* It's inline, so penalty for filesystems that don't use sticky bit is
* minimal.
-@@ -1255,8 +1325,8 @@
+@@ -1259,8 +1333,8 @@
acc_mode |= MAY_APPEND;
/* Fill in the open() intent data */
/*
* The simplest case - just a plain lookup.
-@@ -1271,6 +1341,7 @@
+@@ -1275,6 +1349,7 @@
/*
* Create - we need to know the parent.
*/
error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd);
if (error)
return error;
-@@ -1287,7 +1358,9 @@
+@@ -1291,7 +1366,9 @@
dir = nd->dentry;
nd->flags &= ~LOOKUP_PARENT;
down(&dir->d_inode->i_sem);
do_last:
error = PTR_ERR(dentry);
-@@ -1392,7 +1465,9 @@
+@@ -1396,7 +1473,9 @@
}
dir = nd->dentry;
down(&dir->d_inode->i_sem);
putname(nd->last.name);
goto do_last;
}
-@@ -2154,7 +2229,9 @@
+@@ -2196,7 +2275,9 @@
__vfs_follow_link(struct nameidata *nd, const char *link)
{
int res = 0;
if (IS_ERR(link))
goto fail;
-@@ -2164,6 +2241,10 @@
+@@ -2206,6 +2287,10 @@
/* weird __emul_prefix() stuff did it */
goto out;
}
res = link_path_walk(link, nd);
out:
if (current->link_count || res || nd->last_type!=LAST_NORM)
-Index: linux-2.6.4-51.0/fs/namespace.c
+Index: linux-2.6.5-12.1/fs/namespace.c
===================================================================
---- linux-2.6.4-51.0.orig/fs/namespace.c 2004-04-05 12:41:59.000000000 -0400
-+++ linux-2.6.4-51.0/fs/namespace.c 2004-04-07 13:28:23.000000000 -0400
-@@ -107,6 +107,7 @@
+--- linux-2.6.5-12.1.orig/fs/namespace.c 2004-05-10 19:21:56.000000000 +0300
++++ linux-2.6.5-12.1/fs/namespace.c 2004-05-25 17:33:44.385759328 +0300
+@@ -108,6 +108,7 @@
static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd)
{
old_nd->dentry = mnt->mnt_mountpoint;
old_nd->mnt = mnt->mnt_parent;
mnt->mnt_parent = mnt;
-@@ -748,6 +749,7 @@
+@@ -533,6 +534,8 @@
+ return err;
+ if (!old_name || !*old_name)
+ return -EINVAL;
++
++ intent_init(&old_nd.intent, IT_LOOKUP);
+ err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
+ if (err)
+ return err;
+@@ -601,6 +604,7 @@
+ return -EPERM;
+ if (!old_name || !*old_name)
+ return -EINVAL;
++ intent_init(&old_nd.intent, IT_LOOKUP);
+ err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
+ if (err)
+ return err;
+@@ -750,6 +754,7 @@
int retval = 0;
int mnt_flags = 0;
/* Discard magic */
if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
flags &= ~MS_MGC_MSK;
-Index: linux-2.6.4-51.0/fs/open.c
+Index: linux-2.6.5-12.1/fs/open.c
===================================================================
---- linux-2.6.4-51.0.orig/fs/open.c 2004-04-05 12:41:59.000000000 -0400
-+++ linux-2.6.4-51.0/fs/open.c 2004-04-05 17:36:42.000000000 -0400
-@@ -211,7 +211,7 @@
+--- linux-2.6.5-12.1.orig/fs/open.c 2004-05-10 19:21:56.000000000 +0300
++++ linux-2.6.5-12.1/fs/open.c 2004-05-25 17:32:14.042493592 +0300
+@@ -227,12 +227,12 @@
struct nameidata nd;
struct inode * inode;
int error;
error = -EINVAL;
if (length < 0) /* sorry, but loff_t says... */
goto out;
-@@ -470,6 +470,7 @@
+
+- FSHOOK_BEGIN_USER_PATH_WALK(truncate, error, path, nd, filename, .length = length)
++ FSHOOK_BEGIN_USER_PATH_WALK_IT(truncate, error, path, nd, filename, .length = length)
+
+ inode = nd.dentry->d_inode;
+
+@@ -466,6 +466,7 @@
int old_fsuid, old_fsgid;
kernel_cap_t old_cap;
int res;
if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
return -EINVAL;
-@@ -501,6 +502,7 @@
+@@ -490,7 +491,7 @@
+ else
+ current->cap_effective = current->cap_permitted;
+
+- FSHOOK_BEGIN_USER_WALK(access,
++ FSHOOK_BEGIN_USER_WALK_IT(access,
+ res,
+ filename,
+ LOOKUP_FOLLOW|LOOKUP_ACCESS,
+@@ -506,6 +507,7 @@
if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
&& !special_file(nd.dentry->d_inode->i_mode))
res = -EROFS;
+
path_release(&nd);
- }
-@@ -515,6 +517,7 @@
+ FSHOOK_END_USER_WALK(access, res, path)
+@@ -545,11 +547,13 @@
+
+ asmlinkage long sys_fchdir(unsigned int fd)
{
- struct nameidata nd;
++ struct nameidata nd;
+ struct file *file;
+ struct dentry *dentry;
+ struct inode *inode;
+ struct vfsmount *mnt;
int error;
+ intent_init(&nd.intent, IT_GETATTR);
- error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
- if (error)
-@@ -566,6 +569,7 @@
+ FSHOOK_BEGIN(fchdir, error, .fd = fd)
+
+@@ -582,8 +586,9 @@
{
struct nameidata nd;
int error;
+ intent_init(&nd.intent, IT_GETATTR);
- error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
- if (error)
-@@ -638,7 +642,7 @@
+- FSHOOK_BEGIN_USER_WALK(chroot,
++ FSHOOK_BEGIN_USER_WALK_IT(chroot,
+ error,
+ filename,
+ LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT,
+@@ -670,7 +675,7 @@
error = -EROFS;
if (IS_RDONLY(inode))
goto dput_and_out;
error = -EPERM;
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
goto dput_and_out;
-@@ -746,27 +750,8 @@
+@@ -804,27 +809,8 @@
* for the internal routines (ie open_namei()/follow_link() etc). 00 is
* used by symlinks.
*/
{
struct file * f;
struct inode *inode;
-@@ -778,6 +763,7 @@
+@@ -836,6 +822,7 @@
goto cleanup_dentry;
f->f_flags = flags;
f->f_mode = (flags+1) & O_ACCMODE;
inode = dentry->d_inode;
if (f->f_mode & FMODE_WRITE) {
error = get_write_access(inode);
-@@ -797,6 +783,7 @@
+@@ -855,6 +842,7 @@
error = f->f_op->open(inode,f);
if (error)
goto cleanup_all;
}
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
-@@ -821,6 +808,7 @@
+@@ -879,6 +867,7 @@
cleanup_file:
put_filp(f);
cleanup_dentry:
dput(dentry);
mntput(mnt);
return ERR_PTR(error);
-@@ -828,6 +816,36 @@
+@@ -886,6 +875,36 @@
EXPORT_SYMBOL(dentry_open);
/*
* Find an empty file descriptor entry, and mark it busy.
*/
-Index: linux-2.6.4-51.0/fs/stat.c
+Index: linux-2.6.5-12.1/fs/stat.c
===================================================================
---- linux-2.6.4-51.0.orig/fs/stat.c 2004-04-05 12:41:59.000000000 -0400
-+++ linux-2.6.4-51.0/fs/stat.c 2004-04-05 17:36:42.000000000 -0400
-@@ -36,7 +36,7 @@
+--- linux-2.6.5-12.1.orig/fs/stat.c 2004-05-10 19:21:56.000000000 +0300
++++ linux-2.6.5-12.1/fs/stat.c 2004-05-25 17:32:14.042493592 +0300
+@@ -37,7 +37,7 @@
EXPORT_SYMBOL(generic_fillattr);
{
struct inode *inode = dentry->d_inode;
int retval;
-@@ -45,6 +45,8 @@
+@@ -46,6 +46,8 @@
if (retval)
return retval;
if (inode->i_op->getattr)
return inode->i_op->getattr(mnt, dentry, stat);
-@@ -61,14 +63,20 @@
+@@ -62,14 +64,20 @@
EXPORT_SYMBOL(vfs_getattr);
int error;
+ intent_init(&nd.intent, IT_GETATTR);
-- error = user_path_walk(name, &nd);
-+ error = user_path_walk_it(name, &nd);
- if (!error) {
+- FSHOOK_BEGIN_USER_PATH_WALK(stat, error, name, nd, path, .link = false)
++ FSHOOK_BEGIN_USER_PATH_WALK_IT(stat, error, name, nd, path, .link = false)
+
- error = vfs_getattr(nd.mnt, nd.dentry, stat);
+ error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat);
path_release(&nd);
- }
- return error;
-@@ -80,10 +88,11 @@
+
+ FSHOOK_END_USER_WALK(stat, error, path)
+@@ -83,10 +91,11 @@
{
struct nameidata nd;
int error;
+ intent_init(&nd.intent, IT_GETATTR);
-- error = user_path_walk_link(name, &nd);
-+ error = user_path_walk_link_it(name, &nd);
- if (!error) {
+- FSHOOK_BEGIN_USER_PATH_WALK_LINK(stat, error, name, nd, path, .link = true)
++ FSHOOK_BEGIN_USER_PATH_WALK_LINK_IT(stat, error, name, nd, path, .link = true)
+
- error = vfs_getattr(nd.mnt, nd.dentry, stat);
+ error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat);
path_release(&nd);
- }
- return error;
-@@ -95,9 +104,12 @@
+
+ FSHOOK_END_USER_WALK(stat, error, path)
+@@ -99,6 +108,8 @@
+ int vfs_fstat(unsigned int fd, struct kstat *stat)
{
- struct file *f = fget(fd);
- int error = -EBADF;
+ int error;
+ struct nameidata nd;
+ intent_init(&nd.intent, IT_GETATTR);
+ FSHOOK_BEGIN(fstat, error, .fd = fd)
+
+@@ -106,7 +117,8 @@
+
+ error = -EBADF;
if (f) {
- error = vfs_getattr(f->f_vfsmnt, f->f_dentry, stat);
+ error = vfs_getattr_it(f->f_vfsmnt, f->f_dentry, &nd.intent, stat);
+ intent_release(&nd.intent);
fput(f);
}
- return error;
-Index: linux-2.6.4-51.0/fs/nfs/dir.c
+
+Index: linux-2.6.5-12.1/fs/nfs/dir.c
===================================================================
---- linux-2.6.4-51.0.orig/fs/nfs/dir.c 2004-04-05 12:41:59.000000000 -0400
-+++ linux-2.6.4-51.0/fs/nfs/dir.c 2004-04-07 13:27:47.000000000 -0400
+--- linux-2.6.5-12.1.orig/fs/nfs/dir.c 2004-05-10 19:21:53.000000000 +0300
++++ linux-2.6.5-12.1/fs/nfs/dir.c 2004-05-25 17:32:14.043493440 +0300
@@ -709,7 +709,7 @@
return 0;
if (!nd || (nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_CREATE))
/*
* The 0 argument passed into the create function should one day
-Index: linux-2.6.4-51.0/fs/inode.c
+Index: linux-2.6.5-12.1/fs/inode.c
===================================================================
---- linux-2.6.4-51.0.orig/fs/inode.c 2004-04-05 12:41:59.000000000 -0400
-+++ linux-2.6.4-51.0/fs/inode.c 2004-04-05 17:36:43.000000000 -0400
+--- linux-2.6.5-12.1.orig/fs/inode.c 2004-05-10 19:21:56.000000000 +0300
++++ linux-2.6.5-12.1/fs/inode.c 2004-05-25 17:32:14.044493288 +0300
@@ -221,6 +221,7 @@
inodes_stat.nr_unused--;
}
/**
* clear_inode - clear an inode
* @inode: inode to clear
-Index: linux-2.6.4-51.0/fs/super.c
+Index: linux-2.6.5-12.1/fs/super.c
===================================================================
---- linux-2.6.4-51.0.orig/fs/super.c 2004-04-05 12:41:59.000000000 -0400
-+++ linux-2.6.4-51.0/fs/super.c 2004-04-05 17:36:43.000000000 -0400
-@@ -787,6 +787,8 @@
+--- linux-2.6.5-12.1.orig/fs/super.c 2004-05-10 19:21:56.000000000 +0300
++++ linux-2.6.5-12.1/fs/super.c 2004-05-25 17:32:14.045493136 +0300
+@@ -789,6 +789,8 @@
return (struct vfsmount *)sb;
}
struct vfsmount *kern_mount(struct file_system_type *type)
{
return do_kern_mount(type->name, 0, type->name, NULL);
-Index: linux-2.6.4-51.0/include/linux/dcache.h
+Index: linux-2.6.5-12.1/include/linux/dcache.h
===================================================================
---- linux-2.6.4-51.0.orig/include/linux/dcache.h 2004-04-05 12:42:07.000000000 -0400
-+++ linux-2.6.4-51.0/include/linux/dcache.h 2004-04-05 17:36:43.000000000 -0400
+--- linux-2.6.5-12.1.orig/include/linux/dcache.h 2004-04-04 06:38:24.000000000 +0300
++++ linux-2.6.5-12.1/include/linux/dcache.h 2004-05-25 17:32:14.045493136 +0300
@@ -4,6 +4,7 @@
#ifdef __KERNEL__
struct dentry_stat_t {
int nr_dentry;
int nr_unused;
-Index: linux-2.6.4-51.0/include/linux/fs.h
+Index: linux-2.6.5-12.1/include/linux/fs.h
===================================================================
---- linux-2.6.4-51.0.orig/include/linux/fs.h 2004-04-05 12:42:07.000000000 -0400
-+++ linux-2.6.4-51.0/include/linux/fs.h 2004-04-05 17:36:43.000000000 -0400
-@@ -249,6 +249,8 @@
+--- linux-2.6.5-12.1.orig/include/linux/fs.h 2004-05-10 19:21:56.000000000 +0300
++++ linux-2.6.5-12.1/include/linux/fs.h 2004-05-25 17:32:14.046492984 +0300
+@@ -250,6 +250,8 @@
#define ATTR_ATTR_FLAG 1024
#define ATTR_KILL_SUID 2048
#define ATTR_KILL_SGID 4096
/*
* This is the Inode Attributes structure, used for notify_change(). It
-@@ -422,6 +424,7 @@
+@@ -423,6 +425,7 @@
struct block_device *i_bdev;
struct cdev *i_cdev;
int i_cindex;
unsigned long i_dnotify_mask; /* Directory notify events */
struct dnotify_struct *i_dnotify; /* for directory notifications */
-@@ -554,6 +557,7 @@
+@@ -556,6 +559,7 @@
spinlock_t f_ep_lock;
#endif /* #ifdef CONFIG_EPOLL */
struct address_space *f_mapping;
};
extern spinlock_t files_lock;
#define file_list_lock() spin_lock(&files_lock);
-@@ -874,7 +878,9 @@
+@@ -886,7 +890,9 @@
void (*truncate) (struct inode *);
int (*permission) (struct inode *, int, struct nameidata *);
int (*setattr) (struct dentry *, struct iattr *);
int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
ssize_t (*listxattr) (struct dentry *, char *, size_t);
-@@ -1101,6 +1107,7 @@
+@@ -1114,6 +1120,7 @@
extern int unregister_filesystem(struct file_system_type *);
extern struct vfsmount *kern_mount(struct file_system_type *);
extern int may_umount(struct vfsmount *);
extern long do_mount(char *, char *, char *, unsigned long, void *);
extern int vfs_statfs(struct super_block *, struct kstatfs *);
-@@ -1165,6 +1172,7 @@
+@@ -1178,6 +1185,7 @@
extern int do_truncate(struct dentry *, loff_t start);
extern struct file *filp_open(const char *, int, int);
extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
extern int filp_close(struct file *, fl_owner_t id);
extern char * getname(const char __user *);
-Index: linux-2.6.4-51.0/include/linux/namei.h
+Index: linux-2.6.5-12.1/include/linux/namei.h
===================================================================
---- linux-2.6.4-51.0.orig/include/linux/namei.h 2004-04-05 12:42:07.000000000 -0400
-+++ linux-2.6.4-51.0/include/linux/namei.h 2004-04-05 17:36:43.000000000 -0400
+--- linux-2.6.5-12.1.orig/include/linux/namei.h 2004-05-10 19:21:56.000000000 +0300
++++ linux-2.6.5-12.1/include/linux/namei.h 2004-05-25 17:32:14.047492832 +0300
@@ -2,25 +2,55 @@
#define _LINUX_NAMEI_H
@@ -49,6 +82,12 @@
#define LOOKUP_ACCESS (0x0400)
- extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *));
-+extern int FASTCALL(__user_walk_it(const char __user *name, unsigned flags, struct nameidata *nd));
+ extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *, const char **));
++extern int FASTCALL(__user_walk_it(const char __user *, unsigned, struct nameidata *, const char **));
+#define user_path_walk_it(name,nd) \
-+ __user_walk_it(name, LOOKUP_FOLLOW, nd)
++ __user_walk_it(name, LOOKUP_FOLLOW, nd, 0)
+#define user_path_walk_link_it(name,nd) \
-+ __user_walk_it(name, 0, nd)
++ __user_walk_it(name, 0, nd, 0)
+extern void intent_release(struct lookup_intent *);
#define user_path_walk(name,nd) \
- __user_walk(name, LOOKUP_FOLLOW, nd)
+ __user_walk(name, LOOKUP_FOLLOW, nd, 0)
#define user_path_walk_link(name,nd) \
@@ -60,7 +99,6 @@
extern int follow_down(struct vfsmount **, struct dentry **);
extern int follow_up(struct vfsmount **, struct dentry **);
-Index: linux-2.6.4-51.0/kernel/exit.c
+Index: linux-2.6.5-12.1/kernel/exit.c
===================================================================
---- linux-2.6.4-51.0.orig/kernel/exit.c 2004-04-05 12:42:08.000000000 -0400
-+++ linux-2.6.4-51.0/kernel/exit.c 2004-04-05 17:36:43.000000000 -0400
-@@ -259,6 +259,8 @@
+--- linux-2.6.5-12.1.orig/kernel/exit.c 2004-05-10 19:21:56.000000000 +0300
++++ linux-2.6.5-12.1/kernel/exit.c 2004-05-25 17:32:14.047492832 +0300
+@@ -260,6 +260,8 @@
write_unlock_irq(&tasklist_lock);
}
void __set_special_pids(pid_t session, pid_t pgrp)
{
struct task_struct *curr = current;
-@@ -428,6 +430,8 @@
+@@ -429,6 +431,8 @@
__exit_files(tsk);
}
static inline void __put_fs_struct(struct fs_struct *fs)
{
/* No need to hold fs->lock if we are killing it */
+Index: linux-2.6.5-12.1/include/linux/fshooks.h
+===================================================================
+--- linux-2.6.5-12.1.orig/include/linux/fshooks.h 2004-05-10 19:21:56.000000000 +0300
++++ linux-2.6.5-12.1/include/linux/fshooks.h 2004-05-25 17:32:14.048492680 +0300
+@@ -90,12 +90,18 @@
+
+ #define FSHOOK_BEGIN_USER_WALK(type, err, path, flags, nd, field, args...) \
+ FSHOOK_BEGIN_USER_WALK_COMMON(type, err, __user_walk(path, flags, &nd, &info.field), nd, args)
++#define FSHOOK_BEGIN_USER_WALK_IT(type, err, path, flags, nd, field, args...) \
++ FSHOOK_BEGIN_USER_WALK_COMMON(type, err, __user_walk_it(path, flags, &nd, &info.field), nd, args)
+
+ #define FSHOOK_BEGIN_USER_PATH_WALK(type, err, path, nd, field, args...) \
+ FSHOOK_BEGIN_USER_WALK_COMMON(type, err, __user_walk(path, LOOKUP_FOLLOW, &nd, &info.field), nd, args)
++#define FSHOOK_BEGIN_USER_PATH_WALK_IT(type, err, path, nd, field, args...) \
++ FSHOOK_BEGIN_USER_WALK_COMMON(type, err, __user_walk_it(path, LOOKUP_FOLLOW, &nd, &info.field), nd, args)
+
+ #define FSHOOK_BEGIN_USER_PATH_WALK_LINK(type, err, path, nd, field, args...) \
+ FSHOOK_BEGIN_USER_WALK_COMMON(type, err, __user_walk(path, 0, &nd, &info.field), nd, args)
++#define FSHOOK_BEGIN_USER_PATH_WALK_LINK_IT(type, err, path, nd, field, args...) \
++ FSHOOK_BEGIN_USER_WALK_COMMON(type, err, __user_walk_it(path, 0, &nd, &info.field), nd, args)
+
+ #define FSHOOK_END_USER_WALK(type, err, field) \
+ (void)(&info != (struct fshook_##type##_info *)-1L); \
+@@ -126,12 +132,18 @@
+
+ #define FSHOOK_BEGIN_USER_WALK(type, err, path, flags, nd, field, args...) \
+ if (!(err = __user_walk(path, flags, &nd, 0))) {
++#define FSHOOK_BEGIN_USER_WALK_IT(type, err, path, flags, nd, field, args...) \
++ if (!(err = __user_walk_it(path, flags, &nd, 0))) {
+
+ #define FSHOOK_BEGIN_USER_PATH_WALK(type, err, path, nd, field, args...) \
+ if (!(err = user_path_walk(path, &nd))) {
++#define FSHOOK_BEGIN_USER_PATH_WALK_IT(type, err, path, nd, field, args...) \
++ if (!(err = user_path_walk_it(path, &nd))) {
+
+ #define FSHOOK_BEGIN_USER_PATH_WALK_LINK(type, err, path, nd, field, args...) \
+ if (!(err = user_path_walk_link(path, &nd))) {
++#define FSHOOK_BEGIN_USER_PATH_WALK_LINK_IT(type, err, path, nd, field, args...) \
++ if (!(err = user_path_walk_link_it(path, &nd))) {
+
+ #define FSHOOK_END_USER_WALK(type, err, field) ((void)0);}
+
+Index: linux-2.6.5-12.1/fs/block_dev.c
+===================================================================
+--- linux-2.6.5-12.1.orig/fs/block_dev.c 2004-05-10 19:21:55.000000000 +0300
++++ linux-2.6.5-12.1/fs/block_dev.c 2004-05-25 17:32:39.517620784 +0300
+@@ -834,6 +834,7 @@
+ if (!path || !*path)
+ return ERR_PTR(-EINVAL);
+
++ intent_init(&nd.intent, IT_LOOKUP);
+ error = path_lookup(path, LOOKUP_FOLLOW, &nd);
+ if (error)
+ return ERR_PTR(error);
.old..........pc/vfs_nointent_2.6.0-suse/fs/namei.c
.new.........fs/namei.c
-Index: linux-2.6.4-51.0/fs/namei.c
+Index: linux-2.6.5-12.1/fs/namei.c
===================================================================
---- linux-2.6.4-51.0.orig/fs/namei.c 2004-04-05 17:36:42.000000000 -0400
-+++ linux-2.6.4-51.0/fs/namei.c 2004-04-05 17:36:43.000000000 -0400
-@@ -1276,7 +1276,7 @@
+--- linux-2.6.5-12.1.orig/fs/namei.c 2004-05-11 15:41:54.000000000 -0400
++++ linux-2.6.5-12.1/fs/namei.c 2004-05-11 15:42:00.000000000 -0400
+@@ -1292,7 +1292,7 @@
if (!error) {
DQUOT_INIT(inode);
}
put_write_access(inode);
if (error)
-@@ -1526,6 +1526,7 @@
+@@ -1542,6 +1542,7 @@
char * tmp;
struct dentry * dentry;
struct nameidata nd;
if (S_ISDIR(mode))
return -EPERM;
-@@ -1536,6 +1537,15 @@
+@@ -1554,6 +1555,15 @@
error = path_lookup(tmp, LOOKUP_PARENT, &nd);
if (error)
goto out;
dentry = lookup_create(&nd, 0);
error = PTR_ERR(dentry);
-@@ -1562,6 +1572,7 @@
+@@ -1580,6 +1590,7 @@
dput(dentry);
}
up(&nd.dentry->d_inode->i_sem);
path_release(&nd);
out:
putname(tmp);
-@@ -1603,10 +1614,18 @@
- if (!IS_ERR(tmp)) {
+@@ -1626,10 +1637,18 @@
+
struct dentry *dentry;
struct nameidata nd;
+ intent_init(&nd.intent, IT_LOOKUP);
dentry = lookup_create(&nd, 1);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
-@@ -1616,6 +1635,7 @@
+@@ -1639,6 +1658,7 @@
dput(dentry);
}
up(&nd.dentry->d_inode->i_sem);
+out2:
path_release(&nd);
out:
- putname(tmp);
-@@ -1696,6 +1716,7 @@
+
+@@ -1722,6 +1742,7 @@
char * name;
struct dentry *dentry;
struct nameidata nd;
name = getname(pathname);
if(IS_ERR(name))
-@@ -1716,6 +1737,14 @@
+@@ -1744,6 +1765,14 @@
error = -EBUSY;
goto exit1;
}
down(&nd.dentry->d_inode->i_sem);
dentry = lookup_hash(&nd.last, nd.dentry);
error = PTR_ERR(dentry);
-@@ -1774,6 +1805,7 @@
+@@ -1805,6 +1834,7 @@
struct dentry *dentry;
struct nameidata nd;
struct inode *inode = NULL;
name = getname(pathname);
if(IS_ERR(name))
-@@ -1785,6 +1817,13 @@
+@@ -1818,6 +1848,13 @@
error = -EISDIR;
if (nd.last_type != LAST_NORM)
goto exit1;
down(&nd.dentry->d_inode->i_sem);
dentry = lookup_hash(&nd.last, nd.dentry);
error = PTR_ERR(dentry);
-@@ -1852,10 +1891,18 @@
- if (!IS_ERR(to)) {
+@@ -1891,10 +1928,18 @@
+
struct dentry *dentry;
struct nameidata nd;
+ intent_init(&nd.intent, IT_LOOKUP);
dentry = lookup_create(&nd, 0);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
-@@ -1863,6 +1910,7 @@
+@@ -1902,6 +1947,7 @@
dput(dentry);
}
up(&nd.dentry->d_inode->i_sem);
+out2:
path_release(&nd);
out:
- putname(to);
-@@ -1926,6 +1974,8 @@
+
+@@ -1968,6 +2014,8 @@
struct nameidata nd, old_nd;
int error;
char * to;
to = getname(newname);
if (IS_ERR(to))
-@@ -1940,6 +1990,13 @@
+@@ -1986,6 +2034,13 @@
error = -EXDEV;
if (old_nd.mnt != nd.mnt)
goto out_release;
new_dentry = lookup_create(&nd, 0);
error = PTR_ERR(new_dentry);
if (!IS_ERR(new_dentry)) {
-@@ -1990,7 +2047,7 @@
+@@ -2038,7 +2093,7 @@
* locking].
*/
int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
{
int error = 0;
struct inode *target;
-@@ -2035,7 +2092,7 @@
+@@ -2083,7 +2138,7 @@
}
int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
{
struct inode *target;
int error;
-@@ -2112,6 +2169,8 @@
+@@ -2160,6 +2215,8 @@
struct dentry * old_dentry, *new_dentry;
struct dentry * trap;
struct nameidata oldnd, newnd;
error = path_lookup(oldname, LOOKUP_PARENT, &oldnd);
if (error)
-@@ -2134,6 +2193,13 @@
+@@ -2182,6 +2239,13 @@
if (newnd.last_type != LAST_NORM)
goto exit2;
trap = lock_rename(new_dir, old_dir);
old_dentry = lookup_hash(&oldnd.last, old_dir);
-@@ -2165,8 +2231,7 @@
+@@ -2213,8 +2277,7 @@
if (new_dentry == trap)
goto exit5;
exit5:
dput(new_dentry);
exit4:
-Index: linux-2.6.4-51.0/fs/open.c
+Index: linux-2.6.5-12.1/fs/open.c
===================================================================
---- linux-2.6.4-51.0.orig/fs/open.c 2004-04-05 17:36:42.000000000 -0400
-+++ linux-2.6.4-51.0/fs/open.c 2004-04-06 01:37:39.000000000 -0400
-@@ -187,9 +187,10 @@
+--- linux-2.6.5-12.1.orig/fs/open.c 2004-05-11 15:41:54.000000000 -0400
++++ linux-2.6.5-12.1/fs/open.c 2004-05-11 16:07:02.000000000 -0400
+@@ -203,9 +203,10 @@
return error;
}
struct iattr newattrs;
/* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
-@@ -200,7 +201,14 @@
+@@ -216,7 +217,14 @@
newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
down(&dentry->d_inode->i_sem);
down_write(&dentry->d_inode->i_alloc_sem);
up_write(&dentry->d_inode->i_alloc_sem);
up(&dentry->d_inode->i_sem);
return err;
-@@ -256,7 +264,7 @@
+@@ -271,7 +279,7 @@
error = locks_verify_truncate(inode, NULL, length);
if (!error) {
DQUOT_INIT(inode);
}
put_write_access(inode);
-@@ -308,7 +316,7 @@
+@@ -328,7 +336,7 @@
error = locks_verify_truncate(inode, file, length);
if (!error)
out_putf:
fput(file);
out:
-@@ -387,9 +395,19 @@
- (error = permission(inode,MAY_WRITE,&nd)) != 0)
- goto dput_and_out;
- }
-- down(&inode->i_sem);
-- error = notify_change(nd.dentry, &newattrs);
-- up(&inode->i_sem);
-+ if (inode->i_op->setattr_raw) {
-+ struct inode_operations *op = nd.dentry->d_inode->i_op;
-+
-+ newattrs.ia_valid |= ATTR_RAW;
-+ error = op->setattr_raw(inode, &newattrs);
-+ /* the file system wants to use normal vfs path now */
-+ if (error != -EOPNOTSUPP)
-+ goto dput_and_out;
-+ } else {
-+ down(&inode->i_sem);
-+ error = notify_change(nd.dentry, &newattrs);
-+ up(&inode->i_sem);
-+ }
- dput_and_out:
- path_release(&nd);
- out:
-@@ -440,9 +458,19 @@
+@@ -402,9 +410,19 @@
(error = permission(inode,MAY_WRITE,&nd)) != 0)
goto dput_and_out;
}
+ }
dput_and_out:
path_release(&nd);
- out:
-@@ -592,36 +620,52 @@
+
+@@ -613,39 +631,55 @@
return error;
}
- struct inode * inode;
- struct dentry * dentry;
- struct file * file;
-- int err = -EBADF;
+- int err;
+ struct inode * inode = dentry->d_inode;
struct iattr newattrs;
+ int error = -EROFS;
+- FSHOOK_BEGIN(fchmod, err, .fd = fd, .mode = mode)
+-
+- err = -EBADF;
- file = fget(fd);
- if (!file)
+ if (IS_RDONLY(inode))
+
+ if (inode->i_op->setattr_raw) {
+ struct inode_operations *op = dentry->d_inode->i_op;
-
-- dentry = file->f_dentry;
-- inode = dentry->d_inode;
++
+ newattrs.ia_mode = mode;
+ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
+ newattrs.ia_valid |= ATTR_RAW;
+ error = op->setattr_raw(inode, &newattrs);
-+ /* the file system wants to use normal vfs path now */
++ /* the file system wants to use the normal vfs path now */
+ if (error != -EOPNOTSUPP)
+ goto out;
+ }
+- dentry = file->f_dentry;
+- inode = dentry->d_inode;
+-
- err = -EROFS;
- if (IS_RDONLY(inode))
- goto out_putf;
- err = notify_change(dentry, &newattrs);
+ error = notify_change(dentry, &newattrs);
up(&inode->i_sem);
+
+-out_putf:
+out:
+ return error;
+}
-
--out_putf:
++
+asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
+{
+ struct file * file;
+ int err = -EBADF;
+
++ FSHOOK_BEGIN(fchmod, err, .fd = fd, .mode = mode)
++
+ file = fget(fd);
+ if (!file)
+ goto out;
+ err = chmod_common(file->f_dentry, mode);
fput(file);
out:
- return err;
-@@ -630,32 +674,13 @@
+
+@@ -657,9 +691,7 @@
asmlinkage long sys_chmod(const char __user * filename, mode_t mode)
{
struct nameidata nd;
int error;
- struct iattr newattrs;
- error = user_path_walk(filename, &nd);
- if (error)
- goto out;
+ FSHOOK_BEGIN_USER_PATH_WALK(chmod,
+ error,
+@@ -669,25 +701,7 @@
+ .mode = mode,
+ .link = false)
+
- inode = nd.dentry->d_inode;
-
- error = -EROFS;
- newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
- error = notify_change(nd.dentry, &newattrs);
- up(&inode->i_sem);
-
+-
-dput_and_out:
+ error = chmod_common(nd.dentry, mode);
path_release(&nd);
- out:
- return error;
-@@ -676,6 +701,18 @@
+
+ FSHOOK_END_USER_WALK(chmod, error, path)
+@@ -710,6 +724,18 @@
if (IS_RDONLY(inode))
goto out;
error = -EPERM;
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
goto out;
newattrs.ia_valid = ATTR_CTIME;
-@@ -689,6 +726,7 @@
+@@ -723,6 +749,7 @@
}
if (!S_ISDIR(inode->i_mode))
newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID;
down(&inode->i_sem);
error = notify_change(dentry, &newattrs);
up(&inode->i_sem);
-Index: linux-2.6.4-51.0/fs/exec.c
+Index: linux-2.6.5-12.1/fs/exec.c
===================================================================
---- linux-2.6.4-51.0.orig/fs/exec.c 2004-04-05 17:36:42.000000000 -0400
-+++ linux-2.6.4-51.0/fs/exec.c 2004-04-05 17:36:43.000000000 -0400
-@@ -1418,7 +1418,7 @@
+--- linux-2.6.5-12.1.orig/fs/exec.c 2004-05-11 15:41:54.000000000 -0400
++++ linux-2.6.5-12.1/fs/exec.c 2004-05-11 15:42:00.000000000 -0400
+@@ -1435,7 +1435,7 @@
goto close_fail;
if (!file->f_op->write)
goto close_fail;
goto close_fail;
retval = binfmt->core_dump(signr, regs, file);
-Index: linux-2.6.4-51.0/include/linux/fs.h
+Index: linux-2.6.5-12.1/include/linux/fs.h
===================================================================
---- linux-2.6.4-51.0.orig/include/linux/fs.h 2004-04-05 17:36:43.000000000 -0400
-+++ linux-2.6.4-51.0/include/linux/fs.h 2004-04-05 17:36:43.000000000 -0400
-@@ -866,13 +866,20 @@
+--- linux-2.6.5-12.1.orig/include/linux/fs.h 2004-05-11 15:41:54.000000000 -0400
++++ linux-2.6.5-12.1/include/linux/fs.h 2004-05-11 15:42:00.000000000 -0400
+@@ -878,13 +878,20 @@
int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *);
int (*link) (struct dentry *,struct inode *,struct dentry *);
int (*readlink) (struct dentry *, char __user *,int);
int (*follow_link) (struct dentry *, struct nameidata *);
void (*truncate) (struct inode *);
-@@ -1169,7 +1176,7 @@
+@@ -1182,7 +1189,7 @@
/* fs/open.c */
extern struct file *filp_open(const char *, int, int);
extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
extern struct file * dentry_open_it(struct dentry *, struct vfsmount *, int, struct lookup_intent *);
-Index: linux-2.6.4-51.0/net/unix/af_unix.c
+Index: linux-2.6.5-12.1/net/unix/af_unix.c
===================================================================
---- linux-2.6.4-51.0.orig/net/unix/af_unix.c 2004-04-05 12:42:07.000000000 -0400
-+++ linux-2.6.4-51.0/net/unix/af_unix.c 2004-04-05 17:36:43.000000000 -0400
+--- linux-2.6.5-12.1.orig/net/unix/af_unix.c 2004-04-03 22:37:36.000000000 -0500
++++ linux-2.6.5-12.1/net/unix/af_unix.c 2004-05-11 15:42:00.000000000 -0400
@@ -676,6 +676,7 @@
int err = 0;
dev_read_only-2.6-suse.patch
export-2.6-suse.patch
header-guards-2.6-suse.patch
+md_path_lookup-2.6-suse.patch
ext3-ea-in-inode-2.6-suse.patch
export-ext3-2.6-suse.patch
ext3-include-fixes-2.6-suse.patch
+ext3-htree-rename_fix.patch
kernel_text_address-2.4.20-vanilla.patch
procfs-ndynamic-2.4.21-suse2.patch
ext3-truncate-buffer-head.patch
+loop-sync-2.4.21-suse.patch
-KERNEL=linux-2.6.4-51.8.tar.gz
+KERNEL=linux-2.6.5-12.1.tar.gz
SERIES=2.6-suse
-VERSION=2.6.4
-EXTRA_VERSION=51.8_lustre
+VERSION=2.6.5
+EXTRA_VERSION=12.1_lustre
RHBUILD=0
BASE_ARCHS=""
+if MODULES
if LDISKFS
modulefs_DATA = ldiskfs$(KMODEXT)
endif
+endif
ldiskfs_linux_headers := $(addprefix linux/,$(subst ext3,ldiskfs,$(notdir $(linux_headers))))
if (lock->l_granted_mode == LCK_PW &&
!lock->l_readers && !lock->l_writers &&
time_after(jiffies, lock->l_last_used + 10 * HZ)) {
+#ifdef __KERNEL__
+ ldlm_bl_to_thread(ns, NULL, lock);
+ l_unlock(&ns->ns_lock);
+#else
l_unlock(&ns->ns_lock);
ldlm_handle_bl_callback(ns, NULL, lock);
+#endif
EXIT;
return;
}
lock = list_entry(tmp, struct ldlm_lock, l_res_link);
LDLM_LOCK_GET(lock);
+ /* Set CBPENDING so nothing in the cancellation path
+ * can match this lock */
+ lock->l_flags |= LDLM_FL_CBPENDING;
lock->l_flags |= LDLM_FL_FAILED;
lock->l_flags |= flags;
* alternative: pretend that we got a blocking AST from
* the server, so that when the lock is decref'd, it
* will go away ... */
- lock->l_flags |= LDLM_FL_CBPENDING;
/* ... without sending a CANCEL message. */
lock->l_flags |= LDLM_FL_LOCAL_ONLY;
LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY");
if MPITESTS
test_lock_cancel_SOURCES = test_lock_cancel.c
-test_lock_cancel_CFLAGS = $(LL_CFLAGS) -I/opt/lam/include -L/opt/lam/lib
-test_lock_cancel_LDADD := $(LLIB_EXEC) -lmpi -llam
+test_lock_cancel_CFLAGS = $(LL_CFLAGS) -I/opt/lam/include
+test_lock_cancel_LDADD := $(LLIB_EXEC) -L/opt/lam/lib -lmpi -llam
endif
tmpex.l_extent.end = tmpex.l_extent.start + PAGE_CACHE_SIZE - 1;
/* check to see if another DLM lock covers this page */
- ldlm_lock2handle(lock, &lockh);
- rc2 = ldlm_lock_match(NULL,
+ rc2 = ldlm_lock_match(lock->l_resource->lr_namespace,
LDLM_FL_BLOCK_GRANTED|LDLM_FL_CBPENDING |
LDLM_FL_TEST_LOCK,
- NULL, 0, &tmpex, 0, &lockh);
+ &lock->l_resource->lr_name, LDLM_EXTENT,
+ &tmpex, LCK_PR | LCK_PW, &lockh);
if (rc2 == 0 && page->mapping != NULL) {
// checking again to account for writeback's lock_page()
LL_CDEBUG_PAGE(D_PAGE, page, "truncating\n");
}
uuid = &watched->u.cli.cl_import->imp_target_uuid;
- /*
- * Must notify (MDS) before we mark the OSC as active, so that
- * the orphan deletion happens without interference from racing
- * creates.
+ /* Set OSC as active before notifying the observer, so the
+ * observer can use the OSC normally.
*/
- if (obd->obd_observer) {
- /* Pass the notification up the chain. */
- rc = obd_notify(obd->obd_observer, watched, active);
- if (rc)
- RETURN(rc);
- }
-
rc = lov_set_osc_active(&obd->u.lov, uuid, active);
-
if (rc) {
CERROR("%sactivation of %s failed: %d\n",
active ? "" : "de", uuid->uuid, rc);
+ RETURN(rc);
}
+
+ if (obd->obd_observer)
+ /* Pass the notification up the chain. */
+ rc = obd_notify(obd->obd_observer, watched, active);
+
RETURN(rc);
}
err = obd_destroy(lov->tgts[loi->loi_ost_idx].ltd_exp, &tmp,
NULL, oti);
if (err && lov->tgts[loi->loi_ost_idx].active) {
- CERROR("error: destroying objid "LPX64" subobj "
+ CDEBUG(D_INODE, "error: destroying objid "LPX64" subobj "
LPX64" on OST idx %d: rc = %d\n",
oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
if (!rc)
err_cleanup:
mds_lov_clean(obd);
err_llog:
- obd_llog_cleanup(llog_get_context(&obd->obd_llogs, LLOG_CONFIG_ORIG_CTXT));
+ obd_llog_cleanup(llog_get_context(&obd->obd_llogs,
+ LLOG_CONFIG_ORIG_CTXT));
RETURN(rc);
}
-static int mds_postrecov(struct obd_device *obd)
-
+int mds_postrecov(struct obd_device *obd)
{
+ struct mds_obd *mds = &obd->u.mds;
struct llog_ctxt *ctxt;
- int rc, rc2;
+ int rc, item = 0;
ENTRY;
LASSERT(!obd->obd_recovering);
ctxt = llog_get_context(&obd->obd_llogs, LLOG_UNLINK_ORIG_CTXT);
LASSERT(ctxt != NULL);
+ /* set nextid first, so we are sure it happens */
+ rc = mds_lov_set_nextid(obd);
+ if (rc) {
+ CERROR("%s: mds_lov_set_nextid failed\n", obd->obd_name);
+ GOTO(out, rc);
+ }
+
+ /* clean PENDING dir */
+ rc = mds_cleanup_orphans(obd);
+ if (rc < 0)
+ GOTO(out, rc);
+ item = rc;
+
rc = llog_connect(ctxt, obd->u.mds.mds_lov_desc.ld_tgt_count,
NULL, NULL, NULL);
- if (rc != 0) {
- CERROR("faild at llog_origin_connect: %d\n", rc);
+ if (rc) {
+ CERROR("%s: failed at llog_origin_connect: %d\n",
+ obd->obd_name, rc);
+ GOTO(out, rc);
}
- rc = mds_cleanup_orphans(obd);
+ /* remove the orphaned precreated objects */
+ rc = mds_lov_clearorphans(mds, NULL /* all OSTs */);
+ if (rc)
+ GOTO(err_llog, rc);
+
+out:
+ RETURN(rc < 0 ? rc : item);
- rc2 = mds_lov_set_nextid(obd);
- if (rc2 == 0)
- rc2 = rc;
- RETURN(rc2);
+err_llog:
+ /* cleanup all llogging subsystems */
+ rc = obd_llog_finish(obd, &obd->obd_llogs,
+ mds->mds_lov_desc.ld_tgt_count);
+ if (rc)
+ CERROR("%s: failed to cleanup llogging subsystems\n",
+ obd->obd_name);
+ goto out;
}
int mds_lov_clean(struct obd_device *obd)
void mds_lov_update_objids(struct obd_device *obd, obd_id *ids);
int mds_lov_set_growth(struct mds_obd *mds, int count);
int mds_lov_set_nextid(struct obd_device *obd);
+int mds_lov_clearorphans(struct mds_obd *mds, struct obd_uuid *ost_uuid);
int mds_post_mds_lovconf(struct obd_device *obd);
int mds_notify(struct obd_device *obd, struct obd_device *watched, int active);
int mds_convert_lov_ea(struct obd_device *obd, struct inode *inode,
extern int mds_iocontrol(unsigned int cmd, struct obd_export *exp,
int len, void *karg, void *uarg);
extern int mds_lock_mode_for_dir(struct obd_device *, struct dentry *, int);
+int mds_postrecov(struct obd_device *obd);
#ifdef __KERNEL__
int mds_get_md(struct obd_device *, struct inode *, void *md, int *size,
RETURN(rc);
}
-static int mds_lov_clearorphans(struct mds_obd *mds, struct obd_uuid *ost_uuid)
+int mds_lov_clearorphans(struct mds_obd *mds, struct obd_uuid *ost_uuid)
{
int rc;
struct obdo oa;
rc = obd_set_info(mds->mds_osc_exp, strlen("next_id"), "next_id",
mds->mds_lov_desc.ld_tgt_count, mds->mds_lov_objids);
- if (rc < 0)
- GOTO(out, rc);
-
- rc = mds_lov_clearorphans(mds, NULL /* all OSTs */);
-
-out:
RETURN(rc);
}
* set_nextid(). The class driver can help us here, because
* it can use the obd_recovering flag to determine when the
* the OBD is full available. */
- if (!obd->obd_recovering) {
- struct llog_ctxt *ctxt;
- ctxt = llog_get_context(&obd->obd_llogs, LLOG_UNLINK_ORIG_CTXT);
- rc = llog_connect(ctxt, obd->u.mds.mds_lov_desc.ld_tgt_count,
- NULL, NULL, NULL);
- if (rc != 0)
- CERROR("faild at llog_origin_connect: %d\n", rc);
-
- rc = mds_cleanup_orphans(obd);
- if (rc > 0)
- CERROR("Cleanup %d orphans while MDS isn't recovering\n", rc);
-
- rc = mds_lov_set_nextid(obd);
- if (rc)
- GOTO(err_llog, rc);
- }
+ if (!obd->obd_recovering)
+ rc = mds_postrecov(obd);
RETURN(rc);
-err_llog:
- /* cleanup all llogging subsystems */
- rc = obd_llog_finish(obd, &obd->obd_llogs,
- mds->mds_lov_desc.ld_tgt_count);
- if (rc)
- CERROR("failed to cleanup llogging subsystems\n");
err_reg:
obd_register_observer(mds->mds_osc_obd, NULL);
err_discon:
RETURN(-EINVAL);
}
RETURN(0);
+
+}
+
+struct mds_lov_sync_info {
+ struct obd_device *mlsi_obd; /* the lov device to sync */
+ struct obd_uuid *mlsi_uuid; /* target to sync */
+};
+
+int mds_lov_synchronize(void *data)
+{
+ struct mds_lov_sync_info *mlsi = data;
+ struct llog_ctxt *ctxt;
+ struct obd_device *obd;
+ struct obd_uuid *uuid;
+ unsigned long flags;
+ int rc;
+
+ lock_kernel();
+ ptlrpc_daemonize();
+
+ SIGNAL_MASK_LOCK(current, flags);
+ sigfillset(&current->blocked);
+ RECALC_SIGPENDING;
+ SIGNAL_MASK_UNLOCK(current, flags);
+
+ obd = mlsi->mlsi_obd;
+ uuid = mlsi->mlsi_uuid;
+
+ OBD_FREE(mlsi, sizeof(*mlsi));
+
+ LASSERT(obd != NULL);
+ LASSERT(uuid != NULL);
+
+ rc = obd_set_info(obd->u.mds.mds_osc_exp, strlen("mds_conn"),
+ "mds_conn", 0, uuid);
+ if (rc != 0)
+ RETURN(rc);
+
+ ctxt = llog_get_context(&obd->obd_llogs, LLOG_UNLINK_ORIG_CTXT);
+ LASSERT(ctxt != NULL);
+
+ rc = llog_connect(ctxt, obd->u.mds.mds_lov_desc.ld_tgt_count,
+ NULL, NULL, uuid);
+ if (rc != 0) {
+ CERROR("%s: failed at llog_origin_connect: %d\n",
+ obd->obd_name, rc);
+ RETURN(rc);
+ }
+
+ CWARN("MDS %s: %s now active, resetting orphans\n",
+ obd->obd_name, uuid->uuid);
+ rc = mds_lov_clearorphans(&obd->u.mds, uuid);
+ if (rc != 0) {
+ CERROR("%s: failed at mds_lov_clearorphans: %d\n",
+ obd->obd_name, rc);
+ RETURN(rc);
+ }
+
+ RETURN(0);
+}
+
+int mds_lov_start_synchronize(struct obd_device *obd, struct obd_uuid *uuid)
+{
+ struct mds_lov_sync_info *mlsi;
+ int rc;
+
+ ENTRY;
+
+ OBD_ALLOC(mlsi, sizeof(*mlsi));
+ if (mlsi == NULL)
+ RETURN(-ENOMEM);
+
+ mlsi->mlsi_obd = obd;
+ mlsi->mlsi_uuid = uuid;
+
+ rc = kernel_thread(mds_lov_synchronize, mlsi, CLONE_VM | CLONE_FILES);
+ if (rc < 0)
+ CERROR("%s: error starting mds_lov_synchronize: %d\n",
+ obd->obd_name, rc);
+ else {
+ CDEBUG(D_HA, "%s: mds_lov_synchronize thread: %d\n",
+ obd->obd_name, rc);
+ rc = 0;
+ }
+
+ RETURN(rc);
}
int mds_notify(struct obd_device *obd, struct obd_device *watched, int active)
CWARN("MDS %s: in recovery, not resetting orphans on %s\n",
obd->obd_name, uuid->uuid);
} else {
- struct llog_ctxt *ctxt;
-
- ctxt = llog_get_context(&obd->obd_llogs, LLOG_UNLINK_ORIG_CTXT);
- LASSERT(ctxt != NULL);
-
- rc = obd_set_info(obd->u.mds.mds_osc_exp, strlen("mds_conn"),
- "mds_conn", 0, uuid);
- if (rc != 0)
- RETURN(rc);
-
- ctxt = llog_get_context(&obd->obd_llogs, LLOG_UNLINK_ORIG_CTXT);
- rc = llog_connect(ctxt, obd->u.mds.mds_lov_desc.ld_tgt_count,
- NULL, NULL, uuid);
- if (rc != 0) {
- CERROR("faild at llog_origin_connect: %d\n", rc);
- RETURN(rc);
- }
-
- CWARN("MDS %s: %s now active, resetting orphans\n",
- obd->obd_name, uuid->uuid);
- rc = mds_lov_clearorphans(&obd->u.mds, uuid);
+ rc = mds_lov_start_synchronize(obd, uuid);
}
RETURN(rc);
}
int stripe_count = 0;
LASSERT(rc == 0); /* mds_put_write_access must have succeeded */
+ if (obd->obd_recovering) {
+ CDEBUG(D_HA, "not remove orphan %s until recovery"
+ " is over\n", fidname);
+ GOTO(out, rc);
+ }
+
CDEBUG(D_HA, "destroying orphan object %s\n", fidname);
/* Sadly, there is no easy way to save pending_child from
rc = obd_destroy(mds->mds_osc_exp, oa, lsm, &oti);
obdo_free(oa);
if (rc)
- CERROR("destroy orphan objid 0x"LPX64" on ost error "
+ CDEBUG(D_INODE, "destroy orphan objid 0x"LPX64" on ost error "
"%d\n", lsm->lsm_object_id, rc);
out_free_memmd:
obd_free_memmd(mds->mds_osc_exp, &lsm);
item ++;
CWARN("removed orphan %s from MDS and OST\n", d_name);
} else {
- CERROR("removed orphan %s from MDS and OST failed,"
+ CDEBUG(D_INODE, "removed orphan %s from MDS/OST failed,"
" rc = %d\n", d_name, rc);
rc = 0;
}
* kernel patch */
#include <linux/lustre_version.h>
#define LUSTRE_MIN_VERSION 32
-#define LUSTRE_MAX_VERSION 36
+#define LUSTRE_MAX_VERSION 37
#if (LUSTRE_KERNEL_VERSION < LUSTRE_MIN_VERSION)
# error Cannot continue: Your Lustre kernel patch is older than the sources
#elif (LUSTRE_KERNEL_VERSION > LUSTRE_MAX_VERSION)
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
- * Author: Peter Braam <braam@clusterfs.com>
- * Aurhot: Andreas Dilger <adilger@clusterfs.com>
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
-#include <linux/version.h>
-#include <linux/fs.h>
-#include <asm/unistd.h>
-
-#define DEBUG_SUBSYSTEM S_FILTER
-
-#include <linux/obd.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_compat25.h>
-
-/* Debugging check only needed during development */
-#ifdef OBD_CTXT_DEBUG
-# define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC)
-# define ASSERT_NOT_KERNEL_CTXT(msg) LASSERT(!segment_eq(get_fs(), get_ds()))
-# define ASSERT_KERNEL_CTXT(msg) LASSERT(segment_eq(get_fs(), get_ds()))
-#else
-# define ASSERT_CTXT_MAGIC(magic) do {} while(0)
-# define ASSERT_NOT_KERNEL_CTXT(msg) do {} while(0)
-# define ASSERT_KERNEL_CTXT(msg) do {} while(0)
-#endif
-
-/* push / pop to root of obd store */
-void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx,
- struct lvfs_ucred *uc)
-{
- //ASSERT_NOT_KERNEL_CTXT("already in kernel context!\n");
- ASSERT_CTXT_MAGIC(new_ctx->magic);
- OBD_SET_CTXT_MAGIC(save);
-
- /*
- CDEBUG(D_INFO,
- "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
- save, current, current->fs, current->fs->pwd,
- atomic_read(&current->fs->pwd->d_count),
- atomic_read(&current->fs->pwd->d_inode->i_count),
- current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
- current->fs->pwdmnt,
- atomic_read(&current->fs->pwdmnt->mnt_count));
- */
-
- save->fs = get_fs();
- LASSERT(atomic_read(&current->fs->pwd->d_count));
- LASSERT(atomic_read(&new_ctx->pwd->d_count));
- save->pwd = dget(current->fs->pwd);
- save->pwdmnt = mntget(current->fs->pwdmnt);
- save->ngroups = current->ngroups;
-
- LASSERT(save->pwd);
- LASSERT(save->pwdmnt);
- LASSERT(new_ctx->pwd);
- LASSERT(new_ctx->pwdmnt);
-
- if (uc) {
- save->luc.luc_fsuid = current->fsuid;
- save->luc.luc_fsgid = current->fsgid;
- save->luc.luc_cap = current->cap_effective;
- save->luc.luc_suppgid1 = current->groups[0];
- save->luc.luc_suppgid2 = current->groups[1];
-
- current->fsuid = uc->luc_fsuid;
- current->fsgid = uc->luc_fsgid;
- current->cap_effective = uc->luc_cap;
- current->ngroups = 0;
-
- if (uc->luc_suppgid1 != -1)
- current->groups[current->ngroups++] = uc->luc_suppgid1;
- if (uc->luc_suppgid2 != -1)
- current->groups[current->ngroups++] = uc->luc_suppgid2;
- }
- set_fs(new_ctx->fs);
- set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd);
-
- /*
- CDEBUG(D_INFO,
- "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
- new_ctx, current, current->fs, current->fs->pwd,
- atomic_read(&current->fs->pwd->d_count),
- atomic_read(&current->fs->pwd->d_inode->i_count),
- current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
- current->fs->pwdmnt,
- atomic_read(&current->fs->pwdmnt->mnt_count));
- */
-}
-EXPORT_SYMBOL(push_ctxt);
-
-void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx,
- struct lvfs_ucred *uc)
-{
- //printk("pc0");
- ASSERT_CTXT_MAGIC(saved->magic);
- //printk("pc1");
- ASSERT_KERNEL_CTXT("popping non-kernel context!\n");
-
- /*
- CDEBUG(D_INFO,
- " = pop %p==%p = cur %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
- new_ctx, current, current->fs, current->fs->pwd,
- atomic_read(&current->fs->pwd->d_count),
- atomic_read(&current->fs->pwd->d_inode->i_count),
- current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
- current->fs->pwdmnt,
- atomic_read(&current->fs->pwdmnt->mnt_count));
- */
-
- LASSERT(current->fs->pwd == new_ctx->pwd);
- LASSERT(current->fs->pwdmnt == new_ctx->pwdmnt);
-
- set_fs(saved->fs);
- set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
-
- dput(saved->pwd);
- mntput(saved->pwdmnt);
- if (uc) {
- current->fsuid = saved->luc.luc_fsuid;
- current->fsgid = saved->luc.luc_fsgid;
- current->cap_effective = saved->luc.luc_cap;
- current->ngroups = saved->ngroups;
- current->groups[0] = saved->luc.luc_suppgid1;
- current->groups[1] = saved->luc.luc_suppgid2;
- }
-
- /*
- CDEBUG(D_INFO,
- "= pop %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
- saved, current, current->fs, current->fs->pwd,
- atomic_read(&current->fs->pwd->d_count),
- atomic_read(&current->fs->pwd->d_inode->i_count),
- current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
- current->fs->pwdmnt,
- atomic_read(&current->fs->pwdmnt->mnt_count));
- */
-}
-EXPORT_SYMBOL(pop_ctxt);
-
-/* utility to make a file */
-struct dentry *simple_mknod(struct dentry *dir, char *name, int mode)
-{
- struct dentry *dchild;
- int err = 0;
- ENTRY;
-
- ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n");
- CDEBUG(D_INODE, "creating file %*s\n", (int)strlen(name), name);
-
- dchild = ll_lookup_one_len(name, dir, strlen(name));
- if (IS_ERR(dchild))
- GOTO(out_up, dchild);
-
- if (dchild->d_inode) {
- if (!S_ISREG(dchild->d_inode->i_mode))
- GOTO(out_err, err = -EEXIST);
-
- GOTO(out_up, dchild);
- }
-
- err = ll_vfs_create(dir->d_inode, dchild, (mode & ~S_IFMT) | S_IFREG, NULL);
- if (err)
- GOTO(out_err, err);
-
- RETURN(dchild);
-
-out_err:
- dput(dchild);
- dchild = ERR_PTR(err);
-out_up:
- return dchild;
-}
-EXPORT_SYMBOL(simple_mknod);
-
-/* utility to make a directory */
-struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode)
-{
- struct dentry *dchild;
- int err = 0;
- ENTRY;
-
- ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
- CDEBUG(D_INODE, "creating directory %*s\n", (int)strlen(name), name);
- dchild = ll_lookup_one_len(name, dir, strlen(name));
- if (IS_ERR(dchild))
- GOTO(out_up, dchild);
-
- if (dchild->d_inode) {
- if (!S_ISDIR(dchild->d_inode->i_mode))
- GOTO(out_err, err = -ENOTDIR);
-
- GOTO(out_up, dchild);
- }
-
- err = vfs_mkdir(dir->d_inode, dchild, mode);
- if (err)
- GOTO(out_err, err);
-
- RETURN(dchild);
-
-out_err:
- dput(dchild);
- dchild = ERR_PTR(err);
-out_up:
- return dchild;
-}
-EXPORT_SYMBOL(simple_mkdir);
-
-/*
- * Read a file from within kernel context. Prior to calling this
- * function we should already have done a push_ctxt().
- */
-int lustre_fread(struct file *file, void *buf, int len, loff_t *off)
-{
- ASSERT_KERNEL_CTXT("kernel doing read outside kernel context\n");
- if (!file || !file->f_op || !file->f_op->read || !off)
- RETURN(-ENOSYS);
-
- return file->f_op->read(file, buf, len, off);
-}
-EXPORT_SYMBOL(lustre_fread);
-
-/*
- * Write a file from within kernel context. Prior to calling this
- * function we should already have done a push_ctxt().
- */
-int lustre_fwrite(struct file *file, const void *buf, int len, loff_t *off)
-{
- ENTRY;
- ASSERT_KERNEL_CTXT("kernel doing write outside kernel context\n");
- if (!file)
- RETURN(-ENOENT);
- if (!file->f_op)
- RETURN(-ENOSYS);
- if (!off)
- RETURN(-EINVAL);
-
- if (!file->f_op->write)
- RETURN(-EROFS);
-
- RETURN(file->f_op->write(file, buf, len, off));
-}
-EXPORT_SYMBOL(lustre_fwrite);
-
push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
cleanup_phase = 2;
+ generic_osync_inode(inode, inode->i_mapping, OSYNC_DATA|OSYNC_METADATA);
+
oti->oti_handle = fsfilt_brw_start(obd, objcount, &fso,
niocount, res, oti);
offs = k * inode->i_sb->s_blocksize;
if (!bio || !can_be_merged(bio, sector) ||
- !bio_add_page(bio, lnb->page, lnb->len, offs)) {
+ !bio_add_page(bio, lnb->page, PAGE_SIZE, offs)) {
if (bio) {
atomic_inc(&dreq->numreqs);
submit_bio(WRITE, bio);
bio->bi_end_io = dio_complete_routine;
bio->bi_private = dreq;
- if (!bio_add_page(bio, lnb->page, lnb->len, 0))
+ if (!bio_add_page(bio, lnb->page, PAGE_SIZE,
+ offs))
LBUG();
}
}
iattr.ia_size = this_size;
}
-#warning This probably needs filemap_fdatasync() like filter_io_24 (bug 2366)
if (bio) {
atomic_inc(&dreq->numreqs);
fsfilt_send_bio(obd, inode, bio);
/* this is the special case where create removes orphans */
if ((oa->o_valid & OBD_MD_FLFLAGS) &&
oa->o_flags == OBD_FL_DELORPHAN) {
- CDEBUG(D_HA, "%p: oscc recovery started\n", oscc);
+ CDEBUG(D_HA, "%s; oscc recovery started\n",
+ exp->exp_obd->obd_name);
+ LASSERT(oscc->oscc_flags & OSCC_FLAG_RECOVERING);
+
/* delete from next_id on up */
oa->o_valid |= OBD_MD_FLID;
oa->o_id = oscc->oscc_next_id - 1;
CDEBUG(D_HA, "%s: deleting to next_id: "LPU64"\n",
- oscc->oscc_obd->u.cli.cl_import->imp_target_uuid.uuid,
- oa->o_id);
+ exp->exp_obd->obd_name, oa->o_id);
rc = osc_real_create(exp, oa, ea, NULL);
+ if (oscc->oscc_obd == NULL) {
+ CWARN("the obd for oscc %p has been freed\n", oscc);
+ RETURN(rc);
+ }
spin_lock(&oscc->oscc_lock);
- if (rc == -ENOSPC)
- oscc->oscc_flags |= OSCC_FLAG_NOSPC;
- oscc->oscc_flags &= ~OSCC_FLAG_RECOVERING;
- oscc->oscc_last_id = oa->o_id;
- wake_up(&oscc->oscc_waitq);
+ if (rc == 0 || rc == -ENOSPC) {
+ if (rc == -ENOSPC)
+ oscc->oscc_flags |= OSCC_FLAG_NOSPC;
+ oscc->oscc_flags &= ~OSCC_FLAG_RECOVERING;
+ oscc->oscc_last_id = oa->o_id;
+
+ /* recovery happens in mds_setup, before cobd_setup, so
+ * reset oscc_gr = 0 here; it should be no harm to CMD */
+ oscc->oscc_gr = 0;
+
+ CDEBUG(D_HA, "%s: oscc recovery finished: %d\n",
+ exp->exp_obd->obd_name, rc);
+ wake_up(&oscc->oscc_waitq);
+
+ } else {
+ CDEBUG(D_ERROR, "%s: oscc recovery failed: %d\n",
+ exp->exp_obd->obd_name, rc);
+ }
spin_unlock(&oscc->oscc_lock);
-
- /*recover happen in mds_setup, before cobd_setup, so
- *reset oscc_gr = 0 here, it sould be no harm to CMD
- */
- oscc->oscc_gr = 0;
- CDEBUG(D_HA, "%p: oscc recovery finished\n", oscc);
RETURN(rc);
}
if (oscc_recovering(oscc)) {
struct l_wait_info lwi;
- CDEBUG(D_HA,"%p: oscc recovery in progress, waiting\n",
- oscc);
+ CDEBUG(D_HA,"%s: oscc sync in progress, waiting\n",
+ exp->exp_obd->obd_name);
lwi = LWI_TIMEOUT(MAX(obd_timeout * HZ, 1), NULL, NULL);
rc = l_wait_event(oscc->oscc_waitq,
!oscc_recovering(oscc), &lwi);
LASSERT(rc == 0 || rc == -ETIMEDOUT);
if (rc == -ETIMEDOUT) {
- CDEBUG(D_HA, "%p: timed out waiting for "
- "recovery\n", oscc);
+ CDEBUG(D_HA, "%s: timed out waiting for sync\n",
+ exp->exp_obd->obd_name);
RETURN(rc);
}
- CDEBUG(D_HA, "%p: oscc recovery over, waking up\n",
- oscc);
+ CDEBUG(D_HA, "%s: oscc sync over, waking up\n",
+ exp->exp_obd->obd_name);
}
spin_lock(&oscc->oscc_lock);
oa->o_flags == OBD_FL_DELORPHAN);
DEBUG_REQ(D_HA, request,
"delorphan from OST integration");
+ /* Don't resend the delorphan request */
+ request->rq_no_resend = request->rq_no_delay = 1;
}
rc = ptlrpc_queue_wait(request);
rc = ldlm_lock_match(obd->obd_namespace, *flags, &res_id, type,
policy, mode, lockh);
if (rc) {
- osc_set_data_with_check(lockh, data);
+ if (!(*flags & LDLM_FL_TEST_LOCK))
+ osc_set_data_with_check(lockh, data);
RETURN(rc);
}
/* If we're trying to read, we also search for an existing PW lock. The
fi
LUSTRE_MODULE_TRY_MAKE(
[#include <linux/version.h>],
- [LINUXRELEASE=UTS_RELEASE],
+ [char *LINUXRELEASE;
+ LINUXRELEASE=UTS_RELEASE;],
[$makerule LUSTRE_KERNEL_TEST=conftest.i],
[test -s kernel-tests/conftest.i],
[
# LINUXRELEASE="UTS_RELEASE"
- eval $(grep LINUXRELEASE kernel-tests/conftest.i)
+ eval $(grep "LINUXRELEASE=" kernel-tests/conftest.i)
],[
AC_MSG_RESULT([unknown])
AC_MSG_ERROR([Could not preprocess test program. Consult config.log for details.])
#include <linux/libcfs.h>
#define PORTAL_DEBUG
-#ifndef offsetof
-# define offsetof(typ,memb) ((unsigned long)((char *)&(((typ *)0)->memb)))
-#endif
-
-#define LOWEST_BIT_SET(x) ((x) & ~((x) - 1))
-
#ifdef __KERNEL__
# include <linux/vmalloc.h>
# include <linux/time.h>
TCPNAL = 5,
ROUTER = 6,
IBNAL = 7,
- CRAY_KB_ERNAL = 8,
NAL_ENUM_END_MARKER
};
#ifndef _KPR_H
#define _KPR_H
-# include <portals/lib-nal.h> /* for ptl_hdr_t */
+# include <portals/lib-types.h> /* for ptl_hdr_t */
/******************************************************************************/
/* Kernel Portals Router interface */
#define S_PTLROUTER 0x00100000
#define S_COBD 0x00200000
#define S_IBNAL 0x00400000
-#define S_LMV 0x00800000
-#define S_SM 0x01000000
-#define S_CMOBD 0x02000000
+#define S_SM 0x00800000
+#define S_ASOBD 0x01000000
+#define S_LMV 0x02000000
+#define S_CMOBD 0x04000000
+
/* If you change these values, please keep portals/utils/debug.c
* up to date! */
#endif
#ifdef __KERNEL__
+# define NTOH__u16(var) le16_to_cpu(var)
# define NTOH__u32(var) le32_to_cpu(var)
# define NTOH__u64(var) le64_to_cpu(var)
+# define HTON__u16(var) cpu_to_le16(var)
# define HTON__u32(var) cpu_to_le32(var)
# define HTON__u64(var) cpu_to_le64(var)
#else
}; \
(ret); \
})
+# define NTOH__u16(var) (var)
# define NTOH__u32(var) (var)
# define NTOH__u64(var) (expansion_u64(var))
+# define HTON__u16(var) (var)
# define HTON__u32(var) (var)
# define HTON__u64(var) (expansion_u64(var))
#endif
#include <portals/internal.h>
#include <portals/nal.h>
-#include <portals/arg-blocks.h>
-/* Hack for 2.4.18 macro name collision */
-#ifdef yield
-#undef yield
-#endif
#include <portals/types.h>
-#ifndef PTL_NO_WRAP
int PtlInit(int *);
void PtlFini(void);
int PtlNIFini(ptl_handle_ni_t interface_in);
-#endif
-
int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id);
int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in,
unsigned long *distance_out);
-#ifndef PTL_NO_WRAP
int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * interface_out);
-#endif
/*
int PtlMEUnlinkList(ptl_handle_me_t current_in);
-int PtlTblDump(ptl_handle_ni_t ni, int index_in);
-int PtlMEDump(ptl_handle_me_t current_in);
-
/*
* Memory descriptors
*/
-#ifndef PTL_NO_WRAP
int PtlMDAttach(ptl_handle_me_t current_in, ptl_md_t md_in,
ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out);
int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t * old_inout,
ptl_md_t * new_inout, ptl_handle_eq_t testq_in);
-#endif
/* These should not be called by users */
int PtlMDUpdate_internal(ptl_handle_md_t md_in, ptl_md_t * old_inout,
/*
* Event queues
*/
-#ifndef PTL_NO_WRAP
-
-/* These should be called by users */
int PtlEQAlloc(ptl_handle_ni_t ni_in, ptl_size_t count_in,
ptl_eq_handler_t handler,
ptl_handle_eq_t *handle_out);
int PtlEQFree(ptl_handle_eq_t eventq_in);
-int PtlEQCount(ptl_handle_eq_t eventq_in, ptl_size_t * count_out);
-
int PtlEQGet(ptl_handle_eq_t eventq_in, ptl_event_t * event_out);
int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout,
ptl_event_t *event_out, int *which_out);
-#endif
/*
* Access Control Table
+++ /dev/null
-#ifndef PTL_BLOCKS_H
-#define PTL_BLOCKS_H
-
-#include "build_check.h"
-
-/*
- * blocks.h
- *
- * Argument block types for the Portals 3.0 library
- * Generated by idl
- *
- */
-
-#include <portals/types.h>
-
-/* put LIB_MAX_DISPATCH last here -- these must match the
- assignements to the dispatch table in lib-p30/dispatch.c */
-#define PTL_GETID 1
-#define PTL_NISTATUS 2
-#define PTL_NIDIST 3
-// #define PTL_NIDEBUG 4
-#define PTL_MEATTACH 5
-#define PTL_MEINSERT 6
-// #define PTL_MEPREPEND 7
-#define PTL_MEUNLINK 8
-#define PTL_TBLDUMP 9
-#define PTL_MEDUMP 10
-#define PTL_MDATTACH 11
-// #define PTL_MDINSERT 12
-#define PTL_MDBIND 13
-#define PTL_MDUPDATE 14
-#define PTL_MDUNLINK 15
-#define PTL_EQALLOC 16
-#define PTL_EQFREE 17
-#define PTL_ACENTRY 18
-#define PTL_PUT 19
-#define PTL_GET 20
-#define PTL_FAILNID 21
-#define LIB_MAX_DISPATCH 21
-
-typedef struct PtlFailNid_in {
- ptl_handle_ni_t interface;
- ptl_nid_t nid;
- unsigned int threshold;
-} PtlFailNid_in;
-
-typedef struct PtlFailNid_out {
- int rc;
-} PtlFailNid_out;
-
-typedef struct PtlGetId_in {
- ptl_handle_ni_t handle_in;
-} PtlGetId_in;
-
-typedef struct PtlGetId_out {
- int rc;
- ptl_process_id_t id_out;
-} PtlGetId_out;
-
-typedef struct PtlNIStatus_in {
- ptl_handle_ni_t interface_in;
- ptl_sr_index_t register_in;
-} PtlNIStatus_in;
-
-typedef struct PtlNIStatus_out {
- int rc;
- ptl_sr_value_t status_out;
-} PtlNIStatus_out;
-
-
-typedef struct PtlNIDist_in {
- ptl_handle_ni_t interface_in;
- ptl_process_id_t process_in;
-} PtlNIDist_in;
-
-typedef struct PtlNIDist_out {
- int rc;
- unsigned long distance_out;
-} PtlNIDist_out;
-
-
-typedef struct PtlNIDebug_in {
- unsigned int mask_in;
-} PtlNIDebug_in;
-
-typedef struct PtlNIDebug_out {
- unsigned int rc;
-} PtlNIDebug_out;
-
-
-typedef struct PtlMEAttach_in {
- ptl_handle_ni_t interface_in;
- ptl_pt_index_t index_in;
- ptl_ins_pos_t position_in;
- ptl_process_id_t match_id_in;
- ptl_match_bits_t match_bits_in;
- ptl_match_bits_t ignore_bits_in;
- ptl_unlink_t unlink_in;
-} PtlMEAttach_in;
-
-typedef struct PtlMEAttach_out {
- int rc;
- ptl_handle_me_t handle_out;
-} PtlMEAttach_out;
-
-
-typedef struct PtlMEInsert_in {
- ptl_handle_me_t current_in;
- ptl_process_id_t match_id_in;
- ptl_match_bits_t match_bits_in;
- ptl_match_bits_t ignore_bits_in;
- ptl_unlink_t unlink_in;
- ptl_ins_pos_t position_in;
-} PtlMEInsert_in;
-
-typedef struct PtlMEInsert_out {
- int rc;
- ptl_handle_me_t handle_out;
-} PtlMEInsert_out;
-
-typedef struct PtlMEUnlink_in {
- ptl_handle_me_t current_in;
- ptl_unlink_t unlink_in;
-} PtlMEUnlink_in;
-
-typedef struct PtlMEUnlink_out {
- int rc;
-} PtlMEUnlink_out;
-
-
-typedef struct PtlTblDump_in {
- int index_in;
-} PtlTblDump_in;
-
-typedef struct PtlTblDump_out {
- int rc;
-} PtlTblDump_out;
-
-
-typedef struct PtlMEDump_in {
- ptl_handle_me_t current_in;
-} PtlMEDump_in;
-
-typedef struct PtlMEDump_out {
- int rc;
-} PtlMEDump_out;
-
-
-typedef struct PtlMDAttach_in {
- ptl_handle_me_t me_in;
- ptl_handle_eq_t eq_in;
- ptl_md_t md_in;
- ptl_unlink_t unlink_in;
-} PtlMDAttach_in;
-
-typedef struct PtlMDAttach_out {
- int rc;
- ptl_handle_md_t handle_out;
-} PtlMDAttach_out;
-
-
-typedef struct PtlMDBind_in {
- ptl_handle_ni_t ni_in;
- ptl_handle_eq_t eq_in;
- ptl_md_t md_in;
- ptl_unlink_t unlink_in;
-} PtlMDBind_in;
-
-typedef struct PtlMDBind_out {
- int rc;
- ptl_handle_md_t handle_out;
-} PtlMDBind_out;
-
-
-typedef struct PtlMDUpdate_internal_in {
- ptl_handle_md_t md_in;
- ptl_handle_eq_t testq_in;
- ptl_seq_t sequence_in;
-
- ptl_md_t old_inout;
- int old_inout_valid;
- ptl_md_t new_inout;
- int new_inout_valid;
-} PtlMDUpdate_internal_in;
-
-typedef struct PtlMDUpdate_internal_out {
- int rc;
- ptl_md_t old_inout;
- ptl_md_t new_inout;
-} PtlMDUpdate_internal_out;
-
-
-typedef struct PtlMDUnlink_in {
- ptl_handle_md_t md_in;
-} PtlMDUnlink_in;
-
-typedef struct PtlMDUnlink_out {
- int rc;
- ptl_md_t status_out;
-} PtlMDUnlink_out;
-
-
-typedef struct PtlEQAlloc_in {
- ptl_handle_ni_t ni_in;
- ptl_size_t count_in;
- void *base_in;
- int len_in;
- ptl_eq_handler_t callback_in;
-} PtlEQAlloc_in;
-
-typedef struct PtlEQAlloc_out {
- int rc;
- ptl_handle_eq_t handle_out;
-} PtlEQAlloc_out;
-
-
-typedef struct PtlEQFree_in {
- ptl_handle_eq_t eventq_in;
-} PtlEQFree_in;
-
-typedef struct PtlEQFree_out {
- int rc;
-} PtlEQFree_out;
-
-
-typedef struct PtlACEntry_in {
- ptl_handle_ni_t ni_in;
- ptl_ac_index_t index_in;
- ptl_process_id_t match_id_in;
- ptl_pt_index_t portal_in;
-} PtlACEntry_in;
-
-typedef struct PtlACEntry_out {
- int rc;
-} PtlACEntry_out;
-
-
-typedef struct PtlPut_in {
- ptl_handle_md_t md_in;
- ptl_ack_req_t ack_req_in;
- ptl_process_id_t target_in;
- ptl_pt_index_t portal_in;
- ptl_ac_index_t cookie_in;
- ptl_match_bits_t match_bits_in;
- ptl_size_t offset_in;
- ptl_hdr_data_t hdr_data_in;
-} PtlPut_in;
-
-typedef struct PtlPut_out {
- int rc;
-} PtlPut_out;
-
-
-typedef struct PtlGet_in {
- ptl_handle_md_t md_in;
- ptl_process_id_t target_in;
- ptl_pt_index_t portal_in;
- ptl_ac_index_t cookie_in;
- ptl_match_bits_t match_bits_in;
- ptl_size_t offset_in;
-} PtlGet_in;
-
-typedef struct PtlGet_out {
- int rc;
-} PtlGet_out;
-
-
-#endif
PTL_EQ_IN_USE = 21,
- PTL_MAX_ERRNO = 22
+ PTL_NI_INVALID = 22,
+ PTL_MD_ILLEGAL = 23,
+
+ PTL_MAX_ERRNO = 24
} ptl_err_t;
/* If you change these, you must update the string table in api-errno.c */
+++ /dev/null
-#ifndef PTL_DISPATCH_H
-#define PTL_DISPATCH_H
-
-#include "build_check.h"
-/*
- * include/dispatch.h
- *
- * Dispatch table header and externs for remote side
- * operations
- *
- * Generated by idl
- *
- */
-
-#include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
-
-extern int do_PtlGetId(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlNIStatus(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlNIDist(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlMEAttach(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlMEInsert(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlMEPrepend(nal_cb_t * nal, void *private, void *args,
- void *ret);
-extern int do_PtlMEUnlink(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlTblDump(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlMEDump(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlMDAttach(nal_cb_t * nal, void *private, void *args,
- void *ret);
-extern int do_PtlMDBind(nal_cb_t * nal, void *private, void *args,
- void *ret);
-extern int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *args,
- void *ret);
-extern int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *args,
- void *ret);
-extern int do_PtlEQAlloc_internal(nal_cb_t * nal, void *private, void *args,
- void *ret);
-extern int do_PtlEQFree_internal(nal_cb_t * nal, void *private, void *args,
- void *ret);
-extern int do_PtlPut(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlGet(nal_cb_t * nal, void *private, void *args, void *ret);
-extern int do_PtlFailNid (nal_cb_t *nal, void *private, void *args, void *ret);
-
-extern char *dispatch_name(int index);
-#endif
+++ /dev/null
-#ifndef _LIB_NAL_H_
-#define _LIB_NAL_H_
-
-#include "build_check.h"
-/*
- * nal.h
- *
- * Library side headers that define the abstraction layer's
- * responsibilities and interfaces
- */
-
-#include <portals/lib-types.h>
-
-struct nal_cb_t {
- /*
- * Per interface portal table, access control table
- * and NAL private data field;
- */
- lib_ni_t ni;
- void *nal_data;
- /*
- * send: Sends a preformatted header and payload data to a
- * specified remote process. The payload is scattered over 'niov'
- * fragments described by iov, starting at 'offset' for 'mlen'
- * bytes.
- * NB the NAL may NOT overwrite iov.
- * PTL_OK on success => NAL has committed to send and will call
- * lib_finalize on completion
- */
- ptl_err_t (*cb_send) (nal_cb_t * nal, void *private, lib_msg_t * cookie,
- ptl_hdr_t * hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
- unsigned int niov, struct iovec *iov,
- size_t offset, size_t mlen);
-
- /* as send, but with a set of page fragments (NULL if not supported) */
- ptl_err_t (*cb_send_pages) (nal_cb_t * nal, void *private, lib_msg_t * cookie,
- ptl_hdr_t * hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
- unsigned int niov, ptl_kiov_t *iov,
- size_t offset, size_t mlen);
- /*
- * recv: Receives an incoming message from a remote process. The
- * payload is to be received into the scattered buffer of 'niov'
- * fragments described by iov, starting at 'offset' for 'mlen'
- * bytes. Payload bytes after 'mlen' up to 'rlen' are to be
- * discarded.
- * NB the NAL may NOT overwrite iov.
- * PTL_OK on success => NAL has committed to receive and will call
- * lib_finalize on completion
- */
- ptl_err_t (*cb_recv) (nal_cb_t * nal, void *private, lib_msg_t * cookie,
- unsigned int niov, struct iovec *iov,
- size_t offset, size_t mlen, size_t rlen);
-
- /* as recv, but with a set of page fragments (NULL if not supported) */
- ptl_err_t (*cb_recv_pages) (nal_cb_t * nal, void *private, lib_msg_t * cookie,
- unsigned int niov, ptl_kiov_t *iov,
- size_t offset, size_t mlen, size_t rlen);
- /*
- * read: Reads a block of data from a specified user address
- */
- ptl_err_t (*cb_read) (nal_cb_t * nal, void *private, void *dst_addr,
- user_ptr src_addr, size_t len);
-
- /*
- * write: Writes a block of data into a specified user address
- */
- ptl_err_t (*cb_write) (nal_cb_t * nal, void *private, user_ptr dsr_addr,
- void *src_addr, size_t len);
-
- /*
- * callback: Calls an event callback
- * NULL => lib calls eq's callback (if any) directly.
- */
- void (*cb_callback) (nal_cb_t * nal, void *private, lib_eq_t *eq,
- ptl_event_t *ev);
-
- /*
- * malloc: Acquire a block of memory in a system independent
- * fashion.
- */
- void *(*cb_malloc) (nal_cb_t * nal, size_t len);
-
- void (*cb_free) (nal_cb_t * nal, void *buf, size_t len);
-
- /*
- * (un)map: Tell the NAL about some memory it will access.
- * *addrkey passed to cb_unmap() is what cb_map() set it to.
- * type of *iov depends on options.
- * Set to NULL if not required.
- */
- ptl_err_t (*cb_map) (nal_cb_t * nal, unsigned int niov, struct iovec *iov,
- void **addrkey);
- void (*cb_unmap) (nal_cb_t * nal, unsigned int niov, struct iovec *iov,
- void **addrkey);
-
- /* as (un)map, but with a set of page fragments */
- ptl_err_t (*cb_map_pages) (nal_cb_t * nal, unsigned int niov, ptl_kiov_t *iov,
- void **addrkey);
- void (*cb_unmap_pages) (nal_cb_t * nal, unsigned int niov, ptl_kiov_t *iov,
- void **addrkey);
-
- void (*cb_printf) (nal_cb_t * nal, const char *fmt, ...);
-
- /* Turn interrupts off (begin of protected area) */
- void (*cb_cli) (nal_cb_t * nal, unsigned long *flags);
-
- /* Turn interrupts on (end of protected area) */
- void (*cb_sti) (nal_cb_t * nal, unsigned long *flags);
-
- /*
- * Calculate a network "distance" to given node
- */
- int (*cb_dist) (nal_cb_t * nal, ptl_nid_t nid, unsigned long *dist);
-};
-
-#endif
#else
# include <portals/list.h>
# include <string.h>
+# include <pthread.h>
#endif
#include <portals/types.h>
#include <linux/kp30.h>
#include <portals/p30.h>
+#include <portals/nal.h>
#include <portals/lib-types.h>
-#include <portals/lib-nal.h>
-#include <portals/lib-dispatch.h>
static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh)
{
wh->wh_object_cookie == PTL_WIRE_HANDLE_NONE.wh_object_cookie);
}
-#define state_lock(nal,flagsp) \
-do { \
- CDEBUG(D_PORTALS, "taking state lock\n"); \
- nal->cb_cli(nal, flagsp); \
-} while (0)
+#ifdef __KERNEL__
+#define LIB_LOCK(nal,flags) \
+ spin_lock_irqsave(&(nal)->libnal_ni.ni_lock, flags)
+#define LIB_UNLOCK(nal,flags) \
+ spin_unlock_irqrestore(&(nal)->libnal_ni.ni_lock, flags)
+#else
+#define LIB_LOCK(nal,flags) \
+ (pthread_mutex_lock(&(nal)->libnal_ni.ni_mutex), (flags) = 0)
+#define LIB_UNLOCK(nal,flags) \
+ pthread_mutex_unlock(&(nal)->libnal_ni.ni_mutex)
+#endif
-#define state_unlock(nal,flagsp) \
-{ \
- CDEBUG(D_PORTALS, "releasing state lock\n"); \
- nal->cb_sti(nal, flagsp); \
-}
#ifdef PTL_USE_LIB_FREELIST
#define MAX_MSGS 2048 /* Outstanding messages */
#define MAX_EQS 512
-extern int lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int nobj, int objsize);
-extern void lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl);
+extern int lib_freelist_init (lib_nal_t *nal, lib_freelist_t *fl, int nobj, int objsize);
+extern void lib_freelist_fini (lib_nal_t *nal, lib_freelist_t *fl);
static inline void *
lib_freelist_alloc (lib_freelist_t *fl)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
lib_freeobj_t *o;
if (list_empty (&fl->fl_list))
static inline void
lib_freelist_free (lib_freelist_t *fl, void *obj)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
lib_freeobj_t *o = list_entry (obj, lib_freeobj_t, fo_contents);
list_add (&o->fo_list, &fl->fl_list);
static inline lib_eq_t *
-lib_eq_alloc (nal_cb_t *nal)
+lib_eq_alloc (lib_nal_t *nal)
{
- /* NEVER called with statelock held */
+ /* NEVER called with liblock held */
unsigned long flags;
lib_eq_t *eq;
- state_lock (nal, &flags);
- eq = (lib_eq_t *)lib_freelist_alloc (&nal->ni.ni_free_eqs);
- state_unlock (nal, &flags);
+ LIB_LOCK (nal, flags);
+ eq = (lib_eq_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_eqs);
+ LIB_UNLOCK (nal, flags);
return (eq);
}
static inline void
-lib_eq_free (nal_cb_t *nal, lib_eq_t *eq)
+lib_eq_free (lib_nal_t *nal, lib_eq_t *eq)
{
- /* ALWAYS called with statelock held */
- lib_freelist_free (&nal->ni.ni_free_eqs, eq);
+ /* ALWAYS called with liblock held */
+ lib_freelist_free (&nal->libnal_ni.ni_free_eqs, eq);
}
static inline lib_md_t *
-lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd)
+lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd)
{
- /* NEVER called with statelock held */
+ /* NEVER called with liblock held */
unsigned long flags;
lib_md_t *md;
- state_lock (nal, &flags);
- md = (lib_md_t *)lib_freelist_alloc (&nal->ni.ni_free_mds);
- state_unlock (nal, &flags);
+ LIB_LOCK (nal, flags);
+ md = (lib_md_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_mds);
+ LIB_UNLOCK (nal, flags);
return (md);
}
static inline void
-lib_md_free (nal_cb_t *nal, lib_md_t *md)
+lib_md_free (lib_nal_t *nal, lib_md_t *md)
{
- /* ALWAYS called with statelock held */
- lib_freelist_free (&nal->ni.ni_free_mds, md);
+ /* ALWAYS called with liblock held */
+ lib_freelist_free (&nal->libnal_ni.ni_free_mds, md);
}
static inline lib_me_t *
-lib_me_alloc (nal_cb_t *nal)
+lib_me_alloc (lib_nal_t *nal)
{
- /* NEVER called with statelock held */
+ /* NEVER called with liblock held */
unsigned long flags;
lib_me_t *me;
- state_lock (nal, &flags);
- me = (lib_me_t *)lib_freelist_alloc (&nal->ni.ni_free_mes);
- state_unlock (nal, &flags);
+ LIB_LOCK (nal, flags);
+ me = (lib_me_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_mes);
+ LIB_UNLOCK (nal, flags);
return (me);
}
static inline void
-lib_me_free (nal_cb_t *nal, lib_me_t *me)
+lib_me_free (lib_nal_t *nal, lib_me_t *me)
{
- /* ALWAYS called with statelock held */
- lib_freelist_free (&nal->ni.ni_free_mes, me);
+ /* ALWAYS called with liblock held */
+ lib_freelist_free (&nal->libnal_ni.ni_free_mes, me);
}
static inline lib_msg_t *
-lib_msg_alloc (nal_cb_t *nal)
+lib_msg_alloc (lib_nal_t *nal)
{
- /* NEVER called with statelock held */
+ /* NEVER called with liblock held */
unsigned long flags;
lib_msg_t *msg;
- state_lock (nal, &flags);
- msg = (lib_msg_t *)lib_freelist_alloc (&nal->ni.ni_free_msgs);
- state_unlock (nal, &flags);
+ LIB_LOCK (nal, flags);
+ msg = (lib_msg_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_msgs);
+ LIB_UNLOCK (nal, flags);
if (msg != NULL) {
/* NULL pointers, clear flags etc */
}
static inline void
-lib_msg_free (nal_cb_t *nal, lib_msg_t *msg)
+lib_msg_free (lib_nal_t *nal, lib_msg_t *msg)
{
- /* ALWAYS called with statelock held */
- lib_freelist_free (&nal->ni.ni_free_msgs, msg);
+ /* ALWAYS called with liblock held */
+ lib_freelist_free (&nal->libnal_ni.ni_free_msgs, msg);
}
#else
static inline lib_eq_t *
-lib_eq_alloc (nal_cb_t *nal)
+lib_eq_alloc (lib_nal_t *nal)
{
- /* NEVER called with statelock held */
+ /* NEVER called with liblock held */
lib_eq_t *eq;
PORTAL_ALLOC(eq, sizeof(*eq));
}
static inline void
-lib_eq_free (nal_cb_t *nal, lib_eq_t *eq)
+lib_eq_free (lib_nal_t *nal, lib_eq_t *eq)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
PORTAL_FREE(eq, sizeof(*eq));
}
static inline lib_md_t *
-lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd)
+lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd)
{
- /* NEVER called with statelock held */
+ /* NEVER called with liblock held */
lib_md_t *md;
int size;
int niov;
}
static inline void
-lib_md_free (nal_cb_t *nal, lib_md_t *md)
+lib_md_free (lib_nal_t *nal, lib_md_t *md)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
int size;
if ((md->options & PTL_MD_KIOV) != 0)
}
static inline lib_me_t *
-lib_me_alloc (nal_cb_t *nal)
+lib_me_alloc (lib_nal_t *nal)
{
- /* NEVER called with statelock held */
+ /* NEVER called with liblock held */
lib_me_t *me;
PORTAL_ALLOC(me, sizeof(*me));
}
static inline void
-lib_me_free(nal_cb_t *nal, lib_me_t *me)
+lib_me_free(lib_nal_t *nal, lib_me_t *me)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
PORTAL_FREE(me, sizeof(*me));
}
static inline lib_msg_t *
-lib_msg_alloc(nal_cb_t *nal)
+lib_msg_alloc(lib_nal_t *nal)
{
- /* NEVER called with statelock held; may be in interrupt... */
+ /* NEVER called with liblock held; may be in interrupt... */
lib_msg_t *msg;
if (in_interrupt())
}
static inline void
-lib_msg_free(nal_cb_t *nal, lib_msg_t *msg)
+lib_msg_free(lib_nal_t *nal, lib_msg_t *msg)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
PORTAL_FREE(msg, sizeof(*msg));
}
#endif
-extern lib_handle_t *lib_lookup_cookie (nal_cb_t *nal, __u64 cookie, int type);
-extern void lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh, int type);
-extern void lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh);
+extern lib_handle_t *lib_lookup_cookie (lib_nal_t *nal, __u64 cookie, int type);
+extern void lib_initialise_handle (lib_nal_t *nal, lib_handle_t *lh, int type);
+extern void lib_invalidate_handle (lib_nal_t *nal, lib_handle_t *lh);
static inline void
-ptl_eq2handle (ptl_handle_eq_t *handle, lib_eq_t *eq)
+ptl_eq2handle (ptl_handle_eq_t *handle, lib_nal_t *nal, lib_eq_t *eq)
{
+ handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
handle->cookie = eq->eq_lh.lh_cookie;
}
static inline lib_eq_t *
-ptl_handle2eq (ptl_handle_eq_t *handle, nal_cb_t *nal)
+ptl_handle2eq (ptl_handle_eq_t *handle, lib_nal_t *nal)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie,
PTL_COOKIE_TYPE_EQ);
if (lh == NULL)
}
static inline void
-ptl_md2handle (ptl_handle_md_t *handle, lib_md_t *md)
+ptl_md2handle (ptl_handle_md_t *handle, lib_nal_t *nal, lib_md_t *md)
{
+ handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
handle->cookie = md->md_lh.lh_cookie;
}
static inline lib_md_t *
-ptl_handle2md (ptl_handle_md_t *handle, nal_cb_t *nal)
+ptl_handle2md (ptl_handle_md_t *handle, lib_nal_t *nal)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie,
PTL_COOKIE_TYPE_MD);
if (lh == NULL)
}
static inline lib_md_t *
-ptl_wire_handle2md (ptl_handle_wire_t *wh, nal_cb_t *nal)
+ptl_wire_handle2md (ptl_handle_wire_t *wh, lib_nal_t *nal)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
lib_handle_t *lh;
- if (wh->wh_interface_cookie != nal->ni.ni_interface_cookie)
+ if (wh->wh_interface_cookie != nal->libnal_ni.ni_interface_cookie)
return (NULL);
lh = lib_lookup_cookie (nal, wh->wh_object_cookie,
}
static inline void
-ptl_me2handle (ptl_handle_me_t *handle, lib_me_t *me)
+ptl_me2handle (ptl_handle_me_t *handle, lib_nal_t *nal, lib_me_t *me)
{
+ handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
handle->cookie = me->me_lh.lh_cookie;
}
static inline lib_me_t *
-ptl_handle2me (ptl_handle_me_t *handle, nal_cb_t *nal)
+ptl_handle2me (ptl_handle_me_t *handle, lib_nal_t *nal)
{
- /* ALWAYS called with statelock held */
+ /* ALWAYS called with liblock held */
lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie,
PTL_COOKIE_TYPE_ME);
if (lh == NULL)
return (lh_entry (lh, lib_me_t, me_lh));
}
-extern int lib_init(nal_cb_t *cb, ptl_process_id_t pid,
+extern int lib_init(lib_nal_t *libnal, nal_t *apinal,
+ ptl_process_id_t pid,
ptl_ni_limits_t *desired_limits,
ptl_ni_limits_t *actual_limits);
-extern int lib_fini(nal_cb_t * cb);
-extern void lib_dispatch(nal_cb_t * cb, void *private, int index,
- void *arg_block, void *ret_block);
-extern char *dispatch_name(int index);
+extern int lib_fini(lib_nal_t *libnal);
/*
- * When the NAL detects an incoming message, it should call
- * lib_parse() decode it. The NAL callbacks will be handed
- * the private cookie as a way for the NAL to maintain state
- * about which transaction is being processed. An extra parameter,
- * lib_cookie will contain the necessary information for
- * finalizing the message.
- *
- * After it has finished the handling the message, it should
- * call lib_finalize() with the lib_cookie parameter.
- * Call backs will be made to write events, send acks or
- * replies and so on.
+ * When the NAL detects an incoming message header, it should call
+ * lib_parse() to decode it. If the message header is garbage, lib_parse()
+ * returns immediately with failure, otherwise the NAL callbacks will be
+ * called to receive the message body. They are handed the private cookie
+ * as a way for the NAL to maintain state about which transaction is being
+ * processed. An extra parameter, lib_msg contains the lib-level message
+ * state for passing to lib_finalize() when the message body has been
+ * received.
*/
-extern void lib_enq_event_locked (nal_cb_t *nal, void *private,
+extern void lib_enq_event_locked (lib_nal_t *nal, void *private,
lib_eq_t *eq, ptl_event_t *ev);
-extern void lib_finalize (nal_cb_t *nal, void *private, lib_msg_t *msg,
+extern void lib_finalize (lib_nal_t *nal, void *private, lib_msg_t *msg,
ptl_ni_fail_t ni_fail_type);
-extern void lib_parse (nal_cb_t *nal, ptl_hdr_t *hdr, void *private);
-extern lib_msg_t *lib_create_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid,
+extern ptl_err_t lib_parse (lib_nal_t *nal, ptl_hdr_t *hdr, void *private);
+extern lib_msg_t *lib_create_reply_msg (lib_nal_t *nal, ptl_nid_t peer_nid,
lib_msg_t *get_msg);
-extern void print_hdr (nal_cb_t * nal, ptl_hdr_t * hdr);
+extern void print_hdr (lib_nal_t * nal, ptl_hdr_t * hdr);
extern ptl_size_t lib_iov_nob (int niov, struct iovec *iov);
extern void lib_assert_wire_constants (void);
-extern ptl_err_t lib_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
+extern ptl_err_t lib_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen);
-extern ptl_err_t lib_send (nal_cb_t *nal, void *private, lib_msg_t *msg,
+extern ptl_err_t lib_send (lib_nal_t *nal, void *private, lib_msg_t *msg,
ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
lib_md_t *md, ptl_size_t offset, ptl_size_t len);
-extern void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md_in,
- ptl_md_t * md_out);
-extern void lib_md_unlink(nal_cb_t * nal, lib_md_t * md_in);
-extern void lib_me_unlink(nal_cb_t * nal, lib_me_t * me_in);
+extern int lib_api_ni_status (nal_t *nal, ptl_sr_index_t sr_idx,
+ ptl_sr_value_t *status);
+extern int lib_api_ni_dist (nal_t *nal, ptl_process_id_t *pid,
+ unsigned long *dist);
+
+extern int lib_api_eq_alloc (nal_t *nal, ptl_size_t count,
+ ptl_eq_handler_t callback,
+ ptl_handle_eq_t *handle);
+extern int lib_api_eq_free(nal_t *nal, ptl_handle_eq_t *eqh);
+extern int lib_api_eq_poll (nal_t *nal,
+ ptl_handle_eq_t *eventqs, int neq, int timeout_ms,
+ ptl_event_t *event, int *which);
+
+extern int lib_api_me_attach(nal_t *nal,
+ ptl_pt_index_t portal,
+ ptl_process_id_t match_id,
+ ptl_match_bits_t match_bits,
+ ptl_match_bits_t ignore_bits,
+ ptl_unlink_t unlink, ptl_ins_pos_t pos,
+ ptl_handle_me_t *handle);
+extern int lib_api_me_insert(nal_t *nal,
+ ptl_handle_me_t *current_meh,
+ ptl_process_id_t match_id,
+ ptl_match_bits_t match_bits,
+ ptl_match_bits_t ignore_bits,
+ ptl_unlink_t unlink, ptl_ins_pos_t pos,
+ ptl_handle_me_t *handle);
+extern int lib_api_me_unlink (nal_t *nal, ptl_handle_me_t *meh);
+extern void lib_me_unlink(lib_nal_t *nal, lib_me_t *me);
+
+extern int lib_api_get_id(nal_t *nal, ptl_process_id_t *pid);
+
+extern void lib_md_unlink(lib_nal_t *nal, lib_md_t *md);
+extern void lib_md_deconstruct(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd);
+extern int lib_api_md_attach(nal_t *nal, ptl_handle_me_t *meh,
+ ptl_md_t *umd, ptl_unlink_t unlink,
+ ptl_handle_md_t *handle);
+extern int lib_api_md_bind(nal_t *nal, ptl_md_t *umd, ptl_unlink_t unlink,
+ ptl_handle_md_t *handle);
+extern int lib_api_md_unlink (nal_t *nal, ptl_handle_md_t *mdh);
+extern int lib_api_md_update (nal_t *nal, ptl_handle_md_t *mdh,
+ ptl_md_t *oldumd, ptl_md_t *newumd,
+ ptl_handle_eq_t *testqh);
+
+extern int lib_api_get(nal_t *apinal, ptl_handle_md_t *mdh,
+ ptl_process_id_t *id,
+ ptl_pt_index_t portal, ptl_ac_index_t ac,
+ ptl_match_bits_t match_bits, ptl_size_t offset);
+extern int lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh,
+ ptl_ack_req_t ack, ptl_process_id_t *id,
+ ptl_pt_index_t portal, ptl_ac_index_t ac,
+ ptl_match_bits_t match_bits,
+ ptl_size_t offset, ptl_hdr_data_t hdr_data);
+extern int lib_api_fail_nid(nal_t *apinal, ptl_nid_t nid, unsigned int threshold);
+
#endif
#include "build_check.h"
#include <portals/types.h>
+#include <portals/nal.h>
#ifdef __KERNEL__
# include <linux/uio.h>
# include <linux/smp_lock.h>
# include <sys/types.h>
#endif
-/* struct nal_cb_t is defined in lib-nal.h */
-typedef struct nal_cb_t nal_cb_t;
-
typedef char *user_ptr;
typedef struct lib_msg_t lib_msg_t;
typedef struct lib_ptl_t lib_ptl_t;
struct lib_eq_t {
struct list_head eq_list;
lib_handle_t eq_lh;
- ptl_seq_t sequence;
- ptl_size_t size;
- ptl_event_t *base;
+ ptl_seq_t eq_enq_seq;
+ ptl_seq_t eq_deq_seq;
+ ptl_size_t eq_size;
+ ptl_event_t *eq_events;
int eq_refcount;
- ptl_eq_handler_t event_callback;
+ ptl_eq_handler_t eq_callback;
void *eq_addrkey;
};
/* PTL_COOKIE_TYPES must be a power of 2, so the cookie type can be
* extracted by masking with (PTL_COOKIE_TYPES - 1) */
-typedef struct {
- ptl_nid_t nid;
- ptl_pid_t pid;
- lib_ptl_t tbl;
- lib_counters_t counters;
- ptl_ni_limits_t actual_limits;
+typedef struct lib_ni
+{
+ nal_t *ni_api;
+ ptl_process_id_t ni_pid;
+ lib_ptl_t ni_portals;
+ lib_counters_t ni_counters;
+ ptl_ni_limits_t ni_actual_limits;
int ni_lh_hash_size; /* size of lib handle hash table */
struct list_head *ni_lh_hash_table; /* all extant lib handles, this interface */
__u64 ni_next_object_cookie; /* cookie generator */
__u64 ni_interface_cookie; /* uniquely identifies this ni in this epoch */
- struct list_head ni_test_peers;
+ struct list_head ni_test_peers;
#ifdef PTL_USE_LIB_FREELIST
- lib_freelist_t ni_free_mes;
- lib_freelist_t ni_free_msgs;
- lib_freelist_t ni_free_mds;
- lib_freelist_t ni_free_eqs;
+ lib_freelist_t ni_free_mes;
+ lib_freelist_t ni_free_msgs;
+ lib_freelist_t ni_free_mds;
+ lib_freelist_t ni_free_eqs;
+#endif
+
+ struct list_head ni_active_msgs;
+ struct list_head ni_active_mds;
+ struct list_head ni_active_eqs;
+
+#ifdef __KERNEL__
+ spinlock_t ni_lock;
+ wait_queue_head_t ni_waitq;
+#else
+ pthread_mutex_t ni_mutex;
+ pthread_cond_t ni_cond;
#endif
- struct list_head ni_active_msgs;
- struct list_head ni_active_mds;
- struct list_head ni_active_eqs;
} lib_ni_t;
+
+typedef struct lib_nal
+{
+ /* lib-level interface state */
+ lib_ni_t libnal_ni;
+
+ /* NAL-private data */
+ void *libnal_data;
+
+ /*
+ * send: Sends a preformatted header and payload data to a
+ * specified remote process. The payload is scattered over 'niov'
+ * fragments described by iov, starting at 'offset' for 'mlen'
+ * bytes.
+ * NB the NAL may NOT overwrite iov.
+ * PTL_OK on success => NAL has committed to send and will call
+ * lib_finalize on completion
+ */
+ ptl_err_t (*libnal_send)
+ (struct lib_nal *nal, void *private, lib_msg_t *cookie,
+ ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+ unsigned int niov, struct iovec *iov,
+ size_t offset, size_t mlen);
+
+ /* as send, but with a set of page fragments (NULL if not supported) */
+ ptl_err_t (*libnal_send_pages)
+ (struct lib_nal *nal, void *private, lib_msg_t * cookie,
+ ptl_hdr_t * hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+ unsigned int niov, ptl_kiov_t *iov,
+ size_t offset, size_t mlen);
+ /*
+ * recv: Receives an incoming message from a remote process. The
+ * payload is to be received into the scattered buffer of 'niov'
+ * fragments described by iov, starting at 'offset' for 'mlen'
+ * bytes. Payload bytes after 'mlen' up to 'rlen' are to be
+ * discarded.
+ * NB the NAL may NOT overwrite iov.
+ * PTL_OK on success => NAL has committed to receive and will call
+ * lib_finalize on completion
+ */
+ ptl_err_t (*libnal_recv)
+ (struct lib_nal *nal, void *private, lib_msg_t * cookie,
+ unsigned int niov, struct iovec *iov,
+ size_t offset, size_t mlen, size_t rlen);
+
+ /* as recv, but with a set of page fragments (NULL if not supported) */
+ ptl_err_t (*libnal_recv_pages)
+ (struct lib_nal *nal, void *private, lib_msg_t * cookie,
+ unsigned int niov, ptl_kiov_t *iov,
+ size_t offset, size_t mlen, size_t rlen);
+
+ /*
+ * (un)map: Tell the NAL about some memory it will access.
+ * *addrkey passed to libnal_unmap() is what libnal_map() set it to.
+ * type of *iov depends on options.
+ * Set to NULL if not required.
+ */
+ ptl_err_t (*libnal_map)
+ (struct lib_nal *nal, unsigned int niov, struct iovec *iov,
+ void **addrkey);
+ void (*libnal_unmap)
+ (struct lib_nal *nal, unsigned int niov, struct iovec *iov,
+ void **addrkey);
+
+ /* as (un)map, but with a set of page fragments */
+ ptl_err_t (*libnal_map_pages)
+ (struct lib_nal *nal, unsigned int niov, ptl_kiov_t *iov,
+ void **addrkey);
+ void (*libnal_unmap_pages)
+ (struct lib_nal *nal, unsigned int niov, ptl_kiov_t *iov,
+ void **addrkey);
+
+ void (*libnal_printf)(struct lib_nal *nal, const char *fmt, ...);
+
+ /* Calculate a network "distance" to given node */
+ int (*libnal_dist) (struct lib_nal *nal, ptl_nid_t nid, unsigned long *dist);
+} lib_nal_t;
+
#endif
#include <portals/types.h>
-#ifdef yield
-#undef yield
-#endif
-
typedef struct nal_t nal_t;
struct nal_t {
+ /* common interface state */
int nal_refct;
+ ptl_handle_ni_t nal_handle;
+
+ /* NAL-private data */
void *nal_data;
- int (*startup) (nal_t *nal, ptl_pid_t requested_pid,
- ptl_ni_limits_t *req, ptl_ni_limits_t *actual);
+ /* NAL API implementation
+ * NB only nal_ni_init needs to be set when the NAL registers itself */
+ int (*nal_ni_init) (nal_t *nal, ptl_pid_t requested_pid,
+ ptl_ni_limits_t *req, ptl_ni_limits_t *actual);
- void (*shutdown) (nal_t *nal);
+ void (*nal_ni_fini) (nal_t *nal);
- int (*forward) (nal_t *nal, int index, /* Function ID */
- void *args, size_t arg_len, void *ret, size_t ret_len);
+ int (*nal_get_id) (nal_t *nal, ptl_process_id_t *id);
+ int (*nal_ni_status) (nal_t *nal, ptl_sr_index_t index, ptl_sr_value_t *status);
+ int (*nal_ni_dist) (nal_t *nal, ptl_process_id_t *id, unsigned long *distance);
+ int (*nal_fail_nid) (nal_t *nal, ptl_nid_t nid, unsigned int threshold);
- int (*yield) (nal_t *nal, unsigned long *flags, int milliseconds);
+ int (*nal_me_attach) (nal_t *nal, ptl_pt_index_t portal,
+ ptl_process_id_t match_id,
+ ptl_match_bits_t match_bits, ptl_match_bits_t ignore_bits,
+ ptl_unlink_t unlink, ptl_ins_pos_t pos,
+ ptl_handle_me_t *handle);
+ int (*nal_me_insert) (nal_t *nal, ptl_handle_me_t *me,
+ ptl_process_id_t match_id,
+ ptl_match_bits_t match_bits, ptl_match_bits_t ignore_bits,
+ ptl_unlink_t unlink, ptl_ins_pos_t pos,
+ ptl_handle_me_t *handle);
+ int (*nal_me_unlink) (nal_t *nal, ptl_handle_me_t *me);
+
+ int (*nal_md_attach) (nal_t *nal, ptl_handle_me_t *me,
+ ptl_md_t *md, ptl_unlink_t unlink,
+ ptl_handle_md_t *handle);
+ int (*nal_md_bind) (nal_t *nal,
+ ptl_md_t *md, ptl_unlink_t unlink,
+ ptl_handle_md_t *handle);
+ int (*nal_md_unlink) (nal_t *nal, ptl_handle_md_t *md);
+ int (*nal_md_update) (nal_t *nal, ptl_handle_md_t *md,
+ ptl_md_t *old_md, ptl_md_t *new_md,
+ ptl_handle_eq_t *testq);
- void (*lock) (nal_t *nal, unsigned long *flags);
+ int (*nal_eq_alloc) (nal_t *nal, ptl_size_t count,
+ ptl_eq_handler_t handler,
+ ptl_handle_eq_t *handle);
+ int (*nal_eq_free) (nal_t *nal, ptl_handle_eq_t *eq);
+ int (*nal_eq_poll) (nal_t *nal,
+ ptl_handle_eq_t *eqs, int neqs, int timeout,
+ ptl_event_t *event, int *which);
- void (*unlock) (nal_t *nal, unsigned long *flags);
+ int (*nal_ace_entry) (nal_t *nal, ptl_ac_index_t index,
+ ptl_process_id_t match_id, ptl_pt_index_t portal);
+
+ int (*nal_put) (nal_t *nal, ptl_handle_md_t *md, ptl_ack_req_t ack,
+ ptl_process_id_t *target, ptl_pt_index_t portal,
+ ptl_ac_index_t ac, ptl_match_bits_t match,
+ ptl_size_t offset, ptl_hdr_data_t hdr_data);
+ int (*nal_get) (nal_t *nal, ptl_handle_md_t *md,
+ ptl_process_id_t *target, ptl_pt_index_t portal,
+ ptl_ac_index_t ac, ptl_match_bits_t match,
+ ptl_size_t offset);
};
-extern nal_t *ptl_hndl2nal(ptl_handle_any_t * any);
+extern nal_t *ptl_hndl2nal(ptl_handle_any_t *any);
#ifdef __KERNEL__
extern int ptl_register_nal(ptl_interface_t interface, nal_t *nal);
#define PTL_EQ_HANDLER_NONE NULL
typedef struct {
- volatile ptl_seq_t sequence;
- ptl_size_t size;
- ptl_event_t *base;
- ptl_handle_any_t cb_eq_handle;
-} ptl_eq_t;
-
-typedef struct {
- ptl_eq_t *eq;
-} ptl_ni_t;
-
-typedef struct {
int max_mes;
int max_mds;
int max_eqs;
#define NRXTHREADS 10 /* max number of receiver threads */
typedef struct _gmnal_data_t {
- spinlock_t cb_lock;
spinlock_t stxd_lock;
struct semaphore stxd_token;
gmnal_stxd_t *stxd;
gmnal_srxd_t *srxd;
struct gm_hash *srxd_hash;
nal_t *nal;
- nal_cb_t *nal_cb;
+ lib_nal_t *libnal;
struct gm_port *gm_port;
unsigned int gm_local_nid;
unsigned int gm_global_nid;
#define GMNAL_GM_LOCK_INIT(a) spin_lock_init(&a->gm_lock);
#define GMNAL_GM_LOCK(a) spin_lock(&a->gm_lock);
#define GMNAL_GM_UNLOCK(a) spin_unlock(&a->gm_lock);
-#define GMNAL_CB_LOCK_INIT(a) spin_lock_init(&a->cb_lock);
/*
* CB NAL
*/
-int gmnal_cb_send(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *,
+int gmnal_cb_send(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
int, ptl_nid_t, ptl_pid_t, unsigned int, struct iovec *, size_t);
-int gmnal_cb_send_pages(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *,
+int gmnal_cb_send_pages(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
int, ptl_nid_t, ptl_pid_t, unsigned int, ptl_kiov_t *, size_t);
-int gmnal_cb_recv(nal_cb_t *, void *, lib_msg_t *,
+int gmnal_cb_recv(lib_nal_t *, void *, lib_msg_t *,
unsigned int, struct iovec *, size_t, size_t);
-int gmnal_cb_recv_pages(nal_cb_t *, void *, lib_msg_t *,
+int gmnal_cb_recv_pages(lib_nal_t *, void *, lib_msg_t *,
unsigned int, ptl_kiov_t *, size_t, size_t);
-int gmnal_cb_read(nal_cb_t *, void *private, void *, user_ptr, size_t);
-
-int gmnal_cb_write(nal_cb_t *, void *private, user_ptr, void *, size_t);
-
-int gmnal_cb_callback(nal_cb_t *, void *, lib_eq_t *, ptl_event_t *);
-
-void *gmnal_cb_malloc(nal_cb_t *, size_t);
-
-void gmnal_cb_free(nal_cb_t *, void *, size_t);
-
-void gmnal_cb_unmap(nal_cb_t *, unsigned int, struct iovec*, void **);
-
-int gmnal_cb_map(nal_cb_t *, unsigned int, struct iovec*, void **);
-
-void gmnal_cb_printf(nal_cb_t *, const char *fmt, ...);
-
-void gmnal_cb_cli(nal_cb_t *, unsigned long *);
-
-void gmnal_cb_sti(nal_cb_t *, unsigned long *);
-
-int gmnal_cb_dist(nal_cb_t *, ptl_nid_t, unsigned long *);
+int gmnal_cb_dist(lib_nal_t *, ptl_nid_t, unsigned long *);
int gmnal_init(void);
#define GMNAL_INIT_NAL_CB(a) do { \
- a->cb_send = gmnal_cb_send; \
- a->cb_send_pages = gmnal_cb_send_pages; \
- a->cb_recv = gmnal_cb_recv; \
- a->cb_recv_pages = gmnal_cb_recv_pages; \
- a->cb_read = gmnal_cb_read; \
- a->cb_write = gmnal_cb_write; \
- a->cb_callback = gmnal_cb_callback; \
- a->cb_malloc = gmnal_cb_malloc; \
- a->cb_free = gmnal_cb_free; \
- a->cb_map = NULL; \
- a->cb_unmap = NULL; \
- a->cb_printf = gmnal_cb_printf; \
- a->cb_cli = gmnal_cb_cli; \
- a->cb_sti = gmnal_cb_sti; \
- a->cb_dist = gmnal_cb_dist; \
- a->nal_data = NULL; \
+ a->libnal_send = gmnal_cb_send; \
+ a->libnal_send_pages = gmnal_cb_send_pages; \
+ a->libnal_recv = gmnal_cb_recv; \
+ a->libnal_recv_pages = gmnal_cb_recv_pages; \
+ a->libnal_map = NULL; \
+ a->libnal_unmap = NULL; \
+ a->libnal_dist = gmnal_cb_dist; \
+ a->libnal_data = NULL; \
} while (0)
/*
* Small messages
*/
-int gmnal_small_rx(nal_cb_t *, void *, lib_msg_t *, unsigned int,
+int gmnal_small_rx(lib_nal_t *, void *, lib_msg_t *, unsigned int,
struct iovec *, size_t, size_t);
-int gmnal_small_tx(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *,
+int gmnal_small_tx(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
int, ptl_nid_t, ptl_pid_t,
unsigned int, struct iovec*, int);
void gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t);
/*
* Large messages
*/
-int gmnal_large_rx(nal_cb_t *, void *, lib_msg_t *, unsigned int,
+int gmnal_large_rx(lib_nal_t *, void *, lib_msg_t *, unsigned int,
struct iovec *, size_t, size_t);
-int gmnal_large_tx(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *,
+int gmnal_large_tx(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
int, ptl_nid_t, ptl_pid_t, unsigned int,
struct iovec*, int);
{ 0 }
};
-
-
-
-
-
-/*
- * gmnal_api_forward
- * This function takes a pack block of arguments from the NAL API
- * module and passes them to the NAL CB module. The CB module unpacks
- * the args and calls the appropriate function indicated by index.
- * Typically this function is used to pass args between kernel and use
- * space.
- * As lgmanl exists entirely in kernel, just pass the arg block directly
- * to the NAL CB, buy passing the args to lib_dispatch
- * Arguments are
- * nal_t nal Our nal
- * int index the api function that initiated this call
- * void *args packed block of function args
- * size_t arg_len length of args block
- * void *ret A return value for the API NAL
- * size_t ret_len Size of the return value
- *
- */
-
-int
-gmnal_api_forward(nal_t *nal, int index, void *args, size_t arg_len,
- void *ret, size_t ret_len)
-{
-
- nal_cb_t *nal_cb = NULL;
- gmnal_data_t *nal_data = NULL;
-
-
-
-
-
- if (!nal || !args || (index < 0) || (arg_len < 0)) {
- CDEBUG(D_ERROR, "Bad args to gmnal_api_forward\n");
- return (PTL_FAIL);
- }
-
- if (ret && (ret_len <= 0)) {
- CDEBUG(D_ERROR, "Bad args to gmnal_api_forward\n");
- return (PTL_FAIL);
- }
-
-
- if (!nal->nal_data) {
- CDEBUG(D_ERROR, "bad nal, no nal data\n");
- return (PTL_FAIL);
- }
-
- nal_data = nal->nal_data;
- CDEBUG(D_INFO, "nal_data is [%p]\n", nal_data);
-
- if (!nal_data->nal_cb) {
- CDEBUG(D_ERROR, "bad nal_data, no nal_cb\n");
- return (PTL_FAIL);
- }
-
- nal_cb = nal_data->nal_cb;
- CDEBUG(D_INFO, "nal_cb is [%p]\n", nal_cb);
-
- CDEBUG(D_PORTALS, "gmnal_api_forward calling lib_dispatch\n");
- lib_dispatch(nal_cb, NULL, index, args, ret);
- CDEBUG(D_PORTALS, "gmnal_api_forward returns from lib_dispatch\n");
-
- return(PTL_OK);
-}
-
-
/*
* gmnal_api_shutdown
* nal_refct == 0 => called on last matching PtlNIFini()
gmnal_api_shutdown(nal_t *nal, int interface)
{
gmnal_data_t *nal_data;
- nal_cb_t *nal_cb;
+ lib_nal_t *libnal;
if (nal->nal_refct != 0)
return;
CDEBUG(D_TRACE, "gmnal_api_shutdown: nal_data [%p]\n", nal_data);
LASSERT(nal == global_nal_data->nal);
- nal_data = nal->nal_data;
+ libnal = (lib_nal_t *)nal->nal_data;
+ nal_data = (gmnal_data_t *)libnal->libnal_data;
LASSERT(nal_data == global_nal_data);
- nal_cb = nal_data->nal_cb;
/* Stop portals calling our ioctl handler */
libcfs_nal_cmd_unregister(GMNAL);
* flag so when lib calls us we fail immediately and dont queue any
* more work but our threads can still call into lib OK. THEN
* shutdown our threads, THEN lib_fini() */
- lib_fini(nal_cb);
+ lib_fini(libnal);
gmnal_stop_rxthread(nal_data);
gmnal_stop_ctthread(nal_data);
GMNAL_GM_UNLOCK(nal_data);
if (nal_data->sysctl)
unregister_sysctl_table (nal_data->sysctl);
- PORTAL_FREE(nal, sizeof(nal_t));
+ /* Don't free 'nal'; it's a static struct */
PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+ PORTAL_FREE(libnal, sizeof(lib_nal_t));
global_nal_data = NULL;
PORTAL_MODULE_UNUSE;
}
-/*
- * gmnal_api_validate
- * validate a user address for use in communications
- * There's nothing to be done here
- */
-int
-gmnal_api_validate(nal_t *nal, void *base, size_t extent)
-{
-
- return(PTL_OK);
-}
-
-
-
-/*
- * gmnal_api_yield
- * Give up the processor
- */
-void
-gmnal_api_yield(nal_t *nal, unsigned long *flags, int milliseconds)
-{
- CDEBUG(D_TRACE, "gmnal_api_yield : nal [%p]\n", nal);
-
- if (milliseconds != 0) {
- CERROR("Blocking yield not implemented yet\n");
- LBUG();
- }
-
- our_cond_resched();
- return;
-}
-
-
-
-/*
- * gmnal_api_lock
- * Take a threadsafe lock
- */
-void
-gmnal_api_lock(nal_t *nal, unsigned long *flags)
-{
-
- gmnal_data_t *nal_data;
- nal_cb_t *nal_cb;
-
- nal_data = nal->nal_data;
- nal_cb = nal_data->nal_cb;
-
- nal_cb->cb_cli(nal_cb, flags);
-
- return;
-}
-
-/*
- * gmnal_api_unlock
- * Release a threadsafe lock
- */
-void
-gmnal_api_unlock(nal_t *nal, unsigned long *flags)
-{
- gmnal_data_t *nal_data;
- nal_cb_t *nal_cb;
-
- nal_data = nal->nal_data;
- nal_cb = nal_data->nal_cb;
-
- nal_cb->cb_sti(nal_cb, flags);
-
- return;
-}
-
-
int
gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
ptl_ni_limits_t *requested_limits,
ptl_ni_limits_t *actual_limits)
{
- nal_cb_t *nal_cb = NULL;
+ lib_nal_t *libnal = NULL;
gmnal_data_t *nal_data = NULL;
gmnal_srxd_t *srxd = NULL;
gm_status_t gm_status;
if (nal->nal_refct != 0) {
if (actual_limits != NULL) {
- nal_data = (gmnal_data_t *)nal->nal_data;
- nal_cb = nal_data->nal_cb;
- *actual_limits = nal->_cb->ni.actual_limits;
+ libnal = (lib_nal_t *)nal->nal_data;
+ *actual_limits = libnal->libnal_ni.ni_actual_limits;
return (PTL_OK);
}
CDEBUG(D_INFO, "Allocd and reset nal_data[%p]\n", nal_data);
CDEBUG(D_INFO, "small_msg_size is [%d]\n", nal_data->small_msg_size);
- PORTAL_ALLOC(nal_cb, sizeof(nal_cb_t));
- if (!nal_cb) {
+ PORTAL_ALLOC(libnal, sizeof(lib_nal_t));
+ if (!libnal) {
PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
return(PTL_NO_SPACE);
}
- memset(nal_cb, 0, sizeof(nal_cb_t));
- CDEBUG(D_INFO, "Allocd and reset nal_cb[%p]\n", nal_cb);
+ memset(libnal, 0, sizeof(lib_nal_t));
+ CDEBUG(D_INFO, "Allocd and reset libnal[%p]\n", libnal);
- GMNAL_INIT_NAL_CB(nal_cb);
+ GMNAL_INIT_NAL_CB(libnal);
/*
* String them all together
*/
- nal->nal_data = (void*)nal_data;
- nal_cb->nal_data = (void*)nal_data;
+ libnal->libnal_data = (void*)nal_data;
nal_data->nal = nal;
- nal_data->nal_cb = nal_cb;
+ nal_data->libnal = libnal;
- GMNAL_CB_LOCK_INIT(nal_data);
GMNAL_GM_LOCK_INIT(nal_data);
if (gm_init() != GM_SUCCESS) {
CDEBUG(D_ERROR, "call to gm_init failed\n");
PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+ PORTAL_FREE(libnal, sizeof(lib_nal_t));
return(PTL_FAIL);
}
gm_finalize();
GMNAL_GM_UNLOCK(nal_data);
PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+ PORTAL_FREE(libnal, sizeof(lib_nal_t));
return(PTL_FAIL);
}
gm_finalize();
GMNAL_GM_UNLOCK(nal_data);
PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+ PORTAL_FREE(libnal, sizeof(lib_nal_t));
return(PTL_FAIL);
}
gm_finalize();
GMNAL_GM_UNLOCK(nal_data);
PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+ PORTAL_FREE(libnal, sizeof(lib_nal_t));
return(PTL_FAIL);
}
gm_finalize();
GMNAL_GM_UNLOCK(nal_data);
PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+ PORTAL_FREE(libnal, sizeof(lib_nal_t));
return(PTL_FAIL);
}
nal_data->gm_local_nid = local_nid;
gm_finalize();
GMNAL_GM_UNLOCK(nal_data);
PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+ PORTAL_FREE(libnal, sizeof(lib_nal_t));
return(PTL_FAIL);
}
CDEBUG(D_INFO, "Global node id is [%u]\n", global_nid);
CDEBUG(D_INFO, "portals_nid is ["LPU64"]\n", process_id.nid);
CDEBUG(D_PORTALS, "calling lib_init\n");
- if (lib_init(nal_cb, process_id,
+ if (lib_init(libnal, nal, process_id,
requested_limits, actual_limits) != PTL_OK) {
CDEBUG(D_ERROR, "lib_init failed\n");
gmnal_stop_rxthread(nal_data);
gm_finalize();
GMNAL_GM_UNLOCK(nal_data);
PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+ PORTAL_FREE(libnal, sizeof(lib_nal_t));
return(PTL_FAIL);
}
/* XXX these cleanup cases should be restructured to
* minimise duplication... */
- lib_fini(nal_cb);
+ lib_fini(libnal);
gmnal_stop_rxthread(nal_data);
gmnal_stop_ctthread(nal_data);
gm_finalize();
GMNAL_GM_UNLOCK(nal_data);
PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
+ PORTAL_FREE(libnal, sizeof(lib_nal_t));
return(PTL_FAIL);
}
*/
void gmnal_fini()
{
- gmnal_data_t *nal_data = global_nal_data;
- nal_t *nal = nal_data->nal;
- nal_cb_t *nal_cb = nal_data->nal_cb;
-
CDEBUG(D_TRACE, "gmnal_fini\n");
LASSERT(global_nal_data == NULL);
#include "gmnal.h"
-int gmnal_cb_recv(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
+int gmnal_cb_recv(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
unsigned int niov, struct iovec *iov, size_t mlen,
size_t rlen)
{
int status = PTL_OK;
- CDEBUG(D_TRACE, "gmnal_cb_recv nal_cb [%p], private[%p], cookie[%p], "
+ CDEBUG(D_TRACE, "gmnal_cb_recv libnal [%p], private[%p], cookie[%p], "
"niov[%d], iov [%p], mlen["LPSZ"], rlen["LPSZ"]\n",
- nal_cb, private, cookie, niov, iov, mlen, rlen);
+ libnal, private, cookie, niov, iov, mlen, rlen);
switch(srxd->type) {
case(GMNAL_SMALL_MESSAGE):
CDEBUG(D_INFO, "gmnal_cb_recv got small message\n");
- status = gmnal_small_rx(nal_cb, private, cookie, niov,
+ status = gmnal_small_rx(libnal, private, cookie, niov,
iov, mlen, rlen);
break;
case(GMNAL_LARGE_MESSAGE_INIT):
CDEBUG(D_INFO, "gmnal_cb_recv got large message init\n");
- status = gmnal_large_rx(nal_cb, private, cookie, niov,
+ status = gmnal_large_rx(libnal, private, cookie, niov,
iov, mlen, rlen);
}
return(status);
}
-int gmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
+int gmnal_cb_recv_pages(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
unsigned int kniov, ptl_kiov_t *kiov, size_t mlen,
size_t rlen)
{
ptl_kiov_t *kiov_dup = kiov;;
- CDEBUG(D_TRACE, "gmnal_cb_recv_pages nal_cb [%p],private[%p], "
+ CDEBUG(D_TRACE, "gmnal_cb_recv_pages libnal [%p],private[%p], "
"cookie[%p], kniov[%d], kiov [%p], mlen["LPSZ"], rlen["LPSZ"]\n",
- nal_cb, private, cookie, kniov, kiov, mlen, rlen);
+ libnal, private, cookie, kniov, kiov, mlen, rlen);
if (srxd->type == GMNAL_SMALL_MESSAGE) {
PORTAL_ALLOC(iovec, sizeof(struct iovec)*kniov);
kiov++;
}
CDEBUG(D_INFO, "calling gmnal_small_rx\n");
- status = gmnal_small_rx(nal_cb, private, cookie, kniov,
+ status = gmnal_small_rx(libnal, private, cookie, kniov,
iovec_dup, mlen, rlen);
for (i=0; i<kniov; i++) {
kunmap(kiov_dup->kiov_page);
}
-int gmnal_cb_send(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
+int gmnal_cb_send(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
unsigned int niov, struct iovec *iov, size_t len)
{
CDEBUG(D_TRACE, "gmnal_cb_send niov[%d] len["LPSZ"] nid["LPU64"]\n",
niov, len, nid);
- nal_data = nal_cb->nal_data;
+ nal_data = libnal->libnal_data;
if (GMNAL_IS_SMALL_MESSAGE(nal_data, niov, iov, len)) {
CDEBUG(D_INFO, "This is a small message send\n");
- gmnal_small_tx(nal_cb, private, cookie, hdr, type, nid, pid,
+ gmnal_small_tx(libnal, private, cookie, hdr, type, nid, pid,
niov, iov, len);
} else {
CDEBUG(D_ERROR, "Large message send it is not supported\n");
- lib_finalize(nal_cb, private, cookie, PTL_FAIL);
+ lib_finalize(libnal, private, cookie, PTL_FAIL);
return(PTL_FAIL);
- gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, pid,
+ gmnal_large_tx(libnal, private, cookie, hdr, type, nid, pid,
niov, iov, len);
}
return(PTL_OK);
}
-int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
- ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, unsigned int kniov, ptl_kiov_t *kiov, size_t len)
+int gmnal_cb_send_pages(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
+ ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+ unsigned int kniov, ptl_kiov_t *kiov, size_t len)
{
int i = 0;
ptl_kiov_t *kiov_dup = kiov;
CDEBUG(D_TRACE, "gmnal_cb_send_pages nid ["LPU64"] niov[%d] len["LPSZ"]\n", nid, kniov, len);
- nal_data = nal_cb->nal_data;
+ nal_data = libnal->libnal_data;
PORTAL_ALLOC(iovec, kniov*sizeof(struct iovec));
iovec_dup = iovec;
if (GMNAL_IS_SMALL_MESSAGE(nal_data, 0, NULL, len)) {
iovec++;
kiov++;
}
- gmnal_small_tx(nal_cb, private, cookie, hdr, type, nid,
+ gmnal_small_tx(libnal, private, cookie, hdr, type, nid,
pid, kniov, iovec_dup, len);
} else {
CDEBUG(D_ERROR, "Large message send it is not supported yet\n");
iovec++;
kiov++;
}
- gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid,
+ gmnal_large_tx(libnal, private, cookie, hdr, type, nid,
pid, kniov, iovec, len);
}
for (i=0; i<kniov; i++) {
return(PTL_OK);
}
-int gmnal_cb_read(nal_cb_t *nal_cb, void *private, void *dst,
- user_ptr src, size_t len)
-{
- gm_bcopy(src, dst, len);
- return(PTL_OK);
-}
-
-int gmnal_cb_write(nal_cb_t *nal_cb, void *private, user_ptr dst,
- void *src, size_t len)
-{
- gm_bcopy(src, dst, len);
- return(PTL_OK);
-}
-
-int gmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq,
- ptl_event_t *ev)
-{
-
- if (eq->event_callback != NULL) {
- CDEBUG(D_INFO, "found callback\n");
- eq->event_callback(ev);
- }
-
- return(PTL_OK);
-}
-
-void *gmnal_cb_malloc(nal_cb_t *nal_cb, size_t len)
-{
- void *ptr = NULL;
- CDEBUG(D_TRACE, "gmnal_cb_malloc len["LPSZ"]\n", len);
- PORTAL_ALLOC(ptr, len);
- return(ptr);
-}
-
-void gmnal_cb_free(nal_cb_t *nal_cb, void *buf, size_t len)
-{
- CDEBUG(D_TRACE, "gmnal_cb_free :: buf[%p] len["LPSZ"]\n", buf, len);
- PORTAL_FREE(buf, len);
- return;
-}
-
-void gmnal_cb_unmap(nal_cb_t *nal_cb, unsigned int niov, struct iovec *iov,
- void **addrkey)
-{
- return;
-}
-
-int gmnal_cb_map(nal_cb_t *nal_cb, unsigned int niov, struct iovec *iov,
- void**addrkey)
-{
- return(PTL_OK);
-}
-
-void gmnal_cb_printf(nal_cb_t *nal_cb, const char *fmt, ...)
-{
- CDEBUG(D_TRACE, "gmnal_cb_printf\n");
- printk(fmt);
- return;
-}
-
-void gmnal_cb_cli(nal_cb_t *nal_cb, unsigned long *flags)
-{
- gmnal_data_t *nal_data = (gmnal_data_t*)nal_cb->nal_data;
-
- spin_lock_irqsave(&nal_data->cb_lock, *flags);
- return;
-}
-
-void gmnal_cb_sti(nal_cb_t *nal_cb, unsigned long *flags)
-{
- gmnal_data_t *nal_data = (gmnal_data_t*)nal_cb->nal_data;
-
- spin_unlock_irqrestore(&nal_data->cb_lock, *flags);
- return;
-}
-
-void gmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq, ptl_event_t *ev)
-{
- /* holding cb_lock */
-
- if (eq->event_callback != NULL)
- eq->event_callback(ev);
-
- /* We will wake theads sleeping in yield() here, AFTER the
- * callback, when we implement blocking yield */
-}
-
-int gmnal_cb_dist(nal_cb_t *nal_cb, ptl_nid_t nid, unsigned long *dist)
+int gmnal_cb_dist(lib_nal_t *libnal, ptl_nid_t nid, unsigned long *dist)
{
CDEBUG(D_TRACE, "gmnal_cb_dist\n");
if (dist)
unsigned int snode, sport, type, length;
gmnal_msghdr_t *gmnal_msghdr;
ptl_hdr_t *portals_hdr;
+ int rc;
CDEBUG(D_INFO, "nal_data [%p], we[%p] type [%d]\n",
nal_data, we, gmnal_type);
*/
srxd = gmnal_rxbuffer_to_srxd(nal_data, buffer);
CDEBUG(D_INFO, "Back from gmnal_rxbuffer_to_srxd\n");
- srxd->nal_data = nal_data;
if (!srxd) {
CDEBUG(D_ERROR, "Failed to get receive descriptor\n");
- lib_parse(nal_data->nal_cb, portals_hdr, srxd);
+ /* I think passing a NULL srxd to lib_parse will crash
+ * gmnal_recv() */
+ LBUG();
+ lib_parse(nal_data->libnal, portals_hdr, srxd);
return(GMNAL_STATUS_FAIL);
}
return(GMNAL_STATUS_OK);
}
+ srxd->nal_data = nal_data;
srxd->type = gmnal_type;
srxd->nsiov = gmnal_msghdr->niov;
srxd->gm_source_node = gmnal_msghdr->sender_node_id;
* cb_recv is responsible for returning the buffer
* for future receive
*/
- lib_parse(nal_data->nal_cb, portals_hdr, srxd);
+ rc = lib_parse(nal_data->libnal, portals_hdr, srxd);
+
+ if (rc != PTL_OK) {
+ /* I just received garbage; take appropriate action... */
+ LBUG();
+ }
return(GMNAL_STATUS_OK);
}
* Call lib_finalize
*/
int
-gmnal_small_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
+gmnal_small_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
unsigned int niov, struct iovec *iov, size_t mlen, size_t rlen)
{
gmnal_srxd_t *srxd = NULL;
void *buffer = NULL;
- gmnal_data_t *nal_data = (gmnal_data_t*)nal_cb->nal_data;
+ gmnal_data_t *nal_data = (gmnal_data_t*)libnal->libnal_data;
CDEBUG(D_TRACE, "niov [%d] mlen["LPSZ"]\n", niov, mlen);
if (!private) {
CDEBUG(D_ERROR, "gmnal_small_rx no context\n");
- lib_finalize(nal_cb, private, cookie, PTL_FAIL);
+ lib_finalize(libnal, private, cookie, PTL_FAIL);
return(PTL_FAIL);
}
* let portals library know receive is complete
*/
CDEBUG(D_PORTALS, "calling lib_finalize\n");
- lib_finalize(nal_cb, private, cookie, PTL_OK);
+ lib_finalize(libnal, private, cookie, PTL_OK);
/*
* return buffer so it can be used again
*/
* The callback function informs when the send is complete.
*/
int
-gmnal_small_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
+gmnal_small_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
ptl_hdr_t *hdr, int type, ptl_nid_t global_nid, ptl_pid_t pid,
unsigned int niov, struct iovec *iov, int size)
{
- gmnal_data_t *nal_data = (gmnal_data_t*)nal_cb->nal_data;
+ gmnal_data_t *nal_data = (gmnal_data_t*)libnal->libnal_data;
gmnal_stxd_t *stxd = NULL;
void *buffer = NULL;
gmnal_msghdr_t *msghdr = NULL;
unsigned int local_nid;
gm_status_t gm_status = GM_SUCCESS;
- CDEBUG(D_TRACE, "gmnal_small_tx nal_cb [%p] private [%p] cookie [%p] "
+ CDEBUG(D_TRACE, "gmnal_small_tx libnal [%p] private [%p] cookie [%p] "
"hdr [%p] type [%d] global_nid ["LPU64"] pid [%d] niov [%d] "
- "iov [%p] size [%d]\n", nal_cb, private, cookie, hdr, type,
+ "iov [%p] size [%d]\n", libnal, private, cookie, hdr, type,
global_nid, pid, niov, iov, size);
CDEBUG(D_INFO, "portals_hdr:: dest_nid ["LPU64"], src_nid ["LPU64"]\n",
gmnal_stxd_t *stxd = (gmnal_stxd_t*)context;
lib_msg_t *cookie = stxd->cookie;
gmnal_data_t *nal_data = (gmnal_data_t*)stxd->nal_data;
- nal_cb_t *nal_cb = nal_data->nal_cb;
+ lib_nal_t *libnal = nal_data->libnal;
if (!stxd) {
CDEBUG(D_TRACE, "send completion event for unknown stxd\n");
return;
}
gmnal_return_stxd(nal_data, stxd);
- lib_finalize(nal_cb, stxd, cookie, PTL_OK);
+ lib_finalize(libnal, stxd, cookie, PTL_OK);
return;
}
* this ack, deregister the memory. Only 1 send token is required here.
*/
int
-gmnal_large_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
+gmnal_large_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
ptl_hdr_t *hdr, int type, ptl_nid_t global_nid, ptl_pid_t pid,
unsigned int niov, struct iovec *iov, int size)
{
int niov_dup;
- CDEBUG(D_TRACE, "gmnal_large_tx nal_cb [%p] private [%p], cookie [%p] "
+ CDEBUG(D_TRACE, "gmnal_large_tx libnal [%p] private [%p], cookie [%p] "
"hdr [%p], type [%d] global_nid ["LPU64"], pid [%d], niov [%d], "
- "iov [%p], size [%d]\n", nal_cb, private, cookie, hdr, type,
+ "iov [%p], size [%d]\n", libnal, private, cookie, hdr, type,
global_nid, pid, niov, iov, size);
- if (nal_cb)
- nal_data = (gmnal_data_t*)nal_cb->nal_data;
+ if (libnal)
+ nal_data = (gmnal_data_t*)libnal->libnal_data;
else {
- CDEBUG(D_ERROR, "no nal_cb.\n");
+ CDEBUG(D_ERROR, "no libnal.\n");
return(GMNAL_STATUS_FAIL);
}
* data from the sender.
*/
int
-gmnal_large_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
+gmnal_large_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
unsigned int nriov, struct iovec *riov, size_t mlen,
size_t rlen)
{
- gmnal_data_t *nal_data = nal_cb->nal_data;
+ gmnal_data_t *nal_data = libnal->libnal_data;
gmnal_srxd_t *srxd = (gmnal_srxd_t*)private;
void *buffer = NULL;
struct iovec *riov_dup;
gmnal_msghdr_t *msghdr = NULL;
gm_status_t gm_status;
- CDEBUG(D_TRACE, "gmnal_large_rx :: nal_cb[%p], private[%p], "
+ CDEBUG(D_TRACE, "gmnal_large_rx :: libnal[%p], private[%p], "
"cookie[%p], niov[%d], iov[%p], mlen["LPSZ"], rlen["LPSZ"]\n",
- nal_cb, private, cookie, nriov, riov, mlen, rlen);
+ libnal, private, cookie, nriov, riov, mlen, rlen);
if (!srxd) {
CDEBUG(D_ERROR, "gmnal_large_rx no context\n");
- lib_finalize(nal_cb, private, cookie, PTL_FAIL);
+ lib_finalize(libnal, private, cookie, PTL_FAIL);
return(PTL_FAIL);
}
gmnal_ltxd_t *ltxd = (gmnal_ltxd_t*)context;
gmnal_srxd_t *srxd = ltxd->srxd;
- nal_cb_t *nal_cb = srxd->nal_data->nal_cb;
+ lib_nal_t *libnal = srxd->nal_data->libnal;
int lastone;
struct iovec *riov;
int nriov;
* Let our client application proceed
*/
CDEBUG(D_ERROR, "final callback context[%p]\n", srxd);
- lib_finalize(nal_cb, srxd, srxd->cookie, PTL_OK);
+ lib_finalize(libnal, srxd, srxd->cookie, PTL_OK);
/*
* send an ack to the sender to let him know we got the data
void
gmnal_large_tx_ack_received(gmnal_data_t *nal_data, gmnal_srxd_t *srxd)
{
- nal_cb_t *nal_cb = nal_data->nal_cb;
+ lib_nal_t *libnal = nal_data->libnal;
gmnal_stxd_t *stxd = NULL;
gmnal_msghdr_t *msghdr = NULL;
void *buffer = NULL;
CDEBUG(D_INFO, "gmnal_large_tx_ack_received stxd [%p]\n", stxd);
- lib_finalize(nal_cb, stxd, stxd->cookie, PTL_OK);
+ lib_finalize(libnal, stxd, stxd->cookie, PTL_OK);
/*
* extract the iovec from the stxd, deregister the memory.
#define QSWNAL_SYSCTL_COPY_SMALL_FWD 2
static ctl_table kqswnal_ctl_table[] = {
+ {QSWNAL_SYSCTL_OPTIMIZED_GETS, "optimized_puts",
+ &kqswnal_tunables.kqn_optimized_puts, sizeof (int),
+ 0644, NULL, &proc_dointvec},
{QSWNAL_SYSCTL_OPTIMIZED_GETS, "optimized_gets",
&kqswnal_tunables.kqn_optimized_gets, sizeof (int),
0644, NULL, &proc_dointvec},
};
#endif
-static int
-kqswnal_forward(nal_t *nal,
- int id,
- void *args, size_t args_len,
- void *ret, size_t ret_len)
-{
- kqswnal_data_t *k = nal->nal_data;
- nal_cb_t *nal_cb = k->kqn_cb;
-
- LASSERT (nal == &kqswnal_api);
- LASSERT (k == &kqswnal_data);
- LASSERT (nal_cb == &kqswnal_lib);
-
- lib_dispatch(nal_cb, k, id, args, ret); /* nal needs k */
- return (PTL_OK);
-}
-
-static void
-kqswnal_lock (nal_t *nal, unsigned long *flags)
-{
- kqswnal_data_t *k = nal->nal_data;
- nal_cb_t *nal_cb = k->kqn_cb;
-
- LASSERT (nal == &kqswnal_api);
- LASSERT (k == &kqswnal_data);
- LASSERT (nal_cb == &kqswnal_lib);
-
- nal_cb->cb_cli(nal_cb,flags);
-}
-
-static void
-kqswnal_unlock(nal_t *nal, unsigned long *flags)
-{
- kqswnal_data_t *k = nal->nal_data;
- nal_cb_t *nal_cb = k->kqn_cb;
-
- LASSERT (nal == &kqswnal_api);
- LASSERT (k == &kqswnal_data);
- LASSERT (nal_cb == &kqswnal_lib);
-
- nal_cb->cb_sti(nal_cb,flags);
-}
-
-static int
-kqswnal_yield(nal_t *nal, unsigned long *flags, int milliseconds)
-{
- /* NB called holding statelock */
- wait_queue_t wait;
- unsigned long now = jiffies;
-
- CDEBUG (D_NET, "yield\n");
-
- if (milliseconds == 0) {
- if (need_resched())
- schedule();
- return 0;
- }
-
- init_waitqueue_entry(&wait, current);
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&kqswnal_data.kqn_yield_waitq, &wait);
-
- kqswnal_unlock(nal, flags);
-
- if (milliseconds < 0)
- schedule ();
- else
- schedule_timeout((milliseconds * HZ) / 1000);
-
- kqswnal_lock(nal, flags);
-
- remove_wait_queue(&kqswnal_data.kqn_yield_waitq, &wait);
-
- if (milliseconds > 0) {
- milliseconds -= ((jiffies - now) * 1000) / HZ;
- if (milliseconds < 0)
- milliseconds = 0;
- }
-
- return (milliseconds);
-}
-
int
kqswnal_get_tx_desc (struct portals_cfg *pcfg)
{
kqswnal_data.kqn_nid_offset);
kqswnal_data.kqn_nid_offset =
pcfg->pcfg_nid - kqswnal_data.kqn_elanid;
- kqswnal_lib.ni.nid = pcfg->pcfg_nid;
+ kqswnal_lib.libnal_ni.ni_pid.nid = pcfg->pcfg_nid;
return (0);
default:
ptl_process_id_t my_process_id;
int pkmem = atomic_read(&portal_kmemory);
+ LASSERT (nal == &kqswnal_api);
+
if (nal->nal_refct != 0) {
if (actual_limits != NULL)
- *actual_limits = kqswnal_lib.ni.actual_limits;
+ *actual_limits = kqswnal_lib.libnal_ni.ni_actual_limits;
/* This module got the first ref */
PORTAL_MODULE_USE;
return (PTL_OK);
CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&portal_kmemory));
- memset(&kqswnal_rpc_success, 0, sizeof(kqswnal_rpc_success));
- memset(&kqswnal_rpc_failed, 0, sizeof(kqswnal_rpc_failed));
-#if MULTIRAIL_EKC
- kqswnal_rpc_failed.Data[0] = -ECONNREFUSED;
-#else
- kqswnal_rpc_failed.Status = -ECONNREFUSED;
-#endif
/* ensure all pointers NULL etc */
memset (&kqswnal_data, 0, sizeof (kqswnal_data));
- kqswnal_data.kqn_cb = &kqswnal_lib;
-
INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds);
INIT_LIST_HEAD (&kqswnal_data.kqn_nblk_idletxds);
INIT_LIST_HEAD (&kqswnal_data.kqn_activetxds);
spin_lock_init (&kqswnal_data.kqn_sched_lock);
init_waitqueue_head (&kqswnal_data.kqn_sched_waitq);
- spin_lock_init (&kqswnal_data.kqn_statelock);
- init_waitqueue_head (&kqswnal_data.kqn_yield_waitq);
+ /* Leave kqn_rpc_success zeroed */
+#if MULTIRAIL_EKC
+ kqswnal_data.kqn_rpc_failed.Data[0] = -ECONNREFUSED;
+#else
+ kqswnal_data.kqn_rpc_failed.Status = -ECONNREFUSED;
+#endif
/* pointers/lists/locks initialised */
kqswnal_data.kqn_init = KQN_INIT_DATA;
kqswnal_data.kqn_ep = ep_system();
if (kqswnal_data.kqn_ep == NULL) {
CERROR("Can't initialise EKC\n");
- kqswnal_shutdown(&kqswnal_api);
+ kqswnal_shutdown(nal);
return (PTL_IFACE_INVALID);
}
if (ep_waitfor_nodeid(kqswnal_data.kqn_ep) == ELAN_INVALID_NODE) {
CERROR("Can't get elan ID\n");
- kqswnal_shutdown(&kqswnal_api);
+ kqswnal_shutdown(nal);
return (PTL_IFACE_INVALID);
}
#else
if (kqswnal_data.kqn_ep == NULL)
{
CERROR ("Can't get elan device 0\n");
- kqswnal_shutdown(&kqswnal_api);
+ kqswnal_shutdown(nal);
return (PTL_IFACE_INVALID);
}
#endif
if (kqswnal_data.kqn_eptx == NULL)
{
CERROR ("Can't allocate transmitter\n");
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_NO_SPACE);
}
if (kqswnal_data.kqn_eprx_small == NULL)
{
CERROR ("Can't install small msg receiver\n");
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_NO_SPACE);
}
if (kqswnal_data.kqn_eprx_large == NULL)
{
CERROR ("Can't install large msg receiver\n");
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_NO_SPACE);
}
EP_PERM_WRITE);
if (kqswnal_data.kqn_ep_tx_nmh == NULL) {
CERROR("Can't reserve tx dma space\n");
- kqswnal_shutdown(&kqswnal_api);
+ kqswnal_shutdown(nal);
return (PTL_NO_SPACE);
}
#else
if (rc != DDI_SUCCESS)
{
CERROR ("Can't reserve rx dma space\n");
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_NO_SPACE);
}
#endif
EP_PERM_WRITE);
if (kqswnal_data.kqn_ep_tx_nmh == NULL) {
CERROR("Can't reserve rx dma space\n");
- kqswnal_shutdown(&kqswnal_api);
+ kqswnal_shutdown(nal);
return (PTL_NO_SPACE);
}
#else
if (rc != DDI_SUCCESS)
{
CERROR ("Can't reserve rx dma space\n");
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_NO_SPACE);
}
#endif
sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS));
if (kqswnal_data.kqn_txds == NULL)
{
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_NO_SPACE);
}
PORTAL_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE);
if (ktx->ktx_buffer == NULL)
{
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_NO_SPACE);
}
sizeof (kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE));
if (kqswnal_data.kqn_rxds == NULL)
{
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_NO_SPACE);
}
struct page *page = alloc_page(GFP_KERNEL);
if (page == NULL) {
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_NO_SPACE);
}
my_process_id.nid = kqswnal_elanid2nid(kqswnal_data.kqn_elanid);
my_process_id.pid = 0;
- rc = lib_init(&kqswnal_lib, my_process_id,
+ rc = lib_init(&kqswnal_lib, nal, my_process_id,
requested_limits, actual_limits);
if (rc != PTL_OK)
{
CERROR ("lib_init failed %d\n", rc);
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (rc);
}
kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
/* NB this enqueue can allocate/sleep (attr == 0) */
+ krx->krx_state = KRX_POSTED;
#if MULTIRAIL_EKC
rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
&krx->krx_elanbuffer, 0);
if (rc != EP_SUCCESS)
{
CERROR ("failed ep_queue_receive %d\n", rc);
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_FAIL);
}
}
if (rc != 0)
{
CERROR ("failed to spawn scheduling thread: %d\n", rc);
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_FAIL);
}
}
rc = libcfs_nal_cmd_register (QSWNAL, &kqswnal_cmd, NULL);
if (rc != 0) {
CERROR ("Can't initialise command interface (rc = %d)\n", rc);
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_FAIL);
}
{
int rc;
- kqswnal_api.startup = kqswnal_startup;
- kqswnal_api.shutdown = kqswnal_shutdown;
- kqswnal_api.forward = kqswnal_forward;
- kqswnal_api.yield = kqswnal_yield;
- kqswnal_api.lock = kqswnal_lock;
- kqswnal_api.unlock = kqswnal_unlock;
- kqswnal_api.nal_data = &kqswnal_data;
-
- kqswnal_lib.nal_data = &kqswnal_data;
+ kqswnal_api.nal_ni_init = kqswnal_startup;
+ kqswnal_api.nal_ni_fini = kqswnal_shutdown;
/* Initialise dynamic tunables to defaults once only */
+ kqswnal_tunables.kqn_optimized_puts = KQSW_OPTIMIZED_PUTS;
kqswnal_tunables.kqn_optimized_gets = KQSW_OPTIMIZED_GETS;
rc = ptl_register_nal(QSWNAL, &kqswnal_api);
#define KQSW_RESCHED 100 /* # busy loops that forces scheduler to yield */
-#define KQSW_OPTIMIZED_GETS 1 /* optimized gets? */
+#define KQSW_OPTIMIZED_GETS 1 /* optimize gets >= this size */
+#define KQSW_OPTIMIZED_PUTS (32<<10) /* optimize puts >= this size */
#define KQSW_COPY_SMALL_FWD 0 /* copy small fwd messages to pre-mapped buffer? */
/*
int krx_npages; /* # pages in receive buffer */
int krx_nob; /* Number Of Bytes received into buffer */
int krx_rpc_reply_needed; /* peer waiting for EKC RPC reply */
- int krx_rpc_reply_sent; /* rpc reply sent */
+ int krx_rpc_reply_status; /* what status to send */
+ int krx_state; /* what this RX is doing */
atomic_t krx_refcount; /* how to tell when rpc is done */
kpr_fwd_desc_t krx_fwd; /* embedded forwarding descriptor */
ptl_kiov_t krx_kiov[KQSW_NRXMSGPAGES_LARGE]; /* buffer frags */
} kqswnal_rx_t;
+#define KRX_POSTED 1 /* receiving */
+#define KRX_PARSE 2 /* ready to be parsed */
+#define KRX_COMPLETING 3 /* waiting to be completed */
+
+
typedef struct
{
struct list_head ktx_list; /* enqueue idle/active */
int ktx_nmappedpages; /* # pages mapped for current message */
int ktx_port; /* destination ep port */
ptl_nid_t ktx_nid; /* destination node */
- void *ktx_args[2]; /* completion passthru */
+ void *ktx_args[3]; /* completion passthru */
char *ktx_buffer; /* pre-allocated contiguous buffer for hdr + small payloads */
unsigned long ktx_launchtime; /* when (in jiffies) the transmit was launched */
} kqswnal_tx_t;
#define KTX_IDLE 0 /* on kqn_(nblk_)idletxds */
-#define KTX_SENDING 1 /* local send */
-#define KTX_FORWARDING 2 /* routing a packet */
-#define KTX_GETTING 3 /* local optimised get */
+#define KTX_FORWARDING 1 /* sending a forwarded packet */
+#define KTX_SENDING 2 /* normal send */
+#define KTX_GETTING 3 /* sending optimised get */
+#define KTX_PUTTING 4 /* sending optimised put */
+#define KTX_RDMAING 5 /* handling optimised put/get */
typedef struct
{
/* dynamic tunables... */
+ int kqn_optimized_puts; /* optimize PUTs with payload >= this many bytes (0 disables) */
int kqn_optimized_gets; /* optimized GETs? */
#if CONFIG_SYSCTL
struct ctl_table_header *kqn_sysctl; /* sysctl interface */
struct list_head kqn_delayedfwds; /* delayed forwards */
struct list_head kqn_delayedtxds; /* delayed transmits */
- spinlock_t kqn_statelock; /* cb_cli/cb_sti */
- wait_queue_head_t kqn_yield_waitq; /* where yield waits */
- nal_cb_t *kqn_cb; /* -> kqswnal_lib */
#if MULTIRAIL_EKC
EP_SYS *kqn_ep; /* elan system */
EP_NMH *kqn_ep_tx_nmh; /* elan reserved tx vaddrs */
ptl_nid_t kqn_nid_offset; /* this cluster's NID offset */
int kqn_nnodes; /* this cluster's size */
int kqn_elanid; /* this nodes's elan ID */
+
+ EP_STATUSBLK kqn_rpc_success; /* preset RPC reply status blocks */
+ EP_STATUSBLK kqn_rpc_failed;
} kqswnal_data_t;
/* kqn_init state */
#define KQN_INIT_LIB 2
#define KQN_INIT_ALL 3
-extern nal_cb_t kqswnal_lib;
+extern lib_nal_t kqswnal_lib;
extern nal_t kqswnal_api;
extern kqswnal_tunables_t kqswnal_tunables;
extern kqswnal_data_t kqswnal_data;
-/* global pre-prepared replies to keep off the stack */
-extern EP_STATUSBLK kqswnal_rpc_success;
-extern EP_STATUSBLK kqswnal_rpc_failed;
-
extern int kqswnal_thread_start (int (*fn)(void *arg), void *arg);
extern void kqswnal_rxhandler(EP_RXD *rxd);
extern int kqswnal_scheduler (void *);
extern void kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd);
-extern void kqswnal_dma_reply_complete (EP_RXD *rxd);
-extern void kqswnal_requeue_rx (kqswnal_rx_t *krx);
+extern void kqswnal_rx_done (kqswnal_rx_t *krx);
static inline ptl_nid_t
kqswnal_elanid2nid (int elanid)
return (nid - kqswnal_data.kqn_nid_offset);
}
+/* Return the NID of the peer that sent this receive: the Elan node
+ * reported by ep_rxd_node() for the descriptor's rxd, converted with
+ * kqswnal_elanid2nid(). */
+static inline ptl_nid_t
+kqswnal_rx_nid(kqswnal_rx_t *krx)
+{
+ int elanid = ep_rxd_node(krx->krx_rxd);
+
+ return (kqswnal_elanid2nid(elanid));
+}
+
static inline int
kqswnal_pages_spanned (void *base, int nob)
{
}
#endif
-static inline void kqswnal_rx_done (kqswnal_rx_t *krx)
+static inline void kqswnal_rx_decref (kqswnal_rx_t *krx)
{
LASSERT (atomic_read (&krx->krx_refcount) > 0);
if (atomic_dec_and_test (&krx->krx_refcount))
- kqswnal_requeue_rx(krx);
+ kqswnal_rx_done(krx); /* that was the last ref on krx */
}
#if MULTIRAIL_EKC
#include "qswnal.h"
-EP_STATUSBLK kqswnal_rpc_success;
-EP_STATUSBLK kqswnal_rpc_failed;
-
/*
* LIB functions follow
*
*/
-static ptl_err_t
-kqswnal_read(nal_cb_t *nal, void *private, void *dst_addr, user_ptr src_addr,
- size_t len)
-{
- CDEBUG (D_NET, LPX64": reading "LPSZ" bytes from %p -> %p\n",
- nal->ni.nid, len, src_addr, dst_addr );
- memcpy( dst_addr, src_addr, len );
-
- return (PTL_OK);
-}
-
-static ptl_err_t
-kqswnal_write(nal_cb_t *nal, void *private, user_ptr dst_addr, void *src_addr,
- size_t len)
-{
- CDEBUG (D_NET, LPX64": writing "LPSZ" bytes from %p -> %p\n",
- nal->ni.nid, len, src_addr, dst_addr );
- memcpy( dst_addr, src_addr, len );
-
- return (PTL_OK);
-}
-
-static void *
-kqswnal_malloc(nal_cb_t *nal, size_t len)
-{
- void *buf;
-
- PORTAL_ALLOC(buf, len);
- return (buf);
-}
-
-static void
-kqswnal_free(nal_cb_t *nal, void *buf, size_t len)
-{
- PORTAL_FREE(buf, len);
-}
-
-static void
-kqswnal_printf (nal_cb_t * nal, const char *fmt, ...)
-{
- va_list ap;
- char msg[256];
-
- va_start (ap, fmt);
- vsnprintf (msg, sizeof (msg), fmt, ap); /* sprint safely */
- va_end (ap);
-
- msg[sizeof (msg) - 1] = 0; /* ensure terminated */
-
- CDEBUG (D_NET, "%s", msg);
-}
-
-#if (defined(CONFIG_SPARC32) || defined(CONFIG_SPARC64))
-# error "Can't save/restore irq contexts in different procedures"
-#endif
-
-static void
-kqswnal_cli(nal_cb_t *nal, unsigned long *flags)
-{
- kqswnal_data_t *data= nal->nal_data;
-
- spin_lock_irqsave(&data->kqn_statelock, *flags);
-}
-
-
-static void
-kqswnal_sti(nal_cb_t *nal, unsigned long *flags)
-{
- kqswnal_data_t *data= nal->nal_data;
-
- spin_unlock_irqrestore(&data->kqn_statelock, *flags);
-}
-
-static void
-kqswnal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev)
-{
- /* holding kqn_statelock */
-
- if (eq->event_callback != NULL)
- eq->event_callback(ev);
-
- if (waitqueue_active(&kqswnal_data.kqn_yield_waitq))
- wake_up_all(&kqswnal_data.kqn_yield_waitq);
-}
-
static int
-kqswnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
+kqswnal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist)
{
- if (nid == nal->ni.nid)
+ if (nid == nal->libnal_ni.ni_pid.nid)
*dist = 0; /* it's me */
else if (kqswnal_nid2elanid (nid) >= 0)
*dist = 1; /* it's my peer */
do {
int fraglen = kiov->kiov_len - offset;
- /* nob exactly spans the iovs */
- LASSERT (fraglen <= nob);
- /* each frag fits in a page */
+ /* each page frag is contained in one page */
LASSERT (kiov->kiov_offset + kiov->kiov_len <= PAGE_SIZE);
+ if (fraglen > nob)
+ fraglen = nob;
+
nmapped++;
if (nmapped > maxmapped) {
CERROR("Can't map message in %d pages (max %d)\n",
do {
int fraglen = iov->iov_len - offset;
- long npages = kqswnal_pages_spanned (iov->iov_base, fraglen);
-
- /* nob exactly spans the iovs */
- LASSERT (fraglen <= nob);
+ long npages;
+ if (fraglen > nob)
+ fraglen = nob;
+ npages = kqswnal_pages_spanned (iov->iov_base, fraglen);
+
nmapped += npages;
if (nmapped > maxmapped) {
CERROR("Can't map message in %d pages (max %d)\n",
void
kqswnal_tx_done (kqswnal_tx_t *ktx, int error)
{
- lib_msg_t *msg;
- lib_msg_t *repmsg = NULL;
-
switch (ktx->ktx_state) {
case KTX_FORWARDING: /* router asked me to forward this packet */
kpr_fwd_done (&kqswnal_data.kqn_router,
(kpr_fwd_desc_t *)ktx->ktx_args[0], error);
break;
- case KTX_SENDING: /* packet sourced locally */
- lib_finalize (&kqswnal_lib, ktx->ktx_args[0],
+ case KTX_RDMAING: /* optimized GET/PUT handled */
+ case KTX_PUTTING: /* optimized PUT sent */
+ case KTX_SENDING: /* normal send */
+ lib_finalize (&kqswnal_lib, NULL,
(lib_msg_t *)ktx->ktx_args[1],
- (error == 0) ? PTL_OK :
- (error == -ENOMEM) ? PTL_NO_SPACE : PTL_FAIL);
+ (error == 0) ? PTL_OK : PTL_FAIL);
break;
- case KTX_GETTING: /* Peer has DMA-ed direct? */
- msg = (lib_msg_t *)ktx->ktx_args[1];
-
- if (error == 0) {
- repmsg = lib_create_reply_msg (&kqswnal_lib,
- ktx->ktx_nid, msg);
- if (repmsg == NULL)
- error = -ENOMEM;
- }
-
- if (error == 0) {
- lib_finalize (&kqswnal_lib, ktx->ktx_args[0],
- msg, PTL_OK);
- lib_finalize (&kqswnal_lib, NULL, repmsg, PTL_OK);
- } else {
- lib_finalize (&kqswnal_lib, ktx->ktx_args[0], msg,
- (error == -ENOMEM) ? PTL_NO_SPACE : PTL_FAIL);
- }
+ case KTX_GETTING: /* optimized GET sent & REPLY received */
+ /* Complete the GET with success since we can't avoid
+ * delivering a REPLY event; we committed to it when we
+ * launched the GET */
+ lib_finalize (&kqswnal_lib, NULL,
+ (lib_msg_t *)ktx->ktx_args[1], PTL_OK);
+ lib_finalize (&kqswnal_lib, NULL,
+ (lib_msg_t *)ktx->ktx_args[2],
+ (error == 0) ? PTL_OK : PTL_FAIL);
break;
default:
kqswnal_notify_peer_down(ktx);
status = -EHOSTDOWN;
- } else if (ktx->ktx_state == KTX_GETTING) {
- /* RPC completed OK; what did our peer put in the status
+ } else switch (ktx->ktx_state) {
+
+ case KTX_GETTING:
+ case KTX_PUTTING:
+ /* RPC completed OK; but what did our peer put in the status
* block? */
#if MULTIRAIL_EKC
status = ep_txd_statusblk(txd)->Data[0];
#else
status = ep_txd_statusblk(txd)->Status;
#endif
- } else {
+ break;
+
+ case KTX_FORWARDING:
+ case KTX_SENDING:
status = 0;
+ break;
+
+ default:
+ LBUG();
+ break;
}
kqswnal_tx_done (ktx, status);
return (-ESHUTDOWN);
LASSERT (dest >= 0); /* must be a peer */
- if (ktx->ktx_state == KTX_GETTING) {
- /* NB ktx_frag[0] is the GET hdr + kqswnal_remotemd_t. The
- * other frags are the GET sink which we obviously don't
- * send here :) */
-#if MULTIRAIL_EKC
+
+ switch (ktx->ktx_state) {
+ case KTX_GETTING:
+ case KTX_PUTTING:
+ /* NB ktx_frag[0] is the GET/PUT hdr + kqswnal_remotemd_t.
+ * The other frags are the payload, awaiting RDMA */
rc = ep_transmit_rpc(kqswnal_data.kqn_eptx, dest,
ktx->ktx_port, attr,
kqswnal_txhandler, ktx,
NULL, ktx->ktx_frags, 1);
-#else
- rc = ep_transmit_rpc(kqswnal_data.kqn_eptx, dest,
- ktx->ktx_port, attr, kqswnal_txhandler,
- ktx, NULL, ktx->ktx_frags, 1);
-#endif
- } else {
+ break;
+
+ case KTX_FORWARDING:
+ case KTX_SENDING:
#if MULTIRAIL_EKC
rc = ep_transmit_message(kqswnal_data.kqn_eptx, dest,
ktx->ktx_port, attr,
kqswnal_txhandler, ktx,
ktx->ktx_frags, ktx->ktx_nfrag);
#endif
+ break;
+
+ default:
+ LBUG();
+ rc = -EINVAL; /* no compiler warning please */
+ break;
}
switch (rc) {
}
}
+#if 0
static char *
hdr_type_string (ptl_hdr_t *hdr)
{
}
} /* end of print_hdr() */
+#endif
#if !MULTIRAIL_EKC
void
CERROR ("DATAVEC too small\n");
return (-E2BIG);
}
+#else
+/* Check that a local and a remote fragment list can be paired for RDMA:
+ * both lists must hold the same number of entries and each pair of
+ * entries must have the same nmd_len. Returns 0 when they match and
+ * -EINVAL (after logging the mismatch) when they do not. */
+int
+kqswnal_check_rdma (int nlfrag, EP_NMD *lfrag,
+ int nrfrag, EP_NMD *rfrag)
+{
+ int idx = 0;
+
+ if (nlfrag != nrfrag) {
+ CERROR("Can't cope with unequal # frags: %d local %d remote\n",
+ nlfrag, nrfrag);
+ return (-EINVAL);
+ }
+
+ /* same count: now compare the fragments pairwise */
+ while (idx < nlfrag) {
+ if (lfrag[idx].nmd_len != rfrag[idx].nmd_len) {
+ CERROR("Can't cope with unequal frags %d(%d):"
+ " %d local %d remote\n",
+ idx, nlfrag, lfrag[idx].nmd_len, rfrag[idx].nmd_len);
+ return (-EINVAL);
+ }
+ idx++;
+ }
+
+ return (0);
+}
#endif
-int
-kqswnal_dma_reply (kqswnal_tx_t *ktx, int nfrag,
- struct iovec *iov, ptl_kiov_t *kiov,
- int offset, int nob)
+/* Locate and sanity-check the remote memory descriptor (RMD) that follows
+ * the portals header in the first page of krx's receive buffer.
+ *
+ * The header must carry message type 'type' and a src_nid equal to the
+ * NID of the node the message arrived from (which is asserted to equal
+ * 'expected_nid'), and the received byte count must cover the RMD
+ * including all kqrmd_nfrag fragment entries.
+ *
+ * Returns the RMD, or NULL (after logging) if any check fails. */
+kqswnal_remotemd_t *
+kqswnal_parse_rmd (kqswnal_rx_t *krx, int type, ptl_nid_t expected_nid)
{
- kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
char *buffer = (char *)page_address(krx->krx_kiov[0].kiov_page);
+ ptl_hdr_t *hdr = (ptl_hdr_t *)buffer;
kqswnal_remotemd_t *rmd = (kqswnal_remotemd_t *)(buffer + KQSW_HDR_SIZE);
- int rc;
-#if MULTIRAIL_EKC
- int i;
-#else
- EP_DATAVEC datav[EP_MAXFRAG];
- int ndatav;
-#endif
- LASSERT (krx->krx_rpc_reply_needed);
- LASSERT ((iov == NULL) != (kiov == NULL));
+ ptl_nid_t nid = kqswnal_rx_nid(krx);
+
+ /* Note (1) lib_parse has already flipped hdr.
+ * (2) RDMA addresses are sent in native endian-ness. When
+ * EKC copes with different endian nodes, I'll fix this (and
+ * eat my hat :) */
+
+ LASSERT (krx->krx_nob >= sizeof(*hdr));
+
+ if (hdr->type != type) {
+ /* NB leading space on the 2nd literal: the two are concatenated */
+ CERROR ("Unexpected optimized get/put type %d (%d expected)"
+ " from "LPX64"\n", hdr->type, type, nid);
+ return (NULL);
+ }
+
+ if (hdr->src_nid != nid) {
+ CERROR ("Unexpected optimized get/put source NID "
+ LPX64" from "LPX64"\n", hdr->src_nid, nid);
+ return (NULL);
+ }
+
+ LASSERT (nid == expected_nid);
- /* see kqswnal_sendmsg comment regarding endian-ness */
if (buffer + krx->krx_nob < (char *)(rmd + 1)) {
/* msg too small to discover rmd size */
CERROR ("Incoming message [%d] too small for RMD (%d needed)\n",
krx->krx_nob, (int)(((char *)(rmd + 1)) - buffer));
- return (-EINVAL);
+ return (NULL);
}
-
+
if (buffer + krx->krx_nob < (char *)&rmd->kqrmd_frag[rmd->kqrmd_nfrag]) {
/* rmd doesn't fit in the incoming message */
CERROR ("Incoming message [%d] too small for RMD[%d] (%d needed)\n",
krx->krx_nob, rmd->kqrmd_nfrag,
(int)(((char *)&rmd->kqrmd_frag[rmd->kqrmd_nfrag]) - buffer));
- return (-EINVAL);
+ return (NULL);
}
- /* Map the source data... */
+ return (rmd);
+}
+
+/* Completion callback handed to ep_complete_rpc() by kqswnal_rdma() for
+ * the PTL_MSG_GET case. rxd's arg is the ktx used for the RDMA and
+ * ktx_args[0] is the krx whose RPC has just been completed. */
+void
+kqswnal_rdma_store_complete (EP_RXD *rxd)
+{
+ int status = ep_rxd_status(rxd);
+ kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd);
+ kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
+ int ok = (status == EP_SUCCESS);
+
+ CDEBUG(ok ? D_NET : D_ERROR,
+ "rxd %p, ktx %p, status %d\n", rxd, ktx, status);
+
+ LASSERT (ktx->ktx_state == KTX_RDMAING);
+ LASSERT (krx->krx_rxd == rxd);
+ LASSERT (krx->krx_rpc_reply_needed);
+
+ /* the RPC has been completed; nothing more to send for this krx */
+ krx->krx_rpc_reply_needed = 0;
+ kqswnal_rx_decref (krx);
+
+ /* release ktx and finalize() the lib_msg_t it carries */
+ kqswnal_tx_done(ktx, ok ? 0 : -ECONNABORTED);
+}
+
+void
+kqswnal_rdma_fetch_complete (EP_RXD *rxd)
+{
+ /* Completed fetching the PUT data.
+  *
+  * This is the callback kqswnal_rdma() passes to ep_rpc_get(): rxd's
+  * arg is the ktx used for the RDMA and ktx_args[0] is the krx being
+  * served. The outcome is recorded in krx_rpc_reply_status and the
+  * ktx is retired here. The krx ref is dropped directly unless we are
+  * in interrupt context, in which case the krx is put on
+  * kqn_readyrxds and kqn_sched_waitq is woken instead. */
+ int status = ep_rxd_status(rxd);
+ kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd);
+ kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
+ unsigned long flags;
+
+ CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR,
+ "rxd %p, ktx %p, status %d\n", rxd, ktx, status);
+
+ LASSERT (ktx->ktx_state == KTX_RDMAING);
+ LASSERT (krx->krx_rxd == rxd);
+ LASSERT (krx->krx_rpc_reply_needed);
+
+ /* Set the RPC completion status: 0 if the fetch succeeded,
+  * -ECONNABORTED otherwise */
+ status = (status == EP_SUCCESS) ? 0 : -ECONNABORTED;
+ krx->krx_rpc_reply_status = status;
+
+ /* free ktx & finalize() its lib_msg_t */
+ kqswnal_tx_done(ktx, status);
+
+ if (!in_interrupt()) {
+ /* OK to complete the RPC now (iff I had the last ref) */
+ kqswnal_rx_decref (krx);
+ return;
+ }
+
+ LASSERT (krx->krx_state == KRX_PARSE);
+ krx->krx_state = KRX_COMPLETING;
+
+ /* Complete the RPC in thread context: queue the krx under
+  * kqn_sched_lock and wake whoever sleeps on kqn_sched_waitq */
+ spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
+
+ list_add_tail (&krx->krx_list, &kqswnal_data.kqn_readyrxds);
+ wake_up (&kqswnal_data.kqn_sched_waitq);
+
+ spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
+}
+
+/* Serve an optimized GET or PUT whose RPC arrived in 'krx' by RDMA-ing
+ * directly between the local buffers (iov or kiov, 'len' bytes starting
+ * at 'offset') and the buffers described by the peer's RMD.
+ *
+ * type is PTL_MSG_GET (complete the RPC with ep_complete_rpc()) or
+ * PTL_MSG_PUT (fetch the payload with ep_rpc_get()). A zero 'len'
+ * finalizes 'libmsg' immediately and marks the RPC to complete with
+ * success.
+ *
+ * Returns 0 once the RDMA has been started (or was not needed), else a
+ * negative errno: -EPROTO (bad RMD), -ENOMEM (no tx descriptor), a
+ * mapping/descriptor error, or -ECONNABORTED (EKC refused the request).
+ *
+ * A ref on krx is taken for the completion callback as soon as the ktx
+ * is set up, so that the shared failure path at 'out' always drops a ref
+ * this function actually owns. */
+int
+kqswnal_rdma (kqswnal_rx_t *krx, lib_msg_t *libmsg, int type,
+ int niov, struct iovec *iov, ptl_kiov_t *kiov,
+ size_t offset, size_t len)
+{
+ kqswnal_remotemd_t *rmd;
+ kqswnal_tx_t *ktx;
+ int eprc;
+ int rc;
+#if !MULTIRAIL_EKC
+ EP_DATAVEC datav[EP_MAXFRAG];
+ int ndatav;
+#endif
+
+ LASSERT (type == PTL_MSG_GET || type == PTL_MSG_PUT);
+ /* Not both mapped and paged payload */
+ LASSERT (iov == NULL || kiov == NULL);
+ /* RPC completes with failure by default */
+ LASSERT (krx->krx_rpc_reply_needed);
+ LASSERT (krx->krx_rpc_reply_status != 0);
+
+ rmd = kqswnal_parse_rmd(krx, type, libmsg->ev.initiator.nid);
+ if (rmd == NULL)
+ return (-EPROTO);
+
+ if (len == 0) {
+ /* data got truncated to nothing. */
+ lib_finalize(&kqswnal_lib, krx, libmsg, PTL_OK);
+ /* Let kqswnal_rx_done() complete the RPC with success */
+ krx->krx_rpc_reply_status = 0;
+ return (0);
+ }
+
+ /* NB I'm using 'ktx' just to map the local RDMA buffers; I'm not
+ actually sending a portals message with it */
+ ktx = kqswnal_get_idle_tx(NULL, 0);
+ if (ktx == NULL) {
+ CERROR ("Can't get txd for RDMA with "LPX64"\n",
+ libmsg->ev.initiator.nid);
+ return (-ENOMEM);
+ }
+
+ ktx->ktx_state = KTX_RDMAING;
+ ktx->ktx_nid = libmsg->ev.initiator.nid;
+ ktx->ktx_args[0] = krx;
+ ktx->ktx_args[1] = libmsg;
+
+ /* Take the completion callback's ref now, before the first
+ * 'goto out': the failure path there drops it unconditionally. */
+ LASSERT (atomic_read(&krx->krx_refcount) > 0);
+ atomic_inc(&krx->krx_refcount);
+
+ /* Start mapping at offset 0 (we're not mapping any headers) */
ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 0;
+
if (kiov != NULL)
- rc = kqswnal_map_tx_kiov (ktx, offset, nob, nfrag, kiov);
+ rc = kqswnal_map_tx_kiov(ktx, offset, len, niov, kiov);
else
- rc = kqswnal_map_tx_iov (ktx, offset, nob, nfrag, iov);
+ rc = kqswnal_map_tx_iov(ktx, offset, len, niov, iov);
if (rc != 0) {
- CERROR ("Can't map source data: %d\n", rc);
- return (rc);
+ CERROR ("Can't map local RDMA data: %d\n", rc);
+ goto out;
}
#if MULTIRAIL_EKC
- if (ktx->ktx_nfrag != rmd->kqrmd_nfrag) {
- CERROR("Can't cope with unequal # frags: %d local %d remote\n",
- ktx->ktx_nfrag, rmd->kqrmd_nfrag);
- return (-EINVAL);
+ rc = kqswnal_check_rdma (ktx->ktx_nfrag, ktx->ktx_frags,
+ rmd->kqrmd_nfrag, rmd->kqrmd_frag);
+ if (rc != 0) {
+ CERROR ("Incompatible RDMA descriptors\n");
+ goto out;
}
-
- for (i = 0; i < rmd->kqrmd_nfrag; i++)
- if (ktx->ktx_frags[i].nmd_len != rmd->kqrmd_frag[i].nmd_len) {
- CERROR("Can't cope with unequal frags %d(%d):"
- " %d local %d remote\n",
- i, rmd->kqrmd_nfrag,
- ktx->ktx_frags[i].nmd_len,
- rmd->kqrmd_frag[i].nmd_len);
- return (-EINVAL);
- }
#else
- ndatav = kqswnal_eiovs2datav (EP_MAXFRAG, datav,
- ktx->ktx_nfrag, ktx->ktx_frags,
- rmd->kqrmd_nfrag, rmd->kqrmd_frag);
+ switch (type) {
+ default:
+ LBUG();
+
+ case PTL_MSG_GET:
+ ndatav = kqswnal_eiovs2datav(EP_MAXFRAG, datav,
+ ktx->ktx_nfrag, ktx->ktx_frags,
+ rmd->kqrmd_nfrag, rmd->kqrmd_frag);
+ break;
+
+ case PTL_MSG_PUT:
+ ndatav = kqswnal_eiovs2datav(EP_MAXFRAG, datav,
+ rmd->kqrmd_nfrag, rmd->kqrmd_frag,
+ ktx->ktx_nfrag, ktx->ktx_frags);
+ break;
+ }
+
if (ndatav < 0) {
CERROR ("Can't create datavec: %d\n", ndatav);
- return (ndatav);
+ rc = ndatav;
+ goto out;
}
#endif
- /* Our caller will start to race with kqswnal_dma_reply_complete... */
- LASSERT (atomic_read (&krx->krx_refcount) == 1);
- atomic_set (&krx->krx_refcount, 2);
+ /* NB the completion callback's ref on krx is already held here */
-#if MULTIRAIL_EKC
- rc = ep_complete_rpc(krx->krx_rxd, kqswnal_dma_reply_complete, ktx,
- &kqswnal_rpc_success,
- ktx->ktx_frags, rmd->kqrmd_frag, rmd->kqrmd_nfrag);
- if (rc == EP_SUCCESS)
- return (0);
+ switch (type) {
+ default:
+ LBUG();
- /* Well we tried... */
- krx->krx_rpc_reply_needed = 0;
+ case PTL_MSG_GET:
+#if MULTIRAIL_EKC
+ eprc = ep_complete_rpc(krx->krx_rxd,
+ kqswnal_rdma_store_complete, ktx,
+ &kqswnal_data.kqn_rpc_success,
+ ktx->ktx_frags, rmd->kqrmd_frag, rmd->kqrmd_nfrag);
#else
- rc = ep_complete_rpc (krx->krx_rxd, kqswnal_dma_reply_complete, ktx,
- &kqswnal_rpc_success, datav, ndatav);
- if (rc == EP_SUCCESS)
- return (0);
-
- /* "old" EKC destroys rxd on failed completion */
- krx->krx_rxd = NULL;
+ eprc = ep_complete_rpc (krx->krx_rxd,
+ kqswnal_rdma_store_complete, ktx,
+ &kqswnal_data.kqn_rpc_success,
+ datav, ndatav);
+ if (eprc != EP_SUCCESS) /* "old" EKC destroys rxd on failed completion */
+ krx->krx_rxd = NULL;
#endif
+ if (eprc != EP_SUCCESS) {
+ CERROR("can't complete RPC: %d\n", eprc);
+ /* don't re-attempt RPC completion */
+ krx->krx_rpc_reply_needed = 0;
+ rc = -ECONNABORTED;
+ }
+ break;
+
+ case PTL_MSG_PUT:
+#if MULTIRAIL_EKC
+ eprc = ep_rpc_get (krx->krx_rxd,
+ kqswnal_rdma_fetch_complete, ktx,
+ rmd->kqrmd_frag, ktx->ktx_frags, ktx->ktx_nfrag);
+#else
+ eprc = ep_rpc_get (krx->krx_rxd,
+ kqswnal_rdma_fetch_complete, ktx,
+ datav, ndatav);
+#endif
+ if (eprc != EP_SUCCESS) {
+ CERROR("ep_rpc_get failed: %d\n", eprc);
+ rc = -ECONNABORTED;
+ }
+ break;
+ }
- CERROR("can't complete RPC: %d\n", rc);
-
- /* reset refcount back to 1: we're not going to be racing with
- * kqswnal_dma_reply_complete. */
- atomic_set (&krx->krx_refcount, 1);
+ out:
+ if (rc != 0) {
+ kqswnal_rx_decref(krx); /* drop callback's ref */
+ kqswnal_put_idle_tx (ktx);
+ }
- return (-ECONNABORTED);
+ atomic_dec(&kqswnal_data.kqn_pending_txs);
+ return (rc);
}
static ptl_err_t
-kqswnal_sendmsg (nal_cb_t *nal,
+kqswnal_sendmsg (lib_nal_t *nal,
void *private,
lib_msg_t *libmsg,
ptl_hdr_t *hdr,
int sumoff;
int sumnob;
#endif
+ /* NB 1. hdr is in network byte order */
+ /* 2. 'private' depends on the message type */
CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid: "LPX64
" pid %u\n", payload_nob, payload_niov, nid, pid);
return (PTL_FAIL);
}
+ if (type == PTL_MSG_REPLY && /* can I look in 'private' */
+ ((kqswnal_rx_t *)private)->krx_rpc_reply_needed) { /* is it an RPC */
+ /* Must be a REPLY for an optimized GET */
+ rc = kqswnal_rdma ((kqswnal_rx_t *)private, libmsg, PTL_MSG_GET,
+ payload_niov, payload_iov, payload_kiov,
+ payload_offset, payload_nob);
+ return ((rc == 0) ? PTL_OK : PTL_FAIL);
+ }
+
targetnid = nid;
if (kqswnal_nid2elanid (nid) < 0) { /* Can't send direct: find gateway? */
rc = kpr_lookup (&kqswnal_data.kqn_router, nid,
type == PTL_MSG_REPLY ||
in_interrupt()));
if (ktx == NULL) {
- kqswnal_cerror_hdr (hdr);
+ CERROR ("Can't get txd for msg type %d for "LPX64"\n",
+ type, libmsg->ev.initiator.nid);
return (PTL_NO_SPACE);
}
+ ktx->ktx_state = KTX_SENDING;
ktx->ktx_nid = targetnid;
ktx->ktx_args[0] = private;
ktx->ktx_args[1] = libmsg;
-
- if (type == PTL_MSG_REPLY &&
- ((kqswnal_rx_t *)private)->krx_rpc_reply_needed) {
- if (nid != targetnid ||
- kqswnal_nid2elanid(nid) !=
- ep_rxd_node(((kqswnal_rx_t *)private)->krx_rxd)) {
- CERROR("Optimized reply nid conflict: "
- "nid "LPX64" via "LPX64" elanID %d\n",
- nid, targetnid,
- ep_rxd_node(((kqswnal_rx_t *)private)->krx_rxd));
- rc = -EINVAL;
- goto out;
- }
-
- /* peer expects RPC completion with GET data */
- rc = kqswnal_dma_reply (ktx, payload_niov,
- payload_iov, payload_kiov,
- payload_offset, payload_nob);
- if (rc != 0)
- CERROR ("Can't DMA reply to "LPX64": %d\n", nid, rc);
- goto out;
- }
+ ktx->ktx_args[2] = NULL; /* set when a GET commits to REPLY */
memcpy (ktx->ktx_buffer, hdr, sizeof (*hdr)); /* copy hdr from caller's stack */
ktx->ktx_wire_hdr = (ptl_hdr_t *)ktx->ktx_buffer;
memcpy(ktx->ktx_buffer + sizeof(*hdr) + sizeof(csum), &csum, sizeof(csum));
#endif
- if (kqswnal_tunables.kqn_optimized_gets &&
- type == PTL_MSG_GET && /* doing a GET */
- nid == targetnid) { /* not forwarding */
+ /* The first frag will be the pre-mapped buffer for (at least) the
+ * portals header. */
+ ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
+
+ if (nid == targetnid && /* not forwarding */
+ ((type == PTL_MSG_GET && /* optimize GET? */
+ kqswnal_tunables.kqn_optimized_gets != 0 &&
+ NTOH__u32(hdr->msg.get.sink_length) >= kqswnal_tunables.kqn_optimized_gets) ||
+ (type == PTL_MSG_PUT && /* optimize PUT? */
+ kqswnal_tunables.kqn_optimized_puts != 0 &&
+ payload_nob >= kqswnal_tunables.kqn_optimized_puts))) {
lib_md_t *md = libmsg->md;
kqswnal_remotemd_t *rmd = (kqswnal_remotemd_t *)(ktx->ktx_buffer + KQSW_HDR_SIZE);
- /* Optimised path: I send over the Elan vaddrs of the get
- * sink buffers, and my peer DMAs directly into them.
+ /* Optimised path: I send over the Elan vaddrs of the local
+ * buffers, and my peer DMAs directly to/from them.
*
* First I set up ktx as if it was going to send this
* payload, (it needs to map it anyway). This fills
* ktx_frags[1] and onward with the network addresses
* of the GET sink frags. I copy these into ktx_buffer,
- * immediately after the header, and send that as my GET
- * message.
- *
- * Note that the addresses are sent in native endian-ness.
- * When EKC copes with different endian nodes, I'll fix
- * this (and eat my hat :) */
+ * immediately after the header, and send that as my
+ * message. */
- ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
- ktx->ktx_state = KTX_GETTING;
+ ktx->ktx_state = (type == PTL_MSG_PUT) ? KTX_PUTTING : KTX_GETTING;
if ((libmsg->md->options & PTL_MD_KIOV) != 0)
rc = kqswnal_map_tx_kiov (ktx, 0, md->length,
ktx->ktx_frags[0].Base = ktx->ktx_ebuffer;
ktx->ktx_frags[0].Len = KQSW_HDR_SIZE + payload_nob;
#endif
+ if (type == PTL_MSG_GET) {
+ /* Allocate reply message now while I'm in thread context */
+ ktx->ktx_args[2] = lib_create_reply_msg (&kqswnal_lib,
+ nid, libmsg);
+ if (ktx->ktx_args[2] == NULL)
+ goto out;
+
+ /* NB finalizing the REPLY message is my
+ * responsibility now, whatever happens. */
+ }
+
} else if (payload_nob <= KQSW_TX_MAXCONTIG) {
/* small message: single frag copied into the pre-mapped buffer */
- ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
- ktx->ktx_state = KTX_SENDING;
#if MULTIRAIL_EKC
ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer,
0, KQSW_HDR_SIZE + payload_nob);
/* large message: multiple frags: first is hdr in pre-mapped buffer */
- ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
- ktx->ktx_state = KTX_SENDING;
#if MULTIRAIL_EKC
ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer,
0, KQSW_HDR_SIZE);
rc == 0 ? "Sent" : "Failed to send",
payload_nob, nid, targetnid, rc);
- if (rc != 0)
+ if (rc != 0) {
+ if (ktx->ktx_state == KTX_GETTING &&
+ ktx->ktx_args[2] != NULL) {
+ /* We committed to reply, but there was a problem
+ * launching the GET. We can't avoid delivering a
+ * REPLY event since we committed above, so we
+ * pretend the GET succeeded but the REPLY
+ * failed. */
+ rc = 0;
+ lib_finalize (&kqswnal_lib, private, libmsg, PTL_OK);
+ lib_finalize (&kqswnal_lib, private,
+ (lib_msg_t *)ktx->ktx_args[2], PTL_FAIL);
+ }
+
kqswnal_put_idle_tx (ktx);
-
+ }
+
atomic_dec(&kqswnal_data.kqn_pending_txs);
return (rc == 0 ? PTL_OK : PTL_FAIL);
}
static ptl_err_t
-kqswnal_send (nal_cb_t *nal,
+kqswnal_send (lib_nal_t *nal,
void *private,
lib_msg_t *libmsg,
ptl_hdr_t *hdr,
}
static ptl_err_t
-kqswnal_send_pages (nal_cb_t *nal,
+kqswnal_send_pages (lib_nal_t *nal,
void *private,
lib_msg_t *libmsg,
ptl_hdr_t *hdr,
if (ktx == NULL) /* can't get txd right now */
return; /* fwd will be scheduled when tx desc freed */
- if (nid == kqswnal_lib.ni.nid) /* gateway is me */
+ if (nid == kqswnal_lib.libnal_ni.ni_pid.nid) /* gateway is me */
nid = fwd->kprfd_target_nid; /* target is final dest */
if (kqswnal_nid2elanid (nid) < 0) {
if (rc != 0) {
CERROR ("Failed to forward [%p] to "LPX64": %d\n", fwd, nid, rc);
- kqswnal_put_idle_tx (ktx);
/* complete now (with failure) */
- kpr_fwd_done (&kqswnal_data.kqn_router, fwd, rc);
+ kqswnal_tx_done (ktx, rc);
}
atomic_dec(&kqswnal_data.kqn_pending_txs);
NTOH__u64(hdr->src_nid), NTOH__u64(hdr->dest_nid),error);
}
- kqswnal_requeue_rx (krx);
+ LASSERT (atomic_read(&krx->krx_refcount) == 1);
+ kqswnal_rx_decref (krx);
}
void
-kqswnal_dma_reply_complete (EP_RXD *rxd)
+kqswnal_requeue_rx (kqswnal_rx_t *krx)
{
- int status = ep_rxd_status(rxd);
- kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd);
- kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
- lib_msg_t *msg = (lib_msg_t *)ktx->ktx_args[1];
-
- CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR,
- "rxd %p, ktx %p, status %d\n", rxd, ktx, status);
+ LASSERT (atomic_read(&krx->krx_refcount) == 0);
+ LASSERT (!krx->krx_rpc_reply_needed);
- LASSERT (krx->krx_rxd == rxd);
- LASSERT (krx->krx_rpc_reply_needed);
+ krx->krx_state = KRX_POSTED;
- krx->krx_rpc_reply_needed = 0;
- kqswnal_rx_done (krx);
+#if MULTIRAIL_EKC
+ if (kqswnal_data.kqn_shuttingdown) {
+ /* free EKC rxd on shutdown */
+ ep_complete_receive(krx->krx_rxd);
+ } else {
+ /* repost receive */
+ ep_requeue_receive(krx->krx_rxd,
+ kqswnal_rxhandler, krx,
+ &krx->krx_elanbuffer, 0);
+ }
+#else
+ if (kqswnal_data.kqn_shuttingdown)
+ return;
- lib_finalize (&kqswnal_lib, NULL, msg,
- (status == EP_SUCCESS) ? PTL_OK : PTL_FAIL);
- kqswnal_put_idle_tx (ktx);
+ if (krx->krx_rxd == NULL) {
+ /* We had a failed ep_complete_rpc() which nukes the
+ * descriptor in "old" EKC */
+ int eprc = ep_queue_receive(krx->krx_eprx,
+ kqswnal_rxhandler, krx,
+ krx->krx_elanbuffer,
+ krx->krx_npages * PAGE_SIZE, 0);
+ LASSERT (eprc == EP_SUCCESS);
+ /* We don't handle failure here; it's incredibly rare
+ * (never reported?) and only happens with "old" EKC */
+ } else {
+ ep_requeue_receive(krx->krx_rxd, kqswnal_rxhandler, krx,
+ krx->krx_elanbuffer,
+ krx->krx_npages * PAGE_SIZE);
+ }
+#endif
}
void
}
void
-kqswnal_requeue_rx (kqswnal_rx_t *krx)
+kqswnal_rx_done (kqswnal_rx_t *krx)
{
- int rc;
+ int rc;
+ EP_STATUSBLK *sblk;
LASSERT (atomic_read(&krx->krx_refcount) == 0);
if (krx->krx_rpc_reply_needed) {
+ /* We've not completed the peer's RPC yet... */
+ sblk = (krx->krx_rpc_reply_status == 0) ?
+ &kqswnal_data.kqn_rpc_success :
+ &kqswnal_data.kqn_rpc_failed;
- /* We failed to complete the peer's optimized GET (e.g. we
- * couldn't map the source buffers). We complete the
- * peer's EKC rpc now with failure. */
+ LASSERT (!in_interrupt());
#if MULTIRAIL_EKC
- rc = ep_complete_rpc(krx->krx_rxd, kqswnal_rpc_complete, krx,
- &kqswnal_rpc_failed, NULL, NULL, 0);
+ rc = ep_complete_rpc(krx->krx_rxd,
+ kqswnal_rpc_complete, krx,
+ sblk, NULL, NULL, 0);
if (rc == EP_SUCCESS)
return;
-
- CERROR("can't complete RPC: %d\n", rc);
#else
- if (krx->krx_rxd != NULL) {
- /* We didn't try (and fail) to complete earlier... */
- rc = ep_complete_rpc(krx->krx_rxd,
- kqswnal_rpc_complete, krx,
- &kqswnal_rpc_failed, NULL, 0);
- if (rc == EP_SUCCESS)
- return;
-
- CERROR("can't complete RPC: %d\n", rc);
- }
-
- /* NB the old ep_complete_rpc() frees rxd on failure, so we
- * have to requeue from scratch here, unless we're shutting
- * down */
- if (kqswnal_data.kqn_shuttingdown)
+ rc = ep_complete_rpc(krx->krx_rxd,
+ kqswnal_rpc_complete, krx,
+ sblk, NULL, 0);
+ if (rc == EP_SUCCESS)
return;
- rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
- krx->krx_elanbuffer,
- krx->krx_npages * PAGE_SIZE, 0);
- LASSERT (rc == EP_SUCCESS);
- /* We don't handle failure here; it's incredibly rare
- * (never reported?) and only happens with "old" EKC */
- return;
+ /* "old" EKC destroys rxd on failed completion */
+ krx->krx_rxd = NULL;
#endif
+ CERROR("can't complete RPC: %d\n", rc);
+ krx->krx_rpc_reply_needed = 0;
}
-#if MULTIRAIL_EKC
- if (kqswnal_data.kqn_shuttingdown) {
- /* free EKC rxd on shutdown */
- ep_complete_receive(krx->krx_rxd);
- } else {
- /* repost receive */
- ep_requeue_receive(krx->krx_rxd, kqswnal_rxhandler, krx,
- &krx->krx_elanbuffer, 0);
- }
-#else
- /* don't actually requeue on shutdown */
- if (!kqswnal_data.kqn_shuttingdown)
- ep_requeue_receive(krx->krx_rxd, kqswnal_rxhandler, krx,
- krx->krx_elanbuffer, krx->krx_npages * PAGE_SIZE);
-#endif
+ kqswnal_requeue_rx(krx);
}
void
-kqswnal_rx (kqswnal_rx_t *krx)
+kqswnal_parse (kqswnal_rx_t *krx)
{
ptl_hdr_t *hdr = (ptl_hdr_t *) page_address(krx->krx_kiov[0].kiov_page);
ptl_nid_t dest_nid = NTOH__u64 (hdr->dest_nid);
int nob;
int niov;
- LASSERT (atomic_read(&krx->krx_refcount) == 0);
+ LASSERT (atomic_read(&krx->krx_refcount) == 1);
+
+ if (dest_nid == kqswnal_lib.libnal_ni.ni_pid.nid) { /* It's for me :) */
+ /* I ignore parse errors since I'm not consuming a byte
+ * stream */
+ (void)lib_parse (&kqswnal_lib, hdr, krx);
- if (dest_nid == kqswnal_lib.ni.nid) { /* It's for me :) */
- atomic_set(&krx->krx_refcount, 1);
- lib_parse (&kqswnal_lib, hdr, krx);
- kqswnal_rx_done(krx);
+ /* Drop my ref; any RDMA activity takes an additional ref */
+ kqswnal_rx_decref(krx);
return;
}
#if KQSW_CHECKSUM
- CERROR ("checksums for forwarded packets not implemented\n");
- LBUG ();
+ LASSERTF (0, "checksums for forwarded packets not implemented\n");
#endif
+
if (kqswnal_nid2elanid (dest_nid) >= 0) /* should have gone direct to peer */
{
CERROR("dropping packet from "LPX64" for "LPX64
": target is peer\n", NTOH__u64(hdr->src_nid), dest_nid);
- kqswnal_requeue_rx (krx);
+ kqswnal_rx_decref (krx);
return;
}
rxd, krx, nob, status);
LASSERT (krx != NULL);
-
+ LASSERT (krx->krx_state == KRX_POSTED); /* only a posted rx may complete */
+
+ krx->krx_state = KRX_PARSE;
krx->krx_rxd = rxd;
krx->krx_nob = nob;
#if MULTIRAIL_EKC
#else
krx->krx_rpc_reply_needed = ep_rxd_isrpc(rxd);
#endif
-
+ /* Default to failure if an RPC reply is requested but not handled */
+ krx->krx_rpc_reply_status = -EPROTO;
+ atomic_set (&krx->krx_refcount, 1);
+
/* must receive a whole header to be able to parse */
if (status != EP_SUCCESS || nob < sizeof (ptl_hdr_t))
{
CERROR("receive status failed with status %d nob %d\n",
ep_rxd_status(rxd), nob);
#endif
- kqswnal_requeue_rx (krx);
+ kqswnal_rx_decref(krx);
return;
}
if (!in_interrupt()) {
- kqswnal_rx (krx);
+ kqswnal_parse(krx);
return;
}
#endif
static ptl_err_t
-kqswnal_recvmsg (nal_cb_t *nal,
+kqswnal_recvmsg (lib_nal_t *nal,
void *private,
lib_msg_t *libmsg,
unsigned int niov,
{
kqswnal_rx_t *krx = (kqswnal_rx_t *)private;
char *buffer = page_address(krx->krx_kiov[0].kiov_page);
+ ptl_hdr_t *hdr = (ptl_hdr_t *)buffer;
int page;
char *page_ptr;
int page_nob;
char *iov_ptr;
int iov_nob;
int frag;
+ int rc;
#if KQSW_CHECKSUM
kqsw_csum_t senders_csum;
kqsw_csum_t payload_csum = 0;
- kqsw_csum_t hdr_csum = kqsw_csum(0, buffer, sizeof(ptl_hdr_t));
+ kqsw_csum_t hdr_csum = kqsw_csum(0, hdr, sizeof(*hdr));
size_t csum_len = mlen;
int csum_frags = 0;
int csum_nob = 0;
if (senders_csum != hdr_csum)
kqswnal_csum_error (krx, 1);
#endif
+ /* NB lib_parse() has already flipped *hdr */
+
CDEBUG(D_NET,"kqswnal_recv, mlen="LPSZ", rlen="LPSZ"\n", mlen, rlen);
+ if (krx->krx_rpc_reply_needed &&
+ hdr->type == PTL_MSG_PUT) {
+ /* This must be an optimized PUT */
+ rc = kqswnal_rdma (krx, libmsg, PTL_MSG_PUT,
+ niov, iov, kiov, offset, mlen);
+ return (rc == 0 ? PTL_OK : PTL_FAIL);
+ }
+
/* What was actually received must be >= payload. */
LASSERT (mlen <= rlen);
if (krx->krx_nob < KQSW_HDR_SIZE + mlen) {
}
static ptl_err_t
-kqswnal_recv(nal_cb_t *nal,
+kqswnal_recv(lib_nal_t *nal,
void *private,
lib_msg_t *libmsg,
unsigned int niov,
}
static ptl_err_t
-kqswnal_recv_pages (nal_cb_t *nal,
+kqswnal_recv_pages (lib_nal_t *nal,
void *private,
lib_msg_t *libmsg,
unsigned int niov,
spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
flags);
- kqswnal_rx (krx);
+ switch (krx->krx_state) {
+ case KRX_PARSE:
+ kqswnal_parse (krx);
+ break;
+ case KRX_COMPLETING:
+ /* Drop last ref to reply to RPC and requeue */
+ LASSERT (krx->krx_rpc_reply_needed);
+ kqswnal_rx_decref (krx);
+ break;
+ default:
+ LBUG();
+ }
did_something = 1;
spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags);
return (0);
}
-nal_cb_t kqswnal_lib =
+lib_nal_t kqswnal_lib =
{
- nal_data: &kqswnal_data, /* NAL private data */
- cb_send: kqswnal_send,
- cb_send_pages: kqswnal_send_pages,
- cb_recv: kqswnal_recv,
- cb_recv_pages: kqswnal_recv_pages,
- cb_read: kqswnal_read,
- cb_write: kqswnal_write,
- cb_malloc: kqswnal_malloc,
- cb_free: kqswnal_free,
- cb_printf: kqswnal_printf,
- cb_cli: kqswnal_cli,
- cb_sti: kqswnal_sti,
- cb_callback: kqswnal_callback,
- cb_dist: kqswnal_dist
+ libnal_data: &kqswnal_data, /* NAL private data */
+ libnal_send: kqswnal_send,
+ libnal_send_pages: kqswnal_send_pages,
+ libnal_recv: kqswnal_recv,
+ libnal_recv_pages: kqswnal_recv_pages,
+ libnal_dist: kqswnal_dist
};
#endif
int
-ksocknal_api_forward(nal_t *nal, int id, void *args, size_t args_len,
- void *ret, size_t ret_len)
-{
- ksock_nal_data_t *k;
- nal_cb_t *nal_cb;
-
- k = nal->nal_data;
- nal_cb = k->ksnd_nal_cb;
-
- lib_dispatch(nal_cb, k, id, args, ret); /* ksocknal_send needs k */
- return PTL_OK;
-}
-
-void
-ksocknal_api_lock(nal_t *nal, unsigned long *flags)
-{
- ksock_nal_data_t *k;
- nal_cb_t *nal_cb;
-
- k = nal->nal_data;
- nal_cb = k->ksnd_nal_cb;
- nal_cb->cb_cli(nal_cb,flags);
-}
-
-void
-ksocknal_api_unlock(nal_t *nal, unsigned long *flags)
-{
- ksock_nal_data_t *k;
- nal_cb_t *nal_cb;
-
- k = nal->nal_data;
- nal_cb = k->ksnd_nal_cb;
- nal_cb->cb_sti(nal_cb,flags);
-}
-
-int
-ksocknal_api_yield(nal_t *nal, unsigned long *flags, int milliseconds)
-{
- /* NB called holding statelock */
- wait_queue_t wait;
- unsigned long now = jiffies;
-
- CDEBUG (D_NET, "yield\n");
-
- if (milliseconds == 0) {
- our_cond_resched();
- return 0;
- }
-
- init_waitqueue_entry(&wait, current);
- set_current_state (TASK_INTERRUPTIBLE);
- add_wait_queue (&ksocknal_data.ksnd_yield_waitq, &wait);
-
- ksocknal_api_unlock(nal, flags);
-
- if (milliseconds < 0)
- schedule ();
- else
- schedule_timeout((milliseconds * HZ) / 1000);
-
- ksocknal_api_lock(nal, flags);
-
- remove_wait_queue (&ksocknal_data.ksnd_yield_waitq, &wait);
-
- if (milliseconds > 0) {
- milliseconds -= ((jiffies - now) * 1000) / HZ;
- if (milliseconds < 0)
- milliseconds = 0;
- }
-
- return (milliseconds);
-}
-
-int
ksocknal_set_mynid(ptl_nid_t nid)
{
- lib_ni_t *ni = &ksocknal_lib.ni;
+ lib_ni_t *ni = &ksocknal_lib.libnal_ni;
/* FIXME: we have to do this because we call lib_init() at module
* insertion time, which is before we have 'mynid' available. lib_init
* problem. */
CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n",
- nid, ni->nid);
+ nid, ni->ni_pid.nid);
- ni->nid = nid;
+ ni->ni_pid.nid = nid;
return (0);
}
/* flag threads to terminate; wake and wait for them to die */
ksocknal_data.ksnd_shuttingdown = 1;
+ mb();
wake_up_all (&ksocknal_data.ksnd_autoconnectd_waitq);
wake_up_all (&ksocknal_data.ksnd_reaper_waitq);
for (i = 0; i < SOCKNAL_N_SCHED; i++)
wake_up_all(&ksocknal_data.ksnd_schedulers[i].kss_waitq);
+ i = 4;
while (atomic_read (&ksocknal_data.ksnd_nthreads) != 0) {
- CDEBUG (D_NET, "waitinf for %d threads to terminate\n",
+ i++;
+ CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
+ "waiting for %d threads to terminate\n",
atomic_read (&ksocknal_data.ksnd_nthreads));
set_current_state (TASK_UNINTERRUPTIBLE);
schedule_timeout (HZ);
if (nal->nal_refct != 0) {
if (actual_limits != NULL)
- *actual_limits = ksocknal_lib.ni.actual_limits;
+ *actual_limits = ksocknal_lib.libnal_ni.ni_actual_limits;
/* This module got the first ref */
PORTAL_MODULE_USE;
return (PTL_OK);
rwlock_init(&ksocknal_data.ksnd_global_lock);
- ksocknal_data.ksnd_nal_cb = &ksocknal_lib;
- spin_lock_init (&ksocknal_data.ksnd_nal_cb_lock);
- init_waitqueue_head(&ksocknal_data.ksnd_yield_waitq);
-
spin_lock_init(&ksocknal_data.ksnd_small_fmp.fmp_lock);
INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_idle_fmbs);
INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns);
PORTAL_ALLOC(ksocknal_data.ksnd_schedulers,
sizeof(ksock_sched_t) * SOCKNAL_N_SCHED);
if (ksocknal_data.ksnd_schedulers == NULL) {
- ksocknal_api_shutdown (&ksocknal_api);
+ ksocknal_api_shutdown (nal);
return (-ENOMEM);
}
process_id.pid = 0;
process_id.nid = 0;
- rc = lib_init(&ksocknal_lib, process_id,
+ rc = lib_init(&ksocknal_lib, nal, process_id,
requested_limits, actual_limits);
if (rc != PTL_OK) {
CERROR("lib_init failed: error %d\n", rc);
- ksocknal_api_shutdown (&ksocknal_api);
+ ksocknal_api_shutdown (nal);
return (rc);
}
if (rc != 0) {
CERROR("Can't spawn socknal scheduler[%d]: %d\n",
i, rc);
- ksocknal_api_shutdown (&ksocknal_api);
+ ksocknal_api_shutdown (nal);
return (rc);
}
}
rc = ksocknal_thread_start (ksocknal_autoconnectd, (void *)((long)i));
if (rc != 0) {
CERROR("Can't spawn socknal autoconnectd: %d\n", rc);
- ksocknal_api_shutdown (&ksocknal_api);
+ ksocknal_api_shutdown (nal);
return (rc);
}
}
rc = ksocknal_thread_start (ksocknal_reaper, NULL);
if (rc != 0) {
CERROR ("Can't spawn socknal reaper: %d\n", rc);
- ksocknal_api_shutdown (&ksocknal_api);
+ ksocknal_api_shutdown (nal);
return (rc);
}
PORTAL_ALLOC(fmb, offsetof(ksock_fmb_t,
fmb_kiov[pool->fmp_buff_pages]));
if (fmb == NULL) {
- ksocknal_api_shutdown(&ksocknal_api);
+ ksocknal_api_shutdown(nal);
return (-ENOMEM);
}
fmb->fmb_kiov[j].kiov_page = alloc_page(GFP_KERNEL);
if (fmb->fmb_kiov[j].kiov_page == NULL) {
- ksocknal_api_shutdown (&ksocknal_api);
+ ksocknal_api_shutdown (nal);
return (-ENOMEM);
}
rc = libcfs_nal_cmd_register(SOCKNAL, &ksocknal_cmd, NULL);
if (rc != 0) {
CERROR ("Can't initialise command interface (rc = %d)\n", rc);
- ksocknal_api_shutdown (&ksocknal_api);
+ ksocknal_api_shutdown (nal);
return (rc);
}
/* check ksnr_connected/connecting field large enough */
LASSERT(SOCKNAL_CONN_NTYPES <= 4);
- ksocknal_api.startup = ksocknal_api_startup;
- ksocknal_api.forward = ksocknal_api_forward;
- ksocknal_api.shutdown = ksocknal_api_shutdown;
- ksocknal_api.lock = ksocknal_api_lock;
- ksocknal_api.unlock = ksocknal_api_unlock;
- ksocknal_api.nal_data = &ksocknal_data;
-
- ksocknal_lib.nal_data = &ksocknal_data;
+ ksocknal_api.nal_ni_init = ksocknal_api_startup;
+ ksocknal_api.nal_ni_fini = ksocknal_api_shutdown;
/* Initialise dynamic tunables to defaults once only */
ksocknal_tunables.ksnd_io_timeout = SOCKNAL_IO_TIMEOUT;
struct list_head *ksnd_peers; /* hash table of all my known peers */
int ksnd_peer_hash_size; /* size of ksnd_peers */
- nal_cb_t *ksnd_nal_cb;
- spinlock_t ksnd_nal_cb_lock; /* lib cli/sti lock */
- wait_queue_head_t ksnd_yield_waitq; /* where yield waits */
-
atomic_t ksnd_nthreads; /* # live threads */
int ksnd_shuttingdown; /* tell threads to exit */
ksock_sched_t *ksnd_schedulers; /* scheduler state */
} ksock_peer_t;
-extern nal_cb_t ksocknal_lib;
+extern lib_nal_t ksocknal_lib;
extern ksock_nal_data_t ksocknal_data;
extern ksock_tunables_t ksocknal_tunables;
* LIB functions follow
*
*/
-ptl_err_t
-ksocknal_read(nal_cb_t *nal, void *private, void *dst_addr,
- user_ptr src_addr, size_t len)
-{
- CDEBUG(D_NET, LPX64": reading %ld bytes from %p -> %p\n",
- nal->ni.nid, (long)len, src_addr, dst_addr);
-
- memcpy( dst_addr, src_addr, len );
- return PTL_OK;
-}
-
-ptl_err_t
-ksocknal_write(nal_cb_t *nal, void *private, user_ptr dst_addr,
- void *src_addr, size_t len)
-{
- CDEBUG(D_NET, LPX64": writing %ld bytes from %p -> %p\n",
- nal->ni.nid, (long)len, src_addr, dst_addr);
-
- memcpy( dst_addr, src_addr, len );
- return PTL_OK;
-}
-
-void *
-ksocknal_malloc(nal_cb_t *nal, size_t len)
-{
- void *buf;
-
- PORTAL_ALLOC(buf, len);
-
- if (buf != NULL)
- memset(buf, 0, len);
-
- return (buf);
-}
-
-void
-ksocknal_free(nal_cb_t *nal, void *buf, size_t len)
-{
- PORTAL_FREE(buf, len);
-}
-
-void
-ksocknal_printf(nal_cb_t *nal, const char *fmt, ...)
-{
- va_list ap;
- char msg[256];
-
- va_start (ap, fmt);
- vsnprintf (msg, sizeof (msg), fmt, ap); /* sprint safely */
- va_end (ap);
-
- msg[sizeof (msg) - 1] = 0; /* ensure terminated */
-
- CDEBUG (D_NET, "%s", msg);
-}
-
-void
-ksocknal_cli(nal_cb_t *nal, unsigned long *flags)
-{
- ksock_nal_data_t *data = nal->nal_data;
-
- /* OK to ignore 'flags'; we're only ever serialise threads and
- * never need to lock out interrupts */
- spin_lock(&data->ksnd_nal_cb_lock);
-}
-
-void
-ksocknal_sti(nal_cb_t *nal, unsigned long *flags)
-{
- ksock_nal_data_t *data;
- data = nal->nal_data;
-
- /* OK to ignore 'flags'; we're only ever serialise threads and
- * never need to lock out interrupts */
- spin_unlock(&data->ksnd_nal_cb_lock);
-}
-
-void
-ksocknal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev)
-{
- /* holding ksnd_nal_cb_lock */
-
- if (eq->event_callback != NULL)
- eq->event_callback(ev);
-
- if (waitqueue_active(&ksocknal_data.ksnd_yield_waitq))
- wake_up_all(&ksocknal_data.ksnd_yield_waitq);
-}
-
int
-ksocknal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
+ksocknal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist)
{
/* I would guess that if ksocknal_get_peer (nid) == NULL,
and we're not routing, then 'nid' is very distant :) */
- if ( nal->ni.nid == nid ) {
+ if (nal->libnal_ni.ni_pid.nid == nid) {
*dist = 0;
} else {
*dist = 1;
{
struct list_head *tmp;
ksock_route_t *route;
- ksock_route_t *candidate = NULL;
- int found = 0;
+ ksock_route_t *first_lazy = NULL;
+ int found_connecting_or_connected = 0;
int bits;
list_for_each (tmp, &peer->ksnp_routes) {
/* All typed connections have been established, or
* an untyped connection has been established, or
* connections are currently being established */
- found = 1;
+ found_connecting_or_connected = 1;
continue;
}
if (!time_after_eq (jiffies, route->ksnr_timeout))
continue;
- /* always do eager routes */
+ /* eager routes always want to be connected */
if (route->ksnr_eager)
return (route);
- if (candidate == NULL) {
- /* If we don't find any other route that is fully
- * connected or connecting, the first connectable
- * route is returned. If it fails to connect, it
- * will get placed at the end of the list */
- candidate = route;
- }
+ if (first_lazy == NULL)
+ first_lazy = route;
}
-
- return (found ? NULL : candidate);
+
+ /* No eager routes need to be connected. If some connection has
+ * already been established, or is being established there's nothing to
+ * do. Otherwise we return the first lazy route we found. If it fails
+ * to connect, it will go to the end of the list. */
+
+ if (!list_empty (&peer->ksnp_conns) ||
+ found_connecting_or_connected)
+ return (NULL);
+
+ return (first_lazy);
}
ksock_route_t *
}
ptl_err_t
-ksocknal_sendmsg(nal_cb_t *nal,
+ksocknal_sendmsg(lib_nal_t *nal,
void *private,
lib_msg_t *cookie,
ptl_hdr_t *hdr,
}
ptl_err_t
-ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie,
+ksocknal_send (lib_nal_t *nal, void *private, lib_msg_t *cookie,
ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
unsigned int payload_niov, struct iovec *payload_iov,
size_t payload_offset, size_t payload_len)
}
ptl_err_t
-ksocknal_send_pages (nal_cb_t *nal, void *private, lib_msg_t *cookie,
+ksocknal_send_pages (lib_nal_t *nal, void *private, lib_msg_t *cookie,
ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
unsigned int payload_niov, ptl_kiov_t *payload_kiov,
size_t payload_offset, size_t payload_len)
fwd->kprfd_gateway_nid, fwd->kprfd_target_nid);
/* I'm the gateway; must be the last hop */
- if (nid == ksocknal_lib.ni.nid)
+ if (nid == ksocknal_lib.libnal_ni.ni_pid.nid)
nid = fwd->kprfd_target_nid;
/* setup iov for hdr */
switch (conn->ksnc_rx_state) {
case SOCKNAL_RX_HEADER:
if (conn->ksnc_hdr.type != HTON__u32(PTL_MSG_HELLO) &&
- NTOH__u64(conn->ksnc_hdr.dest_nid) != ksocknal_lib.ni.nid) {
+ NTOH__u64(conn->ksnc_hdr.dest_nid) !=
+ ksocknal_lib.libnal_ni.ni_pid.nid) {
/* This packet isn't for me */
ksocknal_fwd_parse (conn);
switch (conn->ksnc_rx_state) {
}
/* sets wanted_len, iovs etc */
- lib_parse(&ksocknal_lib, &conn->ksnc_hdr, conn);
+ rc = lib_parse(&ksocknal_lib, &conn->ksnc_hdr, conn);
+
+ if (rc != PTL_OK) {
+ /* I just received garbage: give up on this conn */
+ ksocknal_close_conn_and_siblings (conn, rc);
+ return (-EPROTO);
+ }
if (conn->ksnc_rx_nob_wanted != 0) { /* need to get payload? */
conn->ksnc_rx_state = SOCKNAL_RX_BODY;
}
ptl_err_t
-ksocknal_recv (nal_cb_t *nal, void *private, lib_msg_t *msg,
+ksocknal_recv (lib_nal_t *nal, void *private, lib_msg_t *msg,
unsigned int niov, struct iovec *iov,
size_t offset, size_t mlen, size_t rlen)
{
}
ptl_err_t
-ksocknal_recv_pages (nal_cb_t *nal, void *private, lib_msg_t *msg,
+ksocknal_recv_pages (lib_nal_t *nal, void *private, lib_msg_t *msg,
unsigned int niov, ptl_kiov_t *kiov,
size_t offset, size_t mlen, size_t rlen)
{
hmv->version_major = __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR);
hmv->version_minor = __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR);
- hdr.src_nid = __cpu_to_le64 (ksocknal_lib.ni.nid);
+ hdr.src_nid = __cpu_to_le64 (ksocknal_lib.libnal_ni.ni_pid.nid);
hdr.type = __cpu_to_le32 (PTL_MSG_HELLO);
hdr.msg.hello.type = __cpu_to_le32 (*type);
return (0);
}
-nal_cb_t ksocknal_lib = {
- nal_data: &ksocknal_data, /* NAL private data */
- cb_send: ksocknal_send,
- cb_send_pages: ksocknal_send_pages,
- cb_recv: ksocknal_recv,
- cb_recv_pages: ksocknal_recv_pages,
- cb_read: ksocknal_read,
- cb_write: ksocknal_write,
- cb_malloc: ksocknal_malloc,
- cb_free: ksocknal_free,
- cb_printf: ksocknal_printf,
- cb_cli: ksocknal_cli,
- cb_sti: ksocknal_sti,
- cb_callback: ksocknal_callback,
- cb_dist: ksocknal_dist
+lib_nal_t ksocknal_lib = {
+ libnal_data: &ksocknal_data, /* NAL private data */
+ libnal_send: ksocknal_send,
+ libnal_send_pages: ksocknal_send_pages,
+ libnal_recv: ksocknal_recv,
+ libnal_recv_pages: ksocknal_recv_pages,
+ libnal_dist: ksocknal_dist
};
#define PORTAL_MINOR 240
struct nal_cmd_handler {
+ int nch_number;
nal_cmd_handler_fn *nch_handler;
void *nch_private;
};
-static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1];
+static struct nal_cmd_handler nal_cmd[16];
static DECLARE_MUTEX(nal_cmd_sem);
#ifdef PORTAL_DEBUG
PORTAL_FREE(data, len);
}
+struct nal_cmd_handler *
+libcfs_find_nal_cmd_handler(int nal)
+{
+ int i;
+
+ for (i = 0; i < sizeof(nal_cmd)/sizeof(nal_cmd[0]); i++)
+ if (nal_cmd[i].nch_handler != NULL &&
+ nal_cmd[i].nch_number == nal)
+ return (&nal_cmd[i]);
+
+ return (NULL);
+}
+
int
libcfs_nal_cmd_register(int nal, nal_cmd_handler_fn *handler, void *private)
{
- int rc = 0;
+ struct nal_cmd_handler *cmd;
+ int i;
+ int rc;
CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler);
- if (nal > 0 && nal <= NAL_MAX_NR) {
- down(&nal_cmd_sem);
- if (nal_cmd[nal].nch_handler != NULL)
- rc = -EBUSY;
- else {
- nal_cmd[nal].nch_handler = handler;
- nal_cmd[nal].nch_private = private;
+ down(&nal_cmd_sem);
+
+ if (libcfs_find_nal_cmd_handler(nal) != NULL) {
+ up (&nal_cmd_sem);
+ return (-EBUSY);
+ }
+
+ cmd = NULL;
+ for (i = 0; i < sizeof(nal_cmd)/sizeof(nal_cmd[0]); i++)
+ if (nal_cmd[i].nch_handler == NULL) {
+ cmd = &nal_cmd[i];
+ break;
}
- up(&nal_cmd_sem);
+
+ if (cmd == NULL) {
+ rc = -EBUSY;
+ } else {
+ rc = 0;
+ cmd->nch_number = nal;
+ cmd->nch_handler = handler;
+ cmd->nch_private = private;
}
+
+ up(&nal_cmd_sem);
+
return rc;
}
EXPORT_SYMBOL(libcfs_nal_cmd_register);
void
libcfs_nal_cmd_unregister(int nal)
{
- CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal);
+ struct nal_cmd_handler *cmd;
- LASSERT(nal > 0 && nal <= NAL_MAX_NR);
- LASSERT(nal_cmd[nal].nch_handler != NULL);
+ CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal);
down(&nal_cmd_sem);
- nal_cmd[nal].nch_handler = NULL;
- nal_cmd[nal].nch_private = NULL;
+ cmd = libcfs_find_nal_cmd_handler(nal);
+ LASSERT (cmd != NULL);
+ cmd->nch_handler = NULL;
+ cmd->nch_private = NULL;
up(&nal_cmd_sem);
}
EXPORT_SYMBOL(libcfs_nal_cmd_unregister);
int
libcfs_nal_cmd(struct portals_cfg *pcfg)
{
+ struct nal_cmd_handler *cmd;
__u32 nal = pcfg->pcfg_nal;
int rc = -EINVAL;
ENTRY;
down(&nal_cmd_sem);
- if (nal > 0 && nal <= NAL_MAX_NR &&
- nal_cmd[nal].nch_handler != NULL) {
+ cmd = libcfs_find_nal_cmd_handler(nal);
+ if (cmd != NULL) {
CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal,
pcfg->pcfg_command);
- rc = nal_cmd[nal].nch_handler(pcfg, nal_cmd[nal].nch_private);
+ rc = cmd->nch_handler(pcfg, cmd->nch_private);
}
up(&nal_cmd_sem);
MODULES := portals
-portals-objs := api-eq.o api-init.o api-me.o api-errno.o api-ni.o api-wrap.o
-portals-objs += lib-dispatch.o lib-init.o lib-me.o lib-msg.o lib-eq.o lib-md.o
+portals-objs := api-errno.o api-ni.o api-wrap.o
+portals-objs += lib-init.o lib-me.o lib-msg.o lib-eq.o lib-md.o
portals-objs += lib-move.o lib-ni.o lib-pid.o module.o
@INCLUDE_RULES@
include $(src)/../Kernelenv
obj-y += portals.o
-portals-objs := lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o \
+portals-objs := lib-eq.o lib-init.o lib-md.o lib-me.o \
lib-move.o lib-msg.o lib-ni.o lib-pid.o \
- api-eq.o api-errno.o api-init.o api-me.o api-ni.o \
- api-wrap.o module.o
+ api-errno.o api-ni.o api-wrap.o \
+ module.o
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * api/api-eq.c
- * User-level event queue management routines
- *
- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
- * Copyright (c) 2001-2002 Sandia National Laboratories
- *
- * This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_PORTALS
-#include <portals/api-support.h>
-
-int ptl_get_event (ptl_eq_t *eq, ptl_event_t *ev)
-{
- int new_index = eq->sequence & (eq->size - 1);
- ptl_event_t *new_event = &eq->base[new_index];
- ENTRY;
-
- CDEBUG(D_INFO, "new_event: %p, sequence: %lu, eq->size: %u\n",
- new_event, eq->sequence, eq->size);
-
- if (PTL_SEQ_GT (eq->sequence, new_event->sequence)) {
- RETURN(PTL_EQ_EMPTY);
- }
-
- *ev = *new_event;
-
- /* ensure event is delivered correctly despite possible
- races with lib_finalize */
- if (eq->sequence != new_event->sequence) {
- CERROR("DROPPING EVENT: eq seq %lu ev seq %lu\n",
- eq->sequence, new_event->sequence);
- RETURN(PTL_EQ_DROPPED);
- }
-
- eq->sequence = new_event->sequence + 1;
- RETURN(PTL_OK);
-}
-
-int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t * ev)
-{
- int which;
-
- return (PtlEQPoll (&eventq, 1, 0, ev, &which));
-}
-
-int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out)
-{
- int which;
-
- return (PtlEQPoll (&eventq_in, 1, PTL_TIME_FOREVER,
- event_out, &which));
-}
-
-int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout,
- ptl_event_t *event_out, int *which_out)
-{
- nal_t *nal;
- int i;
- int rc;
- unsigned long flags;
-
- if (!ptl_init)
- RETURN(PTL_NO_INIT);
-
- if (neq_in < 1)
- RETURN(PTL_EQ_INVALID);
-
- nal = ptl_hndl2nal(&eventqs_in[0]);
- if (nal == NULL)
- RETURN(PTL_EQ_INVALID);
-
- nal->lock(nal, &flags);
-
- for (;;) {
- for (i = 0; i < neq_in; i++) {
- ptl_eq_t *eq = ptl_handle2usereq(&eventqs_in[i]);
-
- if (i > 0 &&
- ptl_hndl2nal(&eventqs_in[i]) != nal) {
- nal->unlock(nal, &flags);
- RETURN (PTL_EQ_INVALID);
- }
-
- /* size must be a power of 2 to handle a wrapped sequence # */
- LASSERT (eq->size != 0 &&
- eq->size == LOWEST_BIT_SET (eq->size));
-
- rc = ptl_get_event (eq, event_out);
- if (rc != PTL_EQ_EMPTY) {
- nal->unlock(nal, &flags);
- *which_out = i;
- RETURN(rc);
- }
- }
-
- if (timeout == 0) {
- nal->unlock(nal, &flags);
- RETURN (PTL_EQ_EMPTY);
- }
-
- timeout = nal->yield(nal, &flags, timeout);
- }
-}
"PTL_EQ_IN_USE",
+ "PTL_NI_INVALID",
+ "PTL_MD_ILLEGAL",
+
"PTL_MAX_ERRNO"
};
/* If you change these, you must update the number table in portals/errno.h */
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * api/api-init.c
- * Initialization and global data for the p30 user side library
- *
- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
- * Copyright (c) 2001-2002 Sandia National Laboratories
- *
- * This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_PORTALS
-#include <portals/api-support.h>
-
-int PtlInit(int *max_interfaces)
-{
- if (max_interfaces != NULL)
- *max_interfaces = NAL_MAX_NR;
-
- LASSERT(!strcmp(ptl_err_str[PTL_MAX_ERRNO], "PTL_MAX_ERRNO"));
-
- return ptl_ni_init();
-}
-
-
-void PtlFini(void)
-{
- ptl_ni_fini();
-}
-
-
-void PtlSnprintHandle(char *str, int len, ptl_handle_any_t h)
-{
- snprintf(str, len, "0x%lx."LPX64, h.nal_idx, h.cookie);
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * api/api-me.c
- * Match Entry local operations.
- *
- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
- * Copyright (c) 2001-2002 Sandia National Laboratories
- *
- * This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_PORTALS
-#include <portals/api-support.h>
-
* invalidated out from under her (or worse, swapped for a
* completely different interface!) */
+ LASSERT (ptl_init);
+
if (((idx ^ NI_HANDLE_MAGIC) & ~NI_HANDLE_MASK) != 0)
return NULL;
ptl_mutex_exit();
}
-int ptl_ni_init(void)
+int PtlInit(int *max_interfaces)
{
+ LASSERT(!strcmp(ptl_err_str[PTL_MAX_ERRNO], "PTL_MAX_ERRNO"));
+
/* If this assertion fails, we need more bits in NI_HANDLE_MASK and
* to shift NI_HANDLE_MAGIC left appropriately */
LASSERT (NAL_MAX_NR <= (NI_HANDLE_MASK + 1));
+ if (max_interfaces != NULL)
+ *max_interfaces = NAL_MAX_NR;
+
ptl_mutex_enter();
if (!ptl_init) {
return PTL_OK;
}
-void ptl_ni_fini(void)
+void PtlFini(void)
{
nal_t *nal;
int i;
if (nal->nal_refct != 0) {
CWARN("NAL %d has outstanding refcount %d\n",
i, nal->nal_refct);
- nal->shutdown(nal);
+ nal->nal_ni_fini(nal);
}
ptl_nal_table[i] = NULL;
}
nal = ptl_nal_table[interface];
-
+ nal->nal_handle.nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | interface;
+ nal->nal_handle.cookie = 0;
+
CDEBUG(D_OTHER, "Starting up NAL (%d) refs %d\n", interface, nal->nal_refct);
- rc = nal->startup(nal, requested_pid, desired_limits, actual_limits);
+ rc = nal->nal_ni_init(nal, requested_pid, desired_limits, actual_limits);
if (rc != PTL_OK) {
CERROR("Error %d starting up NAL %d, refs %d\n", rc,
}
nal->nal_refct++;
- handle->nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | interface;
+ *handle = nal->nal_handle;
out:
ptl_mutex_exit ();
+
return rc;
}
nal->nal_refct--;
/* nal_refct == 0 tells nal->shutdown to really shut down */
- nal->shutdown(nal);
+ nal->nal_ni_fini(nal);
ptl_mutex_exit ();
return PTL_OK;
}
-
-int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * ni_out)
-{
- *ni_out = handle_in;
-
- return PTL_OK;
-}
# define DEBUG_SUBSYSTEM S_PORTALS
#include <portals/api-support.h>
-static int do_forward(ptl_handle_any_t any_h, int cmd, void *argbuf,
- int argsize, void *retbuf, int retsize)
+void PtlSnprintHandle(char *str, int len, ptl_handle_any_t h)
{
- nal_t *nal;
+ snprintf(str, len, "0x%lx."LPX64, h.nal_idx, h.cookie);
+}
- if (!ptl_init) {
- CERROR("Not initialized\n");
+int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t *ni_out)
+{
+ if (!ptl_init)
return PTL_NO_INIT;
- }
-
- nal = ptl_hndl2nal(&any_h);
- if (!nal)
+
+ if (ptl_hndl2nal(&handle_in) == NULL)
return PTL_HANDLE_INVALID;
-
- nal->forward(nal, cmd, argbuf, argsize, retbuf, retsize);
-
+
+ *ni_out = handle_in;
return PTL_OK;
}
int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id)
{
- PtlGetId_in args;
- PtlGetId_out ret;
- int rc;
-
- args.handle_in = ni_handle;
+ nal_t *nal;
- rc = do_forward(ni_handle, PTL_GETID, &args, sizeof(args), &ret,
- sizeof(ret));
- if (rc != PTL_OK)
- return rc;
+ if (!ptl_init)
+ return PTL_NO_INIT;
- if (id)
- *id = ret.id_out;
+ nal = ptl_hndl2nal(&ni_handle);
+ if (nal == NULL)
+ return PTL_NI_INVALID;
- return ret.rc;
+ return nal->nal_get_id(nal, id);
}
int PtlFailNid (ptl_handle_ni_t interface, ptl_nid_t nid, unsigned int threshold)
{
- PtlFailNid_in args;
- PtlFailNid_out ret;
- int rc;
-
- args.interface = interface;
- args.nid = nid;
- args.threshold = threshold;
+ nal_t *nal;
+
+ if (!ptl_init)
+ return PTL_NO_INIT;
- rc = do_forward (interface, PTL_FAILNID,
- &args, sizeof(args), &ret, sizeof (ret));
+ nal = ptl_hndl2nal(&interface);
+ if (nal == NULL)
+ return PTL_NI_INVALID;
- return ((rc != PTL_OK) ? rc : ret.rc);
+ return nal->nal_fail_nid(nal, nid, threshold);
}
int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in,
- ptl_sr_value_t * status_out)
+ ptl_sr_value_t *status_out)
{
- PtlNIStatus_in args;
- PtlNIStatus_out ret;
- int rc;
+ nal_t *nal;
- args.interface_in = interface_in;
- args.register_in = register_in;
-
- rc = do_forward(interface_in, PTL_NISTATUS, &args, sizeof(args), &ret,
- sizeof(ret));
-
- if (rc != PTL_OK)
- return rc;
-
- if (status_out)
- *status_out = ret.status_out;
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ nal = ptl_hndl2nal(&interface_in);
+ if (nal == NULL)
+ return PTL_NI_INVALID;
- return ret.rc;
+ return nal->nal_ni_status(nal, register_in, status_out);
}
int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in,
unsigned long *distance_out)
{
- PtlNIDist_in args;
- PtlNIDist_out ret;
- int rc;
-
- args.interface_in = interface_in;
- args.process_in = process_in;
-
- rc = do_forward(interface_in, PTL_NIDIST, &args, sizeof(args), &ret,
- sizeof(ret));
+ nal_t *nal;
- if (rc != PTL_OK)
- return rc;
-
- if (distance_out)
- *distance_out = ret.distance_out;
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ nal = ptl_hndl2nal(&interface_in);
+ if (nal == NULL)
+ return PTL_NI_INVALID;
- return ret.rc;
+ return nal->nal_ni_dist(nal, &process_in, distance_out);
}
int PtlMEAttach(ptl_handle_ni_t interface_in, ptl_pt_index_t index_in,
ptl_process_id_t match_id_in, ptl_match_bits_t match_bits_in,
ptl_match_bits_t ignore_bits_in, ptl_unlink_t unlink_in,
- ptl_ins_pos_t pos_in, ptl_handle_me_t * handle_out)
+ ptl_ins_pos_t pos_in, ptl_handle_me_t *handle_out)
{
- PtlMEAttach_in args;
- PtlMEAttach_out ret;
- int rc;
-
- args.interface_in = interface_in;
- args.index_in = index_in;
- args.match_id_in = match_id_in;
- args.match_bits_in = match_bits_in;
- args.ignore_bits_in = ignore_bits_in;
- args.unlink_in = unlink_in;
- args.position_in = pos_in;
-
- rc = do_forward(interface_in, PTL_MEATTACH, &args, sizeof(args), &ret,
- sizeof(ret));
-
- if (rc != PTL_OK)
- return rc;
-
- if (handle_out) {
- handle_out->nal_idx = interface_in.nal_idx;
- handle_out->cookie = ret.handle_out.cookie;
- }
-
- return ret.rc;
+ nal_t *nal;
+
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ nal = ptl_hndl2nal(&interface_in);
+ if (nal == NULL)
+ return PTL_NI_INVALID;
+
+ return nal->nal_me_attach(nal, index_in, match_id_in,
+ match_bits_in, ignore_bits_in,
+ unlink_in, pos_in, handle_out);
}
int PtlMEInsert(ptl_handle_me_t current_in, ptl_process_id_t match_id_in,
ptl_unlink_t unlink_in, ptl_ins_pos_t position_in,
ptl_handle_me_t * handle_out)
{
- PtlMEInsert_in args;
- PtlMEInsert_out ret;
- int rc;
-
- args.current_in = current_in;
- args.match_id_in = match_id_in;
- args.match_bits_in = match_bits_in;
- args.ignore_bits_in = ignore_bits_in;
- args.unlink_in = unlink_in;
- args.position_in = position_in;
-
- rc = do_forward(current_in, PTL_MEINSERT, &args, sizeof(args), &ret,
- sizeof(ret));
-
- if (rc != PTL_OK)
- return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
-
- if (handle_out) {
- handle_out->nal_idx = current_in.nal_idx;
- handle_out->cookie = ret.handle_out.cookie;
- }
- return ret.rc;
+ nal_t *nal;
+
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ nal = ptl_hndl2nal(&current_in);
+ if (nal == NULL)
+ return PTL_ME_INVALID;
+
+ return nal->nal_me_insert(nal, &current_in, match_id_in,
+ match_bits_in, ignore_bits_in,
+ unlink_in, position_in, handle_out);
}
int PtlMEUnlink(ptl_handle_me_t current_in)
{
- PtlMEUnlink_in args;
- PtlMEUnlink_out ret;
- int rc;
+ nal_t *nal;
- args.current_in = current_in;
- args.unlink_in = PTL_RETAIN;
-
- rc = do_forward(current_in, PTL_MEUNLINK, &args, sizeof(args), &ret,
- sizeof(ret));
-
- if (rc != PTL_OK)
- return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ nal = ptl_hndl2nal(&current_in);
+ if (nal == NULL)
+ return PTL_ME_INVALID;
- return ret.rc;
+ return nal->nal_me_unlink(nal, &current_in);
}
-int PtlTblDump(ptl_handle_ni_t ni, int index_in)
+int PtlMDAttach(ptl_handle_me_t me_in, ptl_md_t md_in,
+ ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out)
{
- PtlTblDump_in args;
- PtlTblDump_out ret;
- int rc;
+ nal_t *nal;
- args.index_in = index_in;
-
- rc = do_forward(ni, PTL_TBLDUMP, &args, sizeof(args), &ret,
- sizeof(ret));
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ nal = ptl_hndl2nal(&me_in);
+ if (nal == NULL)
+ return PTL_ME_INVALID;
- if (rc != PTL_OK)
- return rc;
+ if (!PtlHandleIsEqual(md_in.eventq, PTL_EQ_NONE) &&
+ ptl_hndl2nal(&md_in.eventq) != nal)
+ return PTL_MD_ILLEGAL;
- return ret.rc;
+ return (nal->nal_md_attach)(nal, &me_in, &md_in,
+ unlink_in, handle_out);
}
-int PtlMEDump(ptl_handle_me_t current_in)
+int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in,
+ ptl_unlink_t unlink_in, ptl_handle_md_t *handle_out)
{
- PtlMEDump_in args;
- PtlMEDump_out ret;
- int rc;
+ nal_t *nal;
- args.current_in = current_in;
-
- rc = do_forward(current_in, PTL_MEDUMP, &args, sizeof(args), &ret,
- sizeof(ret));
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ nal = ptl_hndl2nal(&ni_in);
+ if (nal == NULL)
+ return PTL_NI_INVALID;
- if (rc != PTL_OK)
- return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
+ if (!PtlHandleIsEqual(md_in.eventq, PTL_EQ_NONE) &&
+ ptl_hndl2nal(&md_in.eventq) != nal)
+ return PTL_MD_ILLEGAL;
- return ret.rc;
+ return (nal->nal_md_bind)(nal, &md_in, unlink_in, handle_out);
}
-static ptl_handle_eq_t md2eq (ptl_md_t *md)
+int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t *old_inout,
+ ptl_md_t *new_inout, ptl_handle_eq_t testq_in)
{
- if (PtlHandleIsEqual (md->eventq, PTL_EQ_NONE))
- return (PTL_EQ_NONE);
+ nal_t *nal;
- return (ptl_handle2usereq (&md->eventq)->cb_eq_handle);
-}
-
-
-int PtlMDAttach(ptl_handle_me_t me_in, ptl_md_t md_in,
- ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out)
-{
- PtlMDAttach_in args;
- PtlMDAttach_out ret;
- int rc;
-
- args.eq_in = md2eq(&md_in);
- args.me_in = me_in;
- args.md_in = md_in;
- args.unlink_in = unlink_in;
-
- rc = do_forward(me_in, PTL_MDATTACH,
- &args, sizeof(args), &ret, sizeof(ret));
-
- if (rc != PTL_OK)
- return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
-
- if (handle_out) {
- handle_out->nal_idx = me_in.nal_idx;
- handle_out->cookie = ret.handle_out.cookie;
- }
- return ret.rc;
-}
-
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ nal = ptl_hndl2nal(&md_in);
+ if (nal == NULL)
+ return PTL_MD_INVALID;
+ if (!PtlHandleIsEqual(testq_in, PTL_EQ_NONE) &&
+ ptl_hndl2nal(&testq_in) != nal)
+ return PTL_EQ_INVALID;
-int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in,
- ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out)
-{
- PtlMDBind_in args;
- PtlMDBind_out ret;
- int rc;
-
- args.eq_in = md2eq(&md_in);
- args.ni_in = ni_in;
- args.md_in = md_in;
- args.unlink_in = unlink_in;
-
- rc = do_forward(ni_in, PTL_MDBIND,
- &args, sizeof(args), &ret, sizeof(ret));
-
- if (rc != PTL_OK)
- return rc;
-
- if (handle_out) {
- handle_out->nal_idx = ni_in.nal_idx;
- handle_out->cookie = ret.handle_out.cookie;
- }
- return ret.rc;
+ return (nal->nal_md_update)(nal, &md_in,
+ old_inout, new_inout, &testq_in);
}
-int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t *old_inout,
- ptl_md_t *new_inout, ptl_handle_eq_t testq_in)
+int PtlMDUnlink(ptl_handle_md_t md_in)
{
- PtlMDUpdate_internal_in args;
- PtlMDUpdate_internal_out ret;
- int rc;
-
- args.md_in = md_in;
-
- if (old_inout) {
- args.old_inout = *old_inout;
- args.old_inout_valid = 1;
- } else
- args.old_inout_valid = 0;
-
- if (new_inout) {
- args.new_inout = *new_inout;
- args.new_inout_valid = 1;
- } else
- args.new_inout_valid = 0;
-
- if (PtlHandleIsEqual (testq_in, PTL_EQ_NONE)) {
- args.testq_in = PTL_EQ_NONE;
- args.sequence_in = -1;
- } else {
- ptl_eq_t *eq = ptl_handle2usereq (&testq_in);
-
- args.testq_in = eq->cb_eq_handle;
- args.sequence_in = eq->sequence;
- }
-
- rc = do_forward(md_in, PTL_MDUPDATE, &args, sizeof(args), &ret,
- sizeof(ret));
- if (rc != PTL_OK)
- return (rc == PTL_HANDLE_INVALID) ? PTL_MD_INVALID : rc;
-
- if (old_inout)
- *old_inout = ret.old_inout;
-
- return ret.rc;
+ nal_t *nal;
+
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ nal = ptl_hndl2nal(&md_in);
+ if (nal == NULL)
+ return PTL_MD_INVALID;
+
+ return (nal->nal_md_unlink)(nal, &md_in);
}
-int PtlMDUnlink(ptl_handle_md_t md_in)
+int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count,
+ ptl_eq_handler_t callback,
+ ptl_handle_eq_t *handle_out)
{
- PtlMDUnlink_in args;
- PtlMDUnlink_out ret;
- int rc;
-
- args.md_in = md_in;
- rc = do_forward(md_in, PTL_MDUNLINK, &args, sizeof(args), &ret,
- sizeof(ret));
- if (rc != PTL_OK)
- return (rc == PTL_HANDLE_INVALID) ? PTL_MD_INVALID : rc;
+ nal_t *nal;
+
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ nal = ptl_hndl2nal(&interface);
+ if (nal == NULL)
+ return PTL_NI_INVALID;
- return ret.rc;
+ return (nal->nal_eq_alloc)(nal, count, callback, handle_out);
}
-int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count,
- ptl_eq_handler_t callback,
- ptl_handle_eq_t * handle_out)
+int PtlEQFree(ptl_handle_eq_t eventq)
{
- ptl_eq_t *eq = NULL;
- ptl_event_t *ev = NULL;
- PtlEQAlloc_in args;
- PtlEQAlloc_out ret;
- int rc, i;
- nal_t *nal;
+ nal_t *nal;
if (!ptl_init)
return PTL_NO_INIT;
- nal = ptl_hndl2nal (&interface);
+ nal = ptl_hndl2nal(&eventq);
if (nal == NULL)
- return PTL_HANDLE_INVALID;
+ return PTL_EQ_INVALID;
- if (count != LOWEST_BIT_SET(count)) { /* not a power of 2 already */
- do { /* knock off all but the top bit... */
- count &= ~LOWEST_BIT_SET (count);
- } while (count != LOWEST_BIT_SET(count));
-
- count <<= 1; /* ...and round up */
- }
-
- if (count == 0) /* catch bad parameter / overflow on roundup */
- return (PTL_VAL_FAILED);
-
- PORTAL_ALLOC(ev, count * sizeof(ptl_event_t));
- if (!ev)
- return PTL_NO_SPACE;
-
- for (i = 0; i < count; i++)
- ev[i].sequence = 0;
-
- args.ni_in = interface;
- args.count_in = count;
- args.base_in = ev;
- args.len_in = count * sizeof(*ev);
- args.callback_in = callback;
-
- rc = do_forward(interface, PTL_EQALLOC, &args, sizeof(args), &ret,
- sizeof(ret));
- if (rc != PTL_OK)
- goto fail;
- if (ret.rc)
- GOTO(fail, rc = ret.rc);
-
- PORTAL_ALLOC(eq, sizeof(*eq));
- if (!eq) {
- rc = PTL_NO_SPACE;
- goto fail;
- }
-
- eq->sequence = 1;
- eq->size = count;
- eq->base = ev;
-
- /* EQ handles are a little wierd. PtlEQGet() just looks at the
- * queued events in shared memory. It doesn't want to do_forward()
- * at all, so the cookie in the EQ handle we pass out of here is
- * simply a pointer to the event queue we just set up. We stash
- * the handle returned by do_forward(), so we can pass it back via
- * do_forward() when we need to. */
-
- eq->cb_eq_handle.nal_idx = interface.nal_idx;
- eq->cb_eq_handle.cookie = ret.handle_out.cookie;
-
- handle_out->nal_idx = interface.nal_idx;
- handle_out->cookie = (__u64)((unsigned long)eq);
- return PTL_OK;
+ return (nal->nal_eq_free)(nal, &eventq);
+}
-fail:
- PORTAL_FREE(ev, count * sizeof(ptl_event_t));
- return rc;
+int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t *ev)
+{
+ int which;
+
+ return (PtlEQPoll (&eventq, 1, 0, ev, &which));
}
-int PtlEQFree(ptl_handle_eq_t eventq)
+int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out)
{
- PtlEQFree_in args;
- PtlEQFree_out ret;
- ptl_eq_t *eq;
- int rc;
+ int which;
+
+ return (PtlEQPoll (&eventq_in, 1, PTL_TIME_FOREVER,
+ event_out, &which));
+}
- eq = ptl_handle2usereq (&eventq);
- args.eventq_in = eq->cb_eq_handle;
+int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout,
+ ptl_event_t *event_out, int *which_out)
+{
+ int i;
+ nal_t *nal;
- rc = do_forward(eq->cb_eq_handle, PTL_EQFREE, &args,
- sizeof(args), &ret, sizeof(ret));
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ if (neq_in < 1)
+ return PTL_EQ_INVALID;
+
+ nal = ptl_hndl2nal(&eventqs_in[0]);
+ if (nal == NULL)
+ return PTL_EQ_INVALID;
- /* XXX we're betting rc == PTL_OK here */
- PORTAL_FREE(eq->base, eq->size * sizeof(ptl_event_t));
- PORTAL_FREE(eq, sizeof(*eq));
+ for (i = 1; i < neq_in; i++)
+ if (ptl_hndl2nal(&eventqs_in[i]) != nal)
+ return PTL_EQ_INVALID;
- return rc;
+ return (nal->nal_eq_poll)(nal, eventqs_in, neq_in, timeout,
+ event_out, which_out);
}
+
int PtlACEntry(ptl_handle_ni_t ni_in, ptl_ac_index_t index_in,
ptl_process_id_t match_id_in, ptl_pt_index_t portal_in)
{
- PtlACEntry_in args;
- PtlACEntry_out ret;
- int rc;
-
- /*
- * Copy arguments into the argument block to
- * hand to the forwarding object
- */
- args.ni_in = ni_in;
- args.index_in = index_in;
- args.match_id_in = match_id_in;
- args.portal_in = portal_in;
-
- rc = do_forward(ni_in, PTL_ACENTRY, &args, sizeof(args), &ret,
- sizeof(ret));
-
- return (rc != PTL_OK) ? rc : ret.rc;
+ nal_t *nal;
+
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ nal = ptl_hndl2nal(&ni_in);
+ if (nal == NULL)
+ return PTL_NI_INVALID;
+
+ return (nal->nal_ace_entry)(nal, index_in, match_id_in, portal_in);
}
int PtlPut(ptl_handle_md_t md_in, ptl_ack_req_t ack_req_in,
ptl_process_id_t target_in, ptl_pt_index_t portal_in,
- ptl_ac_index_t cookie_in, ptl_match_bits_t match_bits_in,
+ ptl_ac_index_t ac_in, ptl_match_bits_t match_bits_in,
ptl_size_t offset_in, ptl_hdr_data_t hdr_data_in)
{
- PtlPut_in args;
- PtlPut_out ret;
- int rc;
-
- /*
- * Copy arguments into the argument block to
- * hand to the forwarding object
- */
- args.md_in = md_in;
- args.ack_req_in = ack_req_in;
- args.target_in = target_in;
- args.portal_in = portal_in;
- args.cookie_in = cookie_in;
- args.match_bits_in = match_bits_in;
- args.offset_in = offset_in;
- args.hdr_data_in = hdr_data_in;
-
- rc = do_forward(md_in, PTL_PUT, &args, sizeof(args), &ret, sizeof(ret));
-
- return (rc != PTL_OK) ? rc : ret.rc;
+ nal_t *nal;
+
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ nal = ptl_hndl2nal(&md_in);
+ if (nal == NULL)
+ return PTL_MD_INVALID;
+
+ return (nal->nal_put)(nal, &md_in, ack_req_in,
+ &target_in, portal_in, ac_in,
+ match_bits_in, offset_in, hdr_data_in);
}
int PtlGet(ptl_handle_md_t md_in, ptl_process_id_t target_in,
- ptl_pt_index_t portal_in, ptl_ac_index_t cookie_in,
+ ptl_pt_index_t portal_in, ptl_ac_index_t ac_in,
ptl_match_bits_t match_bits_in, ptl_size_t offset_in)
{
- PtlGet_in args;
- PtlGet_out ret;
- int rc;
-
- /*
- * Copy arguments into the argument block to
- * hand to the forwarding object
- */
- args.md_in = md_in;
- args.target_in = target_in;
- args.portal_in = portal_in;
- args.cookie_in = cookie_in;
- args.match_bits_in = match_bits_in;
- args.offset_in = offset_in;
-
- rc = do_forward(md_in, PTL_GET, &args, sizeof(args), &ret, sizeof(ret));
-
- return (rc != PTL_OK) ? rc : ret.rc;
+ nal_t *nal;
+
+ if (!ptl_init)
+ return PTL_NO_INIT;
+
+ nal = ptl_hndl2nal(&md_in);
+ if (nal == NULL)
+ return PTL_MD_INVALID;
+
+ return (nal->nal_get)(nal, &md_in,
+ &target_in, portal_in, ac_in,
+ match_bits_in, offset_in);
}
+
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
-my_sources = api-eq.c api-init.c api-me.c api-errno.c api-ni.c api-wrap.c \
- lib-dispatch.c lib-init.c lib-me.c lib-msg.c lib-eq.c \
+my_sources = api-errno.c api-ni.c api-wrap.c \
+ lib-init.c lib-me.c lib-msg.c lib-eq.c \
lib-md.c lib-move.c lib-ni.c lib-pid.c
if !CRAY_PORTALS
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * lib/lib-dispatch.c
- *
- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
- * Copyright (c) 2001-2002 Sandia National Laboratories
- *
- * This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_PORTALS
-#include <portals/lib-p30.h>
-#include <portals/lib-dispatch.h>
-
-typedef struct {
- int (*fun) (nal_cb_t * nal, void *private, void *in, void *out);
- char *name;
-} dispatch_table_t;
-
-static dispatch_table_t dispatch_table[] = {
- [PTL_GETID] {do_PtlGetId, "PtlGetId"},
- [PTL_NISTATUS] {do_PtlNIStatus, "PtlNIStatus"},
- [PTL_NIDIST] {do_PtlNIDist, "PtlNIDist"},
- [PTL_MEATTACH] {do_PtlMEAttach, "PtlMEAttach"},
- [PTL_MEINSERT] {do_PtlMEInsert, "PtlMEInsert"},
- [PTL_MEUNLINK] {do_PtlMEUnlink, "PtlMEUnlink"},
- [PTL_TBLDUMP] {do_PtlTblDump, "PtlTblDump"},
- [PTL_MEDUMP] {do_PtlMEDump, "PtlMEDump"},
- [PTL_MDATTACH] {do_PtlMDAttach, "PtlMDAttach"},
- [PTL_MDBIND] {do_PtlMDBind, "PtlMDBind"},
- [PTL_MDUPDATE] {do_PtlMDUpdate_internal, "PtlMDUpdate_internal"},
- [PTL_MDUNLINK] {do_PtlMDUnlink, "PtlMDUnlink"},
- [PTL_EQALLOC] {do_PtlEQAlloc_internal, "PtlEQAlloc_internal"},
- [PTL_EQFREE] {do_PtlEQFree_internal, "PtlEQFree_internal"},
- [PTL_PUT] {do_PtlPut, "PtlPut"},
- [PTL_GET] {do_PtlGet, "PtlGet"},
- [PTL_FAILNID] {do_PtlFailNid, "PtlFailNid"},
- /* */ {0, ""}
-};
-
-/*
- * This really should be elsewhere, but lib-p30/dispatch.c is
- * an automatically generated file.
- */
-void lib_dispatch(nal_cb_t * nal, void *private, int index, void *arg_block,
- void *ret_block)
-{
- lib_ni_t *ni = &nal->ni;
-
- if (index < 0 || index > LIB_MAX_DISPATCH ||
- !dispatch_table[index].fun) {
- CDEBUG(D_NET, LPU64": Invalid API call %d\n", ni->nid, index);
- return;
- }
-
- CDEBUG(D_NET, LPU64": API call %s (%d)\n", ni->nid,
- dispatch_table[index].name, index);
-
- dispatch_table[index].fun(nal, private, arg_block, ret_block);
-}
-
-char *dispatch_name(int index)
-{
- return dispatch_table[index].name;
-}
#define DEBUG_SUBSYSTEM S_PORTALS
#include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
-int do_PtlEQAlloc_internal(nal_cb_t * nal, void *private, void *v_args,
- void *v_ret)
+int
+lib_api_eq_alloc (nal_t *apinal, ptl_size_t count,
+ ptl_eq_handler_t callback,
+ ptl_handle_eq_t *handle)
{
- /*
- * Incoming:
- * ptl_handle_ni_t ni_in
- * ptl_size_t count_in
- * void * base_in
- *
- * Outgoing:
- * ptl_handle_eq_t * handle_out
- */
-
- PtlEQAlloc_in *args = v_args;
- PtlEQAlloc_out *ret = v_ret;
-
- lib_eq_t *eq;
- unsigned long flags;
-
- /* api should have rounded up */
- if (args->count_in != LOWEST_BIT_SET (args->count_in))
- return ret->rc = PTL_VAL_FAILED;
+ lib_nal_t *nal = apinal->nal_data;
+ lib_eq_t *eq;
+ unsigned long flags;
+ int rc;
+ /* We need count to be a power of 2 so that when eq_{enq,deq}_seq
+ * overflow, they don't skip entries, so the queue has the same
+ * apparant capacity at all times */
+
+ if (count != LOWEST_BIT_SET(count)) { /* not a power of 2 already */
+ do { /* knock off all but the top bit... */
+ count &= ~LOWEST_BIT_SET (count);
+ } while (count != LOWEST_BIT_SET(count));
+
+ count <<= 1; /* ...and round up */
+ }
+
+ if (count == 0) /* catch bad parameter / overflow on roundup */
+ return (PTL_VAL_FAILED);
+
eq = lib_eq_alloc (nal);
if (eq == NULL)
- return (ret->rc = PTL_NO_SPACE);
+ return (PTL_NO_SPACE);
- state_lock(nal, &flags);
+ PORTAL_ALLOC(eq->eq_events, count * sizeof(ptl_event_t));
+ if (eq->eq_events == NULL) {
+ LIB_LOCK(nal, flags);
+ lib_eq_free (nal, eq);
+ LIB_UNLOCK(nal, flags);
+ }
- if (nal->cb_map != NULL) {
+ if (nal->libnal_map != NULL) {
struct iovec iov = {
- .iov_base = args->base_in,
- .iov_len = args->count_in * sizeof (ptl_event_t) };
+ .iov_base = eq->eq_events,
+ .iov_len = count * sizeof(ptl_event_t)};
- ret->rc = nal->cb_map (nal, 1, &iov, &eq->eq_addrkey);
- if (ret->rc != PTL_OK) {
+ rc = nal->libnal_map(nal, 1, &iov, &eq->eq_addrkey);
+ if (rc != PTL_OK) {
+ LIB_LOCK(nal, flags);
lib_eq_free (nal, eq);
-
- state_unlock (nal, &flags);
- return (ret->rc);
+ LIB_UNLOCK(nal, flags);
+ return (rc);
}
}
- eq->sequence = 1;
- eq->base = args->base_in;
- eq->size = args->count_in;
+ /* NB this resets all event sequence numbers to 0, to be earlier
+ * than eq_deq_seq */
+ memset(eq->eq_events, 0, count * sizeof(ptl_event_t));
+
+ eq->eq_deq_seq = 1;
+ eq->eq_enq_seq = 1;
+ eq->eq_size = count;
eq->eq_refcount = 0;
- eq->event_callback = args->callback_in;
+ eq->eq_callback = callback;
+
+ LIB_LOCK(nal, flags);
lib_initialise_handle (nal, &eq->eq_lh, PTL_COOKIE_TYPE_EQ);
- list_add (&eq->eq_list, &nal->ni.ni_active_eqs);
+ list_add (&eq->eq_list, &nal->libnal_ni.ni_active_eqs);
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
- ptl_eq2handle(&ret->handle_out, eq);
- return (ret->rc = PTL_OK);
+ ptl_eq2handle(handle, nal, eq);
+ return (PTL_OK);
}
-int do_PtlEQFree_internal(nal_cb_t * nal, void *private, void *v_args,
- void *v_ret)
+int
+lib_api_eq_free(nal_t *apinal, ptl_handle_eq_t *eqh)
{
- /*
- * Incoming:
- * ptl_handle_eq_t eventq_in
- *
- * Outgoing:
- */
-
- PtlEQFree_in *args = v_args;
- PtlEQFree_out *ret = v_ret;
- lib_eq_t *eq;
- long flags;
+ lib_nal_t *nal = apinal->nal_data;
+ lib_eq_t *eq;
+ int size;
+ ptl_event_t *events;
+ void *addrkey;
+ unsigned long flags;
- state_lock (nal, &flags);
+ LIB_LOCK(nal, flags);
- eq = ptl_handle2eq(&args->eventq_in, nal);
+ eq = ptl_handle2eq(eqh, nal);
if (eq == NULL) {
- ret->rc = PTL_EQ_INVALID;
- } else if (eq->eq_refcount != 0) {
- ret->rc = PTL_EQ_IN_USE;
+ LIB_UNLOCK(nal, flags);
+ return (PTL_EQ_INVALID);
+ }
+
+ if (eq->eq_refcount != 0) {
+ LIB_UNLOCK(nal, flags);
+ return (PTL_EQ_IN_USE);
+ }
+
+ /* stash for free after lock dropped */
+ events = eq->eq_events;
+ size = eq->eq_size;
+ addrkey = eq->eq_addrkey;
+
+ lib_invalidate_handle (nal, &eq->eq_lh);
+ list_del (&eq->eq_list);
+ lib_eq_free (nal, eq);
+
+ LIB_UNLOCK(nal, flags);
+
+ if (nal->libnal_unmap != NULL) {
+ struct iovec iov = {
+ .iov_base = events,
+ .iov_len = size * sizeof(ptl_event_t)};
+
+ nal->libnal_unmap(nal, 1, &iov, &addrkey);
+ }
+
+ PORTAL_FREE(events, size * sizeof (ptl_event_t));
+
+ return (PTL_OK);
+}
+
+int
+lib_get_event (lib_eq_t *eq, ptl_event_t *ev)
+{
+ int new_index = eq->eq_deq_seq & (eq->eq_size - 1);
+ ptl_event_t *new_event = &eq->eq_events[new_index];
+ int rc;
+ ENTRY;
+
+ CDEBUG(D_INFO, "event: %p, sequence: %lu, eq->size: %u\n",
+ new_event, eq->eq_deq_seq, eq->eq_size);
+
+ if (PTL_SEQ_GT (eq->eq_deq_seq, new_event->sequence)) {
+ RETURN(PTL_EQ_EMPTY);
+ }
+
+ /* We've got a new event... */
+ *ev = *new_event;
+
+ /* ...but did it overwrite an event we've not seen yet? */
+ if (eq->eq_deq_seq == new_event->sequence) {
+ rc = PTL_OK;
} else {
- if (nal->cb_unmap != NULL) {
- struct iovec iov = {
- .iov_base = eq->base,
- .iov_len = eq->size * sizeof (ptl_event_t) };
-
- nal->cb_unmap(nal, 1, &iov, &eq->eq_addrkey);
+ CERROR("Event Queue Overflow: eq seq %lu ev seq %lu\n",
+ eq->eq_deq_seq, new_event->sequence);
+ rc = PTL_EQ_DROPPED;
+ }
+
+ eq->eq_deq_seq = new_event->sequence + 1;
+ RETURN(rc);
+}
+
+
+int
+lib_api_eq_poll (nal_t *apinal,
+ ptl_handle_eq_t *eventqs, int neq, int timeout_ms,
+ ptl_event_t *event, int *which)
+{
+ lib_nal_t *nal = apinal->nal_data;
+ lib_ni_t *ni = &nal->libnal_ni;
+ unsigned long flags;
+ int i;
+ int rc;
+#ifdef __KERNEL__
+ wait_queue_t wq;
+ unsigned long now;
+#else
+ struct timeval then;
+ struct timeval now;
+ struct timespec ts;
+#endif
+ ENTRY;
+
+ LIB_LOCK(nal, flags);
+
+ for (;;) {
+ for (i = 0; i < neq; i++) {
+ lib_eq_t *eq = ptl_handle2eq(&eventqs[i], nal);
+
+ rc = lib_get_event (eq, event);
+ if (rc != PTL_EQ_EMPTY) {
+ LIB_UNLOCK(nal, flags);
+ *which = i;
+ RETURN(rc);
+ }
+ }
+
+ if (timeout_ms == 0) {
+ LIB_UNLOCK (nal, flags);
+ RETURN (PTL_EQ_EMPTY);
}
- lib_invalidate_handle (nal, &eq->eq_lh);
- list_del (&eq->eq_list);
- lib_eq_free (nal, eq);
- ret->rc = PTL_OK;
- }
+ /* Some architectures force us to do spin locking/unlocking
+ * in the same stack frame, means we can abstract the
+ * locking here */
+#ifdef __KERNEL__
+ init_waitqueue_entry(&wq, current);
+ set_current_state(TASK_INTERRUPTIBLE);
+ add_wait_queue(&ni->ni_waitq, &wq);
- state_unlock (nal, &flags);
+ LIB_UNLOCK(nal, flags);
- return (ret->rc);
+ if (timeout_ms < 0) {
+ schedule ();
+ } else {
+ now = jiffies;
+ schedule_timeout((timeout_ms * HZ)/1000);
+ timeout_ms -= ((jiffies - now) * 1000)/HZ;
+ if (timeout_ms < 0)
+ timeout_ms = 0;
+ }
+
+ LIB_LOCK(nal, flags);
+#else
+ if (timeout_ms < 0) {
+ pthread_cond_wait(&ni->ni_cond, &ni->ni_mutex);
+ } else {
+ gettimeofday(&then, NULL);
+
+ ts.tv_sec = then.tv_sec + timeout_ms/1000;
+ ts.tv_nsec = then.tv_usec * 1000 +
+ (timeout_ms%1000) * 1000000;
+ if (ts.tv_nsec >= 1000000000) {
+ ts.tv_sec++;
+ ts.tv_nsec -= 1000000000;
+ }
+
+ pthread_cond_timedwait(&ni->ni_cond,
+ &ni->ni_mutex, &ts);
+
+ gettimeofday(&now, NULL);
+ timeout_ms -= (now.tv_sec - then.tv_sec) * 1000 +
+ (now.tv_usec - then.tv_usec) / 1000;
+
+ if (timeout_ms < 0)
+ timeout_ms = 0;
+ }
+#endif
+ }
}
#ifndef PTL_USE_LIB_FREELIST
int
-kportal_descriptor_setup (nal_cb_t *nal,
+kportal_descriptor_setup (lib_nal_t *nal,
ptl_ni_limits_t *requested_limits,
ptl_ni_limits_t *actual_limits)
{
}
void
-kportal_descriptor_cleanup (nal_cb_t *nal)
+kportal_descriptor_cleanup (lib_nal_t *nal)
{
}
#else
int
-lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int n, int size)
+lib_freelist_init (lib_nal_t *nal, lib_freelist_t *fl, int n, int size)
{
char *space;
size += offsetof (lib_freeobj_t, fo_contents);
- space = nal->cb_malloc (nal, n * size);
+ PORTAL_ALLOC(space, n * size);
if (space == NULL)
return (PTL_NO_SPACE);
}
void
-lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl)
+lib_freelist_fini (lib_nal_t *nal, lib_freelist_t *fl)
{
struct list_head *el;
int count;
LASSERT (count == fl->fl_nobjs);
- nal->cb_free (nal, fl->fl_objs, fl->fl_nobjs * fl->fl_objsize);
+ PORTAL_FREE(fl->fl_objs, fl->fl_nobjs * fl->fl_objsize);
memset (fl, 0, sizeof (fl));
}
int
-kportal_descriptor_setup (nal_cb_t *nal,
+kportal_descriptor_setup (lib_nal_t *nal,
ptl_ni_limits_t *requested_limits,
ptl_ni_limits_t *actual_limits)
{
/* NB on failure caller must still call kportal_descriptor_cleanup */
/* ****** */
- int rc;
+ lib_ni_t *ni = &nal->libnal_ni;
+ int rc;
- memset (&nal->ni.ni_free_mes, 0, sizeof (nal->ni.ni_free_mes));
- memset (&nal->ni.ni_free_msgs, 0, sizeof (nal->ni.ni_free_msgs));
- memset (&nal->ni.ni_free_mds, 0, sizeof (nal->ni.ni_free_mds));
- memset (&nal->ni.ni_free_eqs, 0, sizeof (nal->ni.ni_free_eqs));
+ memset (&ni->ni_free_mes, 0, sizeof (ni->ni_free_mes));
+ memset (&ni->ni_free_msgs, 0, sizeof (ni->ni_free_msgs));
+ memset (&ni->ni_free_mds, 0, sizeof (ni->ni_free_mds));
+ memset (&ni->ni_free_eqs, 0, sizeof (ni->ni_free_eqs));
/* Ignore requested limits! */
actual_limits->max_mes = MAX_MES;
/* Hahahah what a load of bollocks. There's nowhere to
* specify the max # messages in-flight */
- rc = lib_freelist_init (nal, &nal->ni.ni_free_mes,
+ rc = lib_freelist_init (nal, &ni->ni_free_mes,
MAX_MES, sizeof (lib_me_t));
if (rc != PTL_OK)
return (rc);
- rc = lib_freelist_init (nal, &nal->ni.ni_free_msgs,
+ rc = lib_freelist_init (nal, &ni->ni_free_msgs,
MAX_MSGS, sizeof (lib_msg_t));
if (rc != PTL_OK)
return (rc);
- rc = lib_freelist_init (nal, &nal->ni.ni_free_mds,
+ rc = lib_freelist_init (nal, &ni->ni_free_mds,
MAX_MDS, sizeof (lib_md_t));
if (rc != PTL_OK)
return (rc);
- rc = lib_freelist_init (nal, &nal->ni.ni_free_eqs,
+ rc = lib_freelist_init (nal, &ni->ni_free_eqs,
MAX_EQS, sizeof (lib_eq_t));
return (rc);
}
void
-kportal_descriptor_cleanup (nal_cb_t *nal)
+kportal_descriptor_cleanup (lib_nal_t *nal)
{
- lib_freelist_fini (nal, &nal->ni.ni_free_mes);
- lib_freelist_fini (nal, &nal->ni.ni_free_msgs);
- lib_freelist_fini (nal, &nal->ni.ni_free_mds);
- lib_freelist_fini (nal, &nal->ni.ni_free_eqs);
+ lib_ni_t *ni = &nal->libnal_ni;
+
+ lib_freelist_fini (nal, &ni->ni_free_mes);
+ lib_freelist_fini (nal, &ni->ni_free_msgs);
+ lib_freelist_fini (nal, &ni->ni_free_mds);
+ lib_freelist_fini (nal, &ni->ni_free_eqs);
}
#endif
__u64
-lib_create_interface_cookie (nal_cb_t *nal)
+lib_create_interface_cookie (lib_nal_t *nal)
{
/* NB the interface cookie in wire handles guards against delayed
* replies and ACKs appearing valid in a new instance of the same
}
int
-lib_setup_handle_hash (nal_cb_t *nal)
+lib_setup_handle_hash (lib_nal_t *nal)
{
- lib_ni_t *ni = &nal->ni;
+ lib_ni_t *ni = &nal->libnal_ni;
int i;
/* Arbitrary choice of hash table size */
#else
ni->ni_lh_hash_size = (MAX_MES + MAX_MDS + MAX_EQS)/4;
#endif
- ni->ni_lh_hash_table =
- (struct list_head *)nal->cb_malloc (nal, ni->ni_lh_hash_size
- * sizeof (struct list_head));
+ PORTAL_ALLOC(ni->ni_lh_hash_table,
+ ni->ni_lh_hash_size * sizeof (struct list_head));
if (ni->ni_lh_hash_table == NULL)
return (PTL_NO_SPACE);
}
void
-lib_cleanup_handle_hash (nal_cb_t *nal)
+lib_cleanup_handle_hash (lib_nal_t *nal)
{
- lib_ni_t *ni = &nal->ni;
+ lib_ni_t *ni = &nal->libnal_ni;
if (ni->ni_lh_hash_table == NULL)
return;
- nal->cb_free (nal, ni->ni_lh_hash_table,
- ni->ni_lh_hash_size * sizeof (struct list_head));
+ PORTAL_FREE(ni->ni_lh_hash_table,
+ ni->ni_lh_hash_size * sizeof (struct list_head));
}
lib_handle_t *
-lib_lookup_cookie (nal_cb_t *nal, __u64 cookie, int type)
+lib_lookup_cookie (lib_nal_t *nal, __u64 cookie, int type)
{
/* ALWAYS called with statelock held */
- lib_ni_t *ni = &nal->ni;
+ lib_ni_t *ni = &nal->libnal_ni;
struct list_head *list;
struct list_head *el;
unsigned int hash;
}
void
-lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh, int type)
+lib_initialise_handle (lib_nal_t *nal, lib_handle_t *lh, int type)
{
/* ALWAYS called with statelock held */
- lib_ni_t *ni = &nal->ni;
+ lib_ni_t *ni = &nal->libnal_ni;
unsigned int hash;
LASSERT (type >= 0 && type < PTL_COOKIE_TYPES);
}
void
-lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh)
+lib_invalidate_handle (lib_nal_t *nal, lib_handle_t *lh)
{
list_del (&lh->lh_hash_chain);
}
int
-lib_init(nal_cb_t *nal, ptl_process_id_t process_id,
+lib_init(lib_nal_t *libnal, nal_t *apinal,
+ ptl_process_id_t process_id,
ptl_ni_limits_t *requested_limits,
ptl_ni_limits_t *actual_limits)
{
int rc = PTL_OK;
- lib_ni_t *ni = &nal->ni;
- int ptl_size;
- int i;
+ lib_ni_t *ni = &libnal->libnal_ni;
+ int ptl_size;
+ int i;
ENTRY;
/* NB serialised in PtlNIInit() */
lib_assert_wire_constants ();
-
- /*
- * Allocate the portal table for this interface
- * and all per-interface objects.
- */
- memset(&ni->counters, 0, sizeof(lib_counters_t));
- rc = kportal_descriptor_setup (nal, requested_limits,
- &ni->actual_limits);
+ /* Setup the API nal with the lib API handling functions */
+ apinal->nal_get_id = lib_api_get_id;
+ apinal->nal_ni_status = lib_api_ni_status;
+ apinal->nal_ni_dist = lib_api_ni_dist;
+ apinal->nal_fail_nid = lib_api_fail_nid;
+ apinal->nal_me_attach = lib_api_me_attach;
+ apinal->nal_me_insert = lib_api_me_insert;
+ apinal->nal_me_unlink = lib_api_me_unlink;
+ apinal->nal_md_attach = lib_api_md_attach;
+ apinal->nal_md_bind = lib_api_md_bind;
+ apinal->nal_md_unlink = lib_api_md_unlink;
+ apinal->nal_md_update = lib_api_md_update;
+ apinal->nal_eq_alloc = lib_api_eq_alloc;
+ apinal->nal_eq_free = lib_api_eq_free;
+ apinal->nal_eq_poll = lib_api_eq_poll;
+ apinal->nal_put = lib_api_put;
+ apinal->nal_get = lib_api_get;
+
+ apinal->nal_data = libnal;
+ ni->ni_api = apinal;
+
+ rc = kportal_descriptor_setup (libnal, requested_limits,
+ &ni->ni_actual_limits);
if (rc != PTL_OK)
goto out;
+ memset(&ni->ni_counters, 0, sizeof(lib_counters_t));
+
INIT_LIST_HEAD (&ni->ni_active_msgs);
INIT_LIST_HEAD (&ni->ni_active_mds);
INIT_LIST_HEAD (&ni->ni_active_eqs);
-
INIT_LIST_HEAD (&ni->ni_test_peers);
- ni->ni_interface_cookie = lib_create_interface_cookie (nal);
+#ifdef __KERNEL__
+ spin_lock_init (&ni->ni_lock);
+ init_waitqueue_head (&ni->ni_waitq);
+#else
+ pthread_mutex_init(&ni->ni_mutex, NULL);
+ pthread_cond_init(&ni->ni_cond, NULL);
+#endif
+
+ ni->ni_interface_cookie = lib_create_interface_cookie (libnal);
ni->ni_next_object_cookie = 0;
- rc = lib_setup_handle_hash (nal);
+ rc = lib_setup_handle_hash (libnal);
if (rc != PTL_OK)
goto out;
- ni->nid = process_id.nid;
- ni->pid = process_id.pid;
+ ni->ni_pid = process_id;
if (requested_limits != NULL)
ptl_size = requested_limits->max_pt_index + 1;
else
ptl_size = 64;
- ni->tbl.size = ptl_size;
- ni->tbl.tbl = nal->cb_malloc(nal, sizeof(struct list_head) * ptl_size);
- if (ni->tbl.tbl == NULL) {
+ ni->ni_portals.size = ptl_size;
+ PORTAL_ALLOC(ni->ni_portals.tbl,
+ ptl_size * sizeof(struct list_head));
+ if (ni->ni_portals.tbl == NULL) {
rc = PTL_NO_SPACE;
goto out;
}
for (i = 0; i < ptl_size; i++)
- INIT_LIST_HEAD(&(ni->tbl.tbl[i]));
+ INIT_LIST_HEAD(&(ni->ni_portals.tbl[i]));
/* max_{mes,mds,eqs} set in kportal_descriptor_setup */
/* We don't have an access control table! */
- ni->actual_limits.max_ac_index = -1;
+ ni->ni_actual_limits.max_ac_index = -1;
- ni->actual_limits.max_pt_index = ptl_size - 1;
- ni->actual_limits.max_md_iovecs = PTL_MD_MAX_IOV;
- ni->actual_limits.max_me_list = INT_MAX;
+ ni->ni_actual_limits.max_pt_index = ptl_size - 1;
+ ni->ni_actual_limits.max_md_iovecs = PTL_MD_MAX_IOV;
+ ni->ni_actual_limits.max_me_list = INT_MAX;
/* We don't support PtlGetPut! */
- ni->actual_limits.max_getput_md = 0;
+ ni->ni_actual_limits.max_getput_md = 0;
if (actual_limits != NULL)
- *actual_limits = ni->actual_limits;
+ *actual_limits = ni->ni_actual_limits;
out:
if (rc != PTL_OK) {
- lib_cleanup_handle_hash (nal);
- kportal_descriptor_cleanup (nal);
+ lib_cleanup_handle_hash (libnal);
+ kportal_descriptor_cleanup (libnal);
}
RETURN (rc);
}
int
-lib_fini(nal_cb_t * nal)
+lib_fini(lib_nal_t *nal)
{
- lib_ni_t *ni = &nal->ni;
+ lib_ni_t *ni = &nal->libnal_ni;
int idx;
/* NB no state_lock() since this is the last reference. The NAL
* network op (eg MD with non-zero pending count)
*/
- for (idx = 0; idx < ni->tbl.size; idx++)
- while (!list_empty (&ni->tbl.tbl[idx])) {
- lib_me_t *me = list_entry (ni->tbl.tbl[idx].next,
+ for (idx = 0; idx < ni->ni_portals.size; idx++)
+ while (!list_empty (&ni->ni_portals.tbl[idx])) {
+ lib_me_t *me = list_entry (ni->ni_portals.tbl[idx].next,
lib_me_t, me_list);
CERROR ("Active me %p on exit\n", me);
lib_msg_free (nal, msg);
}
- nal->cb_free(nal, ni->tbl.tbl, sizeof(struct list_head) * ni->tbl.size);
+ PORTAL_FREE(ni->ni_portals.tbl,
+ ni->ni_portals.size * sizeof(struct list_head));
lib_cleanup_handle_hash (nal);
kportal_descriptor_cleanup (nal);
+#ifndef __KERNEL__
+ pthread_mutex_destroy(&ni->ni_mutex);
+ pthread_cond_destroy(&ni->ni_cond);
+#endif
+
return (PTL_OK);
}
#endif
#include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
/* must be called with state lock held */
-void lib_md_unlink(nal_cb_t * nal, lib_md_t * md)
+void
+lib_md_unlink(lib_nal_t *nal, lib_md_t *md)
{
if ((md->md_flags & PTL_MD_FLAG_ZOMBIE) == 0) {
/* first unlink attempt... */
CDEBUG(D_NET, "Unlinking md %p\n", md);
if ((md->options & PTL_MD_KIOV) != 0) {
- if (nal->cb_unmap_pages != NULL)
- nal->cb_unmap_pages (nal, md->md_niov, md->md_iov.kiov,
- &md->md_addrkey);
- } else if (nal->cb_unmap != NULL) {
- nal->cb_unmap (nal, md->md_niov, md->md_iov.iov,
- &md->md_addrkey);
+ if (nal->libnal_unmap_pages != NULL)
+ nal->libnal_unmap_pages (nal,
+ md->md_niov,
+ md->md_iov.kiov,
+ &md->md_addrkey);
+ } else if (nal->libnal_unmap != NULL) {
+ nal->libnal_unmap (nal,
+ md->md_niov, md->md_iov.iov,
+ &md->md_addrkey);
}
if (md->eq != NULL) {
}
/* must be called with state lock held */
-static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private,
- ptl_md_t *md, ptl_handle_eq_t *eqh, int unlink)
+static int
+lib_md_build(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd, int unlink)
{
lib_eq_t *eq = NULL;
int rc;
int i;
int niov;
+ int total_length = 0;
/* NB we are passed an allocated, but uninitialised/active md.
* if we return success, caller may lib_md_unlink() it.
* otherwise caller may only lib_md_free() it.
*/
- if (!PtlHandleIsEqual (*eqh, PTL_EQ_NONE)) {
- eq = ptl_handle2eq(eqh, nal);
+ if (!PtlHandleIsEqual (umd->eventq, PTL_EQ_NONE)) {
+ eq = ptl_handle2eq(&umd->eventq, nal);
if (eq == NULL)
return PTL_EQ_INVALID;
}
- /* Must check this _before_ allocation. Also, note that non-iov
- * MDs must set md_niov to 0. */
- LASSERT((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0 ||
- md->length <= PTL_MD_MAX_IOV);
-
/* This implementation doesn't know how to create START events or
* disable END events. Best to LASSERT our caller is compliant so
* we find out quickly... */
- LASSERT (PtlHandleIsEqual (*eqh, PTL_EQ_NONE) ||
- ((md->options & PTL_MD_EVENT_START_DISABLE) != 0 &&
- (md->options & PTL_MD_EVENT_END_DISABLE) == 0));
-
- if ((md->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
- (md->max_size < 0 || md->max_size > md->length)) // illegal max_size
- return PTL_MD_INVALID;
-
- new->me = NULL;
- new->start = md->start;
- new->offset = 0;
- new->max_size = md->max_size;
- new->options = md->options;
- new->user_ptr = md->user_ptr;
- new->eq = eq;
- new->threshold = md->threshold;
- new->pending = 0;
- new->md_flags = (unlink == PTL_UNLINK) ? PTL_MD_FLAG_AUTO_UNLINK : 0;
-
- if ((md->options & PTL_MD_IOVEC) != 0) {
- int total_length = 0;
-
- if ((md->options & PTL_MD_KIOV) != 0) /* Can't specify both */
- return PTL_MD_INVALID;
-
- new->md_niov = niov = md->length;
-
- if (nal->cb_read (nal, private, new->md_iov.iov, md->start,
- niov * sizeof (new->md_iov.iov[0])))
- return PTL_SEGV;
+ LASSERT (eq == NULL ||
+ ((umd->options & PTL_MD_EVENT_START_DISABLE) != 0 &&
+ (umd->options & PTL_MD_EVENT_END_DISABLE) == 0));
+
+ lmd->me = NULL;
+ lmd->start = umd->start;
+ lmd->offset = 0;
+ lmd->max_size = umd->max_size;
+ lmd->options = umd->options;
+ lmd->user_ptr = umd->user_ptr;
+ lmd->eq = eq;
+ lmd->threshold = umd->threshold;
+ lmd->pending = 0;
+ lmd->md_flags = (unlink == PTL_UNLINK) ? PTL_MD_FLAG_AUTO_UNLINK : 0;
+
+ if ((umd->options & PTL_MD_IOVEC) != 0) {
+
+ if ((umd->options & PTL_MD_KIOV) != 0) /* Can't specify both */
+ return PTL_MD_ILLEGAL;
+
+ lmd->md_niov = niov = umd->length;
+ memcpy(lmd->md_iov.iov, umd->start,
+ niov * sizeof (lmd->md_iov.iov[0]));
for (i = 0; i < niov; i++) {
/* We take the base address on trust */
- if (new->md_iov.iov[i].iov_len <= 0) /* invalid length */
- return PTL_VAL_FAILED;
+ if (lmd->md_iov.iov[i].iov_len <= 0) /* invalid length */
+ return PTL_MD_ILLEGAL;
- total_length += new->md_iov.iov[i].iov_len;
+ total_length += lmd->md_iov.iov[i].iov_len;
}
- new->length = total_length;
+ lmd->length = total_length;
- if (nal->cb_map != NULL) {
- rc = nal->cb_map (nal, niov, new->md_iov.iov,
- &new->md_addrkey);
+ if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
+ (umd->max_size < 0 ||
+ umd->max_size > total_length)) // illegal max_size
+ return PTL_MD_ILLEGAL;
+
+ if (nal->libnal_map != NULL) {
+ rc = nal->libnal_map (nal, niov, lmd->md_iov.iov,
+ &lmd->md_addrkey);
if (rc != PTL_OK)
return (rc);
}
- } else if ((md->options & PTL_MD_KIOV) != 0) {
+ } else if ((umd->options & PTL_MD_KIOV) != 0) {
#ifndef __KERNEL__
- return PTL_MD_INVALID;
-#else
- int total_length = 0;
-
+ return PTL_MD_ILLEGAL;
+#else
/* Trap attempt to use paged I/O if unsupported early. */
- if (nal->cb_send_pages == NULL ||
- nal->cb_recv_pages == NULL)
+ if (nal->libnal_send_pages == NULL ||
+ nal->libnal_recv_pages == NULL)
return PTL_MD_INVALID;
- new->md_niov = niov = md->length;
+ lmd->md_niov = niov = umd->length;
+ memcpy(lmd->md_iov.kiov, umd->start,
+ niov * sizeof (lmd->md_iov.kiov[0]));
- if (nal->cb_read (nal, private, new->md_iov.kiov, md->start,
- niov * sizeof (new->md_iov.kiov[0])))
- return PTL_SEGV;
-
for (i = 0; i < niov; i++) {
/* We take the page pointer on trust */
- if (new->md_iov.kiov[i].kiov_offset +
- new->md_iov.kiov[i].kiov_len > PAGE_SIZE )
+ if (lmd->md_iov.kiov[i].kiov_offset +
+ lmd->md_iov.kiov[i].kiov_len > PAGE_SIZE )
return PTL_VAL_FAILED; /* invalid length */
- total_length += new->md_iov.kiov[i].kiov_len;
+ total_length += lmd->md_iov.kiov[i].kiov_len;
}
- new->length = total_length;
+ lmd->length = total_length;
+
+ if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
+ (umd->max_size < 0 ||
+ umd->max_size > total_length)) // illegal max_size
+ return PTL_MD_ILLEGAL;
- if (nal->cb_map_pages != NULL) {
- rc = nal->cb_map_pages (nal, niov, new->md_iov.kiov,
- &new->md_addrkey);
+ if (nal->libnal_map_pages != NULL) {
+ rc = nal->libnal_map_pages (nal, niov, lmd->md_iov.kiov,
+ &lmd->md_addrkey);
if (rc != PTL_OK)
return (rc);
}
#endif
} else { /* contiguous */
- new->length = md->length;
- new->md_niov = niov = 1;
- new->md_iov.iov[0].iov_base = md->start;
- new->md_iov.iov[0].iov_len = md->length;
-
- if (nal->cb_map != NULL) {
- rc = nal->cb_map (nal, niov, new->md_iov.iov,
- &new->md_addrkey);
+ lmd->length = umd->length;
+ lmd->md_niov = niov = 1;
+ lmd->md_iov.iov[0].iov_base = umd->start;
+ lmd->md_iov.iov[0].iov_len = umd->length;
+
+ if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
+ (umd->max_size < 0 ||
+ umd->max_size > umd->length)) // illegal max_size
+ return PTL_MD_ILLEGAL;
+
+ if (nal->libnal_map != NULL) {
+ rc = nal->libnal_map (nal, niov, lmd->md_iov.iov,
+ &lmd->md_addrkey);
if (rc != PTL_OK)
return (rc);
}
eq->eq_refcount++;
/* It's good; let handle2md succeed and add to active mds */
- lib_initialise_handle (nal, &new->md_lh, PTL_COOKIE_TYPE_MD);
- list_add (&new->md_list, &nal->ni.ni_active_mds);
+ lib_initialise_handle (nal, &lmd->md_lh, PTL_COOKIE_TYPE_MD);
+ list_add (&lmd->md_list, &nal->libnal_ni.ni_active_mds);
return PTL_OK;
}
/* must be called with state lock held */
-void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md, ptl_md_t * new)
+void
+lib_md_deconstruct(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd)
{
/* NB this doesn't copy out all the iov entries so when a
* discontiguous MD is copied out, the target gets to know the
* original iov pointer (in start) and the number of entries it had
* and that's all.
*/
- new->start = md->start;
- new->length = ((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0) ?
- md->length : md->md_niov;
- new->threshold = md->threshold;
- new->max_size = md->max_size;
- new->options = md->options;
- new->user_ptr = md->user_ptr;
- ptl_eq2handle(&new->eventq, md->eq);
+ umd->start = lmd->start;
+ umd->length = ((lmd->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0) ?
+ lmd->length : lmd->md_niov;
+ umd->threshold = lmd->threshold;
+ umd->max_size = lmd->max_size;
+ umd->options = lmd->options;
+ umd->user_ptr = lmd->user_ptr;
+ ptl_eq2handle(&umd->eventq, nal, lmd->eq);
}
-int do_PtlMDAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int
+lib_api_md_attach(nal_t *apinal, ptl_handle_me_t *meh,
+ ptl_md_t *umd, ptl_unlink_t unlink,
+ ptl_handle_md_t *handle)
{
- /*
- * Incoming:
- * ptl_handle_me_t current_in
- * ptl_md_t md_in
- * ptl_unlink_t unlink_in
- *
- * Outgoing:
- * ptl_handle_md_t * handle_out
- */
-
- PtlMDAttach_in *args = v_args;
- PtlMDAttach_out *ret = v_ret;
- lib_me_t *me;
- lib_md_t *md;
+ lib_nal_t *nal = apinal->nal_data;
+ lib_me_t *me;
+ lib_md_t *md;
unsigned long flags;
+ int rc;
- if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 &&
- args->md_in.length > PTL_MD_MAX_IOV) /* too many fragments */
- return (ret->rc = PTL_IOV_INVALID);
+ if ((umd->options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 &&
+ umd->length > PTL_MD_MAX_IOV) /* too many fragments */
+ return PTL_IOV_INVALID;
- md = lib_md_alloc(nal, &args->md_in);
+ md = lib_md_alloc(nal, umd);
if (md == NULL)
- return (ret->rc = PTL_NO_SPACE);
+ return PTL_NO_SPACE;
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
- me = ptl_handle2me(&args->me_in, nal);
+ me = ptl_handle2me(meh, nal);
if (me == NULL) {
- ret->rc = PTL_ME_INVALID;
+ rc = PTL_ME_INVALID;
} else if (me->md != NULL) {
- ret->rc = PTL_ME_IN_USE;
+ rc = PTL_ME_IN_USE;
} else {
- ret->rc = lib_md_build(nal, md, private, &args->md_in,
- &args->eq_in, args->unlink_in);
-
- if (ret->rc == PTL_OK) {
+ rc = lib_md_build(nal, md, umd, unlink);
+ if (rc == PTL_OK) {
me->md = md;
md->me = me;
- ptl_md2handle(&ret->handle_out, md);
+ ptl_md2handle(handle, nal, md);
- state_unlock (nal, &flags);
+ LIB_UNLOCK(nal, flags);
return (PTL_OK);
}
}
lib_md_free (nal, md);
- state_unlock (nal, &flags);
- return (ret->rc);
+ LIB_UNLOCK(nal, flags);
+ return (rc);
}
-int do_PtlMDBind(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int
+lib_api_md_bind(nal_t *apinal,
+ ptl_md_t *umd, ptl_unlink_t unlink,
+ ptl_handle_md_t *handle)
{
- /*
- * Incoming:
- * ptl_handle_ni_t ni_in
- * ptl_md_t md_in
- *
- * Outgoing:
- * ptl_handle_md_t * handle_out
- */
-
- PtlMDBind_in *args = v_args;
- PtlMDBind_out *ret = v_ret;
- lib_md_t *md;
+ lib_nal_t *nal = apinal->nal_data;
+ lib_md_t *md;
unsigned long flags;
+ int rc;
- if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 &&
- args->md_in.length > PTL_MD_MAX_IOV) /* too many fragments */
- return (ret->rc = PTL_IOV_INVALID);
+ if ((umd->options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 &&
+ umd->length > PTL_MD_MAX_IOV) /* too many fragments */
+ return PTL_IOV_INVALID;
- md = lib_md_alloc(nal, &args->md_in);
+ md = lib_md_alloc(nal, umd);
if (md == NULL)
- return (ret->rc = PTL_NO_SPACE);
+ return PTL_NO_SPACE;
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
- ret->rc = lib_md_build(nal, md, private, &args->md_in,
- &args->eq_in, args->unlink_in);
+ rc = lib_md_build(nal, md, umd, unlink);
- if (ret->rc == PTL_OK) {
- ptl_md2handle(&ret->handle_out, md);
+ if (rc == PTL_OK) {
+ ptl_md2handle(handle, nal, md);
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
return (PTL_OK);
}
lib_md_free (nal, md);
- state_unlock(nal, &flags);
- return (ret->rc);
+ LIB_UNLOCK(nal, flags);
+ return (rc);
}
-int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int
+lib_api_md_unlink (nal_t *apinal, ptl_handle_md_t *mdh)
{
- PtlMDUnlink_in *args = v_args;
- PtlMDUnlink_out *ret = v_ret;
+ lib_nal_t *nal = apinal->nal_data;
ptl_event_t ev;
lib_md_t *md;
unsigned long flags;
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
- md = ptl_handle2md(&args->md_in, nal);
+ md = ptl_handle2md(mdh, nal);
if (md == NULL) {
- state_unlock(nal, &flags);
- return (ret->rc = PTL_MD_INVALID);
+ LIB_UNLOCK(nal, flags);
+ return PTL_MD_INVALID;
}
/* If the MD is busy, lib_md_unlink just marks it for deletion, and
ev.unlinked = 1;
lib_md_deconstruct(nal, md, &ev.mem_desc);
- lib_enq_event_locked(nal, private, md->eq, &ev);
+ lib_enq_event_locked(nal, NULL, md->eq, &ev);
}
- lib_md_deconstruct(nal, md, &ret->status_out);
lib_md_unlink(nal, md);
- ret->rc = PTL_OK;
- state_unlock(nal, &flags);
-
- return (PTL_OK);
+ LIB_UNLOCK(nal, flags);
+ return PTL_OK;
}
-int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *v_args,
- void *v_ret)
+int
+lib_api_md_update (nal_t *apinal,
+ ptl_handle_md_t *mdh,
+ ptl_md_t *oldumd, ptl_md_t *newumd,
+ ptl_handle_eq_t *testqh)
{
- /*
- * Incoming:
- * ptl_handle_md_t md_in
- * ptl_md_t * old_inout
- * ptl_md_t * new_inout
- * ptl_handle_eq_t testq_in
- * ptl_seq_t sequence_in
- *
- * Outgoing:
- * ptl_md_t * old_inout
- * ptl_md_t * new_inout
- */
- PtlMDUpdate_internal_in *args = v_args;
- PtlMDUpdate_internal_out *ret = v_ret;
- lib_md_t *md;
- lib_eq_t *test_eq = NULL;
- ptl_md_t *new = &args->new_inout;
+ lib_nal_t *nal = apinal->nal_data;
+ lib_md_t *md;
+ lib_eq_t *test_eq = NULL;
unsigned long flags;
+ int rc;
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
- md = ptl_handle2md(&args->md_in, nal);
+ md = ptl_handle2md(mdh, nal);
if (md == NULL) {
- ret->rc = PTL_MD_INVALID;
+ rc = PTL_MD_INVALID;
goto out;
}
- if (args->old_inout_valid)
- lib_md_deconstruct(nal, md, &ret->old_inout);
+ if (oldumd != NULL)
+ lib_md_deconstruct(nal, md, oldumd);
- if (!args->new_inout_valid) {
- ret->rc = PTL_OK;
+ if (newumd == NULL) {
+ rc = PTL_OK;
goto out;
}
/* XXX fttb, the new MD must be the same "shape" wrt fragmentation,
* since we simply overwrite the old lib-md */
- if ((((new->options ^ md->options) &
+ if ((((newumd->options ^ md->options) &
(PTL_MD_IOVEC | PTL_MD_KIOV)) != 0) ||
- ((new->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0 &&
- new->length != md->md_niov)) {
- ret->rc = PTL_IOV_INVALID;
+ ((newumd->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0 &&
+ newumd->length != md->md_niov)) {
+ rc = PTL_IOV_INVALID;
goto out;
}
- if (!PtlHandleIsEqual (args->testq_in, PTL_EQ_NONE)) {
- test_eq = ptl_handle2eq(&args->testq_in, nal);
+ if (!PtlHandleIsEqual (*testqh, PTL_EQ_NONE)) {
+ test_eq = ptl_handle2eq(testqh, nal);
if (test_eq == NULL) {
- ret->rc = PTL_EQ_INVALID;
+ rc = PTL_EQ_INVALID;
goto out;
}
}
if (md->pending != 0) {
- ret->rc = PTL_MD_NO_UPDATE;
- goto out;
+ rc = PTL_MD_NO_UPDATE;
+ goto out;
}
if (test_eq == NULL ||
- test_eq->sequence == args->sequence_in) {
+ test_eq->eq_deq_seq == test_eq->eq_enq_seq) {
lib_me_t *me = md->me;
int unlink = (md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) ?
PTL_UNLINK : PTL_RETAIN;
// #warning this does not track eq refcounts properly
- ret->rc = lib_md_build(nal, md, private,
- new, &new->eventq, unlink);
+ rc = lib_md_build(nal, md, newumd, unlink);
md->me = me;
} else {
- ret->rc = PTL_MD_NO_UPDATE;
+ rc = PTL_MD_NO_UPDATE;
}
out:
- state_unlock(nal, &flags);
- return (ret->rc);
+ LIB_UNLOCK(nal, flags);
+
+ return rc;
}
#endif
#include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
-static void lib_me_dump(nal_cb_t * nal, lib_me_t * me);
-
-int do_PtlMEAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int
+lib_api_me_attach(nal_t *apinal,
+ ptl_pt_index_t portal,
+ ptl_process_id_t match_id,
+ ptl_match_bits_t match_bits,
+ ptl_match_bits_t ignore_bits,
+ ptl_unlink_t unlink, ptl_ins_pos_t pos,
+ ptl_handle_me_t *handle)
{
- PtlMEAttach_in *args = v_args;
- PtlMEAttach_out *ret = v_ret;
- lib_ni_t *ni = &nal->ni;
- lib_ptl_t *tbl = &ni->tbl;
+ lib_nal_t *nal = apinal->nal_data;
+ lib_ni_t *ni = &nal->libnal_ni;
+ lib_ptl_t *tbl = &ni->ni_portals;
+ lib_me_t *me;
unsigned long flags;
- lib_me_t *me;
- if (args->index_in >= tbl->size)
- return ret->rc = PTL_PT_INDEX_INVALID;
+ if (portal >= tbl->size)
+ return PTL_PT_INDEX_INVALID;
/* Should check for valid matchid, but not yet */
- if (0)
- return ret->rc = PTL_PROCESS_INVALID;
me = lib_me_alloc (nal);
if (me == NULL)
- return (ret->rc = PTL_NO_SPACE);
+ return PTL_NO_SPACE;
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
- me->match_id = args->match_id_in;
- me->match_bits = args->match_bits_in;
- me->ignore_bits = args->ignore_bits_in;
- me->unlink = args->unlink_in;
+ me->match_id = match_id;
+ me->match_bits = match_bits;
+ me->ignore_bits = ignore_bits;
+ me->unlink = unlink;
me->md = NULL;
lib_initialise_handle (nal, &me->me_lh, PTL_COOKIE_TYPE_ME);
- if (args->position_in == PTL_INS_AFTER)
- list_add_tail(&me->me_list, &(tbl->tbl[args->index_in]));
+ if (pos == PTL_INS_AFTER)
+ list_add_tail(&me->me_list, &(tbl->tbl[portal]));
else
- list_add(&me->me_list, &(tbl->tbl[args->index_in]));
+ list_add(&me->me_list, &(tbl->tbl[portal]));
- ptl_me2handle(&ret->handle_out, me);
+ ptl_me2handle(handle, nal, me);
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
- return ret->rc = PTL_OK;
+ return PTL_OK;
}
-int do_PtlMEInsert(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int
+lib_api_me_insert(nal_t *apinal,
+ ptl_handle_me_t *current_meh,
+ ptl_process_id_t match_id,
+ ptl_match_bits_t match_bits,
+ ptl_match_bits_t ignore_bits,
+ ptl_unlink_t unlink, ptl_ins_pos_t pos,
+ ptl_handle_me_t *handle)
{
- PtlMEInsert_in *args = v_args;
- PtlMEInsert_out *ret = v_ret;
+ lib_nal_t *nal = apinal->nal_data;
+ lib_me_t *current_me;
+ lib_me_t *new_me;
unsigned long flags;
- lib_me_t *me;
- lib_me_t *new;
- new = lib_me_alloc (nal);
- if (new == NULL)
- return (ret->rc = PTL_NO_SPACE);
+ new_me = lib_me_alloc (nal);
+ if (new_me == NULL)
+ return PTL_NO_SPACE;
/* Should check for valid matchid, but not yet */
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
- me = ptl_handle2me(&args->current_in, nal);
- if (me == NULL) {
- lib_me_free (nal, new);
+ current_me = ptl_handle2me(current_meh, nal);
+ if (current_me == NULL) {
+ lib_me_free (nal, new_me);
- state_unlock (nal, &flags);
- return (ret->rc = PTL_ME_INVALID);
+ LIB_UNLOCK(nal, flags);
+ return PTL_ME_INVALID;
}
- new->match_id = args->match_id_in;
- new->match_bits = args->match_bits_in;
- new->ignore_bits = args->ignore_bits_in;
- new->unlink = args->unlink_in;
- new->md = NULL;
+ new_me->match_id = match_id;
+ new_me->match_bits = match_bits;
+ new_me->ignore_bits = ignore_bits;
+ new_me->unlink = unlink;
+ new_me->md = NULL;
- lib_initialise_handle (nal, &new->me_lh, PTL_COOKIE_TYPE_ME);
+ lib_initialise_handle (nal, &new_me->me_lh, PTL_COOKIE_TYPE_ME);
- if (args->position_in == PTL_INS_AFTER)
- list_add_tail(&new->me_list, &me->me_list);
+ if (pos == PTL_INS_AFTER)
+ list_add_tail(&new_me->me_list, &current_me->me_list);
else
- list_add(&new->me_list, &me->me_list);
+ list_add(&new_me->me_list, &current_me->me_list);
- ptl_me2handle(&ret->handle_out, new);
+ ptl_me2handle(handle, nal, new_me);
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
- return ret->rc = PTL_OK;
+ return PTL_OK;
}
-int do_PtlMEUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int
+lib_api_me_unlink (nal_t *apinal, ptl_handle_me_t *meh)
{
- PtlMEUnlink_in *args = v_args;
- PtlMEUnlink_out *ret = v_ret;
+ lib_nal_t *nal = apinal->nal_data;
unsigned long flags;
- lib_me_t *me;
+ lib_me_t *me;
+ int rc;
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
- me = ptl_handle2me(&args->current_in, nal);
+ me = ptl_handle2me(meh, nal);
if (me == NULL) {
- ret->rc = PTL_ME_INVALID;
+ rc = PTL_ME_INVALID;
} else {
lib_me_unlink(nal, me);
- ret->rc = PTL_OK;
+ rc = PTL_OK;
}
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
- return (ret->rc);
+ return (rc);
}
/* call with state_lock please */
-void lib_me_unlink(nal_cb_t *nal, lib_me_t *me)
+void
+lib_me_unlink(lib_nal_t *nal, lib_me_t *me)
{
list_del (&me->me_list);
lib_me_free(nal, me);
}
-int do_PtlTblDump(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+#if 0
+static void
+lib_me_dump(lib_nal_t *nal, lib_me_t * me)
{
- PtlTblDump_in *args = v_args;
- PtlTblDump_out *ret = v_ret;
- lib_ptl_t *tbl = &nal->ni.tbl;
- ptl_handle_any_t handle;
- struct list_head *tmp;
- unsigned long flags;
+ CWARN("Match Entry %p ("LPX64")\n", me,
+ me->me_lh.lh_cookie);
- if (args->index_in < 0 || args->index_in >= tbl->size)
- return ret->rc = PTL_PT_INDEX_INVALID;
-
- nal->cb_printf(nal, "Portal table index %d\n", args->index_in);
-
- state_lock(nal, &flags);
- list_for_each(tmp, &(tbl->tbl[args->index_in])) {
- lib_me_t *me = list_entry(tmp, lib_me_t, me_list);
- ptl_me2handle(&handle, me);
- lib_me_dump(nal, me);
- }
- state_unlock(nal, &flags);
+ CWARN("\tMatch/Ignore\t= %016lx / %016lx\n",
+ me->match_bits, me->ignore_bits);
- return ret->rc = PTL_OK;
-}
-
-int do_PtlMEDump(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
-{
- PtlMEDump_in *args = v_args;
- PtlMEDump_out *ret = v_ret;
- lib_me_t *me;
- unsigned long flags;
-
- state_lock(nal, &flags);
-
- me = ptl_handle2me(&args->current_in, nal);
- if (me == NULL) {
- ret->rc = PTL_ME_INVALID;
- } else {
- lib_me_dump(nal, me);
- ret->rc = PTL_OK;
- }
-
- state_unlock(nal, &flags);
-
- return ret->rc;
-}
-
-static void lib_me_dump(nal_cb_t * nal, lib_me_t * me)
-{
- nal->cb_printf(nal, "Match Entry %p ("LPX64")\n", me,
- me->me_lh.lh_cookie);
-
- nal->cb_printf(nal, "\tMatch/Ignore\t= %016lx / %016lx\n",
- me->match_bits, me->ignore_bits);
-
- nal->cb_printf(nal, "\tMD\t= %p\n", me->md);
- nal->cb_printf(nal, "\tprev\t= %p\n",
- list_entry(me->me_list.prev, lib_me_t, me_list));
- nal->cb_printf(nal, "\tnext\t= %p\n",
- list_entry(me->me_list.next, lib_me_t, me_list));
+ CWARN("\tMD\t= %p\n", me->md);
+ CWARN("\tprev\t= %p\n",
+ list_entry(me->me_list.prev, lib_me_t, me_list));
+ CWARN("\tnext\t= %p\n",
+ list_entry(me->me_list.next, lib_me_t, me_list));
}
+#endif
#endif
#include <portals/p30.h>
#include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
/* forward ref */
-static void lib_commit_md (nal_cb_t *nal, lib_md_t *md, lib_msg_t *msg);
+static void lib_commit_md (lib_nal_t *nal, lib_md_t *md, lib_msg_t *msg);
static lib_md_t *
-lib_match_md(nal_cb_t *nal, int index, int op_mask,
+lib_match_md(lib_nal_t *nal, int index, int op_mask,
ptl_nid_t src_nid, ptl_pid_t src_pid,
ptl_size_t rlength, ptl_size_t roffset,
ptl_match_bits_t match_bits, lib_msg_t *msg,
ptl_size_t *mlength_out, ptl_size_t *offset_out)
{
- lib_ni_t *ni = &nal->ni;
- struct list_head *match_list = &ni->tbl.tbl[index];
+ lib_ni_t *ni = &nal->libnal_ni;
+ struct list_head *match_list = &ni->ni_portals.tbl[index];
struct list_head *tmp;
lib_me_t *me;
lib_md_t *md;
CDEBUG (D_NET, "Request from "LPU64".%d of length %d into portal %d "
"MB="LPX64"\n", src_nid, src_pid, rlength, index, match_bits);
- if (index < 0 || index >= ni->tbl.size) {
+ if (index < 0 || index >= ni->ni_portals.size) {
CERROR("Invalid portal %d not in [0-%d]\n",
- index, ni->tbl.size);
+ index, ni->ni_portals.size);
goto failed;
}
failed:
CERROR (LPU64": Dropping %s from "LPU64".%d portal %d match "LPX64
" offset %d length %d: no match\n",
- ni->nid, (op_mask == PTL_MD_OP_GET) ? "GET" : "PUT",
+ ni->ni_pid.nid, (op_mask == PTL_MD_OP_GET) ? "GET" : "PUT",
src_nid, src_pid, index, match_bits, roffset, rlength);
RETURN(NULL);
}
-int do_PtlFailNid (nal_cb_t *nal, void *private, void *v_args, void *v_ret)
+int lib_api_fail_nid (nal_t *apinal, ptl_nid_t nid, unsigned int threshold)
{
- PtlFailNid_in *args = v_args;
- PtlFailNid_out *ret = v_ret;
+ lib_nal_t *nal = apinal->nal_data;
lib_test_peer_t *tp;
unsigned long flags;
struct list_head *el;
struct list_head *next;
struct list_head cull;
- if (args->threshold != 0) {
+ if (threshold != 0) {
/* Adding a new entry */
- tp = (lib_test_peer_t *)nal->cb_malloc (nal, sizeof (*tp));
+ PORTAL_ALLOC(tp, sizeof(*tp));
if (tp == NULL)
- return (ret->rc = PTL_FAIL);
+ return PTL_NO_SPACE;
- tp->tp_nid = args->nid;
- tp->tp_threshold = args->threshold;
+ tp->tp_nid = nid;
+ tp->tp_threshold = threshold;
- state_lock (nal, &flags);
- list_add (&tp->tp_list, &nal->ni.ni_test_peers);
- state_unlock (nal, &flags);
- return (ret->rc = PTL_OK);
+ LIB_LOCK(nal, flags);
+ list_add_tail (&tp->tp_list, &nal->libnal_ni.ni_test_peers);
+ LIB_UNLOCK(nal, flags);
+ return PTL_OK;
}
/* removing entries */
INIT_LIST_HEAD (&cull);
- state_lock (nal, &flags);
+ LIB_LOCK(nal, flags);
- list_for_each_safe (el, next, &nal->ni.ni_test_peers) {
+ list_for_each_safe (el, next, &nal->libnal_ni.ni_test_peers) {
tp = list_entry (el, lib_test_peer_t, tp_list);
if (tp->tp_threshold == 0 || /* needs culling anyway */
- args->nid == PTL_NID_ANY || /* removing all entries */
- tp->tp_nid == args->nid) /* matched this one */
+ nid == PTL_NID_ANY || /* removing all entries */
+ tp->tp_nid == nid) /* matched this one */
{
list_del (&tp->tp_list);
list_add (&tp->tp_list, &cull);
}
}
- state_unlock (nal, &flags);
+ LIB_UNLOCK(nal, flags);
while (!list_empty (&cull)) {
tp = list_entry (cull.next, lib_test_peer_t, tp_list);
list_del (&tp->tp_list);
- nal->cb_free (nal, tp, sizeof (*tp));
+ PORTAL_FREE(tp, sizeof (*tp));
}
- return (ret->rc = PTL_OK);
+ return PTL_OK;
}
static int
-fail_peer (nal_cb_t *nal, ptl_nid_t nid, int outgoing)
+fail_peer (lib_nal_t *nal, ptl_nid_t nid, int outgoing)
{
lib_test_peer_t *tp;
struct list_head *el;
INIT_LIST_HEAD (&cull);
- state_lock (nal, &flags);
+ LIB_LOCK (nal, flags);
- list_for_each_safe (el, next, &nal->ni.ni_test_peers) {
+ list_for_each_safe (el, next, &nal->libnal_ni.ni_test_peers) {
tp = list_entry (el, lib_test_peer_t, tp_list);
if (tp->tp_threshold == 0) {
}
}
- state_unlock (nal, &flags);
+ LIB_UNLOCK (nal, flags);
while (!list_empty (&cull)) {
tp = list_entry (cull.next, lib_test_peer_t, tp_list);
list_del (&tp->tp_list);
- nal->cb_free (nal, tp, sizeof (*tp));
+ PORTAL_FREE(tp, sizeof (*tp));
}
return (fail);
#endif
ptl_err_t
-lib_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
+lib_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen)
{
if (mlen == 0)
- return (nal->cb_recv(nal, private, msg,
- 0, NULL,
- offset, mlen, rlen));
+ return (nal->libnal_recv(nal, private, msg,
+ 0, NULL,
+ offset, mlen, rlen));
if ((md->options & PTL_MD_KIOV) == 0)
- return (nal->cb_recv(nal, private, msg,
- md->md_niov, md->md_iov.iov,
- offset, mlen, rlen));
+ return (nal->libnal_recv(nal, private, msg,
+ md->md_niov, md->md_iov.iov,
+ offset, mlen, rlen));
- return (nal->cb_recv_pages(nal, private, msg,
- md->md_niov, md->md_iov.kiov,
- offset, mlen, rlen));
+ return (nal->libnal_recv_pages(nal, private, msg,
+ md->md_niov, md->md_iov.kiov,
+ offset, mlen, rlen));
}
ptl_err_t
-lib_send (nal_cb_t *nal, void *private, lib_msg_t *msg,
+lib_send (lib_nal_t *nal, void *private, lib_msg_t *msg,
ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
lib_md_t *md, ptl_size_t offset, ptl_size_t len)
{
if (len == 0)
- return (nal->cb_send(nal, private, msg,
- hdr, type, nid, pid,
- 0, NULL,
- offset, len));
+ return (nal->libnal_send(nal, private, msg,
+ hdr, type, nid, pid,
+ 0, NULL,
+ offset, len));
if ((md->options & PTL_MD_KIOV) == 0)
- return (nal->cb_send(nal, private, msg,
- hdr, type, nid, pid,
- md->md_niov, md->md_iov.iov,
- offset, len));
-
- return (nal->cb_send_pages(nal, private, msg,
- hdr, type, nid, pid,
- md->md_niov, md->md_iov.kiov,
- offset, len));
+ return (nal->libnal_send(nal, private, msg,
+ hdr, type, nid, pid,
+ md->md_niov, md->md_iov.iov,
+ offset, len));
+
+ return (nal->libnal_send_pages(nal, private, msg,
+ hdr, type, nid, pid,
+ md->md_niov, md->md_iov.kiov,
+ offset, len));
}
static void
-lib_commit_md (nal_cb_t *nal, lib_md_t *md, lib_msg_t *msg)
+lib_commit_md (lib_nal_t *nal, lib_md_t *md, lib_msg_t *msg)
{
- /* ALWAYS called holding the state_lock */
- lib_counters_t *counters = &nal->ni.counters;
+ /* ALWAYS called holding the LIB_LOCK */
+ lib_counters_t *counters = &nal->libnal_ni.ni_counters;
/* Here, we commit the MD to a network OP by marking it busy and
* decrementing its threshold. Come what may, the network "owns"
if (counters->msgs_alloc > counters->msgs_max)
counters->msgs_max = counters->msgs_alloc;
- list_add (&msg->msg_list, &nal->ni.ni_active_msgs);
+ list_add (&msg->msg_list, &nal->libnal_ni.ni_active_msgs);
}
static void
-lib_drop_message (nal_cb_t *nal, void *private, ptl_hdr_t *hdr)
+lib_drop_message (lib_nal_t *nal, void *private, ptl_hdr_t *hdr)
{
unsigned long flags;
* to receive (init_msg() not called) and therefore can't cause an
* event. */
- state_lock(nal, &flags);
- nal->ni.counters.drop_count++;
- nal->ni.counters.drop_length += hdr->payload_length;
- state_unlock(nal, &flags);
+ LIB_LOCK(nal, flags);
+ nal->libnal_ni.ni_counters.drop_count++;
+ nal->libnal_ni.ni_counters.drop_length += hdr->payload_length;
+ LIB_UNLOCK(nal, flags);
/* NULL msg => if NAL calls lib_finalize it will be a noop */
(void) lib_recv(nal, private, NULL, NULL, 0, 0, hdr->payload_length);
*
*/
static ptl_err_t
-parse_put(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
+parse_put(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
{
- lib_ni_t *ni = &nal->ni;
+ lib_ni_t *ni = &nal->libnal_ni;
ptl_size_t mlength = 0;
ptl_size_t offset = 0;
ptl_err_t rc;
hdr->msg.put.ptl_index = NTOH__u32 (hdr->msg.put.ptl_index);
hdr->msg.put.offset = NTOH__u32 (hdr->msg.put.offset);
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
md = lib_match_md(nal, hdr->msg.put.ptl_index, PTL_MD_OP_PUT,
hdr->src_nid, hdr->src_pid,
hdr->msg.put.match_bits, msg,
&mlength, &offset);
if (md == NULL) {
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
return (PTL_FAIL);
}
msg->ack_wmd = hdr->msg.put.ack_wmd;
}
- ni->counters.recv_count++;
- ni->counters.recv_length += mlength;
+ ni->ni_counters.recv_count++;
+ ni->ni_counters.recv_length += mlength;
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
rc = lib_recv(nal, private, msg, md, offset, mlength,
hdr->payload_length);
if (rc != PTL_OK)
CERROR(LPU64": error on receiving PUT from "LPU64": %d\n",
- ni->nid, hdr->src_nid, rc);
+ ni->ni_pid.nid, hdr->src_nid, rc);
return (rc);
}
static ptl_err_t
-parse_get(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
+parse_get(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
{
- lib_ni_t *ni = &nal->ni;
+ lib_ni_t *ni = &nal->libnal_ni;
ptl_size_t mlength = 0;
ptl_size_t offset = 0;
lib_md_t *md;
hdr->msg.get.sink_length = NTOH__u32 (hdr->msg.get.sink_length);
hdr->msg.get.src_offset = NTOH__u32 (hdr->msg.get.src_offset);
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
md = lib_match_md(nal, hdr->msg.get.ptl_index, PTL_MD_OP_GET,
hdr->src_nid, hdr->src_pid,
hdr->msg.get.match_bits, msg,
&mlength, &offset);
if (md == NULL) {
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
return (PTL_FAIL);
}
msg->ev.type = PTL_EVENT_GET_END;
msg->ev.hdr_data = 0;
- ni->counters.send_count++;
- ni->counters.send_length += mlength;
+ ni->ni_counters.send_count++;
+ ni->ni_counters.send_length += mlength;
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
memset (&reply, 0, sizeof (reply));
reply.type = HTON__u32 (PTL_MSG_REPLY);
reply.dest_nid = HTON__u64 (hdr->src_nid);
- reply.src_nid = HTON__u64 (ni->nid);
reply.dest_pid = HTON__u32 (hdr->src_pid);
- reply.src_pid = HTON__u32 (ni->pid);
+ reply.src_nid = HTON__u64 (ni->ni_pid.nid);
+ reply.src_pid = HTON__u32 (ni->ni_pid.pid);
reply.payload_length = HTON__u32 (mlength);
reply.msg.reply.dst_wmd = hdr->msg.get.return_wmd;
hdr->src_nid, hdr->src_pid, md, offset, mlength);
if (rc != PTL_OK)
CERROR(LPU64": Unable to send REPLY for GET from "LPU64": %d\n",
- ni->nid, hdr->src_nid, rc);
+ ni->ni_pid.nid, hdr->src_nid, rc);
/* Discard any junk after the hdr */
(void) lib_recv(nal, private, NULL, NULL, 0, 0, hdr->payload_length);
}
static ptl_err_t
-parse_reply(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
+parse_reply(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
{
- lib_ni_t *ni = &nal->ni;
+ lib_ni_t *ni = &nal->libnal_ni;
lib_md_t *md;
int rlength;
int length;
unsigned long flags;
ptl_err_t rc;
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
/* NB handles only looked up by creator (no flips) */
md = ptl_wire_handle2md(&hdr->msg.reply.dst_wmd, nal);
if (md == NULL || md->threshold == 0) {
CERROR (LPU64": Dropping REPLY from "LPU64" for %s MD "LPX64"."LPX64"\n",
- ni->nid, hdr->src_nid,
+ ni->ni_pid.nid, hdr->src_nid,
md == NULL ? "invalid" : "inactive",
hdr->msg.reply.dst_wmd.wh_interface_cookie,
hdr->msg.reply.dst_wmd.wh_object_cookie);
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
return (PTL_FAIL);
}
if ((md->options & PTL_MD_TRUNCATE) == 0) {
CERROR (LPU64": Dropping REPLY from "LPU64
" length %d for MD "LPX64" would overflow (%d)\n",
- ni->nid, hdr->src_nid, length,
+ ni->ni_pid.nid, hdr->src_nid, length,
hdr->msg.reply.dst_wmd.wh_object_cookie,
md->length);
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
return (PTL_FAIL);
}
length = md->length;
lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
- ni->counters.recv_count++;
- ni->counters.recv_length += length;
+ ni->ni_counters.recv_count++;
+ ni->ni_counters.recv_length += length;
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
rc = lib_recv(nal, private, msg, md, 0, length, rlength);
if (rc != PTL_OK)
CERROR(LPU64": error on receiving REPLY from "LPU64": %d\n",
- ni->nid, hdr->src_nid, rc);
+ ni->ni_pid.nid, hdr->src_nid, rc);
return (rc);
}
static ptl_err_t
-parse_ack(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
+parse_ack(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
{
- lib_ni_t *ni = &nal->ni;
+ lib_ni_t *ni = &nal->libnal_ni;
lib_md_t *md;
unsigned long flags;
hdr->msg.ack.match_bits = NTOH__u64 (hdr->msg.ack.match_bits);
hdr->msg.ack.mlength = NTOH__u32 (hdr->msg.ack.mlength);
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
/* NB handles only looked up by creator (no flips) */
md = ptl_wire_handle2md(&hdr->msg.ack.dst_wmd, nal);
if (md == NULL || md->threshold == 0) {
CDEBUG(D_INFO, LPU64": Dropping ACK from "LPU64" to %s MD "
- LPX64"."LPX64"\n", ni->nid, hdr->src_nid,
+ LPX64"."LPX64"\n", ni->ni_pid.nid, hdr->src_nid,
(md == NULL) ? "invalid" : "inactive",
hdr->msg.ack.dst_wmd.wh_interface_cookie,
hdr->msg.ack.dst_wmd.wh_object_cookie);
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
return (PTL_FAIL);
}
CDEBUG(D_NET, LPU64": ACK from "LPU64" into md "LPX64"\n",
- ni->nid, hdr->src_nid,
+ ni->ni_pid.nid, hdr->src_nid,
hdr->msg.ack.dst_wmd.wh_object_cookie);
lib_commit_md(nal, md, msg);
lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
- ni->counters.recv_count++;
+ ni->ni_counters.recv_count++;
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
/* We have received and matched up the ack OK, create the
* completion event now... */
}
}
-void print_hdr(nal_cb_t * nal, ptl_hdr_t * hdr)
+void print_hdr(lib_nal_t *nal, ptl_hdr_t * hdr)
{
char *type_str = hdr_type_string (hdr);
- nal->cb_printf(nal, "P3 Header at %p of type %s\n", hdr, type_str);
- nal->cb_printf(nal, " From nid/pid %Lu/%Lu", hdr->src_nid,
- hdr->src_pid);
- nal->cb_printf(nal, " To nid/pid %Lu/%Lu\n", hdr->dest_nid,
- hdr->dest_pid);
+ CWARN("P3 Header at %p of type %s\n", hdr, type_str);
+ CWARN(" From nid/pid "LPX64"/%u\n", hdr->src_nid, hdr->src_pid);
+ CWARN(" To nid/pid "LPX64"/%u\n", hdr->dest_nid, hdr->dest_pid);
switch (hdr->type) {
default:
break;
case PTL_MSG_PUT:
- nal->cb_printf(nal,
- " Ptl index %d, ack md "LPX64"."LPX64", "
- "match bits "LPX64"\n",
- hdr->msg.put.ptl_index,
- hdr->msg.put.ack_wmd.wh_interface_cookie,
- hdr->msg.put.ack_wmd.wh_object_cookie,
- hdr->msg.put.match_bits);
- nal->cb_printf(nal,
- " Length %d, offset %d, hdr data "LPX64"\n",
- hdr->payload_length, hdr->msg.put.offset,
- hdr->msg.put.hdr_data);
+ CWARN(" Ptl index %d, ack md "LPX64"."LPX64", "
+ "match bits "LPX64"\n",
+ hdr->msg.put.ptl_index,
+ hdr->msg.put.ack_wmd.wh_interface_cookie,
+ hdr->msg.put.ack_wmd.wh_object_cookie,
+ hdr->msg.put.match_bits);
+ CWARN(" Length %d, offset %d, hdr data "LPX64"\n",
+ hdr->payload_length, hdr->msg.put.offset,
+ hdr->msg.put.hdr_data);
break;
case PTL_MSG_GET:
- nal->cb_printf(nal,
- " Ptl index %d, return md "LPX64"."LPX64", "
- "match bits "LPX64"\n", hdr->msg.get.ptl_index,
- hdr->msg.get.return_wmd.wh_interface_cookie,
- hdr->msg.get.return_wmd.wh_object_cookie,
- hdr->msg.get.match_bits);
- nal->cb_printf(nal,
- " Length %d, src offset %d\n",
- hdr->msg.get.sink_length,
- hdr->msg.get.src_offset);
+ CWARN(" Ptl index %d, return md "LPX64"."LPX64", "
+ "match bits "LPX64"\n", hdr->msg.get.ptl_index,
+ hdr->msg.get.return_wmd.wh_interface_cookie,
+ hdr->msg.get.return_wmd.wh_object_cookie,
+ hdr->msg.get.match_bits);
+ CWARN(" Length %d, src offset %d\n",
+ hdr->msg.get.sink_length,
+ hdr->msg.get.src_offset);
break;
case PTL_MSG_ACK:
- nal->cb_printf(nal, " dst md "LPX64"."LPX64", "
- "manipulated length %d\n",
- hdr->msg.ack.dst_wmd.wh_interface_cookie,
- hdr->msg.ack.dst_wmd.wh_object_cookie,
- hdr->msg.ack.mlength);
+ CWARN(" dst md "LPX64"."LPX64", "
+ "manipulated length %d\n",
+ hdr->msg.ack.dst_wmd.wh_interface_cookie,
+ hdr->msg.ack.dst_wmd.wh_object_cookie,
+ hdr->msg.ack.mlength);
break;
case PTL_MSG_REPLY:
- nal->cb_printf(nal, " dst md "LPX64"."LPX64", "
- "length %d\n",
- hdr->msg.reply.dst_wmd.wh_interface_cookie,
- hdr->msg.reply.dst_wmd.wh_object_cookie,
- hdr->payload_length);
+ CWARN(" dst md "LPX64"."LPX64", "
+ "length %d\n",
+ hdr->msg.reply.dst_wmd.wh_interface_cookie,
+ hdr->msg.reply.dst_wmd.wh_object_cookie,
+ hdr->payload_length);
}
} /* end of print_hdr() */
-void
-lib_parse(nal_cb_t *nal, ptl_hdr_t *hdr, void *private)
+ptl_err_t
+lib_parse(lib_nal_t *nal, ptl_hdr_t *hdr, void *private)
{
unsigned long flags;
ptl_err_t rc;
lib_msg_t *msg;
+
+ /* NB we return PTL_OK if we manage to parse the header and believe
+ * it looks OK. Anything that goes wrong with receiving the
+ * message after that point is the responsibility of the NAL */
/* convert common fields to host byte order */
- hdr->dest_nid = NTOH__u64 (hdr->dest_nid);
+ hdr->type = NTOH__u32 (hdr->type);
hdr->src_nid = NTOH__u64 (hdr->src_nid);
- hdr->dest_pid = NTOH__u32 (hdr->dest_pid);
hdr->src_pid = NTOH__u32 (hdr->src_pid);
- hdr->type = NTOH__u32 (hdr->type);
+ hdr->dest_pid = NTOH__u32 (hdr->dest_pid);
hdr->payload_length = NTOH__u32(hdr->payload_length);
-#if 0
- nal->cb_printf(nal, "%d: lib_parse: nal=%p hdr=%p type=%d\n",
- nal->ni.nid, nal, hdr, hdr->type);
- print_hdr(nal, hdr);
-#endif
- if (hdr->type == PTL_MSG_HELLO) {
+
+ switch (hdr->type) {
+ case PTL_MSG_HELLO: {
/* dest_nid is really ptl_magicversion_t */
ptl_magicversion_t *mv = (ptl_magicversion_t *)&hdr->dest_nid;
- CERROR (LPU64": Dropping unexpected HELLO message: "
+ mv->magic = NTOH__u32(mv->magic);
+ mv->version_major = NTOH__u16(mv->version_major);
+ mv->version_minor = NTOH__u16(mv->version_minor);
+
+ if (mv->magic == PORTALS_PROTO_MAGIC &&
+ mv->version_major == PORTALS_PROTO_VERSION_MAJOR &&
+ mv->version_minor == PORTALS_PROTO_VERSION_MINOR) {
+ CWARN (LPU64": Dropping unexpected HELLO message: "
+ "magic %d, version %d.%d from "LPD64"\n",
+ nal->libnal_ni.ni_pid.nid, mv->magic,
+ mv->version_major, mv->version_minor,
+ hdr->src_nid);
+
+ /* it's good but we don't want it */
+ lib_drop_message(nal, private, hdr);
+ return PTL_OK;
+ }
+
+ /* we got garbage */
+ CERROR (LPU64": Bad HELLO message: "
"magic %d, version %d.%d from "LPD64"\n",
- nal->ni.nid, mv->magic,
+ nal->libnal_ni.ni_pid.nid, mv->magic,
mv->version_major, mv->version_minor,
hdr->src_nid);
- lib_drop_message(nal, private, hdr);
- return;
+ return PTL_FAIL;
}
-
- if (hdr->dest_nid != nal->ni.nid) {
- CERROR(LPU64": Dropping %s message from "LPU64" to "LPU64
- " (not me)\n", nal->ni.nid, hdr_type_string (hdr),
- hdr->src_nid, hdr->dest_nid);
- lib_drop_message(nal, private, hdr);
- return;
+
+ case PTL_MSG_ACK:
+ case PTL_MSG_PUT:
+ case PTL_MSG_GET:
+ case PTL_MSG_REPLY:
+ hdr->dest_nid = NTOH__u64 (hdr->dest_nid);
+ if (hdr->dest_nid != nal->libnal_ni.ni_pid.nid) {
+ CERROR(LPU64": BAD dest NID in %s message from "
+ LPU64" to "LPU64" (not me)\n",
+ nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr),
+ hdr->src_nid, hdr->dest_nid);
+ return PTL_FAIL;
+ }
+ break;
+
+ default:
+ CERROR(LPU64": Bad message type 0x%x from "LPU64"\n",
+ nal->libnal_ni.ni_pid.nid, hdr->type, hdr->src_nid);
+ return PTL_FAIL;
}
- if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */
+ /* We've decided we're not receiving garbage since we can parse the
+ * header. We will return PTL_OK come what may... */
+
+ if (!list_empty (&nal->libnal_ni.ni_test_peers) && /* normally we don't */
fail_peer (nal, hdr->src_nid, 0)) /* shall we now? */
{
CERROR(LPU64": Dropping incoming %s from "LPU64
": simulated failure\n",
- nal->ni.nid, hdr_type_string (hdr),
+ nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr),
hdr->src_nid);
lib_drop_message(nal, private, hdr);
- return;
+ return PTL_OK;
}
msg = lib_msg_alloc(nal);
if (msg == NULL) {
CERROR(LPU64": Dropping incoming %s from "LPU64
": can't allocate a lib_msg_t\n",
- nal->ni.nid, hdr_type_string (hdr),
+ nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr),
hdr->src_nid);
lib_drop_message(nal, private, hdr);
- return;
+ return PTL_OK;
}
switch (hdr->type) {
rc = parse_reply(nal, hdr, private, msg);
break;
default:
- CERROR(LPU64": Dropping <unknown> message from "LPU64
- ": Bad type=0x%x\n", nal->ni.nid, hdr->src_nid,
- hdr->type);
- rc = PTL_FAIL;
+ LASSERT(0);
+ rc = PTL_FAIL; /* no compiler warning please */
break;
}
/* committed... */
lib_finalize(nal, private, msg, rc);
} else {
- state_lock(nal, &flags);
- lib_msg_free(nal, msg); /* expects state_lock held */
- state_unlock(nal, &flags);
+ LIB_LOCK(nal, flags);
+ lib_msg_free(nal, msg); /* expects LIB_LOCK held */
+ LIB_UNLOCK(nal, flags);
lib_drop_message(nal, private, hdr);
}
}
+
+ return PTL_OK;
+ /* That's "OK I can parse it", not "OK I like it" :) */
}
int
-do_PtlPut(nal_cb_t *nal, void *private, void *v_args, void *v_ret)
+lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh,
+ ptl_ack_req_t ack, ptl_process_id_t *id,
+ ptl_pt_index_t portal, ptl_ac_index_t ac,
+ ptl_match_bits_t match_bits,
+ ptl_size_t offset, ptl_hdr_data_t hdr_data)
{
- /*
- * Incoming:
- * ptl_handle_md_t md_in
- * ptl_ack_req_t ack_req_in
- * ptl_process_id_t target_in
- * ptl_pt_index_t portal_in
- * ptl_ac_index_t cookie_in
- * ptl_match_bits_t match_bits_in
- * ptl_size_t offset_in
- *
- * Outgoing:
- */
-
- PtlPut_in *args = v_args;
- ptl_process_id_t *id = &args->target_in;
- PtlPut_out *ret = v_ret;
- lib_ni_t *ni = &nal->ni;
+ lib_nal_t *nal = apinal->nal_data;
+ lib_ni_t *ni = &nal->libnal_ni;
lib_msg_t *msg;
ptl_hdr_t hdr;
lib_md_t *md;
unsigned long flags;
int rc;
- if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */
+ if (!list_empty (&ni->ni_test_peers) && /* normally we don't */
fail_peer (nal, id->nid, 1)) /* shall we now? */
{
- CERROR(LPU64": Dropping PUT to "LPU64": simulated failure\n",
- nal->ni.nid, id->nid);
- return (ret->rc = PTL_PROCESS_INVALID);
+ CERROR("Dropping PUT to "LPU64": simulated failure\n",
+ id->nid);
+ return PTL_PROCESS_INVALID;
}
msg = lib_msg_alloc(nal);
if (msg == NULL) {
CERROR(LPU64": Dropping PUT to "LPU64": ENOMEM on lib_msg_t\n",
- ni->nid, id->nid);
- return (ret->rc = PTL_NO_SPACE);
+ ni->ni_pid.nid, id->nid);
+ return PTL_NO_SPACE;
}
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
- md = ptl_handle2md(&args->md_in, nal);
+ md = ptl_handle2md(mdh, nal);
if (md == NULL || md->threshold == 0) {
lib_msg_free(nal, msg);
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
- return (ret->rc = PTL_MD_INVALID);
+ return PTL_MD_INVALID;
}
- CDEBUG(D_NET, "PtlPut -> %Lu: %lu\n", (unsigned long long)id->nid,
- (unsigned long)id->pid);
+ CDEBUG(D_NET, "PtlPut -> "LPX64"\n", id->nid);
memset (&hdr, 0, sizeof (hdr));
hdr.type = HTON__u32 (PTL_MSG_PUT);
hdr.dest_nid = HTON__u64 (id->nid);
- hdr.src_nid = HTON__u64 (ni->nid);
hdr.dest_pid = HTON__u32 (id->pid);
- hdr.src_pid = HTON__u32 (ni->pid);
+ hdr.src_nid = HTON__u64 (ni->ni_pid.nid);
+ hdr.src_pid = HTON__u32 (ni->ni_pid.pid);
hdr.payload_length = HTON__u32 (md->length);
/* NB handles only looked up by creator (no flips) */
- if (args->ack_req_in == PTL_ACK_REQ) {
+ if (ack == PTL_ACK_REQ) {
hdr.msg.put.ack_wmd.wh_interface_cookie = ni->ni_interface_cookie;
hdr.msg.put.ack_wmd.wh_object_cookie = md->md_lh.lh_cookie;
} else {
hdr.msg.put.ack_wmd = PTL_WIRE_HANDLE_NONE;
}
- hdr.msg.put.match_bits = HTON__u64 (args->match_bits_in);
- hdr.msg.put.ptl_index = HTON__u32 (args->portal_in);
- hdr.msg.put.offset = HTON__u32 (args->offset_in);
- hdr.msg.put.hdr_data = args->hdr_data_in;
+ hdr.msg.put.match_bits = HTON__u64 (match_bits);
+ hdr.msg.put.ptl_index = HTON__u32 (portal);
+ hdr.msg.put.offset = HTON__u32 (offset);
+ hdr.msg.put.hdr_data = hdr_data;
lib_commit_md(nal, md, msg);
msg->ev.type = PTL_EVENT_SEND_END;
- msg->ev.initiator.nid = ni->nid;
- msg->ev.initiator.pid = ni->pid;
- msg->ev.portal = args->portal_in;
- msg->ev.match_bits = args->match_bits_in;
+ msg->ev.initiator.nid = ni->ni_pid.nid;
+ msg->ev.initiator.pid = ni->ni_pid.pid;
+ msg->ev.portal = portal;
+ msg->ev.match_bits = match_bits;
msg->ev.rlength = md->length;
msg->ev.mlength = md->length;
- msg->ev.offset = args->offset_in;
- msg->ev.hdr_data = args->hdr_data_in;
+ msg->ev.offset = offset;
+ msg->ev.hdr_data = hdr_data;
lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
- ni->counters.send_count++;
- ni->counters.send_length += md->length;
+ ni->ni_counters.send_count++;
+ ni->ni_counters.send_length += md->length;
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
- rc = lib_send (nal, private, msg, &hdr, PTL_MSG_PUT,
+ rc = lib_send (nal, NULL, msg, &hdr, PTL_MSG_PUT,
id->nid, id->pid, md, 0, md->length);
if (rc != PTL_OK) {
- CERROR(LPU64": error sending PUT to "LPU64": %d\n",
- ni->nid, id->nid, rc);
- lib_finalize (nal, private, msg, rc);
+ CERROR("Error sending PUT to "LPX64": %d\n",
+ id->nid, rc);
+ lib_finalize (nal, NULL, msg, rc);
}
/* completion will be signalled by an event */
- return ret->rc = PTL_OK;
+ return PTL_OK;
}
lib_msg_t *
-lib_create_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, lib_msg_t *getmsg)
+lib_create_reply_msg (lib_nal_t *nal, ptl_nid_t peer_nid, lib_msg_t *getmsg)
{
/* The NAL can DMA direct to the GET md (i.e. no REPLY msg). This
* returns a msg for the NAL to pass to lib_finalize() when the sink
* CAVEAT EMPTOR: 'getmsg' is the original GET, which is freed when
* lib_finalize() is called on it, so the NAL must call this first */
- lib_ni_t *ni = &nal->ni;
+ lib_ni_t *ni = &nal->libnal_ni;
lib_msg_t *msg = lib_msg_alloc(nal);
lib_md_t *getmd = getmsg->md;
unsigned long flags;
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
LASSERT (getmd->pending > 0);
lib_md_deconstruct(nal, getmd, &msg->ev.mem_desc);
- ni->counters.recv_count++;
- ni->counters.recv_length += getmd->length;
+ ni->ni_counters.recv_count++;
+ ni->ni_counters.recv_length += getmd->length;
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
return msg;
drop_msg:
lib_msg_free(nal, msg);
drop:
- nal->ni.counters.drop_count++;
- nal->ni.counters.drop_length += getmd->length;
+ nal->libnal_ni.ni_counters.drop_count++;
+ nal->libnal_ni.ni_counters.drop_length += getmd->length;
- state_unlock (nal, &flags);
+ LIB_UNLOCK (nal, flags);
return NULL;
}
int
-do_PtlGet(nal_cb_t *nal, void *private, void *v_args, void *v_ret)
+lib_api_get(nal_t *apinal, ptl_handle_md_t *mdh, ptl_process_id_t *id,
+ ptl_pt_index_t portal, ptl_ac_index_t ac,
+ ptl_match_bits_t match_bits, ptl_size_t offset)
{
- /*
- * Incoming:
- * ptl_handle_md_t md_in
- * ptl_process_id_t target_in
- * ptl_pt_index_t portal_in
- * ptl_ac_index_t cookie_in
- * ptl_match_bits_t match_bits_in
- * ptl_size_t offset_in
- *
- * Outgoing:
- */
-
- PtlGet_in *args = v_args;
- ptl_process_id_t *id = &args->target_in;
- PtlGet_out *ret = v_ret;
- lib_ni_t *ni = &nal->ni;
+ lib_nal_t *nal = apinal->nal_data;
+ lib_ni_t *ni = &nal->libnal_ni;
lib_msg_t *msg;
ptl_hdr_t hdr;
lib_md_t *md;
unsigned long flags;
int rc;
- if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */
+ if (!list_empty (&ni->ni_test_peers) && /* normally we don't */
fail_peer (nal, id->nid, 1)) /* shall we now? */
{
- CERROR(LPU64": Dropping PUT to "LPU64": simulated failure\n",
- nal->ni.nid, id->nid);
- return (ret->rc = PTL_PROCESS_INVALID);
+ CERROR("Dropping GET to "LPX64": simulated failure\n",
+ id->nid);
+ return PTL_PROCESS_INVALID;
}
msg = lib_msg_alloc(nal);
if (msg == NULL) {
- CERROR(LPU64": Dropping GET to "LPU64": ENOMEM on lib_msg_t\n",
- ni->nid, id->nid);
- return (ret->rc = PTL_NO_SPACE);
+ CERROR("Dropping GET to "LPU64": ENOMEM on lib_msg_t\n",
+ id->nid);
+ return PTL_NO_SPACE;
}
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
- md = ptl_handle2md(&args->md_in, nal);
+ md = ptl_handle2md(mdh, nal);
if (md == NULL || !md->threshold) {
lib_msg_free(nal, msg);
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
- return ret->rc = PTL_MD_INVALID;
+ return PTL_MD_INVALID;
}
CDEBUG(D_NET, "PtlGet -> %Lu: %lu\n", (unsigned long long)id->nid,
memset (&hdr, 0, sizeof (hdr));
hdr.type = HTON__u32 (PTL_MSG_GET);
hdr.dest_nid = HTON__u64 (id->nid);
- hdr.src_nid = HTON__u64 (ni->nid);
hdr.dest_pid = HTON__u32 (id->pid);
- hdr.src_pid = HTON__u32 (ni->pid);
+ hdr.src_nid = HTON__u64 (ni->ni_pid.nid);
+ hdr.src_pid = HTON__u32 (ni->ni_pid.pid);
hdr.payload_length = 0;
/* NB handles only looked up by creator (no flips) */
hdr.msg.get.return_wmd.wh_interface_cookie = ni->ni_interface_cookie;
hdr.msg.get.return_wmd.wh_object_cookie = md->md_lh.lh_cookie;
- hdr.msg.get.match_bits = HTON__u64 (args->match_bits_in);
- hdr.msg.get.ptl_index = HTON__u32 (args->portal_in);
- hdr.msg.get.src_offset = HTON__u32 (args->offset_in);
+ hdr.msg.get.match_bits = HTON__u64 (match_bits);
+ hdr.msg.get.ptl_index = HTON__u32 (portal);
+ hdr.msg.get.src_offset = HTON__u32 (offset);
hdr.msg.get.sink_length = HTON__u32 (md->length);
lib_commit_md(nal, md, msg);
msg->ev.type = PTL_EVENT_SEND_END;
- msg->ev.initiator.nid = ni->nid;
- msg->ev.initiator.pid = ni->pid;
- msg->ev.portal = args->portal_in;
- msg->ev.match_bits = args->match_bits_in;
+ msg->ev.initiator = ni->ni_pid;
+ msg->ev.portal = portal;
+ msg->ev.match_bits = match_bits;
msg->ev.rlength = md->length;
msg->ev.mlength = md->length;
- msg->ev.offset = args->offset_in;
+ msg->ev.offset = offset;
msg->ev.hdr_data = 0;
lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
- ni->counters.send_count++;
+ ni->ni_counters.send_count++;
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
- rc = lib_send (nal, private, msg, &hdr, PTL_MSG_GET,
+ rc = lib_send (nal, NULL, msg, &hdr, PTL_MSG_GET,
id->nid, id->pid, NULL, 0, 0);
if (rc != PTL_OK) {
CERROR(LPU64": error sending GET to "LPU64": %d\n",
- ni->nid, id->nid, rc);
- lib_finalize (nal, private, msg, rc);
+ ni->ni_pid.nid, id->nid, rc);
+ lib_finalize (nal, NULL, msg, rc);
}
/* completion will be signalled by an event */
- return ret->rc = PTL_OK;
+ return PTL_OK;
}
void lib_assert_wire_constants (void)
#include <portals/lib-p30.h>
void
-lib_enq_event_locked (nal_cb_t *nal, void *private,
+lib_enq_event_locked (lib_nal_t *nal, void *private,
lib_eq_t *eq, ptl_event_t *ev)
{
ptl_event_t *eq_slot;
- int rc;
- ev->sequence = eq->sequence++; /* Allocate the next queue slot */
-
- /* size must be a power of 2 to handle a wrapped sequence # */
- LASSERT (eq->size != 0 &&
- eq->size == LOWEST_BIT_SET (eq->size));
- eq_slot = eq->base + (ev->sequence & (eq->size - 1));
+ ev->sequence = eq->eq_enq_seq++; /* Allocate the next queue slot */
- /* Copy the event into the allocated slot, ensuring all the rest of
- * the event's contents have been copied _before_ the sequence
- * number gets updated. A processes 'getting' an event waits on
- * the next queue slot's sequence to be 'new'. When it is, _all_
- * other event fields had better be consistent. I assert
- * 'sequence' is the last member, so I only need a 2 stage copy. */
+ /* size must be a power of 2 to handle sequence # overflow */
+ LASSERT (eq->eq_size != 0 &&
+ eq->eq_size == LOWEST_BIT_SET (eq->eq_size));
+ eq_slot = eq->eq_events + (ev->sequence & (eq->eq_size - 1));
- LASSERT(sizeof (ptl_event_t) ==
- offsetof(ptl_event_t, sequence) + sizeof(ev->sequence));
+ /* There is no race since both event consumers and event producers
+ * take the LIB_LOCK(), so we don't screw around with memory
+ * barriers, setting the sequence number last or weird structure
+ * layout assertions. */
+ *eq_slot = *ev;
- rc = nal->cb_write (nal, private, (user_ptr)eq_slot, ev,
- offsetof (ptl_event_t, sequence));
- LASSERT (rc == PTL_OK);
+ /* Call the callback handler (if any) */
+ if (eq->eq_callback != NULL)
+ eq->eq_callback (eq_slot);
+ /* Wake anyone sleeping for an event (see lib-eq.c) */
#ifdef __KERNEL__
- barrier();
-#endif
- /* Updating the sequence number is what makes the event 'new' NB if
- * the cb_write below isn't atomic, this could cause a race with
- * PtlEQGet */
- rc = nal->cb_write(nal, private, (user_ptr)&eq_slot->sequence,
- (void *)&ev->sequence,sizeof (ev->sequence));
- LASSERT (rc == PTL_OK);
-
-#ifdef __KERNEL__
- barrier();
+ if (waitqueue_active(&nal->libnal_ni.ni_waitq))
+ wake_up_all(&nal->libnal_ni.ni_waitq);
+#else
+ pthread_cond_broadcast(&nal->libnal_ni.ni_cond);
#endif
-
- if (nal->cb_callback != NULL)
- nal->cb_callback(nal, private, eq, ev);
- else if (eq->event_callback != NULL)
- eq->event_callback(ev);
}
void
-lib_finalize(nal_cb_t *nal, void *private, lib_msg_t *msg, ptl_err_t status)
+lib_finalize (lib_nal_t *nal, void *private, lib_msg_t *msg, ptl_err_t status)
{
lib_md_t *md;
int unlink;
memset (&ack, 0, sizeof (ack));
ack.type = HTON__u32 (PTL_MSG_ACK);
ack.dest_nid = HTON__u64 (msg->ev.initiator.nid);
- ack.src_nid = HTON__u64 (nal->ni.nid);
ack.dest_pid = HTON__u32 (msg->ev.initiator.pid);
- ack.src_pid = HTON__u32 (nal->ni.pid);
+ ack.src_nid = HTON__u64 (nal->libnal_ni.ni_pid.nid);
+ ack.src_pid = HTON__u32 (nal->libnal_ni.ni_pid.pid);
ack.payload_length = 0;
ack.msg.ack.dst_wmd = msg->ack_wmd;
md = msg->md;
- state_lock(nal, &flags);
+ LIB_LOCK(nal, flags);
/* Now it's safe to drop my caller's ref */
md->pending--;
lib_md_unlink(nal, md);
list_del (&msg->msg_list);
- nal->ni.counters.msgs_alloc--;
+ nal->libnal_ni.ni_counters.msgs_alloc--;
lib_msg_free(nal, msg);
- state_unlock(nal, &flags);
+ LIB_UNLOCK(nal, flags);
}
#define DEBUG_SUBSYSTEM S_PORTALS
#include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
#define MAX_DIST 18446744073709551615ULL
-int do_PtlNIStatus(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int lib_api_ni_status (nal_t *apinal, ptl_sr_index_t sr_idx,
+ ptl_sr_value_t *status)
{
- /*
- * Incoming:
- * ptl_handle_ni_t interface_in
- * ptl_sr_index_t register_in
- *
- * Outgoing:
- * ptl_sr_value_t * status_out
- */
-
- PtlNIStatus_in *args = v_args;
- PtlNIStatus_out *ret = v_ret;
- lib_ni_t *ni = &nal->ni;
- lib_counters_t *count = &ni->counters;
-
- if (!args)
- return ret->rc = PTL_SEGV;
-
- ret->rc = PTL_OK;
- ret->status_out = 0;
-
- /*
- * I hate this sort of code.... Hash tables, offset lists?
- * Treat the counters as an array of ints?
- */
- if (args->register_in == PTL_SR_DROP_COUNT)
- ret->status_out = count->drop_count;
-
- else if (args->register_in == PTL_SR_DROP_LENGTH)
- ret->status_out = count->drop_length;
-
- else if (args->register_in == PTL_SR_RECV_COUNT)
- ret->status_out = count->recv_count;
-
- else if (args->register_in == PTL_SR_RECV_LENGTH)
- ret->status_out = count->recv_length;
-
- else if (args->register_in == PTL_SR_SEND_COUNT)
- ret->status_out = count->send_count;
-
- else if (args->register_in == PTL_SR_SEND_LENGTH)
- ret->status_out = count->send_length;
-
- else if (args->register_in == PTL_SR_MSGS_MAX)
- ret->status_out = count->msgs_max;
- else
- ret->rc = PTL_SR_INDEX_INVALID;
-
- return ret->rc;
+ lib_nal_t *nal = apinal->nal_data;
+ lib_ni_t *ni = &nal->libnal_ni;
+ lib_counters_t *count = &ni->ni_counters;
+
+ switch (sr_idx) {
+ case PTL_SR_DROP_COUNT:
+ *status = count->drop_count;
+ return PTL_OK;
+ case PTL_SR_DROP_LENGTH:
+ *status = count->drop_length;
+ return PTL_OK;
+ case PTL_SR_RECV_COUNT:
+ *status = count->recv_count;
+ return PTL_OK;
+ case PTL_SR_RECV_LENGTH:
+ *status = count->recv_length;
+ return PTL_OK;
+ case PTL_SR_SEND_COUNT:
+ *status = count->send_count;
+ return PTL_OK;
+ case PTL_SR_SEND_LENGTH:
+ *status = count->send_length;
+ return PTL_OK;
+ case PTL_SR_MSGS_MAX:
+ *status = count->msgs_max;
+ return PTL_OK;
+ default:
+ *status = 0;
+ return PTL_SR_INDEX_INVALID;
+ }
}
-int do_PtlNIDist(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int lib_api_ni_dist (nal_t *apinal, ptl_process_id_t *pid, unsigned long *dist)
{
- /*
- * Incoming:
- * ptl_handle_ni_t interface_in
- * ptl_process_id_t process_in
-
- *
- * Outgoing:
- * unsigned long * distance_out
-
- */
-
- PtlNIDist_in *args = v_args;
- PtlNIDist_out *ret = v_ret;
-
- unsigned long dist;
- ptl_process_id_t id_in = args->process_in;
- ptl_nid_t nid;
- int rc;
-
- nid = id_in.nid;
-
- if ((rc = nal->cb_dist(nal, nid, &dist)) != 0) {
- ret->distance_out = (unsigned long) MAX_DIST;
- return PTL_PROCESS_INVALID;
- }
-
- ret->distance_out = dist;
+ lib_nal_t *nal = apinal->nal_data;
- return ret->rc = PTL_OK;
+ return (nal->libnal_dist(nal, pid->nid, dist));
}
# include <unistd.h>
#endif
#include <portals/lib-p30.h>
-#include <portals/arg-blocks.h>
-int do_PtlGetId(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
+int
+lib_api_get_id(nal_t *apinal, ptl_process_id_t *pid)
{
- /*
- * Incoming:
- * ptl_handle_ni_t handle_in
- *
- * Outgoing:
- * ptl_process_id_t * id_out
- * ptl_id_t * gsize_out
- */
-
- PtlGetId_out *ret = v_ret;
- lib_ni_t *ni = &nal->ni;
-
- ret->id_out.nid = ni->nid;
- ret->id_out.pid = ni->pid;
-
- return ret->rc = PTL_OK;
+ lib_nal_t *nal = apinal->nal_data;
+
+ *pid = nal->libnal_ni.ni_pid;
+ return PTL_OK;
}
EXPORT_SYMBOL(ptl_unregister_nal);
EXPORT_SYMBOL(ptl_err_str);
-EXPORT_SYMBOL(lib_dispatch);
EXPORT_SYMBOL(PtlMEAttach);
EXPORT_SYMBOL(PtlMEInsert);
EXPORT_SYMBOL(PtlMEUnlink);
EXPORT_SYMBOL(lib_create_reply_msg);
EXPORT_SYMBOL(lib_init);
EXPORT_SYMBOL(lib_fini);
-EXPORT_SYMBOL(dispatch_name);
MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
MODULE_DESCRIPTION("Portals v3.1");
int port;
if (pidrequest==(unsigned short)PTL_PID_ANY) port = 0;
else port=pidrequest;
- t->nal_cb->ni.nid=get_node_id();
- t->nal_cb->ni.pid=port;
+ t->lib_nal->libnal_ni.ni_pid.nid=get_node_id();
+ t->lib_nal->libnal_ni.ni_pid.pid=port;
}
#else
in_addr = get_node_id();
t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */
- t->nal_cb->ni.nid = ((in_addr & PNAL_HOSTID_MASK)
- << PNAL_VNODE_SHIFT)
- + virtnode;
-
+ t->lib_nal->libnal_ni.ni_pid.nid = ((in_addr & PNAL_HOSTID_MASK)
+ << PNAL_VNODE_SHIFT)
+ + virtnode;
pid=pidrequest;
/* TODO: Support of pid PTL_ID_ANY with virtual nodes needs more work. */
#ifdef notyet
return;
}
else port = ((virtnode << PNAL_VNODE_SHIFT) + pid) + PNAL_BASE_PORT;
- t->nal_cb->ni.pid=pid;
+ t->lib_nal->libnal_ni.ni_pid.pid=pid;
}
#endif
typedef struct bridge {
int alive;
- nal_cb_t *nal_cb;
+ lib_nal_t *lib_nal;
void *lower;
void *local;
void (*shutdown)(struct bridge *);
syscall(SYS_write, p->notifier[0], buf, sizeof(buf));
}
-/* Function: forward
- * Arguments: nal_t *nal: pointer to my top-side nal structure
- * id: the command to pass to the lower layer
- * args, args_len:pointer to and length of the request
- * ret, ret_len: pointer to and size of the result
- * Returns: a portals status code
- *
- * forwards a packaged api call from the 'api' side to the 'library'
- * side, and collects the result
- */
-static int procbridge_forward(nal_t *n, int id, void *args, size_t args_len,
- void *ret, size_t ret_len)
-{
- bridge b = (bridge) n->nal_data;
-
- if (id == PTL_FINI) {
- lib_fini(b->nal_cb);
-
- if (b->shutdown)
- (*b->shutdown)(b);
- }
-
- lib_dispatch(b->nal_cb, NULL, id, args, ret);
-
- return (PTL_OK);
-}
-
-
/* Function: shutdown
* Arguments: nal: a pointer to my top side nal structure
* ni: my network interface index
*/
static void procbridge_shutdown(nal_t *n)
{
- bridge b=(bridge)n->nal_data;
+ lib_nal_t *nal = n->nal_data;
+ bridge b=(bridge)nal->libnal_data;
procbridge p=(procbridge)b->local;
p->nal_flags |= NAL_FLAG_STOPPING;
}
-static void procbridge_lock(nal_t * n, unsigned long *flags)
-{
- bridge b=(bridge)n->nal_data;
- procbridge p=(procbridge)b->local;
-
- pthread_mutex_lock(&p->mutex);
-}
-
-static void procbridge_unlock(nal_t * n, unsigned long *flags)
-{
- bridge b=(bridge)n->nal_data;
- procbridge p=(procbridge)b->local;
-
- pthread_mutex_unlock(&p->mutex);
-}
-
-/* Function: yield
- * Arguments: pid:
- *
- * this function was originally intended to allow the
- * lower half thread to be scheduled to allow progress. we
- * overload it to explicitly block until signalled by the
- * lower half.
- */
-static int procbridge_yield(nal_t *n, unsigned long *flags, int milliseconds)
-{
- bridge b=(bridge)n->nal_data;
- procbridge p=(procbridge)b->local;
-
- if (milliseconds == 0)
- return 0;
-
- if (milliseconds < 0) {
- pthread_cond_wait(&p->cond,&p->mutex);
- } else {
- struct timeval then;
- struct timeval now;
- struct timespec timeout;
-
- gettimeofday(&then, NULL);
- timeout.tv_sec = then.tv_sec + milliseconds/1000;
- timeout.tv_nsec = then.tv_usec * 1000 + milliseconds % 1000 * 1000000;
- if (timeout.tv_nsec >= 1000000000) {
- timeout.tv_sec++;
- timeout.tv_nsec -= 1000000000;
- }
-
- pthread_cond_timedwait(&p->cond, &p->mutex, &timeout);
-
- gettimeofday(&now, NULL);
- milliseconds -= (now.tv_sec - then.tv_sec) * 1000 +
- (now.tv_usec - then.tv_usec) / 1000;
-
- if (milliseconds < 0)
- milliseconds = 0;
- }
-
- return (milliseconds);
-}
-
/* forward decl */
extern int procbridge_startup (nal_t *, ptl_pid_t,
ptl_ni_limits_t *, ptl_ni_limits_t *);
/* api_nal
* the interface vector to allow the generic code to access
- * this nal. this is seperate from the library side nal_cb.
+ * this nal. this is separate from the library side lib_nal.
* TODO: should be dyanmically allocated
*/
nal_t procapi_nal = {
nal_data: NULL,
- startup: procbridge_startup,
- shutdown: procbridge_shutdown,
- forward: procbridge_forward,
- yield: procbridge_yield,
- lock: procbridge_lock,
- unlock: procbridge_unlock
+ nal_ni_init: procbridge_startup,
+ nal_ni_fini: procbridge_shutdown,
};
ptl_nid_t tcpnal_mynid;
b=(bridge)malloc(sizeof(struct bridge));
p=(procbridge)malloc(sizeof(struct procbridge));
- nal->nal_data=b;
b->local=p;
args.nia_requested_pid = requested_pid;
args.nia_actual_limits = actual_limits;
args.nia_nal_type = nal_type;
args.nia_bridge = b;
+ args.nia_apinal = nal;
/* init procbridge */
pthread_mutex_init(&p->mutex,0);
if (p->nal_flags & NAL_FLAG_STOPPED)
return PTL_FAIL;
- b->nal_cb->ni.nid = tcpnal_mynid;
+ b->lib_nal->libnal_ni.ni_pid.nid = tcpnal_mynid;
return PTL_OK;
}
int nal_flags;
- pthread_mutex_t nal_cb_lock;
} *procbridge;
typedef struct nal_init_args {
ptl_ni_limits_t *nia_actual_limits;
int nia_nal_type;
bridge nia_bridge;
+ nal_t *nia_apinal;
} nal_init_args_t;
extern void *nal_thread(void *);
/* the following functions are stubs to satisfy the nal definition
without doing anything particularily useful*/
-static ptl_err_t nal_write(nal_cb_t *nal,
- void *private,
- user_ptr dst_addr,
- void *src_addr,
- size_t len)
-{
- memcpy(dst_addr, src_addr, len);
- return PTL_OK;
-}
-
-static ptl_err_t nal_read(nal_cb_t * nal,
- void *private,
- void *dst_addr,
- user_ptr src_addr,
- size_t len)
-{
- memcpy(dst_addr, src_addr, len);
- return PTL_OK;
-}
-
-static void *nal_malloc(nal_cb_t *nal,
- size_t len)
-{
- void *buf = malloc(len);
- return buf;
-}
-
-static void nal_free(nal_cb_t *nal,
- void *buf,
- size_t len)
-{
- free(buf);
-}
-
-static void nal_printf(nal_cb_t *nal,
- const char *fmt,
- ...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- vprintf(fmt, ap);
- va_end(ap);
-}
-
-
-static void nal_cli(nal_cb_t *nal,
- unsigned long *flags)
-{
- bridge b = (bridge) nal->nal_data;
- procbridge p = (procbridge) b->local;
-
- pthread_mutex_lock(&p->mutex);
-}
-
-
-static void nal_sti(nal_cb_t *nal,
- unsigned long *flags)
-{
- bridge b = (bridge)nal->nal_data;
- procbridge p = (procbridge) b->local;
-
- pthread_mutex_unlock(&p->mutex);
-}
-
-static void nal_callback(nal_cb_t *nal, void *private,
- lib_eq_t *eq, ptl_event_t *ev)
-{
- bridge b = (bridge)nal->nal_data;
- procbridge p = (procbridge) b->local;
-
- /* holding p->mutex */
- if (eq->event_callback != NULL)
- eq->event_callback(ev);
-
- pthread_cond_broadcast(&p->cond);
-}
-
-static int nal_dist(nal_cb_t *nal,
+static int nal_dist(lib_nal_t *nal,
ptl_nid_t nid,
unsigned long *dist)
{
ptl_process_id_t process_id;
int nal_type;
- b->nal_cb=(nal_cb_t *)malloc(sizeof(nal_cb_t));
- b->nal_cb->nal_data=b;
- b->nal_cb->cb_read=nal_read;
- b->nal_cb->cb_write=nal_write;
- b->nal_cb->cb_malloc=nal_malloc;
- b->nal_cb->cb_free=nal_free;
- b->nal_cb->cb_map=NULL;
- b->nal_cb->cb_unmap=NULL;
- b->nal_cb->cb_printf=nal_printf;
- b->nal_cb->cb_cli=nal_cli;
- b->nal_cb->cb_sti=nal_sti;
- b->nal_cb->cb_callback=nal_callback;
- b->nal_cb->cb_dist=nal_dist;
+ b->lib_nal=(lib_nal_t *)malloc(sizeof(lib_nal_t));
+ b->lib_nal->libnal_data=b;
+ b->lib_nal->libnal_map=NULL;
+ b->lib_nal->libnal_unmap=NULL;
+ b->lib_nal->libnal_dist=nal_dist;
nal_type = args->nia_nal_type;
- /* Wierd, but this sets b->nal_cb->ni.{nid,pid}, which lib_init() is
- * about to do from the process_id passed to it...*/
+ /* Weird, but this sets b->lib_nal->libnal_ni.ni_pid.{nid,pid}, which
+ * lib_init() is about to do from the process_id passed to it...*/
set_address(b,args->nia_requested_pid);
- process_id.pid = b->nal_cb->ni.pid;
- process_id.nid = b->nal_cb->ni.nid;
+ process_id = b->lib_nal->libnal_ni.ni_pid;
if (nal_table[nal_type]) rc=(*nal_table[nal_type])(b);
/* initialize the generic 'library' level code */
- rc = lib_init(b->nal_cb, process_id,
+ rc = lib_init(b->lib_nal, args->nia_apinal,
+ process_id,
args->nia_requested_limits,
args->nia_actual_limits);
*
* sends a packet to the peer, after insuring that a connection exists
*/
-ptl_err_t tcpnal_send(nal_cb_t *n,
+ptl_err_t tcpnal_send(lib_nal_t *n,
void *private,
lib_msg_t *cookie,
ptl_hdr_t *hdr,
size_t len)
{
connection c;
- bridge b=(bridge)n->nal_data;
+ bridge b=(bridge)n->libnal_data;
struct iovec tiov[257];
static pthread_mutex_t send_lock = PTHREAD_MUTEX_INITIALIZER;
ptl_err_t rc = PTL_OK;
/* Function: tcpnal_recv
- * Arguments: nal_cb_t *nal: pointer to my nal control block
+ * Arguments: lib_nal_t *nal: pointer to my nal control block
* void *private: connection pointer passed through
* lib_parse()
* lib_msg_t *cookie: passed back to portals library
* blocking read of the requested data. must drain out the
* difference of mainpulated and requested lengths from the network
*/
-ptl_err_t tcpnal_recv(nal_cb_t *n,
+ptl_err_t tcpnal_recv(lib_nal_t *n,
void *private,
lib_msg_t *cookie,
unsigned int niov,
ptl_hdr_t hdr;
if (read_connection(c, (unsigned char *)&hdr, sizeof(hdr))){
- lib_parse(b->nal_cb, &hdr, c);
+ lib_parse(b->lib_nal, &hdr, c);
+ /*TODO: check error status*/
return(1);
}
return(0);
{
manager m;
- b->nal_cb->cb_send=tcpnal_send;
- b->nal_cb->cb_recv=tcpnal_recv;
+ b->lib_nal->libnal_send=tcpnal_send;
+ b->lib_nal->libnal_recv=tcpnal_recv;
b->shutdown=tcpnal_shutdown;
- if (!(m=init_connections(PNAL_PORT(b->nal_cb->ni.nid,
- b->nal_cb->ni.pid),
+ if (!(m=init_connections(PNAL_PORT(b->lib_nal->libnal_ni.ni_pid.nid,
+ b->lib_nal->libnal_ni.ni_pid.pid),
from_connection,b))){
/* TODO: this needs to shut down the
newly created junk */
return(PTL_NAL_FAILED);
}
/* XXX cfs hack */
- b->nal_cb->ni.pid=0;
+ b->lib_nal->libnal_ni.ni_pid.pid=0;
b->lower=m;
return(PTL_OK);
}
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
+if MODULES
if !LINUX25
modulefs_DATA = ptlbd$(KMODEXT)
endif
+endif
MOSTLYCLEANFILES = *.o *.ko *.mod.c
DIST_SOURCES = $(ptlbd-objs:%.o=%.c)
#endif
-struct ptlrpc_ni ptlrpc_interfaces[NAL_MAX_NR];
+struct ptlrpc_ni ptlrpc_interfaces[8];
int ptlrpc_ninterfaces;
/*
int number;
char *name;
} ptl_nis[] = {
+#ifndef CRAY_PORTALS
{QSWNAL, "qswnal"},
{SOCKNAL, "socknal"},
{GMNAL, "gmnal"},
{IBNAL, "ibnal"},
{TCPNAL, "tcpnal"},
- {CRAY_KB_ERNAL, "cray_kb_ernal"}};
+#else
+ {CRAY_KB_ERNAL, "cray_kb_ernal"},
+#endif
+ };
int rc;
int i;
struct ldlm_res_id;
struct ptlrpc_request_set;
-void ptlrpc_daemonize(void);
-
void ptlrpc_request_handle_notconn(struct ptlrpc_request *);
void lustre_assert_wire_constants(void);
int ptlrpc_import_in_recovery(struct obd_import *imp);
EXPORT_SYMBOL(ptlrpc_start_n_threads);
EXPORT_SYMBOL(ptlrpc_start_thread);
EXPORT_SYMBOL(ptlrpc_unregister_service);
+EXPORT_SYMBOL(ptlrpc_daemonize);
/* pack_generic.c */
EXPORT_SYMBOL(lustre_msg_swabbed);
#!/bin/bash
+# Put this script and cvs-modified-files.pl into your PATH (~/bin is good) and
+#
+# export CVSEDITOR=cvsdiffclient
+#
+# in your .bashrc and you will get a nice bunch of CVS commit reminders:
+# <merge/land tag information>
+# b=<bug_number>
+# r=<reviewed by>
+#
+# Remember to remove the leading "CVS: " part of the comment before saving
+# your commit comment if you want those entries to be saved.
[ -f .mergeinfo ] && . ./.mergeinfo
FILES=`cvs-modified-files.pl $1`
TMP=`mktemp /tmp/cvslog-XXXXXXXX`
if [ -f $TMP ]; then
- [ -f .mergeinfo ] && \
- echo "CVS: Update $child from $parent ($date)" >> $TMP
+ if [ -f .mergeinfo ]; then
+ . .mergeinfo
+ [ "$OPERATION" ] || OPERATION=Update
+ [ "$OPERWHERE" ] || OPERWHERE=from
+ echo "CVS: $OPERATION $child $OPERWHERE $parent ($date)" >> $TMP
+ fi
echo "CVS: did you update the ChangeLog for a bug fix?" >> $TMP
echo "CVS: b=" >> $TMP
echo "CVS: r=" >> $TMP
case $parent in
HEAD) : ;;
- b_*|b1*) : ;;
+ b_*|b[1-4]*) : ;;
*) parent="b_$parent" ;;
esac
case $child in
HEAD) : ;;
- b_*|b1*) : ;;
+ b_*|b[1-4]*) : ;;
*) child="b_$child"
esac
if [ "$parent" != "HEAD" -a "`cat CVS/Tag 2> /dev/null`" != "T$parent" ]; then
- echo "This script must be run within the $parent branch"
+ echo "$0: this script must be run within the $parent branch"
exit 1
fi
+TEST_FILE=${TEST_FILE:-ChangeLog} # does this need to be smarter?
+# Verify that CVS tag $1 is listed in the "cvs log" output of $TEST_FILE.
+# "HEAD" is always accepted.  Exits the script with status 3 when called
+# without an argument and with status 2 when the tag is not found.
+check_tag() {
+        if [ -z "$1" ]; then
+                echo "check_tag() missing arg"
+                # was "exit3", which is not a command and let the script go on
+                exit 3
+        fi
+        [ "$1" = "HEAD" ] && return
+        $CVS log $TEST_FILE | grep -q " $1: " && return
+        echo "$0: tag $1 not found in $TEST_FILE"
+        exit 2
+}
+
+check_tag $child
+check_tag ${CHILD}_BASE
+
dir=$3
cat << EOF > .mergeinfo
module=$module
dir=$dir
CONFLICTS=$CONFLICTS
+OPERATION=Land
+OPERWHERE=onto
EOF
echo PARENT $PARENT parent $parent CHILD $CHILD child $child date $date
install -m 644 "$CONFIG_FILE" "$DESTDIR/boot/config-${FULL_VERSION}"
mkdir -p "$DESTDIR/dev/shm"
+ mkdir -p "$DESTDIR/lib/modules/${FULL_VERSION}"
+
+ make CC="$CC" INSTALL_MOD_PATH="$DESTDIR" KERNELRELEASE="$FULL_VERSION" \
+ -s modules_install || \
+ fatal 1 "Error installing modules."
case "$TARGET_ARCH" in
i386 | i586 | i686 | athlon)
cp arch/i386/boot/bzImage "$DESTDIR/boot/vmlinuz-${FULL_VERSION}"
- cp vmlinux "$DESTDIR/boot/vmlinux-${FULL_VERSION}"
+ cp vmlinux "$DESTDIR/lib/modules/${FULL_VERSION}/"
+ ln -sf "../lib/modules/${FULL_VERSION}/vmlinux" "$DESTDIR/boot/vmlinux-${FULL_VERSION}"
;;
x86_64)
cp arch/x86_64/boot/bzImage "$DESTDIR/boot/vmlinuz-${FULL_VERSION}"
- cp vmlinux "$DESTDIR/boot/vmlinux-${FULL_VERSION}"
+ cp vmlinux "$DESTDIR/lib/modules/${FULL_VERSION}/"
+ ln -sf "../lib/modules/${FULL_VERSION}/vmlinux" "$DESTDIR/boot/vmlinux-${FULL_VERSION}"
;;
ia64)
gzip -cfv vmlinux > vmlinuz
mkdir -p "$DESTDIR/boot/efi/redhat"
- install -m 755 vmlinux "$DESTDIR/boot/efi/redhat/vmlinux-${FULL_VERSION}"
+ install -m 755 vmlinux "$DESTDIR/lib/modules/${FULL_VERSION}/"
install -m 755 vmlinuz "$DESTDIR/boot/efi/redhat/vmlinuz-${FULL_VERSION}"
+ ln -sf "../lib/modules/${FULL_VERSION}/vmlinux" "$DESTDIR/boot/efi/redhat/vmlinux-${FULL_VERSION}"
ln -sf "efi/redhat/vmlinux-${FULL_VERSION}" "$DESTDIR/boot/vmlinux-${FULL_VERSION}"
ln -sf "efi/redhat/vmlinuz-${FULL_VERSION}" "$DESTDIR/boot/vmlinuz-${FULL_VERSION}"
;;
*)
cp vmlinuz "$DESTDIR/boot/vmlinuz-${FULL_VERSION}"
- cp vmlinux "$DESTDIR/boot/vmlinux-${FULL_VERSION}"
+ cp vmlinux "$DESTDIR/lib/modules/${FULL_VERSION}/vmlinux-${FULL_VERSION}"
+ ln -sf "../lib/modules/${FULL_VERSION}/vmlinux-${FULL_VERSION}" "$DESTDIR/boot/vmlinux-${FULL_VERSION}"
+
;;
esac
- mkdir -p "$DESTDIR/lib/modules/${FULL_VERSION}"
- make CC="$CC" INSTALL_MOD_PATH="$DESTDIR" KERNELRELEASE="$FULL_VERSION" \
- -s modules_install || \
- fatal 1 "Error installing modules."
-
popd >/dev/null
}
%define nptlarchs %{all_x86}
#define nptlarchs noarch
%define rhbuild @RHBUILD@
+%define linux26 @LINUX26@
# disable build root strip policy
%define __spec_install_post /usr/lib/rpm/brp-compress || :
#clean up the destination
make -s mrproper -C $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}
-rm -rf $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/configs/*
+rm -rf $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/configs
+mkdir -p $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/configs
cp ../kernel_patches/kernel_configs/kernel-%{kversion}-@LUSTRE_TARGET@*.config $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/configs
cp ../kernel_patches/kernel_configs/kernel-%{kversion}-@LUSTRE_TARGET@-%{_target_cpu}%{dashtargetboard}.config $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/.config
if grep -q oldconfig_nonint $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/Makefile ; then
OLDCONFIG='oldconfig'
fi
make -s $OLDCONFIG -C $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}
+%if %{linux26}
+make -s include/asm -C $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}
+%else
make -s symlinks -C $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}
+%endif
make -s include/linux/version.h -C $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}
#this generates modversions info which we want to include and we may as
/usr/src/linux-%{KVERREL}/Makefile
/usr/src/linux-%{KVERREL}/README
/usr/src/linux-%{KVERREL}/REPORTING-BUGS
-/usr/src/linux-%{KVERREL}/Rules.make
/usr/src/linux-%{KVERREL}/arch
%ifarch sparc
/usr/src/linux-%{KVERREL}/arch/sparc64
%ifarch alpha sparc
/usr/src/linux-%{KVERREL}/include/math-emu
%endif
+%if %{linux26}
+%dir /usr/src/linux-%{KVERREL}/crypto
+%dir /usr/src/linux-%{KVERREL}/kdb
+%dir /usr/src/linux-%{KVERREL}/rpmify
+%dir /usr/src/linux-%{KVERREL}/security
+%else
+/usr/src/linux-%{KVERREL}/Rules.make
+%endif
%endif
%files doc
case $parent in
HEAD) : ;;
- b_*|b1*) : ;;
+ b_*|b[1-4]*) : ;;
*) parent="b_$parent" ;;
esac
case $child in
HEAD) : ;;
- b_*|b1*) : ;;
+ b_*|b[1-4]*) : ;;
*) child="b_$child"
esac
exit 1
fi
+TEST_FILE=${TEST_FILE:-ChangeLog} # does this need to be smarter?
+# Verify that CVS tag $1 is listed in the "cvs log" output of $TEST_FILE.
+# "HEAD" is always accepted.  Exits the script with status 3 when called
+# without an argument and with status 2 when the tag is not found.
+check_tag() {
+        if [ -z "$1" ]; then
+                echo "check_tag() missing arg"
+                # was "exit3", which is not a command and let the script go on
+                exit 3
+        fi
+        [ "$1" = "HEAD" ] && return
+        $CVS log $TEST_FILE | grep -q " $1: " && return
+        echo "$0: tag $1 not found in $TEST_FILE"
+        exit 2
+}
+
+check_tag $parent
+check_tag ${CHILD}_BASE
+
cat << EOF > .mergeinfo
parent=$parent
PARENT=$PARENT
date=$date
module=$module
CONFLICTS=$CONFLICTS
+OPERATION=Merge
+OPERWHERE=from
EOF
echo PARENT: $PARENT parent: $parent CHILD: $CHILD child: $child date: $date
ll_dirstripe_verify
openfilleddirunlink
copy_attr
+rename_many
noinst_PROGRAMS += wantedi statone runas openfile getdents mkdirdeep o_directory
noinst_PROGRAMS += small_write multiop sleeptest ll_sparseness_verify cmknod
noinst_PROGRAMS += ll_sparseness_write mrename ll_dirstripe_verify copy_attr
-noinst_PROGRAMS += openfilleddirunlink
+noinst_PROGRAMS += openfilleddirunlink rename_many
# noinst_PROGRAMS += ldaptest
bin_PROGRAMS = mcreate munlink mkdirmany iopentest1 iopentest2
endif # TESTS
mkdirdeep_LDADD=-L$(top_builddir)/portals/utils -lptlctl $(LIBREADLINE)
small_write_SOURCES = small_write.c
sleeptest_SOURCES = sleeptest.c
+rename_many_SOURCES = rename_many.c
#write_append_truncate_SOURCES=write_append_truncate.c
#write_append_truncate_CC=mpicc
#createmany_mpi_SOURCES=createmany_mpi.c
OSTDEV=${OSTDEV:-$ROOT/tmp/ost1-`hostname`}
OSTSIZE=${OSTSIZE:-50000}
FSTYPE=${FSTYPE:-ext3}
-TIMEOUT=${TIMEOUT:-10}
+TIMEOUT=${TIMEOUT:-20}
UPCALL=${UPCALL:-$PWD/replay-single-upcall.sh}
STRIPE_BYTES=${STRIPE_BYTES:-524288}
}
run_test 15 "failed open (-ENOMEM)"
+# Write 0 into every llite read_ahead proc file.
+stop_read_ahead() {
+        for f in /proc/fs/lustre/llite/*/read_ahead
+        do
+                echo 0 >$f
+        done
+}
+
+# Write 1 into every llite read_ahead proc file.
+start_read_ahead() {
+        for f in /proc/fs/lustre/llite/*/read_ahead
+        do
+                echo 1 >$f
+        done
+}
+
test_16() {
do_facet client cp /etc/termcap $MOUNT
sync
+ stop_read_ahead
#define OBD_FAIL_PTLRPC_BULK_PUT_NET 0x504 | OBD_FAIL_ONCE
sysctl -w lustre.fail_loc=0x80000504
# give recovery a chance to finish (shouldn't take long)
sleep $TIMEOUT
do_facet client "cmp /etc/termcap $MOUNT/termcap" || return 2
+ start_read_ahead
}
run_test 16 "timeout bulk put, evict client (2732)"
--- /dev/null
+#define PATH_LENGTH 35
+#include <math.h>
+#include <signal.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <getopt.h>
+
+/* Name pair for one test file: created as 'from', then renamed to 'to'. */
+struct names {
+        char from[PATH_LENGTH];
+        char to[PATH_LENGTH];
+} *names;
+
+unsigned int loop_count = 500;  /* -n: number of test loops */
+int file_count = 1000;          /* -f: files handled per loop */
+int seed;                       /* -s: added to the loop number to seed names */
+int loops;                      /* loops completed so far */
+/* Set from the signal handler and polled by the main loop, so it has to be
+ * an object that may safely be written asynchronously. */
+volatile sig_atomic_t stop;
+long start;                     /* time(0) taken before the first loop */
+
+int opt_exit_on_err;            /* -x given: keep going after an error */
+int opt_verbose;                /* -v given: print status after every loop */
+int opt_create_only;            /* -c given: stop after the first create pass */
+int opt_rename_only;            /* -r given: stop after the first rename pass */
+int creat_errors;               /* failed mknod() calls */
+int rename_errors;              /* failed rename() calls */
+int unlink_errors;              /* failed unlink() calls and leftover files */
+
+/*
+ * Print the command line synopsis and the meaning of every option to
+ * stderr.  The synopsis lists exactly the options accepted by the
+ * "cf:n:rs:vx" getopt string used in main().
+ */
+void usage(const char *progname)
+{
+        fprintf(stderr, "usage: %s [-c] [-f numfiles] [-n loops] [-r] "
+                "[-s seed] [-v] [-x] [dir]\n"
+                "\t-c: only do the create step of first loop\n"
+                "\t-f: number of files to create/rename/unlink per loop\n"
+                "\t-n: number of test loops (0 to run forever)\n"
+                "\t-r: only do the rename step of first loop\n"
+                "\t-s: starting seed (equals loop number by default)\n"
+                "\t-v: verbose\n"
+                "\t-x: don't exit on error\n", progname);
+}
+
+/*
+ * Status reporter, installed as a signal handler and also called directly
+ * with sig == 0.  Prints elapsed seconds, completed loops and the three
+ * error counters, then re-arms a 60 second alarm.  SIGQUIT, or two SIGINTs
+ * less than two seconds apart, set 'stop'.
+ */
+void handler(int sig)
+{
+        static long prev_sigint;
+        long when = time(0);
+
+        signal(SIGINT, handler);
+        signal(SIGALRM, handler);
+
+        printf("%6lds %8d iterations %d/%d/%d errors",
+               when - start, loops, creat_errors, rename_errors, unlink_errors);
+        if (sig == 0)
+                printf("\n");
+        else
+                printf(" - use SIGQUIT (^\\) or ^C^C to kill\n");
+
+        switch (sig) {
+        case SIGQUIT:
+                stop = 1;
+                break;
+        case SIGINT:
+                /* a second ^C shortly after the first one ends the run */
+                if (when - prev_sigint < 2)
+                        stop = 1;
+                prev_sigint = when;
+                break;
+        default:
+                break;
+        }
+
+        alarm(60);
+}
+
+extern char *optarg;
+extern int optind;
+
+/*
+ * rename_many entry point.
+ *
+ * Every loop creates a scratch directory "tmp" below the current (or the
+ * given) directory, creates file_count files with pseudo-random names in
+ * it, renames each of them, unlinks them and removes the directory again.
+ *
+ * Exit status: 0 on success (also when -c or -r ended the run early),
+ * 1 for usage, allocation and directory setup/teardown failures, 2 when
+ * the target directory cannot be entered, 4 when a create, rename or
+ * unlink fails and -x was not given.
+ */
+int main(int argc, char *argv[])
+{
+        unsigned long n;
+        char msg[2 * PATH_LENGTH + 64], *end = NULL;
+        int c;
+        int h1, h2;
+        int i;
+
+        /* c has to be an int: getopt() returns -1 after the last option,
+         * which a plain char cannot hold where char is unsigned. */
+        while ((c = getopt(argc, argv, "cf:n:rs:vx")) != -1) {
+                switch(c) {
+                case 'c':
+                        ++opt_create_only;
+                        break;
+                case 'f':
+                        i = strtoul(optarg, &end, 0);
+                        /* zero or negative counts make no sense */
+                        if (i > 0 && end != NULL && *end == '\0') {
+                                file_count = i;
+                        } else {
+                                fprintf(stderr, "bad file count '%s'\n",optarg);
+                                usage(argv[0]);
+                                return 1;
+                        }
+                        break;
+                case 'n':
+                        i = strtoul(optarg, &end, 0);
+                        /* 0 is a valid value here: it means "run forever" */
+                        if (i >= 0 && end != optarg && *end == '\0') {
+                                loop_count = i;
+                        } else {
+                                fprintf(stderr, "bad loop count '%s'\n",optarg);
+                                usage(argv[0]);
+                                return 1;
+                        }
+                        break;
+                case 'r':
+                        ++opt_rename_only;
+                        break;
+                case 's':
+                        i = strtoul(optarg, &end, 0);
+                        if (end && *end == '\0') {
+                                seed = i;
+                        } else {
+                                fprintf(stderr, "bad seed '%s'\n", optarg);
+                                usage(argv[0]);
+                                return 1;
+                        }
+                        break;
+                case 'v':
+                        ++opt_verbose;
+                        break;
+                case 'x':
+                        ++opt_exit_on_err;
+                        break;
+                default:
+                        usage(argv[0]);
+                        return 1;
+                }
+        }
+
+        names = malloc(sizeof(*names) * file_count);
+        if (names == NULL) {
+                perror("malloc");
+                return(1);
+        }
+
+        /* h2: hex digits needed for the file index, h1: hex digits of each
+         * of the four random fields so that a name fits in PATH_LENGTH */
+        h2 = snprintf(msg, sizeof(msg), "%x", (unsigned int)file_count);
+        h1 = (PATH_LENGTH - h2 - 2) / 4;
+
+        /* mask keeping each random field within h1 hex digits */
+        n = (1ULL << h1 * 4) - 1;
+
+        start = time(0);
+
+        signal(SIGQUIT, handler);
+        signal(SIGINT, handler);
+        signal(SIGALRM, handler);
+        signal(SIGUSR1, handler);
+        alarm(60);
+
+        if (argc > optind + 1) {
+                fprintf(stderr, "too many extra args %d\n", argc - optind);
+                usage(argv[0]);
+                return 1;
+        } else if (argv[optind] != NULL) {
+                if (chdir(argv[optind]) < 0) {
+                        /* bounded: the path comes from the command line */
+                        snprintf(msg, sizeof(msg), "chdir '%s'", argv[optind]);
+                        perror(msg);
+                        return 2;
+                }
+        }
+
+        /* loop_count == 0 means no upper bound, as documented by usage() */
+        while (!stop &&
+               (loop_count == 0 || (unsigned int)loops < loop_count)) {
+                unsigned int j, k, l, m;
+
+                /* random() is used below, so seed that generator */
+                srandom(seed + loops);
+                if (mkdir("tmp", S_IRWXU) == -1) {
+                        perror("mkdir tmp");
+                        return(1);
+                }
+                if (chdir("tmp") == -1) {
+                        perror("chdir tmp");
+                        return(1);
+                }
+
+                /* 'from' and 'to' differ only in the digit before the index */
+                for (i = 0; i < file_count ; i++) {
+                        j = random() & n;
+                        k = random() & n;
+                        l = random() & n;
+                        m = random() & n;
+                        snprintf(names[i].from, sizeof(names[i].from),
+                                 "%0*x%0*x%0*x%0*x0%0*x",
+                                 h1, j, h1, k, h1, l, h1, m,
+                                 h2, (unsigned int)i);
+                        snprintf(names[i].to, sizeof(names[i].to),
+                                 "%0*x%0*x%0*x%0*x1%0*x",
+                                 h1, j, h1, k, h1, l, h1, m,
+                                 h2, (unsigned int)i);
+                }
+
+                for (i = 0; i < file_count; i++) {
+                        if (mknod(names[i].from, S_IFREG | S_IRWXU, 0) == -1) {
+                                snprintf(msg, sizeof(msg),
+                                         "loop %d.%d: creat %s",
+                                         loops, i, names[i].from);
+                                perror(msg);
+                                creat_errors++;
+                                if (!opt_exit_on_err)
+                                        return 4;
+                        }
+                }
+
+                if (opt_create_only)
+                        return 0;
+
+                for (i = 0; i < file_count; i++) {
+                        if (rename(names[i].from, names[i].to) == -1) {
+                                snprintf(msg, sizeof(msg),
+                                         "loop %d.%d: rename %s to %s",
+                                         loops, i, names[i].from, names[i].to);
+                                perror(msg);
+                                rename_errors++;
+                                if (!opt_exit_on_err)
+                                        return 4;
+                        }
+                }
+
+                if (opt_rename_only)
+                        return 0;
+
+                for (i = 0; i < file_count; i++) {
+                        if (unlink(names[i].to) == -1) {
+                                snprintf(msg, sizeof(msg),
+                                         "loop %d.%d: unlink %s",
+                                         loops, i, names[i].to);
+                                perror(msg);
+                                unlink_errors++;
+                                if (!opt_exit_on_err)
+                                        return 4;
+                        }
+                }
+
+                if (chdir("..") == -1) {
+                        perror("chdir ..");
+                        return(1);
+                }
+
+                if (rmdir("tmp") == -1) {
+                        /* directory not empty: look for files that are
+                         * still reachable under their original name */
+                        if (chdir("tmp") == -1) {
+                                perror("chdir tmp 2");
+                                return(1);
+                        }
+                        for (i = 0; i < file_count; i++) {
+                                if (unlink(names[i].from) != -1) {
+                                        /* report the name that was found */
+                                        fprintf(stderr, "loop %d.%d: "
+                                                "unexpected file %s\n",
+                                                loops, i, names[i].from);
+                                        unlink_errors++;
+                                        if (!opt_exit_on_err)
+                                                return 4;
+                                }
+                        }
+                        if (chdir("..") == -1) {
+                                perror("chdir .. 2");
+                                return(1);
+                        }
+                        if (rmdir("tmp") == -1) {
+                                perror("rmdir tmp");
+                                return(1);
+                        }
+                }
+
+                loops++;
+                if (opt_verbose)
+                        handler(0);
+        }
+
+        /* verbose runs already printed the final status inside the loop */
+        if (!opt_verbose)
+                handler(0);
+        return(0);
+}
}
run_test 6 "open1, open2, unlink |X| close1 [fail mds] close2"
+# Open the same file through both mount points, unlink it, then close the
+# second opener before the MDS failure and the first one after it.
+# Returns 1 if either multiop exits non-zero, 2 if the file still exists
+# afterwards.
+test_7() {
+        mcreate $MOUNT1/a
+        multiop $MOUNT2/a o_c &
+        pid1=$!
+        multiop $MOUNT1/a o_c &
+        pid2=$!
+        # let both multiop processes get their open done
+        sleep 1
+        rm -f $MOUNT1/a
+        replay_barrier mds
+        kill -USR1 $pid2
+        if ! wait $pid2; then
+                return 1
+        fi
+
+        fail mds
+        kill -USR1 $pid1
+        if ! wait $pid1; then
+                return 1
+        fi
+        if [ -e $MOUNT2/a ]; then
+                return 2
+        fi
+        return 0
+}
+run_test 7 "open1, open2, unlink |X| close2 [fail mds] close1"
+
if [ "$ONLY" != "setup" ]; then
equals_msg test complete, cleaning up
cleanup
run_test 42 "recovery after ost failure"
# b=2530
-# directory orphans can't be unlinked from PENDING directory
+# timeout in MDS/OST recovery RPC will LBUG MDS
test_43() {
replay_barrier mds
}
run_test 46 "Don't leak file handle after open resend (3325)"
+# b=2824
+# Fail the OST with fail_loc 0x80000204 armed, wait three timeouts and
+# check that files can still be created and unlinked afterwards.
+# Return codes 1-4 identify the step that failed.
+test_47() {
+
+        # make sure precreate has happened on all OSTs first, in case
+        # this test is run on its own
+        if ! createmany -o $DIR/$tfile 20; then
+                return 1
+        fi
+
+        # OBD_FAIL_OST_CREATE_NET 0x204
+        fail ost
+        do_facet ost "sysctl -w lustre.fail_loc=0x80000204"
+        if ! df $MOUNT; then
+                return 2
+        fi
+
+        # give the MDS time to notice the OST failure, try to recover,
+        # fail and recover once more
+        sleep $((3 * TIMEOUT))
+
+        # this createmany would hang without the fix for 2824
+        if ! createmany -o $DIR/$tfile 20; then
+                return 3
+        fi
+        if ! unlinkmany $DIR/$tfile 20; then
+                return 4
+        fi
+
+        do_facet ost "sysctl -w lustre.fail_loc=0"
+        return 0
+}
+run_test 47 "MDS->OSC failure during precreate cleanup (2824)"
+
equals_msg test complete, cleaning up
$CLEANUP
# bug number for skipped test: 2108
ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"24j 48c 48d 58"}
# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
+case `uname -r` in
+2.6.*) ALWAYS_EXCEPT="$ALWAYS_EXCEPT 54c 55" # bug 3117
+esac
[ "$ALWAYS_EXCEPT$EXCEPT" ] && echo "Skipping tests: $ALWAYS_EXCEPT $EXCEPT"
export PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
TMP=${TMP:-/tmp}
+FSTYPE=${FSTYPE:-ext3}
CHECKSTAT=${CHECKSTAT:-"checkstat -v"}
CREATETEST=${CREATETEST:-createtest}
echo preparing for tests involving mounts
EXT2_DEV=${EXT2_DEV:-/tmp/SANITY.LOOP}
touch $EXT2_DEV
-mke2fs -F $EXT2_DEV 1000 > /dev/null
-
-EXT3_DEV=${EXT3_DEV:-/tmp/SANITY_EXT3_DEV.LOOP}
-touch $EXT3_DEV
-mkfs.ext3 -F $EXT3_DEV 10000 > /dev/null
+mke2fs -j -F $EXT2_DEV 8000 > /dev/null
test_0() {
touch $DIR/f
}
run_test 24n "Statting the old file after renameing (Posix rename 2)"
+# Run rename_many for 10 verbose loops with seed 3287 in $DIR; the test is
+# a no-op (status 0) when check_kernel_version 37 fails.
+test_24o() {
+        if ! check_kernel_version 37; then
+                return 0
+        fi
+        rename_many -s 3287 -v -n 10 $DIR
+}
+run_test 24o "rename of files during htree split ==============="
+
test_25a() {
echo '== symlink sanity ============================================='
mkdir $DIR/d25
#set -vx
mkdir -p $DIR/d48c/dir
cd $DIR/d48c/dir
- rmdir $DIR/d48c/dir || error "remove cwd $DIR/d48c/dir failed"
+ $TRACE rmdir $DIR/d48c/dir || error "remove cwd $DIR/d48c/dir failed"
$TRACE touch foo && error "'touch foo' worked after removing cwd"
$TRACE mkdir foo && error "'mkdir foo' worked after removing cwd"
$TRACE ls . && error "'ls .' worked after removing cwd"
$TRACE mkdir . && error "'mkdir .' worked after removing cwd"
$TRACE rmdir . && error "'rmdir .' worked after removing cwd"
$TRACE ln -s . foo && error "'ln -s .' worked after removing cwd" ||true
- $TRACE cd .. || error "'cd ..' failed after removing cwd"
+ $TRACE cd .. || echo "'cd ..' failed after removing cwd (`pwd`)"
}
run_test 48c "Access removed working subdir (should return errors)"
#set -vx
mkdir -p $DIR/d48d/dir
cd $DIR/d48d/dir
- rm -r $DIR/d48d || error "remove cwd and parent $DIR/d48d failed"
+ pwd
+ ls .
+ $TRACE rm -vr $DIR/d48d || error "remove cwd+parent $DIR/d48d failed"
$TRACE touch foo && error "'touch foo' worked after removing cwd"
$TRACE mkdir foo && error "'mkdir foo' worked after removing cwd"
$TRACE ls . && error "'ls .' worked after removing cwd"
- $TRACE ls .. && error "'ls ..' worked after removing cwd"
+ $TRACE ls .. && echo "'ls ..' worked after removing cwd" # bug 3415
$TRACE cd . && error "'cd .' worked after recreate cwd"
$TRACE mkdir . && error "'mkdir .' worked after removing cwd"
$TRACE rmdir . && error "'rmdir .' worked after removing cwd"
test_55() {
rm -rf $DIR/d55
mkdir $DIR/d55
- mount -t ext3 -o loop,iopen $EXT3_DEV $DIR/d55 || error
+ mount -t $FSTYPE -o loop,iopen $EXT2_DEV $DIR/d55 || error
touch $DIR/d55/foo
$IOPENTEST1 $DIR/d55/foo $DIR/d55 || error
$IOPENTEST2 $DIR/d55 || error
- echo "check for $EXT3_DEV. Please wait..."
+ echo "check for $EXT2_DEV. Please wait..."
rm -rf $DIR/d55/*
umount $DIR/d55 || error
}
"/proc/qsnet/elan4/config/elan4_mainint_punt_loops"]
for p in procfiles:
if os.access(p, os.R_OK):
- run ("echo 0 > " + p)
+ run ("echo 1 > " + p)
def sys_set_ptldebug(ptldebug):
if config.ptldebug:
#include <string.h>
#include <sys/mount.h>
#include <mntent.h>
+#define _GNU_SOURCE
+#include <getopt.h>
#include "obdctl.h"
#include <portals/ptlctl.h>
-int debug = 0;
-int verbose = 0;
-int nomtab = 0;
+int debug;
+int verbose;
+int nomtab;
+int force;
static char *progname = NULL;
+typedef struct { /* one entry built from a "route=" mount option */
+ ptl_nid_t gw; /* gateway NID */
+ ptl_nid_t lo; /* low end of the target NID range */
+ ptl_nid_t hi; /* high end of the target NID range */
+} llmount_route_t;
+
+#define MAX_ROUTES 1024 /* capacity of routes[] */
+int route_index; /* number of routes[] entries filled so far */
+ptl_nid_t lmd_cluster_id = 0; /* value of the "cluster_id" mount option */
+llmount_route_t routes[MAX_ROUTES]; /* filled by parse_route() */
+
+/*
+ * Check whether the mount table (MOUNTED) already has an entry with the
+ * given fsname, mount point and type.
+ *
+ * Returns 1, after printing a message to stderr, when such an entry is
+ * found.  Returns 0 when there is none, when the mount table cannot be
+ * opened, or when the global 'force' flag is set (no check is done then).
+ */
+static int check_mtab_entry(char *spec, char *mtpt, char *type)
+{
+        FILE *fp;
+        struct mntent *mnt;
+        int found = 0;
+
+        if (force)
+                return(0);
+
+        fp = setmntent(MOUNTED, "r");
+        if (fp == NULL)
+                return(0);
+
+        while ((mnt = getmntent(fp)) != NULL) {
+                if (strcmp(mnt->mnt_fsname, spec) == 0 &&
+                    strcmp(mnt->mnt_dir, mtpt) == 0 &&
+                    strcmp(mnt->mnt_type, type) == 0) {
+                        fprintf(stderr, "%s: according to %s %s is "
+                                "already mounted on %s\n",
+                                progname, MOUNTED, spec, mtpt);
+                        found = 1; /* or should we return an error? */
+                        break;
+                }
+        }
+        /* close the stream on every path; the match case used to leak it */
+        endmntent(fp);
+
+        return(found);
+}
+
static void
-update_mtab_entry(char *spec, char *node, char *type, char *opts,
- int flags, int freq, int pass)
+update_mtab_entry(char *spec, char *mtpt, char *type, char *opts,
+ int flags, int freq, int pass)
{
FILE *fp;
struct mntent mnt;
mnt.mnt_fsname = spec;
- mnt.mnt_dir = node;
+ mnt.mnt_dir = mtpt;
mnt.mnt_type = type;
mnt.mnt_opts = opts ? opts : "";
mnt.mnt_freq = freq;
if (!nomtab) {
fp = setmntent(MOUNTED, "a+");
if (fp == NULL) {
- fprintf(stderr, "%s: setmntent(%s): %s:",
+ fprintf(stderr, "%s: setmntent(%s): %s:",
progname, MOUNTED, strerror (errno));
} else {
if ((addmntent (fp, &mnt)) == 1) {
int
print_options(struct lustre_mount_data *lmd)
{
+ int i;
+
printf("mds: %s\n", lmd->lmd_mds);
printf("profile: %s\n", lmd->lmd_profile);
printf("server_nid: "LPX64"\n", lmd->lmd_server_nid);
printf("server_ipaddr: 0x%x\n", lmd->lmd_server_ipaddr);
printf("port: %d\n", lmd->lmd_port);
+ for (i = 0; i < route_index; i++)
+ printf("route: 0x%llx : 0x%llx - 0x%llx\n",
+ routes[i].gw, routes[i].lo, routes[i].hi);
+
return 0;
}
-int
-parse_options(char * options, struct lustre_mount_data *lmd)
+/*
+ * Split "lo[-hi]" in place at the first '-' and parse both ends as NIDs.
+ * Without a '-' the single value is used for both *lo and *hi.
+ * Returns 0 on success; prints a message and returns -1 when either end
+ * cannot be parsed.
+ */
+static int parse_nid_range(char *spec, ptl_nid_t *lo, ptl_nid_t *hi)
+{
+        char *hi_str = strchr(spec, '-');
+
+        if (hi_str == NULL) {
+                hi_str = spec;
+        } else {
+                hi_str[0] = '\0';
+                hi_str++;
+        }
+
+        if (ptl_parse_nid(lo, spec) != 0) {
+                fprintf(stderr, "%s: can't parse NID %s\n", progname, spec);
+                return(-1);
+        }
+
+        if (ptl_parse_nid(hi, hi_str) != 0) {
+                fprintf(stderr, "%s: can't parse NID %s\n", progname, hi_str);
+                return(-1);
+        }
+
+        return(0);
+}
+
+/*
+ * Parse one "route=<gw>[-<gw>]:<low>[-<high>]" option and append one
+ * routes[] entry per gateway NID in the gateway range, each carrying the
+ * same target range.
+ *
+ * opteq points at the '=' of the option and opttgts at the ':' separating
+ * gateways from targets; the string is modified in place.
+ * Returns 0 on success, -1 on a parse error or when routes[] is full.
+ */
+static int parse_route(char *opteq, char *opttgts)
{
- ptl_nid_t nid = 0;
+        ptl_nid_t gw_lo, gw_hi, tgt_lo, tgt_hi;
+
+        /* cut the option at ':' so the gateway part is its own string */
+        opttgts[0] = '\0';
+
+        if (parse_nid_range(opteq + 1, &gw_lo, &gw_hi) != 0)
+                return(-1);
+
+        if (parse_nid_range(opttgts + 1, &tgt_lo, &tgt_hi) != 0)
+                return(-1);
+
+        while (gw_lo <= gw_hi) {
+                if (route_index >= MAX_ROUTES) {
+                        fprintf(stderr, "%s: too many routes %d\n",
+                                progname, MAX_ROUTES);
+                        return(-1);
+                }
+
+                routes[route_index].gw = gw_lo;
+                routes[route_index].lo = tgt_lo;
+                routes[route_index].hi = tgt_hi;
+                route_index++;
+                gw_lo++;
+        }
+
+        return(0);
+}
+
+/*
+ * Walk the comma-separated mount option string and store each
+ * recognised "name=value" option in *lmd, in lmd_cluster_id, or (for
+ * "route") in the routes[] table. The string is split in place by
+ * strtok(). Returns -1 as soon as an option value cannot be parsed.
+ */
+int parse_options(char * options, struct lustre_mount_data *lmd)
+{
+ ptl_nid_t nid = 0, cluster_id = 0;
int val;
- char *opt;
- char * opteq;
+ char *opt, *opteq, *opttgts;
/* parsing ideas here taken from util-linux/mount/nfsmount.c */
for (opt = strtok(options, ","); opt; opt = strtok(NULL, ",")) {
val = atoi(opteq + 1);
*opteq = '\0';
if (!strcmp(opt, "nettype")) {
- lmd->lmd_nal = ptl_name2nal(opteq+1);
- } else if(!strcmp(opt, "local_nid")) {
- if (ptl_parse_nid(&nid, opteq+1) != 0) {
+ lmd->lmd_nal = ptl_name2nal(opteq + 1);
+ } else if(!strcmp(opt, "cluster_id")) {
+ if (ptl_parse_nid(&cluster_id, opteq+1) != 0) {
+ fprintf (stderr, "%s: can't parse NID "
+ "%s\n", progname, opteq+1);
+ return (-1);
+ }
+ lmd_cluster_id = cluster_id;
+ } else if(!strcmp(opt, "route")) {
+ if (!(opttgts = strchr(opteq + 1, ':'))) {
+ fprintf(stderr, "%s: Route must be "
+ "of the form: route="
+ "<gw>[-<gw>]:<low>[-<high>]\n",
+ progname);
+ return(-1);
+ }
+ /* parse_route() has already reported the problem;
+  * fail here too instead of continuing with a
+  * missing or partial route table. */
+ if (parse_route(opteq, opttgts) != 0)
+ return(-1);
+ } else if (!strcmp(opt, "local_nid")) {
+ if (ptl_parse_nid(&nid, opteq + 1) != 0) {
fprintf (stderr, "%s: "
"can't parse NID %s\n",
progname,
return (-1);
}
lmd->lmd_local_nid = nid;
- } else if(!strcmp(opt, "server_nid")) {
- if (ptl_parse_nid(&nid, opteq+1) != 0) {
+ } else if (!strcmp(opt, "server_nid")) {
+ if (ptl_parse_nid(&nid, opteq + 1) != 0) {
fprintf (stderr, "%s: "
"can't parse NID %s\n",
- progname, opteq+1);
+ progname, opteq + 1);
return (-1);
}
lmd->lmd_server_nid = nid;
return (-1);
}
- lmd->lmd_local_nid = nid;
+ lmd->lmd_local_nid = nid + lmd_cluster_id;
return 0;
}
+/*
+ * Fill *lmd from the "host:/mds/profile" source argument and the mount
+ * option string. In the lines visible here it returns -EINVAL when
+ * *lmd has a bad magic or source is too long for the local buffer, and
+ * -1 when source is not in host:/mds/profile form.
+ */
int
build_data(char *source, char *options, struct lustre_mount_data *lmd)
{
- char target[1024];
- char *hostname = NULL;
- char *mds = NULL;
- char *profile = NULL;
- char *s;
+ char buf[1024];
+ char *hostname = NULL, *mds = NULL, *profile = NULL, *s;
int rc;
if (lmd_bad_magic(lmd))
return -EINVAL;
- if (strlen(source) > sizeof(target) + 1) {
- fprintf(stderr, "%s: "
- "exessively long host:/mds/profile argument\n",
+ /* strcpy() below also writes the terminating NUL, so the string
+  * itself must be strictly shorter than the buffer. */
+ if (strlen(source) >= sizeof(buf)) {
+ fprintf(stderr, "%s: host:/mds/profile argument too long\n",
progname);
return -EINVAL;
}
- strcpy(target, source);
- if ((s = strchr(target, ':'))) {
- hostname = target;
+ strcpy(buf, source);
+ if ((s = strchr(buf, ':'))) {
+ hostname = buf;
*s = '\0';
while (*++s == '/')
*s = '\0';
profile = s + 1;
} else {
- fprintf(stderr, "%s: "
- "directory to mount not in "
+ fprintf(stderr, "%s: directory to mount not in "
"host:/mds/profile format\n",
progname);
return(-1);
progname);
return(-1);
}
- if (verbose)
- printf("host: %s\nmds: %s\nprofile: %s\n", hostname, mds,
- profile);
rc = parse_options(options, lmd);
if (rc)
return 0;
}
-int
-main(int argc, char * const argv[])
+/*
+ * Install every entry of routes[] into the portals router through
+ * IOC_PORTAL_NAL_CMD ioctls, using lmd->lmd_nal as the gateway NAL.
+ *
+ * Before adding a route, the existing routes are enumerated by index
+ * with NAL_CMD_GET_ROUTE; a failing ioctl is taken as the end of the
+ * list. An entry that is already present is skipped, so repeated
+ * mounts do not add duplicates.
+ *
+ * Returns 0 on success, or -1 after the first route that cannot be
+ * added (remaining entries are not attempted).
+ */
+static int set_routes(struct lustre_mount_data *lmd) {
+ struct portals_cfg pcfg;
+ struct portal_ioctl_data data;
+ int i, j, route_exists, rc, err = 0;
+
+ register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH);
+
+ for (i = 0; i < route_index; i++) {
+
+ /* Check for existing routes so as not to add duplicates */
+ for (j = 0; ; j++) {
+ PCFG_INIT(pcfg, NAL_CMD_GET_ROUTE);
+ pcfg.pcfg_nal = ROUTER;
+ pcfg.pcfg_count = j;
+
+ PORTAL_IOC_INIT(data);
+ data.ioc_pbuf1 = (char*)&pcfg;
+ data.ioc_plen1 = sizeof(pcfg);
+ data.ioc_nid = pcfg.pcfg_nid;
+
+ rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data);
+ if (rc != 0) {
+ route_exists = 0;
+ break;
+ }
+
+ /* Routes are added below with MIN/MAX-normalised
+  * bounds, so compare against the same normalised
+  * values; otherwise a route given as high-low is
+  * never recognised and gets re-added. */
+ if ((pcfg.pcfg_gw_nal == lmd->lmd_nal) &&
+ (pcfg.pcfg_nid == routes[i].gw) &&
+ (pcfg.pcfg_nid2 == MIN(routes[i].lo, routes[i].hi)) &&
+ (pcfg.pcfg_nid3 == MAX(routes[i].lo, routes[i].hi))) {
+ route_exists = 1;
+ break;
+ }
+ }
+
+ if (route_exists)
+ continue;
+
+ PCFG_INIT(pcfg, NAL_CMD_ADD_ROUTE);
+ pcfg.pcfg_nid = routes[i].gw;
+ pcfg.pcfg_nal = ROUTER;
+ pcfg.pcfg_gw_nal = lmd->lmd_nal;
+ pcfg.pcfg_nid2 = MIN(routes[i].lo, routes[i].hi);
+ pcfg.pcfg_nid3 = MAX(routes[i].lo, routes[i].hi);
+
+ PORTAL_IOC_INIT(data);
+ data.ioc_pbuf1 = (char*)&pcfg;
+ data.ioc_plen1 = sizeof(pcfg);
+ data.ioc_nid = pcfg.pcfg_nid;
+
+ rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data);
+ if (rc != 0) {
+ fprintf(stderr, "%s: Unable to add route "
+ "0x%llx : 0x%llx - 0x%llx\n[%d] %s\n",
+ progname, routes[i].gw, routes[i].lo,
+ routes[i].hi, errno, strerror(errno));
+ err = -1;
+ break;
+ }
+ }
+
+ unregister_ioc_dev(PORTALS_DEV_ID);
+ return err;
+}
+
+/*
+ * Write the one-line usage summary to 'out' and terminate the program.
+ * The exit status is 0 when the text went to stdout and 1 for any other
+ * stream. Does not return.
+ */
+void usage(FILE *out)
{
- char * source = argv[1];
- char * target = argv[2];
- char * options = "";
- int opt;
- int i = 3;
- struct lustre_mount_data lmd;
+ int status = (out == stdout) ? 0 : 1;
+
+ fprintf(out, "usage: %s <source> <target> [-f] [-v] [-n] [-o mntopt]\n", progname);
+ exit(status);
+}
- int rc;
+int main(int argc, char *const argv[])
+{
+ char *source, *target, *options = "";
+ int i, nargs = 3, opt, rc;
+ struct lustre_mount_data lmd;
+ static struct option long_opt[] = {
+ {"force", 0, 0, 'f'},
+ {"help", 0, 0, 'h'},
+ {"nomtab", 0, 0, 'n'},
+ {"options", 1, 0, 'o'},
+ {"verbose", 0, 0, 'v'},
+ {0, 0, 0, 0}
+ };
progname = strrchr(argv[0], '/');
progname = progname ? progname + 1 : argv[0];
- while ((opt = getopt(argc, argv, "vno:")) != EOF) {
+ while ((opt = getopt_long(argc, argv, "fno:v", long_opt, NULL)) != EOF){
switch (opt) {
- case 'v':
- verbose = 1;
- printf("verbose: %d\n", verbose);
- i++;
+ case 'f':
+ ++force;
+ printf("force: %d\n", force);
+ nargs++;
+ break;
+ case 'h':
+ usage(stdout);
break;
case 'n':
- nomtab = 1;
+ ++nomtab;
printf("nomtab: %d\n", nomtab);
- i++;
+ nargs++;
break;
case 'o':
options = optarg;
- i++;
+ nargs++;
+ break;
+ case 'v':
+ ++verbose;
+ printf("verbose: %d\n", verbose);
+ nargs++;
break;
default:
- i++;
+ fprintf(stderr, "%s: unknown option '%c'\n",
+ progname, opt);
+ usage(stderr);
break;
}
}
- if (argc < i) {
- fprintf(stderr,
- "%s: too few arguments\n"
- "Usage: %s <source> <target> [-v] [-n] [-o ...]\n",
- progname, progname);
- exit(1);
+ if (optind + 2 > argc) {
+ fprintf(stderr, "%s: too few arguments\n", progname);
+ usage(stderr);
}
- if (verbose)
- for (i = 0; i < argc; i++) {
+ source = argv[optind];
+ target = argv[optind + 1];
+
+ if (verbose) {
+ for (i = 0; i < argc; i++)
printf("arg[%d] = %s\n", i, argv[i]);
- }
+ printf("source = %s, target = %s\n", source, target);
+ }
+
+ if (check_mtab_entry(source, target, "lustre"))
+ exit(32);
init_options(&lmd);
rc = build_data(source, options, &lmd);
exit(1);
}
+ rc = set_routes(&lmd);
+ if (rc) {
+ exit(1);
+ }
+
if (debug) {
printf("%s: debug mode, not mounting\n", progname);
exit(0);