EXTRA_DIST = Rules.linux archdep.m4 include
DIST_SUBDIRS = libcfs portals knals unals utils tests doc router
+
+# Choose which subdirectories get built:
+#  - LIBLUSTRE:     portals, unals and utils only
+#  - CRAY_PORTALS:  libcfs, tests and doc only
+#  - otherwise:     every directory (the same list as DIST_SUBDIRS)
if LIBLUSTRE
SUBDIRS = portals unals utils
else
+
+if CRAY_PORTALS
+SUBDIRS = libcfs tests doc
+else
SUBDIRS = libcfs portals knals unals utils tests doc router
endif
+
+endif
AM_CONDITIONAL(INKERNEL, test x$enable_inkernel = xyes)
echo "Makefile for in kernel build: $INKERNEL"
+# -------- are we building against an external portals? -------
+# --with-cray-portals=PATH adds -IPATH to the include flags and compiles
+# everything with -DCRAY_PORTALS=1.  The define is appended to CC for now;
+# the DEFS variable may be a better home for it.
+AC_ARG_WITH(cray-portals, [ --with-cray-portals=[path] path to cray portals],
+        [
+        # --without-cray-portals arrives here as with_cray_portals=no and
+        # has to leave the feature switched off
+        if test "x$with_cray_portals" = xno ; then
+                with_cray_portals=
+        else
+                CRAY_PORTALS_INCLUDE="-I$with_cray_portals"
+                CC="$CC -DCRAY_PORTALS=1"
+        fi
+        ])
+AC_SUBST(CRAY_PORTALS_INCLUDE)
+AM_CONDITIONAL(CRAY_PORTALS, test ! "x$with_cray_portals" = x)
+
# -------- liblustre compilation --------------
AC_ARG_WITH(lib, [ --with-lib compile lustre library], host_cpu="lib")
# ------------ include paths ------------------
+# Include paths shared by every build flavour.  The kernel headers
+# (-I$LINUX/include) are appended below for non-liblustre builds only, so
+# they must not be listed here as well.
+# NOTE(review): CRAY_PORTALS_COMMANDLINE is not assigned anywhere in the
+# visible configure fragments - confirm it is set elsewhere or drop it.
+KINCFLAGS="$CRAY_PORTALS_INCLUDE $CRAY_PORTALS_COMMANDLINE \
+ -I\$(top_srcdir)/include \
+ -I\$(top_srcdir)/portals/include"
if test $host_cpu != "lib" ; then
- KINCFLAGS="-I\$(top_srcdir)/include -I\$(top_srcdir)/portals/include -I$LINUX/include"
-else
- KINCFLAGS='-I$(top_srcdir)/include -I$(top_srcdir)/portals/include'
+ KINCFLAGS="$KINCFLAGS -I$LINUX/include"
fi
CPPFLAGS="$KINCFLAGS $ARCHCPPFLAGS"
+++ /dev/null
-# This version is here to make autoconf happy; the name is a file which is
-# "unique" to this directory so that configure knows where it should run.
-AC_INIT(knals/Makefile.am, 3.0)
-AC_CANONICAL_SYSTEM
-# Copyright (C) 2001 Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-# Automake variables. Steal the version number from packaging/intersync.spec
-AM_INIT_AUTOMAKE(portals, builtin([esyscmd], [sed -ne '/.*define IVERSION /{ s/.*IVERSION //; p; }' libcfs/module.c]))
-# AM_MAINTAINER_MODE
-
-sinclude(build.m4)
-sinclude(archdep.m4)
-
-if test x$enable_inkernel = xyes ; then
-cp Kernelenv.mk Kernelenv.in
-cp Makefile.mk Makefile.in
-cp libcfs/Makefile.mk libcfs/Makefile.in
-cp portals/Makefile.mk portals/Makefile.in
-cp knals/Makefile.mk knals/Makefile.in
-cp knals/socknal/Makefile.mk knals/socknal/Makefile.in
-cp router/Makefile.mk router/Makefile.in
-fi
-
-AM_CONFIG_HEADER(include/config.h)
-
-AC_OUTPUT([Makefile Kernelenv libcfs/Makefile portals/Makefile \
- unals/Makefile knals/Makefile router/Makefile \
- knals/socknal/Makefile knals/gmnal/Makefile knals/qswnal/Makefile \
- knals/scimacnal/Makefile knals/ibnal/Makefile\
- utils/Makefile tests/Makefile doc/Makefile ])
-
+++ /dev/null
-/* portals/include/config.h.in. Generated from configure.in by autoheader. */
-
-/* Use the Pinger */
-#undef ENABLE_PINGER
-
-/* Define to 1 if you have the <inttypes.h> header file. */
-#undef HAVE_INTTYPES_H
-
-/* Define to 1 if you have the <memory.h> header file. */
-#undef HAVE_MEMORY_H
-
-/* Define to 1 if you have the <stdint.h> header file. */
-#undef HAVE_STDINT_H
-
-/* Define to 1 if you have the <stdlib.h> header file. */
-#undef HAVE_STDLIB_H
-
-/* Define to 1 if you have the <strings.h> header file. */
-#undef HAVE_STRINGS_H
-
-/* Define to 1 if you have the <string.h> header file. */
-#undef HAVE_STRING_H
-
-/* Define to 1 if you have the <sys/stat.h> header file. */
-#undef HAVE_SYS_STAT_H
-
-/* Define to 1 if you have the <sys/types.h> header file. */
-#undef HAVE_SYS_TYPES_H
-
-/* Define to 1 if you have the <unistd.h> header file. */
-#undef HAVE_UNISTD_H
-
-/* IOCTL Buffer Size */
-#undef OBD_MAX_IOCTL_BUFFER
-
-/* Name of package */
-#undef PACKAGE
-
-/* Define to the address where bug reports for this package should be sent. */
-#undef PACKAGE_BUGREPORT
-
-/* Define to the full name of this package. */
-#undef PACKAGE_NAME
-
-/* Define to the full name and version of this package. */
-#undef PACKAGE_STRING
-
-/* Define to the one symbol short name of this package. */
-#undef PACKAGE_TARNAME
-
-/* Define to the version of this package. */
-#undef PACKAGE_VERSION
-
-/* The size of a `unsigned long long', as computed by sizeof. */
-#undef SIZEOF_UNSIGNED_LONG_LONG
-
-/* Define to 1 if you have the ANSI C header files. */
-#undef STDC_HEADERS
-
-/* Version number of package */
-#undef VERSION
#ifndef _KP30_INCLUDED
#define _KP30_INCLUDED
+#include <linux/libcfs.h>
#define PORTAL_DEBUG
#ifndef offsetof
#define LOWEST_BIT_SET(x) ((x) & ~((x) - 1))
-/*
- * Debugging
- */
-extern unsigned int portal_subsystem_debug;
-extern unsigned int portal_stack;
-extern unsigned int portal_debug;
-extern unsigned int portal_printk;
-extern unsigned int portal_cerror;
-/* Debugging subsystems (32 bits, non-overlapping) */
-#define S_UNDEFINED 0x00000001
-#define S_MDC 0x00000002
-#define S_MDS 0x00000004
-#define S_OSC 0x00000008
-#define S_OST 0x00000010
-#define S_CLASS 0x00000020
-#define S_LOG 0x00000040
-#define S_LLITE 0x00000080
-#define S_RPC 0x00000100
-#define S_MGMT 0x00000200
-#define S_PORTALS 0x00000400
-#define S_SOCKNAL 0x00000800
-#define S_QSWNAL 0x00001000
-#define S_PINGER 0x00002000
-#define S_FILTER 0x00004000
-#define S_PTLBD 0x00008000
-#define S_ECHO 0x00010000
-#define S_LDLM 0x00020000
-#define S_LOV 0x00040000
-#define S_GMNAL 0x00080000
-#define S_PTLROUTER 0x00100000
-#define S_COBD 0x00200000
-#define S_IBNAL 0x00400000
-
-/* If you change these values, please keep portals/utils/debug.c
- * up to date! */
-
-/* Debugging masks (32 bits, non-overlapping) */
-#define D_TRACE 0x00000001 /* ENTRY/EXIT markers */
-#define D_INODE 0x00000002
-#define D_SUPER 0x00000004
-#define D_EXT2 0x00000008 /* anything from ext2_debug */
-#define D_MALLOC 0x00000010 /* print malloc, free information */
-#define D_CACHE 0x00000020 /* cache-related items */
-#define D_INFO 0x00000040 /* general information */
-#define D_IOCTL 0x00000080 /* ioctl related information */
-#define D_BLOCKS 0x00000100 /* ext2 block allocation */
-#define D_NET 0x00000200 /* network communications */
-#define D_WARNING 0x00000400 /* CWARN(...) == CDEBUG (D_WARNING, ...) */
-#define D_BUFFS 0x00000800
-#define D_OTHER 0x00001000
-#define D_DENTRY 0x00002000
-#define D_PORTALS 0x00004000 /* ENTRY/EXIT markers */
-#define D_PAGE 0x00008000 /* bulk page handling */
-#define D_DLMTRACE 0x00010000
-#define D_ERROR 0x00020000 /* CERROR(...) == CDEBUG (D_ERROR, ...) */
-#define D_EMERG 0x00040000 /* CEMERG(...) == CDEBUG (D_EMERG, ...) */
-#define D_HA 0x00080000 /* recovery and failover */
-#define D_RPCTRACE 0x00100000 /* for distributed debugging */
-#define D_VFSTRACE 0x00200000
-#define D_READA 0x00400000 /* read-ahead */
-
-#ifdef __KERNEL__
-# include <linux/sched.h> /* THREAD_SIZE */
-#else
-# ifndef THREAD_SIZE /* x86_64 has THREAD_SIZE in userspace */
-# define THREAD_SIZE 8192
-# endif
-#endif
-
-#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5)
-
-#ifdef __KERNEL__
-# ifdef __ia64__
-# define CDEBUG_STACK (THREAD_SIZE - \
- ((unsigned long)__builtin_dwarf_cfa() & \
- (THREAD_SIZE - 1)))
-# else
-# define CDEBUG_STACK (THREAD_SIZE - \
- ((unsigned long)__builtin_frame_address(0) & \
- (THREAD_SIZE - 1)))
-# endif
-
-#define CHECK_STACK(stack) \
- do { \
- if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) { \
- portals_debug_msg(DEBUG_SUBSYSTEM, D_WARNING, \
- __FILE__, __FUNCTION__, __LINE__, \
- (stack),"maximum lustre stack %u\n",\
- portal_stack = (stack)); \
- /*panic("LBUG");*/ \
- } \
- } while (0)
-#else /* __KERNEL__ */
-#define CHECK_STACK(stack) do { } while(0)
-#define CDEBUG_STACK (0L)
-#endif /* __KERNEL__ */
-
-#if 1
-#define CDEBUG(mask, format, a...) \
-do { \
- if (portal_cerror == 0) \
- break; \
- CHECK_STACK(CDEBUG_STACK); \
- if (((mask) & (D_ERROR | D_EMERG | D_WARNING)) || \
- (portal_debug & (mask) && \
- portal_subsystem_debug & DEBUG_SUBSYSTEM)) \
- portals_debug_msg(DEBUG_SUBSYSTEM, mask, \
- __FILE__, __FUNCTION__, __LINE__, \
- CDEBUG_STACK, format, ## a); \
-} while (0)
-
-#define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a)
-#define CERROR(format, a...) CDEBUG(D_ERROR, format, ## a)
-#define CEMERG(format, a...) CDEBUG(D_EMERG, format, ## a)
-
-#define GOTO(label, rc) \
-do { \
- long GOTO__ret = (long)(rc); \
- CDEBUG(D_TRACE,"Process leaving via %s (rc=%lu : %ld : %lx)\n", \
- #label, (unsigned long)GOTO__ret, (signed long)GOTO__ret,\
- (signed long)GOTO__ret); \
- goto label; \
-} while (0)
-
-#define RETURN(rc) \
-do { \
- typeof(rc) RETURN__ret = (rc); \
- CDEBUG(D_TRACE, "Process leaving (rc=%lu : %ld : %lx)\n", \
- (long)RETURN__ret, (long)RETURN__ret, (long)RETURN__ret);\
- return RETURN__ret; \
-} while (0)
-
-#define ENTRY \
-do { \
- CDEBUG(D_TRACE, "Process entered\n"); \
-} while (0)
-
-#define EXIT \
-do { \
- CDEBUG(D_TRACE, "Process leaving\n"); \
-} while(0)
-#else
-#define CDEBUG(mask, format, a...) do { } while (0)
-#define CWARN(format, a...) do { } while (0)
-#define CERROR(format, a...) printk("<3>" format, ## a)
-#define CEMERG(format, a...) printk("<0>" format, ## a)
-#define GOTO(label, rc) do { (void)(rc); goto label; } while (0)
-#define RETURN(rc) return (rc)
-#define ENTRY do { } while (0)
-#define EXIT do { } while (0)
-#endif
-
#ifdef __KERNEL__
# include <linux/vmalloc.h>
# include <linux/time.h>
# include <linux/highmem.h>
# include <linux/module.h>
# include <linux/version.h>
-# include <portals/lib-nal.h>
+# include <portals/p30.h>
# include <linux/smp_lock.h>
# include <asm/atomic.h>
#endif
/******************************************************************************/
-/* Kernel Portals Router interface */
-
-typedef void (*kpr_fwd_callback_t)(void *arg, int error); // completion callback
-
-/* space for routing targets to stash "stuff" in a forwarded packet */
-typedef union {
- long long _alignment;
- void *_space[16]; /* scale with CPU arch */
-} kprfd_scratch_t;
-
-/* Kernel Portals Routing Forwarded message Descriptor */
-typedef struct {
- struct list_head kprfd_list; /* stash in queues (routing target can use) */
- ptl_nid_t kprfd_target_nid; /* final destination NID */
- ptl_nid_t kprfd_gateway_nid; /* gateway NID */
- ptl_hdr_t *kprfd_hdr; /* header in wire byte order */
- int kprfd_nob; /* # payload bytes */
- int kprfd_niov; /* # payload frags */
- ptl_kiov_t *kprfd_kiov; /* payload fragments */
- void *kprfd_router_arg; /* originating NAL's router arg */
- kpr_fwd_callback_t kprfd_callback; /* completion callback */
- void *kprfd_callback_arg; /* completion callback arg */
- kprfd_scratch_t kprfd_scratch; /* scratchpad for routing targets */
-} kpr_fwd_desc_t;
-
-typedef void (*kpr_fwd_t)(void *arg, kpr_fwd_desc_t *fwd);
-typedef void (*kpr_notify_t)(void *arg, ptl_nid_t peer, int alive);
-
-/* NAL's routing interface (Kernel Portals Routing Nal Interface) */
-typedef const struct {
- int kprni_nalid; /* NAL's id */
- void *kprni_arg; /* Arg to pass when calling into NAL */
- kpr_fwd_t kprni_fwd; /* NAL's forwarding entrypoint */
- kpr_notify_t kprni_notify; /* NAL's notification entrypoint */
-} kpr_nal_interface_t;
-
-/* Router's routing interface (Kernel Portals Routing Router Interface) */
-typedef const struct {
- /* register the calling NAL with the router and get back the handle for
- * subsequent calls */
- int (*kprri_register) (kpr_nal_interface_t *nal_interface,
- void **router_arg);
-
- /* ask the router to find a gateway that forwards to 'nid' and is a
- * peer of the calling NAL; assume caller will send 'nob' bytes of
- * payload there */
- int (*kprri_lookup) (void *router_arg, ptl_nid_t nid, int nob,
- ptl_nid_t *gateway_nid);
-
- /* hand a packet over to the router for forwarding */
- kpr_fwd_t kprri_fwd_start;
-
- /* hand a packet back to the router for completion */
- void (*kprri_fwd_done) (void *router_arg, kpr_fwd_desc_t *fwd,
- int error);
-
- /* notify the router about peer state */
- void (*kprri_notify) (void *router_arg, ptl_nid_t peer,
- int alive, time_t when);
-
- /* the calling NAL is shutting down */
- void (*kprri_shutdown) (void *router_arg);
-
- /* deregister the calling NAL with the router */
- void (*kprri_deregister) (void *router_arg);
-
-} kpr_router_interface_t;
-
-/* Convenient struct for NAL to stash router interface/args */
-typedef struct {
- kpr_router_interface_t *kpr_interface;
- void *kpr_arg;
-} kpr_router_t;
-
-/* Router's control interface (Kernel Portals Routing Control Interface) */
-typedef const struct {
- int (*kprci_add_route)(int gateway_nal, ptl_nid_t gateway_nid,
- ptl_nid_t lo_nid, ptl_nid_t hi_nid);
- int (*kprci_del_route)(int gateway_nal, ptl_nid_t gateway_nid,
- ptl_nid_t lo_nid, ptl_nid_t hi_nid);
- int (*kprci_get_route)(int index, int *gateway_nal,
- ptl_nid_t *gateway,
- ptl_nid_t *lo_nid, ptl_nid_t *hi_nid,
- int *alive);
- int (*kprci_notify)(int gateway_nal, ptl_nid_t gateway_nid,
- int alive, time_t when);
-} kpr_control_interface_t;
-
-extern kpr_control_interface_t kpr_control_interface;
-extern kpr_router_interface_t kpr_router_interface;
-
-static inline int
-kpr_register (kpr_router_t *router, kpr_nal_interface_t *nalif)
-{
- int rc;
-
- router->kpr_interface = PORTAL_SYMBOL_GET (kpr_router_interface);
- if (router->kpr_interface == NULL)
- return (-ENOENT);
-
- rc = (router->kpr_interface)->kprri_register (nalif, &router->kpr_arg);
- if (rc != 0)
- router->kpr_interface = NULL;
-
- PORTAL_SYMBOL_PUT (kpr_router_interface);
- return (rc);
-}
-
-static inline int
-kpr_routing (kpr_router_t *router)
-{
- return (router->kpr_interface != NULL);
-}
-
-static inline int
-kpr_lookup (kpr_router_t *router, ptl_nid_t nid, int nob, ptl_nid_t *gateway_nid)
-{
- if (!kpr_routing (router))
- return (-ENETUNREACH);
-
- return (router->kpr_interface->kprri_lookup(router->kpr_arg, nid, nob,
- gateway_nid));
-}
-
-static inline void
-kpr_fwd_init (kpr_fwd_desc_t *fwd, ptl_nid_t nid, ptl_hdr_t *hdr,
- int nob, int niov, ptl_kiov_t *kiov,
- kpr_fwd_callback_t callback, void *callback_arg)
-{
- fwd->kprfd_target_nid = nid;
- fwd->kprfd_gateway_nid = nid;
- fwd->kprfd_hdr = hdr;
- fwd->kprfd_nob = nob;
- fwd->kprfd_niov = niov;
- fwd->kprfd_kiov = kiov;
- fwd->kprfd_callback = callback;
- fwd->kprfd_callback_arg = callback_arg;
-}
-
-static inline void
-kpr_fwd_start (kpr_router_t *router, kpr_fwd_desc_t *fwd)
-{
- if (!kpr_routing (router))
- fwd->kprfd_callback (fwd->kprfd_callback_arg, -ENETUNREACH);
- else
- router->kpr_interface->kprri_fwd_start (router->kpr_arg, fwd);
-}
-
-static inline void
-kpr_fwd_done (kpr_router_t *router, kpr_fwd_desc_t *fwd, int error)
-{
- LASSERT (kpr_routing (router));
- router->kpr_interface->kprri_fwd_done (router->kpr_arg, fwd, error);
-}
-
-static inline void
-kpr_notify (kpr_router_t *router,
- ptl_nid_t peer, int alive, time_t when)
-{
- if (!kpr_routing (router))
- return;
-
- router->kpr_interface->kprri_notify(router->kpr_arg, peer, alive, when);
-}
-
-static inline void
-kpr_shutdown (kpr_router_t *router)
-{
- if (kpr_routing (router))
- router->kpr_interface->kprri_shutdown (router->kpr_arg);
-}
-
-static inline void
-kpr_deregister (kpr_router_t *router)
-{
- if (!kpr_routing (router))
- return;
- router->kpr_interface->kprri_deregister (router->kpr_arg);
- router->kpr_interface = NULL;
-}
-
-/******************************************************************************/
#ifdef PORTALS_PROFILING
#define prof_enum(FOO) PROF__##FOO
#define PING_SYNC 0
#define PING_ASYNC 1
-struct portal_ioctl_data {
- __u32 ioc_len;
- __u32 ioc_version;
- __u64 ioc_nid;
- __u64 ioc_nid2;
- __u64 ioc_nid3;
- __u32 ioc_count;
- __u32 ioc_nal;
- __u32 ioc_nal_cmd;
- __u32 ioc_fd;
- __u32 ioc_id;
-
- __u32 ioc_flags;
- __u32 ioc_size;
-
- __u32 ioc_wait;
- __u32 ioc_timeout;
- __u32 ioc_misc;
-
- __u32 ioc_inllen1;
- char *ioc_inlbuf1;
- __u32 ioc_inllen2;
- char *ioc_inlbuf2;
-
- __u32 ioc_plen1; /* buffers in userspace */
- char *ioc_pbuf1;
- __u32 ioc_plen2; /* buffers in userspace */
- char *ioc_pbuf2;
-
- char ioc_bulk[0];
-};
-
struct portal_ioctl_hdr {
__u32 ioc_len;
__u32 ioc_version;
DEBUG_DAEMON_CONTINUE = 4,
};
-/* XXX remove to lustre ASAP */
-struct lustre_peer {
- ptl_nid_t peer_nid;
- ptl_handle_ni_t peer_ni;
-};
-
-
/* module.c */
typedef int (*nal_cmd_handler_t)(struct portals_cfg *, void * private);
int kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _KPR_H
+#define _KPR_H
+
+# include <portals/lib-nal.h> /* for ptl_hdr_t */
+
+/******************************************************************************/
+/* Kernel Portals Router interface */
+
+typedef void (*kpr_fwd_callback_t)(void *arg, int error); // completion callback
+
+/* space for routing targets to stash "stuff" in a forwarded packet */
+typedef union {
+ long long _alignment;
+ void *_space[16]; /* scale with CPU arch */
+} kprfd_scratch_t;
+
+/* Kernel Portals Routing Forwarded message Descriptor */
+typedef struct {
+ struct list_head kprfd_list; /* stash in queues (routing target can use) */
+ ptl_nid_t kprfd_target_nid; /* final destination NID */
+ ptl_nid_t kprfd_gateway_nid; /* gateway NID */
+ ptl_hdr_t *kprfd_hdr; /* header in wire byte order */
+ int kprfd_nob; /* # payload bytes */
+ int kprfd_niov; /* # payload frags */
+ ptl_kiov_t *kprfd_kiov; /* payload fragments */
+ void *kprfd_router_arg; /* originating NAL's router arg */
+ kpr_fwd_callback_t kprfd_callback; /* completion callback */
+ void *kprfd_callback_arg; /* completion callback arg */
+ kprfd_scratch_t kprfd_scratch; /* scratchpad for routing targets */
+} kpr_fwd_desc_t;
+
+typedef void (*kpr_fwd_t)(void *arg, kpr_fwd_desc_t *fwd);
+typedef void (*kpr_notify_t)(void *arg, ptl_nid_t peer, int alive);
+
+/* NAL's routing interface (Kernel Portals Routing Nal Interface) */
+typedef const struct {
+ int kprni_nalid; /* NAL's id */
+ void *kprni_arg; /* Arg to pass when calling into NAL */
+ kpr_fwd_t kprni_fwd; /* NAL's forwarding entrypoint */
+ kpr_notify_t kprni_notify; /* NAL's notification entrypoint */
+} kpr_nal_interface_t;
+
+/* Router's routing interface (Kernel Portals Routing Router Interface) */
+typedef const struct {
+ /* register the calling NAL with the router and get back the handle for
+ * subsequent calls */
+ int (*kprri_register) (kpr_nal_interface_t *nal_interface,
+ void **router_arg);
+
+ /* ask the router to find a gateway that forwards to 'nid' and is a
+ * peer of the calling NAL; assume caller will send 'nob' bytes of
+ * payload there */
+ int (*kprri_lookup) (void *router_arg, ptl_nid_t nid, int nob,
+ ptl_nid_t *gateway_nid);
+
+ /* hand a packet over to the router for forwarding */
+ kpr_fwd_t kprri_fwd_start;
+
+ /* hand a packet back to the router for completion */
+ void (*kprri_fwd_done) (void *router_arg, kpr_fwd_desc_t *fwd,
+ int error);
+
+ /* notify the router about peer state */
+ void (*kprri_notify) (void *router_arg, ptl_nid_t peer,
+ int alive, time_t when);
+
+ /* the calling NAL is shutting down */
+ void (*kprri_shutdown) (void *router_arg);
+
+ /* deregister the calling NAL with the router */
+ void (*kprri_deregister) (void *router_arg);
+
+} kpr_router_interface_t;
+
+/* Convenient struct for NAL to stash router interface/args */
+typedef struct {
+ kpr_router_interface_t *kpr_interface;
+ void *kpr_arg;
+} kpr_router_t;
+
+/* Router's control interface (Kernel Portals Routing Control Interface) */
+typedef const struct {
+ int (*kprci_add_route)(int gateway_nal, ptl_nid_t gateway_nid,
+ ptl_nid_t lo_nid, ptl_nid_t hi_nid);
+ int (*kprci_del_route)(int gateway_nal, ptl_nid_t gateway_nid,
+ ptl_nid_t lo_nid, ptl_nid_t hi_nid);
+ int (*kprci_get_route)(int index, int *gateway_nal,
+ ptl_nid_t *gateway,
+ ptl_nid_t *lo_nid, ptl_nid_t *hi_nid,
+ int *alive);
+ int (*kprci_notify)(int gateway_nal, ptl_nid_t gateway_nid,
+ int alive, time_t when);
+} kpr_control_interface_t;
+
+extern kpr_control_interface_t kpr_control_interface;
+extern kpr_router_interface_t kpr_router_interface;
+
+/*
+ * Attach a NAL to the router.
+ *
+ * Fetches the router interface with PORTAL_SYMBOL_GET and hands 'nalif' to
+ * its kprri_register entry point, which fills in router->kpr_arg.
+ *
+ * Returns -ENOENT when the symbol lookup yields NULL, otherwise whatever
+ * kprri_register returned.  On a non-zero return router->kpr_interface is
+ * reset to NULL, so kpr_routing() reports the router as absent.
+ */
+static inline int
+kpr_register (kpr_router_t *router, kpr_nal_interface_t *nalif)
+{
+ int rc;
+
+ router->kpr_interface = PORTAL_SYMBOL_GET (kpr_router_interface);
+ if (router->kpr_interface == NULL)
+ return (-ENOENT);
+
+ rc = (router->kpr_interface)->kprri_register (nalif, &router->kpr_arg);
+ if (rc != 0)
+ router->kpr_interface = NULL;
+
+ /* NOTE(review): the symbol reference is dropped on success too, while
+ * the interface pointer stays cached in 'router'; presumably the
+ * router keeps itself pinned while NALs are registered - confirm */
+ PORTAL_SYMBOL_PUT (kpr_router_interface);
+ return (rc);
+}
+
+static inline int
+kpr_routing (kpr_router_t *router)
+{
+ return (router->kpr_interface != NULL);
+}
+
+/*
+ * Ask the router for the gateway to use when sending 'nob' payload bytes
+ * to 'nid'; the answer is written through 'gateway_nid'.
+ * Yields -ENETUNREACH when no router is attached, otherwise the result of
+ * the router's kprri_lookup.
+ */
+static inline int
+kpr_lookup (kpr_router_t *router, ptl_nid_t nid, int nob, ptl_nid_t *gateway_nid)
+{
+        int rc = -ENETUNREACH;
+
+        if (kpr_routing (router))
+                rc = router->kpr_interface->kprri_lookup (router->kpr_arg,
+                                                          nid, nob,
+                                                          gateway_nid);
+        return rc;
+}
+
+/*
+ * Fill in a forwarding descriptor before it is handed to the router.
+ *
+ * Both the target and the gateway NID start out as 'nid'.  The header,
+ * payload size, fragment count, fragment array and completion
+ * callback/argument are stored as given.
+ *
+ * kprfd_list, kprfd_router_arg and kprfd_scratch are left untouched here.
+ */
+static inline void
+kpr_fwd_init (kpr_fwd_desc_t *fwd, ptl_nid_t nid, ptl_hdr_t *hdr,
+ int nob, int niov, ptl_kiov_t *kiov,
+ kpr_fwd_callback_t callback, void *callback_arg)
+{
+ fwd->kprfd_target_nid = nid;
+ fwd->kprfd_gateway_nid = nid;
+ fwd->kprfd_hdr = hdr;
+ fwd->kprfd_nob = nob;
+ fwd->kprfd_niov = niov;
+ fwd->kprfd_kiov = kiov;
+ fwd->kprfd_callback = callback;
+ fwd->kprfd_callback_arg = callback_arg;
+}
+
+/*
+ * Hand 'fwd' to the router for forwarding.  Without an attached router the
+ * descriptor's completion callback is invoked directly with -ENETUNREACH.
+ */
+static inline void
+kpr_fwd_start (kpr_router_t *router, kpr_fwd_desc_t *fwd)
+{
+        if (kpr_routing (router)) {
+                router->kpr_interface->kprri_fwd_start (router->kpr_arg, fwd);
+                return;
+        }
+
+        /* nobody to forward through: complete the request with an error */
+        fwd->kprfd_callback (fwd->kprfd_callback_arg, -ENETUNREACH);
+}
+
+static inline void
+kpr_fwd_done (kpr_router_t *router, kpr_fwd_desc_t *fwd, int error)
+{
+ LASSERT (kpr_routing (router));
+ router->kpr_interface->kprri_fwd_done (router->kpr_arg, fwd, error);
+}
+
+/*
+ * Pass a peer state change (alive flag plus timestamp) on to the router.
+ * Does nothing when no router is attached.
+ */
+static inline void
+kpr_notify (kpr_router_t *router,
+            ptl_nid_t peer, int alive, time_t when)
+{
+        if (kpr_routing (router))
+                router->kpr_interface->kprri_notify (router->kpr_arg, peer,
+                                                     alive, when);
+}
+
+/*
+ * Tell the router that the calling NAL is shutting down.
+ * Does nothing when no router is attached.
+ */
+static inline void
+kpr_shutdown (kpr_router_t *router)
+{
+        if (!kpr_routing (router))
+                return;
+
+        router->kpr_interface->kprri_shutdown (router->kpr_arg);
+}
+
+/*
+ * Detach the calling NAL from the router and forget the cached interface,
+ * so that kpr_routing() is false afterwards.  Does nothing when no router
+ * is attached.
+ */
+static inline void
+kpr_deregister (kpr_router_t *router)
+{
+        if (kpr_routing (router)) {
+                router->kpr_interface->kprri_deregister (router->kpr_arg);
+                router->kpr_interface = NULL;
+        }
+}
+
+#endif /* _KPR_H */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _LIBCFS_H
+/* Define the guard before anything else so that a nested inclusion of this
+ * header is a no-op.  The identical empty definition at the end of the
+ * header is a harmless redefinition. */
+#define _LIBCFS_H
+
+#define PORTAL_DEBUG
+
+#ifndef offsetof
+/* Fallback for environments without offsetof.  The address is converted via
+ * unsigned long so the pointer is not narrowed directly to int on 64-bit
+ * targets; the result type stays int as before. */
+# define offsetof(typ,memb) ((int)(unsigned long)((char *)&(((typ *)0)->memb)))
+#endif
+
+/* Isolate the least significant set bit of x (evaluates to 0 for x == 0) */
+#define LOWEST_BIT_SET(x) ((x) & ~((x) - 1))
+
+/*
+ * Debugging
+ */
+extern unsigned int portal_subsystem_debug;
+extern unsigned int portal_stack;
+extern unsigned int portal_debug;
+extern unsigned int portal_printk;
+extern unsigned int portal_cerror;
+/* Debugging subsystems (32 bits, non-overlapping) */
+#define S_UNDEFINED 0x00000001
+#define S_MDC 0x00000002
+#define S_MDS 0x00000004
+#define S_OSC 0x00000008
+#define S_OST 0x00000010
+#define S_CLASS 0x00000020
+#define S_LOG 0x00000040
+#define S_LLITE 0x00000080
+#define S_RPC 0x00000100
+#define S_MGMT 0x00000200
+#define S_PORTALS 0x00000400
+#define S_SOCKNAL 0x00000800
+#define S_QSWNAL 0x00001000
+#define S_PINGER 0x00002000
+#define S_FILTER 0x00004000
+#define S_PTLBD 0x00008000
+#define S_ECHO 0x00010000
+#define S_LDLM 0x00020000
+#define S_LOV 0x00040000
+#define S_GMNAL 0x00080000
+#define S_PTLROUTER 0x00100000
+#define S_COBD 0x00200000
+#define S_IBNAL 0x00400000
+
+/* If you change these values, please keep portals/utils/debug.c
+ * up to date! */
+
+/* Debugging masks (32 bits, non-overlapping) */
+#define D_TRACE 0x00000001 /* ENTRY/EXIT markers */
+#define D_INODE 0x00000002
+#define D_SUPER 0x00000004
+#define D_EXT2 0x00000008 /* anything from ext2_debug */
+#define D_MALLOC 0x00000010 /* print malloc, free information */
+#define D_CACHE 0x00000020 /* cache-related items */
+#define D_INFO 0x00000040 /* general information */
+#define D_IOCTL 0x00000080 /* ioctl related information */
+#define D_BLOCKS 0x00000100 /* ext2 block allocation */
+#define D_NET 0x00000200 /* network communications */
+#define D_WARNING 0x00000400 /* CWARN(...) == CDEBUG (D_WARNING, ...) */
+#define D_BUFFS 0x00000800
+#define D_OTHER 0x00001000
+#define D_DENTRY 0x00002000
+#define D_PORTALS 0x00004000 /* ENTRY/EXIT markers */
+#define D_PAGE 0x00008000 /* bulk page handling */
+#define D_DLMTRACE 0x00010000
+#define D_ERROR 0x00020000 /* CERROR(...) == CDEBUG (D_ERROR, ...) */
+#define D_EMERG 0x00040000 /* CEMERG(...) == CDEBUG (D_EMERG, ...) */
+#define D_HA 0x00080000 /* recovery and failover */
+#define D_RPCTRACE 0x00100000 /* for distributed debugging */
+#define D_VFSTRACE 0x00200000
+#define D_READA 0x00400000 /* read-ahead */
+
+#ifdef __KERNEL__
+# include <linux/sched.h> /* THREAD_SIZE */
+#else
+# ifndef THREAD_SIZE /* x86_64 has THREAD_SIZE in userspace */
+# define THREAD_SIZE 8192
+# endif
+#endif
+
+#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5)
+
+#ifdef __KERNEL__
+# ifdef __ia64__
+# define CDEBUG_STACK (THREAD_SIZE - \
+ ((unsigned long)__builtin_dwarf_cfa() & \
+ (THREAD_SIZE - 1)))
+# else
+# define CDEBUG_STACK (THREAD_SIZE - \
+ ((unsigned long)__builtin_frame_address(0) & \
+ (THREAD_SIZE - 1)))
+# endif
+
+#define CHECK_STACK(stack) \
+ do { \
+ if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) { \
+ portals_debug_msg(DEBUG_SUBSYSTEM, D_WARNING, \
+ __FILE__, __FUNCTION__, __LINE__, \
+ (stack),"maximum lustre stack %u\n",\
+ portal_stack = (stack)); \
+ /*panic("LBUG");*/ \
+ } \
+ } while (0)
+#else /* __KERNEL__ */
+#define CHECK_STACK(stack) do { } while(0)
+#define CDEBUG_STACK (0L)
+#endif /* __KERNEL__ */
+
+#if 1
+#define CDEBUG(mask, format, a...) \
+do { \
+ if (portal_cerror == 0) \
+ break; \
+ CHECK_STACK(CDEBUG_STACK); \
+ if (((mask) & (D_ERROR | D_EMERG | D_WARNING)) || \
+ (portal_debug & (mask) && \
+ portal_subsystem_debug & DEBUG_SUBSYSTEM)) \
+ portals_debug_msg(DEBUG_SUBSYSTEM, mask, \
+ __FILE__, __FUNCTION__, __LINE__, \
+ CDEBUG_STACK, format, ## a); \
+} while (0)
+
+#define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a)
+#define CERROR(format, a...) CDEBUG(D_ERROR, format, ## a)
+#define CEMERG(format, a...) CDEBUG(D_EMERG, format, ## a)
+
+/* Trace the exit value 'rc' (as unsigned, signed and hex) and jump to
+ * 'label'.  Each argument is cast to the type its conversion expects:
+ * unsigned long for %lu and %lx, long for %ld. */
+#define GOTO(label, rc)                                                 \
+do {                                                                    \
+        long GOTO__ret = (long)(rc);                                    \
+        CDEBUG(D_TRACE,"Process leaving via %s (rc=%lu : %ld : %lx)\n", \
+               #label, (unsigned long)GOTO__ret, (signed long)GOTO__ret,\
+               (unsigned long)GOTO__ret);                               \
+        goto label;                                                     \
+} while (0)
+
+/* Trace the return value (as unsigned, signed and hex) and return it.
+ * 'rc' is evaluated exactly once; the value returned keeps its own type,
+ * only the traced copies are cast. */
+#define RETURN(rc)                                                      \
+do {                                                                    \
+        typeof(rc) RETURN__ret = (rc);                                  \
+        CDEBUG(D_TRACE, "Process leaving (rc=%lu : %ld : %lx)\n",       \
+               (unsigned long)RETURN__ret, (long)RETURN__ret,           \
+               (unsigned long)RETURN__ret);                             \
+        return RETURN__ret;                                             \
+} while (0)
+
+#define ENTRY \
+do { \
+ CDEBUG(D_TRACE, "Process entered\n"); \
+} while (0)
+
+#define EXIT \
+do { \
+ CDEBUG(D_TRACE, "Process leaving\n"); \
+} while(0)
+#else
+#define CDEBUG(mask, format, a...) do { } while (0)
+#define CWARN(format, a...) do { } while (0)
+#define CERROR(format, a...) printk("<3>" format, ## a)
+#define CEMERG(format, a...) printk("<0>" format, ## a)
+#define GOTO(label, rc) do { (void)(rc); goto label; } while (0)
+#define RETURN(rc) return (rc)
+#define ENTRY do { } while (0)
+#define EXIT do { } while (0)
+#endif
+
+struct portal_ioctl_data {
+ __u32 ioc_len;
+ __u32 ioc_version;
+ __u64 ioc_nid;
+ __u64 ioc_nid2;
+ __u64 ioc_nid3;
+ __u32 ioc_count;
+ __u32 ioc_nal;
+ __u32 ioc_nal_cmd;
+ __u32 ioc_fd;
+ __u32 ioc_id;
+
+ __u32 ioc_flags;
+ __u32 ioc_size;
+
+ __u32 ioc_wait;
+ __u32 ioc_timeout;
+ __u32 ioc_misc;
+
+ __u32 ioc_inllen1;
+ char *ioc_inlbuf1;
+ __u32 ioc_inllen2;
+ char *ioc_inlbuf2;
+
+ __u32 ioc_plen1; /* buffers in userspace */
+ char *ioc_pbuf1;
+ __u32 ioc_plen2; /* buffers in userspace */
+ char *ioc_pbuf2;
+
+ char ioc_bulk[0];
+};
+
+#ifdef __KERNEL__
+
+#include <linux/list.h>
+
+/* One ioctl handler: a list link plus the callback, which receives the
+ * ioctl data, the command number and the raw argument. */
+struct libcfs_ioctl_handler {
+ struct list_head item;
+ int (*handle_ioctl)(struct portal_ioctl_data *data,
+ unsigned int cmd, unsigned long args);
+};
+
+/* Define a handler variable named 'ident' whose list link is initialised
+ * with LIST_HEAD_INIT on itself and whose callback is 'func'. */
+#define DECLARE_IOCTL_HANDLER(ident, func) \
+ struct libcfs_ioctl_handler ident = { \
+ .item = LIST_HEAD_INIT(ident.item), \
+ .handle_ioctl = func \
+ }
+
+/* Add/remove a handler; implemented elsewhere.
+ * NOTE(review): return convention (presumably 0 on success) is not visible
+ * from this header - confirm against the implementation. */
+int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand);
+int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand);
+
+#endif
+
+#define _LIBCFS_H
+
+#endif /* _LIBCFS_H */
--- /dev/null
+#ifndef _LUSTRE_LIST_H
+#define _LUSTRE_LIST_H
+
+#ifdef __KERNEL__
+#include <linux/list.h>
+#else
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+/* Userspace stand-in for prefetch: evaluates its argument and discards the
+ * result.  The argument is parenthesised so that expressions such as
+ * prefetch(p + 1) expand correctly. */
+#define prefetch(a) ((void)(a))
+
+struct list_head {
+ struct list_head *next, *prev;
+};
+
+typedef struct list_head list_t;
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+#define LIST_HEAD(name) \
+ struct list_head name = LIST_HEAD_INIT(name)
+
+#define INIT_LIST_HEAD(ptr) do { \
+ (ptr)->next = (ptr); (ptr)->prev = (ptr); \
+} while (0)
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_add(struct list_head * new,
+ struct list_head * prev,
+ struct list_head * next)
+{
+ next->prev = new;
+ new->next = next;
+ new->prev = prev;
+ prev->next = new;
+}
+
+/**
+ * list_add - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+ __list_add(new, head, head->next);
+}
+
+/**
+ * list_add_tail - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ */
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+ __list_add(new, head->prev, head);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_del(struct list_head * prev, struct list_head * next)
+{
+ next->prev = prev;
+ prev->next = next;
+}
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty on entry does not return true after this, the entry is in an undefined state.
+ */
+static inline void list_del(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+}
+
+/**
+ * list_del_init - deletes entry from list and reinitialize it.
+ * @entry: the element to delete from the list.
+ */
+static inline void list_del_init(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+ INIT_LIST_HEAD(entry);
+}
+
+/**
+ * list_move - delete from one list and add as another's head
+ * @list: the entry to move
+ * @head: the head that will precede our entry
+ */
+static inline void list_move(struct list_head *list, struct list_head *head)
+{
+ __list_del(list->prev, list->next);
+ list_add(list, head);
+}
+
+/**
+ * list_move_tail - delete from one list and add as another's tail
+ * @list: the entry to move
+ * @head: the head that will follow our entry
+ */
+static inline void list_move_tail(struct list_head *list,
+ struct list_head *head)
+{
+ __list_del(list->prev, list->next);
+ list_add_tail(list, head);
+}
+
+/**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test.
+ */
+static inline int list_empty(struct list_head *head)
+{
+ return head->next == head;
+}
+
+/*
+ * Link every entry of 'list' in directly after 'head', ahead of head's
+ * previous first entry.
+ *
+ * 'list' must not be empty: with an empty list, first and last are the
+ * list head itself and it would be linked in as an entry.  list_splice()
+ * and list_splice_init() check for that before calling here.
+ * The 'list' head's own pointers are not updated by this function.
+ */
+static inline void __list_splice(struct list_head *list,
+ struct list_head *head)
+{
+ struct list_head *first = list->next;
+ struct list_head *last = list->prev;
+ struct list_head *at = head->next;
+
+ /* head <-> first entry of 'list' */
+ first->prev = head;
+ head->next = first;
+
+ /* last entry of 'list' <-> old successor of head */
+ last->next = at;
+ at->prev = last;
+}
+
+/**
+ * list_splice - join two lists
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ */
+static inline void list_splice(struct list_head *list, struct list_head *head)
+{
+ if (!list_empty(list))
+ __list_splice(list, head);
+}
+
+/**
+ * list_splice_init - join two lists and reinitialise the emptied list.
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * The list at @list is reinitialised
+ */
+static inline void list_splice_init(struct list_head *list,
+                                    struct list_head *head)
+{
+        /* nothing to move, and 'list' is already an empty head */
+        if (list_empty(list))
+                return;
+
+        __list_splice(list, head);
+        /* the entries now live after 'head'; leave 'list' empty */
+        INIT_LIST_HEAD(list);
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr: the &struct list_head pointer.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_entry(ptr, type, member) \
+ ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+
+/**
+ * list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop counter.
+ * @head: the head for your list.
+ */
+#define list_for_each(pos, head) \
+ for (pos = (head)->next, prefetch(pos->next); pos != (head); \
+ pos = pos->next, prefetch(pos->next))
+
+/**
+ * list_for_each_prev - iterate over a list in reverse order
+ * @pos: the &struct list_head to use as a loop counter.
+ * @head: the head for your list.
+ */
+#define list_for_each_prev(pos, head) \
+ for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \
+ pos = pos->prev, prefetch(pos->prev))
+
+/**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos: the &struct list_head to use as a loop counter.
+ * @n: another &struct list_head to use as temporary storage
+ * @head: the head for your list.
+ */
+#define list_for_each_safe(pos, n, head) \
+ for (pos = (head)->next, n = pos->next; pos != (head); \
+ pos = n, n = pos->next)
+
+/**
+ * list_for_each_entry - iterate over list of given type
+ * @pos: the type * to use as a loop counter.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry(pos, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member), \
+ prefetch(pos->member.next); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.next, typeof(*pos), member), \
+ prefetch(pos->member.next))
+
+/**
+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @pos: the type * to use as a loop counter.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_safe(pos, n, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member), \
+ n = list_entry(pos->member.next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+#endif /* if !__KERNEL__*/
+#endif /* if !_LUSTRE_LIST_H */
# define DEBUG_SUBSYSTEM S_PORTALS
# define PORTAL_DEBUG
+#include "build_check.h"
+
#ifndef __KERNEL__
# include <stdio.h>
# include <stdlib.h>
#ifndef P30_API_H
#define P30_API_H
+#include "build_check.h"
+
#include <portals/types.h>
#ifndef PTL_NO_WRAP
-int PtlInit(void);
-int PtlInitialized(void);
+int PtlInit(int *);
void PtlFini(void);
int PtlNIInit(ptl_interface_t interface, ptl_pt_index_t ptl_size_in,
* Network interfaces
*/
-#ifndef PTL_NO_WRAP
-int PtlNIBarrier(ptl_handle_ni_t interface_in);
-#endif
-
int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in,
ptl_sr_value_t * status_out);
*/
int PtlFailNid (ptl_handle_ni_t ni, ptl_nid_t nid, unsigned int threshold);
+/*
+ * PtlSnprintHandle:
+ *
+ * This is not an official Portals 3 API call. It is provided
+ * so that an application can print an opaque handle.
+ */
+void PtlSnprintHandle (char *str, int str_len, ptl_handle_any_t handle);
/*
* Match entries
ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out);
int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in,
- ptl_handle_md_t * handle_out);
+ ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out);
int PtlMDUnlink(ptl_handle_md_t md_in);
int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t * event_out);
-int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out,
- int timeout);
+int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout,
+ ptl_event_t *event_out, int *which_out);
#endif
/*
#ifndef PTL_BLOCKS_H
#define PTL_BLOCKS_H
+#include "build_check.h"
+
/*
* blocks.h
*
ptl_handle_ni_t ni_in;
ptl_handle_eq_t eq_in;
ptl_md_t md_in;
+ ptl_unlink_t unlink_in;
} PtlMDBind_in;
typedef struct PtlMDBind_out {
--- /dev/null
+#ifndef _BUILD_CHECK_H
+#define _BUILD_CHECK_H
+
+/* Build-time guard: stop the compile with an explicit error if this header
+ * is reached while CRAY_PORTALS is defined. */
+#ifdef CRAY_PORTALS
+#error "an application got to me instead of cray's includes"
+#endif
+
+#endif
+#include "build_check.h"
/*
**
** This files contains definitions that are used throughout the cplant code.
#ifndef _P30_ERRNO_H_
#define _P30_ERRNO_H_
+#include "build_check.h"
/*
* include/portals/errno.h
*
/* If you change these, you must update the string table in api-errno.c */
typedef enum {
- PTL_OK = 0,
- PTL_SEGV = 1,
-
- PTL_NOSPACE = 2,
- PTL_INUSE = 3,
- PTL_VAL_FAILED = 4,
-
- PTL_NAL_FAILED = 5,
- PTL_NOINIT = 6,
- PTL_INIT_DUP = 7,
- PTL_INIT_INV = 8,
- PTL_AC_INV_INDEX = 9,
-
- PTL_INV_ASIZE = 10,
- PTL_INV_HANDLE = 11,
- PTL_INV_MD = 12,
- PTL_INV_ME = 13,
- PTL_INV_NI = 14,
+ PTL_OK = 0,
+ PTL_SEGV = 1,
+
+ PTL_NO_SPACE = 2,
+ PTL_ME_IN_USE = 3,
+ PTL_VAL_FAILED = 4,
+
+ PTL_NAL_FAILED = 5,
+ PTL_NO_INIT = 6,
+ PTL_IFACE_DUP = 7,
+ PTL_IFACE_INVALID = 8,
+
+ PTL_HANDLE_INVALID = 9,
+ PTL_MD_INVALID = 10,
+ PTL_ME_INVALID = 11,
/* If you change these, you must update the string table in api-errno.c */
- PTL_ILL_MD = 15,
- PTL_INV_PROC = 16,
- PTL_INV_PSIZE = 17,
- PTL_INV_PTINDEX = 18,
- PTL_INV_REG = 19,
-
- PTL_INV_SR_INDX = 20,
- PTL_ML_TOOLONG = 21,
- PTL_ADDR_UNKNOWN = 22,
- PTL_INV_EQ = 23,
- PTL_EQ_DROPPED = 24,
-
- PTL_EQ_EMPTY = 25,
- PTL_NOUPDATE = 26,
- PTL_FAIL = 27,
- PTL_NOT_IMPLEMENTED = 28,
- PTL_NO_ACK = 29,
-
- PTL_IOV_TOO_MANY = 30,
- PTL_IOV_TOO_SMALL = 31,
-
- PTL_EQ_INUSE = 32,
-
- PTL_MAX_ERRNO = 32
+ PTL_PROCESS_INVALID = 12,
+ PTL_PT_INDEX_INVALID = 13,
+
+ PTL_SR_INDEX_INVALID = 14,
+ PTL_EQ_INVALID = 15,
+ PTL_EQ_DROPPED = 16,
+
+ PTL_EQ_EMPTY = 17,
+ PTL_MD_NO_UPDATE = 18,
+ PTL_FAIL = 19,
+
+ PTL_IOV_TOO_MANY = 20,
+ PTL_IOV_TOO_SMALL = 21,
+
+ PTL_EQ_IN_USE = 22,
+
+ PTL_MAX_ERRNO = 23
} ptl_err_t;
/* If you change these, you must update the string table in api-errno.c */
#ifndef _P30_INTERNAL_H_
#define _P30_INTERNAL_H_
+#include "build_check.h"
/*
* p30/internal.h
*
#ifndef PTL_DISPATCH_H
#define PTL_DISPATCH_H
+#include "build_check.h"
/*
* include/dispatch.h
*
#ifndef _LIB_P30_H_
#define _LIB_P30_H_
+#include "build_check.h"
+
#ifdef __KERNEL__
# include <asm/page.h>
# include <linux/string.h>
niov = umd->niov;
size = offsetof(lib_md_t, md_iov.kiov[niov]);
} else {
- niov = ((umd->options & PTL_MD_IOV) != 0) ?
+ niov = ((umd->options & PTL_MD_IOVEC) != 0) ?
umd->niov : 1;
size = offsetof(lib_md_t, md_iov.iov[niov]);
}
static inline lib_msg_t *
lib_msg_alloc(nal_cb_t *nal)
{
- /* NEVER called with statelock held */
+ /* NEVER called with statelock held; may be in interrupt... */
lib_msg_t *msg;
- PORTAL_ALLOC(msg, sizeof(*msg));
+ if (in_interrupt())
+ PORTAL_ALLOC_ATOMIC(msg, sizeof(*msg));
+ else
+ PORTAL_ALLOC(msg, sizeof(*msg));
+
if (msg != NULL) {
/* NULL pointers, clear flags etc */
memset (msg, 0, sizeof (*msg));
extern void lib_enq_event_locked (nal_cb_t *nal, void *private,
lib_eq_t *eq, ptl_event_t *ev);
extern void lib_finalize (nal_cb_t *nal, void *private, lib_msg_t *msg,
- ptl_err_t status);
+ ptl_ni_fail_t ni_fail_type);
extern void lib_parse (nal_cb_t *nal, ptl_hdr_t *hdr, void *private);
-extern lib_msg_t *lib_fake_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid,
- lib_md_t *getmd);
+extern lib_msg_t *lib_create_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid,
+ lib_msg_t *get_msg);
extern void print_hdr (nal_cb_t * nal, ptl_hdr_t * hdr);
#ifndef _LIB_NAL_H_
#define _LIB_NAL_H_
+#include "build_check.h"
/*
* nal.h
*
#ifndef _LIB_P30_H_
#define _LIB_P30_H_
+#include "build_check.h"
+
#ifdef __KERNEL__
# include <asm/page.h>
# include <linux/string.h>
niov = umd->niov;
size = offsetof(lib_md_t, md_iov.kiov[niov]);
} else {
- niov = ((umd->options & PTL_MD_IOV) != 0) ?
+ niov = ((umd->options & PTL_MD_IOVEC) != 0) ?
umd->niov : 1;
size = offsetof(lib_md_t, md_iov.iov[niov]);
}
static inline lib_msg_t *
lib_msg_alloc(nal_cb_t *nal)
{
- /* NEVER called with statelock held */
+ /* NEVER called with statelock held; may be in interrupt... */
lib_msg_t *msg;
- PORTAL_ALLOC(msg, sizeof(*msg));
+ if (in_interrupt())
+ PORTAL_ALLOC_ATOMIC(msg, sizeof(*msg));
+ else
+ PORTAL_ALLOC(msg, sizeof(*msg));
+
if (msg != NULL) {
/* NULL pointers, clear flags etc */
memset (msg, 0, sizeof (*msg));
extern void lib_enq_event_locked (nal_cb_t *nal, void *private,
lib_eq_t *eq, ptl_event_t *ev);
extern void lib_finalize (nal_cb_t *nal, void *private, lib_msg_t *msg,
- ptl_err_t status);
+ ptl_ni_fail_t ni_fail_type);
extern void lib_parse (nal_cb_t *nal, ptl_hdr_t *hdr, void *private);
-extern lib_msg_t *lib_fake_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid,
- lib_md_t *getmd);
+extern lib_msg_t *lib_create_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid,
+ lib_msg_t *get_msg);
extern void print_hdr (nal_cb_t * nal, ptl_hdr_t * hdr);
#ifndef _LIB_TYPES_H_
#define _LIB_TYPES_H_
+#include "build_check.h"
+
#include <portals/types.h>
#ifdef __KERNEL__
# include <linux/uio.h>
} lib_counters_t;
/* temporary expedient: limit number of entries in discontiguous MDs */
-# define PTL_MTU (512<<10)
-# define PTL_MD_MAX_IOV 128
-# define PTL_MD_MAX_PAGES min_t(int, PTL_MD_MAX_IOV, PTL_MTU / PAGE_SIZE)
+#define PTL_MTU (512<<10)
+#define PTL_MD_MAX_IOV 128
struct lib_msg_t {
struct list_head msg_list;
ptl_size_t max_size;
int threshold;
int pending;
- ptl_unlink_t unlink;
unsigned int options;
unsigned int md_flags;
void *user_ptr;
} md_iov;
};
-#define PTL_MD_FLAG_UNLINK (1 << 0)
+#define PTL_MD_FLAG_ZOMBIE (1 << 0)
+#define PTL_MD_FLAG_AUTO_UNLINK (1 << 1)
+
+/* Report whether 'md' can take no further operations: non-zero when its
+ * threshold has reached zero, or when PTL_MD_MAX_SIZE is set and another
+ * max_size bytes starting at the current offset would run past the end of
+ * the descriptor; zero otherwise. */
+static inline int lib_md_exhausted (lib_md_t *md)
+{
+ if (md->threshold == 0)
+ return 1;
+
+ /* the size check only applies to MDs created with PTL_MD_MAX_SIZE */
+ if ((md->options & PTL_MD_MAX_SIZE) == 0)
+ return 0;
+
+ return (md->offset + md->max_size > md->length);
+}
#ifdef PTL_USE_LIB_FREELIST
typedef struct
* using the generic single-entry routines.
*/
-#define prefetch(a) ((void)a)
-
struct list_head {
struct list_head *next, *prev;
};
* @head: the head for your list.
*/
#define list_for_each(pos, head) \
- for (pos = (head)->next, prefetch(pos->next); pos != (head); \
- pos = pos->next, prefetch(pos->next))
+ for (pos = (head)->next ; pos != (head); pos = pos->next )
/**
* list_for_each_prev - iterate over a list in reverse order
* @head: the head for your list.
*/
#define list_for_each_prev(pos, head) \
- for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \
- pos = pos->prev, prefetch(pos->prev))
+ for (pos = (head)->prev ; pos != (head); pos = pos->prev)
/**
* list_for_each_safe - iterate over a list safe against removal of list entry
* @member: the name of the list_struct within the struct.
*/
#define list_for_each_entry(pos, head, member) \
- for (pos = list_entry((head)->next, typeof(*pos), member), \
- prefetch(pos->member.next); \
+ for (pos = list_entry((head)->next, typeof(*pos), member); \
&pos->member != (head); \
- pos = list_entry(pos->member.next, typeof(*pos), member), \
- prefetch(pos->member.next))
+ pos = list_entry(pos->member.next, typeof(*pos), member))
#endif
#ifndef list_for_each_entry_safe
#ifndef _P30_H_
#define _P30_H_
+#include "build_check.h"
+
/*
* p30.h
*
#include <portals/api.h>
#include <portals/nalids.h>
-extern int __p30_initialized; /* for libraries & test codes */
-extern int __p30_myr_initialized; /* that don't know if p30 */
-extern int __p30_ip_initialized; /* had been initialized yet */
-extern ptl_handle_ni_t __myr_ni_handle, __ip_ni_handle;
-
-extern int __p30_myr_timeout; /* in seconds, for PtlNIBarrier, */
-extern int __p30_ip_timeout; /* PtlReduce_all, & PtlBroadcast_all */
-
/*
* Debugging flags reserved for the Portals reference library.
* These are not part of the API as described in the SAND report
#ifndef _NAL_H_
#define _NAL_H_
+#include "build_check.h"
+
/*
* p30/nal.h
*
int (*validate) (nal_t * nal, void *base, size_t extent);
- void (*yield) (nal_t * nal);
+ int (*yield) (nal_t * nal, unsigned long *flags, int milliseconds);
void (*lock) (nal_t * nal, unsigned long *flags);
+#include "build_check.h"
+
#define PTL_IFACE_TCP 1
#define PTL_IFACE_ER 2
#define PTL_IFACE_SS 3
#ifndef _P30_H_
#define _P30_H_
+#include "build_check.h"
+
/*
* p30.h
*
#include <portals/api.h>
#include <portals/nalids.h>
-extern int __p30_initialized; /* for libraries & test codes */
-extern int __p30_myr_initialized; /* that don't know if p30 */
-extern int __p30_ip_initialized; /* had been initialized yet */
-extern ptl_handle_ni_t __myr_ni_handle, __ip_ni_handle;
-
-extern int __p30_myr_timeout; /* in seconds, for PtlNIBarrier, */
-extern int __p30_ip_timeout; /* PtlReduce_all, & PtlBroadcast_all */
-
/*
* Debugging flags reserved for the Portals reference library.
* These are not part of the API as described in the SAND report
#ifndef _P30_TYPES_H_
#define _P30_TYPES_H_
+#include "build_check.h"
+
#ifdef __linux__
# include <asm/types.h>
# if defined(__powerpc__) && !defined(__KERNEL__)
#include <portals/errno.h>
+/* This implementation uses the same type for API function return codes and
+ * the completion status in an event */
+#define PTL_NI_OK PTL_OK
+typedef ptl_err_t ptl_ni_fail_t;
+
typedef __u64 ptl_nid_t;
typedef __u32 ptl_pid_t;
typedef __u32 ptl_pt_index_t;
typedef __u64 ptl_hdr_data_t;
typedef __u32 ptl_size_t;
+#define PTL_TIME_FOREVER (-1)
+#define PTL_EQ_HANDLER_NONE NULL
+
typedef struct {
unsigned long nal_idx; /* which network interface */
__u64 cookie; /* which thing on that interface */
typedef ptl_handle_any_t ptl_handle_md_t;
typedef ptl_handle_any_t ptl_handle_me_t;
-#define PTL_HANDLE_NONE \
+#define PTL_INVALID_HANDLE \
((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1})
-#define PTL_EQ_NONE PTL_HANDLE_NONE
+#define PTL_EQ_NONE PTL_INVALID_HANDLE
-static inline int PtlHandleEqual (ptl_handle_any_t h1, ptl_handle_any_t h2)
+static inline int PtlHandleIsEqual (ptl_handle_any_t h1, ptl_handle_any_t h2)
{
return (h1.nal_idx == h2.nal_idx && h1.cookie == h2.cookie);
}
} ptl_md_t;
/* Options for the MD structure */
-#define PTL_MD_OP_PUT (1 << 0)
-#define PTL_MD_OP_GET (1 << 1)
-#define PTL_MD_MANAGE_REMOTE (1 << 2)
-#define PTL_MD_AUTO_UNLINK (1 << 3)
-#define PTL_MD_TRUNCATE (1 << 4)
-#define PTL_MD_ACK_DISABLE (1 << 5)
-#define PTL_MD_IOV (1 << 6)
-#define PTL_MD_MAX_SIZE (1 << 7)
-#define PTL_MD_KIOV (1 << 8)
+#define PTL_MD_OP_PUT (1 << 0)
+#define PTL_MD_OP_GET (1 << 1)
+#define PTL_MD_MANAGE_REMOTE (1 << 2)
+/* unused (1 << 3) */
+#define PTL_MD_TRUNCATE (1 << 4)
+#define PTL_MD_ACK_DISABLE (1 << 5)
+#define PTL_MD_IOVEC (1 << 6)
+#define PTL_MD_MAX_SIZE (1 << 7)
+#define PTL_MD_KIOV (1 << 8)
+#define PTL_MD_EVENT_START_DISABLE (1 << 9)
+#define PTL_MD_EVENT_END_DISABLE (1 << 10)
+
+/* For compatibility with Cray Portals */
+#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS 0
#define PTL_MD_THRESH_INF (-1)
typedef enum {
- PTL_EVENT_GET,
- PTL_EVENT_PUT,
- PTL_EVENT_REPLY,
+ PTL_EVENT_GET_START,
+ PTL_EVENT_GET_END,
+
+ PTL_EVENT_PUT_START,
+ PTL_EVENT_PUT_END,
+
+ PTL_EVENT_REPLY_START,
+ PTL_EVENT_REPLY_END,
+
PTL_EVENT_ACK,
- PTL_EVENT_SENT,
+
+ PTL_EVENT_SEND_START,
+ PTL_EVENT_SEND_END,
+
PTL_EVENT_UNLINK,
} ptl_event_kind_t;
#endif
typedef struct {
ptl_event_kind_t type;
- ptl_err_t status;
- int unlinked;
ptl_process_id_t initiator;
ptl_pt_index_t portal;
ptl_match_bits_t match_bits;
ptl_size_t offset;
ptl_md_t mem_desc;
ptl_hdr_data_t hdr_data;
- struct timeval arrival_time;
+ int unlinked;
+ ptl_ni_fail_t ni_fail_type;
volatile ptl_seq_t sequence;
} ptl_event_t;
int gmnal_api_validate(nal_t *, void *, size_t);
-void gmnal_api_yield(nal_t *);
+int gmnal_api_yield(nal_t *, unsigned long *, int);
void gmnal_api_lock(nal_t *, unsigned long *);
* Give up the processor
*/
-void
-gmnal_api_yield(nal_t *nal)
+int
+gmnal_api_yield(nal_t *nal, unsigned long *flags, int milliseconds)
{
CDEBUG(D_TRACE, "gmnal_api_yield : nal [%p]\n", nal);
- set_current_state(TASK_INTERRUPTIBLE);
- schedule();
+ /* Only the non-blocking form (milliseconds == 0) is supported;
+ * 'flags' is unused because no lock is dropped here. */
+ if (milliseconds != 0) {
+ CERROR("Blocking yield not implemented yet\n");
+ LBUG();
+ }
+ our_cond_resched();
- return;
+ /* The nal_t yield method is declared to return int, and the NALs
+ * that implement blocking yield return the milliseconds still left
+ * to wait; a non-blocking yield has none left. */
+ return 0;
}
return;
}
+/* New-event hook: invoke the event queue's callback on 'ev' if the queue
+ * has one. 'nal_cb' and 'private' are not used. */
+void gmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq, ptl_event_t *ev)
+{
+ /* holding cb_lock */
+
+ if (eq->event_callback != NULL)
+ eq->event_callback(ev);
+
+ /* We will wake threads sleeping in yield() here, AFTER the
+ * callback, when we implement blocking yield */
+}
+
int gmnal_cb_dist(nal_cb_t *nal_cb, ptl_nid_t nid, unsigned long *dist)
{
CDEBUG(D_TRACE, "gmnal_cb_dist\n");
// when do we call this yield function
//
void
-kibnal_yield( nal_t *nal )
+kibnal_yield( nal_t *nal, unsigned long *flags, int milliseconds )
{
kibnal_data_t *k = nal->nal_data;
nal_cb_t *nal_cb = k->kib_cb;
LASSERT (k == &kibnal_data);
LASSERT (nal_cb == &kibnal_lib);
+ if (milliseconds != 0) {
+ CERROR("Blocking yeild not implemented yet\n");
+ LBUG();
+ }
+
// check under what condition that we need to
// call schedule()
// who set this need_resched
#include <portals/p30.h>
#include <portals/lib-p30.h>
#include <linux/kp30.h>
+#include <linux/kpr.h>
// Infiniband VAPI/EVAPI header files
// Mellanox MT23108 VAPI
spin_unlock_irqrestore(&data->kib_dispatch_lock,*flags);
}
+//
+// A new event has just been created: invoke the event queue's callback
+// on it if the queue has one. 'nal' and 'private' are not used.
+//
+void kibnal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev)
+{
+ /* holding kib_dispatch_lock */
+ if (eq->event_callback != NULL)
+ eq->event_callback(ev);
+
+ /* We will wake threads sleeping in yield() here, AFTER the
+ * callback, when we implement blocking yield */
+}
//
// nic distance
cb_printf: kibnal_printf,
cb_cli: kibnal_cli,
cb_sti: kibnal_sti,
+ cb_callback: kibnal_callback,
cb_dist: kibnal_dist // no used at this moment
};
return (0);
}
-static void
-kqswnal_yield( nal_t *nal )
+/*
+ * Yield the CPU on behalf of a thread waiting for events.
+ *
+ * milliseconds == 0: reschedule if needed and return 0 immediately.
+ * milliseconds < 0: sleep on kqn_yield_waitq with no timeout.
+ * milliseconds > 0: sleep on kqn_yield_waitq for at most that long.
+ *
+ * The lock is dropped with kqswnal_unlock() while sleeping and retaken
+ * with kqswnal_lock() before returning. Returns the part of a positive
+ * timeout that is still left (clamped at 0); a negative 'milliseconds'
+ * is returned unchanged.
+ */
+static int
+kqswnal_yield(nal_t *nal, unsigned long *flags, int milliseconds)
{
+ /* NB called holding statelock */
+ wait_queue_t wait;
+ unsigned long now = jiffies;
+ long timeout;
+
CDEBUG (D_NET, "yield\n");
- if (current->need_resched)
- schedule();
- return;
+ if (milliseconds == 0) {
+ if (current->need_resched)
+ schedule();
+ return 0;
+ }
+
+ /* Queue ourselves before dropping the lock so a wakeup issued
+ * between the unlock and the sleep is not lost. */
+ init_waitqueue_entry(&wait, current);
+ set_current_state(TASK_INTERRUPTIBLE);
+ add_wait_queue(&kqswnal_data.kqn_yield_waitq, &wait);
+
+ kqswnal_unlock(nal, flags);
+
+ if (milliseconds < 0) {
+ schedule ();
+ } else {
+ /* Convert whole seconds and the sub-second remainder
+ * separately: 'milliseconds * HZ' overflows an int for long
+ * timeouts, and rounding the remainder up keeps a short
+ * timeout from truncating to a zero-jiffy sleep. */
+ timeout = (long)(milliseconds / 1000) * HZ +
+ ((milliseconds % 1000) * HZ + 999) / 1000;
+ schedule_timeout(timeout);
+ }
+
+ kqswnal_lock(nal, flags);
+
+ remove_wait_queue(&kqswnal_data.kqn_yield_waitq, &wait);
+
+ /* Charge the time actually slept against the caller's timeout. */
+ if (milliseconds > 0) {
+ milliseconds -= ((jiffies - now) * 1000) / HZ;
+ if (milliseconds < 0)
+ milliseconds = 0;
+ }
+
+ return (milliseconds);
}
static nal_t *
init_waitqueue_head (&kqswnal_data.kqn_sched_waitq);
spin_lock_init (&kqswnal_data.kqn_statelock);
+ init_waitqueue_head (&kqswnal_data.kqn_yield_waitq);
/* pointers/lists/locks initialised */
kqswnal_data.kqn_init = KQN_INIT_DATA;
#define DEBUG_SUBSYSTEM S_QSWNAL
#include <linux/kp30.h>
+#include <linux/kpr.h>
#include <portals/p30.h>
#include <portals/lib-p30.h>
struct list_head kqn_delayedtxds; /* delayed transmits */
spinlock_t kqn_statelock; /* cb_cli/cb_sti */
+ wait_queue_head_t kqn_yield_waitq; /* where yield waits */
nal_cb_t *kqn_cb; /* -> kqswnal_lib */
#if MULTIRAIL_EKC
EP_SYS *kqn_ep; /* elan system */
CDEBUG (D_NET, "%s", msg);
}
+#if (defined(CONFIG_SPARC32) || defined(CONFIG_SPARC64))
+# error "Can't save/restore irq contexts in different procedures"
+#endif
static void
kqswnal_cli(nal_cb_t *nal, unsigned long *flags)
spin_unlock_irqrestore(&data->kqn_statelock, *flags);
}
+/* New-event hook: run the event queue's callback (if any) on 'ev', then
+ * wake every thread sleeping on the yield wait queue. */
+static void
+kqswnal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev)
+{
+ /* entered with kqn_statelock held */
+ wait_queue_head_t *yielders = &kqswnal_data.kqn_yield_waitq;
+
+ if (eq->event_callback != NULL)
+ eq->event_callback(ev);
+
+ /* only issue the wakeup when somebody is actually queued */
+ if (waitqueue_active(yielders))
+ wake_up_all(yielders);
+}
static int
kqswnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
lib_finalize (&kqswnal_lib, ktx->ktx_args[0],
(lib_msg_t *)ktx->ktx_args[1],
(error == 0) ? PTL_OK :
- (error == -ENOMEM) ? PTL_NOSPACE : PTL_FAIL);
+ (error == -ENOMEM) ? PTL_NO_SPACE : PTL_FAIL);
break;
case KTX_GETTING: /* Peer has DMA-ed direct? */
msg = (lib_msg_t *)ktx->ktx_args[1];
if (error == 0) {
- repmsg = lib_fake_reply_msg (&kqswnal_lib,
- ktx->ktx_nid, msg->md);
+ repmsg = lib_create_reply_msg (&kqswnal_lib,
+ ktx->ktx_nid, msg);
if (repmsg == NULL)
error = -ENOMEM;
}
lib_finalize (&kqswnal_lib, NULL, repmsg, PTL_OK);
} else {
lib_finalize (&kqswnal_lib, ktx->ktx_args[0], msg,
- (error == -ENOMEM) ? PTL_NOSPACE : PTL_FAIL);
+ (error == -ENOMEM) ? PTL_NO_SPACE : PTL_FAIL);
}
break;
in_interrupt()));
if (ktx == NULL) {
kqswnal_cerror_hdr (hdr);
- return (PTL_NOSPACE);
+ return (PTL_NO_SPACE);
}
ktx->ktx_nid = targetnid;
cb_printf: kqswnal_printf,
cb_cli: kqswnal_cli,
cb_sti: kqswnal_sti,
+ cb_callback: kqswnal_callback,
cb_dist: kqswnal_dist
};
}
-static void kscimacnal_yield( nal_t *nal )
+static void kscimacnal_yield( nal_t *nal, unsigned long *flags, int milliseconds )
{
LASSERT (nal == &kscimacnal_api);
+ if (milliseconds != 0) {
+ CERROR ("Blocking yield not implemented yet\n");
+ LBUG();
+ }
+
if (current->need_resched)
schedule();
return;
#define DEBUG_SUBSYSTEM S_UNDEFINED
#include <linux/kp30.h>
+#include <linux/kpr.h>
#include <portals/p30.h>
#include <portals/lib-p30.h>
}
+/* New-event hook: invoke the event queue's callback on 'ev' if the queue
+ * has one. 'nal' and 'private' are not used. */
+static void
+kscimacnal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev)
+{
+ /* holding ksci_dispatch_lock */
+
+ if (eq->event_callback != NULL)
+ eq->event_callback(ev);
+
+ /* We will wake threads sleeping in yield() here, AFTER the
+ * callback, when we implement blocking yield */
+}
+
static int
kscimacnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
{
/* save transaction info for later finalize and cleanup */
PORTAL_ALLOC(ktx, (sizeof(kscimacnal_tx_t)));
if (!ktx) {
- return PTL_NOSPACE;
+ return PTL_NO_SPACE;
}
ktx->ktx_nmapped = 0; /* Start with no mapped pages :) */
kscimacnal_txrelease, ktx);
if (!msg) {
PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t)));
- return PTL_NOSPACE;
+ return PTL_NO_SPACE;
}
mac_put_mblk(msg, sizeof(ptl_hdr_t));
lastblk=msg;
if(!newblk) {
mac_free_msg(msg);
PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t)));
- return PTL_NOSPACE;
+ return PTL_NO_SPACE;
}
mac_put_mblk(newblk, nob);
mac_link_mblk(lastblk, newblk);
cb_printf: kscimacnal_printf,
cb_cli: kscimacnal_cli,
cb_sti: kscimacnal_sti,
+ cb_callback: kscimacnal_callback,
cb_dist: kscimacnal_dist
};
}
void
-ksocknal_api_yield(nal_t *nal)
-{
- our_cond_resched();
- return;
-}
-
-void
ksocknal_api_lock(nal_t *nal, unsigned long *flags)
{
ksock_nal_data_t *k;
nal_cb->cb_sti(nal_cb,flags);
}
+/*
+ * Yield the CPU on behalf of a thread waiting for events.
+ *
+ * milliseconds == 0: reschedule if needed and return 0 immediately.
+ * milliseconds < 0: sleep on ksnd_yield_waitq with no timeout.
+ * milliseconds > 0: sleep on ksnd_yield_waitq for at most that long.
+ *
+ * The lock is dropped with ksocknal_api_unlock() while sleeping and
+ * retaken with ksocknal_api_lock() before returning. Returns the part of
+ * a positive timeout that is still left (clamped at 0); a negative
+ * 'milliseconds' is returned unchanged.
+ */
+int
+ksocknal_api_yield(nal_t *nal, unsigned long *flags, int milliseconds)
+{
+ /* NB called holding statelock */
+ wait_queue_t wait;
+ unsigned long now = jiffies;
+ long timeout;
+
+ CDEBUG (D_NET, "yield\n");
+
+ if (milliseconds == 0) {
+ our_cond_resched();
+ return 0;
+ }
+
+ /* Queue ourselves before dropping the lock so a wakeup issued
+ * between the unlock and the sleep is not lost. */
+ init_waitqueue_entry(&wait, current);
+ set_current_state (TASK_INTERRUPTIBLE);
+ add_wait_queue (&ksocknal_data.ksnd_yield_waitq, &wait);
+
+ ksocknal_api_unlock(nal, flags);
+
+ if (milliseconds < 0) {
+ schedule ();
+ } else {
+ /* Convert whole seconds and the sub-second remainder
+ * separately: 'milliseconds * HZ' overflows an int for long
+ * timeouts, and rounding the remainder up keeps a short
+ * timeout from truncating to a zero-jiffy sleep. */
+ timeout = (long)(milliseconds / 1000) * HZ +
+ ((milliseconds % 1000) * HZ + 999) / 1000;
+ schedule_timeout(timeout);
+ }
+
+ ksocknal_api_lock(nal, flags);
+
+ remove_wait_queue (&ksocknal_data.ksnd_yield_waitq, &wait);
+
+ /* Charge the time actually slept against the caller's timeout. */
+ if (milliseconds > 0) {
+ milliseconds -= ((jiffies - now) * 1000) / HZ;
+ if (milliseconds < 0)
+ milliseconds = 0;
+ }
+
+ return (milliseconds);
+}
+
nal_t *
ksocknal_init(int interface, ptl_pt_index_t ptl_size,
ptl_ac_index_t ac_size, ptl_pid_t requested_pid)
ksocknal_get_peer_addr (conn);
+ CWARN("New conn nid:"LPX64" ip:%08x/%d incarnation:"LPX64"\n",
+ nid, conn->ksnc_ipaddr, conn->ksnc_port, incarnation);
+
irq = ksocknal_conn_irq (conn);
write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
if (conn->ksnc_incarnation == incarnation)
continue;
+
+ CWARN("Closing stale conn nid:"LPX64" ip:%08x/%d "
+ "incarnation:"LPX64"("LPX64")\n",
+ peer->ksnp_nid, conn->ksnc_ipaddr, conn->ksnc_port,
+ conn->ksnc_incarnation, incarnation);
count++;
ksocknal_close_conn_locked (conn, -ESTALE);
ksocknal_api.forward = ksocknal_api_forward;
ksocknal_api.shutdown = ksocknal_api_shutdown;
- ksocknal_api.yield = ksocknal_api_yield;
ksocknal_api.validate = NULL; /* our api validate is a NOOP */
ksocknal_api.lock = ksocknal_api_lock;
ksocknal_api.unlock = ksocknal_api_unlock;
ksocknal_data.ksnd_nal_cb = &ksocknal_lib;
spin_lock_init (&ksocknal_data.ksnd_nal_cb_lock);
-
+ init_waitqueue_head(&ksocknal_data.ksnd_yield_waitq);
+
spin_lock_init(&ksocknal_data.ksnd_small_fmp.fmp_lock);
INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_idle_fmbs);
INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns);
ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
printk(KERN_INFO "Lustre: Routing socket NAL loaded "
- "(Routing %s, initial mem %d)\n",
+ "(Routing %s, initial mem %d, incarnation "LPX64")\n",
kpr_routing (&ksocknal_data.ksnd_router) ?
- "enabled" : "disabled", pkmem);
+ "enabled" : "disabled", pkmem, ksocknal_data.ksnd_incarnation);
return (0);
}
#include <linux/kp30.h>
#include <linux/portals_compat25.h>
+#include <linux/kpr.h>
#include <portals/p30.h>
#include <portals/lib-p30.h>
#include <portals/socknal.h>
nal_cb_t *ksnd_nal_cb;
spinlock_t ksnd_nal_cb_lock; /* lib cli/sti lock */
+ wait_queue_head_t ksnd_yield_waitq; /* where yield waits */
atomic_t ksnd_nthreads; /* # live threads */
int ksnd_shuttingdown; /* tell threads to exit */
{
ksock_nal_data_t *data = nal->nal_data;
+ /* OK to ignore 'flags'; we're only ever serialise threads and
+ * never need to lock out interrupts */
spin_lock(&data->ksnd_nal_cb_lock);
}
ksock_nal_data_t *data;
data = nal->nal_data;
+ /* OK to ignore 'flags'; we're only ever serialise threads and
+ * never need to lock out interrupts */
spin_unlock(&data->ksnd_nal_cb_lock);
}
+/* New-event hook: run the event queue's callback (if any) on 'ev', then
+ * wake every thread sleeping on the yield wait queue. */
+void
+ksocknal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev)
+{
+ /* entered with ksnd_nal_cb_lock held */
+ wait_queue_head_t *yielders = &ksocknal_data.ksnd_yield_waitq;
+
+ if (eq->event_callback != NULL)
+ eq->event_callback(ev);
+
+ /* only issue the wakeup when somebody is actually queued */
+ if (waitqueue_active(yielders))
+ wake_up_all(yielders);
+}
+
int
ksocknal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
{
if (ltx == NULL) {
CERROR("Can't allocate tx desc type %d size %d %s\n",
type, desc_size, in_interrupt() ? "(intr)" : "");
- return (PTL_NOSPACE);
+ return (PTL_NO_SPACE);
}
atomic_inc(&ksocknal_data.ksnd_nactive_ltxs);
}
ksocknal_data.ksnd_reaper_waketime = jiffies + timeout;
- add_wait_queue (&ksocknal_data.ksnd_reaper_waitq, &wait);
set_current_state (TASK_INTERRUPTIBLE);
+ add_wait_queue (&ksocknal_data.ksnd_reaper_waitq, &wait);
if (!ksocknal_data.ksnd_shuttingdown &&
list_empty (&ksocknal_data.ksnd_deathrow_conns) &&
cb_printf: ksocknal_printf,
cb_cli: ksocknal_cli,
cb_sti: ksocknal_sti,
+ cb_callback: ksocknal_callback,
cb_dist: ksocknal_dist
};
# See the file COPYING in this distribution
-MODULE = portals
-modulenet_DATA = portals.o
-EXTRA_PROGRAMS = portals
+MODULE = libcfs
+modulenet_DATA = libcfs.o
+EXTRA_PROGRAMS = libcfs
-LIBLINKS := lib-dispatch.c lib-eq.c lib-init.c lib-md.c lib-me.c lib-move.c lib-msg.c lib-ni.c lib-pid.c
-APILINKS := api-eq.c api-errno.c api-init.c api-me.c api-ni.c api-wrap.c
-LINKS = $(APILINKS) $(LIBLINKS)
-DISTCLEANFILES = $(LINKS) link-stamp *.orig *.rej
-
-$(LINKS): link-stamp
-link-stamp:
- -list='$(LIBLINKS)'; for f in $$list; do echo $$f ; ln -sf $(srcdir)/../portals/$$f .; done
- -list='$(APILINKS)'; for f in $$list; do echo $$f ; ln -sf $(srcdir)/../portals/$$f .; done
- echo timestamp > link-stamp
+DISTCLEANFILES = *.orig *.rej
DEFS =
-portals_SOURCES = $(LINKS) module.c proc.c debug.c lwt.c
+libcfs_SOURCES = module.c proc.c debug.c lwt.c
# Don't distribute any patched files.
dist-hook:
#include <linux/kp30.h>
#include <linux/portals_compat25.h>
+#include <linux/libcfs.h>
+
+unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL |
+ S_GMNAL | S_IBNAL);
+EXPORT_SYMBOL(portal_subsystem_debug);
+
+unsigned int portal_debug = (D_WARNING | D_DLMTRACE | D_ERROR | D_EMERG | D_HA |
+ D_RPCTRACE | D_VFSTRACE | D_MALLOC);
+EXPORT_SYMBOL(portal_debug);
+
+unsigned int portal_cerror = 1;
+EXPORT_SYMBOL(portal_cerror);
+
+unsigned int portal_printk;
+EXPORT_SYMBOL(portal_printk);
+
+unsigned int portal_stack;
+EXPORT_SYMBOL(portal_stack);
+
+#ifdef __KERNEL__
+atomic_t portal_kmemory = ATOMIC_INIT(0);
+EXPORT_SYMBOL(portal_kmemory);
+#endif
#define DEBUG_OVERFLOW 1024
static char *debug_buf = NULL;
char *portals_nid2str(int nal, ptl_nid_t nid, char *str)
{
switch(nal){
+/* XXX this should be a nal method of some sort */
+#ifndef CRAY_PORTALS
case TCPNAL:
/* userspace NAL */
case SOCKNAL:
case SCIMACNAL:
sprintf(str, "%u:%u", (__u32)(nid >> 32), (__u32)nid);
break;
+#endif
default:
- return NULL;
+ snprintf(str, PTL_NALFMT_SIZE-1, "(?%llx)", (long long)nid);
}
return str;
}
extern void (kping_client)(struct portal_ioctl_data *);
-struct nal_cmd_handler {
- nal_cmd_handler_t nch_handler;
- void * nch_private;
-};
-
-static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1];
-struct semaphore nal_cmd_sem;
-
#ifdef PORTAL_DEBUG
void kportal_assertion_failed(char *expr, char *file, const char *func,
const int line)
}
/* called when opening /dev/device */
-static int kportal_psdev_open(struct inode * inode, struct file * file)
+static int libcfs_psdev_open(struct inode * inode, struct file * file)
{
struct portals_device_userstate *pdu;
ENTRY;
}
/* called when closing /dev/device */
-static int kportal_psdev_release(struct inode * inode, struct file * file)
+static int libcfs_psdev_release(struct inode * inode, struct file * file)
{
struct portals_device_userstate *pdu;
ENTRY;
PORTAL_FREE(data, len);
}
-static int
-kportal_add_route(int gateway_nalid, ptl_nid_t gateway_nid,
- ptl_nid_t lo_nid, ptl_nid_t hi_nid)
-{
- int rc;
- kpr_control_interface_t *ci;
-
- ci = (kpr_control_interface_t *) PORTAL_SYMBOL_GET (kpr_control_interface);
- if (ci == NULL)
- return (-ENODEV);
-
- rc = ci->kprci_add_route (gateway_nalid, gateway_nid, lo_nid, hi_nid);
-
- PORTAL_SYMBOL_PUT(kpr_control_interface);
- return (rc);
-}
-
-static int
-kportal_del_route(int gw_nalid, ptl_nid_t gw_nid,
- ptl_nid_t lo, ptl_nid_t hi)
-{
- int rc;
- kpr_control_interface_t *ci;
-
- ci = (kpr_control_interface_t *)PORTAL_SYMBOL_GET(kpr_control_interface);
- if (ci == NULL)
- return (-ENODEV);
-
- rc = ci->kprci_del_route (gw_nalid, gw_nid, lo, hi);
-
- PORTAL_SYMBOL_PUT(kpr_control_interface);
- return (rc);
-}
-
-static int
-kportal_notify_router (int gw_nalid, ptl_nid_t gw_nid,
- int alive, time_t when)
-{
- int rc;
- kpr_control_interface_t *ci;
-
- /* No error if router not preset. Sysadmin is allowed to notify
- * _everywhere_ when a NID boots or crashes, even if they know
- * nothing of the peer. */
- ci = (kpr_control_interface_t *)PORTAL_SYMBOL_GET(kpr_control_interface);
- if (ci == NULL)
- return (0);
-
- rc = ci->kprci_notify (gw_nalid, gw_nid, alive, when);
-
- PORTAL_SYMBOL_PUT(kpr_control_interface);
- return (rc);
-}
-
-static int
-kportal_get_route(int index, __u32 *gateway_nalidp, ptl_nid_t *gateway_nidp,
- ptl_nid_t *lo_nidp, ptl_nid_t *hi_nidp, int *alivep)
-{
- int gateway_nalid;
- ptl_nid_t gateway_nid;
- ptl_nid_t lo_nid;
- ptl_nid_t hi_nid;
- int alive;
- int rc;
- kpr_control_interface_t *ci;
-
- ci = (kpr_control_interface_t *) PORTAL_SYMBOL_GET(kpr_control_interface);
- if (ci == NULL)
- return (-ENODEV);
-
- rc = ci->kprci_get_route(index, &gateway_nalid, &gateway_nid,
- &lo_nid, &hi_nid, &alive);
-
- if (rc == 0) {
- CDEBUG(D_IOCTL, "got route [%d] %d "LPX64":"LPX64" - "LPX64", %s\n",
- index, gateway_nalid, gateway_nid, lo_nid, hi_nid,
- alive ? "up" : "down");
-
- *gateway_nalidp = (__u32)gateway_nalid;
- *gateway_nidp = gateway_nid;
- *lo_nidp = lo_nid;
- *hi_nidp = hi_nid;
- *alivep = alive;
- }
-
- PORTAL_SYMBOL_PUT (kpr_control_interface);
- return (rc);
-}
-
-static int
-kportal_router_cmd(struct portals_cfg *pcfg, void * private)
-{
- int err = -EINVAL;
- ENTRY;
-
- switch(pcfg->pcfg_command) {
- default:
- CDEBUG(D_IOCTL, "Inappropriate cmd: %d\n", pcfg->pcfg_command);
- break;
-
- case NAL_CMD_ADD_ROUTE:
- CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n",
- pcfg->pcfg_nal, pcfg->pcfg_nid,
- pcfg->pcfg_nid2, pcfg->pcfg_nid3);
- err = kportal_add_route(pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
- pcfg->pcfg_nid2, pcfg->pcfg_nid3);
- break;
-
- case NAL_CMD_DEL_ROUTE:
- CDEBUG (D_IOCTL, "Removing routes via [%d] "LPU64" : "LPU64" - "LPU64"\n",
- pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
- pcfg->pcfg_nid2, pcfg->pcfg_nid3);
- err = kportal_del_route (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
- pcfg->pcfg_nid2, pcfg->pcfg_nid3);
- break;
-
- case NAL_CMD_NOTIFY_ROUTER: {
- CDEBUG (D_IOCTL, "Notifying peer [%d] "LPU64" %s @ %ld\n",
- pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
- pcfg->pcfg_flags ? "Enabling" : "Disabling",
- (time_t)pcfg->pcfg_nid3);
-
- err = kportal_notify_router (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
- pcfg->pcfg_flags,
- (time_t)pcfg->pcfg_nid3);
- break;
- }
-
- case NAL_CMD_GET_ROUTE:
- CDEBUG (D_IOCTL, "Getting route [%d]\n", pcfg->pcfg_count);
- err = kportal_get_route(pcfg->pcfg_count, &pcfg->pcfg_gw_nal,
- &pcfg->pcfg_nid,
- &pcfg->pcfg_nid2, &pcfg->pcfg_nid3,
- &pcfg->pcfg_flags);
- break;
- }
- RETURN(err);
-}
-
-int
-kportal_nal_cmd(struct portals_cfg *pcfg)
-{
- __u32 nal = pcfg->pcfg_nal;
- int rc = -EINVAL;
-
- ENTRY;
-
- down(&nal_cmd_sem);
- if (nal > 0 && nal <= NAL_MAX_NR && nal_cmd[nal].nch_handler) {
- CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal,
- pcfg->pcfg_command);
- rc = nal_cmd[nal].nch_handler(pcfg, nal_cmd[nal].nch_private);
- }
- up(&nal_cmd_sem);
- RETURN(rc);
-}
-
-ptl_handle_ni_t *
-kportal_get_ni (int nal)
-{
-
- switch (nal)
- {
- case QSWNAL:
- return (PORTAL_SYMBOL_GET(kqswnal_ni));
- case SOCKNAL:
- return (PORTAL_SYMBOL_GET(ksocknal_ni));
- case GMNAL:
- return (PORTAL_SYMBOL_GET(kgmnal_ni));
- case IBNAL:
- return (PORTAL_SYMBOL_GET(kibnal_ni));
- case TCPNAL:
- /* userspace NAL */
- return (NULL);
- case SCIMACNAL:
- return (PORTAL_SYMBOL_GET(kscimacnal_ni));
- default:
- /* A warning to a naive caller */
- CERROR ("unknown nal: %d\n", nal);
- return (NULL);
- }
-}
-
-void
-kportal_put_ni (int nal)
-{
-
- switch (nal)
- {
- case QSWNAL:
- PORTAL_SYMBOL_PUT(kqswnal_ni);
- break;
- case SOCKNAL:
- PORTAL_SYMBOL_PUT(ksocknal_ni);
- break;
- case GMNAL:
- PORTAL_SYMBOL_PUT(kgmnal_ni);
- break;
- case IBNAL:
- PORTAL_SYMBOL_PUT(kibnal_ni);
- break;
- case TCPNAL:
- /* A lesson to a malicious caller */
- LBUG ();
- case SCIMACNAL:
- PORTAL_SYMBOL_PUT(kscimacnal_ni);
- break;
- default:
- CERROR ("unknown nal: %d\n", nal);
- }
-}
+static DECLARE_RWSEM(ioctl_list_sem);
+static LIST_HEAD(ioctl_list);
-int
-kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private)
+/*
+ * Append 'hand' to the list of registered ioctl handlers.
+ *
+ * Returns 0 on success, or -EBUSY (without touching the list) when
+ * hand->item is already linked, i.e. list_empty(&hand->item) is false.
+ */
+int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand)
{
int rc = 0;
+ /* Test and insert under a single write lock so that no other
+ * registration can slip in between the check and the list_add. */
+ down_write(&ioctl_list_sem);
+ if (!list_empty(&hand->item))
+ rc = -EBUSY;
- CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler);
-
- if (nal > 0 && nal <= NAL_MAX_NR) {
- down(&nal_cmd_sem);
- if (nal_cmd[nal].nch_handler != NULL)
- rc = -EBUSY;
- else {
- nal_cmd[nal].nch_handler = handler;
- nal_cmd[nal].nch_private = private;
- }
- up(&nal_cmd_sem);
+ if (rc == 0) {
+ list_add_tail(&hand->item, &ioctl_list);
}
+ up_write(&ioctl_list_sem);
- return rc;
+ /* report the failure to the caller instead of always claiming success */
+ RETURN(rc);
}
+EXPORT_SYMBOL(libcfs_register_ioctl);
-int
-kportal_nal_unregister(int nal)
+/*
+ * Remove a handler previously added with libcfs_register_ioctl().
+ *
+ * Returns 0 on success, or -ENOENT if hand->item is not linked into any
+ * list (i.e. the handler does not look registered).  On success the item
+ * is re-initialised (list_del_init), so it reads as "empty" again.
+ */
+int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand)
{
int rc = 0;
- CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal);
-
- if (nal > 0 && nal <= NAL_MAX_NR) {
- down(&nal_cmd_sem);
- nal_cmd[nal].nch_handler = NULL;
- nal_cmd[nal].nch_private = NULL;
- up(&nal_cmd_sem);
+
+        /* Test and unlink under a single write lock so the decision and
+         * the list update cannot be separated by a concurrent caller. */
+        down_write(&ioctl_list_sem);
+        if (list_empty(&hand->item)) {
+                rc = -ENOENT;
+        } else {
+                list_del_init(&hand->item);
}
+        up_write(&ioctl_list_sem);
+
+        /* Propagate the failure; returning 0 unconditionally would hide
+         * -ENOENT from the caller. */
return rc;
}
+EXPORT_SYMBOL(libcfs_deregister_ioctl);
-
-static int kportal_ioctl(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg)
+static int libcfs_ioctl(struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg)
{
- int err = 0;
+ int err = -EINVAL;
char buf[1024];
struct portal_ioctl_data *data;
- char str[PTL_NALFMT_SIZE];
-
ENTRY;
if (current->fsuid != 0)
RETURN(-EINVAL);
portals_debug_mark_buffer(data->ioc_inlbuf1);
RETURN(0);
- case IOC_PORTAL_PING: {
- void (*ping)(struct portal_ioctl_data *);
-
- CDEBUG(D_IOCTL, "doing %d pings to nid "LPX64" (%s)\n",
- data->ioc_count, data->ioc_nid,
- portals_nid2str(data->ioc_nal, data->ioc_nid, str));
- ping = PORTAL_SYMBOL_GET(kping_client);
- if (!ping)
- CERROR("PORTAL_SYMBOL_GET failed\n");
- else {
- ping(data);
- PORTAL_SYMBOL_PUT(kping_client);
- }
- RETURN(0);
- }
-
- case IOC_PORTAL_GET_NID: {
- const ptl_handle_ni_t *nip;
- ptl_process_id_t pid;
-
- CDEBUG (D_IOCTL, "Getting nid for nal [%d]\n", data->ioc_nal);
-
- nip = kportal_get_ni (data->ioc_nal);
- if (nip == NULL)
- RETURN (-EINVAL);
-
- err = PtlGetId (*nip, &pid);
- LASSERT (err == PTL_OK);
- kportal_put_ni (data->ioc_nal);
-
- data->ioc_nid = pid.nid;
- if (copy_to_user ((char *)arg, data, sizeof (*data)))
- err = -EFAULT;
- break;
- }
-
- case IOC_PORTAL_NAL_CMD: {
- struct portals_cfg pcfg;
-
- LASSERT (data->ioc_plen1 == sizeof(pcfg));
- err = copy_from_user(&pcfg, (void *)data->ioc_pbuf1,
- sizeof(pcfg));
- if ( err ) {
- EXIT;
- return err;
- }
-
- CDEBUG (D_IOCTL, "nal command nal %d cmd %d\n", pcfg.pcfg_nal,
- pcfg.pcfg_command);
- err = kportal_nal_cmd(&pcfg);
- if (err == 0) {
- if (copy_to_user((char *)data->ioc_pbuf1, &pcfg,
- sizeof (pcfg)))
- err = -EFAULT;
- if (copy_to_user((char *)arg, data, sizeof (*data)))
- err = -EFAULT;
- }
- break;
- }
- case IOC_PORTAL_FAIL_NID: {
- const ptl_handle_ni_t *nip;
-
- CDEBUG (D_IOCTL, "fail nid: [%d] "LPU64" count %d\n",
- data->ioc_nal, data->ioc_nid, data->ioc_count);
-
- nip = kportal_get_ni (data->ioc_nal);
- if (nip == NULL)
- return (-EINVAL);
-
- err = PtlFailNid (*nip, data->ioc_nid, data->ioc_count);
- kportal_put_ni (data->ioc_nal);
- break;
- }
#if LWT_SUPPORT
case IOC_PORTAL_LWT_CONTROL:
err = lwt_control (data->ioc_flags, data->ioc_misc);
}
break;
- default:
+ default: {
+ struct libcfs_ioctl_handler *hand;
err = -EINVAL;
- break;
+ down_read(&ioctl_list_sem);
+ list_for_each_entry(hand, &ioctl_list, item) {
+ err = hand->handle_ioctl(data, cmd, arg);
+ if (err != -EINVAL)
+ break;
+ }
+ up_read(&ioctl_list_sem);
+ } break;
}
RETURN(err);
}
-static struct file_operations portalsdev_fops = {
- ioctl: kportal_ioctl,
- open: kportal_psdev_open,
- release: kportal_psdev_release
+static struct file_operations libcfs_fops = {
+ ioctl: libcfs_ioctl,
+ open: libcfs_psdev_open,
+ release: libcfs_psdev_release
};
-static struct miscdevice portal_dev = {
+static struct miscdevice libcfs_dev = {
PORTAL_MINOR,
"portals",
- &portalsdev_fops
+ &libcfs_fops
};
extern int insert_proc(void);
MODULE_DESCRIPTION("Portals v3.1");
MODULE_LICENSE("GPL");
-static int init_kportals_module(void)
+static int init_libcfs_module(void)
{
int rc;
goto cleanup_debug;
}
#endif
- sema_init(&nal_cmd_sem, 1);
-
- rc = misc_register(&portal_dev);
+ rc = misc_register(&libcfs_dev);
if (rc) {
CERROR("misc_register: error %d\n", rc);
goto cleanup_lwt;
}
- rc = PtlInit();
- if (rc) {
- CERROR("PtlInit: error %d\n", rc);
- goto cleanup_deregister;
- }
-
rc = insert_proc();
if (rc) {
CERROR("insert_proc: error %d\n", rc);
- goto cleanup_fini;
- }
-
- rc = kportal_nal_register(ROUTER, kportal_router_cmd, NULL);
- if (rc) {
- CERROR("kportal_nal_registre: ROUTER error %d\n", rc);
- goto cleanup_proc;
+ goto cleanup_deregister;
}
CDEBUG (D_OTHER, "portals setup OK\n");
return (0);
- cleanup_proc:
- remove_proc();
- cleanup_fini:
- PtlFini();
cleanup_deregister:
- misc_deregister(&portal_dev);
+ misc_deregister(&libcfs_dev);
cleanup_lwt:
#if LWT_SUPPORT
lwt_fini();
return rc;
}
-static void exit_kportals_module(void)
+static void exit_libcfs_module(void)
{
int rc;
- kportal_nal_unregister(ROUTER);
remove_proc();
- PtlFini();
CDEBUG(D_MALLOC, "before Portals cleanup: kmem %d\n",
atomic_read(&portal_kmemory));
- rc = misc_deregister(&portal_dev);
+ rc = misc_deregister(&libcfs_dev);
if (rc)
CERROR("misc_deregister error %d\n", rc);
printk(KERN_ERR "LustreError: portals_debug_cleanup: %d\n", rc);
}
-EXPORT_SYMBOL(lib_dispatch);
-EXPORT_SYMBOL(PtlMEAttach);
-EXPORT_SYMBOL(PtlMEInsert);
-EXPORT_SYMBOL(PtlMEUnlink);
-EXPORT_SYMBOL(PtlEQAlloc);
-EXPORT_SYMBOL(PtlMDAttach);
-EXPORT_SYMBOL(PtlMDUnlink);
-EXPORT_SYMBOL(PtlNIInit);
-EXPORT_SYMBOL(PtlNIFini);
-EXPORT_SYMBOL(PtlNIDebug);
-EXPORT_SYMBOL(PtlInit);
-EXPORT_SYMBOL(PtlFini);
-EXPORT_SYMBOL(PtlPut);
-EXPORT_SYMBOL(PtlGet);
-EXPORT_SYMBOL(ptl_err_str);
-EXPORT_SYMBOL(portal_subsystem_debug);
-EXPORT_SYMBOL(portal_debug);
-EXPORT_SYMBOL(portal_stack);
-EXPORT_SYMBOL(portal_printk);
-EXPORT_SYMBOL(portal_cerror);
-EXPORT_SYMBOL(PtlEQWait);
-EXPORT_SYMBOL(PtlEQFree);
-EXPORT_SYMBOL(PtlEQGet);
-EXPORT_SYMBOL(PtlGetId);
-EXPORT_SYMBOL(PtlMDBind);
-EXPORT_SYMBOL(lib_iov_nob);
-EXPORT_SYMBOL(lib_copy_iov2buf);
-EXPORT_SYMBOL(lib_copy_buf2iov);
-EXPORT_SYMBOL(lib_extract_iov);
-EXPORT_SYMBOL(lib_kiov_nob);
-EXPORT_SYMBOL(lib_copy_kiov2buf);
-EXPORT_SYMBOL(lib_copy_buf2kiov);
-EXPORT_SYMBOL(lib_extract_kiov);
-EXPORT_SYMBOL(lib_finalize);
-EXPORT_SYMBOL(lib_parse);
-EXPORT_SYMBOL(lib_fake_reply_msg);
-EXPORT_SYMBOL(lib_init);
-EXPORT_SYMBOL(lib_fini);
-EXPORT_SYMBOL(portal_kmemory);
EXPORT_SYMBOL(kportal_daemonize);
EXPORT_SYMBOL(kportal_blockallsigs);
-EXPORT_SYMBOL(kportal_nal_register);
-EXPORT_SYMBOL(kportal_nal_unregister);
EXPORT_SYMBOL(kportal_assertion_failed);
-EXPORT_SYMBOL(dispatch_name);
-EXPORT_SYMBOL(kportal_get_ni);
-EXPORT_SYMBOL(kportal_put_ni);
-EXPORT_SYMBOL(kportal_nal_cmd);
-module_init(init_kportals_module);
-module_exit (exit_kportals_module);
+module_init(init_libcfs_module);
+module_exit(exit_libcfs_module);
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
+DEFS =
+
+my_sources = api-eq.c api-init.c api-me.c api-errno.c api-ni.c api-wrap.c \
+ lib-dispatch.c lib-init.c lib-me.c lib-msg.c lib-eq.c \
+ lib-md.c lib-move.c lib-ni.c lib-pid.c
-CPPFLAGS=
-INCLUDES=-I$(top_srcdir)/portals/include -I$(top_srcdir)/include
noinst_LIBRARIES= libportals.a
-libportals_a_SOURCES= api-eq.c api-init.c api-me.c api-errno.c api-ni.c api-wrap.c lib-dispatch.c lib-init.c lib-me.c lib-msg.c lib-eq.c lib-md.c lib-move.c lib-ni.c lib-pid.c
+libportals_a_SOURCES= $(my_sources)
if LIBLUSTRE
libportals_a_CFLAGS= -fPIC
+else
+
+MODULE = portals
+modulenet_DATA = portals.o
+EXTRA_PROGRAMS = portals
+
+DISTCLEANFILES = *.orig *.rej
+
+portals_SOURCES= module.c $(my_sources)
+
+# Don't distribute any patched files.
+dist-hook:
+ list='$(EXT2C)'; for f in $$list; do rm -f $(distdir)/$$f; done
+
+include ../Rules.linux
endif
portals-objs := lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o \
lib-move.o lib-msg.o lib-ni.o lib-pid.o \
api-eq.o api-errno.o api-init.o api-me.o api-ni.o \
- api-wrap.o
+ api-wrap.o module.o
/* Nothing to do anymore... */
}
-int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t * ev)
+/*
+ * Examine the slot of 'eq' that the consumer sequence number points at.
+ *
+ * Returns PTL_EQ_EMPTY when the slot holds nothing newer than what has
+ * already been consumed, PTL_EQ_DROPPED when the slot's sequence number
+ * differs from the expected one (logged as a dropped event), and PTL_OK
+ * otherwise.  In both non-empty cases eq->sequence is moved past the slot
+ * just examined; leaving it untouched on PTL_EQ_DROPPED would make every
+ * later call re-read the same slot and report the same drop again.
+ *
+ * NOTE(review): no locking is done here; the visible caller (PtlEQPoll)
+ * calls this between nal->lock() and nal->unlock() -- confirm for any
+ * other callers.
+ */
+int ptl_get_event (ptl_eq_t *eq, ptl_event_t *ev)
{
- ptl_eq_t *eq;
- int rc, new_index;
- unsigned long flags;
- ptl_event_t *new_event;
- nal_t *nal;
+ int new_index = eq->sequence & (eq->size - 1);
+ ptl_event_t *new_event = &eq->base[new_index];
+ int rc;
ENTRY;
- if (!ptl_init)
- RETURN(PTL_NOINIT);
-
- nal = ptl_hndl2nal(&eventq);
- if (!nal)
- RETURN(PTL_INV_EQ);
-
- eq = ptl_handle2usereq(&eventq);
- nal->lock(nal, &flags);
-
- /* size must be a power of 2 to handle a wrapped sequence # */
- LASSERT (eq->size != 0 &&
- eq->size == LOWEST_BIT_SET (eq->size));
-
- new_index = eq->sequence & (eq->size - 1);
- new_event = &eq->base[new_index];
CDEBUG(D_INFO, "new_event: %p, sequence: %lu, eq->size: %u\n",
new_event, eq->sequence, eq->size);
+
if (PTL_SEQ_GT (eq->sequence, new_event->sequence)) {
- nal->unlock(nal, &flags);
RETURN(PTL_EQ_EMPTY);
}
if (eq->sequence != new_event->sequence) {
CERROR("DROPPING EVENT: eq seq %lu ev seq %lu\n",
eq->sequence, new_event->sequence);
rc = PTL_EQ_DROPPED;
} else {
rc = PTL_OK;
}
+ /* advance past this slot whether or not events were dropped */
eq->sequence = new_event->sequence + 1;
- nal->unlock(nal, &flags);
RETURN(rc);
}
-
-int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out)
+int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t * ev)
{
- int rc;
+ int which;
- /* PtlEQGet does the handle checking */
- while ((rc = PtlEQGet(eventq_in, event_out)) == PTL_EQ_EMPTY) {
- nal_t *nal = ptl_hndl2nal(&eventq_in);
-
- if (nal->yield)
- nal->yield(nal);
- }
-
- return rc;
+ return (PtlEQPoll (&eventq, 1, 0, ev, &which));
}
-#ifndef __KERNEL__
-#if 0
-static jmp_buf eq_jumpbuf;
-
-static void eq_timeout(int signal)
-{
- sigset_t set;
-
- /* signal will be automatically disabled in sig handler,
- * must enable it before long jump
- */
- sigemptyset(&set);
- sigaddset(&set, SIGALRM);
- sigprocmask(SIG_UNBLOCK, &set, NULL);
-
- longjmp(eq_jumpbuf, -1);
-}
-
-int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out,
- int timeout)
+int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out)
{
- static void (*prev) (int) = NULL;
- static int left_over;
- time_t time_at_start;
- int rc;
-
- if (setjmp(eq_jumpbuf)) {
- signal(SIGALRM, prev);
- alarm(left_over - timeout);
- return PTL_EQ_EMPTY;
- }
-
- left_over = alarm(timeout);
- prev = signal(SIGALRM, eq_timeout);
- time_at_start = time(NULL);
- if (left_over && left_over < timeout)
- alarm(left_over);
-
- rc = PtlEQWait(eventq_in, event_out);
-
- signal(SIGALRM, prev);
- alarm(left_over); /* Should compute how long we waited */
-
- return rc;
+ int which;
+
+ return (PtlEQPoll (&eventq_in, 1, PTL_TIME_FOREVER,
+ event_out, &which));
}
-#else
-#include <errno.h>
-
-/* FIXME
- * Here timeout need a trick with tcpnal, definitely unclean but OK for
- * this moment.
- */
-/* global variables defined by tcpnal */
-extern int __tcpnal_eqwait_timeout_value;
-extern int __tcpnal_eqwait_timedout;
-
-int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out,
- int timeout)
+int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout,
+ ptl_event_t *event_out, int *which_out)
{
- int rc;
+ nal_t *nal;
+ int i;
+ int rc;
+ unsigned long flags;
+
+ if (!ptl_init)
+ RETURN(PTL_NO_INIT);
- if (!timeout)
- return PtlEQWait(eventq_in, event_out);
+ if (neq_in < 1)
+ RETURN(PTL_EQ_INVALID);
+
+ nal = ptl_hndl2nal(&eventqs_in[0]);
+ if (nal == NULL)
+ RETURN(PTL_EQ_INVALID);
- __tcpnal_eqwait_timeout_value = timeout;
+ nal->lock(nal, &flags);
- while ((rc = PtlEQGet(eventq_in, event_out)) == PTL_EQ_EMPTY) {
- nal_t *nal = ptl_hndl2nal(&eventq_in);
+ for (;;) {
+ for (i = 0; i < neq_in; i++) {
+ ptl_eq_t *eq = ptl_handle2usereq(&eventqs_in[i]);
+
+ if (i > 0 &&
+ ptl_hndl2nal(&eventqs_in[i]) != nal) {
+ nal->unlock(nal, &flags);
+ RETURN (PTL_EQ_INVALID);
+ }
+
+ /* size must be a power of 2 to handle a wrapped sequence # */
+ LASSERT (eq->size != 0 &&
+ eq->size == LOWEST_BIT_SET (eq->size));
+
+ rc = ptl_get_event (eq, event_out);
+ if (rc != PTL_EQ_EMPTY) {
+ nal->unlock(nal, &flags);
+ *which_out = i;
+ RETURN(rc);
+ }
+ }
- if (nal->yield)
- nal->yield(nal);
-
- if (__tcpnal_eqwait_timedout) {
- if (__tcpnal_eqwait_timedout != ETIMEDOUT)
- printf("Warning: yield return error %d\n",
- __tcpnal_eqwait_timedout);
- rc = PTL_EQ_EMPTY;
- break;
+ if (timeout == 0) {
+ nal->unlock(nal, &flags);
+ RETURN (PTL_EQ_EMPTY);
}
+
+ timeout = nal->yield(nal, &flags, timeout);
}
-
- __tcpnal_eqwait_timeout_value = 0;
-
- return rc;
}
-#endif
-#endif /* __KERNEL__ */
"PTL_OK",
"PTL_SEGV",
- "PTL_NOSPACE",
- "PTL_INUSE",
+ "PTL_NO_SPACE",
+ "PTL_ME_IN_USE",
"PTL_VAL_FAILED",
"PTL_NAL_FAILED",
- "PTL_NOINIT",
- "PTL_INIT_DUP",
- "PTL_INIT_INV",
- "PTL_AC_INV_INDEX",
-
- "PTL_INV_ASIZE",
- "PTL_INV_HANDLE",
- "PTL_INV_MD",
- "PTL_INV_ME",
- "PTL_INV_NI",
+ "PTL_NO_INIT",
+ "PTL_IFACE_DUP",
+ "PTL_IFACE_INVALID",
+
+ "PTL_HANDLE_INVALID",
+ "PTL_MD_INVALID",
+ "PTL_ME_INVALID",
/* If you change these, you must update the number table in portals/errno.h */
- "PTL_ILL_MD",
- "PTL_INV_PROC",
- "PTL_INV_PSIZE",
- "PTL_INV_PTINDEX",
- "PTL_INV_REG",
-
- "PTL_INV_SR_INDX",
- "PTL_ML_TOOLONG",
- "PTL_ADDR_UNKNOWN",
- "PTL_INV_EQ",
+ "PTL_PROCESS_INVALID",
+ "PTL_PT_INDEX_INVALID",
+
+ "PTL_SR_INDEX_INVALID",
+ "PTL_EQ_INVALID",
"PTL_EQ_DROPPED",
"PTL_EQ_EMPTY",
- "PTL_NOUPDATE",
+ "PTL_MD_NO_UPDATE",
"PTL_FAIL",
- "PTL_NOT_IMPLEMENTED",
- "PTL_NO_ACK",
"PTL_IOV_TOO_MANY",
"PTL_IOV_TOO_SMALL",
- "PTL_EQ_INUSE",
+ "PTL_EQ_IN_USE",
+
+ "PTL_MAX_ERRNO"
};
/* If you change these, you must update the number table in portals/errno.h */
#include <portals/api-support.h>
int ptl_init;
-unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL |
- S_GMNAL | S_IBNAL);
-unsigned int portal_debug = (D_WARNING | D_DLMTRACE | D_ERROR | D_EMERG | D_HA |
- D_RPCTRACE | D_VFSTRACE | D_MALLOC);
-unsigned int portal_cerror = 1;
-unsigned int portal_printk;
-unsigned int portal_stack;
-
-#ifdef __KERNEL__
-atomic_t portal_kmemory = ATOMIC_INIT(0);
-#endif
int __p30_initialized;
int __p30_myr_initialized;
ptl_handle_ni_t __myr_ni_handle;
ptl_handle_ni_t __ip_ni_handle;
-int __p30_myr_timeout = 10;
-int __p30_ip_timeout;
-
-int PtlInit(void)
+int PtlInit(int *max_interfaces)
{
+ if (max_interfaces != NULL)
+ *max_interfaces = NAL_ENUM_END_MARKER;
if (ptl_init)
return PTL_OK;
+ LASSERT(!strcmp(ptl_err_str[PTL_MAX_ERRNO], "PTL_MAX_ERRNO"));
+
ptl_ni_init();
ptl_me_init();
ptl_eq_init();
ptl_init = 1;
- __p30_initialized = 1;
return PTL_OK;
}
ptl_ni_fini();
ptl_init = 0;
}
+
+
+void PtlSnprintHandle(char *str, int len, ptl_handle_any_t h)
+{
+ snprintf(str, len, "0x%lx."LPX64, h.nal_idx, h.cookie);
+}
int i;
if (!ptl_init)
- return PTL_NOINIT;
+ return PTL_NO_INIT;
ptl_ni_init_mutex_enter ();
if (nal->shutdown)
nal->shutdown (nal, ptl_num_interfaces);
ptl_ni_init_mutex_exit ();
- return PTL_NOSPACE;
+ return PTL_NO_SPACE;
}
handle->nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | ptl_num_interfaces;
int rc;
if (!ptl_init)
- return PTL_NOINIT;
+ return PTL_NO_INIT;
ptl_ni_init_mutex_enter ();
nal = ptl_hndl2nal (&ni);
if (nal == NULL) {
ptl_ni_init_mutex_exit ();
- return PTL_INV_HANDLE;
+ return PTL_HANDLE_INVALID;
}
idx = ni.nal_idx & NI_HANDLE_MASK;
if (!ptl_init) {
CERROR("Not initialized\n");
- return PTL_NOINIT;
+ return PTL_NO_INIT;
}
nal = ptl_hndl2nal(&any_h);
if (!nal)
- return PTL_INV_HANDLE;
+ return PTL_HANDLE_INVALID;
nal->forward(nal, cmd, argbuf, argsize, retbuf, retsize);
sizeof(ret));
if (rc != PTL_OK)
- return (rc == PTL_INV_HANDLE) ? PTL_INV_ME : rc;
+ return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
if (handle_out) {
handle_out->nal_idx = current_in.nal_idx;
sizeof(ret));
if (rc != PTL_OK)
- return (rc == PTL_INV_HANDLE) ? PTL_INV_ME : rc;
+ return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
return ret.rc;
}
sizeof(ret));
if (rc != PTL_OK)
- return (rc == PTL_INV_HANDLE) ? PTL_INV_ME : rc;
+ return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
return ret.rc;
}
if (!ptl_init) {
CERROR("PtlMDAttach/Bind/Update: Not initialized\n");
- return PTL_NOINIT;
+ return PTL_NO_INIT;
}
nal = ptl_hndl2nal(¤t_in);
if (!nal)
- return PTL_INV_HANDLE;
+ return PTL_HANDLE_INVALID;
if (nal->validate != NULL) /* nal->validate not a NOOP */
{
- if ((md_in.options & PTL_MD_IOV) == 0) /* contiguous */
+ if ((md_in.options & PTL_MD_IOVEC) == 0) /* contiguous */
{
rc = nal->validate (nal, md_in.start, md_in.length);
if (rc)
static ptl_handle_eq_t md2eq (ptl_md_t *md)
{
- if (PtlHandleEqual (md->eventq, PTL_EQ_NONE))
+ if (PtlHandleIsEqual (md->eventq, PTL_EQ_NONE))
return (PTL_EQ_NONE);
return (ptl_handle2usereq (&md->eventq)->cb_eq_handle);
}
if (rc != PTL_OK)
- return (rc == PTL_INV_HANDLE) ? PTL_INV_ME : rc;
+ return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
if (handle_out) {
handle_out->nal_idx = me_in.nal_idx;
int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in,
- ptl_handle_md_t * handle_out)
+ ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out)
{
PtlMDBind_in args;
PtlMDBind_out ret;
args.eq_in = md2eq(&md_in);
args.ni_in = ni_in;
args.md_in = md_in;
+ args.unlink_in = unlink_in;
rc = do_forward(ni_in, PTL_MDBIND,
&args, sizeof(args), &ret, sizeof(ret));
if (new_inout) {
rc = validate_md (md_in, *new_inout);
if (rc != PTL_OK)
- return (rc == PTL_INV_HANDLE) ? PTL_INV_MD : rc;
+ return (rc == PTL_HANDLE_INVALID) ? PTL_MD_INVALID : rc;
args.new_inout = *new_inout;
args.new_inout_valid = 1;
} else
args.new_inout_valid = 0;
- if (PtlHandleEqual (testq_in, PTL_EQ_NONE)) {
+ if (PtlHandleIsEqual (testq_in, PTL_EQ_NONE)) {
args.testq_in = PTL_EQ_NONE;
args.sequence_in = -1;
} else {
rc = do_forward(md_in, PTL_MDUPDATE, &args, sizeof(args), &ret,
sizeof(ret));
if (rc != PTL_OK)
- return (rc == PTL_INV_HANDLE) ? PTL_INV_MD : rc;
+ return (rc == PTL_HANDLE_INVALID) ? PTL_MD_INVALID : rc;
if (old_inout)
*old_inout = ret.old_inout;
rc = do_forward(md_in, PTL_MDUNLINK, &args, sizeof(args), &ret,
sizeof(ret));
if (rc != PTL_OK)
- return (rc == PTL_INV_HANDLE) ? PTL_INV_MD : rc;
+ return (rc == PTL_HANDLE_INVALID) ? PTL_MD_INVALID : rc;
return ret.rc;
}
nal_t *nal;
if (!ptl_init)
- return PTL_NOINIT;
+ return PTL_NO_INIT;
nal = ptl_hndl2nal (&interface);
if (nal == NULL)
- return PTL_INV_HANDLE;
+ return PTL_HANDLE_INVALID;
if (count != LOWEST_BIT_SET(count)) { /* not a power of 2 already */
do { /* knock off all but the top bit... */
PORTAL_ALLOC(ev, count * sizeof(ptl_event_t));
if (!ev)
- return PTL_NOSPACE;
+ return PTL_NO_SPACE;
for (i = 0; i < count; i++)
ev[i].sequence = 0;
PORTAL_ALLOC(eq, sizeof(*eq));
if (!eq) {
- rc = PTL_NOSPACE;
+ rc = PTL_NO_SPACE;
goto fail;
}
eq = lib_eq_alloc (nal);
if (eq == NULL)
- return (ret->rc = PTL_NOSPACE);
+ return (ret->rc = PTL_NO_SPACE);
state_lock(nal, &flags);
eq = ptl_handle2eq(&args->eventq_in, nal);
if (eq == NULL) {
- ret->rc = PTL_INV_EQ;
+ ret->rc = PTL_EQ_INVALID;
} else if (eq->eq_refcount != 0) {
- ret->rc = PTL_EQ_INUSE;
+ ret->rc = PTL_EQ_IN_USE;
} else {
if (nal->cb_unmap != NULL) {
struct iovec iov = {
space = nal->cb_malloc (nal, n * size);
if (space == NULL)
- return (PTL_NOSPACE);
+ return (PTL_NO_SPACE);
INIT_LIST_HEAD (&fl->fl_list);
fl->fl_objs = space;
(struct list_head *)nal->cb_malloc (nal, ni->ni_lh_hash_size
* sizeof (struct list_head));
if (ni->ni_lh_hash_table == NULL)
- return (PTL_NOSPACE);
+ return (PTL_NO_SPACE);
for (i = 0; i < ni->ni_lh_hash_size; i++)
INIT_LIST_HEAD (&ni->ni_lh_hash_table[i]);
ni->tbl.tbl = nal->cb_malloc(nal, sizeof(struct list_head) * ptl_size);
if (ni->tbl.tbl == NULL) {
- rc = PTL_NOSPACE;
+ rc = PTL_NO_SPACE;
goto out;
}
#include <portals/lib-p30.h>
#include <portals/arg-blocks.h>
-/*
- * must be called with state lock held
- */
+/* must be called with state lock held */
void lib_md_unlink(nal_cb_t * nal, lib_md_t * md)
{
- lib_me_t *me = md->me;
+ if ((md->md_flags & PTL_MD_FLAG_ZOMBIE) == 0) {
+ /* first unlink attempt... */
+ lib_me_t *me = md->me;
+
+ md->md_flags |= PTL_MD_FLAG_ZOMBIE;
+
+ /* Disassociate from ME (if any), and unlink it if it was created
+ * with PTL_UNLINK */
+ if (me != NULL) {
+ me->md = NULL;
+ if (me->unlink == PTL_UNLINK)
+ lib_me_unlink(nal, me);
+ }
+
+ /* ensure all future handle lookups fail */
+ lib_invalidate_handle(nal, &md->md_lh);
+ }
if (md->pending != 0) {
CDEBUG(D_NET, "Queueing unlink of md %p\n", md);
- md->md_flags |= PTL_MD_FLAG_UNLINK;
return;
}
if (nal->cb_unmap_pages != NULL)
nal->cb_unmap_pages (nal, md->md_niov, md->md_iov.kiov,
&md->md_addrkey);
- } else if (nal->cb_unmap != NULL)
+ } else if (nal->cb_unmap != NULL) {
nal->cb_unmap (nal, md->md_niov, md->md_iov.iov,
&md->md_addrkey);
-
- if (me) {
- me->md = NULL;
- if (me->unlink == PTL_UNLINK)
- lib_me_unlink(nal, me);
}
- if (md->eq != NULL)
- {
+ if (md->eq != NULL) {
md->eq->eq_refcount--;
LASSERT (md->eq->eq_refcount >= 0);
}
- lib_invalidate_handle (nal, &md->md_lh);
list_del (&md->md_list);
lib_md_free(nal, md);
}
static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private,
ptl_md_t *md, ptl_handle_eq_t *eqh, int unlink)
{
- const int max_size_opts = PTL_MD_AUTO_UNLINK |
- PTL_MD_MAX_SIZE;
lib_eq_t *eq = NULL;
int rc;
int i;
* otherwise caller may only lib_md_free() it.
*/
- if (!PtlHandleEqual (*eqh, PTL_EQ_NONE)) {
+ if (!PtlHandleIsEqual (*eqh, PTL_EQ_NONE)) {
eq = ptl_handle2eq(eqh, nal);
if (eq == NULL)
- return PTL_INV_EQ;
+ return PTL_EQ_INVALID;
}
/* Must check this _before_ allocation. Also, note that non-iov
* MDs must set md_niov to 0. */
- LASSERT((md->options & (PTL_MD_IOV | PTL_MD_KIOV)) == 0 ||
+ LASSERT((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0 ||
md->niov <= PTL_MD_MAX_IOV);
- if ((md->options & max_size_opts) != 0 && /* max size used */
+ /* This implementation doesn't know how to create START events or
+ * disable END events. Best to LASSERT our caller is compliant so
+ * we find out quickly... */
+ LASSERT (PtlHandleIsEqual (*eqh, PTL_EQ_NONE) ||
+ ((md->options & PTL_MD_EVENT_START_DISABLE) != 0 &&
+ (md->options & PTL_MD_EVENT_END_DISABLE) == 0));
+
+ if ((md->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
(md->max_size < 0 || md->max_size > md->length)) // illegal max_size
- return PTL_INV_MD;
+ return PTL_MD_INVALID;
new->me = NULL;
new->start = md->start;
new->length = md->length;
new->offset = 0;
new->max_size = md->max_size;
- new->unlink = unlink;
new->options = md->options;
new->user_ptr = md->user_ptr;
new->eq = eq;
new->threshold = md->threshold;
new->pending = 0;
- new->md_flags = 0;
+ new->md_flags = (unlink == PTL_UNLINK) ? PTL_MD_FLAG_AUTO_UNLINK : 0;
- if ((md->options & PTL_MD_IOV) != 0) {
+ if ((md->options & PTL_MD_IOVEC) != 0) {
int total_length = 0;
if ((md->options & PTL_MD_KIOV) != 0) /* Can't specify both */
- return PTL_INV_MD;
+ return PTL_MD_INVALID;
new->md_niov = md->niov;
}
} else if ((md->options & PTL_MD_KIOV) != 0) {
#ifndef __KERNEL__
- return PTL_INV_MD;
+ return PTL_MD_INVALID;
#else
int total_length = 0;
/* Trap attempt to use paged I/O if unsupported early. */
if (nal->cb_send_pages == NULL ||
nal->cb_recv_pages == NULL)
- return PTL_INV_MD;
+ return PTL_MD_INVALID;
new->md_niov = md->niov;
new->options = md->options;
new->user_ptr = md->user_ptr;
ptl_eq2handle(&new->eventq, md->eq);
- new->niov = ((md->options & (PTL_MD_IOV | PTL_MD_KIOV)) == 0) ? 0 : md->md_niov;
+ new->niov = ((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0) ? 0 : md->md_niov;
}
int do_PtlMDAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
lib_md_t *md;
unsigned long flags;
- if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOV)) != 0 &&
+ if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 &&
args->md_in.niov > PTL_MD_MAX_IOV) /* too many fragments */
return (ret->rc = PTL_IOV_TOO_MANY);
md = lib_md_alloc(nal, &args->md_in);
if (md == NULL)
- return (ret->rc = PTL_NOSPACE);
+ return (ret->rc = PTL_NO_SPACE);
state_lock(nal, &flags);
me = ptl_handle2me(&args->me_in, nal);
if (me == NULL) {
- ret->rc = PTL_INV_ME;
+ ret->rc = PTL_ME_INVALID;
} else if (me->md != NULL) {
- ret->rc = PTL_INUSE;
+ ret->rc = PTL_ME_IN_USE;
} else {
ret->rc = lib_md_build(nal, md, private, &args->md_in,
&args->eq_in, args->unlink_in);
lib_md_t *md;
unsigned long flags;
- if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOV)) != 0 &&
+ if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 &&
args->md_in.niov > PTL_MD_MAX_IOV) /* too many fragments */
return (ret->rc = PTL_IOV_TOO_MANY);
md = lib_md_alloc(nal, &args->md_in);
if (md == NULL)
- return (ret->rc = PTL_NOSPACE);
+ return (ret->rc = PTL_NO_SPACE);
state_lock(nal, &flags);
- ret->rc = lib_md_build(nal, md, private,
- &args->md_in, &args->eq_in, PTL_UNLINK);
+ ret->rc = lib_md_build(nal, md, private, &args->md_in,
+ &args->eq_in, args->unlink_in);
if (ret->rc == PTL_OK) {
ptl_md2handle(&ret->handle_out, md);
md = ptl_handle2md(&args->md_in, nal);
if (md == NULL) {
state_unlock(nal, &flags);
- return (ret->rc = PTL_INV_MD);
+ return (ret->rc = PTL_MD_INVALID);
}
/* If the MD is busy, lib_md_unlink just marks it for deletion, and
memset(&ev, 0, sizeof(ev));
ev.type = PTL_EVENT_UNLINK;
- ev.status = PTL_OK;
+ ev.ni_fail_type = PTL_OK;
ev.unlinked = 1;
lib_md_deconstruct(nal, md, &ev.mem_desc);
md = ptl_handle2md(&args->md_in, nal);
if (md == NULL) {
- ret->rc = PTL_INV_MD;
+ ret->rc = PTL_MD_INVALID;
goto out;
}
/* XXX fttb, the new MD must be the same type wrt fragmentation */
if (((new->options ^ md->options) &
- (PTL_MD_IOV | PTL_MD_KIOV)) != 0) {
- ret->rc = PTL_INV_MD;
+ (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0) {
+ ret->rc = PTL_MD_INVALID;
goto out;
}
goto out;
}
- if (!PtlHandleEqual (args->testq_in, PTL_EQ_NONE)) {
+ if (!PtlHandleIsEqual (args->testq_in, PTL_EQ_NONE)) {
test_eq = ptl_handle2eq(&args->testq_in, nal);
if (test_eq == NULL) {
- ret->rc = PTL_INV_EQ;
+ ret->rc = PTL_EQ_INVALID;
goto out;
}
}
if (md->pending != 0) {
- ret->rc = PTL_NOUPDATE;
+ ret->rc = PTL_MD_NO_UPDATE;
goto out;
}
if (test_eq == NULL ||
test_eq->sequence == args->sequence_in) {
lib_me_t *me = md->me;
+ int unlink = (md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) ?
+ PTL_UNLINK : PTL_RETAIN;
// #warning this does not track eq refcounts properly
ret->rc = lib_md_build(nal, md, private,
- new, &new->eventq, md->unlink);
+ new, &new->eventq, unlink);
md->me = me;
} else {
- ret->rc = PTL_NOUPDATE;
+ ret->rc = PTL_MD_NO_UPDATE;
}
out:
lib_me_t *me;
if (args->index_in >= tbl->size)
- return ret->rc = PTL_INV_PTINDEX;
+ return ret->rc = PTL_PT_INDEX_INVALID;
/* Should check for valid matchid, but not yet */
if (0)
- return ret->rc = PTL_INV_PROC;
+ return ret->rc = PTL_PROCESS_INVALID;
me = lib_me_alloc (nal);
if (me == NULL)
- return (ret->rc = PTL_NOSPACE);
+ return (ret->rc = PTL_NO_SPACE);
state_lock(nal, &flags);
new = lib_me_alloc (nal);
if (new == NULL)
- return (ret->rc = PTL_NOSPACE);
+ return (ret->rc = PTL_NO_SPACE);
/* Should check for valid matchid, but not yet */
lib_me_free (nal, new);
state_unlock (nal, &flags);
- return (ret->rc = PTL_INV_ME);
+ return (ret->rc = PTL_ME_INVALID);
}
new->match_id = args->match_id_in;
me = ptl_handle2me(&args->current_in, nal);
if (me == NULL) {
- ret->rc = PTL_INV_ME;
+ ret->rc = PTL_ME_INVALID;
} else {
lib_me_unlink(nal, me);
ret->rc = PTL_OK;
unsigned long flags;
if (args->index_in < 0 || args->index_in >= tbl->size)
- return ret->rc = PTL_INV_PTINDEX;
+ return ret->rc = PTL_PT_INDEX_INVALID;
nal->cb_printf(nal, "Portal table index %d\n", args->index_in);
me = ptl_handle2me(&args->current_in, nal);
if (me == NULL) {
- ret->rc = PTL_INV_ME;
+ ret->rc = PTL_ME_INVALID;
} else {
lib_me_dump(nal, me);
ret->rc = PTL_OK;
#include <portals/lib-p30.h>
#include <portals/arg-blocks.h>
-/*
- * Right now it does not check access control lists.
- *
- * We only support one MD per ME, which is how the Portals 3.1 spec is written.
- * All previous complication is removed.
- */
-
-static lib_me_t *
-lib_find_me(nal_cb_t *nal, int index, int op_mask, ptl_nid_t src_nid,
- ptl_pid_t src_pid, ptl_size_t rlength, ptl_size_t roffset,
- ptl_match_bits_t match_bits, ptl_size_t *mlength_out,
- ptl_size_t *offset_out, int *unlink_out)
+/* forward ref */
+static void lib_commit_md (nal_cb_t *nal, lib_md_t *md, lib_msg_t *msg);
+
+static lib_md_t *
+lib_match_md(nal_cb_t *nal, int index, int op_mask,
+ ptl_nid_t src_nid, ptl_pid_t src_pid,
+ ptl_size_t rlength, ptl_size_t roffset,
+ ptl_match_bits_t match_bits, lib_msg_t *msg,
+ ptl_size_t *mlength_out, ptl_size_t *offset_out)
{
lib_ni_t *ni = &nal->ni;
struct list_head *match_list = &ni->tbl.tbl[index];
lib_md_t *md;
ptl_size_t mlength;
ptl_size_t offset;
-
ENTRY;
CDEBUG (D_NET, "Request from "LPU64".%d of length %d into portal %d "
LASSERT (me == md->me);
- /* MD deactivated */
- if (md->threshold == 0)
- continue;
-
/* mismatched MD op */
if ((md->options & op_mask) == 0)
continue;
+ /* MD exhausted */
+ if (lib_md_exhausted(md))
+ continue;
+
/* mismatched ME nid/pid? */
if (me->match_id.nid != PTL_NID_ANY &&
me->match_id.nid != src_nid)
else
offset = roffset;
- mlength = md->length - offset;
- if ((md->options & PTL_MD_MAX_SIZE) != 0 &&
- mlength > md->max_size)
+ if ((md->options & PTL_MD_MAX_SIZE) != 0) {
mlength = md->max_size;
+ LASSERT (md->offset + mlength <= md->length);
+ } else {
+ mlength = md->length - offset;
+ }
if (rlength <= mlength) { /* fits in allowed space */
mlength = rlength;
goto failed;
}
+ /* Commit to this ME/MD */
+ CDEBUG(D_NET, "Incoming %s index %x from "LPU64"/%u of "
+ "length %d/%d into md "LPX64" [%d] + %d\n",
+ (op_mask == PTL_MD_OP_PUT) ? "put" : "get",
+ index, src_nid, src_pid, mlength, rlength,
+ md->md_lh.lh_cookie, md->md_niov, offset);
+
+ lib_commit_md(nal, md, msg);
md->offset = offset + mlength;
+ /* NB Caller sets ev.type and ev.hdr_data */
+ msg->ev.initiator.nid = src_nid;
+ msg->ev.initiator.pid = src_pid;
+ msg->ev.portal = index;
+ msg->ev.match_bits = match_bits;
+ msg->ev.rlength = rlength;
+ msg->ev.mlength = mlength;
+ msg->ev.offset = offset;
+
+ lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
+
*offset_out = offset;
*mlength_out = mlength;
- *unlink_out = ((md->options & PTL_MD_AUTO_UNLINK) != 0 &&
- md->offset >= (md->length - md->max_size));
- RETURN (me);
+
+ /* Auto-unlink NOW, so the ME gets unlinked if required.
+ * We bumped md->pending above so the MD just gets flagged
+ * for unlink when it is finalized. */
+ if ((md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) != 0 &&
+ lib_md_exhausted(md))
+ lib_md_unlink(nal, md);
+
+ RETURN (md);
}
failed:
lib_ni_t *ni = &nal->ni;
ptl_size_t mlength = 0;
ptl_size_t offset = 0;
- int unlink = 0;
ptl_err_t rc;
- lib_me_t *me;
lib_md_t *md;
unsigned long flags;
state_lock(nal, &flags);
- me = lib_find_me(nal, hdr->msg.put.ptl_index, PTL_MD_OP_PUT,
- hdr->src_nid, hdr->src_pid,
- hdr->payload_length, hdr->msg.put.offset,
- hdr->msg.put.match_bits,
- &mlength, &offset, &unlink);
- if (me == NULL) {
+ md = lib_match_md(nal, hdr->msg.put.ptl_index, PTL_MD_OP_PUT,
+ hdr->src_nid, hdr->src_pid,
+ hdr->payload_length, hdr->msg.put.offset,
+ hdr->msg.put.match_bits, msg,
+ &mlength, &offset);
+ if (md == NULL) {
state_unlock(nal, &flags);
return (PTL_FAIL);
}
- md = me->md;
- CDEBUG(D_NET, "Incoming put index %x from "LPU64"/%u of length %d/%d "
- "into md "LPX64" [%d] + %d\n", hdr->msg.put.ptl_index,
- hdr->src_nid, hdr->src_pid, mlength, hdr->payload_length,
- md->md_lh.lh_cookie, md->md_niov, offset);
-
- lib_commit_md(nal, md, msg);
-
- msg->ev.type = PTL_EVENT_PUT;
- msg->ev.initiator.nid = hdr->src_nid;
- msg->ev.initiator.pid = hdr->src_pid;
- msg->ev.portal = hdr->msg.put.ptl_index;
- msg->ev.match_bits = hdr->msg.put.match_bits;
- msg->ev.rlength = hdr->payload_length;
- msg->ev.mlength = mlength;
- msg->ev.offset = offset;
+ msg->ev.type = PTL_EVENT_PUT_END;
msg->ev.hdr_data = hdr->msg.put.hdr_data;
- lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
-
if (!ptl_is_wire_handle_none(&hdr->msg.put.ack_wmd) &&
!(md->options & PTL_MD_ACK_DISABLE)) {
msg->ack_wmd = hdr->msg.put.ack_wmd;
ni->counters.recv_count++;
ni->counters.recv_length += mlength;
- /* only unlink after MD's pending count has been bumped in
- * lib_commit_md() otherwise lib_me_unlink() will nuke it */
- if (unlink)
- lib_me_unlink (nal, me);
-
state_unlock(nal, &flags);
rc = lib_recv(nal, private, msg, md, offset, mlength,
lib_ni_t *ni = &nal->ni;
ptl_size_t mlength = 0;
ptl_size_t offset = 0;
- int unlink = 0;
- lib_me_t *me;
lib_md_t *md;
ptl_hdr_t reply;
unsigned long flags;
state_lock(nal, &flags);
- me = lib_find_me(nal, hdr->msg.get.ptl_index, PTL_MD_OP_GET,
- hdr->src_nid, hdr->src_pid,
- hdr->msg.get.sink_length, hdr->msg.get.src_offset,
- hdr->msg.get.match_bits,
- &mlength, &offset, &unlink);
- if (me == NULL) {
+ md = lib_match_md(nal, hdr->msg.get.ptl_index, PTL_MD_OP_GET,
+ hdr->src_nid, hdr->src_pid,
+ hdr->msg.get.sink_length, hdr->msg.get.src_offset,
+ hdr->msg.get.match_bits, msg,
+ &mlength, &offset);
+ if (md == NULL) {
state_unlock(nal, &flags);
return (PTL_FAIL);
}
- md = me->md;
- CDEBUG(D_NET, "Incoming get index %d from "LPU64".%u of length %d/%d "
- "from md "LPX64" [%d] + %d\n", hdr->msg.get.ptl_index,
- hdr->src_nid, hdr->src_pid, mlength, hdr->payload_length,
- md->md_lh.lh_cookie, md->md_niov, offset);
-
- lib_commit_md(nal, md, msg);
-
- msg->ev.type = PTL_EVENT_GET;
- msg->ev.initiator.nid = hdr->src_nid;
- msg->ev.initiator.pid = hdr->src_pid;
- msg->ev.portal = hdr->msg.get.ptl_index;
- msg->ev.match_bits = hdr->msg.get.match_bits;
- msg->ev.rlength = hdr->payload_length;
- msg->ev.mlength = mlength;
- msg->ev.offset = offset;
+ msg->ev.type = PTL_EVENT_GET_END;
msg->ev.hdr_data = 0;
- lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
-
ni->counters.send_count++;
ni->counters.send_length += mlength;
- /* only unlink after MD's refcount has been bumped in
- * lib_commit_md() otherwise lib_me_unlink() will nuke it */
- if (unlink)
- lib_me_unlink (nal, me);
-
state_unlock(nal, &flags);
memset (&reply, 0, sizeof (reply));
lib_commit_md(nal, md, msg);
- msg->ev.type = PTL_EVENT_REPLY;
+ msg->ev.type = PTL_EVENT_REPLY_END;
msg->ev.initiator.nid = hdr->src_nid;
msg->ev.initiator.pid = hdr->src_pid;
msg->ev.rlength = rlength;
return;
}
- do_gettimeofday(&msg->ev.arrival_time);
-
switch (hdr->type) {
case PTL_MSG_ACK:
rc = parse_ack(nal, hdr, private, msg);
{
CERROR(LPU64": Dropping PUT to "LPU64": simulated failure\n",
nal->ni.nid, id->nid);
- return (ret->rc = PTL_INV_PROC);
+ return (ret->rc = PTL_PROCESS_INVALID);
}
msg = lib_msg_alloc(nal);
if (msg == NULL) {
CERROR(LPU64": Dropping PUT to "LPU64": ENOMEM on lib_msg_t\n",
ni->nid, id->nid);
- return (ret->rc = PTL_NOSPACE);
+ return (ret->rc = PTL_NO_SPACE);
}
state_lock(nal, &flags);
lib_msg_free(nal, msg);
state_unlock(nal, &flags);
- return (ret->rc = PTL_INV_MD);
+ return (ret->rc = PTL_MD_INVALID);
}
CDEBUG(D_NET, "PtlPut -> %Lu: %lu\n", (unsigned long long)id->nid,
lib_commit_md(nal, md, msg);
- msg->ev.type = PTL_EVENT_SENT;
+ msg->ev.type = PTL_EVENT_SEND_END;
msg->ev.initiator.nid = ni->nid;
msg->ev.initiator.pid = ni->pid;
msg->ev.portal = args->portal_in;
}
lib_msg_t *
-lib_fake_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, lib_md_t *getmd)
+lib_create_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, lib_msg_t *getmsg)
{
/* The NAL can DMA direct to the GET md (i.e. no REPLY msg). This
- * returns a msg the NAL can pass to lib_finalize() so that a REPLY
- * event still occurs.
+ * returns a msg for the NAL to pass to lib_finalize() when the sink
+ * data has been received.
*
- * CAVEAT EMPTOR: 'getmd' is passed by pointer so it MUST be valid.
- * This can only be guaranteed while a lib_msg_t holds a reference
- * on it (ie. pending > 0), so best call this before the
- * lib_finalize() of the original GET. */
+ * CAVEAT EMPTOR: 'getmsg' is the original GET, which is freed when
+ * lib_finalize() is called on it, so the NAL must call this first */
lib_ni_t *ni = &nal->ni;
lib_msg_t *msg = lib_msg_alloc(nal);
+ lib_md_t *getmd = getmsg->md;
unsigned long flags;
state_lock(nal, &flags);
lib_commit_md (nal, getmd, msg);
- msg->ev.type = PTL_EVENT_REPLY;
+ msg->ev.type = PTL_EVENT_REPLY_END;
msg->ev.initiator.nid = peer_nid;
msg->ev.initiator.pid = 0; /* XXX FIXME!!! */
msg->ev.rlength = msg->ev.mlength = getmd->length;
{
CERROR(LPU64": Dropping PUT to "LPU64": simulated failure\n",
nal->ni.nid, id->nid);
- return (ret->rc = PTL_INV_PROC);
+ return (ret->rc = PTL_PROCESS_INVALID);
}
msg = lib_msg_alloc(nal);
if (msg == NULL) {
CERROR(LPU64": Dropping GET to "LPU64": ENOMEM on lib_msg_t\n",
ni->nid, id->nid);
- return (ret->rc = PTL_NOSPACE);
+ return (ret->rc = PTL_NO_SPACE);
}
state_lock(nal, &flags);
lib_msg_free(nal, msg);
state_unlock(nal, &flags);
- return ret->rc = PTL_INV_MD;
+ return ret->rc = PTL_MD_INVALID;
}
CDEBUG(D_NET, "PtlGet -> %Lu: %lu\n", (unsigned long long)id->nid,
lib_commit_md(nal, md, msg);
- msg->ev.type = PTL_EVENT_SENT;
+ msg->ev.type = PTL_EVENT_SEND_END;
msg->ev.initiator.nid = ni->nid;
msg->ev.initiator.pid = ni->pid;
msg->ev.portal = args->portal_in;
if (status == PTL_OK &&
!ptl_is_wire_handle_none(&msg->ack_wmd)) {
- LASSERT(msg->ev.type == PTL_EVENT_PUT);
+ LASSERT(msg->ev.type == PTL_EVENT_PUT_END);
memset (&ack, 0, sizeof (ack));
ack.type = HTON__u32 (PTL_MSG_ACK);
LASSERT (md->pending >= 0);
/* Should I unlink this MD? */
- unlink = (md->pending == 0 && /* No other refs */
- (md->threshold == 0 || /* All ops done */
- md->md_flags & PTL_MD_FLAG_UNLINK) != 0); /* black spot */
-
- msg->ev.status = status;
+ if (md->pending != 0) /* other refs */
+ unlink = 0;
+ else if ((md->md_flags & PTL_MD_FLAG_ZOMBIE) != 0)
+ unlink = 1;
+ else if ((md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) == 0)
+ unlink = 0;
+ else
+ unlink = lib_md_exhausted(md);
+
+ msg->ev.ni_fail_type = status;
msg->ev.unlinked = unlink;
if (md->eq != NULL)
else if (args->register_in == PTL_SR_MSGS_MAX)
ret->status_out = count->msgs_max;
else
- ret->rc = PTL_INV_SR_INDX;
+ ret->rc = PTL_SR_INDEX_INVALID;
return ret->rc;
}
if ((rc = nal->cb_dist(nal, nid, &dist)) != 0) {
ret->distance_out = (unsigned long) MAX_DIST;
- return PTL_INV_PROC;
+ return PTL_PROCESS_INVALID;
}
ret->distance_out = dist;
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/smp_lock.h>
+#include <linux/unistd.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <asm/uaccess.h>
+#include <asm/segment.h>
+#include <linux/miscdevice.h>
+
+#include <portals/lib-p30.h>
+#include <portals/p30.h>
+#include <linux/kp30.h>
+#include <linux/kpr.h>
+#include <linux/portals_compat25.h>
+
+extern void (kping_client)(struct portal_ioctl_data *);
+
+struct nal_cmd_handler {
+ nal_cmd_handler_t nch_handler;
+ void * nch_private;
+};
+
+static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1];
+static DECLARE_MUTEX(nal_cmd_sem);
+
+
+/*
+ * Ask the router's control interface to add a route.
+ *
+ * The interface is looked up with PORTAL_SYMBOL_GET(); if that lookup
+ * yields NULL, -ENODEV is returned.  Otherwise the result of
+ * kprci_add_route() is returned after the symbol reference is dropped.
+ */
+static int
+kportal_add_route(int gateway_nalid, ptl_nid_t gateway_nid,
+                  ptl_nid_t lo_nid, ptl_nid_t hi_nid)
+{
+        kpr_control_interface_t *router;
+        int                      status;
+
+        router = (kpr_control_interface_t *)
+                 PORTAL_SYMBOL_GET (kpr_control_interface);
+
+        if (router != NULL) {
+                status = router->kprci_add_route (gateway_nalid, gateway_nid,
+                                                  lo_nid, hi_nid);
+                PORTAL_SYMBOL_PUT(kpr_control_interface);
+        } else {
+                status = -ENODEV;
+        }
+
+        return (status);
+}
+
+/*
+ * Ask the router's control interface to delete routes.
+ *
+ * Returns -ENODEV when the control interface symbol cannot be obtained,
+ * otherwise whatever kprci_del_route() returns.
+ */
+static int
+kportal_del_route(int gw_nalid, ptl_nid_t gw_nid,
+                  ptl_nid_t lo, ptl_nid_t hi)
+{
+        kpr_control_interface_t *router;
+        int                      status = -ENODEV;
+
+        router = (kpr_control_interface_t *)
+                 PORTAL_SYMBOL_GET(kpr_control_interface);
+
+        if (router != NULL) {
+                status = router->kprci_del_route (gw_nalid, gw_nid, lo, hi);
+                /* drop the reference taken by PORTAL_SYMBOL_GET() */
+                PORTAL_SYMBOL_PUT(kpr_control_interface);
+        }
+
+        return (status);
+}
+
+/*
+ * Pass a peer up/down notification to the router's control interface.
+ *
+ * Returns 0 when the control interface symbol cannot be obtained,
+ * otherwise whatever kprci_notify() returns.
+ */
+static int
+kportal_notify_router (int gw_nalid, ptl_nid_t gw_nid,
+                       int alive, time_t when)
+{
+        kpr_control_interface_t *router;
+        int                      status = 0;
+
+        /* No error if router not present.  Sysadmin is allowed to notify
+         * _everywhere_ when a NID boots or crashes, even if they know
+         * nothing of the peer. */
+        router = (kpr_control_interface_t *)
+                 PORTAL_SYMBOL_GET(kpr_control_interface);
+
+        if (router != NULL) {
+                status = router->kprci_notify (gw_nalid, gw_nid, alive, when);
+                PORTAL_SYMBOL_PUT(kpr_control_interface);
+        }
+
+        return (status);
+}
+
+/*
+ * Fetch route number 'index' from the router's control interface.
+ *
+ * The caller's output parameters are written only when kprci_get_route()
+ * returns 0.  Returns -ENODEV when the control interface symbol cannot be
+ * obtained, otherwise the kprci_get_route() result.
+ */
+static int
+kportal_get_route(int index, __u32 *gateway_nalidp, ptl_nid_t *gateway_nidp,
+                  ptl_nid_t *lo_nidp, ptl_nid_t *hi_nidp, int *alivep)
+{
+        kpr_control_interface_t *router;
+        ptl_nid_t                gw_nid;
+        ptl_nid_t                range_lo;
+        ptl_nid_t                range_hi;
+        int                      gw_nalid;
+        int                      is_up;
+        int                      status;
+
+        router = (kpr_control_interface_t *)
+                 PORTAL_SYMBOL_GET(kpr_control_interface);
+        if (router == NULL)
+                return (-ENODEV);
+
+        status = router->kprci_get_route(index, &gw_nalid, &gw_nid,
+                                         &range_lo, &range_hi, &is_up);
+        if (status != 0)
+                goto out;
+
+        CDEBUG(D_IOCTL, "got route [%d] %d "LPX64":"LPX64" - "LPX64", %s\n",
+               index, gw_nalid, gw_nid, range_lo, range_hi,
+               is_up ? "up" : "down");
+
+        /* hand the results back through the caller's pointers */
+        *gateway_nalidp = (__u32)gw_nalid;
+        *gateway_nidp   = gw_nid;
+        *lo_nidp        = range_lo;
+        *hi_nidp        = range_hi;
+        *alivep         = is_up;
+out:
+        PORTAL_SYMBOL_PUT (kpr_control_interface);
+        return (status);
+}
+
+/*
+ * NAL command handler registered for the ROUTER pseudo-NAL.
+ *
+ * Dispatches on pcfg->pcfg_command to the add/delete/notify/get route
+ * helpers.  Unrecognised commands are logged and yield -EINVAL; otherwise
+ * the helper's return value is returned.  'private' is not used.
+ */
+static int
+kportal_router_cmd(struct portals_cfg *pcfg, void * private)
+{
+        int err = -EINVAL;
+        ENTRY;
+
+        switch(pcfg->pcfg_command) {
+        case NAL_CMD_ADD_ROUTE:
+                /* log the gateway NAL that is actually handed to the
+                 * router (pcfg_gw_nal), as the other cases do */
+                CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n",
+                       pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                       pcfg->pcfg_nid2, pcfg->pcfg_nid3);
+                err = kportal_add_route(pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                                        pcfg->pcfg_nid2, pcfg->pcfg_nid3);
+                break;
+
+        case NAL_CMD_DEL_ROUTE:
+                CDEBUG (D_IOCTL, "Removing routes via [%d] "LPU64" : "LPU64" - "LPU64"\n",
+                        pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                        pcfg->pcfg_nid2, pcfg->pcfg_nid3);
+                err = kportal_del_route (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                                         pcfg->pcfg_nid2, pcfg->pcfg_nid3);
+                break;
+
+        case NAL_CMD_NOTIFY_ROUTER: {
+                /* pcfg_nid3 carries the notification time for this command */
+                CDEBUG (D_IOCTL, "Notifying peer [%d] "LPU64" %s @ %ld\n",
+                        pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                        pcfg->pcfg_flags ? "Enabling" : "Disabling",
+                        (time_t)pcfg->pcfg_nid3);
+
+                err = kportal_notify_router (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                                             pcfg->pcfg_flags,
+                                             (time_t)pcfg->pcfg_nid3);
+                break;
+        }
+
+        case NAL_CMD_GET_ROUTE:
+                /* pcfg_count is the route index; results are written back
+                 * into the pcfg fields */
+                CDEBUG (D_IOCTL, "Getting route [%d]\n", pcfg->pcfg_count);
+                err = kportal_get_route(pcfg->pcfg_count, &pcfg->pcfg_gw_nal,
+                                        &pcfg->pcfg_nid,
+                                        &pcfg->pcfg_nid2, &pcfg->pcfg_nid3,
+                                        &pcfg->pcfg_flags);
+                break;
+
+        default:
+                CDEBUG(D_IOCTL, "Inappropriate cmd: %d\n", pcfg->pcfg_command);
+                break;
+        }
+        RETURN(err);
+}
+
+/*
+ * Run the command handler registered for pcfg->pcfg_nal.
+ *
+ * Returns -EINVAL when the NAL number is outside 1..NAL_MAX_NR or has no
+ * handler registered; otherwise returns the handler's result.  The
+ * handler is invoked with nal_cmd_sem held.
+ */
+int
+kportal_nal_cmd(struct portals_cfg *pcfg)
+{
+        __u32 nalid  = pcfg->pcfg_nal;
+        int   result = -EINVAL;
+
+        ENTRY;
+
+        down(&nal_cmd_sem);
+        if (!(nalid == 0 || nalid > NAL_MAX_NR) &&
+            nal_cmd[nalid].nch_handler) {
+                CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nalid,
+                       pcfg->pcfg_command);
+                result = nal_cmd[nalid].nch_handler(pcfg,
+                                                    nal_cmd[nalid].nch_private);
+        }
+        up(&nal_cmd_sem);
+
+        RETURN(result);
+}
+
+/*
+ * Look up the network interface handle exported by a kernel NAL.
+ *
+ * Returns the result of PORTAL_SYMBOL_GET() on the NAL's exported handle,
+ * or NULL for TCPNAL (userspace NAL) and for unknown NAL numbers (the
+ * latter is also logged).
+ */
+ptl_handle_ni_t *
+kportal_get_ni (int nal)
+{
+        ptl_handle_ni_t *nip = NULL;
+
+        switch (nal) {
+        case QSWNAL:
+                nip = PORTAL_SYMBOL_GET(kqswnal_ni);
+                break;
+        case SOCKNAL:
+                nip = PORTAL_SYMBOL_GET(ksocknal_ni);
+                break;
+        case GMNAL:
+                nip = PORTAL_SYMBOL_GET(kgmnal_ni);
+                break;
+        case IBNAL:
+                nip = PORTAL_SYMBOL_GET(kibnal_ni);
+                break;
+        case SCIMACNAL:
+                nip = PORTAL_SYMBOL_GET(kscimacnal_ni);
+                break;
+        case TCPNAL:
+                /* userspace NAL */
+                break;
+        default:
+                /* A warning to a naive caller */
+                CERROR ("unknown nal: %d\n", nal);
+                break;
+        }
+
+        return (nip);
+}
+
+/*
+ * Drop the symbol reference on a kernel NAL's interface handle; the
+ * counterpart of kportal_get_ni().
+ *
+ * TCPNAL has no kernel handle (kportal_get_ni() returns NULL for it), so
+ * putting it is treated as a bug.  Unknown NAL numbers are only logged.
+ */
+void
+kportal_put_ni (int nal)
+{
+
+        switch (nal)
+        {
+        case QSWNAL:
+                PORTAL_SYMBOL_PUT(kqswnal_ni);
+                break;
+        case SOCKNAL:
+                PORTAL_SYMBOL_PUT(ksocknal_ni);
+                break;
+        case GMNAL:
+                PORTAL_SYMBOL_PUT(kgmnal_ni);
+                break;
+        case IBNAL:
+                PORTAL_SYMBOL_PUT(kibnal_ni);
+                break;
+        case TCPNAL:
+                /* A lesson to a malicious caller */
+                LBUG ();
+                /* never fall through into the SCIMACNAL put, even if
+                 * LBUG() were to return */
+                break;
+        case SCIMACNAL:
+                PORTAL_SYMBOL_PUT(kscimacnal_ni);
+                break;
+        default:
+                CERROR ("unknown nal: %d\n", nal);
+                break;
+        }
+}
+
+/*
+ * Register a command handler (and its private argument) for a NAL.
+ *
+ * Returns 0 on success, -EBUSY when a handler is already registered for
+ * 'nal', and -EINVAL when 'nal' is outside 1..NAL_MAX_NR (previously that
+ * case reported success without registering anything).
+ */
+int
+kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private)
+{
+        int rc = 0;
+
+        CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler);
+
+        if (nal <= 0 || nal > NAL_MAX_NR) {
+                CERROR("Can't register NAL %d: not in [1, %d]\n",
+                       nal, (int)NAL_MAX_NR);
+                return -EINVAL;
+        }
+
+        /* nal_cmd_sem serialises all access to the nal_cmd[] table */
+        down(&nal_cmd_sem);
+        if (nal_cmd[nal].nch_handler != NULL)
+                rc = -EBUSY;
+        else {
+                nal_cmd[nal].nch_handler = handler;
+                nal_cmd[nal].nch_private = private;
+        }
+        up(&nal_cmd_sem);
+
+        return rc;
+}
+
+/*
+ * Clear the command handler slot of a NAL.
+ *
+ * NAL numbers outside 1..NAL_MAX_NR are ignored.  Always returns 0.
+ */
+int
+kportal_nal_unregister(int nal)
+{
+        CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal);
+
+        if (nal <= 0 || nal > NAL_MAX_NR)
+                return 0;
+
+        down(&nal_cmd_sem);
+        nal_cmd[nal].nch_private = NULL;
+        nal_cmd[nal].nch_handler = NULL;
+        up(&nal_cmd_sem);
+
+        return 0;
+}
+
+/*
+ * ioctl handler for the portals module.
+ *
+ * 'data' is the ioctl payload and 'arg' is the user address it is copied
+ * back to where a command returns results.  Handles PING, GET_NID,
+ * NAL_CMD and FAIL_NID; any other command yields -EINVAL.
+ *
+ * Returns 0 or a negative errno, except that FAIL_NID returns the
+ * PtlFailNid() result unchanged.
+ */
+static int kportal_ioctl(struct portal_ioctl_data *data,
+                         unsigned int cmd, unsigned long arg)
+{
+        int err = 0;
+        char str[PTL_NALFMT_SIZE];
+        ENTRY;
+
+        switch (cmd) {
+        case IOC_PORTAL_PING: {
+                void (*ping)(struct portal_ioctl_data *);
+
+                CDEBUG(D_IOCTL, "doing %d pings to nid "LPX64" (%s)\n",
+                       data->ioc_count, data->ioc_nid,
+                       portals_nid2str(data->ioc_nal, data->ioc_nid, str));
+                ping = PORTAL_SYMBOL_GET(kping_client);
+                if (!ping)
+                        CERROR("PORTAL_SYMBOL_GET failed\n");
+                else {
+                        ping(data);
+                        PORTAL_SYMBOL_PUT(kping_client);
+                }
+                /* a missing ping client is logged but not reported */
+                RETURN(0);
+        }
+
+        case IOC_PORTAL_GET_NID: {
+                const ptl_handle_ni_t *nip;
+                ptl_process_id_t       pid;
+
+                CDEBUG (D_IOCTL, "Getting nid for nal [%d]\n", data->ioc_nal);
+
+                nip = kportal_get_ni (data->ioc_nal);
+                if (nip == NULL)
+                        RETURN (-EINVAL);
+
+                err = PtlGetId (*nip, &pid);
+                LASSERT (err == PTL_OK);
+                kportal_put_ni (data->ioc_nal);
+
+                data->ioc_nid = pid.nid;
+                if (copy_to_user ((char *)arg, data, sizeof (*data)))
+                        err = -EFAULT;
+                break;
+        }
+
+        case IOC_PORTAL_NAL_CMD: {
+                struct portals_cfg pcfg;
+
+                /* ioc_plen1 is part of the ioctl payload: reject a bad
+                 * length with an error rather than asserting on it */
+                if (data->ioc_plen1 != sizeof(pcfg)) {
+                        CERROR("Bad ioc_plen1 %d, wanted %d\n",
+                               (int)data->ioc_plen1, (int)sizeof(pcfg));
+                        RETURN (-EINVAL);
+                }
+
+                /* copy_from_user() returns the number of bytes it could
+                 * not copy, which must not be passed on as an errno */
+                if (copy_from_user(&pcfg, (void *)data->ioc_pbuf1,
+                                   sizeof(pcfg)))
+                        RETURN (-EFAULT);
+
+                CDEBUG (D_IOCTL, "nal command nal %d cmd %d\n", pcfg.pcfg_nal,
+                        pcfg.pcfg_command);
+                err = kportal_nal_cmd(&pcfg);
+                if (err == 0) {
+                        /* return both the updated pcfg and the ioctl data */
+                        if (copy_to_user((char *)data->ioc_pbuf1, &pcfg,
+                                         sizeof (pcfg)))
+                                err = -EFAULT;
+                        if (copy_to_user((char *)arg, data, sizeof (*data)))
+                                err = -EFAULT;
+                }
+                break;
+        }
+
+        case IOC_PORTAL_FAIL_NID: {
+                const ptl_handle_ni_t *nip;
+
+                CDEBUG (D_IOCTL, "fail nid: [%d] "LPU64" count %d\n",
+                        data->ioc_nal, data->ioc_nid, data->ioc_count);
+
+                nip = kportal_get_ni (data->ioc_nal);
+                if (nip == NULL)
+                        RETURN (-EINVAL);
+
+                err = PtlFailNid (*nip, data->ioc_nid, data->ioc_count);
+                kportal_put_ni (data->ioc_nal);
+                break;
+        }
+
+        default:
+                err = -EINVAL;
+                break;
+        }
+
+        RETURN(err);
+}
+
+DECLARE_IOCTL_HANDLER(kportal_ioctl_handler, kportal_ioctl);
+
+/*
+ * Module load hook: initialise portals, register the ROUTER command
+ * handler, then register the ioctl handler.
+ *
+ * Returns 0 on success or the failing call's return code.  If registering
+ * the ROUTER handler fails, PtlFini() undoes the earlier PtlInit().
+ */
+static int init_kportals_module(void)
+{
+        int rc;
+        ENTRY;
+
+        rc = PtlInit(NULL);
+        if (rc) {
+                CERROR("PtlInit: error %d\n", rc);
+                RETURN(rc);
+        }
+
+        rc = kportal_nal_register(ROUTER, kportal_router_cmd, NULL);
+        if (rc) {
+                PtlFini();
+                CERROR("kportal_nal_register: ROUTER error %d\n", rc);
+                RETURN(rc);
+        }
+
+        /* only expose the ioctl interface once everything else is set up */
+        libcfs_register_ioctl(&kportal_ioctl_handler);
+
+        RETURN(0);
+}
+/*
+ * Module unload hook: undoes init_kportals_module() in reverse order --
+ * deregister the ioctl handler, unregister the ROUTER command handler,
+ * then shut portals down with PtlFini().
+ */
+static void exit_kportals_module(void)
+{
+ libcfs_deregister_ioctl(&kportal_ioctl_handler);
+ kportal_nal_unregister(ROUTER);
+ PtlFini();
+}
+
+EXPORT_SYMBOL(kportal_nal_register);
+EXPORT_SYMBOL(kportal_nal_unregister);
+EXPORT_SYMBOL(kportal_get_ni);
+EXPORT_SYMBOL(kportal_put_ni);
+EXPORT_SYMBOL(kportal_nal_cmd);
+
+EXPORT_SYMBOL(ptl_err_str);
+EXPORT_SYMBOL(lib_dispatch);
+EXPORT_SYMBOL(PtlMEAttach);
+EXPORT_SYMBOL(PtlMEInsert);
+EXPORT_SYMBOL(PtlMEUnlink);
+EXPORT_SYMBOL(PtlEQAlloc);
+EXPORT_SYMBOL(PtlMDAttach);
+EXPORT_SYMBOL(PtlMDUnlink);
+EXPORT_SYMBOL(PtlNIInit);
+EXPORT_SYMBOL(PtlNIFini);
+EXPORT_SYMBOL(PtlNIDebug);
+EXPORT_SYMBOL(PtlInit);
+EXPORT_SYMBOL(PtlFini);
+EXPORT_SYMBOL(PtlSnprintHandle);
+EXPORT_SYMBOL(PtlPut);
+EXPORT_SYMBOL(PtlGet);
+EXPORT_SYMBOL(PtlEQWait);
+EXPORT_SYMBOL(PtlEQFree);
+EXPORT_SYMBOL(PtlEQGet);
+EXPORT_SYMBOL(PtlGetId);
+EXPORT_SYMBOL(PtlMDBind);
+EXPORT_SYMBOL(lib_iov_nob);
+EXPORT_SYMBOL(lib_copy_iov2buf);
+EXPORT_SYMBOL(lib_copy_buf2iov);
+EXPORT_SYMBOL(lib_extract_iov);
+EXPORT_SYMBOL(lib_kiov_nob);
+EXPORT_SYMBOL(lib_copy_kiov2buf);
+EXPORT_SYMBOL(lib_copy_buf2kiov);
+EXPORT_SYMBOL(lib_extract_kiov);
+EXPORT_SYMBOL(lib_finalize);
+EXPORT_SYMBOL(lib_parse);
+EXPORT_SYMBOL(lib_create_reply_msg);
+EXPORT_SYMBOL(lib_init);
+EXPORT_SYMBOL(lib_fini);
+EXPORT_SYMBOL(dispatch_name);
+
+MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
+MODULE_DESCRIPTION("Portals v3.1");
+MODULE_LICENSE("GPL");
+module_init(init_kportals_module);
+module_exit(exit_kportals_module);
#define DEBUG_SUBSYSTEM S_PTLROUTER
#include <linux/kp30.h>
+#include <linux/kpr.h>
#include <portals/p30.h>
#include <portals/lib-p30.h>
client->md_in_head.length = (args->ioc_size + STDSIZE)
* count;
client->md_in_head.threshold = PTL_MD_THRESH_INF;
- client->md_in_head.options = PTL_MD_OP_PUT;
+ client->md_in_head.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
client->md_in_head.user_ptr = NULL;
client->md_in_head.eventq = client->eq;
memset (client->inbuf, 0, (args->ioc_size + STDSIZE) * count);
client->md_out_head.start = client->outbuf;
client->md_out_head.length = STDSIZE + args->ioc_size;
client->md_out_head.threshold = args->ioc_count;
- client->md_out_head.options = PTL_MD_OP_PUT;
+ client->md_out_head.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
client->md_out_head.user_ptr = NULL;
client->md_out_head.eventq = PTL_EQ_NONE;
/* Bind the outgoing ping header */
if ((rc=PtlMDBind (*nip, client->md_out_head,
- &client->md_out_head_h))) {
+ PTL_UNLINK, &client->md_out_head_h))) {
CERROR ("PtlMDBind error %d\n", rc);
pingcli_shutdown (1);
return NULL;
server->mdout.length = server->evnt.rlength;
server->mdout.start = server->in_buf;
server->mdout.threshold = 1;
- server->mdout.options = PTL_MD_OP_PUT;
+ server->mdout.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
server->mdout.user_ptr = NULL;
server->mdout.eventq = PTL_EQ_NONE;
/* Bind the outgoing buffer */
if ((rc = PtlMDBind (server->ni, server->mdout,
- &server->mdout_h))) {
+ PTL_UNLINK, &server->mdout_h))) {
PDEBUG ("PtlMDBind", rc);
pingsrv_shutdown (1);
return 1;
server->mdin.start = server->in_buf;
server->mdin.length = MAXSIZE;
server->mdin.threshold = 1;
- server->mdin.options = PTL_MD_OP_PUT;
+ server->mdin.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
server->mdin.user_ptr = NULL;
server->mdin.eventq = server->eq;
server->mdin.start = server->in_buf;
server->mdin.length = MAXSIZE;
server->mdin.threshold = 1;
- server->mdin.options = PTL_MD_OP_PUT;
+ server->mdin.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
server->mdin.user_ptr = NULL;
server->mdin.eventq = server->eq;
memset (server->in_buf, 0, STDSIZE);
client->md_in_head.start = client->inbuf;
client->md_in_head.length = STDSIZE;
client->md_in_head.threshold = 1;
- client->md_in_head.options = PTL_MD_OP_PUT;
+ client->md_in_head.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
client->md_in_head.user_ptr = NULL;
client->md_in_head.eventq = client->eq;
memset (client->inbuf, 0, STDSIZE);
client->md_out_head.start = client->outbuf;
client->md_out_head.length = STDSIZE;
client->md_out_head.threshold = 1;
- client->md_out_head.options = PTL_MD_OP_PUT;
+ client->md_out_head.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
client->md_out_head.user_ptr = NULL;
client->md_out_head.eventq = PTL_EQ_NONE;
/* Bind the outgoing ping header */
if ((rc=PtlMDBind (*nip, client->md_out_head,
- &client->md_out_head_h))) {
+ PTL_UNLINK, &client->md_out_head_h))) {
CERROR ("PtlMDBind error %d\n", rc);
pingcli_shutdown (1);
return (NULL);
server->mdout.start = server->in_buf;
server->mdout.length = STDSIZE;
server->mdout.threshold = 1;
- server->mdout.options = PTL_MD_OP_PUT;
+ server->mdout.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
server->mdout.user_ptr = NULL;
server->mdout.eventq = PTL_EQ_NONE;
/* Bind the outgoing buffer */
if ((rc = PtlMDBind (server->ni, server->mdout,
- &server->mdout_h))) {
+ PTL_UNLINK, &server->mdout_h))) {
PDEBUG ("PtlMDBind", rc);
pingsrv_shutdown (1);
return 1;
server->mdin.start = server->in_buf;
server->mdin.length = STDSIZE;
server->mdin.threshold = 1;
- server->mdin.options = PTL_MD_OP_PUT;
+ server->mdin.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
server->mdin.user_ptr = NULL;
server->mdin.eventq = server->eq;
server->mdin.start = server->in_buf;
server->mdin.length = STDSIZE;
server->mdin.threshold = 1;
- server->mdin.options = PTL_MD_OP_PUT;
+ server->mdin.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
server->mdin.user_ptr = NULL;
server->mdin.eventq = server->eq;
memset (server->in_buf, 0, STDSIZE);
}
-/* FIXME cfs temporary workaround! FIXME
- * global time out value
- */
-int __tcpnal_eqwait_timeout_value = 0;
-int __tcpnal_eqwait_timedout = 0;
+/* Lock the mutex of the procbridge reached through n->nal_data->local.
+ * 'flags' is not used. */
+static void procbridge_lock(nal_t * n, unsigned long *flags)
+{
+        procbridge pb = (procbridge)((bridge)n->nal_data)->local;
+
+        pthread_mutex_lock(&pb->mutex);
+}
+
+/* Unlock the mutex of the procbridge reached through n->nal_data->local.
+ * 'flags' is not used. */
+static void procbridge_unlock(nal_t * n, unsigned long *flags)
+{
+        procbridge pb = (procbridge)((bridge)n->nal_data)->local;
+
+        pthread_mutex_unlock(&pb->mutex);
+}
/* Function: yield
* Arguments: pid:
* overload it to explicitly block until signalled by the
* lower half.
*/
-static void procbridge_yield(nal_t *n)
+static int procbridge_yield(nal_t *n, unsigned long *flags, int milliseconds) /* returns the part of the timeout still left; 'flags' is unused */
{
bridge b=(bridge)n->nal_data;
procbridge p=(procbridge)b->local;
- pthread_mutex_lock(&p->mutex);
- if (!__tcpnal_eqwait_timeout_value) {
+ if (milliseconds == 0) /* zero timeout: do not wait at all */
+ return 0;
+ /* p->mutex is not taken here; both waits below pass it to the
+ * condition variable -- presumably the caller locked it via
+ * procbridge_lock() (NOTE(review): confirm against callers) */
+ if (milliseconds < 0) { /* negative timeout: wait with no deadline */
pthread_cond_wait(&p->cond,&p->mutex);
} else {
+ struct timeval then;
struct timeval now;
struct timespec timeout;
- gettimeofday(&now, NULL);
- timeout.tv_sec = now.tv_sec + __tcpnal_eqwait_timeout_value;
- timeout.tv_nsec = now.tv_usec * 1000;
+ gettimeofday(&then, NULL);
+ timeout.tv_sec = then.tv_sec + milliseconds/1000;
+ timeout.tv_nsec = then.tv_usec * 1000 + milliseconds % 1000 * 1000000;
+ if (timeout.tv_nsec >= 1000000000) { /* carry whole seconds out of tv_nsec */
+ timeout.tv_sec++;
+ timeout.tv_nsec -= 1000000000;
+ }
+ /* 'timeout' is now the absolute deadline: start time + milliseconds */
+ pthread_cond_timedwait(&p->cond, &p->mutex, &timeout);
- __tcpnal_eqwait_timedout =
- pthread_cond_timedwait(&p->cond, &p->mutex, &timeout);
+ gettimeofday(&now, NULL);
+ milliseconds -= (now.tv_sec - then.tv_sec) * 1000 +
+ (now.tv_usec - then.tv_usec) / 1000;
+ /* subtract the time actually spent waiting; never report less than 0 */
+ if (milliseconds < 0)
+ milliseconds = 0;
}
- pthread_mutex_unlock(&p->mutex);
+ /* a negative (no-deadline) timeout is returned unchanged */
+ return (milliseconds);
}
-static void procbridge_lock(nal_t * nal, unsigned long *flags){}
-static void procbridge_unlock(nal_t * nal, unsigned long *flags){}
/* api_nal
* the interface vector to allow the generic code to access
* this nal. this is seperate from the library side nal_cb.
pthread_mutex_init(&p->mutex,0);
pthread_cond_init(&p->cond, 0);
p->nal_flags = 0;
- pthread_mutex_init(&p->nal_cb_lock, 0);
/* initialize notifier */
if (socketpair(AF_UNIX, SOCK_STREAM, 0, p->notifier)) {
bridge b = (bridge) nal->nal_data;
procbridge p = (procbridge) b->local;
- pthread_mutex_lock(&p->nal_cb_lock);
+ pthread_mutex_lock(&p->mutex);
}
bridge b = (bridge)nal->nal_data;
procbridge p = (procbridge) b->local;
- pthread_mutex_unlock(&p->nal_cb_lock);
+ pthread_mutex_unlock(&p->mutex);
}
+/* Deliver an event: run the event queue's callback when one is set, then
+ * wake every thread waiting on the procbridge condition variable.
+ * 'private' is not used. */
+static void nal_callback(nal_cb_t *nal, void *private,
+                         lib_eq_t *eq, ptl_event_t *ev)
+{
+        procbridge pb = (procbridge)((bridge)nal->nal_data)->local;
+
+        /* holding pb->mutex */
+        if (eq->event_callback)
+                eq->event_callback(ev);
+
+        pthread_cond_broadcast(&pb->cond);
+}
static int nal_dist(nal_cb_t *nal,
ptl_nid_t nid,
return 0;
}
-static void wakeup_topside(void *z)
+static void check_stopping(void *z) /* thunk: exits this thread once NAL_FLAG_STOPPING is set */
{
bridge b = z;
procbridge p = b->local;
- int stop;
+ if ((p->nal_flags & NAL_FLAG_STOPPING) == 0) /* tested before p->mutex is taken */
+ return;
+ /* stopping: set NAL_FLAG_STOPPED, wake waiters on p->cond, then exit */
pthread_mutex_lock(&p->mutex);
- stop = p->nal_flags & NAL_FLAG_STOPPING;
- if (stop)
- p->nal_flags |= NAL_FLAG_STOPPED;
+ p->nal_flags |= NAL_FLAG_STOPPED;
pthread_cond_broadcast(&p->cond);
pthread_mutex_unlock(&p->mutex);
- if (stop)
- pthread_exit(0);
+ pthread_exit(0);
}
b->nal_cb->cb_printf=nal_printf;
b->nal_cb->cb_cli=nal_cli;
b->nal_cb->cb_sti=nal_sti;
+ b->nal_cb->cb_callback=nal_callback;
b->nal_cb->cb_dist=nal_dist;
pid_request = args->nia_requested_pid;
performs an operation and returns to blocking mode. we
overload this function to inform the api side that
it may be interested in looking at the event queue */
- register_thunk(wakeup_topside,b);
+ register_thunk(check_stopping,b);
timer_loop();
}
return(0);
}
-/* FIXME cfs temporary workaround! FIXME
- * global time out value
- */
-int __tcpnal_eqwait_timeout_value = 0;
-int __tcpnal_eqwait_timedout = 0;
+/* Lock the mutex of the procbridge reached through n->nal_data->local.
+ * 'flags' is not used. */
+static void procbridge_lock(nal_t * n, unsigned long *flags)
+{
+        procbridge pb = (procbridge)((bridge)n->nal_data)->local;
+
+        pthread_mutex_lock(&pb->mutex);
+}
+
+/* Unlock the mutex of the procbridge reached through n->nal_data->local.
+ * 'flags' is not used. */
+static void procbridge_unlock(nal_t * n, unsigned long *flags)
+{
+        procbridge pb = (procbridge)((bridge)n->nal_data)->local;
+
+        pthread_mutex_unlock(&pb->mutex);
+}
/* Function: yield
* Arguments: pid:
* overload it to explicitly block until signalled by the
* lower half.
*/
-static void procbridge_yield(nal_t *n)
+static int procbridge_yield(nal_t *n, unsigned long *flags, int milliseconds) /* returns the part of the timeout still left; 'flags' is unused */
{
bridge b=(bridge)n->nal_data;
procbridge p=(procbridge)b->local;
- pthread_mutex_lock(&p->mutex);
- if (!__tcpnal_eqwait_timeout_value) {
+ if (milliseconds == 0) /* zero timeout: do not wait at all */
+ return 0;
+ /* p->mutex is not taken here; both waits below pass it to the
+ * condition variable -- presumably the caller locked it via
+ * procbridge_lock() (NOTE(review): confirm against callers) */
+ if (milliseconds < 0) { /* negative timeout: wait with no deadline */
pthread_cond_wait(&p->cond,&p->mutex);
} else {
+ struct timeval then;
struct timeval now;
struct timespec timeout;
- gettimeofday(&now, NULL);
- timeout.tv_sec = now.tv_sec + __tcpnal_eqwait_timeout_value;
- timeout.tv_nsec = now.tv_usec * 1000;
+ gettimeofday(&then, NULL);
+ timeout.tv_sec = then.tv_sec + milliseconds/1000;
+ timeout.tv_nsec = then.tv_usec * 1000 + milliseconds % 1000 * 1000000;
+ if (timeout.tv_nsec >= 1000000000) { /* carry whole seconds out of tv_nsec */
+ timeout.tv_sec++;
+ timeout.tv_nsec -= 1000000000;
+ }
+ /* 'timeout' is now the absolute deadline: start time + milliseconds */
+ pthread_cond_timedwait(&p->cond, &p->mutex, &timeout);
- __tcpnal_eqwait_timedout =
- pthread_cond_timedwait(&p->cond, &p->mutex, &timeout);
+ gettimeofday(&now, NULL);
+ milliseconds -= (now.tv_sec - then.tv_sec) * 1000 +
+ (now.tv_usec - then.tv_usec) / 1000;
+ /* subtract the time actually spent waiting; never report less than 0 */
+ if (milliseconds < 0)
+ milliseconds = 0;
}
- pthread_mutex_unlock(&p->mutex);
+ /* a negative (no-deadline) timeout is returned unchanged */
+ return (milliseconds);
}
-static void procbridge_lock(nal_t * nal, unsigned long *flags){}
-static void procbridge_unlock(nal_t * nal, unsigned long *flags){}
/* api_nal
* the interface vector to allow the generic code to access
* this nal. this is seperate from the library side nal_cb.
pthread_mutex_init(&p->mutex,0);
pthread_cond_init(&p->cond, 0);
p->nal_flags = 0;
- pthread_mutex_init(&p->nal_cb_lock, 0);
/* initialize notifier */
if (socketpair(AF_UNIX, SOCK_STREAM, 0, p->notifier)) {
bridge b = (bridge) nal->nal_data;
procbridge p = (procbridge) b->local;
- pthread_mutex_lock(&p->nal_cb_lock);
+ pthread_mutex_lock(&p->mutex);
}
bridge b = (bridge)nal->nal_data;
procbridge p = (procbridge) b->local;
- pthread_mutex_unlock(&p->nal_cb_lock);
+ pthread_mutex_unlock(&p->mutex);
}
+/* Deliver an event: run the event queue's callback when one is set, then
+ * wake every thread waiting on the procbridge condition variable.
+ * 'private' is not used. */
+static void nal_callback(nal_cb_t *nal, void *private,
+                         lib_eq_t *eq, ptl_event_t *ev)
+{
+        procbridge pb = (procbridge)((bridge)nal->nal_data)->local;
+
+        /* holding pb->mutex */
+        if (eq->event_callback)
+                eq->event_callback(ev);
+
+        pthread_cond_broadcast(&pb->cond);
+}
static int nal_dist(nal_cb_t *nal,
ptl_nid_t nid,
return 0;
}
-static void wakeup_topside(void *z)
+static void check_stopping(void *z) /* thunk: exits this thread once NAL_FLAG_STOPPING is set */
{
bridge b = z;
procbridge p = b->local;
- int stop;
+ if ((p->nal_flags & NAL_FLAG_STOPPING) == 0) /* tested before p->mutex is taken */
+ return;
+ /* stopping: set NAL_FLAG_STOPPED, wake waiters on p->cond, then exit */
pthread_mutex_lock(&p->mutex);
- stop = p->nal_flags & NAL_FLAG_STOPPING;
- if (stop)
- p->nal_flags |= NAL_FLAG_STOPPED;
+ p->nal_flags |= NAL_FLAG_STOPPED;
pthread_cond_broadcast(&p->cond);
pthread_mutex_unlock(&p->mutex);
- if (stop)
- pthread_exit(0);
+ pthread_exit(0);
}
b->nal_cb->cb_printf=nal_printf;
b->nal_cb->cb_cli=nal_cli;
b->nal_cb->cb_sti=nal_sti;
+ b->nal_cb->cb_callback=nal_callback;
b->nal_cb->cb_dist=nal_dist;
pid_request = args->nia_requested_pid;
performs an operation and returns to blocking mode. we
overload this function to inform the api side that
it may be interested in looking at the event queue */
- register_thunk(wakeup_topside,b);
+ register_thunk(check_stopping,b);
timer_loop();
}
return(0);
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
-COMPILE = $(CC) -Wall -g -I$(srcdir)/../include
+# ../ for <portals/*.h>, ../../ for <config.h>
+COMPILE = $(CC) -Wall -g -I$(srcdir)/../include -I$(srcdir)/../../include
LINK = $(CC) -o $@
if LIBLUSTRE
DIRS24 = ptlbd
endif
+# just until things are farther along
+if CRAY_PORTALS
+UTILS_TESTS =
+else
+UTILS_TESTS = utils tests
+endif
+
if LIBLUSTRE
SUBDIRS = portals obdclass lov ptlrpc obdecho osc utils mdc lvfs liblustre
else
-SUBDIRS = lvfs portals obdclass include $(DIRS24) mds utils obdfilter mdc osc ost
-SUBDIRS+= llite obdecho lov cobd tests doc scripts conf ptlrpc
+SUBDIRS = lvfs portals obdclass include $(DIRS24) mds obdfilter mdc osc ost
+SUBDIRS+= llite obdecho lov cobd doc scripts conf ptlrpc $(UTILS_TESTS)
endif
+
if SNAPFS
SUBDIRS+= snapfs
endif
sh -e -x -c '(cp -f $0.mk $0.in)'
fi
-AM_CONFIG_HEADER(portals/include/config.h)
+AM_CONFIG_HEADER(include/config.h)
AC_OUTPUT([Makefile lvfs/Makefile portals/Makefile portals/Kernelenv \
portals/libcfs/Makefile portals/portals/Makefile \
-/* include/config.h.in. Generated automatically from configure.in by autoheader. */
+/* include/config.h.in. Generated from configure.in by autoheader. */
-/* Define if you have the `readline' library (-lreadline). */
-#undef HAVE_LIBREADLINE
+/* Use the Pinger */
+#undef ENABLE_PINGER
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
+
+/* Define to 1 if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#undef HAVE_STDLIB_H
+
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+
+/* IOCTL Buffer Size */
+#undef OBD_MAX_IOCTL_BUFFER
/* Name of package */
#undef PACKAGE
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+
+/* The size of a `unsigned long long', as computed by sizeof. */
+#undef SIZEOF_UNSIGNED_LONG_LONG
+
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
/* Version number of package */
#undef VERSION
typedef __u64 kdev_t;
#define SPIN_LOCK_UNLOCKED 0
+#define LASSERT_SPIN_LOCKED(lock) do {} while(0)
+
static inline void spin_lock(spinlock_t *l) {return;}
static inline void spin_unlock(spinlock_t *l) {return;}
static inline void spin_lock_init(spinlock_t *l) {return;}
int ldlm_del_waiting_lock(struct ldlm_lock *lock);
int ldlm_get_ref(void);
void ldlm_put_ref(int force);
+#ifndef __KERNEL__
+void liblustre_ldlm_handle_bl_callback(struct ldlm_namespace *ns,
+ struct ldlm_lock_desc *ld,
+ struct ldlm_lock *lock);
+#endif
/* ldlm_lock.c */
ldlm_processing_policy ldlm_get_processing_policy(struct ldlm_resource *res);
#include <linux/lustre_import.h>
#include <linux/lprocfs_status.h>
+/* MD flags we _always_ use */
+#define PTLRPC_MD_OPTIONS (PTL_MD_EVENT_START_DISABLE | \
+ PTL_MD_LUSTRE_COMPLETION_SEMANTICS)
+
+/* Define some large-ish defaults for MTU and MAX_IOV if portals ones
+ * aren't defined (i.e. no limits) or too large */
+#if (defined(PTL_MTU) && (PTL_MTU <= (1 << 20)))
+# define PTLRPC_MTU PTL_MTU
+#else
+# define PTLRPC_MTU (1 << 20)
+#endif
+#if (defined(PTL_MAX_IOV) && (PTL_MAX_IOV <= 512))
+# define PTLRPC_MAX_IOV PTL_MAX_IOV
+#else
+# define PTLRPC_MAX_IOV 512
+#endif
+
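+/* Define consistent max bulk size/pages: a bulk is capped by whichever is
+ * smaller, PTLRPC_MTU bytes or PTLRPC_MAX_IOV pages of PAGE_SIZE each */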
+#if (PTLRPC_MTU > PTLRPC_MAX_IOV * PAGE_SIZE)
+# define PTLRPC_MAX_BRW_PAGES PTLRPC_MAX_IOV
+# define PTLRPC_MAX_BRW_SIZE (PTLRPC_MAX_IOV * PAGE_SIZE)
+#else
+# define PTLRPC_MAX_BRW_PAGES (PTLRPC_MTU / PAGE_SIZE)
+# define PTLRPC_MAX_BRW_SIZE PTLRPC_MTU
+#endif
+
/* Size over which to OBD_VMALLOC() rather than OBD_ALLOC() service request
* buffers */
#define SVC_BUF_VMALLOC_THRESHOLD (2*PAGE_SIZE)
__u32 bd_portal;
struct ptlrpc_request *bd_req; /* associated request */
wait_queue_head_t bd_waitq; /* server side only WQ */
- int bd_page_count; /* # pages (== entries in bd_iov) */
- int bd_max_pages; /* allocated size of bd_iov */
+ int bd_iov_count; /* # entries in bd_iov */
+ int bd_max_iov; /* allocated size of bd_iov */
int bd_nob; /* # bytes covered */
int bd_nob_transferred; /* # bytes GOT/PUT */
struct ptlrpc_cb_id bd_cbid; /* network callback info */
ptl_handle_md_t bd_md_h; /* associated MD */
-#ifdef __KERNEL__
- ptl_kiov_t bd_iov[PTL_MD_MAX_IOV];
+#if (!CRAY_PORTALS && defined(__KERNEL__))
+ ptl_kiov_t bd_iov[0];
#else
- struct iovec bd_iov[PTL_MD_MAX_IOV];
+ struct iovec bd_iov[0];
#endif
};
void class_uuid_unparse(class_uuid_t in, struct obd_uuid *out);
/* lustre_peer.c */
-int lustre_uuid_to_peer(char *uuid, struct lustre_peer *peer);
+int lustre_uuid_to_peer(char *uuid, ptl_handle_ni_t *peer_ni, ptl_nid_t *peer_nid);
int class_add_uuid(char *uuid, __u64 nid, __u32 nal);
int class_del_uuid (char *uuid);
void class_init_uuidlist(void);
+$Id: bproc-patch-2.4.20,v 1.6 2004/03/19 01:09:33 zab Exp $
+
Index: linux/fs/exec.c
===================================================================
--- linux.orig/fs/exec.c 2003-09-03 17:52:00.000000000 -0400
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
-+ * Id: bproc-patch-2.4.20,v 1.3.2.1 2004/02/14 07:21:44 nic Exp $
++ * $Id: bproc-patch-2.4.20,v 1.6 2004/03/19 01:09:33 zab Exp $
+ *-----------------------------------------------------------------------*/
+#include <linux/kernel.h>
+#include <linux/sched.h>
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
-+ * Id: bproc-patch-2.4.20,v 1.3.2.1 2004/02/14 07:21:44 nic Exp $
++ * $Id: bproc-patch-2.4.20,v 1.6 2004/03/19 01:09:33 zab Exp $
+ *-----------------------------------------------------------------------*/
+#ifndef _LINUX_BPROC_H
+#define _LINUX_BPROC_H
#else
void l_check_no_ns_lock(struct ldlm_namespace *ns)
{
-#warning "FIXME: check lock in user space??"
+ if (l_has_lock(&ns->ns_lock)) {
+ CERROR("namespace %s lock held illegally; tell phil\n",
+ ns->ns_name);
+ }
}
#endif /* __KERNEL__ */
spin_lock_init(&cli->cl_write_rpc_hist.oh_lock);
spin_lock_init(&cli->cl_read_page_hist.oh_lock);
spin_lock_init(&cli->cl_write_page_hist.oh_lock);
- cli->cl_max_pages_per_rpc = PTL_MD_MAX_PAGES;
+ cli->cl_max_pages_per_rpc = PTLRPC_MAX_BRW_PAGES;
cli->cl_max_rpcs_in_flight = OSC_MAX_RIF_DEFAULT;
ldlm_get_ref();
LDLM_LOCK_GET(lock); /* dropped by bl thread */
ldlm_lock_remove_from_lru(lock);
+#ifdef __KERNEL__
ldlm_bl_to_thread(ns, NULL, lock);
l_unlock(&ns->ns_lock);
+#else
+ l_unlock(&ns->ns_lock);
+ liblustre_ldlm_handle_bl_callback(ns, NULL, lock);
+#endif
} else if (ns->ns_client == LDLM_NAMESPACE_CLIENT &&
!lock->l_readers && !lock->l_writers) {
/* If this is a client-side namespace and this was the last
RETURN(0);
}
+#else
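+/* XXX non-kernel (liblustre) build: thin exported wrapper that runs
+ * ldlm_handle_bl_callback() directly; callers use it where the kernel
+ * build hands the lock to ldlm_bl_to_thread() instead. */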
+void liblustre_ldlm_handle_bl_callback(struct ldlm_namespace *ns,
+ struct ldlm_lock_desc *ld,
+ struct ldlm_lock *lock)
+{
+ ldlm_handle_bl_callback(ns, ld, lock);
+}
#endif
static int ldlm_callback_handler(struct ptlrpc_request *req)
int ldlm_cancel_lru(struct ldlm_namespace *ns)
{
struct list_head *tmp, *next;
+#ifndef __KERNEL__
+ LIST_HEAD(cblist);
+#endif
int count, rc = 0;
ENTRY;
LDLM_LOCK_GET(lock); /* dropped by bl thread */
ldlm_lock_remove_from_lru(lock);
+#ifdef __KERNEL__
ldlm_bl_to_thread(ns, NULL, lock);
+#else
+ list_add(&lock->l_lru, &cblist);
+#endif
if (--count == 0)
break;
}
l_unlock(&ns->ns_lock);
+#ifndef __KERNEL__
+ while (!list_empty(&cblist)) {
+ struct ldlm_lock *lock;
+
+ lock = list_entry(cblist.next, struct ldlm_lock, l_lru);
+ list_del_init(&lock->l_lru);
+ liblustre_ldlm_handle_bl_callback(ns, NULL, lock);
+ }
+#endif
RETURN(rc);
}
$(SYSIO)/src/libsysio.a \
$(SYSIO)/dev/stdfd/libsysio_stdfd.a
-#SYSIO_LIBS = $(SYSIO)/lib/libsysio.a
-
lib_LIBRARIES = liblustre.a
noinst_LIBRARIES = libllite.a
#include "llite_lib.h"
+unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL |
+ S_GMNAL | S_IBNAL);
ptl_handle_ni_t tcpnal_ni;
struct task_struct *current;
int init_lib_portals()
{
+ int max_interfaces;
int rc;
ENTRY;
- PtlInit();
+ PtlInit(&max_interfaces);
rc = PtlNIInit(procbridge_interface, 0, 0, 0, &tcpnal_ni);
if (rc != 0) {
CERROR("TCPNAL: PtlNIInit failed: error %d\n", rc);
mdc_set_lock_data(&it->d.lustre.it_lock_handle, inode);
}
- /* drop IT_LOOKUP locks */
- if (it->it_op == IT_LOOKUP)
+ /* drop lookup/getattr locks */
+ if (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR)
ll_intent_release(it);
}
GOTO(out, rc = 0);
rc = pnode_revalidate_finish(req, 1, it, pnode);
+ if (rc != 0) {
+ ll_intent_release(it);
+ GOTO(out, rc = 0);
+ }
+ rc = 1;
/* Note: ll_intent_lock may cause a callback, check this! */
- if (it->it_op & (IT_OPEN | IT_GETATTR))
+ if (it->it_op & IT_OPEN)
LL_SAVE_INTENT(pb->pb_ino, it);
- RETURN(1);
+
out:
- if (req)
+ if (req && rc == 1)
ptlrpc_req_finished(req);
if (rc == 0) {
LASSERT(pb->pb_ino);
} else {
llu_lookup_finish_locks(it, pnode);
llu_i2info(pb->pb_ino)->lli_stale_flag = 0;
- if (it->it_op & (IT_OPEN | IT_GETATTR))
- LL_SAVE_INTENT(pb->pb_ino, it);
}
RETURN(rc);
}
}
/* intent will be further used in cases of open()/getattr() */
- if (inode && (it->it_op & (IT_OPEN | IT_GETATTR)))
+ if (inode && (it->it_op & IT_OPEN))
LL_SAVE_INTENT(inode, it);
child->p_base->pb_ino = inode;
rc = llu_inode_revalidate(ino);
if (!rc) {
copy_stat_buf(ino, b);
-
- if (llu_i2info(ino)->lli_it) {
- struct lookup_intent *it;
-
- LL_GET_INTENT(ino, it);
- it->it_op_release(it);
- OBD_FREE(it, sizeof(*it));
- }
+ LASSERT(!llu_i2info(ino)->lli_it);
}
RETURN(rc);
-.deps
Makefile
-Makefile.in
+Makefile.in
\ No newline at end of file
struct ldlm_res_id;
struct obd_import;
+unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL |
+ S_GMNAL | S_IBNAL);
+
void *inter_module_get(char *arg)
{
if (!strcmp(arg, "tcpnal_ni"))
int init_lib_portals()
{
+ int max_interfaces;
int rc;
- PtlInit();
+ PtlInit(&max_interfaces);
rc = PtlNIInit(procbridge_interface, 0, 0, 0, &tcpnal_ni);
if (rc != 0) {
CERROR("ksocknal: PtlNIInit failed: error %d\n", rc);
#define DEBUG_SUBSYSTEM S_LLITE
#include <linux/module.h>
+#include <linux/types.h>
#include <linux/random.h>
#include <linux/version.h>
RETURN(rc);
}
-#define LL_RA_MIN(inode) ((unsigned long)PTL_MD_MAX_PAGES / 2)
+#define LL_RA_MIN(inode) ((unsigned long)PTLRPC_MAX_BRW_PAGES / 2)
#define LL_RA_MAX(inode) ((ll_i2info(inode)->lli_smd->lsm_xfersize * 3) >> \
PAGE_CACHE_SHIFT)
#define DEBUG_SUBSYSTEM S_LLITE
#include <linux/module.h>
+#include <linux/types.h>
#include <linux/random.h>
#include <linux/version.h>
#include <linux/lustre_lite.h>
#define DEBUG_SUBSYSTEM S_LLITE
#include <linux/module.h>
+#include <linux/types.h>
#include <linux/random.h>
#include <linux/version.h>
#include <linux/lustre_lite.h>
(*lsmp)->lsm_magic = LOV_MAGIC;
(*lsmp)->lsm_stripe_count = stripe_count;
(*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count;
- (*lsmp)->lsm_xfersize = PTL_MTU * stripe_count;
+ (*lsmp)->lsm_xfersize = PTLRPC_MTU * stripe_count;
(*lsmp)->lsm_pattern = pattern;
(*lsmp)->lsm_oinfo[0].loi_ost_idx = ~0;
case OBD_IOC_CLOSE_UUID: {
- struct lustre_peer peer;
+ ptl_nid_t peer_nid;
+ ptl_handle_ni_t peer_ni;
CDEBUG(D_IOCTL, "closing all connections to uuid %s\n",
data->ioc_inlbuf1);
- lustre_uuid_to_peer(data->ioc_inlbuf1, &peer);
+ lustre_uuid_to_peer(data->ioc_inlbuf1, &peer_ni, &peer_nid);
GOTO(out, err = 0);
}
}
int llog_get_cat_list(struct obd_device *obd, struct obd_device *disk_obd,
- char *name, int count, struct llog_logid *idarray)
+ char *name, int count, struct llog_catid *idarray)
{
LBUG();
return 0;
}
int llog_put_cat_list(struct obd_device *obd, struct obd_device *disk_obd,
- char *name, int count, struct llog_logid *idarray)
+ char *name, int count, struct llog_catid *idarray)
{
LBUG();
return 0;
class_del_uuid(NULL);
}
-int lustre_uuid_to_peer(char *uuid, struct lustre_peer *peer)
+int lustre_uuid_to_peer(char *uuid,
+ ptl_handle_ni_t *peer_ni, ptl_nid_t *peer_nid)
{
struct list_head *tmp;
list_entry(tmp, struct uuid_nid_data, head);
if (strcmp(data->uuid, uuid) == 0) {
- peer->peer_nid = data->nid;
- peer->peer_ni = data->ni;
+ *peer_nid = data->nid;
+ *peer_ni = data->ni;
spin_unlock (&g_uuid_lock);
return 0;
if (rc)
return rc;
- if (val < 1 || val > PTL_MD_MAX_PAGES)
+ if (val < 1 || val > PTLRPC_MAX_BRW_PAGES)
return -ERANGE;
spin_lock(&cli->cl_loi_list_lock);
# include <linux/module.h>
# include <linux/mm.h>
# include <linux/highmem.h>
-# include <linux/lustre_dlm.h>
+# include <linux/ctype.h>
+# include <linux/init.h>
# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
# include <linux/workqueue.h>
# include <linux/smp_lock.h>
# include <liblustre.h>
#endif
-#ifndef __CYGWIN__
-# include <linux/ctype.h>
-# include <linux/init.h>
-#else
+#ifdef __CYGWIN__
# include <ctype.h>
#endif
+# include <linux/lustre_dlm.h>
#include <linux/obd_class.h>
#include "osc_internal.h"
#ifndef OSC_INTERNAL_H
#define OSC_INTERNAL_H
-#include <portals/lib-types.h> /* for PTL_MTU and PTL_MD_MAX_PAGES */
-
-
-/* bug 1578: negotiate BRW_MAX_SIZE with the OST, instead of hard-coding it */
-#define OSC_BRW_MAX_SIZE PTL_MTU
-#define OSC_BRW_MAX_IOV PTL_MD_MAX_PAGES
-
#define OAP_MAGIC 8675309
struct osc_async_page {
# include <linux/module.h>
# include <linux/mm.h>
# include <linux/highmem.h>
-# include <linux/lustre_dlm.h>
+# include <linux/ctype.h>
+# include <linux/init.h>
# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
# include <linux/workqueue.h>
# include <linux/smp_lock.h>
# include <liblustre.h>
#endif
+# include <linux/lustre_dlm.h>
#include <linux/kp30.h>
#include <linux/lustre_net.h>
#include <linux/lustre_user.h>
#include <linux/obd_ost.h>
#include <linux/obd_lov.h>
-#ifndef __CYGWIN__
-# include <linux/ctype.h>
-# include <linux/init.h>
-#else
+#ifdef __CYGWIN__
# include <ctype.h>
#endif
memcpy(aa->aa_oa, &body->oa, sizeof(*aa->aa_oa));
/* This should really be sent by the OST */
- aa->aa_oa->o_blksize = OSC_BRW_MAX_SIZE;
+ aa->aa_oa->o_blksize = PTLRPC_MAX_BRW_SIZE;
aa->aa_oa->o_valid |= OBD_MD_FLBLKSZ;
} else {
CERROR("can't unpack ost_body\n");
memcpy(oa, &body->oa, sizeof(*oa));
/* This should really be sent by the OST */
- oa->o_blksize = OSC_BRW_MAX_SIZE;
+ oa->o_blksize = PTLRPC_MAX_BRW_SIZE;
oa->o_valid |= OBD_MD_FLBLKSZ;
EXIT;
memcpy(oa, &body->oa, sizeof(*oa));
/* This should really be sent by the OST */
- oa->o_blksize = OSC_BRW_MAX_SIZE;
+ oa->o_blksize = PTLRPC_MAX_BRW_SIZE;
oa->o_valid |= OBD_MD_FLBLKSZ;
/* XXX LOV STACKING: the lsm that is passed to us from LOV does not
obd_count pages_per_brw;
int rc;
- if (page_count > OSC_BRW_MAX_IOV)
- pages_per_brw = OSC_BRW_MAX_IOV;
+ if (page_count > PTLRPC_MAX_BRW_PAGES)
+ pages_per_brw = PTLRPC_MAX_BRW_PAGES;
else
pages_per_brw = page_count;
obd_count pages_per_brw;
int rc;
- if (page_count > OSC_BRW_MAX_IOV)
- pages_per_brw = OSC_BRW_MAX_IOV;
+ if (page_count > PTLRPC_MAX_BRW_PAGES)
+ pages_per_brw = PTLRPC_MAX_BRW_PAGES;
else
pages_per_brw = page_count;
obd_count pages_per_brw;
int rc;
- if (page_count > OSC_BRW_MAX_IOV)
- pages_per_brw = OSC_BRW_MAX_IOV;
+ if (page_count > PTLRPC_MAX_BRW_PAGES)
+ pages_per_brw = PTLRPC_MAX_BRW_PAGES;
else
pages_per_brw = page_count;
EXTRA_DIST = Rules.linux archdep.m4 include
DIST_SUBDIRS = libcfs portals knals unals utils tests doc router
+
if LIBLUSTRE
SUBDIRS = portals unals utils
else
+
+if CRAY_PORTALS
+SUBDIRS = libcfs tests doc
+else
SUBDIRS = libcfs portals knals unals utils tests doc router
endif
+
+endif
AM_CONDITIONAL(INKERNEL, test x$enable_inkernel = xyes)
echo "Makefile for in kernel build: $INKERNEL"
+# -------- are we building against an external portals? -------
+# XXX CRAY_PORTALS is currently injected by appending -DCRAY_PORTALS=1 to CC;
+# automake's DEFS variable looks like the proper place for it.
+AC_ARG_WITH(cray-portals, [  --with-cray-portals=[path] path to cray portals],
+	[case "x$with_cray_portals" in
+	xno)
+		# --without-cray-portals: keep building the bundled portals, so
+		# clear the value the CRAY_PORTALS conditional tests for.
+		with_cray_portals=
+		;;
+	xyes)
+		# A bare --with-cray-portals would otherwise produce "-Iyes".
+		AC_MSG_ERROR([--with-cray-portals requires the path to the cray portals headers])
+		;;
+	*)
+		CRAY_PORTALS_INCLUDE="-I$with_cray_portals"
+		CC="$CC -DCRAY_PORTALS=1"
+		;;
+	esac])
+AC_SUBST(CRAY_PORTALS_INCLUDE)
+AM_CONDITIONAL(CRAY_PORTALS, test ! "x$with_cray_portals" = x)
+
# -------- liblustre compilation --------------
AC_ARG_WITH(lib, [ --with-lib compile lustre library], host_cpu="lib")
# ------------ include paths ------------------
+KINCFLAGS="$CRAY_PORTALS_INCLUDE $CRAY_PORTALS_COMMANDLINE \
+ -I\$(top_srcdir)/include \
+ -I\$(top_srcdir)/portals/include -I$LINUX/include"
if test $host_cpu != "lib" ; then
- KINCFLAGS="-I\$(top_srcdir)/include -I\$(top_srcdir)/portals/include -I$LINUX/include"
-else
- KINCFLAGS='-I$(top_srcdir)/include -I$(top_srcdir)/portals/include'
+ KINCFLAGS="$KINCFLAGS -I$LINUX/include"
fi
CPPFLAGS="$KINCFLAGS $ARCHCPPFLAGS"
+++ /dev/null
-# This version is here to make autoconf happy; the name is a file which is
-# "unique" to this directory so that configure knows where it should run.
-AC_INIT(knals/Makefile.am, 3.0)
-AC_CANONICAL_SYSTEM
-# Copyright (C) 2001 Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-# Automake variables. Steal the version number from packaging/intersync.spec
-AM_INIT_AUTOMAKE(portals, builtin([esyscmd], [sed -ne '/.*define IVERSION /{ s/.*IVERSION //; p; }' libcfs/module.c]))
-# AM_MAINTAINER_MODE
-
-sinclude(build.m4)
-sinclude(archdep.m4)
-
-if test x$enable_inkernel = xyes ; then
-cp Kernelenv.mk Kernelenv.in
-cp Makefile.mk Makefile.in
-cp libcfs/Makefile.mk libcfs/Makefile.in
-cp portals/Makefile.mk portals/Makefile.in
-cp knals/Makefile.mk knals/Makefile.in
-cp knals/socknal/Makefile.mk knals/socknal/Makefile.in
-cp router/Makefile.mk router/Makefile.in
-fi
-
-AM_CONFIG_HEADER(include/config.h)
-
-AC_OUTPUT([Makefile Kernelenv libcfs/Makefile portals/Makefile \
- unals/Makefile knals/Makefile router/Makefile \
- knals/socknal/Makefile knals/gmnal/Makefile knals/qswnal/Makefile \
- knals/scimacnal/Makefile knals/ibnal/Makefile\
- utils/Makefile tests/Makefile doc/Makefile ])
-
+++ /dev/null
-/* portals/include/config.h.in. Generated from configure.in by autoheader. */
-
-/* Use the Pinger */
-#undef ENABLE_PINGER
-
-/* Define to 1 if you have the <inttypes.h> header file. */
-#undef HAVE_INTTYPES_H
-
-/* Define to 1 if you have the <memory.h> header file. */
-#undef HAVE_MEMORY_H
-
-/* Define to 1 if you have the <stdint.h> header file. */
-#undef HAVE_STDINT_H
-
-/* Define to 1 if you have the <stdlib.h> header file. */
-#undef HAVE_STDLIB_H
-
-/* Define to 1 if you have the <strings.h> header file. */
-#undef HAVE_STRINGS_H
-
-/* Define to 1 if you have the <string.h> header file. */
-#undef HAVE_STRING_H
-
-/* Define to 1 if you have the <sys/stat.h> header file. */
-#undef HAVE_SYS_STAT_H
-
-/* Define to 1 if you have the <sys/types.h> header file. */
-#undef HAVE_SYS_TYPES_H
-
-/* Define to 1 if you have the <unistd.h> header file. */
-#undef HAVE_UNISTD_H
-
-/* IOCTL Buffer Size */
-#undef OBD_MAX_IOCTL_BUFFER
-
-/* Name of package */
-#undef PACKAGE
-
-/* Define to the address where bug reports for this package should be sent. */
-#undef PACKAGE_BUGREPORT
-
-/* Define to the full name of this package. */
-#undef PACKAGE_NAME
-
-/* Define to the full name and version of this package. */
-#undef PACKAGE_STRING
-
-/* Define to the one symbol short name of this package. */
-#undef PACKAGE_TARNAME
-
-/* Define to the version of this package. */
-#undef PACKAGE_VERSION
-
-/* The size of a `unsigned long long', as computed by sizeof. */
-#undef SIZEOF_UNSIGNED_LONG_LONG
-
-/* Define to 1 if you have the ANSI C header files. */
-#undef STDC_HEADERS
-
-/* Version number of package */
-#undef VERSION
#ifndef _KP30_INCLUDED
#define _KP30_INCLUDED
+#include <linux/libcfs.h>
#define PORTAL_DEBUG
#ifndef offsetof
#define LOWEST_BIT_SET(x) ((x) & ~((x) - 1))
-/*
- * Debugging
- */
-extern unsigned int portal_subsystem_debug;
-extern unsigned int portal_stack;
-extern unsigned int portal_debug;
-extern unsigned int portal_printk;
-extern unsigned int portal_cerror;
-/* Debugging subsystems (32 bits, non-overlapping) */
-#define S_UNDEFINED 0x00000001
-#define S_MDC 0x00000002
-#define S_MDS 0x00000004
-#define S_OSC 0x00000008
-#define S_OST 0x00000010
-#define S_CLASS 0x00000020
-#define S_LOG 0x00000040
-#define S_LLITE 0x00000080
-#define S_RPC 0x00000100
-#define S_MGMT 0x00000200
-#define S_PORTALS 0x00000400
-#define S_SOCKNAL 0x00000800
-#define S_QSWNAL 0x00001000
-#define S_PINGER 0x00002000
-#define S_FILTER 0x00004000
-#define S_PTLBD 0x00008000
-#define S_ECHO 0x00010000
-#define S_LDLM 0x00020000
-#define S_LOV 0x00040000
-#define S_GMNAL 0x00080000
-#define S_PTLROUTER 0x00100000
-#define S_COBD 0x00200000
-#define S_IBNAL 0x00400000
-
-/* If you change these values, please keep portals/utils/debug.c
- * up to date! */
-
-/* Debugging masks (32 bits, non-overlapping) */
-#define D_TRACE 0x00000001 /* ENTRY/EXIT markers */
-#define D_INODE 0x00000002
-#define D_SUPER 0x00000004
-#define D_EXT2 0x00000008 /* anything from ext2_debug */
-#define D_MALLOC 0x00000010 /* print malloc, free information */
-#define D_CACHE 0x00000020 /* cache-related items */
-#define D_INFO 0x00000040 /* general information */
-#define D_IOCTL 0x00000080 /* ioctl related information */
-#define D_BLOCKS 0x00000100 /* ext2 block allocation */
-#define D_NET 0x00000200 /* network communications */
-#define D_WARNING 0x00000400 /* CWARN(...) == CDEBUG (D_WARNING, ...) */
-#define D_BUFFS 0x00000800
-#define D_OTHER 0x00001000
-#define D_DENTRY 0x00002000
-#define D_PORTALS 0x00004000 /* ENTRY/EXIT markers */
-#define D_PAGE 0x00008000 /* bulk page handling */
-#define D_DLMTRACE 0x00010000
-#define D_ERROR 0x00020000 /* CERROR(...) == CDEBUG (D_ERROR, ...) */
-#define D_EMERG 0x00040000 /* CEMERG(...) == CDEBUG (D_EMERG, ...) */
-#define D_HA 0x00080000 /* recovery and failover */
-#define D_RPCTRACE 0x00100000 /* for distributed debugging */
-#define D_VFSTRACE 0x00200000
-#define D_READA 0x00400000 /* read-ahead */
-
-#ifdef __KERNEL__
-# include <linux/sched.h> /* THREAD_SIZE */
-#else
-# ifndef THREAD_SIZE /* x86_64 has THREAD_SIZE in userspace */
-# define THREAD_SIZE 8192
-# endif
-#endif
-
-#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5)
-
-#ifdef __KERNEL__
-# ifdef __ia64__
-# define CDEBUG_STACK (THREAD_SIZE - \
- ((unsigned long)__builtin_dwarf_cfa() & \
- (THREAD_SIZE - 1)))
-# else
-# define CDEBUG_STACK (THREAD_SIZE - \
- ((unsigned long)__builtin_frame_address(0) & \
- (THREAD_SIZE - 1)))
-# endif
-
-#define CHECK_STACK(stack) \
- do { \
- if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) { \
- portals_debug_msg(DEBUG_SUBSYSTEM, D_WARNING, \
- __FILE__, __FUNCTION__, __LINE__, \
- (stack),"maximum lustre stack %u\n",\
- portal_stack = (stack)); \
- /*panic("LBUG");*/ \
- } \
- } while (0)
-#else /* __KERNEL__ */
-#define CHECK_STACK(stack) do { } while(0)
-#define CDEBUG_STACK (0L)
-#endif /* __KERNEL__ */
-
-#if 1
-#define CDEBUG(mask, format, a...) \
-do { \
- if (portal_cerror == 0) \
- break; \
- CHECK_STACK(CDEBUG_STACK); \
- if (((mask) & (D_ERROR | D_EMERG | D_WARNING)) || \
- (portal_debug & (mask) && \
- portal_subsystem_debug & DEBUG_SUBSYSTEM)) \
- portals_debug_msg(DEBUG_SUBSYSTEM, mask, \
- __FILE__, __FUNCTION__, __LINE__, \
- CDEBUG_STACK, format, ## a); \
-} while (0)
-
-#define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a)
-#define CERROR(format, a...) CDEBUG(D_ERROR, format, ## a)
-#define CEMERG(format, a...) CDEBUG(D_EMERG, format, ## a)
-
-#define GOTO(label, rc) \
-do { \
- long GOTO__ret = (long)(rc); \
- CDEBUG(D_TRACE,"Process leaving via %s (rc=%lu : %ld : %lx)\n", \
- #label, (unsigned long)GOTO__ret, (signed long)GOTO__ret,\
- (signed long)GOTO__ret); \
- goto label; \
-} while (0)
-
-#define RETURN(rc) \
-do { \
- typeof(rc) RETURN__ret = (rc); \
- CDEBUG(D_TRACE, "Process leaving (rc=%lu : %ld : %lx)\n", \
- (long)RETURN__ret, (long)RETURN__ret, (long)RETURN__ret);\
- return RETURN__ret; \
-} while (0)
-
-#define ENTRY \
-do { \
- CDEBUG(D_TRACE, "Process entered\n"); \
-} while (0)
-
-#define EXIT \
-do { \
- CDEBUG(D_TRACE, "Process leaving\n"); \
-} while(0)
-#else
-#define CDEBUG(mask, format, a...) do { } while (0)
-#define CWARN(format, a...) do { } while (0)
-#define CERROR(format, a...) printk("<3>" format, ## a)
-#define CEMERG(format, a...) printk("<0>" format, ## a)
-#define GOTO(label, rc) do { (void)(rc); goto label; } while (0)
-#define RETURN(rc) return (rc)
-#define ENTRY do { } while (0)
-#define EXIT do { } while (0)
-#endif
-
#ifdef __KERNEL__
# include <linux/vmalloc.h>
# include <linux/time.h>
# include <linux/highmem.h>
# include <linux/module.h>
# include <linux/version.h>
-# include <portals/lib-nal.h>
+# include <portals/p30.h>
# include <linux/smp_lock.h>
# include <asm/atomic.h>
#endif
/******************************************************************************/
-/* Kernel Portals Router interface */
-
-typedef void (*kpr_fwd_callback_t)(void *arg, int error); // completion callback
-
-/* space for routing targets to stash "stuff" in a forwarded packet */
-typedef union {
- long long _alignment;
- void *_space[16]; /* scale with CPU arch */
-} kprfd_scratch_t;
-
-/* Kernel Portals Routing Forwarded message Descriptor */
-typedef struct {
- struct list_head kprfd_list; /* stash in queues (routing target can use) */
- ptl_nid_t kprfd_target_nid; /* final destination NID */
- ptl_nid_t kprfd_gateway_nid; /* gateway NID */
- ptl_hdr_t *kprfd_hdr; /* header in wire byte order */
- int kprfd_nob; /* # payload bytes */
- int kprfd_niov; /* # payload frags */
- ptl_kiov_t *kprfd_kiov; /* payload fragments */
- void *kprfd_router_arg; /* originating NAL's router arg */
- kpr_fwd_callback_t kprfd_callback; /* completion callback */
- void *kprfd_callback_arg; /* completion callback arg */
- kprfd_scratch_t kprfd_scratch; /* scratchpad for routing targets */
-} kpr_fwd_desc_t;
-
-typedef void (*kpr_fwd_t)(void *arg, kpr_fwd_desc_t *fwd);
-typedef void (*kpr_notify_t)(void *arg, ptl_nid_t peer, int alive);
-
-/* NAL's routing interface (Kernel Portals Routing Nal Interface) */
-typedef const struct {
- int kprni_nalid; /* NAL's id */
- void *kprni_arg; /* Arg to pass when calling into NAL */
- kpr_fwd_t kprni_fwd; /* NAL's forwarding entrypoint */
- kpr_notify_t kprni_notify; /* NAL's notification entrypoint */
-} kpr_nal_interface_t;
-
-/* Router's routing interface (Kernel Portals Routing Router Interface) */
-typedef const struct {
- /* register the calling NAL with the router and get back the handle for
- * subsequent calls */
- int (*kprri_register) (kpr_nal_interface_t *nal_interface,
- void **router_arg);
-
- /* ask the router to find a gateway that forwards to 'nid' and is a
- * peer of the calling NAL; assume caller will send 'nob' bytes of
- * payload there */
- int (*kprri_lookup) (void *router_arg, ptl_nid_t nid, int nob,
- ptl_nid_t *gateway_nid);
-
- /* hand a packet over to the router for forwarding */
- kpr_fwd_t kprri_fwd_start;
-
- /* hand a packet back to the router for completion */
- void (*kprri_fwd_done) (void *router_arg, kpr_fwd_desc_t *fwd,
- int error);
-
- /* notify the router about peer state */
- void (*kprri_notify) (void *router_arg, ptl_nid_t peer,
- int alive, time_t when);
-
- /* the calling NAL is shutting down */
- void (*kprri_shutdown) (void *router_arg);
-
- /* deregister the calling NAL with the router */
- void (*kprri_deregister) (void *router_arg);
-
-} kpr_router_interface_t;
-
-/* Convenient struct for NAL to stash router interface/args */
-typedef struct {
- kpr_router_interface_t *kpr_interface;
- void *kpr_arg;
-} kpr_router_t;
-
-/* Router's control interface (Kernel Portals Routing Control Interface) */
-typedef const struct {
- int (*kprci_add_route)(int gateway_nal, ptl_nid_t gateway_nid,
- ptl_nid_t lo_nid, ptl_nid_t hi_nid);
- int (*kprci_del_route)(int gateway_nal, ptl_nid_t gateway_nid,
- ptl_nid_t lo_nid, ptl_nid_t hi_nid);
- int (*kprci_get_route)(int index, int *gateway_nal,
- ptl_nid_t *gateway,
- ptl_nid_t *lo_nid, ptl_nid_t *hi_nid,
- int *alive);
- int (*kprci_notify)(int gateway_nal, ptl_nid_t gateway_nid,
- int alive, time_t when);
-} kpr_control_interface_t;
-
-extern kpr_control_interface_t kpr_control_interface;
-extern kpr_router_interface_t kpr_router_interface;
-
-static inline int
-kpr_register (kpr_router_t *router, kpr_nal_interface_t *nalif)
-{
- int rc;
-
- router->kpr_interface = PORTAL_SYMBOL_GET (kpr_router_interface);
- if (router->kpr_interface == NULL)
- return (-ENOENT);
-
- rc = (router->kpr_interface)->kprri_register (nalif, &router->kpr_arg);
- if (rc != 0)
- router->kpr_interface = NULL;
-
- PORTAL_SYMBOL_PUT (kpr_router_interface);
- return (rc);
-}
-
-static inline int
-kpr_routing (kpr_router_t *router)
-{
- return (router->kpr_interface != NULL);
-}
-
-static inline int
-kpr_lookup (kpr_router_t *router, ptl_nid_t nid, int nob, ptl_nid_t *gateway_nid)
-{
- if (!kpr_routing (router))
- return (-ENETUNREACH);
-
- return (router->kpr_interface->kprri_lookup(router->kpr_arg, nid, nob,
- gateway_nid));
-}
-
-static inline void
-kpr_fwd_init (kpr_fwd_desc_t *fwd, ptl_nid_t nid, ptl_hdr_t *hdr,
- int nob, int niov, ptl_kiov_t *kiov,
- kpr_fwd_callback_t callback, void *callback_arg)
-{
- fwd->kprfd_target_nid = nid;
- fwd->kprfd_gateway_nid = nid;
- fwd->kprfd_hdr = hdr;
- fwd->kprfd_nob = nob;
- fwd->kprfd_niov = niov;
- fwd->kprfd_kiov = kiov;
- fwd->kprfd_callback = callback;
- fwd->kprfd_callback_arg = callback_arg;
-}
-
-static inline void
-kpr_fwd_start (kpr_router_t *router, kpr_fwd_desc_t *fwd)
-{
- if (!kpr_routing (router))
- fwd->kprfd_callback (fwd->kprfd_callback_arg, -ENETUNREACH);
- else
- router->kpr_interface->kprri_fwd_start (router->kpr_arg, fwd);
-}
-
-static inline void
-kpr_fwd_done (kpr_router_t *router, kpr_fwd_desc_t *fwd, int error)
-{
- LASSERT (kpr_routing (router));
- router->kpr_interface->kprri_fwd_done (router->kpr_arg, fwd, error);
-}
-
-static inline void
-kpr_notify (kpr_router_t *router,
- ptl_nid_t peer, int alive, time_t when)
-{
- if (!kpr_routing (router))
- return;
-
- router->kpr_interface->kprri_notify(router->kpr_arg, peer, alive, when);
-}
-
-static inline void
-kpr_shutdown (kpr_router_t *router)
-{
- if (kpr_routing (router))
- router->kpr_interface->kprri_shutdown (router->kpr_arg);
-}
-
-static inline void
-kpr_deregister (kpr_router_t *router)
-{
- if (!kpr_routing (router))
- return;
- router->kpr_interface->kprri_deregister (router->kpr_arg);
- router->kpr_interface = NULL;
-}
-
-/******************************************************************************/
#ifdef PORTALS_PROFILING
#define prof_enum(FOO) PROF__##FOO
#define PING_SYNC 0
#define PING_ASYNC 1
-struct portal_ioctl_data {
- __u32 ioc_len;
- __u32 ioc_version;
- __u64 ioc_nid;
- __u64 ioc_nid2;
- __u64 ioc_nid3;
- __u32 ioc_count;
- __u32 ioc_nal;
- __u32 ioc_nal_cmd;
- __u32 ioc_fd;
- __u32 ioc_id;
-
- __u32 ioc_flags;
- __u32 ioc_size;
-
- __u32 ioc_wait;
- __u32 ioc_timeout;
- __u32 ioc_misc;
-
- __u32 ioc_inllen1;
- char *ioc_inlbuf1;
- __u32 ioc_inllen2;
- char *ioc_inlbuf2;
-
- __u32 ioc_plen1; /* buffers in userspace */
- char *ioc_pbuf1;
- __u32 ioc_plen2; /* buffers in userspace */
- char *ioc_pbuf2;
-
- char ioc_bulk[0];
-};
-
struct portal_ioctl_hdr {
__u32 ioc_len;
__u32 ioc_version;
DEBUG_DAEMON_CONTINUE = 4,
};
-/* XXX remove to lustre ASAP */
-struct lustre_peer {
- ptl_nid_t peer_nid;
- ptl_handle_ni_t peer_ni;
-};
-
-
/* module.c */
typedef int (*nal_cmd_handler_t)(struct portals_cfg *, void * private);
int kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _KPR_H
+#define _KPR_H
+
+# include <portals/lib-nal.h> /* for ptl_hdr_t */
+
+/******************************************************************************/
+/* Kernel Portals Router interface */
+
+typedef void (*kpr_fwd_callback_t)(void *arg, int error); // completion callback
+
+/* space for routing targets to stash "stuff" in a forwarded packet:
+ * 16 pointer-sized slots, so the size follows the CPU's pointer width.
+ * The long long member is presumably there only to force alignment. */
+typedef union {
+ long long _alignment;
+ void *_space[16]; /* scale with CPU arch */
+} kprfd_scratch_t;
+
+/* Kernel Portals Routing Forwarded message Descriptor */
+typedef struct {
+ struct list_head kprfd_list; /* stash in queues (routing target can use) */
+ ptl_nid_t kprfd_target_nid; /* final destination NID */
+ ptl_nid_t kprfd_gateway_nid; /* gateway NID */
+ ptl_hdr_t *kprfd_hdr; /* header in wire byte order */
+ int kprfd_nob; /* # payload bytes */
+ int kprfd_niov; /* # payload frags */
+ ptl_kiov_t *kprfd_kiov; /* payload fragments */
+ void *kprfd_router_arg; /* originating NAL's router arg */
+ kpr_fwd_callback_t kprfd_callback; /* completion callback */
+ void *kprfd_callback_arg; /* completion callback arg */
+ kprfd_scratch_t kprfd_scratch; /* scratchpad for routing targets */
+} kpr_fwd_desc_t;
+/* Callback signatures: kpr_fwd_t hands over a forwarding descriptor (it is
+ * the type of both the NAL's kprni_fwd and the router's kprri_fwd_start);
+ * kpr_notify_t reports a peer NID and its alive flag (NAL's kprni_notify). */
+typedef void (*kpr_fwd_t)(void *arg, kpr_fwd_desc_t *fwd);
+typedef void (*kpr_notify_t)(void *arg, ptl_nid_t peer, int alive);
+
+/* NAL's routing interface (Kernel Portals Routing Nal Interface) */
+typedef const struct {
+ int kprni_nalid; /* NAL's id */
+ void *kprni_arg; /* Arg to pass when calling into NAL */
+ kpr_fwd_t kprni_fwd; /* NAL's forwarding entrypoint */
+ kpr_notify_t kprni_notify; /* NAL's notification entrypoint */
+} kpr_nal_interface_t;
+
+/* Router's routing interface (Kernel Portals Routing Router Interface) */
+typedef const struct {
+ /* register the calling NAL with the router and get back the handle for
+ * subsequent calls */
+ int (*kprri_register) (kpr_nal_interface_t *nal_interface,
+ void **router_arg);
+
+ /* ask the router to find a gateway that forwards to 'nid' and is a
+ * peer of the calling NAL; assume caller will send 'nob' bytes of
+ * payload there */
+ int (*kprri_lookup) (void *router_arg, ptl_nid_t nid, int nob,
+ ptl_nid_t *gateway_nid);
+
+ /* hand a packet over to the router for forwarding */
+ kpr_fwd_t kprri_fwd_start;
+
+ /* hand a packet back to the router for completion */
+ void (*kprri_fwd_done) (void *router_arg, kpr_fwd_desc_t *fwd,
+ int error);
+
+ /* notify the router about peer state */
+ void (*kprri_notify) (void *router_arg, ptl_nid_t peer,
+ int alive, time_t when);
+
+ /* the calling NAL is shutting down */
+ void (*kprri_shutdown) (void *router_arg);
+
+ /* deregister the calling NAL with the router */
+ void (*kprri_deregister) (void *router_arg);
+
+} kpr_router_interface_t;
+
+/* Convenient struct for NAL to stash router interface/args */
+typedef struct {
+ kpr_router_interface_t *kpr_interface;
+ void *kpr_arg;
+} kpr_router_t;
+
+/* Router's control interface (Kernel Portals Routing Control Interface):
+ * a table of entry points for managing routes. Going by the signatures, a
+ * route pairs a gateway (NAL id + NID) with a [lo_nid, hi_nid] range, and
+ * kprci_get_route reports the route at 'index' through its out parameters.
+ * NOTE(review): semantics inferred from the member names -- confirm against
+ * the router implementation. */
+typedef const struct {
+ int (*kprci_add_route)(int gateway_nal, ptl_nid_t gateway_nid,
+ ptl_nid_t lo_nid, ptl_nid_t hi_nid);
+ int (*kprci_del_route)(int gateway_nal, ptl_nid_t gateway_nid,
+ ptl_nid_t lo_nid, ptl_nid_t hi_nid);
+ int (*kprci_get_route)(int index, int *gateway_nal,
+ ptl_nid_t *gateway,
+ ptl_nid_t *lo_nid, ptl_nid_t *hi_nid,
+ int *alive);
+ int (*kprci_notify)(int gateway_nal, ptl_nid_t gateway_nid,
+ int alive, time_t when);
+} kpr_control_interface_t;
+
+extern kpr_control_interface_t kpr_control_interface;
+extern kpr_router_interface_t kpr_router_interface;
+/*
+ * Bind 'router' to the kernel router and register the NAL 'nalif' with it.
+ * Returns -ENOENT if PORTAL_SYMBOL_GET() cannot find kpr_router_interface,
+ * otherwise whatever kprri_register() returned. On a non-zero result
+ * router->kpr_interface is reset to NULL, so kpr_routing() reports false.
+ * The symbol reference taken here is always dropped again before returning.
+ */
+static inline int
+kpr_register (kpr_router_t *router, kpr_nal_interface_t *nalif)
+{
+ int rc;
+
+ router->kpr_interface = PORTAL_SYMBOL_GET (kpr_router_interface);
+ if (router->kpr_interface == NULL)
+ return (-ENOENT);
+
+ rc = (router->kpr_interface)->kprri_register (nalif, &router->kpr_arg);
+ if (rc != 0)
+ router->kpr_interface = NULL;
+
+ PORTAL_SYMBOL_PUT (kpr_router_interface);
+ return (rc);
+}
+/* Non-zero iff 'router' currently holds a router interface pointer */
+static inline int
+kpr_routing (kpr_router_t *router)
+{
+ return (router->kpr_interface != NULL);
+}
+/*
+ * Ask the router which gateway to use to reach 'nid' with 'nob' bytes of
+ * payload; the answer comes back through 'gateway_nid'. Returns
+ * -ENETUNREACH without calling the router when none is registered,
+ * otherwise the router's kprri_lookup() result.
+ */
+static inline int
+kpr_lookup (kpr_router_t *router, ptl_nid_t nid, int nob, ptl_nid_t *gateway_nid)
+{
+ if (!kpr_routing (router))
+ return (-ENETUNREACH);
+
+ return (router->kpr_interface->kprri_lookup(router->kpr_arg, nid, nob,
+ gateway_nid));
+}
+/*
+ * Fill in a forwarding descriptor from the caller's arguments. The gateway
+ * NID starts out equal to the target NID. kprfd_list, kprfd_router_arg and
+ * kprfd_scratch are not touched here.
+ */
+static inline void
+kpr_fwd_init (kpr_fwd_desc_t *fwd, ptl_nid_t nid, ptl_hdr_t *hdr,
+ int nob, int niov, ptl_kiov_t *kiov,
+ kpr_fwd_callback_t callback, void *callback_arg)
+{
+ fwd->kprfd_target_nid = nid;
+ fwd->kprfd_gateway_nid = nid;
+ fwd->kprfd_hdr = hdr;
+ fwd->kprfd_nob = nob;
+ fwd->kprfd_niov = niov;
+ fwd->kprfd_kiov = kiov;
+ fwd->kprfd_callback = callback;
+ fwd->kprfd_callback_arg = callback_arg;
+}
+/*
+ * Hand 'fwd' to the router for forwarding. If no router is registered the
+ * descriptor's completion callback is invoked right away with -ENETUNREACH.
+ */
+static inline void
+kpr_fwd_start (kpr_router_t *router, kpr_fwd_desc_t *fwd)
+{
+ if (!kpr_routing (router))
+ fwd->kprfd_callback (fwd->kprfd_callback_arg, -ENETUNREACH);
+ else
+ router->kpr_interface->kprri_fwd_start (router->kpr_arg, fwd);
+}
+/* Hand a forwarded packet back to the router with completion status
+ * 'error'; asserts that a router is registered. */
+static inline void
+kpr_fwd_done (kpr_router_t *router, kpr_fwd_desc_t *fwd, int error)
+{
+ LASSERT (kpr_routing (router));
+ router->kpr_interface->kprri_fwd_done (router->kpr_arg, fwd, error);
+}
+/* Pass a peer's state ('alive' flag and timestamp 'when') on to the router;
+ * does nothing when no router is registered. */
+static inline void
+kpr_notify (kpr_router_t *router,
+ ptl_nid_t peer, int alive, time_t when)
+{
+ if (!kpr_routing (router))
+ return;
+
+ router->kpr_interface->kprri_notify(router->kpr_arg, peer, alive, when);
+}
+/* Tell the router that the calling NAL is shutting down; does nothing when
+ * no router is registered. */
+static inline void
+kpr_shutdown (kpr_router_t *router)
+{
+ if (kpr_routing (router))
+ router->kpr_interface->kprri_shutdown (router->kpr_arg);
+}
+/* Deregister the calling NAL from the router and clear kpr_interface so
+ * that kpr_routing() reports false afterwards; no-op if not registered. */
+static inline void
+kpr_deregister (kpr_router_t *router)
+{
+ if (!kpr_routing (router))
+ return;
+ router->kpr_interface->kprri_deregister (router->kpr_arg);
+ router->kpr_interface = NULL;
+}
+
+#endif /* _KPR_H */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _LIBCFS_H
+
+
+#define PORTAL_DEBUG
+
+#ifndef offsetof
+/* Fallback for environments whose headers provide no offsetof(): the byte
+ * offset of member 'memb' inside type 'typ', as an int. The address is
+ * converted through unsigned long first so that a pointer is never cast
+ * directly to a narrower integer type (int is 32 bits on LP64 targets). */
+# define offsetof(typ,memb) ((int)(unsigned long)&(((typ *)0)->memb))
+#endif
+/* Isolate the least significant set bit of x (0 when x is 0); x is evaluated twice */
+#define LOWEST_BIT_SET(x) ((x) & ~((x) - 1))
+
+/*
+ * Debugging
+ */
+extern unsigned int portal_subsystem_debug;
+extern unsigned int portal_stack;
+extern unsigned int portal_debug;
+extern unsigned int portal_printk;
+extern unsigned int portal_cerror;
+/* Debugging subsystems (32 bits, non-overlapping) */
+#define S_UNDEFINED 0x00000001
+#define S_MDC 0x00000002
+#define S_MDS 0x00000004
+#define S_OSC 0x00000008
+#define S_OST 0x00000010
+#define S_CLASS 0x00000020
+#define S_LOG 0x00000040
+#define S_LLITE 0x00000080
+#define S_RPC 0x00000100
+#define S_MGMT 0x00000200
+#define S_PORTALS 0x00000400
+#define S_SOCKNAL 0x00000800
+#define S_QSWNAL 0x00001000
+#define S_PINGER 0x00002000
+#define S_FILTER 0x00004000
+#define S_PTLBD 0x00008000
+#define S_ECHO 0x00010000
+#define S_LDLM 0x00020000
+#define S_LOV 0x00040000
+#define S_GMNAL 0x00080000
+#define S_PTLROUTER 0x00100000
+#define S_COBD 0x00200000
+#define S_IBNAL 0x00400000
+
+/* If you change these values, please keep portals/utils/debug.c
+ * up to date! */
+
+/* Debugging masks (32 bits, non-overlapping) */
+#define D_TRACE 0x00000001 /* ENTRY/EXIT markers */
+#define D_INODE 0x00000002
+#define D_SUPER 0x00000004
+#define D_EXT2 0x00000008 /* anything from ext2_debug */
+#define D_MALLOC 0x00000010 /* print malloc, free information */
+#define D_CACHE 0x00000020 /* cache-related items */
+#define D_INFO 0x00000040 /* general information */
+#define D_IOCTL 0x00000080 /* ioctl related information */
+#define D_BLOCKS 0x00000100 /* ext2 block allocation */
+#define D_NET 0x00000200 /* network communications */
+#define D_WARNING 0x00000400 /* CWARN(...) == CDEBUG (D_WARNING, ...) */
+#define D_BUFFS 0x00000800
+#define D_OTHER 0x00001000
+#define D_DENTRY 0x00002000
+#define D_PORTALS 0x00004000 /* ENTRY/EXIT markers */
+#define D_PAGE 0x00008000 /* bulk page handling */
+#define D_DLMTRACE 0x00010000
+#define D_ERROR 0x00020000 /* CERROR(...) == CDEBUG (D_ERROR, ...) */
+#define D_EMERG 0x00040000 /* CEMERG(...) == CDEBUG (D_EMERG, ...) */
+#define D_HA 0x00080000 /* recovery and failover */
+#define D_RPCTRACE 0x00100000 /* for distributed debugging */
+#define D_VFSTRACE 0x00200000
+#define D_READA 0x00400000 /* read-ahead */
+
+#ifdef __KERNEL__
+# include <linux/sched.h> /* THREAD_SIZE */
+#else
+# ifndef THREAD_SIZE /* x86_64 has THREAD_SIZE in userspace */
+# define THREAD_SIZE 8192
+# endif
+#endif
+
+#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5)
+/*
+ * CDEBUG_STACK: THREAD_SIZE minus the offset of the current frame address
+ * (the DWARF CFA on ia64) within a THREAD_SIZE-sized window -- presumably
+ * the number of kernel stack bytes in use, assuming THREAD_SIZE-aligned
+ * stacks that grow downwards (TODO confirm).
+ * CHECK_STACK(stack): when 'stack' exceeds both 3/4 of THREAD_SIZE and the
+ * previous maximum held in portal_stack, log a D_WARNING message and store
+ * the new maximum (the assignment sits in the message's argument list).
+ * Outside the kernel CHECK_STACK does nothing and CDEBUG_STACK is 0.
+ */
+#ifdef __KERNEL__
+# ifdef __ia64__
+# define CDEBUG_STACK (THREAD_SIZE - \
+ ((unsigned long)__builtin_dwarf_cfa() & \
+ (THREAD_SIZE - 1)))
+# else
+# define CDEBUG_STACK (THREAD_SIZE - \
+ ((unsigned long)__builtin_frame_address(0) & \
+ (THREAD_SIZE - 1)))
+# endif
+
+#define CHECK_STACK(stack) \
+ do { \
+ if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) { \
+ portals_debug_msg(DEBUG_SUBSYSTEM, D_WARNING, \
+ __FILE__, __FUNCTION__, __LINE__, \
+ (stack),"maximum lustre stack %u\n",\
+ portal_stack = (stack)); \
+ /*panic("LBUG");*/ \
+ } \
+ } while (0)
+#else /* __KERNEL__ */
+#define CHECK_STACK(stack) do { } while(0)
+#define CDEBUG_STACK (0L)
+#endif /* __KERNEL__ */
+/*
+ * CDEBUG(mask, format, ...): emit a debug message via portals_debug_msg().
+ * Nothing at all is emitted while portal_cerror is 0. Otherwise a message
+ * whose mask includes D_ERROR, D_EMERG or D_WARNING is always emitted, and
+ * any other message only when 'mask' is enabled in portal_debug and
+ * DEBUG_SUBSYSTEM is enabled in portal_subsystem_debug. DEBUG_SUBSYSTEM is
+ * not defined in this header; the including file has to provide it.
+ */
+#if 1
+#define CDEBUG(mask, format, a...) \
+do { \
+ if (portal_cerror == 0) \
+ break; \
+ CHECK_STACK(CDEBUG_STACK); \
+ if (((mask) & (D_ERROR | D_EMERG | D_WARNING)) || \
+ (portal_debug & (mask) && \
+ portal_subsystem_debug & DEBUG_SUBSYSTEM)) \
+ portals_debug_msg(DEBUG_SUBSYSTEM, mask, \
+ __FILE__, __FUNCTION__, __LINE__, \
+ CDEBUG_STACK, format, ## a); \
+} while (0)
+
+#define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a)
+#define CERROR(format, a...) CDEBUG(D_ERROR, format, ## a)
+#define CEMERG(format, a...) CDEBUG(D_EMERG, format, ## a)
+
+/* GOTO(label, rc): trace 'rc' (as unsigned, signed and hex) at D_TRACE
+ * level, then jump to 'label'. 'rc' is evaluated exactly once. The
+ * arguments for %lu and %lx are cast to unsigned long so that each one
+ * matches the type its conversion specifier expects. */
+#define GOTO(label, rc) \
+do { \
+ long GOTO__ret = (long)(rc); \
+ CDEBUG(D_TRACE,"Process leaving via %s (rc=%lu : %ld : %lx)\n", \
+ #label, (unsigned long)GOTO__ret, (signed long)GOTO__ret,\
+ (unsigned long)GOTO__ret); \
+ goto label; \
+} while (0)
+
+/* RETURN(rc): trace 'rc' (as unsigned, signed and hex) at D_TRACE level and
+ * return it from the enclosing function. 'rc' is evaluated exactly once and
+ * returned in its own type. The arguments for %lu and %lx are cast to
+ * unsigned long so that each one matches its conversion specifier. */
+#define RETURN(rc) \
+do { \
+ typeof(rc) RETURN__ret = (rc); \
+ CDEBUG(D_TRACE, "Process leaving (rc=%lu : %ld : %lx)\n", \
+ (unsigned long)RETURN__ret, (long)RETURN__ret, \
+ (unsigned long)RETURN__ret); \
+ return RETURN__ret; \
+} while (0)
+
+#define ENTRY \
+do { \
+ CDEBUG(D_TRACE, "Process entered\n"); \
+} while (0)
+
+#define EXIT \
+do { \
+ CDEBUG(D_TRACE, "Process leaving\n"); \
+} while(0)
+#else
+#define CDEBUG(mask, format, a...) do { } while (0)
+#define CWARN(format, a...) do { } while (0)
+#define CERROR(format, a...) printk("<3>" format, ## a)
+#define CEMERG(format, a...) printk("<0>" format, ## a)
+#define GOTO(label, rc) do { (void)(rc); goto label; } while (0)
+#define RETURN(rc) return (rc)
+#define ENTRY do { } while (0)
+#define EXIT do { } while (0)
+#endif
+/*
+ * ioctl argument block. ioc_bulk[] is a zero-length trailing array, so
+ * variable-length data can follow the fixed fields -- presumably the inline
+ * buffers whose sizes are given by ioc_inllen1/ioc_inllen2 (TODO confirm
+ * the packing against the pack/unpack helpers).
+ */
+struct portal_ioctl_data {
+ __u32 ioc_len;
+ __u32 ioc_version;
+ __u64 ioc_nid;
+ __u64 ioc_nid2;
+ __u64 ioc_nid3;
+ __u32 ioc_count;
+ __u32 ioc_nal;
+ __u32 ioc_nal_cmd;
+ __u32 ioc_fd;
+ __u32 ioc_id;
+
+ __u32 ioc_flags;
+ __u32 ioc_size;
+
+ __u32 ioc_wait;
+ __u32 ioc_timeout;
+ __u32 ioc_misc;
+
+ __u32 ioc_inllen1;
+ char *ioc_inlbuf1;
+ __u32 ioc_inllen2;
+ char *ioc_inlbuf2;
+
+ __u32 ioc_plen1; /* buffers in userspace */
+ char *ioc_pbuf1;
+ __u32 ioc_plen2; /* buffers in userspace */
+ char *ioc_pbuf2;
+
+ char ioc_bulk[0];
+};
+
+#ifdef __KERNEL__
+
+#include <linux/list.h>
+/*
+ * An ioctl handler that can be linked into a list through 'item'.
+ * DECLARE_IOCTL_HANDLER(ident, func) defines a handler variable named
+ * 'ident' whose list node is initialised with LIST_HEAD_INIT and whose
+ * handle_ioctl callback is 'func'.
+ */
+struct libcfs_ioctl_handler {
+ struct list_head item;
+ int (*handle_ioctl)(struct portal_ioctl_data *data,
+ unsigned int cmd, unsigned long args);
+};
+
+#define DECLARE_IOCTL_HANDLER(ident, func) \
+ struct libcfs_ioctl_handler ident = { \
+ .item = LIST_HEAD_INIT(ident.item), \
+ .handle_ioctl = func \
+ }
+
+int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand);
+int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand);
+
+#endif
+
+#define _LIBCFS_H
+
+#endif /* _LIBCFS_H */
--- /dev/null
+#ifndef _LUSTRE_LIST_H
+#define _LUSTRE_LIST_H
+
+#ifdef __KERNEL__
+#include <linux/list.h>
+#else
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+/* Userspace stand-in for the kernel's prefetch(): evaluates its argument and
+ * discards the result. The argument is parenthesised so that an expression
+ * such as prefetch(p + 1) is cast to void as a whole. */
+#define prefetch(a) ((void)(a))
+
+struct list_head {
+ struct list_head *next, *prev;
+};
+
+typedef struct list_head list_t;
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+#define LIST_HEAD(name) \
+ struct list_head name = LIST_HEAD_INIT(name)
+
+#define INIT_LIST_HEAD(ptr) do { \
+ (ptr)->next = (ptr); (ptr)->prev = (ptr); \
+} while (0)
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_add(struct list_head * new,
+ struct list_head * prev,
+ struct list_head * next)
+{
+ next->prev = new;
+ new->next = next;
+ new->prev = prev;
+ prev->next = new;
+}
+
+/**
+ * list_add - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+ __list_add(new, head, head->next);
+}
+
+/**
+ * list_add_tail - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ */
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+ __list_add(new, head->prev, head);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_del(struct list_head * prev, struct list_head * next)
+{
+ next->prev = prev;
+ prev->next = next;
+}
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty on entry does not return true after this, the entry is in an undefined state.
+ */
+static inline void list_del(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+}
+
+/**
+ * list_del_init - deletes entry from list and reinitialize it.
+ * @entry: the element to delete from the list.
+ */
+static inline void list_del_init(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+ INIT_LIST_HEAD(entry);
+}
+
+/**
+ * list_move - delete from one list and add as another's head
+ * @list: the entry to move
+ * @head: the head that will precede our entry
+ */
+static inline void list_move(struct list_head *list, struct list_head *head)
+{
+ __list_del(list->prev, list->next);
+ list_add(list, head);
+}
+
+/**
+ * list_move_tail - delete from one list and add as another's tail
+ * @list: the entry to move
+ * @head: the head that will follow our entry
+ */
+static inline void list_move_tail(struct list_head *list,
+ struct list_head *head)
+{
+ __list_del(list->prev, list->next);
+ list_add_tail(list, head);
+}
+
+/**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test.
+ */
+static inline int list_empty(struct list_head *head)
+{
+ return head->next == head;
+}
+/*
+ * Link all entries of 'list' in between 'head' and head->next. The fields
+ * of 'list' itself are only read, never written. The callers in this file
+ * invoke it only after checking that 'list' is not empty.
+ */
+static inline void __list_splice(struct list_head *list,
+ struct list_head *head)
+{
+ struct list_head *first = list->next;
+ struct list_head *last = list->prev;
+ struct list_head *at = head->next;
+
+ first->prev = head;
+ head->next = first;
+
+ last->next = at;
+ at->prev = last;
+}
+
+/**
+ * list_splice - join two lists
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ */
+static inline void list_splice(struct list_head *list, struct list_head *head)
+{
+ if (!list_empty(list))
+ __list_splice(list, head);
+}
+
+/**
+ * list_splice_init - join two lists and reinitialise the emptied list.
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * The list at @list is reinitialised
+ */
+static inline void list_splice_init(struct list_head *list,
+ struct list_head *head)
+{
+ if (!list_empty(list)) {
+ __list_splice(list, head);
+ INIT_LIST_HEAD(list);
+ }
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr: the &struct list_head pointer.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_entry(ptr, type, member) \
+ ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+
+/**
+ * list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop counter.
+ * @head: the head for your list.
+ */
+#define list_for_each(pos, head) \
+ for (pos = (head)->next, prefetch(pos->next); pos != (head); \
+ pos = pos->next, prefetch(pos->next))
+
+/**
+ * list_for_each_prev - iterate over a list in reverse order
+ * @pos: the &struct list_head to use as a loop counter.
+ * @head: the head for your list.
+ */
+#define list_for_each_prev(pos, head) \
+ for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \
+ pos = pos->prev, prefetch(pos->prev))
+
+/**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos: the &struct list_head to use as a loop counter.
+ * @n: another &struct list_head to use as temporary storage
+ * @head: the head for your list.
+ */
+#define list_for_each_safe(pos, n, head) \
+ for (pos = (head)->next, n = pos->next; pos != (head); \
+ pos = n, n = pos->next)
+
+/**
+ * list_for_each_entry - iterate over list of given type
+ * @pos: the type * to use as a loop counter.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry(pos, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member), \
+ prefetch(pos->member.next); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.next, typeof(*pos), member), \
+ prefetch(pos->member.next))
+
+/**
+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @pos: the type * to use as a loop counter.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_safe(pos, n, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member), \
+ n = list_entry(pos->member.next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+#endif /* if !__KERNEL__*/
+#endif /* if !_LUSTRE_LIST_H */
# define DEBUG_SUBSYSTEM S_PORTALS
# define PORTAL_DEBUG
+#include "build_check.h"
+
#ifndef __KERNEL__
# include <stdio.h>
# include <stdlib.h>
#ifndef P30_API_H
#define P30_API_H
+#include "build_check.h"
+
#include <portals/types.h>
#ifndef PTL_NO_WRAP
-int PtlInit(void);
-int PtlInitialized(void);
+int PtlInit(int *);
void PtlFini(void);
int PtlNIInit(ptl_interface_t interface, ptl_pt_index_t ptl_size_in,
* Network interfaces
*/
-#ifndef PTL_NO_WRAP
-int PtlNIBarrier(ptl_handle_ni_t interface_in);
-#endif
-
int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in,
ptl_sr_value_t * status_out);
*/
int PtlFailNid (ptl_handle_ni_t ni, ptl_nid_t nid, unsigned int threshold);
+/*
+ * PtlSnprintHandle:
+ *
+ * This is not an official Portals 3 API call. It is provided
+ * so that an application can print an opaque handle.
+ */
+void PtlSnprintHandle (char *str, int str_len, ptl_handle_any_t handle);
/*
* Match entries
ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out);
int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in,
- ptl_handle_md_t * handle_out);
+ ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out);
int PtlMDUnlink(ptl_handle_md_t md_in);
int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t * event_out);
-int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out,
- int timeout);
+int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout,
+ ptl_event_t *event_out, int *which_out);
#endif
/*
#ifndef PTL_BLOCKS_H
#define PTL_BLOCKS_H
+#include "build_check.h"
+
/*
* blocks.h
*
ptl_handle_ni_t ni_in;
ptl_handle_eq_t eq_in;
ptl_md_t md_in;
+ ptl_unlink_t unlink_in;
} PtlMDBind_in;
typedef struct PtlMDBind_out {
--- /dev/null
+#ifndef _BUILD_CHECK_H
+#define _BUILD_CHECK_H
+/* Build guard: stop the compile if this header is reached while
+ * CRAY_PORTALS is defined. */
+#ifdef CRAY_PORTALS
+#error "an application got to me instead of cray's includes"
+#endif
+
+#endif
+#include "build_check.h"
/*
**
** This files contains definitions that are used throughout the cplant code.
#ifndef _P30_ERRNO_H_
#define _P30_ERRNO_H_
+#include "build_check.h"
/*
* include/portals/errno.h
*
/* If you change these, you must update the string table in api-errno.c */
typedef enum {
- PTL_OK = 0,
- PTL_SEGV = 1,
-
- PTL_NOSPACE = 2,
- PTL_INUSE = 3,
- PTL_VAL_FAILED = 4,
-
- PTL_NAL_FAILED = 5,
- PTL_NOINIT = 6,
- PTL_INIT_DUP = 7,
- PTL_INIT_INV = 8,
- PTL_AC_INV_INDEX = 9,
-
- PTL_INV_ASIZE = 10,
- PTL_INV_HANDLE = 11,
- PTL_INV_MD = 12,
- PTL_INV_ME = 13,
- PTL_INV_NI = 14,
+ PTL_OK = 0,
+ PTL_SEGV = 1,
+
+ PTL_NO_SPACE = 2,
+ PTL_ME_IN_USE = 3,
+ PTL_VAL_FAILED = 4,
+
+ PTL_NAL_FAILED = 5,
+ PTL_NO_INIT = 6,
+ PTL_IFACE_DUP = 7,
+ PTL_IFACE_INVALID = 8,
+
+ PTL_HANDLE_INVALID = 9,
+ PTL_MD_INVALID = 10,
+ PTL_ME_INVALID = 11,
/* If you change these, you must update the string table in api-errno.c */
- PTL_ILL_MD = 15,
- PTL_INV_PROC = 16,
- PTL_INV_PSIZE = 17,
- PTL_INV_PTINDEX = 18,
- PTL_INV_REG = 19,
-
- PTL_INV_SR_INDX = 20,
- PTL_ML_TOOLONG = 21,
- PTL_ADDR_UNKNOWN = 22,
- PTL_INV_EQ = 23,
- PTL_EQ_DROPPED = 24,
-
- PTL_EQ_EMPTY = 25,
- PTL_NOUPDATE = 26,
- PTL_FAIL = 27,
- PTL_NOT_IMPLEMENTED = 28,
- PTL_NO_ACK = 29,
-
- PTL_IOV_TOO_MANY = 30,
- PTL_IOV_TOO_SMALL = 31,
-
- PTL_EQ_INUSE = 32,
-
- PTL_MAX_ERRNO = 32
+ PTL_PROCESS_INVALID = 12,
+ PTL_PT_INDEX_INVALID = 13,
+
+ PTL_SR_INDEX_INVALID = 14,
+ PTL_EQ_INVALID = 15,
+ PTL_EQ_DROPPED = 16,
+
+ PTL_EQ_EMPTY = 17,
+ PTL_MD_NO_UPDATE = 18,
+ PTL_FAIL = 19,
+
+ PTL_IOV_TOO_MANY = 20,
+ PTL_IOV_TOO_SMALL = 21,
+
+ PTL_EQ_IN_USE = 22,
+
+ PTL_MAX_ERRNO = 23
} ptl_err_t;
/* If you change these, you must update the string table in api-errno.c */
#ifndef PTL_DISPATCH_H
#define PTL_DISPATCH_H
+#include "build_check.h"
/*
* include/dispatch.h
*
#ifndef _LIB_NAL_H_
#define _LIB_NAL_H_
+#include "build_check.h"
/*
* nal.h
*
#ifndef _LIB_P30_H_
#define _LIB_P30_H_
+#include "build_check.h"
+
#ifdef __KERNEL__
# include <asm/page.h>
# include <linux/string.h>
niov = umd->niov;
size = offsetof(lib_md_t, md_iov.kiov[niov]);
} else {
- niov = ((umd->options & PTL_MD_IOV) != 0) ?
+ niov = ((umd->options & PTL_MD_IOVEC) != 0) ?
umd->niov : 1;
size = offsetof(lib_md_t, md_iov.iov[niov]);
}
static inline lib_msg_t *
lib_msg_alloc(nal_cb_t *nal)
{
- /* NEVER called with statelock held */
+ /* NEVER called with statelock held; may be in interrupt... */
lib_msg_t *msg;
- PORTAL_ALLOC(msg, sizeof(*msg));
+ if (in_interrupt())
+ PORTAL_ALLOC_ATOMIC(msg, sizeof(*msg));
+ else
+ PORTAL_ALLOC(msg, sizeof(*msg));
+
if (msg != NULL) {
/* NULL pointers, clear flags etc */
memset (msg, 0, sizeof (*msg));
extern void lib_enq_event_locked (nal_cb_t *nal, void *private,
lib_eq_t *eq, ptl_event_t *ev);
extern void lib_finalize (nal_cb_t *nal, void *private, lib_msg_t *msg,
- ptl_err_t status);
+ ptl_ni_fail_t ni_fail_type);
extern void lib_parse (nal_cb_t *nal, ptl_hdr_t *hdr, void *private);
-extern lib_msg_t *lib_fake_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid,
- lib_md_t *getmd);
+extern lib_msg_t *lib_create_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid,
+ lib_msg_t *get_msg);
extern void print_hdr (nal_cb_t * nal, ptl_hdr_t * hdr);
#ifndef _LIB_TYPES_H_
#define _LIB_TYPES_H_
+#include "build_check.h"
+
#include <portals/types.h>
#ifdef __KERNEL__
# include <linux/uio.h>
} lib_counters_t;
/* temporary expedient: limit number of entries in discontiguous MDs */
-# define PTL_MTU (512<<10)
-# define PTL_MD_MAX_IOV 128
-# define PTL_MD_MAX_PAGES min_t(int, PTL_MD_MAX_IOV, PTL_MTU / PAGE_SIZE)
+#define PTL_MTU (512<<10)
+#define PTL_MD_MAX_IOV 128
struct lib_msg_t {
struct list_head msg_list;
ptl_size_t max_size;
int threshold;
int pending;
- ptl_unlink_t unlink;
unsigned int options;
unsigned int md_flags;
void *user_ptr;
} md_iov;
};
-#define PTL_MD_FLAG_UNLINK (1 << 0)
+#define PTL_MD_FLAG_ZOMBIE (1 << 0)
+#define PTL_MD_FLAG_AUTO_UNLINK (1 << 1)
+/* An MD is exhausted once its threshold has reached 0, or -- when
+ * PTL_MD_MAX_SIZE is set -- once fewer than max_size bytes remain beyond
+ * the current offset. */
+static inline int lib_md_exhausted (lib_md_t *md)
+{
+ return (md->threshold == 0 ||
+ ((md->options & PTL_MD_MAX_SIZE) != 0 &&
+ md->offset + md->max_size > md->length));
+}
#ifdef PTL_USE_LIB_FREELIST
typedef struct
* using the generic single-entry routines.
*/
-#define prefetch(a) ((void)a)
-
struct list_head {
struct list_head *next, *prev;
};
* @head: the head for your list.
*/
#define list_for_each(pos, head) \
- for (pos = (head)->next, prefetch(pos->next); pos != (head); \
- pos = pos->next, prefetch(pos->next))
+ for (pos = (head)->next ; pos != (head); pos = pos->next )
/**
* list_for_each_prev - iterate over a list in reverse order
* @head: the head for your list.
*/
#define list_for_each_prev(pos, head) \
- for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \
- pos = pos->prev, prefetch(pos->prev))
+ for (pos = (head)->prev ; pos != (head); pos = pos->prev)
/**
* list_for_each_safe - iterate over a list safe against removal of list entry
* @member: the name of the list_struct within the struct.
*/
#define list_for_each_entry(pos, head, member) \
- for (pos = list_entry((head)->next, typeof(*pos), member), \
- prefetch(pos->member.next); \
+ for (pos = list_entry((head)->next, typeof(*pos), member); \
&pos->member != (head); \
- pos = list_entry(pos->member.next, typeof(*pos), member), \
- prefetch(pos->member.next))
+ pos = list_entry(pos->member.next, typeof(*pos), member))
#endif
#ifndef list_for_each_entry_safe
#ifndef _NAL_H_
#define _NAL_H_
+#include "build_check.h"
+
/*
* p30/nal.h
*
int (*validate) (nal_t * nal, void *base, size_t extent);
- void (*yield) (nal_t * nal);
+ int (*yield) (nal_t * nal, unsigned long *flags, int milliseconds);
void (*lock) (nal_t * nal, unsigned long *flags);
+#include "build_check.h"
+
#define PTL_IFACE_TCP 1
#define PTL_IFACE_ER 2
#define PTL_IFACE_SS 3
#ifndef _P30_H_
#define _P30_H_
+#include "build_check.h"
+
/*
* p30.h
*
#include <portals/api.h>
#include <portals/nalids.h>
-extern int __p30_initialized; /* for libraries & test codes */
-extern int __p30_myr_initialized; /* that don't know if p30 */
-extern int __p30_ip_initialized; /* had been initialized yet */
-extern ptl_handle_ni_t __myr_ni_handle, __ip_ni_handle;
-
-extern int __p30_myr_timeout; /* in seconds, for PtlNIBarrier, */
-extern int __p30_ip_timeout; /* PtlReduce_all, & PtlBroadcast_all */
-
/*
* Debugging flags reserved for the Portals reference library.
* These are not part of the API as described in the SAND report
#ifndef _P30_TYPES_H_
#define _P30_TYPES_H_
+#include "build_check.h"
+
#ifdef __linux__
# include <asm/types.h>
# if defined(__powerpc__) && !defined(__KERNEL__)
#include <portals/errno.h>
+/* This implementation uses the same type for API function return codes and
+ * the completion status in an event */
+#define PTL_NI_OK PTL_OK
+typedef ptl_err_t ptl_ni_fail_t;
+
typedef __u64 ptl_nid_t;
typedef __u32 ptl_pid_t;
typedef __u32 ptl_pt_index_t;
typedef __u64 ptl_hdr_data_t;
typedef __u32 ptl_size_t;
+#define PTL_TIME_FOREVER (-1)
+#define PTL_EQ_HANDLER_NONE NULL
+
typedef struct {
unsigned long nal_idx; /* which network interface */
__u64 cookie; /* which thing on that interface */
typedef ptl_handle_any_t ptl_handle_md_t;
typedef ptl_handle_any_t ptl_handle_me_t;
-#define PTL_HANDLE_NONE \
+#define PTL_INVALID_HANDLE \
((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1})
-#define PTL_EQ_NONE PTL_HANDLE_NONE
+#define PTL_EQ_NONE PTL_INVALID_HANDLE
-static inline int PtlHandleEqual (ptl_handle_any_t h1, ptl_handle_any_t h2)
+static inline int PtlHandleIsEqual (ptl_handle_any_t h1, ptl_handle_any_t h2)
{
return (h1.nal_idx == h2.nal_idx && h1.cookie == h2.cookie);
}
} ptl_md_t;
/* Options for the MD structure */
-#define PTL_MD_OP_PUT (1 << 0)
-#define PTL_MD_OP_GET (1 << 1)
-#define PTL_MD_MANAGE_REMOTE (1 << 2)
-#define PTL_MD_AUTO_UNLINK (1 << 3)
-#define PTL_MD_TRUNCATE (1 << 4)
-#define PTL_MD_ACK_DISABLE (1 << 5)
-#define PTL_MD_IOV (1 << 6)
-#define PTL_MD_MAX_SIZE (1 << 7)
-#define PTL_MD_KIOV (1 << 8)
+#define PTL_MD_OP_PUT (1 << 0)
+#define PTL_MD_OP_GET (1 << 1)
+#define PTL_MD_MANAGE_REMOTE (1 << 2)
+/* unused (1 << 3) */
+#define PTL_MD_TRUNCATE (1 << 4)
+#define PTL_MD_ACK_DISABLE (1 << 5)
+#define PTL_MD_IOVEC (1 << 6)
+#define PTL_MD_MAX_SIZE (1 << 7)
+#define PTL_MD_KIOV (1 << 8)
+#define PTL_MD_EVENT_START_DISABLE (1 << 9)
+#define PTL_MD_EVENT_END_DISABLE (1 << 10)
+
+/* For compatibility with Cray Portals */
+#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS 0
#define PTL_MD_THRESH_INF (-1)
typedef enum {
- PTL_EVENT_GET,
- PTL_EVENT_PUT,
- PTL_EVENT_REPLY,
+ PTL_EVENT_GET_START,
+ PTL_EVENT_GET_END,
+
+ PTL_EVENT_PUT_START,
+ PTL_EVENT_PUT_END,
+
+ PTL_EVENT_REPLY_START,
+ PTL_EVENT_REPLY_END,
+
PTL_EVENT_ACK,
- PTL_EVENT_SENT,
+
+ PTL_EVENT_SEND_START,
+ PTL_EVENT_SEND_END,
+
PTL_EVENT_UNLINK,
} ptl_event_kind_t;
#endif
typedef struct {
ptl_event_kind_t type;
- ptl_err_t status;
- int unlinked;
ptl_process_id_t initiator;
ptl_pt_index_t portal;
ptl_match_bits_t match_bits;
ptl_size_t offset;
ptl_md_t mem_desc;
ptl_hdr_data_t hdr_data;
- struct timeval arrival_time;
+ int unlinked;
+ ptl_ni_fail_t ni_fail_type;
volatile ptl_seq_t sequence;
} ptl_event_t;
int gmnal_api_validate(nal_t *, void *, size_t);
-void gmnal_api_yield(nal_t *);
+int gmnal_api_yield(nal_t *, unsigned long *, int);
void gmnal_api_lock(nal_t *, unsigned long *);
* Give up the processor
*/
-void
-gmnal_api_yield(nal_t *nal)
+int
+gmnal_api_yield(nal_t *nal, unsigned long *flags, int milliseconds)
{
+ /* The nal_t yield method returns int, so this implementation must
+ * do so too. Only the non-blocking form (milliseconds == 0) is
+ * supported; it reports no time remaining by returning 0. */
CDEBUG(D_TRACE, "gmnal_api_yield : nal [%p]\n", nal);
- set_current_state(TASK_INTERRUPTIBLE);
- schedule();
+ if (milliseconds != 0) {
+ CERROR("Blocking yield not implemented yet\n");
+ LBUG();
+ }
+ our_cond_resched();
- return;
+ return (0);
}
return;
}
+void gmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq, ptl_event_t *ev)
+{
+ /* holding cb_lock; run the event queue's callback, if it has one */
+
+ if (eq->event_callback != NULL)
+ eq->event_callback(ev);
+
+ /* We will wake threads sleeping in yield() here, AFTER the
+ * callback, when we implement blocking yield */
+}
+
int gmnal_cb_dist(nal_cb_t *nal_cb, ptl_nid_t nid, unsigned long *dist)
{
CDEBUG(D_TRACE, "gmnal_cb_dist\n");
// when do we call this yield function
//
void
-kibnal_yield( nal_t *nal )
+kibnal_yield( nal_t *nal, unsigned long *flags, int milliseconds )
{
kibnal_data_t *k = nal->nal_data;
nal_cb_t *nal_cb = k->kib_cb;
LASSERT (k == &kibnal_data);
LASSERT (nal_cb == &kibnal_lib);
+ if (milliseconds != 0) {
+ CERROR("Blocking yeild not implemented yet\n");
+ LBUG();
+ }
+
// check under what condition that we need to
// call schedule()
// who set this need_resched
#include <portals/p30.h>
#include <portals/lib-p30.h>
#include <linux/kp30.h>
+#include <linux/kpr.h>
// Infiniband VAPI/EVAPI header files
// Mellanox MT23108 VAPI
spin_unlock_irqrestore(&data->kib_dispatch_lock,*flags);
}
+//
+// A new event has just been created: run the event queue's callback,
+// if it has one
+//
+void kibnal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev)
+{
+ /* holding kib_dispatch_lock */
+ if (eq->event_callback != NULL)
+ eq->event_callback(ev);
+
+ /* We will wake threads sleeping in yield() here, AFTER the
+ * callback, when we implement blocking yield */
+}
//
// nic distance
cb_printf: kibnal_printf,
cb_cli: kibnal_cli,
cb_sti: kibnal_sti,
+ cb_callback: kibnal_callback,
cb_dist: kibnal_dist // no used at this moment
};
return (0);
}
-static void
-kqswnal_yield( nal_t *nal )
+/*
+ * Yield the processor, optionally sleeping on kqn_yield_waitq.
+ *
+ *   milliseconds == 0: do not sleep; reschedule only if need_resched is
+ *                      set.  Returns 0.
+ *   milliseconds <  0: sleep with no timeout.  Returns the (negative)
+ *                      value unchanged.
+ *   milliseconds >  0: sleep for at most that long.  Returns the time
+ *                      left, in milliseconds, clamped at 0.
+ *
+ * The lock is dropped via kqswnal_unlock() around the sleep and taken
+ * again via kqswnal_lock() before returning.
+ */
+static int
+kqswnal_yield(nal_t *nal, unsigned long *flags, int milliseconds)
{
+ /* NB called holding statelock */
+ wait_queue_t wait;
+ unsigned long now = jiffies;
+
CDEBUG (D_NET, "yield\n");
- if (current->need_resched)
- schedule();
- return;
+ if (milliseconds == 0) {
+ if (current->need_resched)
+ schedule();
+ return 0;
+ }
+
+ /* Queue ourselves and set the task state before releasing the lock;
+ * presumably so that a wake-up issued by kqswnal_callback() between
+ * the unlock and the schedule is not missed -- TODO confirm. */
+ init_waitqueue_entry(&wait, current);
+ set_current_state(TASK_INTERRUPTIBLE);
+ add_wait_queue(&kqswnal_data.kqn_yield_waitq, &wait);
+
+ kqswnal_unlock(nal, flags);
+
+ if (milliseconds < 0)
+ schedule ();
+ else
+ schedule_timeout((milliseconds * HZ) / 1000);
+
+ kqswnal_lock(nal, flags);
+
+ remove_wait_queue(&kqswnal_data.kqn_yield_waitq, &wait);
+
+ /* Charge the elapsed jiffies (converted to ms) against the timeout */
+ if (milliseconds > 0) {
+ milliseconds -= ((jiffies - now) * 1000) / HZ;
+ if (milliseconds < 0)
+ milliseconds = 0;
+ }
+
+ return (milliseconds);
}
static nal_t *
init_waitqueue_head (&kqswnal_data.kqn_sched_waitq);
spin_lock_init (&kqswnal_data.kqn_statelock);
+ init_waitqueue_head (&kqswnal_data.kqn_yield_waitq);
/* pointers/lists/locks initialised */
kqswnal_data.kqn_init = KQN_INIT_DATA;
#define DEBUG_SUBSYSTEM S_QSWNAL
#include <linux/kp30.h>
+#include <linux/kpr.h>
#include <portals/p30.h>
#include <portals/lib-p30.h>
struct list_head kqn_delayedtxds; /* delayed transmits */
spinlock_t kqn_statelock; /* cb_cli/cb_sti */
+ wait_queue_head_t kqn_yield_waitq; /* where yield waits */
nal_cb_t *kqn_cb; /* -> kqswnal_lib */
#if MULTIRAIL_EKC
EP_SYS *kqn_ep; /* elan system */
CDEBUG (D_NET, "%s", msg);
}
+#if (defined(CONFIG_SPARC32) || defined(CONFIG_SPARC64))
+# error "Can't save/restore irq contexts in different procedures"
+#endif
static void
kqswnal_cli(nal_cb_t *nal, unsigned long *flags)
spin_unlock_irqrestore(&data->kqn_statelock, *flags);
}
+/*
+ * Calls the event queue's event_callback for 'ev' if one is set, then
+ * wakes every thread waiting on kqn_yield_waitq (the queue that
+ * kqswnal_yield() sleeps on).  'nal' and 'private' are not used here.
+ */
+static void
+kqswnal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev)
+{
+ /* holding kqn_statelock */
+
+ if (eq->event_callback != NULL)
+ eq->event_callback(ev);
+
+ /* Wake sleepers only AFTER the callback has run */
+ if (waitqueue_active(&kqswnal_data.kqn_yield_waitq))
+ wake_up_all(&kqswnal_data.kqn_yield_waitq);
+}
static int
kqswnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
lib_finalize (&kqswnal_lib, ktx->ktx_args[0],
(lib_msg_t *)ktx->ktx_args[1],
(error == 0) ? PTL_OK :
- (error == -ENOMEM) ? PTL_NOSPACE : PTL_FAIL);
+ (error == -ENOMEM) ? PTL_NO_SPACE : PTL_FAIL);
break;
case KTX_GETTING: /* Peer has DMA-ed direct? */
msg = (lib_msg_t *)ktx->ktx_args[1];
if (error == 0) {
- repmsg = lib_fake_reply_msg (&kqswnal_lib,
- ktx->ktx_nid, msg->md);
+ repmsg = lib_create_reply_msg (&kqswnal_lib,
+ ktx->ktx_nid, msg);
if (repmsg == NULL)
error = -ENOMEM;
}
lib_finalize (&kqswnal_lib, NULL, repmsg, PTL_OK);
} else {
lib_finalize (&kqswnal_lib, ktx->ktx_args[0], msg,
- (error == -ENOMEM) ? PTL_NOSPACE : PTL_FAIL);
+ (error == -ENOMEM) ? PTL_NO_SPACE : PTL_FAIL);
}
break;
in_interrupt()));
if (ktx == NULL) {
kqswnal_cerror_hdr (hdr);
- return (PTL_NOSPACE);
+ return (PTL_NO_SPACE);
}
ktx->ktx_nid = targetnid;
cb_printf: kqswnal_printf,
cb_cli: kqswnal_cli,
cb_sti: kqswnal_sti,
+ cb_callback: kqswnal_callback,
cb_dist: kqswnal_dist
};
}
-static void kscimacnal_yield( nal_t *nal )
+static void kscimacnal_yield( nal_t *nal, unsigned long *flags, int milliseconds )
{
LASSERT (nal == &kscimacnal_api);
+ if (milliseconds != 0) {
+ CERROR ("Blocking yield not implemented yet\n");
+ LBUG();
+ }
+
if (current->need_resched)
schedule();
return;
#define DEBUG_SUBSYSTEM S_UNDEFINED
#include <linux/kp30.h>
+#include <linux/kpr.h>
#include <portals/p30.h>
#include <portals/lib-p30.h>
}
+/*
+ * Calls the event queue's event_callback for 'ev' if one is set.
+ * 'nal' and 'private' are not used here.
+ */
+static void
+kscimacnal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev)
+{
+ /* holding ksci_dispatch_lock */
+
+ if (eq->event_callback != NULL)
+ eq->event_callback(ev);
+
+ /* We will wake threads sleeping in yield() here, AFTER the
+ * callback, when we implement blocking yield */
+}
+
static int
kscimacnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
{
/* save transaction info for later finalize and cleanup */
PORTAL_ALLOC(ktx, (sizeof(kscimacnal_tx_t)));
if (!ktx) {
- return PTL_NOSPACE;
+ return PTL_NO_SPACE;
}
ktx->ktx_nmapped = 0; /* Start with no mapped pages :) */
kscimacnal_txrelease, ktx);
if (!msg) {
PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t)));
- return PTL_NOSPACE;
+ return PTL_NO_SPACE;
}
mac_put_mblk(msg, sizeof(ptl_hdr_t));
lastblk=msg;
if(!newblk) {
mac_free_msg(msg);
PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t)));
- return PTL_NOSPACE;
+ return PTL_NO_SPACE;
}
mac_put_mblk(newblk, nob);
mac_link_mblk(lastblk, newblk);
cb_printf: kscimacnal_printf,
cb_cli: kscimacnal_cli,
cb_sti: kscimacnal_sti,
+ cb_callback: kscimacnal_callback,
cb_dist: kscimacnal_dist
};
}
void
-ksocknal_api_yield(nal_t *nal)
-{
- our_cond_resched();
- return;
-}
-
-void
ksocknal_api_lock(nal_t *nal, unsigned long *flags)
{
ksock_nal_data_t *k;
nal_cb->cb_sti(nal_cb,flags);
}
+/*
+ * Yield the processor, optionally sleeping on ksnd_yield_waitq.
+ *
+ *   milliseconds == 0: do not sleep; just call our_cond_resched().
+ *                      Returns 0.
+ *   milliseconds <  0: sleep with no timeout.  Returns the (negative)
+ *                      value unchanged.
+ *   milliseconds >  0: sleep for at most that long.  Returns the time
+ *                      left, in milliseconds, clamped at 0.
+ *
+ * The lock is dropped via ksocknal_api_unlock() around the sleep and
+ * taken again via ksocknal_api_lock() before returning.
+ */
+int
+ksocknal_api_yield(nal_t *nal, unsigned long *flags, int milliseconds)
+{
+ /* NB called holding statelock */
+ wait_queue_t wait;
+ unsigned long now = jiffies;
+
+ CDEBUG (D_NET, "yield\n");
+
+ if (milliseconds == 0) {
+ our_cond_resched();
+ return 0;
+ }
+
+ /* Queue ourselves and set the task state before releasing the lock;
+ * presumably so that a wake-up issued by ksocknal_callback() between
+ * the unlock and the schedule is not missed -- TODO confirm. */
+ init_waitqueue_entry(&wait, current);
+ set_current_state (TASK_INTERRUPTIBLE);
+ add_wait_queue (&ksocknal_data.ksnd_yield_waitq, &wait);
+
+ ksocknal_api_unlock(nal, flags);
+
+ if (milliseconds < 0)
+ schedule ();
+ else
+ schedule_timeout((milliseconds * HZ) / 1000);
+
+ ksocknal_api_lock(nal, flags);
+
+ remove_wait_queue (&ksocknal_data.ksnd_yield_waitq, &wait);
+
+ /* Charge the elapsed jiffies (converted to ms) against the timeout */
+ if (milliseconds > 0) {
+ milliseconds -= ((jiffies - now) * 1000) / HZ;
+ if (milliseconds < 0)
+ milliseconds = 0;
+ }
+
+ return (milliseconds);
+}
+
nal_t *
ksocknal_init(int interface, ptl_pt_index_t ptl_size,
ptl_ac_index_t ac_size, ptl_pid_t requested_pid)
ksocknal_get_peer_addr (conn);
+ CWARN("New conn nid:"LPX64" ip:%08x/%d incarnation:"LPX64"\n",
+ nid, conn->ksnc_ipaddr, conn->ksnc_port, incarnation);
+
irq = ksocknal_conn_irq (conn);
write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
if (conn->ksnc_incarnation == incarnation)
continue;
+
+ CWARN("Closing stale conn nid:"LPX64" ip:%08x/%d "
+ "incarnation:"LPX64"("LPX64")\n",
+ peer->ksnp_nid, conn->ksnc_ipaddr, conn->ksnc_port,
+ conn->ksnc_incarnation, incarnation);
count++;
ksocknal_close_conn_locked (conn, -ESTALE);
ksocknal_api.forward = ksocknal_api_forward;
ksocknal_api.shutdown = ksocknal_api_shutdown;
- ksocknal_api.yield = ksocknal_api_yield;
ksocknal_api.validate = NULL; /* our api validate is a NOOP */
ksocknal_api.lock = ksocknal_api_lock;
ksocknal_api.unlock = ksocknal_api_unlock;
ksocknal_data.ksnd_nal_cb = &ksocknal_lib;
spin_lock_init (&ksocknal_data.ksnd_nal_cb_lock);
-
+ init_waitqueue_head(&ksocknal_data.ksnd_yield_waitq);
+
spin_lock_init(&ksocknal_data.ksnd_small_fmp.fmp_lock);
INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_idle_fmbs);
INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns);
ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
printk(KERN_INFO "Lustre: Routing socket NAL loaded "
- "(Routing %s, initial mem %d)\n",
+ "(Routing %s, initial mem %d, incarnation "LPX64")\n",
kpr_routing (&ksocknal_data.ksnd_router) ?
- "enabled" : "disabled", pkmem);
+ "enabled" : "disabled", pkmem, ksocknal_data.ksnd_incarnation);
return (0);
}
#include <linux/kp30.h>
#include <linux/portals_compat25.h>
+#include <linux/kpr.h>
#include <portals/p30.h>
#include <portals/lib-p30.h>
#include <portals/socknal.h>
nal_cb_t *ksnd_nal_cb;
spinlock_t ksnd_nal_cb_lock; /* lib cli/sti lock */
+ wait_queue_head_t ksnd_yield_waitq; /* where yield waits */
atomic_t ksnd_nthreads; /* # live threads */
int ksnd_shuttingdown; /* tell threads to exit */
{
ksock_nal_data_t *data = nal->nal_data;
+ /* OK to ignore 'flags'; we only ever serialise threads and
+ * never need to lock out interrupts */
spin_lock(&data->ksnd_nal_cb_lock);
}
ksock_nal_data_t *data;
data = nal->nal_data;
+ /* OK to ignore 'flags'; we only ever serialise threads and
+ * never need to lock out interrupts */
spin_unlock(&data->ksnd_nal_cb_lock);
}
+/*
+ * Calls the event queue's event_callback for 'ev' if one is set, then
+ * wakes every thread waiting on ksnd_yield_waitq (the queue that
+ * ksocknal_api_yield() sleeps on).  'nal' and 'private' are not used here.
+ */
+void
+ksocknal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev)
+{
+ /* holding ksnd_nal_cb_lock */
+
+ if (eq->event_callback != NULL)
+ eq->event_callback(ev);
+
+ /* Wake sleepers only AFTER the callback has run */
+ if (waitqueue_active(&ksocknal_data.ksnd_yield_waitq))
+ wake_up_all(&ksocknal_data.ksnd_yield_waitq);
+}
+
int
ksocknal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
{
if (ltx == NULL) {
CERROR("Can't allocate tx desc type %d size %d %s\n",
type, desc_size, in_interrupt() ? "(intr)" : "");
- return (PTL_NOSPACE);
+ return (PTL_NO_SPACE);
}
atomic_inc(&ksocknal_data.ksnd_nactive_ltxs);
}
ksocknal_data.ksnd_reaper_waketime = jiffies + timeout;
- add_wait_queue (&ksocknal_data.ksnd_reaper_waitq, &wait);
set_current_state (TASK_INTERRUPTIBLE);
+ add_wait_queue (&ksocknal_data.ksnd_reaper_waitq, &wait);
if (!ksocknal_data.ksnd_shuttingdown &&
list_empty (&ksocknal_data.ksnd_deathrow_conns) &&
cb_printf: ksocknal_printf,
cb_cli: ksocknal_cli,
cb_sti: ksocknal_sti,
+ cb_callback: ksocknal_callback,
cb_dist: ksocknal_dist
};
# See the file COPYING in this distribution
-MODULE = portals
-modulenet_DATA = portals.o
-EXTRA_PROGRAMS = portals
+MODULE = libcfs
+modulenet_DATA = libcfs.o
+EXTRA_PROGRAMS = libcfs
-LIBLINKS := lib-dispatch.c lib-eq.c lib-init.c lib-md.c lib-me.c lib-move.c lib-msg.c lib-ni.c lib-pid.c
-APILINKS := api-eq.c api-errno.c api-init.c api-me.c api-ni.c api-wrap.c
-LINKS = $(APILINKS) $(LIBLINKS)
-DISTCLEANFILES = $(LINKS) link-stamp *.orig *.rej
-
-$(LINKS): link-stamp
-link-stamp:
- -list='$(LIBLINKS)'; for f in $$list; do echo $$f ; ln -sf $(srcdir)/../portals/$$f .; done
- -list='$(APILINKS)'; for f in $$list; do echo $$f ; ln -sf $(srcdir)/../portals/$$f .; done
- echo timestamp > link-stamp
+DISTCLEANFILES = *.orig *.rej
DEFS =
-portals_SOURCES = $(LINKS) module.c proc.c debug.c lwt.c
+libcfs_SOURCES = module.c proc.c debug.c lwt.c
# Don't distribute any patched files.
dist-hook:
#include <linux/kp30.h>
#include <linux/portals_compat25.h>
+#include <linux/libcfs.h>
+
+unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL |
+ S_GMNAL | S_IBNAL);
+EXPORT_SYMBOL(portal_subsystem_debug);
+
+unsigned int portal_debug = (D_WARNING | D_DLMTRACE | D_ERROR | D_EMERG | D_HA |
+ D_RPCTRACE | D_VFSTRACE | D_MALLOC);
+EXPORT_SYMBOL(portal_debug);
+
+unsigned int portal_cerror = 1;
+EXPORT_SYMBOL(portal_cerror);
+
+unsigned int portal_printk;
+EXPORT_SYMBOL(portal_printk);
+
+unsigned int portal_stack;
+EXPORT_SYMBOL(portal_stack);
+
+#ifdef __KERNEL__
+atomic_t portal_kmemory = ATOMIC_INIT(0);
+EXPORT_SYMBOL(portal_kmemory);
+#endif
#define DEBUG_OVERFLOW 1024
static char *debug_buf = NULL;
char *portals_nid2str(int nal, ptl_nid_t nid, char *str)
{
switch(nal){
+/* XXX this should be a nal method of some sort */
+#ifndef CRAY_PORTALS
case TCPNAL:
/* userspace NAL */
case SOCKNAL:
case SCIMACNAL:
sprintf(str, "%u:%u", (__u32)(nid >> 32), (__u32)nid);
break;
+#endif
default:
- return NULL;
+ snprintf(str, PTL_NALFMT_SIZE-1, "(?%llx)", (long long)nid);
}
return str;
}
extern void (kping_client)(struct portal_ioctl_data *);
-struct nal_cmd_handler {
- nal_cmd_handler_t nch_handler;
- void * nch_private;
-};
-
-static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1];
-struct semaphore nal_cmd_sem;
-
#ifdef PORTAL_DEBUG
void kportal_assertion_failed(char *expr, char *file, const char *func,
const int line)
}
/* called when opening /dev/device */
-static int kportal_psdev_open(struct inode * inode, struct file * file)
+static int libcfs_psdev_open(struct inode * inode, struct file * file)
{
struct portals_device_userstate *pdu;
ENTRY;
}
/* called when closing /dev/device */
-static int kportal_psdev_release(struct inode * inode, struct file * file)
+static int libcfs_psdev_release(struct inode * inode, struct file * file)
{
struct portals_device_userstate *pdu;
ENTRY;
PORTAL_FREE(data, len);
}
-static int
-kportal_add_route(int gateway_nalid, ptl_nid_t gateway_nid,
- ptl_nid_t lo_nid, ptl_nid_t hi_nid)
-{
- int rc;
- kpr_control_interface_t *ci;
-
- ci = (kpr_control_interface_t *) PORTAL_SYMBOL_GET (kpr_control_interface);
- if (ci == NULL)
- return (-ENODEV);
-
- rc = ci->kprci_add_route (gateway_nalid, gateway_nid, lo_nid, hi_nid);
-
- PORTAL_SYMBOL_PUT(kpr_control_interface);
- return (rc);
-}
-
-static int
-kportal_del_route(int gw_nalid, ptl_nid_t gw_nid,
- ptl_nid_t lo, ptl_nid_t hi)
-{
- int rc;
- kpr_control_interface_t *ci;
-
- ci = (kpr_control_interface_t *)PORTAL_SYMBOL_GET(kpr_control_interface);
- if (ci == NULL)
- return (-ENODEV);
-
- rc = ci->kprci_del_route (gw_nalid, gw_nid, lo, hi);
-
- PORTAL_SYMBOL_PUT(kpr_control_interface);
- return (rc);
-}
-
-static int
-kportal_notify_router (int gw_nalid, ptl_nid_t gw_nid,
- int alive, time_t when)
-{
- int rc;
- kpr_control_interface_t *ci;
-
- /* No error if router not preset. Sysadmin is allowed to notify
- * _everywhere_ when a NID boots or crashes, even if they know
- * nothing of the peer. */
- ci = (kpr_control_interface_t *)PORTAL_SYMBOL_GET(kpr_control_interface);
- if (ci == NULL)
- return (0);
-
- rc = ci->kprci_notify (gw_nalid, gw_nid, alive, when);
-
- PORTAL_SYMBOL_PUT(kpr_control_interface);
- return (rc);
-}
-
-static int
-kportal_get_route(int index, __u32 *gateway_nalidp, ptl_nid_t *gateway_nidp,
- ptl_nid_t *lo_nidp, ptl_nid_t *hi_nidp, int *alivep)
-{
- int gateway_nalid;
- ptl_nid_t gateway_nid;
- ptl_nid_t lo_nid;
- ptl_nid_t hi_nid;
- int alive;
- int rc;
- kpr_control_interface_t *ci;
-
- ci = (kpr_control_interface_t *) PORTAL_SYMBOL_GET(kpr_control_interface);
- if (ci == NULL)
- return (-ENODEV);
-
- rc = ci->kprci_get_route(index, &gateway_nalid, &gateway_nid,
- &lo_nid, &hi_nid, &alive);
-
- if (rc == 0) {
- CDEBUG(D_IOCTL, "got route [%d] %d "LPX64":"LPX64" - "LPX64", %s\n",
- index, gateway_nalid, gateway_nid, lo_nid, hi_nid,
- alive ? "up" : "down");
-
- *gateway_nalidp = (__u32)gateway_nalid;
- *gateway_nidp = gateway_nid;
- *lo_nidp = lo_nid;
- *hi_nidp = hi_nid;
- *alivep = alive;
- }
-
- PORTAL_SYMBOL_PUT (kpr_control_interface);
- return (rc);
-}
-
-static int
-kportal_router_cmd(struct portals_cfg *pcfg, void * private)
-{
- int err = -EINVAL;
- ENTRY;
-
- switch(pcfg->pcfg_command) {
- default:
- CDEBUG(D_IOCTL, "Inappropriate cmd: %d\n", pcfg->pcfg_command);
- break;
-
- case NAL_CMD_ADD_ROUTE:
- CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n",
- pcfg->pcfg_nal, pcfg->pcfg_nid,
- pcfg->pcfg_nid2, pcfg->pcfg_nid3);
- err = kportal_add_route(pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
- pcfg->pcfg_nid2, pcfg->pcfg_nid3);
- break;
-
- case NAL_CMD_DEL_ROUTE:
- CDEBUG (D_IOCTL, "Removing routes via [%d] "LPU64" : "LPU64" - "LPU64"\n",
- pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
- pcfg->pcfg_nid2, pcfg->pcfg_nid3);
- err = kportal_del_route (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
- pcfg->pcfg_nid2, pcfg->pcfg_nid3);
- break;
-
- case NAL_CMD_NOTIFY_ROUTER: {
- CDEBUG (D_IOCTL, "Notifying peer [%d] "LPU64" %s @ %ld\n",
- pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
- pcfg->pcfg_flags ? "Enabling" : "Disabling",
- (time_t)pcfg->pcfg_nid3);
-
- err = kportal_notify_router (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
- pcfg->pcfg_flags,
- (time_t)pcfg->pcfg_nid3);
- break;
- }
-
- case NAL_CMD_GET_ROUTE:
- CDEBUG (D_IOCTL, "Getting route [%d]\n", pcfg->pcfg_count);
- err = kportal_get_route(pcfg->pcfg_count, &pcfg->pcfg_gw_nal,
- &pcfg->pcfg_nid,
- &pcfg->pcfg_nid2, &pcfg->pcfg_nid3,
- &pcfg->pcfg_flags);
- break;
- }
- RETURN(err);
-}
-
-int
-kportal_nal_cmd(struct portals_cfg *pcfg)
-{
- __u32 nal = pcfg->pcfg_nal;
- int rc = -EINVAL;
-
- ENTRY;
-
- down(&nal_cmd_sem);
- if (nal > 0 && nal <= NAL_MAX_NR && nal_cmd[nal].nch_handler) {
- CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal,
- pcfg->pcfg_command);
- rc = nal_cmd[nal].nch_handler(pcfg, nal_cmd[nal].nch_private);
- }
- up(&nal_cmd_sem);
- RETURN(rc);
-}
-
-ptl_handle_ni_t *
-kportal_get_ni (int nal)
-{
-
- switch (nal)
- {
- case QSWNAL:
- return (PORTAL_SYMBOL_GET(kqswnal_ni));
- case SOCKNAL:
- return (PORTAL_SYMBOL_GET(ksocknal_ni));
- case GMNAL:
- return (PORTAL_SYMBOL_GET(kgmnal_ni));
- case IBNAL:
- return (PORTAL_SYMBOL_GET(kibnal_ni));
- case TCPNAL:
- /* userspace NAL */
- return (NULL);
- case SCIMACNAL:
- return (PORTAL_SYMBOL_GET(kscimacnal_ni));
- default:
- /* A warning to a naive caller */
- CERROR ("unknown nal: %d\n", nal);
- return (NULL);
- }
-}
-
-void
-kportal_put_ni (int nal)
-{
-
- switch (nal)
- {
- case QSWNAL:
- PORTAL_SYMBOL_PUT(kqswnal_ni);
- break;
- case SOCKNAL:
- PORTAL_SYMBOL_PUT(ksocknal_ni);
- break;
- case GMNAL:
- PORTAL_SYMBOL_PUT(kgmnal_ni);
- break;
- case IBNAL:
- PORTAL_SYMBOL_PUT(kibnal_ni);
- break;
- case TCPNAL:
- /* A lesson to a malicious caller */
- LBUG ();
- case SCIMACNAL:
- PORTAL_SYMBOL_PUT(kscimacnal_ni);
- break;
- default:
- CERROR ("unknown nal: %d\n", nal);
- }
-}
+static DECLARE_RWSEM(ioctl_list_sem);
+static LIST_HEAD(ioctl_list);
-int
-kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private)
+/*
+ * Add 'hand' to ioctl_list, the list of handlers that libcfs_ioctl()
+ * walks for commands it does not handle itself.
+ *
+ * Returns 0 on success, or -EBUSY if hand->item is already linked into
+ * a list (list_empty() on it is false).
+ * NOTE(review): this relies on hand->item having been initialised as an
+ * empty list head by the caller -- confirm at the call sites.
+ */
+int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand)
{
int rc = 0;
+ /* Check and insert under a single write lock so that two concurrent
+ * registrations of the same handler cannot both pass the check. */
+ down_write(&ioctl_list_sem);
+ if (!list_empty(&hand->item)) {
+ rc = -EBUSY;
- CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler);
-
- if (nal > 0 && nal <= NAL_MAX_NR) {
- down(&nal_cmd_sem);
- if (nal_cmd[nal].nch_handler != NULL)
- rc = -EBUSY;
- else {
- nal_cmd[nal].nch_handler = handler;
- nal_cmd[nal].nch_private = private;
- }
- up(&nal_cmd_sem);
+ } else {
+ list_add_tail(&hand->item, &ioctl_list);
}
- return rc;
+ up_write(&ioctl_list_sem);
+ /* Report the failure instead of unconditionally returning 0 */
+ RETURN(rc);
}
+EXPORT_SYMBOL(libcfs_register_ioctl);
-int
-kportal_nal_unregister(int nal)
+/*
+ * Remove 'hand' from ioctl_list and re-initialise its list linkage.
+ *
+ * Returns 0 on success, or -ENOENT if hand->item is not linked into any
+ * list (list_empty() on it is true).
+ */
+int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand)
{
int rc = 0;
+ /* Check and unlink under a single write lock so that two concurrent
+ * deregistrations of the same handler cannot both pass the check. */
+ down_write(&ioctl_list_sem);
+ if (list_empty(&hand->item)) {
+ rc = -ENOENT;
- CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal);
-
- if (nal > 0 && nal <= NAL_MAX_NR) {
- down(&nal_cmd_sem);
- nal_cmd[nal].nch_handler = NULL;
- nal_cmd[nal].nch_private = NULL;
- up(&nal_cmd_sem);
+ } else {
+ list_del_init(&hand->item);
}
- return rc;
+ up_write(&ioctl_list_sem);
+ /* Report the failure instead of unconditionally returning 0 */
+ RETURN(rc);
}
+EXPORT_SYMBOL(libcfs_deregister_ioctl);
-
-static int kportal_ioctl(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg)
+static int libcfs_ioctl(struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg)
{
- int err = 0;
+ int err = -EINVAL;
char buf[1024];
struct portal_ioctl_data *data;
- char str[PTL_NALFMT_SIZE];
-
ENTRY;
if (current->fsuid != 0)
RETURN(-EINVAL);
portals_debug_mark_buffer(data->ioc_inlbuf1);
RETURN(0);
- case IOC_PORTAL_PING: {
- void (*ping)(struct portal_ioctl_data *);
-
- CDEBUG(D_IOCTL, "doing %d pings to nid "LPX64" (%s)\n",
- data->ioc_count, data->ioc_nid,
- portals_nid2str(data->ioc_nal, data->ioc_nid, str));
- ping = PORTAL_SYMBOL_GET(kping_client);
- if (!ping)
- CERROR("PORTAL_SYMBOL_GET failed\n");
- else {
- ping(data);
- PORTAL_SYMBOL_PUT(kping_client);
- }
- RETURN(0);
- }
-
- case IOC_PORTAL_GET_NID: {
- const ptl_handle_ni_t *nip;
- ptl_process_id_t pid;
-
- CDEBUG (D_IOCTL, "Getting nid for nal [%d]\n", data->ioc_nal);
-
- nip = kportal_get_ni (data->ioc_nal);
- if (nip == NULL)
- RETURN (-EINVAL);
-
- err = PtlGetId (*nip, &pid);
- LASSERT (err == PTL_OK);
- kportal_put_ni (data->ioc_nal);
-
- data->ioc_nid = pid.nid;
- if (copy_to_user ((char *)arg, data, sizeof (*data)))
- err = -EFAULT;
- break;
- }
-
- case IOC_PORTAL_NAL_CMD: {
- struct portals_cfg pcfg;
-
- LASSERT (data->ioc_plen1 == sizeof(pcfg));
- err = copy_from_user(&pcfg, (void *)data->ioc_pbuf1,
- sizeof(pcfg));
- if ( err ) {
- EXIT;
- return err;
- }
-
- CDEBUG (D_IOCTL, "nal command nal %d cmd %d\n", pcfg.pcfg_nal,
- pcfg.pcfg_command);
- err = kportal_nal_cmd(&pcfg);
- if (err == 0) {
- if (copy_to_user((char *)data->ioc_pbuf1, &pcfg,
- sizeof (pcfg)))
- err = -EFAULT;
- if (copy_to_user((char *)arg, data, sizeof (*data)))
- err = -EFAULT;
- }
- break;
- }
- case IOC_PORTAL_FAIL_NID: {
- const ptl_handle_ni_t *nip;
-
- CDEBUG (D_IOCTL, "fail nid: [%d] "LPU64" count %d\n",
- data->ioc_nal, data->ioc_nid, data->ioc_count);
-
- nip = kportal_get_ni (data->ioc_nal);
- if (nip == NULL)
- return (-EINVAL);
-
- err = PtlFailNid (*nip, data->ioc_nid, data->ioc_count);
- kportal_put_ni (data->ioc_nal);
- break;
- }
#if LWT_SUPPORT
case IOC_PORTAL_LWT_CONTROL:
err = lwt_control (data->ioc_flags, data->ioc_misc);
}
break;
- default:
+ default: {
+ struct libcfs_ioctl_handler *hand;
err = -EINVAL;
- break;
+ down_read(&ioctl_list_sem);
+ list_for_each_entry(hand, &ioctl_list, item) {
+ err = hand->handle_ioctl(data, cmd, arg);
+ if (err != -EINVAL)
+ break;
+ }
+ up_read(&ioctl_list_sem);
+ } break;
}
RETURN(err);
}
-static struct file_operations portalsdev_fops = {
- ioctl: kportal_ioctl,
- open: kportal_psdev_open,
- release: kportal_psdev_release
+static struct file_operations libcfs_fops = {
+ ioctl: libcfs_ioctl,
+ open: libcfs_psdev_open,
+ release: libcfs_psdev_release
};
-static struct miscdevice portal_dev = {
+static struct miscdevice libcfs_dev = {
PORTAL_MINOR,
"portals",
- &portalsdev_fops
+ &libcfs_fops
};
extern int insert_proc(void);
MODULE_DESCRIPTION("Portals v3.1");
MODULE_LICENSE("GPL");
-static int init_kportals_module(void)
+static int init_libcfs_module(void)
{
int rc;
goto cleanup_debug;
}
#endif
- sema_init(&nal_cmd_sem, 1);
-
- rc = misc_register(&portal_dev);
+ rc = misc_register(&libcfs_dev);
if (rc) {
CERROR("misc_register: error %d\n", rc);
goto cleanup_lwt;
}
- rc = PtlInit();
- if (rc) {
- CERROR("PtlInit: error %d\n", rc);
- goto cleanup_deregister;
- }
-
rc = insert_proc();
if (rc) {
CERROR("insert_proc: error %d\n", rc);
- goto cleanup_fini;
- }
-
- rc = kportal_nal_register(ROUTER, kportal_router_cmd, NULL);
- if (rc) {
- CERROR("kportal_nal_registre: ROUTER error %d\n", rc);
- goto cleanup_proc;
+ goto cleanup_deregister;
}
CDEBUG (D_OTHER, "portals setup OK\n");
return (0);
- cleanup_proc:
- remove_proc();
- cleanup_fini:
- PtlFini();
cleanup_deregister:
- misc_deregister(&portal_dev);
+ misc_deregister(&libcfs_dev);
cleanup_lwt:
#if LWT_SUPPORT
lwt_fini();
return rc;
}
-static void exit_kportals_module(void)
+static void exit_libcfs_module(void)
{
int rc;
- kportal_nal_unregister(ROUTER);
remove_proc();
- PtlFini();
CDEBUG(D_MALLOC, "before Portals cleanup: kmem %d\n",
atomic_read(&portal_kmemory));
- rc = misc_deregister(&portal_dev);
+ rc = misc_deregister(&libcfs_dev);
if (rc)
CERROR("misc_deregister error %d\n", rc);
printk(KERN_ERR "LustreError: portals_debug_cleanup: %d\n", rc);
}
-EXPORT_SYMBOL(lib_dispatch);
-EXPORT_SYMBOL(PtlMEAttach);
-EXPORT_SYMBOL(PtlMEInsert);
-EXPORT_SYMBOL(PtlMEUnlink);
-EXPORT_SYMBOL(PtlEQAlloc);
-EXPORT_SYMBOL(PtlMDAttach);
-EXPORT_SYMBOL(PtlMDUnlink);
-EXPORT_SYMBOL(PtlNIInit);
-EXPORT_SYMBOL(PtlNIFini);
-EXPORT_SYMBOL(PtlNIDebug);
-EXPORT_SYMBOL(PtlInit);
-EXPORT_SYMBOL(PtlFini);
-EXPORT_SYMBOL(PtlPut);
-EXPORT_SYMBOL(PtlGet);
-EXPORT_SYMBOL(ptl_err_str);
-EXPORT_SYMBOL(portal_subsystem_debug);
-EXPORT_SYMBOL(portal_debug);
-EXPORT_SYMBOL(portal_stack);
-EXPORT_SYMBOL(portal_printk);
-EXPORT_SYMBOL(portal_cerror);
-EXPORT_SYMBOL(PtlEQWait);
-EXPORT_SYMBOL(PtlEQFree);
-EXPORT_SYMBOL(PtlEQGet);
-EXPORT_SYMBOL(PtlGetId);
-EXPORT_SYMBOL(PtlMDBind);
-EXPORT_SYMBOL(lib_iov_nob);
-EXPORT_SYMBOL(lib_copy_iov2buf);
-EXPORT_SYMBOL(lib_copy_buf2iov);
-EXPORT_SYMBOL(lib_extract_iov);
-EXPORT_SYMBOL(lib_kiov_nob);
-EXPORT_SYMBOL(lib_copy_kiov2buf);
-EXPORT_SYMBOL(lib_copy_buf2kiov);
-EXPORT_SYMBOL(lib_extract_kiov);
-EXPORT_SYMBOL(lib_finalize);
-EXPORT_SYMBOL(lib_parse);
-EXPORT_SYMBOL(lib_fake_reply_msg);
-EXPORT_SYMBOL(lib_init);
-EXPORT_SYMBOL(lib_fini);
-EXPORT_SYMBOL(portal_kmemory);
EXPORT_SYMBOL(kportal_daemonize);
EXPORT_SYMBOL(kportal_blockallsigs);
-EXPORT_SYMBOL(kportal_nal_register);
-EXPORT_SYMBOL(kportal_nal_unregister);
EXPORT_SYMBOL(kportal_assertion_failed);
-EXPORT_SYMBOL(dispatch_name);
-EXPORT_SYMBOL(kportal_get_ni);
-EXPORT_SYMBOL(kportal_put_ni);
-EXPORT_SYMBOL(kportal_nal_cmd);
-module_init(init_kportals_module);
-module_exit (exit_kportals_module);
+module_init(init_libcfs_module);
+module_exit(exit_libcfs_module);
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
+DEFS =
+
+my_sources = api-eq.c api-init.c api-me.c api-errno.c api-ni.c api-wrap.c \
+ lib-dispatch.c lib-init.c lib-me.c lib-msg.c lib-eq.c \
+ lib-md.c lib-move.c lib-ni.c lib-pid.c
-CPPFLAGS=
-INCLUDES=-I$(top_srcdir)/portals/include -I$(top_srcdir)/include
noinst_LIBRARIES= libportals.a
-libportals_a_SOURCES= api-eq.c api-init.c api-me.c api-errno.c api-ni.c api-wrap.c lib-dispatch.c lib-init.c lib-me.c lib-msg.c lib-eq.c lib-md.c lib-move.c lib-ni.c lib-pid.c
+libportals_a_SOURCES= $(my_sources)
if LIBLUSTRE
libportals_a_CFLAGS= -fPIC
+else
+
+MODULE = portals
+modulenet_DATA = portals.o
+EXTRA_PROGRAMS = portals
+
+DISTCLEANFILES = *.orig *.rej
+
+portals_SOURCES= module.c $(my_sources)
+
+# Don't distribute any patched files.
+dist-hook:
+ list='$(EXT2C)'; for f in $$list; do rm -f $(distdir)/$$f; done
+
+include ../Rules.linux
endif
portals-objs := lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o \
lib-move.o lib-msg.o lib-ni.o lib-pid.o \
api-eq.o api-errno.o api-init.o api-me.o api-ni.o \
- api-wrap.o
+ api-wrap.o module.o
/* Nothing to do anymore... */
}
-int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t * ev)
+int ptl_get_event (ptl_eq_t *eq, ptl_event_t *ev)
{
- ptl_eq_t *eq;
- int rc, new_index;
- unsigned long flags;
- ptl_event_t *new_event;
- nal_t *nal;
+ int new_index = eq->sequence & (eq->size - 1);
+ ptl_event_t *new_event = &eq->base[new_index];
ENTRY;
- if (!ptl_init)
- RETURN(PTL_NOINIT);
-
- nal = ptl_hndl2nal(&eventq);
- if (!nal)
- RETURN(PTL_INV_EQ);
-
- eq = ptl_handle2usereq(&eventq);
- nal->lock(nal, &flags);
-
- /* size must be a power of 2 to handle a wrapped sequence # */
- LASSERT (eq->size != 0 &&
- eq->size == LOWEST_BIT_SET (eq->size));
-
- new_index = eq->sequence & (eq->size - 1);
- new_event = &eq->base[new_index];
CDEBUG(D_INFO, "new_event: %p, sequence: %lu, eq->size: %u\n",
new_event, eq->sequence, eq->size);
+
if (PTL_SEQ_GT (eq->sequence, new_event->sequence)) {
- nal->unlock(nal, &flags);
RETURN(PTL_EQ_EMPTY);
}
if (eq->sequence != new_event->sequence) {
CERROR("DROPPING EVENT: eq seq %lu ev seq %lu\n",
eq->sequence, new_event->sequence);
- rc = PTL_EQ_DROPPED;
- } else {
- rc = PTL_OK;
+ RETURN(PTL_EQ_DROPPED);
}
eq->sequence = new_event->sequence + 1;
- nal->unlock(nal, &flags);
- RETURN(rc);
+ RETURN(PTL_OK);
}
-
-int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out)
+/*
+ * Non-blocking fetch of the next event from a single event queue:
+ * a PtlEQPoll() over just 'eventq' with a timeout of 0.  Returns
+ * whatever PtlEQPoll() returns; the queue index it reports is discarded.
+ */
+int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t * ev)
{
- int rc;
+ int which;
- /* PtlEQGet does the handle checking */
- while ((rc = PtlEQGet(eventq_in, event_out)) == PTL_EQ_EMPTY) {
- nal_t *nal = ptl_hndl2nal(&eventq_in);
-
- if (nal->yield)
- nal->yield(nal);
- }
-
- return rc;
+ return (PtlEQPoll (&eventq, 1, 0, ev, &which));
}
-#ifndef __KERNEL__
-#if 0
-static jmp_buf eq_jumpbuf;
-
-static void eq_timeout(int signal)
-{
- sigset_t set;
-
- /* signal will be automatically disabled in sig handler,
- * must enable it before long jump
- */
- sigemptyset(&set);
- sigaddset(&set, SIGALRM);
- sigprocmask(SIG_UNBLOCK, &set, NULL);
-
- longjmp(eq_jumpbuf, -1);
-}
-
-int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out,
- int timeout)
+/*
+ * Blocking fetch of the next event from a single event queue:
+ * a PtlEQPoll() over just 'eventq_in' with a timeout of
+ * PTL_TIME_FOREVER.  Returns whatever PtlEQPoll() returns; the queue
+ * index it reports is discarded.
+ */
+int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out)
{
- static void (*prev) (int) = NULL;
- static int left_over;
- time_t time_at_start;
- int rc;
-
- if (setjmp(eq_jumpbuf)) {
- signal(SIGALRM, prev);
- alarm(left_over - timeout);
- return PTL_EQ_EMPTY;
- }
-
- left_over = alarm(timeout);
- prev = signal(SIGALRM, eq_timeout);
- time_at_start = time(NULL);
- if (left_over && left_over < timeout)
- alarm(left_over);
-
- rc = PtlEQWait(eventq_in, event_out);
-
- signal(SIGALRM, prev);
- alarm(left_over); /* Should compute how long we waited */
-
- return rc;
+ int which;
+
+ return (PtlEQPoll (&eventq_in, 1, PTL_TIME_FOREVER,
+ event_out, &which));
}
-#else
-#include <errno.h>
-
-/* FIXME
- * Here timeout need a trick with tcpnal, definitely unclean but OK for
- * this moment.
- */
-/* global variables defined by tcpnal */
-extern int __tcpnal_eqwait_timeout_value;
-extern int __tcpnal_eqwait_timedout;
-
-int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out,
- int timeout)
+/*
+ * Poll 'neq_in' event queues, all of which must belong to the same NAL,
+ * until one of them yields an event or the timeout runs out.
+ *
+ *   eventqs_in  array of neq_in event queue handles
+ *   timeout     0 = scan once and return; any other value is handed to
+ *               nal->yield() between scans and replaced by its result
+ *   event_out   passed to ptl_get_event() to receive the event
+ *   which_out   set to the index of the queue that produced the result
+ *
+ * Returns PTL_NO_INIT if the library is not initialised, PTL_EQ_INVALID
+ * if neq_in < 1, the first handle has no NAL, or a later handle maps to a
+ * different NAL, PTL_EQ_EMPTY if the timeout reaches 0 with nothing
+ * found, and otherwise the first non-PTL_EQ_EMPTY result of
+ * ptl_get_event().
+ */
+int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout,
+ ptl_event_t *event_out, int *which_out)
{
- int rc;
+ nal_t *nal;
+ int i;
+ int rc;
+ unsigned long flags;
+
+ if (!ptl_init)
+ RETURN(PTL_NO_INIT);
- if (!timeout)
- return PtlEQWait(eventq_in, event_out);
+ if (neq_in < 1)
+ RETURN(PTL_EQ_INVALID);
+
+ /* The first handle selects the NAL whose lock and yield are used */
+ nal = ptl_hndl2nal(&eventqs_in[0]);
+ if (nal == NULL)
+ RETURN(PTL_EQ_INVALID);
- __tcpnal_eqwait_timeout_value = timeout;
+ nal->lock(nal, &flags);
- while ((rc = PtlEQGet(eventq_in, event_out)) == PTL_EQ_EMPTY) {
- nal_t *nal = ptl_hndl2nal(&eventq_in);
+ for (;;) {
+ for (i = 0; i < neq_in; i++) {
+ ptl_eq_t *eq = ptl_handle2usereq(&eventqs_in[i]);
+
+ if (i > 0 &&
+ ptl_hndl2nal(&eventqs_in[i]) != nal) {
+ nal->unlock(nal, &flags);
+ RETURN (PTL_EQ_INVALID);
+ }
+
+ /* size must be a power of 2 to handle a wrapped sequence # */
+ LASSERT (eq->size != 0 &&
+ eq->size == LOWEST_BIT_SET (eq->size));
+
+ rc = ptl_get_event (eq, event_out);
+ if (rc != PTL_EQ_EMPTY) {
+ nal->unlock(nal, &flags);
+ *which_out = i;
+ RETURN(rc);
+ }
+ }
- if (nal->yield)
- nal->yield(nal);
-
- if (__tcpnal_eqwait_timedout) {
- if (__tcpnal_eqwait_timedout != ETIMEDOUT)
- printf("Warning: yield return error %d\n",
- __tcpnal_eqwait_timedout);
- rc = PTL_EQ_EMPTY;
- break;
+ if (timeout == 0) {
+ nal->unlock(nal, &flags);
+ RETURN (PTL_EQ_EMPTY);
}
+
+ /* NOTE(review): yield is called unconditionally here, while the
+ * code this replaces tested nal->yield for NULL first -- confirm
+ * every NAL installs a yield method. */
+ timeout = nal->yield(nal, &flags, timeout);
}
-
- __tcpnal_eqwait_timeout_value = 0;
-
- return rc;
}
-#endif
-#endif /* __KERNEL__ */
"PTL_OK",
"PTL_SEGV",
- "PTL_NOSPACE",
- "PTL_INUSE",
+ "PTL_NO_SPACE",
+ "PTL_ME_IN_USE",
"PTL_VAL_FAILED",
"PTL_NAL_FAILED",
- "PTL_NOINIT",
- "PTL_INIT_DUP",
- "PTL_INIT_INV",
- "PTL_AC_INV_INDEX",
-
- "PTL_INV_ASIZE",
- "PTL_INV_HANDLE",
- "PTL_INV_MD",
- "PTL_INV_ME",
- "PTL_INV_NI",
+ "PTL_NO_INIT",
+ "PTL_IFACE_DUP",
+ "PTL_IFACE_INVALID",
+
+ "PTL_HANDLE_INVALID",
+ "PTL_MD_INVALID",
+ "PTL_ME_INVALID",
/* If you change these, you must update the number table in portals/errno.h */
- "PTL_ILL_MD",
- "PTL_INV_PROC",
- "PTL_INV_PSIZE",
- "PTL_INV_PTINDEX",
- "PTL_INV_REG",
-
- "PTL_INV_SR_INDX",
- "PTL_ML_TOOLONG",
- "PTL_ADDR_UNKNOWN",
- "PTL_INV_EQ",
+ "PTL_PROCESS_INVALID",
+ "PTL_PT_INDEX_INVALID",
+
+ "PTL_SR_INDEX_INVALID",
+ "PTL_EQ_INVALID",
"PTL_EQ_DROPPED",
"PTL_EQ_EMPTY",
- "PTL_NOUPDATE",
+ "PTL_MD_NO_UPDATE",
"PTL_FAIL",
- "PTL_NOT_IMPLEMENTED",
- "PTL_NO_ACK",
"PTL_IOV_TOO_MANY",
"PTL_IOV_TOO_SMALL",
- "PTL_EQ_INUSE",
+ "PTL_EQ_IN_USE",
+
+ "PTL_MAX_ERRNO"
};
/* If you change these, you must update the number table in portals/errno.h */
#include <portals/api-support.h>
int ptl_init;
-unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL |
- S_GMNAL | S_IBNAL);
-unsigned int portal_debug = (D_WARNING | D_DLMTRACE | D_ERROR | D_EMERG | D_HA |
- D_RPCTRACE | D_VFSTRACE | D_MALLOC);
-unsigned int portal_cerror = 1;
-unsigned int portal_printk;
-unsigned int portal_stack;
-
-#ifdef __KERNEL__
-atomic_t portal_kmemory = ATOMIC_INIT(0);
-#endif
int __p30_initialized;
int __p30_myr_initialized;
ptl_handle_ni_t __myr_ni_handle;
ptl_handle_ni_t __ip_ni_handle;
-int __p30_myr_timeout = 10;
-int __p30_ip_timeout;
-
-int PtlInit(void)
+int PtlInit(int *max_interfaces)
{
+ if (max_interfaces != NULL)
+ *max_interfaces = NAL_ENUM_END_MARKER;
if (ptl_init)
return PTL_OK;
+ LASSERT(!strcmp(ptl_err_str[PTL_MAX_ERRNO], "PTL_MAX_ERRNO"));
+
ptl_ni_init();
ptl_me_init();
ptl_eq_init();
ptl_init = 1;
- __p30_initialized = 1;
return PTL_OK;
}
ptl_ni_fini();
ptl_init = 0;
}
+
+
+/* Format handle 'h' into 'str' as "0x<nal_idx>.<cookie>", writing through
+ * snprintf() with 'len' as the buffer size.
+ * NOTE(review): "%lx" presumes h.nal_idx is an unsigned long, and LPX64 is
+ * presumably the project's 64-bit hex format for h.cookie -- confirm both
+ * against the handle type definition. */
+void PtlSnprintHandle(char *str, int len, ptl_handle_any_t h)
+{
+ snprintf(str, len, "0x%lx."LPX64, h.nal_idx, h.cookie);
+}
int i;
if (!ptl_init)
- return PTL_NOINIT;
+ return PTL_NO_INIT;
ptl_ni_init_mutex_enter ();
if (nal->shutdown)
nal->shutdown (nal, ptl_num_interfaces);
ptl_ni_init_mutex_exit ();
- return PTL_NOSPACE;
+ return PTL_NO_SPACE;
}
handle->nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | ptl_num_interfaces;
int rc;
if (!ptl_init)
- return PTL_NOINIT;
+ return PTL_NO_INIT;
ptl_ni_init_mutex_enter ();
nal = ptl_hndl2nal (&ni);
if (nal == NULL) {
ptl_ni_init_mutex_exit ();
- return PTL_INV_HANDLE;
+ return PTL_HANDLE_INVALID;
}
idx = ni.nal_idx & NI_HANDLE_MASK;
if (!ptl_init) {
CERROR("Not initialized\n");
- return PTL_NOINIT;
+ return PTL_NO_INIT;
}
nal = ptl_hndl2nal(&any_h);
if (!nal)
- return PTL_INV_HANDLE;
+ return PTL_HANDLE_INVALID;
nal->forward(nal, cmd, argbuf, argsize, retbuf, retsize);
sizeof(ret));
if (rc != PTL_OK)
- return (rc == PTL_INV_HANDLE) ? PTL_INV_ME : rc;
+ return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
if (handle_out) {
handle_out->nal_idx = current_in.nal_idx;
sizeof(ret));
if (rc != PTL_OK)
- return (rc == PTL_INV_HANDLE) ? PTL_INV_ME : rc;
+ return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
return ret.rc;
}
sizeof(ret));
if (rc != PTL_OK)
- return (rc == PTL_INV_HANDLE) ? PTL_INV_ME : rc;
+ return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
return ret.rc;
}
if (!ptl_init) {
CERROR("PtlMDAttach/Bind/Update: Not initialized\n");
- return PTL_NOINIT;
+ return PTL_NO_INIT;
}
nal = ptl_hndl2nal(¤t_in);
if (!nal)
- return PTL_INV_HANDLE;
+ return PTL_HANDLE_INVALID;
if (nal->validate != NULL) /* nal->validate not a NOOP */
{
- if ((md_in.options & PTL_MD_IOV) == 0) /* contiguous */
+ if ((md_in.options & PTL_MD_IOVEC) == 0) /* contiguous */
{
rc = nal->validate (nal, md_in.start, md_in.length);
if (rc)
static ptl_handle_eq_t md2eq (ptl_md_t *md)
{
- if (PtlHandleEqual (md->eventq, PTL_EQ_NONE))
+ if (PtlHandleIsEqual (md->eventq, PTL_EQ_NONE))
return (PTL_EQ_NONE);
return (ptl_handle2usereq (&md->eventq)->cb_eq_handle);
}
if (rc != PTL_OK)
- return (rc == PTL_INV_HANDLE) ? PTL_INV_ME : rc;
+ return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc;
if (handle_out) {
handle_out->nal_idx = me_in.nal_idx;
int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in,
- ptl_handle_md_t * handle_out)
+ ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out)
{
PtlMDBind_in args;
PtlMDBind_out ret;
args.eq_in = md2eq(&md_in);
args.ni_in = ni_in;
args.md_in = md_in;
+ args.unlink_in = unlink_in;
rc = do_forward(ni_in, PTL_MDBIND,
&args, sizeof(args), &ret, sizeof(ret));
if (new_inout) {
rc = validate_md (md_in, *new_inout);
if (rc != PTL_OK)
- return (rc == PTL_INV_HANDLE) ? PTL_INV_MD : rc;
+ return (rc == PTL_HANDLE_INVALID) ? PTL_MD_INVALID : rc;
args.new_inout = *new_inout;
args.new_inout_valid = 1;
} else
args.new_inout_valid = 0;
- if (PtlHandleEqual (testq_in, PTL_EQ_NONE)) {
+ if (PtlHandleIsEqual (testq_in, PTL_EQ_NONE)) {
args.testq_in = PTL_EQ_NONE;
args.sequence_in = -1;
} else {
rc = do_forward(md_in, PTL_MDUPDATE, &args, sizeof(args), &ret,
sizeof(ret));
if (rc != PTL_OK)
- return (rc == PTL_INV_HANDLE) ? PTL_INV_MD : rc;
+ return (rc == PTL_HANDLE_INVALID) ? PTL_MD_INVALID : rc;
if (old_inout)
*old_inout = ret.old_inout;
rc = do_forward(md_in, PTL_MDUNLINK, &args, sizeof(args), &ret,
sizeof(ret));
if (rc != PTL_OK)
- return (rc == PTL_INV_HANDLE) ? PTL_INV_MD : rc;
+ return (rc == PTL_HANDLE_INVALID) ? PTL_MD_INVALID : rc;
return ret.rc;
}
nal_t *nal;
if (!ptl_init)
- return PTL_NOINIT;
+ return PTL_NO_INIT;
nal = ptl_hndl2nal (&interface);
if (nal == NULL)
- return PTL_INV_HANDLE;
+ return PTL_HANDLE_INVALID;
if (count != LOWEST_BIT_SET(count)) { /* not a power of 2 already */
do { /* knock off all but the top bit... */
PORTAL_ALLOC(ev, count * sizeof(ptl_event_t));
if (!ev)
- return PTL_NOSPACE;
+ return PTL_NO_SPACE;
for (i = 0; i < count; i++)
ev[i].sequence = 0;
PORTAL_ALLOC(eq, sizeof(*eq));
if (!eq) {
- rc = PTL_NOSPACE;
+ rc = PTL_NO_SPACE;
goto fail;
}
eq = lib_eq_alloc (nal);
if (eq == NULL)
- return (ret->rc = PTL_NOSPACE);
+ return (ret->rc = PTL_NO_SPACE);
state_lock(nal, &flags);
eq = ptl_handle2eq(&args->eventq_in, nal);
if (eq == NULL) {
- ret->rc = PTL_INV_EQ;
+ ret->rc = PTL_EQ_INVALID;
} else if (eq->eq_refcount != 0) {
- ret->rc = PTL_EQ_INUSE;
+ ret->rc = PTL_EQ_IN_USE;
} else {
if (nal->cb_unmap != NULL) {
struct iovec iov = {
space = nal->cb_malloc (nal, n * size);
if (space == NULL)
- return (PTL_NOSPACE);
+ return (PTL_NO_SPACE);
INIT_LIST_HEAD (&fl->fl_list);
fl->fl_objs = space;
(struct list_head *)nal->cb_malloc (nal, ni->ni_lh_hash_size
* sizeof (struct list_head));
if (ni->ni_lh_hash_table == NULL)
- return (PTL_NOSPACE);
+ return (PTL_NO_SPACE);
for (i = 0; i < ni->ni_lh_hash_size; i++)
INIT_LIST_HEAD (&ni->ni_lh_hash_table[i]);
ni->tbl.tbl = nal->cb_malloc(nal, sizeof(struct list_head) * ptl_size);
if (ni->tbl.tbl == NULL) {
- rc = PTL_NOSPACE;
+ rc = PTL_NO_SPACE;
goto out;
}
#include <portals/lib-p30.h>
#include <portals/arg-blocks.h>
-/*
- * must be called with state lock held
- */
+/* must be called with state lock held */
void lib_md_unlink(nal_cb_t * nal, lib_md_t * md)
{
- lib_me_t *me = md->me;
+ if ((md->md_flags & PTL_MD_FLAG_ZOMBIE) == 0) {
+ /* first unlink attempt... */
+ lib_me_t *me = md->me;
+
+ md->md_flags |= PTL_MD_FLAG_ZOMBIE;
+
+ /* Disassociate from ME (if any), and unlink it if it was created
+ * with PTL_UNLINK */
+ if (me != NULL) {
+ me->md = NULL;
+ if (me->unlink == PTL_UNLINK)
+ lib_me_unlink(nal, me);
+ }
+
+ /* ensure all future handle lookups fail */
+ lib_invalidate_handle(nal, &md->md_lh);
+ }
if (md->pending != 0) {
CDEBUG(D_NET, "Queueing unlink of md %p\n", md);
- md->md_flags |= PTL_MD_FLAG_UNLINK;
return;
}
if (nal->cb_unmap_pages != NULL)
nal->cb_unmap_pages (nal, md->md_niov, md->md_iov.kiov,
&md->md_addrkey);
- } else if (nal->cb_unmap != NULL)
+ } else if (nal->cb_unmap != NULL) {
nal->cb_unmap (nal, md->md_niov, md->md_iov.iov,
&md->md_addrkey);
-
- if (me) {
- me->md = NULL;
- if (me->unlink == PTL_UNLINK)
- lib_me_unlink(nal, me);
}
- if (md->eq != NULL)
- {
+ if (md->eq != NULL) {
md->eq->eq_refcount--;
LASSERT (md->eq->eq_refcount >= 0);
}
- lib_invalidate_handle (nal, &md->md_lh);
list_del (&md->md_list);
lib_md_free(nal, md);
}
static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private,
ptl_md_t *md, ptl_handle_eq_t *eqh, int unlink)
{
- const int max_size_opts = PTL_MD_AUTO_UNLINK |
- PTL_MD_MAX_SIZE;
lib_eq_t *eq = NULL;
int rc;
int i;
* otherwise caller may only lib_md_free() it.
*/
- if (!PtlHandleEqual (*eqh, PTL_EQ_NONE)) {
+ if (!PtlHandleIsEqual (*eqh, PTL_EQ_NONE)) {
eq = ptl_handle2eq(eqh, nal);
if (eq == NULL)
- return PTL_INV_EQ;
+ return PTL_EQ_INVALID;
}
/* Must check this _before_ allocation. Also, note that non-iov
* MDs must set md_niov to 0. */
- LASSERT((md->options & (PTL_MD_IOV | PTL_MD_KIOV)) == 0 ||
+ LASSERT((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0 ||
md->niov <= PTL_MD_MAX_IOV);
- if ((md->options & max_size_opts) != 0 && /* max size used */
+ /* This implementation doesn't know how to create START events or
+ * disable END events. Best to LASSERT our caller is compliant so
+ * we find out quickly... */
+ LASSERT (PtlHandleIsEqual (*eqh, PTL_EQ_NONE) ||
+ ((md->options & PTL_MD_EVENT_START_DISABLE) != 0 &&
+ (md->options & PTL_MD_EVENT_END_DISABLE) == 0));
+
+ if ((md->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
(md->max_size < 0 || md->max_size > md->length)) // illegal max_size
- return PTL_INV_MD;
+ return PTL_MD_INVALID;
new->me = NULL;
new->start = md->start;
new->length = md->length;
new->offset = 0;
new->max_size = md->max_size;
- new->unlink = unlink;
new->options = md->options;
new->user_ptr = md->user_ptr;
new->eq = eq;
new->threshold = md->threshold;
new->pending = 0;
- new->md_flags = 0;
+ new->md_flags = (unlink == PTL_UNLINK) ? PTL_MD_FLAG_AUTO_UNLINK : 0;
- if ((md->options & PTL_MD_IOV) != 0) {
+ if ((md->options & PTL_MD_IOVEC) != 0) {
int total_length = 0;
if ((md->options & PTL_MD_KIOV) != 0) /* Can't specify both */
- return PTL_INV_MD;
+ return PTL_MD_INVALID;
new->md_niov = md->niov;
}
} else if ((md->options & PTL_MD_KIOV) != 0) {
#ifndef __KERNEL__
- return PTL_INV_MD;
+ return PTL_MD_INVALID;
#else
int total_length = 0;
/* Trap attempt to use paged I/O if unsupported early. */
if (nal->cb_send_pages == NULL ||
nal->cb_recv_pages == NULL)
- return PTL_INV_MD;
+ return PTL_MD_INVALID;
new->md_niov = md->niov;
new->options = md->options;
new->user_ptr = md->user_ptr;
ptl_eq2handle(&new->eventq, md->eq);
- new->niov = ((md->options & (PTL_MD_IOV | PTL_MD_KIOV)) == 0) ? 0 : md->md_niov;
+ new->niov = ((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0) ? 0 : md->md_niov;
}
int do_PtlMDAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
lib_md_t *md;
unsigned long flags;
- if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOV)) != 0 &&
+ if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 &&
args->md_in.niov > PTL_MD_MAX_IOV) /* too many fragments */
return (ret->rc = PTL_IOV_TOO_MANY);
md = lib_md_alloc(nal, &args->md_in);
if (md == NULL)
- return (ret->rc = PTL_NOSPACE);
+ return (ret->rc = PTL_NO_SPACE);
state_lock(nal, &flags);
me = ptl_handle2me(&args->me_in, nal);
if (me == NULL) {
- ret->rc = PTL_INV_ME;
+ ret->rc = PTL_ME_INVALID;
} else if (me->md != NULL) {
- ret->rc = PTL_INUSE;
+ ret->rc = PTL_ME_IN_USE;
} else {
ret->rc = lib_md_build(nal, md, private, &args->md_in,
&args->eq_in, args->unlink_in);
lib_md_t *md;
unsigned long flags;
- if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOV)) != 0 &&
+ if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 &&
args->md_in.niov > PTL_MD_MAX_IOV) /* too many fragments */
return (ret->rc = PTL_IOV_TOO_MANY);
md = lib_md_alloc(nal, &args->md_in);
if (md == NULL)
- return (ret->rc = PTL_NOSPACE);
+ return (ret->rc = PTL_NO_SPACE);
state_lock(nal, &flags);
- ret->rc = lib_md_build(nal, md, private,
- &args->md_in, &args->eq_in, PTL_UNLINK);
+ ret->rc = lib_md_build(nal, md, private, &args->md_in,
+ &args->eq_in, args->unlink_in);
if (ret->rc == PTL_OK) {
ptl_md2handle(&ret->handle_out, md);
md = ptl_handle2md(&args->md_in, nal);
if (md == NULL) {
state_unlock(nal, &flags);
- return (ret->rc = PTL_INV_MD);
+ return (ret->rc = PTL_MD_INVALID);
}
/* If the MD is busy, lib_md_unlink just marks it for deletion, and
memset(&ev, 0, sizeof(ev));
ev.type = PTL_EVENT_UNLINK;
- ev.status = PTL_OK;
+ ev.ni_fail_type = PTL_OK;
ev.unlinked = 1;
lib_md_deconstruct(nal, md, &ev.mem_desc);
md = ptl_handle2md(&args->md_in, nal);
if (md == NULL) {
- ret->rc = PTL_INV_MD;
+ ret->rc = PTL_MD_INVALID;
goto out;
}
/* XXX fttb, the new MD must be the same type wrt fragmentation */
if (((new->options ^ md->options) &
- (PTL_MD_IOV | PTL_MD_KIOV)) != 0) {
- ret->rc = PTL_INV_MD;
+ (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0) {
+ ret->rc = PTL_MD_INVALID;
goto out;
}
goto out;
}
- if (!PtlHandleEqual (args->testq_in, PTL_EQ_NONE)) {
+ if (!PtlHandleIsEqual (args->testq_in, PTL_EQ_NONE)) {
test_eq = ptl_handle2eq(&args->testq_in, nal);
if (test_eq == NULL) {
- ret->rc = PTL_INV_EQ;
+ ret->rc = PTL_EQ_INVALID;
goto out;
}
}
if (md->pending != 0) {
- ret->rc = PTL_NOUPDATE;
+ ret->rc = PTL_MD_NO_UPDATE;
goto out;
}
if (test_eq == NULL ||
test_eq->sequence == args->sequence_in) {
lib_me_t *me = md->me;
+ int unlink = (md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) ?
+ PTL_UNLINK : PTL_RETAIN;
// #warning this does not track eq refcounts properly
ret->rc = lib_md_build(nal, md, private,
- new, &new->eventq, md->unlink);
+ new, &new->eventq, unlink);
md->me = me;
} else {
- ret->rc = PTL_NOUPDATE;
+ ret->rc = PTL_MD_NO_UPDATE;
}
out:
lib_me_t *me;
if (args->index_in >= tbl->size)
- return ret->rc = PTL_INV_PTINDEX;
+ return ret->rc = PTL_PT_INDEX_INVALID;
/* Should check for valid matchid, but not yet */
if (0)
- return ret->rc = PTL_INV_PROC;
+ return ret->rc = PTL_PROCESS_INVALID;
me = lib_me_alloc (nal);
if (me == NULL)
- return (ret->rc = PTL_NOSPACE);
+ return (ret->rc = PTL_NO_SPACE);
state_lock(nal, &flags);
new = lib_me_alloc (nal);
if (new == NULL)
- return (ret->rc = PTL_NOSPACE);
+ return (ret->rc = PTL_NO_SPACE);
/* Should check for valid matchid, but not yet */
lib_me_free (nal, new);
state_unlock (nal, &flags);
- return (ret->rc = PTL_INV_ME);
+ return (ret->rc = PTL_ME_INVALID);
}
new->match_id = args->match_id_in;
me = ptl_handle2me(&args->current_in, nal);
if (me == NULL) {
- ret->rc = PTL_INV_ME;
+ ret->rc = PTL_ME_INVALID;
} else {
lib_me_unlink(nal, me);
ret->rc = PTL_OK;
unsigned long flags;
if (args->index_in < 0 || args->index_in >= tbl->size)
- return ret->rc = PTL_INV_PTINDEX;
+ return ret->rc = PTL_PT_INDEX_INVALID;
nal->cb_printf(nal, "Portal table index %d\n", args->index_in);
me = ptl_handle2me(&args->current_in, nal);
if (me == NULL) {
- ret->rc = PTL_INV_ME;
+ ret->rc = PTL_ME_INVALID;
} else {
lib_me_dump(nal, me);
ret->rc = PTL_OK;
#include <portals/lib-p30.h>
#include <portals/arg-blocks.h>
-/*
- * Right now it does not check access control lists.
- *
- * We only support one MD per ME, which is how the Portals 3.1 spec is written.
- * All previous complication is removed.
- */
-
-static lib_me_t *
-lib_find_me(nal_cb_t *nal, int index, int op_mask, ptl_nid_t src_nid,
- ptl_pid_t src_pid, ptl_size_t rlength, ptl_size_t roffset,
- ptl_match_bits_t match_bits, ptl_size_t *mlength_out,
- ptl_size_t *offset_out, int *unlink_out)
+/* forward ref */
+static void lib_commit_md (nal_cb_t *nal, lib_md_t *md, lib_msg_t *msg);
+
+static lib_md_t *
+lib_match_md(nal_cb_t *nal, int index, int op_mask,
+ ptl_nid_t src_nid, ptl_pid_t src_pid,
+ ptl_size_t rlength, ptl_size_t roffset,
+ ptl_match_bits_t match_bits, lib_msg_t *msg,
+ ptl_size_t *mlength_out, ptl_size_t *offset_out)
{
lib_ni_t *ni = &nal->ni;
struct list_head *match_list = &ni->tbl.tbl[index];
lib_md_t *md;
ptl_size_t mlength;
ptl_size_t offset;
-
ENTRY;
CDEBUG (D_NET, "Request from "LPU64".%d of length %d into portal %d "
LASSERT (me == md->me);
- /* MD deactivated */
- if (md->threshold == 0)
- continue;
-
/* mismatched MD op */
if ((md->options & op_mask) == 0)
continue;
+ /* MD exhausted */
+ if (lib_md_exhausted(md))
+ continue;
+
/* mismatched ME nid/pid? */
if (me->match_id.nid != PTL_NID_ANY &&
me->match_id.nid != src_nid)
else
offset = roffset;
- mlength = md->length - offset;
- if ((md->options & PTL_MD_MAX_SIZE) != 0 &&
- mlength > md->max_size)
+ if ((md->options & PTL_MD_MAX_SIZE) != 0) {
mlength = md->max_size;
+ LASSERT (md->offset + mlength <= md->length);
+ } else {
+ mlength = md->length - offset;
+ }
if (rlength <= mlength) { /* fits in allowed space */
mlength = rlength;
goto failed;
}
+ /* Commit to this ME/MD */
+ CDEBUG(D_NET, "Incoming %s index %x from "LPU64"/%u of "
+ "length %d/%d into md "LPX64" [%d] + %d\n",
+ (op_mask == PTL_MD_OP_PUT) ? "put" : "get",
+ index, src_nid, src_pid, mlength, rlength,
+ md->md_lh.lh_cookie, md->md_niov, offset);
+
+ lib_commit_md(nal, md, msg);
md->offset = offset + mlength;
+ /* NB Caller sets ev.type and ev.hdr_data */
+ msg->ev.initiator.nid = src_nid;
+ msg->ev.initiator.pid = src_pid;
+ msg->ev.portal = index;
+ msg->ev.match_bits = match_bits;
+ msg->ev.rlength = rlength;
+ msg->ev.mlength = mlength;
+ msg->ev.offset = offset;
+
+ lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
+
*offset_out = offset;
*mlength_out = mlength;
- *unlink_out = ((md->options & PTL_MD_AUTO_UNLINK) != 0 &&
- md->offset >= (md->length - md->max_size));
- RETURN (me);
+
+ /* Auto-unlink NOW, so the ME gets unlinked if required.
+ * We bumped md->pending above so the MD just gets flagged
+ * for unlink when it is finalized. */
+ if ((md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) != 0 &&
+ lib_md_exhausted(md))
+ lib_md_unlink(nal, md);
+
+ RETURN (md);
}
failed:
lib_ni_t *ni = &nal->ni;
ptl_size_t mlength = 0;
ptl_size_t offset = 0;
- int unlink = 0;
ptl_err_t rc;
- lib_me_t *me;
lib_md_t *md;
unsigned long flags;
state_lock(nal, &flags);
- me = lib_find_me(nal, hdr->msg.put.ptl_index, PTL_MD_OP_PUT,
- hdr->src_nid, hdr->src_pid,
- hdr->payload_length, hdr->msg.put.offset,
- hdr->msg.put.match_bits,
- &mlength, &offset, &unlink);
- if (me == NULL) {
+ md = lib_match_md(nal, hdr->msg.put.ptl_index, PTL_MD_OP_PUT,
+ hdr->src_nid, hdr->src_pid,
+ hdr->payload_length, hdr->msg.put.offset,
+ hdr->msg.put.match_bits, msg,
+ &mlength, &offset);
+ if (md == NULL) {
state_unlock(nal, &flags);
return (PTL_FAIL);
}
- md = me->md;
- CDEBUG(D_NET, "Incoming put index %x from "LPU64"/%u of length %d/%d "
- "into md "LPX64" [%d] + %d\n", hdr->msg.put.ptl_index,
- hdr->src_nid, hdr->src_pid, mlength, hdr->payload_length,
- md->md_lh.lh_cookie, md->md_niov, offset);
-
- lib_commit_md(nal, md, msg);
-
- msg->ev.type = PTL_EVENT_PUT;
- msg->ev.initiator.nid = hdr->src_nid;
- msg->ev.initiator.pid = hdr->src_pid;
- msg->ev.portal = hdr->msg.put.ptl_index;
- msg->ev.match_bits = hdr->msg.put.match_bits;
- msg->ev.rlength = hdr->payload_length;
- msg->ev.mlength = mlength;
- msg->ev.offset = offset;
+ msg->ev.type = PTL_EVENT_PUT_END;
msg->ev.hdr_data = hdr->msg.put.hdr_data;
- lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
-
if (!ptl_is_wire_handle_none(&hdr->msg.put.ack_wmd) &&
!(md->options & PTL_MD_ACK_DISABLE)) {
msg->ack_wmd = hdr->msg.put.ack_wmd;
ni->counters.recv_count++;
ni->counters.recv_length += mlength;
- /* only unlink after MD's pending count has been bumped in
- * lib_commit_md() otherwise lib_me_unlink() will nuke it */
- if (unlink)
- lib_me_unlink (nal, me);
-
state_unlock(nal, &flags);
rc = lib_recv(nal, private, msg, md, offset, mlength,
lib_ni_t *ni = &nal->ni;
ptl_size_t mlength = 0;
ptl_size_t offset = 0;
- int unlink = 0;
- lib_me_t *me;
lib_md_t *md;
ptl_hdr_t reply;
unsigned long flags;
state_lock(nal, &flags);
- me = lib_find_me(nal, hdr->msg.get.ptl_index, PTL_MD_OP_GET,
- hdr->src_nid, hdr->src_pid,
- hdr->msg.get.sink_length, hdr->msg.get.src_offset,
- hdr->msg.get.match_bits,
- &mlength, &offset, &unlink);
- if (me == NULL) {
+ md = lib_match_md(nal, hdr->msg.get.ptl_index, PTL_MD_OP_GET,
+ hdr->src_nid, hdr->src_pid,
+ hdr->msg.get.sink_length, hdr->msg.get.src_offset,
+ hdr->msg.get.match_bits, msg,
+ &mlength, &offset);
+ if (md == NULL) {
state_unlock(nal, &flags);
return (PTL_FAIL);
}
- md = me->md;
- CDEBUG(D_NET, "Incoming get index %d from "LPU64".%u of length %d/%d "
- "from md "LPX64" [%d] + %d\n", hdr->msg.get.ptl_index,
- hdr->src_nid, hdr->src_pid, mlength, hdr->payload_length,
- md->md_lh.lh_cookie, md->md_niov, offset);
-
- lib_commit_md(nal, md, msg);
-
- msg->ev.type = PTL_EVENT_GET;
- msg->ev.initiator.nid = hdr->src_nid;
- msg->ev.initiator.pid = hdr->src_pid;
- msg->ev.portal = hdr->msg.get.ptl_index;
- msg->ev.match_bits = hdr->msg.get.match_bits;
- msg->ev.rlength = hdr->payload_length;
- msg->ev.mlength = mlength;
- msg->ev.offset = offset;
+ msg->ev.type = PTL_EVENT_GET_END;
msg->ev.hdr_data = 0;
- lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
-
ni->counters.send_count++;
ni->counters.send_length += mlength;
- /* only unlink after MD's refcount has been bumped in
- * lib_commit_md() otherwise lib_me_unlink() will nuke it */
- if (unlink)
- lib_me_unlink (nal, me);
-
state_unlock(nal, &flags);
memset (&reply, 0, sizeof (reply));
lib_commit_md(nal, md, msg);
- msg->ev.type = PTL_EVENT_REPLY;
+ msg->ev.type = PTL_EVENT_REPLY_END;
msg->ev.initiator.nid = hdr->src_nid;
msg->ev.initiator.pid = hdr->src_pid;
msg->ev.rlength = rlength;
return;
}
- do_gettimeofday(&msg->ev.arrival_time);
-
switch (hdr->type) {
case PTL_MSG_ACK:
rc = parse_ack(nal, hdr, private, msg);
{
CERROR(LPU64": Dropping PUT to "LPU64": simulated failure\n",
nal->ni.nid, id->nid);
- return (ret->rc = PTL_INV_PROC);
+ return (ret->rc = PTL_PROCESS_INVALID);
}
msg = lib_msg_alloc(nal);
if (msg == NULL) {
CERROR(LPU64": Dropping PUT to "LPU64": ENOMEM on lib_msg_t\n",
ni->nid, id->nid);
- return (ret->rc = PTL_NOSPACE);
+ return (ret->rc = PTL_NO_SPACE);
}
state_lock(nal, &flags);
lib_msg_free(nal, msg);
state_unlock(nal, &flags);
- return (ret->rc = PTL_INV_MD);
+ return (ret->rc = PTL_MD_INVALID);
}
CDEBUG(D_NET, "PtlPut -> %Lu: %lu\n", (unsigned long long)id->nid,
lib_commit_md(nal, md, msg);
- msg->ev.type = PTL_EVENT_SENT;
+ msg->ev.type = PTL_EVENT_SEND_END;
msg->ev.initiator.nid = ni->nid;
msg->ev.initiator.pid = ni->pid;
msg->ev.portal = args->portal_in;
}
lib_msg_t *
-lib_fake_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, lib_md_t *getmd)
+lib_create_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, lib_msg_t *getmsg)
{
/* The NAL can DMA direct to the GET md (i.e. no REPLY msg). This
- * returns a msg the NAL can pass to lib_finalize() so that a REPLY
- * event still occurs.
+ * returns a msg for the NAL to pass to lib_finalize() when the sink
+ * data has been received.
*
- * CAVEAT EMPTOR: 'getmd' is passed by pointer so it MUST be valid.
- * This can only be guaranteed while a lib_msg_t holds a reference
- * on it (ie. pending > 0), so best call this before the
- * lib_finalize() of the original GET. */
+ * CAVEAT EMPTOR: 'getmsg' is the original GET, which is freed when
+ * lib_finalize() is called on it, so the NAL must call this first */
lib_ni_t *ni = &nal->ni;
lib_msg_t *msg = lib_msg_alloc(nal);
+ lib_md_t *getmd = getmsg->md;
unsigned long flags;
state_lock(nal, &flags);
lib_commit_md (nal, getmd, msg);
- msg->ev.type = PTL_EVENT_REPLY;
+ msg->ev.type = PTL_EVENT_REPLY_END;
msg->ev.initiator.nid = peer_nid;
msg->ev.initiator.pid = 0; /* XXX FIXME!!! */
msg->ev.rlength = msg->ev.mlength = getmd->length;
{
CERROR(LPU64": Dropping PUT to "LPU64": simulated failure\n",
nal->ni.nid, id->nid);
- return (ret->rc = PTL_INV_PROC);
+ return (ret->rc = PTL_PROCESS_INVALID);
}
msg = lib_msg_alloc(nal);
if (msg == NULL) {
CERROR(LPU64": Dropping GET to "LPU64": ENOMEM on lib_msg_t\n",
ni->nid, id->nid);
- return (ret->rc = PTL_NOSPACE);
+ return (ret->rc = PTL_NO_SPACE);
}
state_lock(nal, &flags);
lib_msg_free(nal, msg);
state_unlock(nal, &flags);
- return ret->rc = PTL_INV_MD;
+ return ret->rc = PTL_MD_INVALID;
}
CDEBUG(D_NET, "PtlGet -> %Lu: %lu\n", (unsigned long long)id->nid,
lib_commit_md(nal, md, msg);
- msg->ev.type = PTL_EVENT_SENT;
+ msg->ev.type = PTL_EVENT_SEND_END;
msg->ev.initiator.nid = ni->nid;
msg->ev.initiator.pid = ni->pid;
msg->ev.portal = args->portal_in;
if (status == PTL_OK &&
!ptl_is_wire_handle_none(&msg->ack_wmd)) {
- LASSERT(msg->ev.type == PTL_EVENT_PUT);
+ LASSERT(msg->ev.type == PTL_EVENT_PUT_END);
memset (&ack, 0, sizeof (ack));
ack.type = HTON__u32 (PTL_MSG_ACK);
LASSERT (md->pending >= 0);
/* Should I unlink this MD? */
- unlink = (md->pending == 0 && /* No other refs */
- (md->threshold == 0 || /* All ops done */
- md->md_flags & PTL_MD_FLAG_UNLINK) != 0); /* black spot */
-
- msg->ev.status = status;
+ if (md->pending != 0) /* other refs */
+ unlink = 0;
+ else if ((md->md_flags & PTL_MD_FLAG_ZOMBIE) != 0)
+ unlink = 1;
+ else if ((md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) == 0)
+ unlink = 0;
+ else
+ unlink = lib_md_exhausted(md);
+
+ msg->ev.ni_fail_type = status;
msg->ev.unlinked = unlink;
if (md->eq != NULL)
else if (args->register_in == PTL_SR_MSGS_MAX)
ret->status_out = count->msgs_max;
else
- ret->rc = PTL_INV_SR_INDX;
+ ret->rc = PTL_SR_INDEX_INVALID;
return ret->rc;
}
if ((rc = nal->cb_dist(nal, nid, &dist)) != 0) {
ret->distance_out = (unsigned long) MAX_DIST;
- return PTL_INV_PROC;
+ return PTL_PROCESS_INVALID;
}
ret->distance_out = dist;
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/smp_lock.h>
+#include <linux/unistd.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <asm/uaccess.h>
+#include <asm/segment.h>
+#include <linux/miscdevice.h>
+
+#include <portals/lib-p30.h>
+#include <portals/p30.h>
+#include <linux/kp30.h>
+#include <linux/kpr.h>
+#include <linux/portals_compat25.h>
+
+extern void (kping_client)(struct portal_ioctl_data *);
+
+/* One registered per-NAL command handler: the callback invoked by
+ * kportal_nal_cmd() and the private pointer handed back to it. */
+struct nal_cmd_handler {
+ nal_cmd_handler_t nch_handler;
+ void * nch_private;
+};
+
+/* Handler table indexed by NAL id; the users in this file only accept ids in
+ * 1..NAL_MAX_NR, so slot 0 is never used.  All accesses in this file are
+ * made with nal_cmd_sem held. */
+static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1];
+static DECLARE_MUTEX(nal_cmd_sem);
+
+
+/* Add a route for NIDs lo_nid..hi_nid via the given gateway by calling the
+ * router's control interface.
+ * Returns -ENODEV if the kpr_control_interface symbol cannot be obtained,
+ * otherwise whatever kprci_add_route() returns. */
+static int
+kportal_add_route(int gateway_nalid, ptl_nid_t gateway_nid,
+                  ptl_nid_t lo_nid, ptl_nid_t hi_nid)
+{
+        kpr_control_interface_t *router;
+        int                      status = -ENODEV;
+
+        router = (kpr_control_interface_t *)
+                 PORTAL_SYMBOL_GET (kpr_control_interface);
+        if (router != NULL) {
+                status = router->kprci_add_route (gateway_nalid, gateway_nid,
+                                                  lo_nid, hi_nid);
+                /* balance the PORTAL_SYMBOL_GET above */
+                PORTAL_SYMBOL_PUT(kpr_control_interface);
+        }
+
+        return (status);
+}
+
+/* Remove routes for NIDs lo..hi via gateway gw_nid on NAL gw_nalid by
+ * calling the router's control interface.
+ * Returns -ENODEV if the kpr_control_interface symbol cannot be obtained,
+ * otherwise whatever kprci_del_route() returns. */
+static int
+kportal_del_route(int gw_nalid, ptl_nid_t gw_nid,
+                  ptl_nid_t lo, ptl_nid_t hi)
+{
+        kpr_control_interface_t *router;
+        int                      status;
+
+        router = (kpr_control_interface_t *)
+                 PORTAL_SYMBOL_GET(kpr_control_interface);
+        if (router == NULL) {
+                status = -ENODEV;
+        } else {
+                status = router->kprci_del_route (gw_nalid, gw_nid, lo, hi);
+                /* balance the PORTAL_SYMBOL_GET above */
+                PORTAL_SYMBOL_PUT(kpr_control_interface);
+        }
+
+        return (status);
+}
+
+/* Pass an alive/dead notification for peer gw_nid on NAL gw_nalid, stamped
+ * with 'when', to the router's control interface.
+ *
+ * A missing router is deliberately not an error: the sysadmin is allowed to
+ * notify _everywhere_ when a NID boots or crashes, even on nodes that know
+ * nothing of the peer.  Returns 0 in that case, otherwise whatever
+ * kprci_notify() returns. */
+static int
+kportal_notify_router (int gw_nalid, ptl_nid_t gw_nid,
+                       int alive, time_t when)
+{
+        kpr_control_interface_t *router;
+        int                      status = 0;
+
+        router = (kpr_control_interface_t *)
+                 PORTAL_SYMBOL_GET(kpr_control_interface);
+        if (router != NULL) {
+                status = router->kprci_notify (gw_nalid, gw_nid, alive, when);
+                /* balance the PORTAL_SYMBOL_GET above */
+                PORTAL_SYMBOL_PUT(kpr_control_interface);
+        }
+
+        return (status);
+}
+
+/* Look up route number 'index' through the router's control interface.
+ *
+ * The *p output parameters are written only when the lookup succeeds
+ * (kprci_get_route() returned 0); on failure they are left untouched.
+ * Returns -ENODEV if the kpr_control_interface symbol cannot be obtained,
+ * otherwise whatever kprci_get_route() returns. */
+static int
+kportal_get_route(int index, __u32 *gateway_nalidp, ptl_nid_t *gateway_nidp,
+                  ptl_nid_t *lo_nidp, ptl_nid_t *hi_nidp, int *alivep)
+{
+        kpr_control_interface_t *router;
+        ptl_nid_t                gw_nid;
+        ptl_nid_t                first_nid;
+        ptl_nid_t                last_nid;
+        int                      gw_nalid;
+        int                      is_alive;
+        int                      status;
+
+        router = (kpr_control_interface_t *)
+                 PORTAL_SYMBOL_GET(kpr_control_interface);
+        if (router == NULL)
+                return (-ENODEV);
+
+        /* fetch into locals so the caller's storage is only touched on
+         * success */
+        status = router->kprci_get_route(index, &gw_nalid, &gw_nid,
+                                         &first_nid, &last_nid, &is_alive);
+        if (status != 0)
+                goto out;
+
+        CDEBUG(D_IOCTL, "got route [%d] %d "LPX64":"LPX64" - "LPX64", %s\n",
+               index, gw_nalid, gw_nid, first_nid, last_nid,
+               is_alive ? "up" : "down");
+
+        *gateway_nalidp = (__u32)gw_nalid;
+        *gateway_nidp   = gw_nid;
+        *lo_nidp        = first_nid;
+        *hi_nidp        = last_nid;
+        *alivep         = is_alive;
+
+ out:
+        /* balance the PORTAL_SYMBOL_GET above */
+        PORTAL_SYMBOL_PUT (kpr_control_interface);
+        return (status);
+}
+
+/* NAL command handler for the router: dispatches the route add / delete /
+ * notify / get commands carried in 'pcfg' to the kportal_*_route helpers.
+ *
+ * 'private' is the pointer supplied at registration time; it is not used.
+ * Returns -EINVAL for any command not handled here, otherwise the result of
+ * the helper that was called.  NAL_CMD_GET_ROUTE writes its results back
+ * into 'pcfg'. */
+static int
+kportal_router_cmd(struct portals_cfg *pcfg, void * private)
+{
+        int err = -EINVAL;
+        ENTRY;
+
+        switch(pcfg->pcfg_command) {
+        default:
+                CDEBUG(D_IOCTL, "Inappropriate cmd: %d\n", pcfg->pcfg_command);
+                break;
+
+        case NAL_CMD_ADD_ROUTE:
+                /* log the gateway NAL that is actually handed to
+                 * kportal_add_route(), not the NAL the command was
+                 * dispatched to */
+                CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n",
+                       pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                       pcfg->pcfg_nid2, pcfg->pcfg_nid3);
+                err = kportal_add_route(pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                                        pcfg->pcfg_nid2, pcfg->pcfg_nid3);
+                break;
+
+        case NAL_CMD_DEL_ROUTE:
+                CDEBUG (D_IOCTL, "Removing routes via [%d] "LPU64" : "LPU64" - "LPU64"\n",
+                        pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                        pcfg->pcfg_nid2, pcfg->pcfg_nid3);
+                err = kportal_del_route (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                                         pcfg->pcfg_nid2, pcfg->pcfg_nid3);
+                break;
+
+        case NAL_CMD_NOTIFY_ROUTER: {
+                /* pcfg_nid3 carries the notification timestamp for this
+                 * command; pcfg_flags is the alive flag */
+                CDEBUG (D_IOCTL, "Notifying peer [%d] "LPU64" %s @ %ld\n",
+                        pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                        pcfg->pcfg_flags ? "Enabling" : "Disabling",
+                        (time_t)pcfg->pcfg_nid3);
+
+                err = kportal_notify_router (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                                             pcfg->pcfg_flags,
+                                             (time_t)pcfg->pcfg_nid3);
+                break;
+        }
+
+        case NAL_CMD_GET_ROUTE:
+                /* pcfg_count is the index of the route to fetch */
+                CDEBUG (D_IOCTL, "Getting route [%d]\n", pcfg->pcfg_count);
+                err = kportal_get_route(pcfg->pcfg_count, &pcfg->pcfg_gw_nal,
+                                        &pcfg->pcfg_nid,
+                                        &pcfg->pcfg_nid2, &pcfg->pcfg_nid3,
+                                        &pcfg->pcfg_flags);
+                break;
+        }
+        RETURN(err);
+}
+
+/* Run the command in 'pcfg' through the handler registered for
+ * pcfg->pcfg_nal.  The handler is called with nal_cmd_sem held.
+ * Returns -EINVAL when the NAL id is outside 1..NAL_MAX_NR or has no
+ * handler registered, otherwise the handler's return value. */
+int
+kportal_nal_cmd(struct portals_cfg *pcfg)
+{
+        struct nal_cmd_handler *slot;
+        __u32                   nal = pcfg->pcfg_nal;
+        int                     rc = -EINVAL;
+
+        ENTRY;
+
+        down(&nal_cmd_sem);
+
+        /* only index the table once the id is known to be in range */
+        if (nal == 0 || nal > NAL_MAX_NR)
+                goto out;
+
+        slot = &nal_cmd[nal];
+        if (!slot->nch_handler)
+                goto out;
+
+        CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal,
+               pcfg->pcfg_command);
+        rc = slot->nch_handler(pcfg, slot->nch_private);
+
+ out:
+        up(&nal_cmd_sem);
+        RETURN(rc);
+}
+
+/* Return the network-interface handle exported by the kernel NAL 'nal',
+ * obtained with PORTAL_SYMBOL_GET; callers in this file release it with
+ * kportal_put_ni().
+ * Returns NULL for TCPNAL (a userspace NAL) and, after logging an error,
+ * for any NAL id not listed here. */
+ptl_handle_ni_t *
+kportal_get_ni (int nal)
+{
+        ptl_handle_ni_t *ni = NULL;
+
+        switch (nal) {
+        case QSWNAL:
+                ni = PORTAL_SYMBOL_GET(kqswnal_ni);
+                break;
+        case SOCKNAL:
+                ni = PORTAL_SYMBOL_GET(ksocknal_ni);
+                break;
+        case GMNAL:
+                ni = PORTAL_SYMBOL_GET(kgmnal_ni);
+                break;
+        case IBNAL:
+                ni = PORTAL_SYMBOL_GET(kibnal_ni);
+                break;
+        case SCIMACNAL:
+                ni = PORTAL_SYMBOL_GET(kscimacnal_ni);
+                break;
+        case TCPNAL:
+                /* userspace NAL: nothing to hand out in the kernel */
+                break;
+        default:
+                /* A warning to a naive caller */
+                CERROR ("unknown nal: %d\n", nal);
+                break;
+        }
+
+        return (ni);
+}
+
+/* Release the NAL symbol obtained by a matching kportal_get_ni(nal) call.
+ *
+ * TCPNAL is a userspace NAL for which kportal_get_ni() returns NULL, so a
+ * put on it is a caller bug and triggers LBUG().  Unknown NAL ids are
+ * logged and otherwise ignored. */
+void
+kportal_put_ni (int nal)
+{
+
+        switch (nal)
+        {
+        case QSWNAL:
+                PORTAL_SYMBOL_PUT(kqswnal_ni);
+                break;
+        case SOCKNAL:
+                PORTAL_SYMBOL_PUT(ksocknal_ni);
+                break;
+        case GMNAL:
+                PORTAL_SYMBOL_PUT(kgmnal_ni);
+                break;
+        case IBNAL:
+                PORTAL_SYMBOL_PUT(kibnal_ni);
+                break;
+        case TCPNAL:
+                /* A lesson to a malicious caller */
+                LBUG ();
+                /* never fall through into the SCIMACNAL put: should LBUG()
+                 * ever return, that would drop a symbol reference that was
+                 * never taken */
+                break;
+        case SCIMACNAL:
+                PORTAL_SYMBOL_PUT(kscimacnal_ni);
+                break;
+        default:
+                CERROR ("unknown nal: %d\n", nal);
+        }
+}
+
+/* Install 'handler' (with its 'private' pointer) as the command handler for
+ * NAL id 'nal'.
+ * Returns -EBUSY if that id already has a handler and 0 otherwise.  Note
+ * that an id outside 1..NAL_MAX_NR also yields 0, with nothing registered. */
+int
+kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private)
+{
+        struct nal_cmd_handler *slot;
+        int                     rc = 0;
+
+        CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler);
+
+        /* out-of-range ids are ignored without an error */
+        if (nal <= 0 || nal > NAL_MAX_NR)
+                return rc;
+
+        slot = &nal_cmd[nal];
+
+        down(&nal_cmd_sem);
+        if (slot->nch_handler == NULL) {
+                slot->nch_handler = handler;
+                slot->nch_private = private;
+        } else {
+                rc = -EBUSY;
+        }
+        up(&nal_cmd_sem);
+
+        return rc;
+}
+
+/* Clear the command handler and private pointer registered for NAL id
+ * 'nal'.  Ids outside 1..NAL_MAX_NR are ignored.  Always returns 0. */
+int
+kportal_nal_unregister(int nal)
+{
+        struct nal_cmd_handler *slot;
+
+        CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal);
+
+        if (nal <= 0 || nal > NAL_MAX_NR)
+                return 0;
+
+        slot = &nal_cmd[nal];
+
+        down(&nal_cmd_sem);
+        slot->nch_handler = NULL;
+        slot->nch_private = NULL;
+        up(&nal_cmd_sem);
+
+        return 0;
+}
+
+/* Handle the portals ioctls (ping, get-nid, NAL command, fail-nid).  This is
+ * the function bound to kportal_ioctl_handler by DECLARE_IOCTL_HANDLER.
+ *
+ * data: the ioctl request block.
+ * cmd:  the ioctl command number.
+ * arg:  user-space address that 'data' is copied back to for commands that
+ *       return results.
+ *
+ * Returns 0 on success; -EINVAL for an unknown command, an unavailable NAL
+ * or a mis-sized NAL command buffer; -EFAULT when a copy to or from user
+ * space fails; otherwise the result of the underlying call. */
+static int kportal_ioctl(struct portal_ioctl_data *data,
+                         unsigned int cmd, unsigned long arg)
+{
+        int err = 0;
+        char str[PTL_NALFMT_SIZE];
+        ENTRY;
+
+        switch (cmd) {
+        case IOC_PORTAL_PING: {
+                void (*ping)(struct portal_ioctl_data *);
+
+                CDEBUG(D_IOCTL, "doing %d pings to nid "LPX64" (%s)\n",
+                       data->ioc_count, data->ioc_nid,
+                       portals_nid2str(data->ioc_nal, data->ioc_nid, str));
+                ping = PORTAL_SYMBOL_GET(kping_client);
+                if (!ping)
+                        CERROR("PORTAL_SYMBOL_GET failed\n");
+                else {
+                        ping(data);
+                        PORTAL_SYMBOL_PUT(kping_client);
+                }
+                /* a missing ping client is logged but not reported */
+                RETURN(0);
+        }
+
+        case IOC_PORTAL_GET_NID: {
+                const ptl_handle_ni_t *nip;
+                ptl_process_id_t       pid;
+
+                CDEBUG (D_IOCTL, "Getting nid for nal [%d]\n", data->ioc_nal);
+
+                nip = kportal_get_ni (data->ioc_nal);
+                if (nip == NULL)
+                        RETURN (-EINVAL);
+
+                err = PtlGetId (*nip, &pid);
+                LASSERT (err == PTL_OK);
+                kportal_put_ni (data->ioc_nal);
+
+                data->ioc_nid = pid.nid;
+                if (copy_to_user ((char *)arg, data, sizeof (*data)))
+                        err = -EFAULT;
+                break;
+        }
+
+        case IOC_PORTAL_NAL_CMD: {
+                struct portals_cfg pcfg;
+
+                /* ioc_plen1 arrives in the ioctl request alongside the user
+                 * pointer ioc_pbuf1, so a bad length must be rejected rather
+                 * than asserted on */
+                if (data->ioc_plen1 != sizeof(pcfg))
+                        RETURN (-EINVAL);
+
+                /* copy_from_user() returns the number of bytes it could NOT
+                 * copy, which is not an errno: map failure to -EFAULT */
+                if (copy_from_user(&pcfg, (void *)data->ioc_pbuf1,
+                                   sizeof(pcfg)))
+                        RETURN (-EFAULT);
+
+                CDEBUG (D_IOCTL, "nal command nal %d cmd %d\n", pcfg.pcfg_nal,
+                        pcfg.pcfg_command);
+                err = kportal_nal_cmd(&pcfg);
+                if (err == 0) {
+                        /* hand back both the (possibly updated) command
+                         * block and the request block */
+                        if (copy_to_user((char *)data->ioc_pbuf1, &pcfg,
+                                         sizeof (pcfg)))
+                                err = -EFAULT;
+                        if (copy_to_user((char *)arg, data, sizeof (*data)))
+                                err = -EFAULT;
+                }
+                break;
+        }
+        case IOC_PORTAL_FAIL_NID: {
+                const ptl_handle_ni_t *nip;
+
+                CDEBUG (D_IOCTL, "fail nid: [%d] "LPU64" count %d\n",
+                        data->ioc_nal, data->ioc_nid, data->ioc_count);
+
+                nip = kportal_get_ni (data->ioc_nal);
+                if (nip == NULL)
+                        RETURN (-EINVAL);
+
+                err = PtlFailNid (*nip, data->ioc_nid, data->ioc_count);
+                kportal_put_ni (data->ioc_nal);
+                break;
+        }
+        default:
+                err = -EINVAL;
+                break;
+        }
+
+        RETURN(err);
+}
+
+DECLARE_IOCTL_HANDLER(kportal_ioctl_handler, kportal_ioctl);
+
+/* Module init: initialise Portals, register the router's NAL command
+ * handler and then register the portals ioctl handler.
+ * Returns 0 on success.  If PtlInit() fails its error is returned; if the
+ * router handler cannot be registered, Portals is shut down again with
+ * PtlFini() and the registration error is returned. */
+static int init_kportals_module(void)
+{
+        int rc;
+        ENTRY;
+
+        rc = PtlInit(NULL);
+        if (rc) {
+                CERROR("PtlInit: error %d\n", rc);
+                RETURN(rc);
+        }
+
+        rc = kportal_nal_register(ROUTER, kportal_router_cmd, NULL);
+        if (rc) {
+                /* undo PtlInit() before reporting the failure */
+                PtlFini();
+                CERROR("kportal_nal_register: ROUTER error %d\n", rc);
+                RETURN(rc);
+        }
+
+        libcfs_register_ioctl(&kportal_ioctl_handler);
+
+        RETURN(rc);
+}
+
+/* Module exit: undo init_kportals_module() in reverse order -- deregister
+ * the ioctl handler, drop the router's NAL command handler, then shut
+ * Portals down. */
+static void exit_kportals_module(void)
+{
+ libcfs_deregister_ioctl(&kportal_ioctl_handler);
+ kportal_nal_unregister(ROUTER);
+ PtlFini();
+}
+
+EXPORT_SYMBOL(kportal_nal_register);
+EXPORT_SYMBOL(kportal_nal_unregister);
+EXPORT_SYMBOL(kportal_get_ni);
+EXPORT_SYMBOL(kportal_put_ni);
+EXPORT_SYMBOL(kportal_nal_cmd);
+
+EXPORT_SYMBOL(ptl_err_str);
+EXPORT_SYMBOL(lib_dispatch);
+EXPORT_SYMBOL(PtlMEAttach);
+EXPORT_SYMBOL(PtlMEInsert);
+EXPORT_SYMBOL(PtlMEUnlink);
+EXPORT_SYMBOL(PtlEQAlloc);
+EXPORT_SYMBOL(PtlMDAttach);
+EXPORT_SYMBOL(PtlMDUnlink);
+EXPORT_SYMBOL(PtlNIInit);
+EXPORT_SYMBOL(PtlNIFini);
+EXPORT_SYMBOL(PtlNIDebug);
+EXPORT_SYMBOL(PtlInit);
+EXPORT_SYMBOL(PtlFini);
+EXPORT_SYMBOL(PtlSnprintHandle);
+EXPORT_SYMBOL(PtlPut);
+EXPORT_SYMBOL(PtlGet);
+EXPORT_SYMBOL(PtlEQWait);
+EXPORT_SYMBOL(PtlEQFree);
+EXPORT_SYMBOL(PtlEQGet);
+EXPORT_SYMBOL(PtlGetId);
+EXPORT_SYMBOL(PtlMDBind);
+EXPORT_SYMBOL(lib_iov_nob);
+EXPORT_SYMBOL(lib_copy_iov2buf);
+EXPORT_SYMBOL(lib_copy_buf2iov);
+EXPORT_SYMBOL(lib_extract_iov);
+EXPORT_SYMBOL(lib_kiov_nob);
+EXPORT_SYMBOL(lib_copy_kiov2buf);
+EXPORT_SYMBOL(lib_copy_buf2kiov);
+EXPORT_SYMBOL(lib_extract_kiov);
+EXPORT_SYMBOL(lib_finalize);
+EXPORT_SYMBOL(lib_parse);
+EXPORT_SYMBOL(lib_create_reply_msg);
+EXPORT_SYMBOL(lib_init);
+EXPORT_SYMBOL(lib_fini);
+EXPORT_SYMBOL(dispatch_name);
+
+MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
+MODULE_DESCRIPTION("Portals v3.1");
+MODULE_LICENSE("GPL");
+module_init(init_kportals_module);
+module_exit(exit_kportals_module);
#define DEBUG_SUBSYSTEM S_PTLROUTER
#include <linux/kp30.h>
+#include <linux/kpr.h>
#include <portals/p30.h>
#include <portals/lib-p30.h>
client->md_in_head.length = (args->ioc_size + STDSIZE)
* count;
client->md_in_head.threshold = PTL_MD_THRESH_INF;
- client->md_in_head.options = PTL_MD_OP_PUT;
+ client->md_in_head.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
client->md_in_head.user_ptr = NULL;
client->md_in_head.eventq = client->eq;
memset (client->inbuf, 0, (args->ioc_size + STDSIZE) * count);
client->md_out_head.start = client->outbuf;
client->md_out_head.length = STDSIZE + args->ioc_size;
client->md_out_head.threshold = args->ioc_count;
- client->md_out_head.options = PTL_MD_OP_PUT;
+ client->md_out_head.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
client->md_out_head.user_ptr = NULL;
client->md_out_head.eventq = PTL_EQ_NONE;
/* Bind the outgoing ping header */
if ((rc=PtlMDBind (*nip, client->md_out_head,
- &client->md_out_head_h))) {
+ PTL_UNLINK, &client->md_out_head_h))) {
CERROR ("PtlMDBind error %d\n", rc);
pingcli_shutdown (1);
return NULL;
server->mdout.length = server->evnt.rlength;
server->mdout.start = server->in_buf;
server->mdout.threshold = 1;
- server->mdout.options = PTL_MD_OP_PUT;
+ server->mdout.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
server->mdout.user_ptr = NULL;
server->mdout.eventq = PTL_EQ_NONE;
/* Bind the outgoing buffer */
if ((rc = PtlMDBind (server->ni, server->mdout,
- &server->mdout_h))) {
+ PTL_UNLINK, &server->mdout_h))) {
PDEBUG ("PtlMDBind", rc);
pingsrv_shutdown (1);
return 1;
server->mdin.start = server->in_buf;
server->mdin.length = MAXSIZE;
server->mdin.threshold = 1;
- server->mdin.options = PTL_MD_OP_PUT;
+ server->mdin.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
server->mdin.user_ptr = NULL;
server->mdin.eventq = server->eq;
server->mdin.start = server->in_buf;
server->mdin.length = MAXSIZE;
server->mdin.threshold = 1;
- server->mdin.options = PTL_MD_OP_PUT;
+ server->mdin.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
server->mdin.user_ptr = NULL;
server->mdin.eventq = server->eq;
memset (server->in_buf, 0, STDSIZE);
client->md_in_head.start = client->inbuf;
client->md_in_head.length = STDSIZE;
client->md_in_head.threshold = 1;
- client->md_in_head.options = PTL_MD_OP_PUT;
+ client->md_in_head.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
client->md_in_head.user_ptr = NULL;
client->md_in_head.eventq = client->eq;
memset (client->inbuf, 0, STDSIZE);
client->md_out_head.start = client->outbuf;
client->md_out_head.length = STDSIZE;
client->md_out_head.threshold = 1;
- client->md_out_head.options = PTL_MD_OP_PUT;
+ client->md_out_head.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
client->md_out_head.user_ptr = NULL;
client->md_out_head.eventq = PTL_EQ_NONE;
/* Bind the outgoing ping header */
if ((rc=PtlMDBind (*nip, client->md_out_head,
- &client->md_out_head_h))) {
+ PTL_UNLINK, &client->md_out_head_h))) {
CERROR ("PtlMDBind error %d\n", rc);
pingcli_shutdown (1);
return (NULL);
server->mdout.start = server->in_buf;
server->mdout.length = STDSIZE;
server->mdout.threshold = 1;
- server->mdout.options = PTL_MD_OP_PUT;
+ server->mdout.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
server->mdout.user_ptr = NULL;
server->mdout.eventq = PTL_EQ_NONE;
/* Bind the outgoing buffer */
if ((rc = PtlMDBind (server->ni, server->mdout,
- &server->mdout_h))) {
+ PTL_UNLINK, &server->mdout_h))) {
PDEBUG ("PtlMDBind", rc);
pingsrv_shutdown (1);
return 1;
server->mdin.start = server->in_buf;
server->mdin.length = STDSIZE;
server->mdin.threshold = 1;
- server->mdin.options = PTL_MD_OP_PUT;
+ server->mdin.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
server->mdin.user_ptr = NULL;
server->mdin.eventq = server->eq;
server->mdin.start = server->in_buf;
server->mdin.length = STDSIZE;
server->mdin.threshold = 1;
- server->mdin.options = PTL_MD_OP_PUT;
+ server->mdin.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
server->mdin.user_ptr = NULL;
server->mdin.eventq = server->eq;
memset (server->in_buf, 0, STDSIZE);
}
-/* FIXME cfs temporary workaround! FIXME
- * global time out value
- */
-int __tcpnal_eqwait_timeout_value = 0;
-int __tcpnal_eqwait_timedout = 0;
+/* Acquire the procbridge mutex belonging to NAL 'n'.
+ * 'flags' is accepted for the sake of the callback signature only; it is
+ * neither read nor written here. */
+static void procbridge_lock(nal_t * n, unsigned long *flags)
+{
+        procbridge pb = (procbridge)((bridge)n->nal_data)->local;
+
+        pthread_mutex_lock(&pb->mutex);
+}
+
+/* Release the procbridge mutex belonging to NAL 'n'.
+ * 'flags' is accepted for the sake of the callback signature only; it is
+ * neither read nor written here. */
+static void procbridge_unlock(nal_t * n, unsigned long *flags)
+{
+        procbridge pb = (procbridge)((bridge)n->nal_data)->local;
+
+        pthread_mutex_unlock(&pb->mutex);
+}
/* Function: yield
* Arguments: pid:
* overload it to explicitly block until signalled by the
* lower half.
*/
-static void procbridge_yield(nal_t *n)
+static int procbridge_yield(nal_t *n, unsigned long *flags, int milliseconds)
{
bridge b=(bridge)n->nal_data;
procbridge p=(procbridge)b->local;
- pthread_mutex_lock(&p->mutex);
- if (!__tcpnal_eqwait_timeout_value) {
+ if (milliseconds == 0)
+ return 0;
+
+ if (milliseconds < 0) {
pthread_cond_wait(&p->cond,&p->mutex);
} else {
+ struct timeval then;
struct timeval now;
struct timespec timeout;
- gettimeofday(&now, NULL);
- timeout.tv_sec = now.tv_sec + __tcpnal_eqwait_timeout_value;
- timeout.tv_nsec = now.tv_usec * 1000;
+ gettimeofday(&then, NULL);
+ timeout.tv_sec = then.tv_sec + milliseconds/1000;
+ timeout.tv_nsec = then.tv_usec * 1000 + milliseconds % 1000 * 1000000;
+ if (timeout.tv_nsec >= 1000000000) {
+ timeout.tv_sec++;
+ timeout.tv_nsec -= 1000000000;
+ }
+
+ pthread_cond_timedwait(&p->cond, &p->mutex, &timeout);
- __tcpnal_eqwait_timedout =
- pthread_cond_timedwait(&p->cond, &p->mutex, &timeout);
+ gettimeofday(&now, NULL);
+ milliseconds -= (now.tv_sec - then.tv_sec) * 1000 +
+ (now.tv_usec - then.tv_usec) / 1000;
+
+ if (milliseconds < 0)
+ milliseconds = 0;
}
- pthread_mutex_unlock(&p->mutex);
+
+ return (milliseconds);
}
-static void procbridge_lock(nal_t * nal, unsigned long *flags){}
-static void procbridge_unlock(nal_t * nal, unsigned long *flags){}
/* api_nal
* the interface vector to allow the generic code to access
* this nal. this is seperate from the library side nal_cb.
pthread_mutex_init(&p->mutex,0);
pthread_cond_init(&p->cond, 0);
p->nal_flags = 0;
- pthread_mutex_init(&p->nal_cb_lock, 0);
/* initialize notifier */
if (socketpair(AF_UNIX, SOCK_STREAM, 0, p->notifier)) {
bridge b = (bridge) nal->nal_data;
procbridge p = (procbridge) b->local;
- pthread_mutex_lock(&p->nal_cb_lock);
+ pthread_mutex_lock(&p->mutex);
}
bridge b = (bridge)nal->nal_data;
procbridge p = (procbridge) b->local;
- pthread_mutex_unlock(&p->nal_cb_lock);
+ pthread_mutex_unlock(&p->mutex);
}
+/* Event hook (installed as the NAL's cb_callback): invoke the event
+ * queue's handler, if one is set, and then wake every thread waiting on
+ * the procbridge condition variable.
+ *
+ * 'private' is unused here.
+ * NOTE(review): p->mutex is not taken in this function; the comment
+ * below states it is already held -- presumably by the caller; confirm. */
+static void nal_callback(nal_cb_t *nal, void *private,
+ lib_eq_t *eq, ptl_event_t *ev)
+{
+ bridge b = (bridge)nal->nal_data;
+ procbridge p = (procbridge) b->local;
+
+ /* holding p->mutex */
+ if (eq->event_callback != NULL)
+ eq->event_callback(ev);
+
+ /* broadcast unconditionally, whether or not a handler ran */
+ pthread_cond_broadcast(&p->cond);
+}
static int nal_dist(nal_cb_t *nal,
ptl_nid_t nid,
return 0;
}
-static void wakeup_topside(void *z)
+static void check_stopping(void *z)
{
bridge b = z;
procbridge p = b->local;
- int stop;
+ if ((p->nal_flags & NAL_FLAG_STOPPING) == 0)
+ return;
+
pthread_mutex_lock(&p->mutex);
- stop = p->nal_flags & NAL_FLAG_STOPPING;
- if (stop)
- p->nal_flags |= NAL_FLAG_STOPPED;
+ p->nal_flags |= NAL_FLAG_STOPPED;
pthread_cond_broadcast(&p->cond);
pthread_mutex_unlock(&p->mutex);
- if (stop)
- pthread_exit(0);
+ pthread_exit(0);
}
b->nal_cb->cb_printf=nal_printf;
b->nal_cb->cb_cli=nal_cli;
b->nal_cb->cb_sti=nal_sti;
+ b->nal_cb->cb_callback=nal_callback;
b->nal_cb->cb_dist=nal_dist;
pid_request = args->nia_requested_pid;
performs an operation and returns to blocking mode. we
overload this function to inform the api side that
it may be interested in looking at the event queue */
- register_thunk(wakeup_topside,b);
+ register_thunk(check_stopping,b);
timer_loop();
}
return(0);
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
-COMPILE = $(CC) -Wall -g -I$(srcdir)/../include
+# ../ for <portals/*.h>, ../../ for <config.h>
+COMPILE = $(CC) -Wall -g -I$(srcdir)/../include -I$(srcdir)/../../include
LINK = $(CC) -o $@
if LIBLUSTRE
/* avoid integer overflow */
ptlbd_size[i] = (16*1024*((1024*1024) >> BLOCK_SIZE_BITS));
ptlbd_hardsect_size[i] = 4096;
- ptlbd_max_sectors[i] = PTL_MD_MAX_IOV * (4096/512);
+ ptlbd_max_sectors[i] = PTLRPC_MAX_BRW_PAGES * (4096/512);
}
return 0;
COMMON_SOURCES = client.c recover.c connection.c niobuf.c pack_generic.c \
events.c ptlrpc_module.c service.c pinger.c recov_thread.c llog_net.c \
- llog_client.c llog_server.c import.c ptlrpcd.c ptlrpc_internal.h \
- $(LDLM_COMM_SOURCES)
+ llog_client.c llog_server.c import.c ptlrpcd.c pers.c \
+ ptlrpc_internal.h $(LDLM_COMM_SOURCES)
if LIBLUSTRE
spin_lock_init(&desc->bd_lock);
init_waitqueue_head(&desc->bd_waitq);
- desc->bd_max_pages = npages;
- desc->bd_page_count = 0;
- desc->bd_md_h = PTL_HANDLE_NONE;
+ desc->bd_max_iov = npages;
+ desc->bd_iov_count = 0;
+ desc->bd_md_h = PTL_INVALID_HANDLE;
desc->bd_portal = portal;
desc->bd_type = type;
void ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
struct page *page, int pageoffset, int len)
{
-#ifdef __KERNEL__
- ptl_kiov_t *kiov = &desc->bd_iov[desc->bd_page_count];
-#else
- struct iovec *iov = &desc->bd_iov[desc->bd_page_count];
-#endif
- LASSERT(desc->bd_page_count < desc->bd_max_pages);
+ LASSERT(desc->bd_iov_count < desc->bd_max_iov);
LASSERT(page != NULL);
LASSERT(pageoffset >= 0);
LASSERT(len > 0);
LASSERT(pageoffset + len <= PAGE_SIZE);
-#ifdef __KERNEL__
- kiov->kiov_page = page;
- kiov->kiov_offset = pageoffset;
- kiov->kiov_len = len;
-#else
- iov->iov_base = page->addr + pageoffset;
- iov->iov_len = len;
-#endif
- desc->bd_page_count++;
desc->bd_nob += len;
+
+ pers_bulk_add_page(desc, page, pageoffset, len);
}
void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
ENTRY;
LASSERT(desc != NULL);
- LASSERT(desc->bd_page_count != 0x5a5a5a5a); /* not freed already */
+ LASSERT(desc->bd_iov_count != 0x5a5a5a5a); /* not freed already */
LASSERT(!desc->bd_network_rw); /* network hands off or */
LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL));
if (desc->bd_export)
class_import_put(desc->bd_import);
OBD_FREE(desc, offsetof(struct ptlrpc_bulk_desc,
- bd_iov[desc->bd_max_pages]));
+ bd_iov[desc->bd_max_iov]));
EXIT;
}
return;
rc = PtlMDUnlink (request->rq_reply_md_h);
- if (rc == PTL_INV_MD) {
+ if (rc == PTL_MD_INVALID) {
LASSERT (!ptlrpc_client_receiving_reply(request));
return;
}
#endif
#include <linux/obd_class.h>
#include <linux/lustre_net.h>
+#include "ptlrpc_internal.h"
struct ptlrpc_ni ptlrpc_interfaces[NAL_MAX_NR];
int ptlrpc_ninterfaces;
unsigned long flags;
ENTRY;
- LASSERT (ev->type == PTL_EVENT_SENT ||
+ LASSERT (ev->type == PTL_EVENT_SEND_END ||
ev->type == PTL_EVENT_UNLINK);
LASSERT (ev->unlinked);
- DEBUG_REQ((ev->status == PTL_OK) ? D_NET : D_ERROR, req,
- "type %d, status %d", ev->type, ev->status);
+ DEBUG_REQ((ev->ni_fail_type == PTL_NI_OK) ? D_NET : D_ERROR, req,
+ "type %d, status %d", ev->type, ev->ni_fail_type);
if (ev->type == PTL_EVENT_UNLINK ||
- ev->status != PTL_OK) {
+ ev->ni_fail_type != PTL_NI_OK) {
/* Failed send: make it seem like the reply timed out, just
* like failing sends in client.c does currently... */
unsigned long flags;
ENTRY;
- LASSERT (ev->type == PTL_EVENT_PUT ||
+ LASSERT (ev->type == PTL_EVENT_PUT_END ||
ev->type == PTL_EVENT_UNLINK);
LASSERT (ev->unlinked);
LASSERT (ev->mem_desc.start == req->rq_repmsg);
LASSERT (ev->offset == 0);
LASSERT (ev->mlength <= req->rq_replen);
- DEBUG_REQ((ev->status == PTL_OK) ? D_NET : D_ERROR, req,
- "type %d, status %d", ev->type, ev->status);
+ DEBUG_REQ((ev->ni_fail_type == PTL_NI_OK) ? D_NET : D_ERROR, req,
+ "type %d, status %d", ev->type, ev->ni_fail_type);
spin_lock_irqsave (&req->rq_lock, flags);
LASSERT (req->rq_receiving_reply);
req->rq_receiving_reply = 0;
- if (ev->type == PTL_EVENT_PUT &&
- ev->status == PTL_OK) {
+ if (ev->type == PTL_EVENT_PUT_END &&
+ ev->ni_fail_type == PTL_NI_OK) {
req->rq_replied = 1;
req->rq_nob_received = ev->mlength;
}
ENTRY;
LASSERT ((desc->bd_type == BULK_PUT_SINK &&
- ev->type == PTL_EVENT_PUT) ||
+ ev->type == PTL_EVENT_PUT_END) ||
(desc->bd_type == BULK_GET_SOURCE &&
- ev->type == PTL_EVENT_GET) ||
+ ev->type == PTL_EVENT_GET_END) ||
ev->type == PTL_EVENT_UNLINK);
LASSERT (ev->unlinked);
- CDEBUG((ev->status == PTL_OK) ? D_NET : D_ERROR,
+ CDEBUG((ev->ni_fail_type == PTL_NI_OK) ? D_NET : D_ERROR,
"event type %d, status %d, desc %p\n",
- ev->type, ev->status, desc);
+ ev->type, ev->ni_fail_type, desc);
spin_lock_irqsave (&desc->bd_lock, flags);
desc->bd_network_rw = 0;
if (ev->type != PTL_EVENT_UNLINK &&
- ev->status == PTL_OK) {
+ ev->ni_fail_type == PTL_NI_OK) {
desc->bd_success = 1;
desc->bd_nob_transferred = ev->mlength;
}
long flags;
ENTRY;
- LASSERT (ev->type == PTL_EVENT_PUT ||
+ LASSERT (ev->type == PTL_EVENT_PUT_END ||
ev->type == PTL_EVENT_UNLINK);
LASSERT ((char *)ev->mem_desc.start >= rqbd->rqbd_buffer);
LASSERT ((char *)ev->mem_desc.start + ev->offset + ev->mlength <=
rqbd->rqbd_buffer + service->srv_buf_size);
- CDEBUG((ev->status == PTL_OK) ? D_NET : D_ERROR,
+ CDEBUG((ev->ni_fail_type == PTL_NI_OK) ? D_NET : D_ERROR,
"event type %d, status %d, service %s\n",
- ev->type, ev->status, service->srv_name);
+ ev->type, ev->ni_fail_type, service->srv_name);
if (ev->unlinked) {
/* If this is the last request message to fit in the
req = &rqbd->rqbd_req;
memset(req, 0, sizeof (*req));
} else {
- LASSERT (ev->type == PTL_EVENT_PUT);
- if (ev->status != PTL_OK) {
+ LASSERT (ev->type == PTL_EVENT_PUT_END);
+ if (ev->ni_fail_type != PTL_NI_OK) {
/* We moaned above already... */
return;
}
* size to non-zero if this was a successful receive. */
req->rq_xid = ev->match_bits;
req->rq_reqmsg = ev->mem_desc.start + ev->offset;
- if (ev->type == PTL_EVENT_PUT &&
- ev->status == PTL_OK)
+ if (ev->type == PTL_EVENT_PUT_END &&
+ ev->ni_fail_type == PTL_NI_OK)
req->rq_reqlen = ev->mlength;
- req->rq_arrival_time = ev->arrival_time;
+ do_gettimeofday(&req->rq_arrival_time);
req->rq_peer.peer_nid = ev->initiator.nid;
req->rq_peer.peer_ni = rqbd->rqbd_srv_ni->sni_ni;
req->rq_rqbd = rqbd;
unsigned long flags;
ENTRY;
- LASSERT (ev->type == PTL_EVENT_SENT ||
+ LASSERT (ev->type == PTL_EVENT_SEND_END ||
ev->type == PTL_EVENT_ACK ||
ev->type == PTL_EVENT_UNLINK);
unsigned long flags;
ENTRY;
- LASSERT (ev->type == PTL_EVENT_SENT ||
+ LASSERT (ev->type == PTL_EVENT_SEND_END ||
ev->type == PTL_EVENT_UNLINK ||
(desc->bd_type == BULK_PUT_SOURCE &&
ev->type == PTL_EVENT_ACK) ||
(desc->bd_type == BULK_GET_SINK &&
- ev->type == PTL_EVENT_REPLY));
+ ev->type == PTL_EVENT_REPLY_END));
- CDEBUG((ev->status == PTL_OK) ? D_NET : D_ERROR,
+ CDEBUG((ev->ni_fail_type == PTL_NI_OK) ? D_NET : D_ERROR,
"event type %d, status %d, desc %p\n",
- ev->type, ev->status, desc);
+ ev->type, ev->ni_fail_type, desc);
spin_lock_irqsave (&desc->bd_lock, flags);
if ((ev->type == PTL_EVENT_ACK ||
- ev->type == PTL_EVENT_REPLY) &&
- ev->status == PTL_OK) {
+ ev->type == PTL_EVENT_REPLY_END) &&
+ ev->ni_fail_type == PTL_NI_OK) {
/* We heard back from the peer, so even if we get this
* before the SENT event (oh yes we can), we know we
* read/wrote the peer buffer and how much... */
int ptlrpc_uuid_to_peer (struct obd_uuid *uuid, struct ptlrpc_peer *peer)
{
struct ptlrpc_ni *pni;
- struct lustre_peer lpeer;
+ ptl_nid_t peer_nid;
+ ptl_handle_ni_t peer_ni;
int i;
- int rc = lustre_uuid_to_peer (uuid->uuid, &lpeer);
-
+ char str[20];
+ int rc = lustre_uuid_to_peer(uuid->uuid,
+ &peer_ni, &peer_nid);
if (rc != 0)
RETURN (rc);
for (i = 0; i < ptlrpc_ninterfaces; i++) {
pni = &ptlrpc_interfaces[i];
- if (!memcmp(&lpeer.peer_ni, &pni->pni_ni_h,
- sizeof (lpeer.peer_ni))) {
- peer->peer_nid = lpeer.peer_nid;
+ if (!memcmp(&peer_ni, &pni->pni_ni_h,
+ sizeof (peer_ni))) {
+ peer->peer_nid = peer_nid;
peer->peer_ni = pni;
return (0);
}
}
- CERROR("Can't find ptlrpc interface for "LPX64" ni handle %08lx."LPX64"\n",
- lpeer.peer_nid, lpeer.peer_ni.nal_idx, lpeer.peer_ni.cookie);
+ PtlSnprintHandle(str, sizeof(str), peer_ni);
+ CERROR("Can't find ptlrpc interface for "LPX64" ni %s\n",
+ peer_nid, str);
return (-ENOENT);
}
kportal_put_ni (pni->pni_number);
return;
- case PTL_EQ_INUSE:
+ case PTL_EQ_IN_USE:
if (retries != 0)
CWARN("Event queue for %s still busy\n",
pni->pni_name);
int ptlrpc_ni_init(int number, char *name, struct ptlrpc_ni *pni)
{
int rc;
+ char str[20];
ptl_handle_ni_t *nip = kportal_get_ni (number);
if (nip == NULL) {
return (-ENOENT);
}
- CDEBUG (D_NET, "init %d %s: nal_idx %ld\n", number, name, nip->nal_idx);
+ PtlSnprintHandle(str, sizeof(str), *nip);
+ CDEBUG (D_NET, "init %d %s: %s\n", number, name, str);
pni->pni_name = name;
pni->pni_number = number;
pni->pni_ni_h = *nip;
- pni->pni_eq_h = PTL_HANDLE_NONE;
+ pni->pni_eq_h = PTL_INVALID_HANDLE;
-#ifdef __KERNEL__
- /* kernel: portals calls the callback when the event is added to the
- * queue, so we don't care if we lose events */
- rc = PtlEQAlloc(pni->pni_ni_h, 1024, ptlrpc_master_callback,
+ rc = PtlEQAlloc(pni->pni_ni_h, PTLRPC_NUM_EQ, PTLRPC_EQ_CALLBACK,
&pni->pni_eq_h);
-#else
- /* liblustre: no asynchronous callback and allocate a nice big event
- * queue so we don't drop any events... */
- rc = PtlEQAlloc(pni->pni_ni_h, 10240, NULL, &pni->pni_eq_h);
-#endif
+
if (rc != PTL_OK)
GOTO (fail, rc = -ENOMEM);
{
ptl_event_t ev;
int rc;
+ int i;
ENTRY;
- if (timeout) {
- rc = PtlEQWait_timeout(ptlrpc_interfaces[0].pni_eq_h, &ev, timeout);
- } else {
- rc = PtlEQGet (ptlrpc_interfaces[0].pni_eq_h, &ev);
- }
+ rc = PtlEQPoll(&ptlrpc_interfaces[0].pni_eq_h, 1, timeout * 1000,
+ &ev, &i);
if (rc == PTL_EQ_EMPTY)
RETURN(0);
LASSERT (rc == PTL_EQ_DROPPED || rc == PTL_OK);
-#ifndef __KERNEL__
/* liblustre: no asynch callback so we can't affort to miss any
* events... */
if (rc == PTL_EQ_DROPPED) {
}
ptlrpc_master_callback (&ev);
-#endif
RETURN(1);
}
+int liblustre_waiting = 0;
+
int
liblustre_wait_event (int timeout)
{
struct liblustre_wait_callback *llwc;
int found_something = 0;
- /* First check for any new events */
- if (liblustre_check_events(0))
- found_something = 1;
+ /* single threaded recursion check... */
+ liblustre_waiting = 1;
- /* Now give all registered callbacks a bite at the cherry */
- list_for_each(tmp, &liblustre_wait_callbacks) {
- llwc = list_entry(tmp, struct liblustre_wait_callback,
- llwc_list);
-
- if (llwc->llwc_fn(llwc->llwc_arg))
+ for (;;) {
+ /* Deal with all pending events */
+ while (liblustre_check_events(0))
found_something = 1;
- }
- /* return to caller if something happened */
- if (found_something)
- return 1;
-
- /* block for an event, returning immediately on timeout */
- if (!liblustre_check_events(timeout))
- return 0;
-
- /* an event occurred; let all registered callbacks progress... */
- list_for_each(tmp, &liblustre_wait_callbacks) {
- llwc = list_entry(tmp, struct liblustre_wait_callback,
- llwc_list);
+ /* Give all registered callbacks a bite at the cherry */
+ list_for_each(tmp, &liblustre_wait_callbacks) {
+ llwc = list_entry(tmp, struct liblustre_wait_callback,
+ llwc_list);
- if (llwc->llwc_fn(llwc->llwc_arg))
- found_something = 1;
+ if (llwc->llwc_fn(llwc->llwc_arg))
+ found_something = 1;
+ }
+
+ if (found_something || timeout == 0)
+ break;
+
+ /* Nothing so far, but I'm allowed to block... */
+ found_something = liblustre_check_events(timeout);
+ if (!found_something) /* still nothing */
+ break; /* I timed out */
}
- /* ...and tell caller something happened */
- return 1;
+ liblustre_waiting = 0;
+
+ return found_something;
}
-#endif
+
+/* Event-queue handler used as PTLRPC_EQ_CALLBACK when liblustre is built
+ * against Cray portals.
+ *
+ * Always returns 0.  Not 'static': ptlrpc_internal.h declares this
+ * function with external linkage (it has to be visible before
+ * ptlrpc_ni_init() references it), and a static definition following
+ * that declaration does not compile. */
+int cray_portals_callback(ptl_event_t *ev)
+{
+        /* We get a callback from the client Cray portals implementation
+         * whenever anyone calls PtlEQPoll(), and an event queue with a
+         * callback handler has outstanding events.
+         *
+         * If it's not liblustre calling PtlEQPoll(), this lets us know we
+         * have outstanding events which we handle with
+         * liblustre_wait_event().
+         *
+         * Otherwise, we're already eagerly consuming events and we'd
+         * handle events out of order if we recursed. */
+        if (!liblustre_waiting)
+                liblustre_wait_event(0);
+
+        /* 'ev' itself is not inspected: liblustre_wait_event() pulls the
+         * pending events off the queue on its own. */
+        return 0;
+}
+#endif /* __KERNEL__ */
int ptlrpc_init_portals(void)
{
#else /* !__KERNEL__ */
int llog_origin_connect(struct llog_ctxt *ctxt, int count,
- struct llog_logid *logid, struct llog_gen *gen)
+ struct llog_logid *logid, struct llog_gen *gen,
+ struct obd_uuid *uuid)
{
return 0;
}
md.start = base;
md.length = len;
md.threshold = (ack == PTL_ACK_REQ) ? 2 : 1;
- md.options = 0;
+ md.options = PTLRPC_MD_OPTIONS;
md.user_ptr = cbid;
md.eventq = conn->c_peer.peer_ni->pni_eq_h;
obd_fail_loc |= OBD_FAIL_ONCE | OBD_FAILED;
}
- rc = PtlMDBind (conn->c_peer.peer_ni->pni_ni_h, md, mdh);
+ rc = PtlMDBind (conn->c_peer.peer_ni->pni_ni_h, md,
+ PTL_UNLINK, mdh);
if (rc != PTL_OK) {
CERROR ("PtlMDBind failed: %d\n", rc);
- LASSERT (rc == PTL_NOSPACE);
+ LASSERT (rc == PTL_NO_SPACE);
RETURN (-ENOMEM);
}
RETURN (0);
}
+/* Point memory descriptor 'md' at the buffers of bulk descriptor 'desc'.
+ *
+ * If the portals implementation requires an iov, or the descriptor holds
+ * at least one iov entry, the MD is described as a vector: the
+ * PTLRPC_PTL_MD_IOV option bits are OR-ed into md->options, md->start
+ * points at the iov array and md->niov is its entry count.  Otherwise
+ * md->start is taken from ptl_iov_base() of the first entry and
+ * md->niov is left alone.
+ *
+ * md->options is only OR-ed into, so the caller must have set it
+ * beforehand. */
+static void ptlrpc_fill_md(ptl_md_t *md, struct ptlrpc_bulk_desc *desc)
+{
+        LASSERT(ptl_md_max_iovs() == 0 ||
+                (desc->bd_iov_count <= ptl_md_max_iovs()));
+
+        if (!ptl_requires_iov() && desc->bd_iov_count <= 0) {
+                /* no vector needed: single contiguous base */
+                md->start = ptl_iov_base(&desc->bd_iov[0]);
+                return;
+        }
+
+        md->niov = desc->bd_iov_count;
+        md->start = &desc->bd_iov[0];
+        md->options |= PTLRPC_PTL_MD_IOV;
+}
+
int ptlrpc_start_bulk_transfer (struct ptlrpc_bulk_desc *desc)
{
int rc;
desc->bd_success = 0;
peer = &desc->bd_export->exp_connection->c_peer;
- md.start = &desc->bd_iov[0];
- md.niov = desc->bd_page_count;
md.length = desc->bd_nob;
md.eventq = peer->peer_ni->pni_eq_h;
md.threshold = 2; /* SENT and ACK/REPLY */
-#ifdef __KERNEL__
- md.options = PTL_MD_KIOV;
-#else
- md.options = PTL_MD_IOV;
-#endif
+ md.options = PTLRPC_MD_OPTIONS;
+
+ ptlrpc_fill_md(&md, desc);
md.user_ptr = &desc->bd_cbid;
LASSERT (desc->bd_cbid.cbid_fn == server_bulk_callback);
LASSERT (desc->bd_cbid.cbid_arg == desc);
/* NB total length may be 0 for a read past EOF, so we send a 0
* length bulk, since the client expects a bulk event. */
- rc = PtlMDBind(peer->peer_ni->pni_ni_h, md, &desc->bd_md_h);
+ rc = PtlMDBind(peer->peer_ni->pni_ni_h, md,
+ PTL_UNLINK, &desc->bd_md_h);
if (rc != PTL_OK) {
CERROR("PtlMDBind failed: %d\n", rc);
- LASSERT (rc == PTL_NOSPACE);
+ LASSERT (rc == PTL_NO_SPACE);
RETURN(-ENOMEM);
}
* happened. */
rc = PtlMDUnlink (desc->bd_md_h);
- if (rc == PTL_INV_MD) {
+ if (rc == PTL_MD_INVALID) {
LASSERT(!ptlrpc_bulk_active(desc));
return;
}
/* NB no locking required until desc is on the network */
LASSERT (desc->bd_nob > 0);
LASSERT (!desc->bd_network_rw);
- LASSERT (desc->bd_page_count <= PTL_MD_MAX_PAGES);
+ LASSERT (desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES);
LASSERT (desc->bd_req != NULL);
LASSERT (desc->bd_type == BULK_PUT_SINK ||
desc->bd_type == BULK_GET_SOURCE);
peer = &desc->bd_import->imp_connection->c_peer;
- md.start = &desc->bd_iov[0];
- md.niov = desc->bd_page_count;
md.length = desc->bd_nob;
md.eventq = peer->peer_ni->pni_eq_h;
md.threshold = 1; /* PUT or GET */
- md.options = (desc->bd_type == BULK_GET_SOURCE) ?
- PTL_MD_OP_GET : PTL_MD_OP_PUT;
-#ifdef __KERNEL__
- md.options |= PTL_MD_KIOV;
-#else
- md.options |= PTL_MD_IOV;
-#endif
+ md.options = PTLRPC_MD_OPTIONS |
+ ((desc->bd_type == BULK_GET_SOURCE) ?
+ PTL_MD_OP_GET : PTL_MD_OP_PUT);
+ ptlrpc_fill_md(&md, desc);
md.user_ptr = &desc->bd_cbid;
LASSERT (desc->bd_cbid.cbid_fn == client_bulk_callback);
LASSERT (desc->bd_cbid.cbid_arg == desc);
PTL_UNLINK, PTL_INS_AFTER, &me_h);
if (rc != PTL_OK) {
CERROR("PtlMEAttach failed: %d\n", rc);
- LASSERT (rc == PTL_NOSPACE);
+ LASSERT (rc == PTL_NO_SPACE);
RETURN (-ENOMEM);
}
rc = PtlMDAttach(me_h, md, PTL_UNLINK, &desc->bd_md_h);
if (rc != PTL_OK) {
CERROR("PtlMDAttach failed: %d\n", rc);
- LASSERT (rc == PTL_NOSPACE);
+ LASSERT (rc == PTL_NO_SPACE);
desc->bd_network_rw = 0;
rc2 = PtlMEUnlink (me_h);
LASSERT (rc2 == PTL_OK);
* happened. */
rc = PtlMDUnlink (desc->bd_md_h);
- if (rc == PTL_INV_MD) {
+ if (rc == PTL_MD_INVALID) {
LASSERT(!ptlrpc_bulk_active(desc));
return;
}
PTL_INS_AFTER, &reply_me_h);
if (rc != PTL_OK) {
CERROR("PtlMEAttach failed: %d\n", rc);
- LASSERT (rc == PTL_NOSPACE);
+ LASSERT (rc == PTL_NO_SPACE);
GOTO(cleanup_repmsg, rc = -ENOMEM);
}
reply_md.start = request->rq_repmsg;
reply_md.length = request->rq_replen;
reply_md.threshold = 1;
- reply_md.options = PTL_MD_OP_PUT;
+ reply_md.options = PTLRPC_MD_OPTIONS | PTL_MD_OP_PUT;
reply_md.user_ptr = &request->rq_reply_cbid;
reply_md.eventq = connection->c_peer.peer_ni->pni_eq_h;
&request->rq_reply_md_h);
if (rc != PTL_OK) {
CERROR("PtlMDAttach failed: %d\n", rc);
- LASSERT (rc == PTL_NOSPACE);
+ LASSERT (rc == PTL_NO_SPACE);
- GOTO(cleanup_me, rc -ENOMEM);
+ GOTO(cleanup_me, rc = -ENOMEM);
}
ptl_md_t md;
ptl_handle_me_t me_h;
- CDEBUG(D_NET, "PtlMEAttach: portal %d on %s h %lx."LPX64"\n",
- service->srv_req_portal, srv_ni->sni_ni->pni_name,
- srv_ni->sni_ni->pni_ni_h.nal_idx,
- srv_ni->sni_ni->pni_ni_h.cookie);
+ CDEBUG(D_NET, "PtlMEAttach: portal %d on %s\n",
+ service->srv_req_portal, srv_ni->sni_ni->pni_name);
if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_PTLRPC_RQBD))
return (-ENOMEM);
LASSERT(rqbd->rqbd_refcount == 0);
rqbd->rqbd_refcount = 1;
- md.start = rqbd->rqbd_buffer;
- md.length = service->srv_buf_size;
- md.max_size = service->srv_max_req_size;
- md.threshold = PTL_MD_THRESH_INF;
- md.options = PTL_MD_OP_PUT | PTL_MD_MAX_SIZE | PTL_MD_AUTO_UNLINK;
- md.user_ptr = &rqbd->rqbd_cbid;
- md.eventq = srv_ni->sni_ni->pni_eq_h;
+ md.start = rqbd->rqbd_buffer;
+ md.length = service->srv_buf_size;
+ md.max_size = service->srv_max_req_size;
+ md.threshold = PTL_MD_THRESH_INF;
+ md.options = PTLRPC_MD_OPTIONS | PTL_MD_OP_PUT | PTL_MD_MAX_SIZE;
+ md.user_ptr = &rqbd->rqbd_cbid;
+ md.eventq = srv_ni->sni_ni->pni_eq_h;
rc = PtlMDAttach(me_h, md, PTL_UNLINK, &rqbd->rqbd_md_h);
if (rc == PTL_OK)
return (0);
CERROR("PtlMDAttach failed: %d; \n", rc);
- LASSERT (rc == PTL_NOSPACE);
+ LASSERT (rc == PTL_NO_SPACE);
rc = PtlMEUnlink (me_h);
LASSERT (rc == PTL_OK);
rqbd->rqbd_refcount = 0;
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+#ifndef __KERNEL__
+#include <errno.h>
+#include <signal.h>
+#include <liblustre.h>
+#endif
+
+#include <linux/obd_support.h>
+#include <linux/obd_class.h>
+#include <linux/lustre_lib.h>
+#include <linux/lustre_ha.h>
+#include <linux/lustre_import.h>
+
+#include "ptlrpc_internal.h"
+
+#ifdef __KERNEL__
+#ifndef CRAY_PORTALS
+/* Kernel build without CRAY_PORTALS: append one page fragment to the bulk
+ * descriptor as a kiov entry (page, offset within the page, length) and
+ * bump the entry count.  No bounds check is done here. */
+void pers_bulk_add_page(struct ptlrpc_bulk_desc *desc, struct page *page,
+                        int pageoffset, int len)
+{
+        ptl_kiov_t *slot = desc->bd_iov + desc->bd_iov_count;
+
+        desc->bd_iov_count++;
+
+        slot->kiov_len    = len;
+        slot->kiov_offset = pageoffset;
+        slot->kiov_page   = page;
+}
+#else
+/* Kernel build with CRAY_PORTALS: append one page fragment to the bulk
+ * descriptor as an iovec whose base is the fragment's physical address
+ * (page_to_phys(page) + pageoffset) and whose length is 'len', then bump
+ * the entry count.  No bounds check is done here. */
+void pers_bulk_add_page(struct ptlrpc_bulk_desc *desc, struct page *page,
+ int pageoffset, int len)
+{
+ struct iovec *iov = &desc->bd_iov[desc->bd_iov_count];
+
+ /* Should get a compiler warning if sizeof(physaddr) > sizeof(void *) */
+ iov->iov_base = (void *)(page_to_phys(page) + pageoffset);
+ iov->iov_len = len;
+
+ desc->bd_iov_count++;
+}
+#endif
+
+#else /* !__KERNEL__ */
+
+/* Return 1 if 'candidate' begins at the byte immediately following the
+ * end of 'existing' (so the two describe one contiguous range), else 0. */
+int can_merge_iovs(struct iovec *existing, struct iovec *candidate)
+{
+        return (existing->iov_base + existing->iov_len ==
+                candidate->iov_base);
+}
+/* Non-kernel build: add one page fragment (page->addr + pageoffset, 'len'
+ * bytes) to the bulk descriptor's iovec array.
+ *
+ * The fragment is first written into the next free slot; if it turns out
+ * to be contiguous with the previous entry, that entry is lengthened
+ * instead and the count is left unchanged, otherwise the new slot is
+ * kept by bumping the count.  No bounds check is done here. */
+void pers_bulk_add_page(struct ptlrpc_bulk_desc *desc, struct page *page,
+ int pageoffset, int len)
+{
+ struct iovec *iov = &desc->bd_iov[desc->bd_iov_count];
+
+ iov->iov_base = page->addr + pageoffset;
+ iov->iov_len = len;
+
+ /* (iov - 1) is only looked at when an earlier entry exists */
+ if (desc->bd_iov_count > 0 && can_merge_iovs(iov - 1, iov)) {
+ (iov - 1)->iov_len += len;
+ } else {
+ desc->bd_iov_count++;
+ }
+}
+#endif
static DECLARE_MUTEX(pinger_sem);
static struct list_head pinger_imports = LIST_HEAD_INIT(pinger_imports);
-#ifdef __KERNEL__
static struct ptlrpc_thread *pinger_thread = NULL;
int ptlrpc_ping(struct obd_import *imp)
RETURN(rc);
}
+#ifdef __KERNEL__
static int ptlrpc_pinger_main(void *arg)
{
struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg;
#endif
}
-#else
+#else /* !__KERNEL__ */
+
/* XXX
* the current implementation of pinger in liblustre is not optimized
*/
int pd_recursion;
unsigned long pd_this_ping;
unsigned long pd_next_ping;
- struct ptlrpc_request_set *pd_set;
+ int pd_force_check;
} pinger_args;
static int pinger_check_rpcs(void *arg)
{
unsigned long curtime = time(NULL);
- struct ptlrpc_request *req;
- struct ptlrpc_request_set *set;
struct list_head *iter;
struct pinger_data *pd = &pinger_args;
- int rc;
/* prevent recursion */
if (pd->pd_recursion++) {
CDEBUG(D_HA, "pinger: recursion! quit\n");
- LASSERT(pd->pd_set);
pd->pd_recursion--;
return 0;
}
/* have we reached ping point? */
- if (!pd->pd_set && pd->pd_next_ping > curtime) {
+ if (pd->pd_next_ping > curtime && !pd->pd_force_check) {
pd->pd_recursion--;
return 0;
}
- /* if we have rpc_set already, continue processing it */
- if (pd->pd_set) {
- LASSERT(pd->pd_this_ping);
- set = pd->pd_set;
- goto do_check_set;
- }
+ if (pd->pd_force_check)
+ pd->pd_force_check = 0;
pd->pd_this_ping = curtime;
- pd->pd_set = ptlrpc_prep_set();
- set = pd->pd_set;
/* add rpcs into set */
down(&pinger_sem);
struct obd_import *imp =
list_entry(iter, struct obd_import,
imp_pinger_chain);
- int generation, level;
+ int level, force;
unsigned long flags;
- if (imp->imp_next_ping <= pd->pd_this_ping) {
- /* Add a ping. */
- spin_lock_irqsave(&imp->imp_lock, flags);
- generation = imp->imp_generation;
- level = imp->imp_state;
- spin_unlock_irqrestore(&imp->imp_lock, flags);
- if (level != LUSTRE_IMP_FULL) {
- CDEBUG(D_HA,
- "not pinging %s (in recovery)\n",
- imp->imp_target_uuid.uuid);
- continue;
+ spin_lock_irqsave(&imp->imp_lock, flags);
+ level = imp->imp_state;
+ force = imp->imp_force_verify;
+ if (force)
+ imp->imp_force_verify = 0;
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
+
+ if (imp->imp_next_ping <= pd->pd_this_ping || force) {
+ if (level == LUSTRE_IMP_DISCON) {
+ /* wait at least a timeout before
+ trying recovery again. */
+ imp->imp_next_ping = time(NULL) +
+ (obd_timeout * HZ);
+ ptlrpc_initiate_recovery(imp);
+ }
+ else if (level != LUSTRE_IMP_FULL ||
+ imp->imp_obd->obd_no_recov) {
+ CDEBUG(D_HA,
+ "not pinging %s (in recovery "
+ " or recovery disabled: %s)\n",
+ imp->imp_target_uuid.uuid,
+ ptlrpc_import_state_name(level));
+ }
+ else if (imp->imp_pingable || force) {
+ ptlrpc_ping(imp);
}
- req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL,
- NULL);
- if (!req) {
- CERROR("out of memory\n");
- break;
- }
- req->rq_no_resend = 1;
- req->rq_replen = lustre_msg_size(0, NULL);
- req->rq_send_state = LUSTRE_IMP_FULL;
- req->rq_phase = RQ_PHASE_RPC;
- req->rq_import_generation = generation;
- ptlrpc_set_add_req(set, req);
} else {
- CDEBUG(D_HA, "don't need to ping %s (%lu > "
- "%lu)\n", imp->imp_target_uuid.uuid,
- imp->imp_next_ping, pd->pd_this_ping);
+ if (imp->imp_pingable) {
+ CDEBUG(D_HA, "don't need to ping %s "
+ "(%lu > %lu)\n",
+ imp->imp_target_uuid.uuid,
+ imp->imp_next_ping, pd->pd_this_ping);
+ }
}
}
- pd->pd_this_ping = curtime;
- up(&pinger_sem);
-
- /* Might be empty, that's OK. */
- if (set->set_remaining == 0)
- CDEBUG(D_HA, "nothing to ping\n");
- list_for_each(iter, &set->set_requests) {
- struct ptlrpc_request *req =
- list_entry(iter, struct ptlrpc_request,
- rq_set_chain);
- DEBUG_REQ(D_HA, req, "pinging %s->%s",
- req->rq_import->imp_obd->obd_uuid.uuid,
- req->rq_import->imp_target_uuid.uuid);
- (void)ptl_send_rpc(req);
- }
-
-do_check_set:
- rc = ptlrpc_check_set(set);
-
- /* not finished, and we are not expired, simply return */
- if (!rc && curtime < pd->pd_this_ping + obd_timeout) {
- CDEBUG(D_HA, "not finished, but also not expired\n");
- pd->pd_recursion--;
- return 0;
- }
-
- /* Expire all the requests that didn't come back. */
- down(&pinger_sem);
- list_for_each(iter, &set->set_requests) {
- req = list_entry(iter, struct ptlrpc_request,
- rq_set_chain);
-
- if (req->rq_replied)
- continue;
-
- req->rq_phase = RQ_PHASE_COMPLETE;
- set->set_remaining--;
- /* If it was disconnected, don't sweat it. */
- if (list_empty(&req->rq_import->imp_pinger_chain)) {
- ptlrpc_unregister_reply(req);
- continue;
- }
-
- CDEBUG(D_HA, "pinger initiate expire_one_request\n");
- ptlrpc_expire_one_request(req);
- }
up(&pinger_sem);
- ptlrpc_set_destroy(set);
- pd->pd_set = NULL;
-
- pd->pd_next_ping = pd->pd_this_ping + obd_timeout;
- pd->pd_this_ping = 0; /* XXX for debug */
+ pd->pd_next_ping = pd->pd_this_ping + (obd_timeout * HZ);
CDEBUG(D_HA, "finished a round ping\n");
pd->pd_recursion--;
{
down(&pinger_sem);
imp->imp_next_ping = time(NULL) + obd_timeout;
- if (pinger_args.pd_set == NULL &&
- pinger_args.pd_next_ping > imp->imp_next_ping) {
+ if (pinger_args.pd_next_ping > imp->imp_next_ping) {
CDEBUG(D_HA, "set next ping to %ld(cur %ld)\n",
imp->imp_next_ping, time(NULL));
pinger_args.pd_next_ping = imp->imp_next_ping;
void ptlrpc_pinger_wake_up()
{
-#ifdef ENABLE_PINGER
- /* XXX force pinger to run, if needed */
-#endif
+ pinger_args.pd_force_check = 1; /* raise the force-check flag on the global pinger state; presumably the pinger loop reads it to run early -- confirm against the consumer of pd_force_check */
}
#endif /* !__KERNEL__ */
int ptlrpc_expire_one_request(struct ptlrpc_request *req);
+/* ptl_requires_iov() evaluates to 1 in kernel builds and to 0 otherwise.
+ * XXX this and the related build-time checks below ought to be run-time
+ * checks, so that a single build can run against many NALs. */
+#ifndef __KERNEL__
+# define ptl_requires_iov() 0
+#else
+# define ptl_requires_iov() 1
+#endif
+
+/* PTLRPC_PTL_MD_IOV: non-kernel builds use PTL_MD_IOVEC; kernel builds use
+ * (PTL_MD_IOVEC | PTL_MD_PHYS) when CRAY_PORTALS is defined and PTL_MD_KIOV
+ * when it is not. */
+#if !defined(__KERNEL__)
+# define PTLRPC_PTL_MD_IOV PTL_MD_IOVEC
+#elif defined(CRAY_PORTALS)
+# define PTLRPC_PTL_MD_IOV (PTL_MD_IOVEC | PTL_MD_PHYS)
+#else
+# define PTLRPC_PTL_MD_IOV PTL_MD_KIOV
+#endif
+
+/* ptl_md_max_iovs(): 1 for a non-kernel build with CRAY_PORTALS defined;
+ * 0 (meaning unlimited) in every other configuration. */
+#if defined(__KERNEL__) || !defined(CRAY_PORTALS)
+# define ptl_md_max_iovs() 0 /* unlimited */
+#else
+# define ptl_md_max_iovs() 1
+#endif
+
+/* ptl_iov_base(): yields the iov_base member of the iov outside the kernel;
+ * in the kernel it is just NULL.
+ * XXX hopefully the iov can become one consistent type across all portals
+ * implementations. */
+#ifndef __KERNEL__
+# define ptl_iov_base(iov) ((iov)->iov_base)
+#else
+# define ptl_iov_base(kiov) (NULL) /* this is meaningless */
+#endif
+
+/* Event queue size (PTLRPC_NUM_EQ) and event queue callback
+ * (PTLRPC_EQ_CALLBACK), selected per build flavour. */
+#ifdef __KERNEL__
+/* portals calls the callback when the event is added to the queue, so we don't
+ * care if we lose events */
+# define PTLRPC_NUM_EQ 1024
+# define PTLRPC_EQ_CALLBACK ptlrpc_master_callback
+#else
+/* liblustre: no callback, or only when app polls event queues, so allocate a
+ * nice big event queue to ensure we don't drop any */
+# define PTLRPC_NUM_EQ 10240
+/* Test with defined(), like the other CRAY_PORTALS checks in this header: a
+ * bare "#if CRAY_PORTALS" is an error when the macro is defined empty and
+ * triggers -Wundef when it is not defined at all. */
+# if defined(CRAY_PORTALS)
+int cray_portals_callback(ptl_event_t *ev);
+# define PTLRPC_EQ_CALLBACK cray_portals_callback
+# else
+# define PTLRPC_EQ_CALLBACK PTL_EQ_HANDLER_NONE
+# endif
+#endif
+
+/* pers.c */
+void pers_bulk_add_page(struct ptlrpc_bulk_desc *desc, struct page *page,
+ int pageoffset, int len);
+
/* pinger.c */
int ptlrpc_start_pinger(void);
int ptlrpc_stop_pinger(void);
rqbd_list);
rc = PtlMDUnlink(rqbd->rqbd_md_h);
- LASSERT (rc == PTL_OK || rc == PTL_INV_MD);
+ LASSERT (rc == PTL_OK || rc == PTL_MD_INVALID);
}
/* Wait for the network to release any buffers it's
panic("unable to set hostaddr for", self.net_type, self.hostaddr, self.cluster_id)
debug("hostaddr:", self.hostaddr)
- self.add_portals_module("libcfs", 'portals')
+ self.add_portals_module("libcfs", 'libcfs')
+ self.add_portals_module("portals", 'portals')
if node_needs_router():
self.add_portals_module("router", 'kptlrouter')
if self.net_type == 'tcp':