+2005-02-04 Eric Barton <eeb@bartonsoftware.com>
+
+ * Landed portals:b_port_step as follows...
+
+ - removed CFS_DECL_SPIN*
+ just use 'spinlock_t' and initialise with spin_lock_init()
+
+ - removed CFS_DECL_MUTEX*
+ just use 'struct semaphore' and initialise with init_mutex()
+
+ - removed CFS_DECL_RWSEM*
+ just use 'struct rw_semaphore' and initialise with init_rwsem()
+
+ - renamed cfs_sleep_chan -> cfs_waitq
+ cfs_sleep_link -> cfs_waitlink
+
+ - fixed race in linux version of arch-independent socknal
+ (the ENOMEM/EAGAIN decision).
+
+ - Didn't fix problems in Darwin version of arch-independent socknal
+ (resetting socket callbacks, eager ack hack, ENOMEM/EAGAIN decision)
+
+ - removed libcfs types from non-socknal header files (only some types
+ in the header files had been changed; the .c files hadn't been
+ updated at all).
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
-SUBDIRS = portals libcfs knals unals router tests doc utils include \
+SUBDIRS = libcfs portals knals unals router tests doc utils include \
autoconf
-sources:
+sources: include/libcfs/arch
+ $(MAKE) sources -C libcfs
+
+all-recursive: include/libcfs/arch
+
+include/libcfs/arch:
+ case `uname` in \
+ Linux) \
+ ln -s linux include/libcfs/arch \
+ ;; \
+ Darwin) \
+ ln -s darwin include/libcfs/arch \
+ ;; \
+ *) \
+ echo "Platform `uname` is not supported" \
+ ;; \
+ esac
])
#
+# LP_PROG_DARWIN
+#
+# Darwin checks
+#
+AC_DEFUN([LP_PROG_DARWIN],
+[LB_DARWIN_CHECK_FUNCS([get_preemption_level])
+])
+
+#
# LP_PATH_DEFAULTS
#
# default paths for installed files
#
AC_DEFUN([LP_CONFIGURE],
[# portals/utils/portals.c
-AC_CHECK_HEADERS([netdb.h netinet/tcp.h asm/types.h])
+AC_CHECK_HEADERS([netdb.h netinet/tcp.h asm/types.h endian.h])
AC_CHECK_FUNCS([gethostbyname socket connect])
# portals/utils/debug.c
[],
[#include <linux/spinlock.h>])
+# portals/utils/wirecheck.c
+AC_CHECK_FUNCS([strnlen])
+
# -------- Check for required packages --------------
-# this doesn't seem to work on older autoconf
-# AC_CHECK_LIB(readline, readline,,)
-AC_MSG_CHECKING([for readline support])
-AC_ARG_ENABLE(readline,
- AC_HELP_STRING([--disable-readline],
- [do not use readline library]),
- [],[enable_readline='yes'])
-AC_MSG_RESULT([$enable_readline])
-if test x$enable_readline = xyes ; then
+LIBS_save="$LIBS"
+LIBS="-lncurses $LIBS"
+AC_CHECK_LIB([readline],[readline],[
LIBREADLINE="-lreadline -lncurses"
AC_DEFINE(HAVE_LIBREADLINE, 1, [readline library is available])
-else
+],[
LIBREADLINE=""
-fi
+])
+LIBS="$LIBS_save"
AC_SUBST(LIBREADLINE)
AC_MSG_CHECKING([if efence debugging support is requested])
portals/autoconf/Makefile
portals/doc/Makefile
portals/include/Makefile
-portals/include/linux/Makefile
+portals/include/libcfs/Makefile
+portals/include/libcfs/darwin/Makefile
+portals/include/libcfs/linux/Makefile
portals/include/portals/Makefile
+portals/include/portals/darwin/Makefile
+portals/include/portals/linux/Makefile
portals/knals/Makefile
portals/knals/autoMakefile
portals/knals/gmnal/Makefile
portals/knals/socknal/autoMakefile
portals/libcfs/Makefile
portals/libcfs/autoMakefile
+portals/libcfs/darwin/Makefile
+portals/libcfs/linux/Makefile
portals/portals/Makefile
portals/portals/autoMakefile
portals/router/Makefile
-SUBDIRS = linux portals
+SUBDIRS = libcfs portals
EXTRA_DIST = cygwin-ioctl.h
--- /dev/null
+Makefile.in
+Makefile
--- /dev/null
+SUBDIRS := darwin linux
+
+EXTRA_DIST := libcfs.h list.h lltrace.h kp30.h portals_utils.h portals_lib.h
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Lustre curproc API declaration
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation. Lustre is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+ * Public License for more details. You should have received a copy of the GNU
+ * General Public License along with Lustre; if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef __LIBCFS_CURPROC_H__
+#define __LIBCFS_CURPROC_H__
+
+/*
+ * Portable API to access common characteristics of "current" UNIX process.
+ *
+ * Implemented in portals/include/libcfs/<os>/
+ */
+uid_t cfs_curproc_uid(void);
+gid_t cfs_curproc_gid(void);
+uid_t cfs_curproc_fsuid(void);
+gid_t cfs_curproc_fsgid(void);
+pid_t cfs_curproc_pid(void);
+int cfs_curproc_groups_nr(void);
+int cfs_curproc_is_in_groups(gid_t group);
+void cfs_curproc_groups_dump(gid_t *array, int size);
+mode_t cfs_curproc_umask(void);
+char *cfs_curproc_comm(void);
+
+
+/*
+ * Plus, platform-specific constant
+ *
+ * CFS_CURPROC_COMM_MAX,
+ *
+ * and opaque scalar type
+ *
+ * cfs_kernel_cap_t
+ */
+cfs_kernel_cap_t cfs_curproc_cap_get(void);
+void cfs_curproc_cap_set(cfs_kernel_cap_t cap);
+
+/* __LIBCFS_CURPROC_H__ */
+#endif
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
--- /dev/null
+Makefile.in
+Makefile
--- /dev/null
+EXTRA_DIST := darwin-mem.h darwin-types.h libcfs.h portals_utils.h \
+ darwin-fs.h darwin-prim.h darwin-utils.h lltrace.h \
+ darwin-lock.h darwin-sync.h kp30.h portals_lib.h
--- /dev/null
+#ifndef __LIBCFS_DARWIN_CFS_FS_H__
+#define __LIBCFS_DARWIN_CFS_FS_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#ifdef __KERNEL__
+
+#include <sys/types.h>
+#include <sys/systm.h>
+/*
+ * __APPLE_API_PRIVATE is defined before include user.h
+ * Doing this way to get the define of uthread, it's not good
+ * but I do need to know what's inside uthread.
+ */
+#ifndef __APPLE_API_PRIVATE
+#define __APPLE_API_PRIVATE
+#include <sys/vnode.h>
+#undef __APPLE_API_PRIVATE
+#else
+#include <sys/vnode.h>
+#endif
+
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/time.h>
+#include <sys/filedesc.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/sysctl.h>
+#include <sys/ubc.h>
+#include <sys/mbuf.h>
+#include <sys/namei.h>
+#include <sys/fcntl.h>
+#include <sys/lockf.h>
+#include <stdarg.h>
+
+#include <mach/mach_types.h>
+#include <mach/mach_traps.h>
+#include <mach/time_value.h>
+#include <kern/clock.h>
+#include <sys/param.h>
+#include <IOKit/system.h>
+
+#include <libcfs/darwin/darwin-types.h>
+#include <libcfs/darwin/darwin-lock.h>
+#include <libcfs/darwin/darwin-mem.h>
+#include <libcfs/list.h>
+
+/*
+ * File operating APIs in kernel
+ */
+typedef struct file cfs_file_t;
+
+int filp_node_size(cfs_file_t *fp, off_t *size);
+#define cfs_filp_size(fp) \
+ ({ \
+ off_t __size; \
+ filp_node_size((fp), &__size); \
+ __size; \
+ })
+#define cfs_filp_poff(fp) (NULL)
+
+cfs_file_t *filp_open(const char *name, int flags, int mode, int *err);
+int filp_close(cfs_file_t *fp);
+int filp_read(cfs_file_t *fp, void *buf, size_t nbytes, off_t *pos);
+int filp_write(cfs_file_t *fp, void *buf, size_t nbytes, off_t *pos);
+int filp_fsync(cfs_file_t *fp);
+
+#define cfs_filp_open(n, f, m, e) filp_open(n, f, m, e)
+#define cfs_filp_close(f) filp_close(f)
+#define cfs_filp_read(f, b, n, p) filp_read(f, b, n, p)
+#define cfs_filp_write(f, b, n, p) filp_write(f, b, n, p)
+#define cfs_filp_fsync(f) filp_fsync(f)
+
+int ref_file(cfs_file_t *fp);
+int rele_file(cfs_file_t *fp);
+int file_count(cfs_file_t *fp);
+#define cfs_get_file(f) ref_file(f)
+#define cfs_put_file(f) rele_file(f)
+#define cfs_file_count(f) file_count(f)
+
+#define CFS_INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1)))
+#define CFS_OFFSET_MAX CFS_INT_LIMIT(loff_t)
+
+typedef struct flock cfs_flock_t;
+#define CFS_FLOCK_TYPE(fl) ((fl)->l_type)
+#define CFS_FLOCK_SET_TYPE(fl, type) do { (fl)->l_type = (type); } while(0)
+#define CFS_FLOCK_PID(fl) ((fl)->l_pid)
+#define CFS_FLOCK_SET_PID(fl, pid) do { (fl)->l_pid = (pid); } while(0)
+#define CFS_FLOCK_START(fl) ((fl)->l_start)
+#define CFS_FLOCK_SET_START(fl, start) do { (fl)->l_start = (start); } while(0)
+#define CFS_FLOCK_END(fl) ((fl)->l_len == 0? CFS_OFFSET_MAX: ((fl)->l_start + (fl)->l_len))
+#define CFS_FLOCK_SET_END(fl, end) \
+ do { \
+		if ((end) == CFS_OFFSET_MAX) \
+ (fl)->l_len = 0; \
+ else \
+ (fl)->l_len = (end) - (fl)->l_start;\
+ } while(0)
+
+typedef struct {
+ void *d;
+} cfs_dentry_t;
+typedef unsigned short umode_t;
+
+#define ATTR_MODE 0x0001
+#define ATTR_UID 0x0002
+#define ATTR_GID 0x0004
+#define ATTR_SIZE 0x0008
+#define ATTR_ATIME 0x0010
+#define ATTR_MTIME 0x0020
+#define ATTR_CTIME 0x0040
+#define ATTR_ATIME_SET 0x0080
+#define ATTR_MTIME_SET 0x0100
+#define ATTR_FORCE 0x0200 /* Not a change, but a change it */
+#define ATTR_ATTR_FLAG 0x0400
+#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */
+#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */
+#define ATTR_CTIME_SET 0x2000
+
+#define in_group_p(x) (0)
+
+#endif
+
+#define O_SYNC 0
+#define O_DIRECTORY 0
+#define O_LARGEFILE 0
+
+#endif
--- /dev/null
+#ifndef __LIBCFS_DARWIN_CFS_LOCK_H__
+#define __LIBCFS_DARWIN_CFS_LOCK_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#ifdef __KERNEL__
+#include <mach/sync_policy.h>
+#include <mach/task.h>
+#include <mach/semaphore.h>
+#include <mach/mach_traps.h>
+
+/* spin lock types and operations */
+#include <kern/simple_lock.h>
+#include <kern/assert.h>
+#include <kern/thread.h>
+
+#include <libcfs/darwin/darwin-types.h>
+#include <libcfs/darwin/darwin-sync.h>
+
+/*
+ * spin_lock (use Linux kernel's primitives)
+ *
+ * - spin_lock_init(x)
+ * - spin_lock(x)
+ * - spin_unlock(x)
+ * - spin_trylock(x)
+ *
+ * - spin_lock_irqsave(x, f)
+ * - spin_unlock_irqrestore(x, f)
+ */
+struct spin_lock {
+ struct kspin spin;
+};
+
+typedef struct spin_lock spinlock_t;
+
+static inline void spin_lock_init(spinlock_t *lock)
+{
+ kspin_init(&lock->spin);
+}
+
+static inline void spin_lock(spinlock_t *lock)
+{
+ kspin_lock(&lock->spin);
+}
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+ kspin_unlock(&lock->spin);
+}
+
+static inline int spin_trylock(spinlock_t *lock)
+{
+ return kspin_trylock(&lock->spin);
+}
+
+#define spin_lock_bh(x) spin_lock(x)
+#define spin_unlock_bh(x) spin_unlock(x)
+#define spin_lock_bh_init(x) spin_lock_init(x)
+
+extern boolean_t ml_set_interrupts_enabled(boolean_t enable);
+#define __disable_irq() (spl_t) ml_set_interrupts_enabled(FALSE)
+#define __enable_irq(x) (void) ml_set_interrupts_enabled(x)
+
+#define spin_lock_irqsave(s, f) do{ \
+ f = __disable_irq(); \
+ spin_lock(s); }while(0)
+
+#define spin_unlock_irqrestore(s, f) do{ \
+ spin_unlock(s); \
+ __enable_irq(f);}while(0)
+
+/*
+ * Semaphore
+ *
+ * - sema_init(x, v)
+ * - __down(x)
+ * - __up(x)
+ */
+struct semaphore {
+ struct ksem sem;
+};
+
+static inline void sema_init(struct semaphore *s, int val)
+{
+ ksem_init(&s->sem, val);
+}
+
+static inline void __down(struct semaphore *s)
+{
+ ksem_down(&s->sem, 1);
+}
+
+static inline void __up(struct semaphore *s)
+{
+ ksem_up(&s->sem, 1);
+}
+
+/*
+ * Mutex:
+ *
+ * - init_mutex(x)
+ * - init_mutex_locked(x)
+ * - mutex_up(x)
+ * - mutex_down(x)
+ */
+
+#define mutex_up(s) __up(s)
+#define mutex_down(s) __down(s)
+
+#define init_mutex(x) sema_init(x, 1)
+#define init_mutex_locked(x) sema_init(x, 0)
+
+/*
+ * Completion:
+ *
+ * - init_completion(c)
+ * - complete(c)
+ * - wait_for_completion(c)
+ */
+struct completion {
+ /*
+ * Emulate completion by semaphore for now.
+ *
+ * XXX nikita: this is not safe if completion is used to synchronize
+ * exit from kernel daemon thread and kext unloading. In this case
+ * some core function (a la complete_and_exit()) is needed.
+ */
+ struct ksem sem;
+};
+
+static inline void init_completion(struct completion *c)
+{
+ ksem_init(&c->sem, 0);
+}
+
+static inline void complete(struct completion *c)
+{
+ ksem_up(&c->sem, 1);
+}
+
+static inline void wait_for_completion(struct completion *c)
+{
+ ksem_down(&c->sem, 1);
+}
+
+/*
+ * rw_semaphore:
+ *
+ * - DECLARE_RWSEM(x)
+ * - init_rwsem(x)
+ * - down_read(x)
+ * - up_read(x)
+ * - down_write(x)
+ * - up_write(x)
+ */
+struct rw_semaphore {
+ struct krw_sem s;
+};
+
+static inline void init_rwsem(struct rw_semaphore *s)
+{
+ krw_sem_init(&s->s);
+}
+
+static inline void down_read(struct rw_semaphore *s)
+{
+ krw_sem_down_r(&s->s);
+}
+
+static inline int down_read_trylock(struct rw_semaphore *s)
+{
+ int ret = krw_sem_down_r_try(&s->s);
+ return ret == 0? 1: 0;
+}
+
+static inline void down_write(struct rw_semaphore *s)
+{
+ krw_sem_down_w(&s->s);
+}
+
+static inline int down_write_trylock(struct rw_semaphore *s)
+{
+ int ret = krw_sem_down_w_try(&s->s);
+ return ret == 0? 1: 0;
+}
+
+static inline void up_read(struct rw_semaphore *s)
+{
+ krw_sem_up_r(&s->s);
+}
+
+static inline void up_write(struct rw_semaphore *s)
+{
+ krw_sem_up_w(&s->s);
+}
+
+/*
+ * read-write lock : Need to be investigated more!!
+ * XXX nikita: for now, let rwlock_t to be identical to rw_semaphore
+ *
+ * - DECLARE_RWLOCK(l)
+ * - rwlock_init(x)
+ * - read_lock(x)
+ * - read_unlock(x)
+ * - write_lock(x)
+ * - write_unlock(x)
+ */
+typedef struct rw_semaphore rwlock_t;
+
+#define rwlock_init(pl) init_rwsem(pl)
+
+#define read_lock(l) down_read(l)
+#define read_unlock(l) up_read(l)
+#define write_lock(l) down_write(l)
+#define write_unlock(l) up_write(l)
+
+#define write_lock_irqsave(l, f) do{ \
+ f = __disable_irq(); \
+ write_lock(l); }while(0)
+
+#define write_unlock_irqrestore(l, f) do{ \
+ write_unlock(l); \
+ __enable_irq(f);}while(0)
+
+#define read_lock_irqsave(l, f) do{ \
+ f = __disable_irq(); \
+ read_lock(l); }while(0)
+
+#define read_unlock_irqrestore(l, f) do{ \
+ read_unlock(l); \
+ __enable_irq(f);}while(0)
+
+/*
+ * Funnel:
+ *
+ * Safe funnel in/out
+ */
+
+#define CFS_DECL_FUNNEL_DATA \
+ boolean_t __funnel_state = FALSE; \
+ funnel_t *__funnel
+#define CFS_DECL_CONE_DATA CFS_DECL_FUNNEL_DATA
+#define CFS_DECL_NET_DATA CFS_DECL_FUNNEL_DATA
+
+void lustre_cone_in(boolean_t *state, funnel_t **cone);
+void lustre_cone_ex(boolean_t state, funnel_t *cone);
+
+#define CFS_CONE_IN lustre_cone_in(&__funnel_state, &__funnel)
+#define CFS_CONE_EX lustre_cone_ex(__funnel_state, __funnel)
+
+void lustre_net_in(boolean_t *state, funnel_t **cone);
+void lustre_net_ex(boolean_t state, funnel_t *cone);
+
+#define CFS_NET_IN lustre_net_in(&__funnel_state, &__funnel)
+#define CFS_NET_EX lustre_net_ex(__funnel_state, __funnel)
+
+/* __KERNEL__ */
+#endif
+
+/* __LIBCFS_DARWIN_CFS_LOCK_H__ */
+#endif
--- /dev/null
+#ifndef __LIBCFS_DARWIN_CFS_MEM_H__
+#define __LIBCFS_DARWIN_CFS_MEM_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#ifdef __KERNEL__
+
+#include <sys/types.h>
+#include <sys/systm.h>
+
+#include <sys/vm.h>
+#include <sys/kernel.h>
+#include <sys/ubc.h>
+#include <sys/uio.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/lockf.h>
+
+#include <mach/mach_types.h>
+#include <mach/vm_types.h>
+#include <vm/pmap.h>
+#include <vm/vm_kern.h>
+#include <mach/machine/vm_param.h>
+#include <kern/thread_call.h>
+#include <sys/param.h>
+#include <sys/vm.h>
+
+#include <libcfs/darwin/darwin-types.h>
+#include <libcfs/darwin/darwin-sync.h>
+#include <libcfs/darwin/darwin-lock.h>
+#include <libcfs/list.h>
+
+/*
+ * Page of OSX
+ *
+ * There is no page in OSX, however, we need page in lustre.
+ */
+#define PAGE_MASK (~(PAGE_SIZE-1))
+#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1)))
+#define _ALIGN(addr,size) _ALIGN_UP(addr,size)
+#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE)
+
+/*
+ * Basic xnu_page struct, should be binary compatibility with
+ * all page types in xnu (we have only xnu_raw_page, xll_page now)
+ */
+
+/* Variable sized pages are not supported */
+
+#define CFS_PAGE_SHIFT 12
+#define CFS_PAGE_SIZE (1 << CFS_PAGE_SHIFT)
+#define PAGE_CACHE_SIZE CFS_PAGE_SIZE
+#define CFS_PAGE_MASK (~(CFS_PAGE_SIZE - 1))
+
+enum {
+ XNU_PAGE_RAW,
+ XNU_PAGE_XLL,
+ XNU_PAGE_NTYPES
+};
+
+typedef __u32 page_off_t;
+
+/*
+ * For XNU we have our own page cache built on top of underlying BSD/MACH
+ * infrastructure. In particular, we have two disjoint types of pages:
+ *
+ * - "raw" pages (XNU_PAGE_RAW): these are just buffers mapped into KVM,
+ * based on UPLs, and
+ *
+ * - "xll" pages (XNU_PAGE_XLL): these are used by file system to cache
+ * file data, owned by file system objects, hashed, lrued, etc.
+ *
+ * cfs_page_t has to cover both of them, because core Lustre code is based on
+ * the Linux assumption that page is _both_ memory buffer and file system
+ * caching entity.
+ *
+ * To achieve this, all types of pages supported on XNU has to start from
+ * common header that contains only "page type". Common cfs_page_t operations
+ * dispatch through operation vector based on page type.
+ *
+ */
+typedef struct xnu_page {
+ int type;
+} cfs_page_t;
+
+struct xnu_page_ops {
+ void *(*page_map) (cfs_page_t *);
+ void (*page_unmap) (cfs_page_t *);
+ void *(*page_address) (cfs_page_t *);
+};
+
+void xnu_page_ops_register(int type, struct xnu_page_ops *ops);
+void xnu_page_ops_unregister(int type);
+
+/*
+ * raw page, no cache object, just like buffer
+ */
+struct xnu_raw_page {
+ struct xnu_page header;
+ vm_address_t virtual;
+ upl_t upl;
+ int order;
+ atomic_t count;
+ void *private;
+};
+
+/*
+ * Public interface to lustre
+ *
+ * - cfs_alloc_pages(f, o)
+ * - cfs_alloc_page(f)
+ * - cfs_free_pages(p, o)
+ * - cfs_free_page(p)
+ * - cfs_kmap(p)
+ * - cfs_kunmap(p)
+ * - cfs_page_address(p)
+ */
+
+/*
+ * Of all functions above only cfs_kmap(), cfs_kunmap(), and
+ * cfs_page_address() can be called on file system pages. The rest is for raw
+ * pages only.
+ */
+
+cfs_page_t *cfs_alloc_pages(u_int32_t flags, u_int32_t order);
+cfs_page_t *cfs_alloc_page(u_int32_t flags);
+void cfs_free_pages(cfs_page_t *pages, int order);
+void cfs_free_page(cfs_page_t *page);
+void cfs_get_page(cfs_page_t *page);
+int cfs_put_page_testzero(cfs_page_t *page);
+int cfs_page_count(cfs_page_t *page);
+void cfs_set_page_count(cfs_page_t *page, int v);
+
+void *cfs_page_address(cfs_page_t *pg);
+void *cfs_kmap(cfs_page_t *pg);
+void cfs_kunmap(cfs_page_t *pg);
+
+/*
+ * Memory allocator
+ */
+
+extern void *cfs_alloc(size_t nr_bytes, u_int32_t flags);
+extern void cfs_free(void *addr);
+
+extern void *cfs_alloc_large(size_t nr_bytes);
+extern void cfs_free_large(void *addr);
+
+/*
+ * Slab:
+ *
+ * No slab in OSX, use zone allocator to fake slab
+ */
+#define SLAB_HWCACHE_ALIGN 0
+
+typedef struct cfs_mem_cache {
+ struct list_head link;
+ zone_t zone;
+ int size;
+ char name [ZONE_NAME_MAX_LEN];
+} cfs_mem_cache_t;
+
+#define KMEM_CACHE_MAX_COUNT 64
+#define KMEM_MAX_ZONE 8192
+
+extern cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, unsigned long,
+ void (*)(void *, cfs_mem_cache_t *, unsigned long),
+ void (*)(void *, cfs_mem_cache_t *, unsigned long));
+extern int cfs_mem_cache_destroy ( cfs_mem_cache_t * );
+extern void *cfs_mem_cache_alloc ( cfs_mem_cache_t *, int);
+extern void cfs_mem_cache_free ( cfs_mem_cache_t *, void *);
+
+/*
+ * Misc
+ */
+/* XXX fix me */
+#define num_physpages (64 * 1024)
+
+#define CFS_DECL_MMSPACE
+#define CFS_MMSPACE_OPEN do {} while(0)
+#define CFS_MMSPACE_CLOSE do {} while(0)
+
+#define copy_from_user(kaddr, uaddr, size) copyin((caddr_t)uaddr, (caddr_t)kaddr, size)
+#define copy_to_user(uaddr, kaddr, size) copyout((caddr_t)kaddr, (caddr_t)uaddr, size)
+
+#if defined (__ppc__)
+#define mb() __asm__ __volatile__ ("sync" : : : "memory")
+#define rmb() __asm__ __volatile__ ("sync" : : : "memory")
+#define wmb() __asm__ __volatile__ ("eieio" : : : "memory")
+#elif defined (__i386__)
+#define mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
+#define rmb() mb()
+#define wmb() __asm__ __volatile__ ("": : :"memory")
+#else
+#error architecture not supported
+#endif
+
+#else /* !__KERNEL__ */
+
+typedef struct cfs_page{
+ void *foo;
+} cfs_page_t;
+#endif /* __KERNEL__ */
+
+#endif /* __LIBCFS_DARWIN_CFS_MEM_H__ */
--- /dev/null
+#ifndef __LIBCFS_DARWIN_CFS_PRIM_H__
+#define __LIBCFS_DARWIN_CFS_PRIM_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#ifdef __KERNEL__
+#include <sys/types.h>
+#include <sys/systm.h>
+
+#ifndef __APPLE_API_PRIVATE
+#define __APPLE_API_PRIVATE
+#include <sys/user.h>
+#undef __APPLE_API_PRIVATE
+#else
+#include <sys/user.h>
+#endif
+
+#include <sys/kernel.h>
+
+#include <mach/thread_act.h>
+#include <mach/mach_types.h>
+#include <mach/mach_traps.h>
+#include <mach/thread_switch.h>
+#include <mach/time_value.h>
+#include <kern/sched_prim.h>
+#include <vm/pmap.h>
+#include <vm/vm_kern.h>
+#include <mach/machine/vm_param.h>
+#include <kern/clock.h>
+#include <kern/thread_call.h>
+#include <sys/param.h>
+#include <sys/vm.h>
+
+#include <libcfs/darwin/darwin-types.h>
+#include <libcfs/darwin/darwin-utils.h>
+#include <libcfs/darwin/darwin-lock.h>
+
+/*
+ * Symbol functions for libcfs
+ *
+ * OSX has no facility for use to register symbol.
+ * So we have to implement it.
+ */
+#define CFS_SYMBOL_LEN 64
+
+struct cfs_symbol {
+ char name[CFS_SYMBOL_LEN];
+ void *value;
+ int ref;
+ struct list_head sym_list;
+};
+
+extern kern_return_t cfs_symbol_register(const char *, const void *);
+extern kern_return_t cfs_symbol_unregister(const char *);
+extern void * cfs_symbol_get(const char *);
+extern kern_return_t cfs_symbol_put(const char *);
+
+/*
+ * sysctl typedef
+ *
+ * User can register/unregister a list of sysctl_oids
+ * sysctl_oid is data struct of osx's sysctl-entry
+ */
+typedef struct sysctl_oid * cfs_sysctl_table_t;
+typedef cfs_sysctl_table_t cfs_sysctl_table_header_t;
+cfs_sysctl_table_header_t *register_cfs_sysctl_table (cfs_sysctl_table_t *table, int arg);
+void unregister_cfs_sysctl_table (cfs_sysctl_table_header_t *table);
+
+/*
+ * Proc file system APIs, no /proc fs support in OSX
+ */
+typedef struct cfs_proc_dir_entry{
+ void *data;
+}cfs_proc_dir_entry_t;
+
+cfs_proc_dir_entry_t * cfs_create_proc_entry(char *name, int mod,
+ cfs_proc_dir_entry_t *parent);
+void cfs_free_proc_entry(cfs_proc_dir_entry_t *de);
+void cfs_remove_proc_entry(char *name, cfs_proc_dir_entry_t *entry);
+
+typedef int (cfs_read_proc_t)(char *page, char **start, off_t off,
+ int count, int *eof, void *data);
+typedef int (cfs_write_proc_t)(struct file *file, const char *buffer,
+ unsigned long count, void *data);
+
+/*
+ * cfs pseudo device
+ *
+ * cfs_psdev_t
+ * cfs_psdev_register:
+ * cfs_psdev_deregister:
+ */
+typedef struct {
+ int index;
+ void *handle;
+ const char *name;
+ struct cdevsw *devsw;
+ void *private;
+} cfs_psdev_t;
+
+extern kern_return_t cfs_psdev_register(cfs_psdev_t *);
+extern kern_return_t cfs_psdev_deregister(cfs_psdev_t *);
+
+/*
+ * Task struct and ...
+ *
+ * Using BSD current_proc in Darwin
+ */
+extern boolean_t assert_wait_possible(void);
+extern void *get_bsdtask_info(task_t);
+
+typedef struct uthread cfs_task_t;
+#define current_uthread() ((struct uthread *)get_bsdthread_info(current_act()))
+#define cfs_current() current_uthread()
+
+#define set_current_state(s) do {;} while (0)
+#define reparent_to_init() do {;} while (0)
+
+#define CFS_DECL_JOURNAL_DATA
+#define CFS_PUSH_JOURNAL do {;} while(0)
+#define CFS_POP_JOURNAL do {;} while(0)
+
+#define THREAD_NAME(comm, fmt, a...)
+/*
+ * Kernel thread:
+ *
+ * OSX kernel thread can not be created with args,
+ * so we have to implement new APIs to create thread with args
+ *
+ * All requests to create kernel thread will create a new
+ * thread instance of cfs_thread_agent, one by one.
+ * cfs_thread_agent will call the caller's thread function
+ * with argument supplied by caller.
+ */
+
+typedef int (*cfs_thread_t)(void *);
+
+extern task_t kernel_task;
+
+struct kernel_thread_arg
+{
+ spinlock_t lock;
+ atomic_t inuse;
+ cfs_thread_t func;
+ void *arg;
+};
+
+extern struct kernel_thread_arg cfs_thread_arg;
+extern void cfs_thread_agent(void);
+
+#define THREAD_ARG_FREE 0
+#define THREAD_ARG_HOLD 1
+#define THREAD_ARG_RECV 2
+
+#define set_targ_stat(a, v) atomic_set(&(a)->inuse, v)
+#define get_targ_stat(a) atomic_read(&(a)->inuse)
+
+/*
+ * Hold the thread argument and set the status of thread_status
+ * to THREAD_ARG_HOLD, if the thread argument is held by other
+ * threads (It's THREAD_ARG_HOLD already), current-thread has to wait.
+ */
+#define thread_arg_hold(pta, _func, _arg) \
+ do { \
+ spin_lock(&(pta)->lock); \
+ if (get_targ_stat(pta) == THREAD_ARG_FREE) { \
+ set_targ_stat((pta), THREAD_ARG_HOLD); \
+ (pta)->arg = (void *)_arg; \
+ (pta)->func = _func; \
+ spin_unlock(&(pta)->lock); \
+ break; \
+ } \
+ spin_unlock(&(pta)->lock); \
+ schedule(); \
+ } while(1); \
+
+/*
+ * Release the thread argument if the thread argument has been
+ * received by the child-thread (Status of thread_args is
+ * THREAD_ARG_RECV), otherwise current-thread has to wait.
+ * After release, the thread_args' status will be set to
+ * THREAD_ARG_FREE, and others can re-use the thread_args to
+ * create new kernel_thread.
+ */
+#define thread_arg_release(pta) \
+ do { \
+ spin_lock(&(pta)->lock); \
+ if (get_targ_stat(pta) == THREAD_ARG_RECV) { \
+ (pta)->arg = NULL; \
+ (pta)->func = NULL; \
+ set_targ_stat(pta, THREAD_ARG_FREE); \
+ spin_unlock(&(pta)->lock); \
+ break; \
+ } \
+ spin_unlock(&(pta)->lock); \
+ schedule(); \
+ } while(1)
+
+/*
+ * Receive thread argument (Used in child thread), set the status
+ * of thread_args to THREAD_ARG_RECV.
+ */
+#define __thread_arg_recv_fin(pta, _func, _arg, fin) \
+ do { \
+ spin_lock(&(pta)->lock); \
+ if (get_targ_stat(pta) == THREAD_ARG_HOLD) { \
+ if (fin) \
+ set_targ_stat(pta, THREAD_ARG_RECV);\
+ _arg = (pta)->arg; \
+ _func = (pta)->func; \
+ spin_unlock(&(pta)->lock); \
+ break; \
+ } \
+ spin_unlock(&(pta)->lock); \
+ schedule(); \
+ } while (1); \
+
+/*
+ * Just set the thread_args' status to THREAD_ARG_RECV
+ */
+#define thread_arg_fin(pta) \
+ do { \
+ spin_lock(&(pta)->lock); \
+ assert( get_targ_stat(pta) == THREAD_ARG_HOLD); \
+ set_targ_stat(pta, THREAD_ARG_RECV); \
+ spin_unlock(&(pta)->lock); \
+ } while(0)
+
+#define thread_arg_recv(pta, f, a) __thread_arg_recv_fin(pta, f, a, 1)
+#define thread_arg_keep(pta, f, a) __thread_arg_recv_fin(pta, f, a, 0)
+
+/*
+ * cloning flags, no use in OSX, just copy them from Linux
+ */
+#define CSIGNAL 0x000000ff /* signal mask to be sent at exit */
+#define CLONE_VM 0x00000100 /* set if VM shared between processes */
+#define CLONE_FS 0x00000200 /* set if fs info shared between processes */
+#define CLONE_FILES 0x00000400 /* set if open files shared between processes */
+#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */
+#define CLONE_PID 0x00001000 /* set if pid shared */
+#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */
+#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */
+#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */
+#define CLONE_THREAD 0x00010000 /* Same thread group? */
+#define CLONE_NEWNS 0x00020000 /* New namespace group? */
+
+#define CLONE_SIGNAL (CLONE_SIGHAND | CLONE_THREAD)
+
+extern int cfs_kernel_thread(cfs_thread_t func, void *arg, int flag);
+
+
+/*
+ * Wait Queue implementation
+ *
+ * Like wait_queue in Linux
+ */
+typedef struct cfs_waitq {
+ struct ksleep_chan wq_ksleep_chan;
+} cfs_waitq_t;
+
+typedef struct cfs_waitlink {
+ struct cfs_waitq *wl_waitq;
+ struct ksleep_link wl_ksleep_link;
+} cfs_waitlink_t;
+
+void cfs_waitq_init(struct cfs_waitq *waitq);
+void cfs_waitlink_init(struct cfs_waitlink *link);
+
+void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link);
+void cfs_waitq_add_exclusive(struct cfs_waitq *waitq,
+ struct cfs_waitlink *link);
+void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq);
+void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link);
+int cfs_waitq_active(struct cfs_waitq *waitq);
+
+void cfs_waitq_signal(struct cfs_waitq *waitq);
+void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr);
+void cfs_waitq_broadcast(struct cfs_waitq *waitq);
+
+void cfs_waitq_wait(struct cfs_waitlink *link);
+cfs_duration_t cfs_waitq_timedwait(struct cfs_waitlink *link,
+ cfs_duration_t timeout);
+
+/*
+ * Thread schedule APIs.
+ */
+#define MAX_SCHEDULE_TIMEOUT ((long)(~0UL>>12))
+
+static inline int schedule_timeout(int64_t timeout)
+{
+ int result;
+
+ AbsoluteTime clock_current;
+ AbsoluteTime clock_delay;
+ result = assert_wait((event_t)current_uthread(), THREAD_UNINT);
+ clock_get_uptime(&clock_current);
+ nanoseconds_to_absolutetime(timeout, &clock_delay);
+ ADD_ABSOLUTETIME(&clock_current, &clock_delay);
+ thread_set_timer_deadline(clock_current);
+ if (result == THREAD_WAITING)
+ result = thread_block(THREAD_CONTINUE_NULL);
+ thread_cancel_timer();
+ if (result == THREAD_TIMED_OUT)
+ result = 0;
+ else
+ result = 1;
+ return result;
+}
+
+#define schedule() \
+ do { \
+ if (assert_wait_possible()) \
+ schedule_timeout(1); \
+ else \
+ schedule_timeout(0); \
+ } while (0)
+
+#define __wait_event(wq, condition) \
+do { \
+ struct cfs_waitlink __wait; \
+ \
+ cfs_waitlink_init(&__wait); \
+ for (;;) { \
+ cfs_waitq_add(&wq, &__wait); \
+ if (condition) \
+ break; \
+ cfs_waitq_wait(&__wait); \
+ cfs_waitq_del(&wq, &__wait); \
+ } \
+ cfs_waitq_del(&wq, &__wait); \
+} while (0)
+
+#define wait_event(wq, condition) \
+do { \
+ if (condition) \
+ break; \
+ __wait_event(wq, condition); \
+} while (0)
+
+#define wait_event_interruptible(wq, condition) \
+({ \
+ wait_event(wq, condition); \
+ 0; \
+})
+
+extern void wakeup_one __P((void * chan));
+/* only used in tests */
+#define wake_up_process(p) \
+ do { \
+ wakeup_one(p); \
+ } while (0)
+
+/* used in couple of places */
+static inline void sleep_on(cfs_waitq_t *waitq)
+{
+ cfs_waitlink_t link;
+
+ cfs_waitlink_init(&link);
+ cfs_waitq_add(waitq, &link);
+ cfs_waitq_wait(&link);
+ cfs_waitq_del(waitq, &link);
+}
+
+/*
+ * XXX
+ * Signal
+ */
+#define cfs_sigmask_lock(t, f) do { f = 0; } while(0)
+#define cfs_sigmask_unlock(t, f) do { f = 0; } while(0)
+#define cfs_signal_pending(t) (0)
+
+#define cfs_siginitset(pmask, sigs) \
+ do { \
+		sigset_t __sigs = (sigs) & (~sigcantmask); \
+ *(pmask) = __sigs; \
+ } while(0)
+
+#define cfs_siginitsetinv(pmask, sigs) \
+ do { \
+		sigset_t __sigs = ~((sigs) | sigcantmask); \
+ *(pmask) = __sigs; \
+ } while(0)
+
+#define cfs_recalc_sigpending(ut) \
+ do { \
+ (ut)->uu_siglist = (ut)->uu_siglist & ~(ut)->uu_sigmask;\
+ } while (0)
+#define cfs_sigfillset(s) \
+ do { \
+ memset((s), -1, sizeof(sigset_t)); \
+ } while(0)
+
+#define cfs_set_sig_blocked(ut, b) do {(ut)->uu_sigmask = b;} while(0)
+#define cfs_get_sig_blocked(ut) (&(ut)->uu_sigmask)
+
+#define SIGNAL_MASK_ASSERT()
+
+/*
+ * Timer
+ */
+
+typedef struct cfs_timer {
+ struct ktimer t;
+} cfs_timer_t;
+
+#define cfs_init_timer(t) do {} while(0)
+void cfs_timer_init(struct cfs_timer *t, void (*func)(unsigned long), void *arg);
+void cfs_timer_done(struct cfs_timer *t);
+void cfs_timer_arm(struct cfs_timer *t, cfs_time_t deadline);
+void cfs_timer_disarm(struct cfs_timer *t);
+int cfs_timer_is_armed(struct cfs_timer *t);
+
+cfs_time_t cfs_timer_deadline(struct cfs_timer *t);
+
+/*
+ * Ioctl
+ * We don't need to copy out everything in osx: only the variable-size
+ * payload that follows the fixed header *d goes back to user space.
+ * NOTE(review): the original copied from offset __size (== l - sizeof(*d)),
+ * which addresses the wrong region and can run past the end of the buffer
+ * whenever l > 2*sizeof(*d); the payload actually starts at sizeof(*d).
+ */
+#define cfs_ioctl_data_out(a, d, l) \
+ ({ \
+ int __size; \
+ int __rc = 0; \
+ assert((l) >= sizeof(*d)); \
+ __size = (l) - sizeof(*d); \
+ if (__size > 0) \
+ __rc = copy_to_user((void *)a + sizeof(*d), \
+ (void *)d + sizeof(*d), \
+ __size); \
+ __rc; \
+ })
+
+/*
+ * CPU
+ */
+#include <machine/cpu_number.h>
+/* Run in PowerG5 who is PPC64: cache line is 128 bytes there */
+#define SMP_CACHE_BYTES 128
+#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
+/* XXX How to get the value of NCPUS from xnu ? assumes at most 2 CPUs */
+#define NR_CPUS 2
+#define smp_processor_id() cpu_number()
+#define smp_num_cpus NR_CPUS
+/* XXX smp_call_function is not supported in xnu; cross-CPU calls are
+ * silently dropped */
+#define smp_call_function(f, a, n, w) do {} while(0)
+
+/*
+ * Misc
+ */
+/* branch-prediction hints: plain pass-through unless already defined */
+#ifndef likely
+#define likely(exp) (exp)
+#endif
+#ifndef unlikely
+#define unlikely(exp) (exp)
+#endif
+
+/* XNU has no big kernel lock; these are no-ops */
+#define lock_kernel() do {} while(0)
+#define unlock_kernel() do {} while(0)
+
+#define exit_mm(t) do {} while(0)
+#define exit_files(t) do {} while(0)
+
+/* capability check: the argument is ignored and the current process's
+ * credentials are always tested via suser() */
+#define CAP_SYS_ADMIN 0
+#define capable(a) suser(current_proc()->p_ucred, &(current_proc()->p_acflag))
+
+/* no usermode helper support; pretend success */
+#define USERMODEHELPER(path, argv, envp) (0)
+
+/*
+ * Expand Linux-style module init/exit into xnu kext boilerplate:
+ * generates the kmod_info record plus the _start/_stop entry points
+ * that forward to init()/fini().  fini() is assumed not to fail.
+ */
+#define cfs_module(name, version, init, fini) \
+extern kern_return_t _start(kmod_info_t *ki, void *data); \
+extern kern_return_t _stop(kmod_info_t *ki, void *data); \
+__private_extern__ kern_return_t name##_start(kmod_info_t *ki, void *data); \
+__private_extern__ kern_return_t name##_stop(kmod_info_t *ki, void *data); \
+ \
+kmod_info_t KMOD_INFO_NAME = { 0, KMOD_INFO_VERSION, -1, \
+ { "com.clusterfs.lustre." #name }, { version }, \
+ -1, 0, 0, 0, 0, name##_start, name##_stop }; \
+ \
+__private_extern__ kmod_start_func_t *_realmain = name##_start; \
+__private_extern__ kmod_stop_func_t *_antimain = name##_stop; \
+__private_extern__ int _kext_apple_cc = __APPLE_CC__ ; \
+ \
+kern_return_t name##_start(kmod_info_t *ki, void *d) \
+{ \
+ return init(); \
+} \
+ \
+kern_return_t name##_stop(kmod_info_t *ki, void *d) \
+{ \
+ fini(); \
+ return KERN_SUCCESS; \
+} \
+ \
+/* \
+ * to allow semicolon after cfs_module(...) \
+ */ \
+struct __dummy_ ## name ## _struct {}
+
+/* inter_module_* mapped onto the cfs symbol table */
+#define inter_module_get(n) cfs_symbol_get(n)
+#define inter_module_put(n) cfs_symbol_put(n)
+
+/* section annotations have no meaning on XNU */
+#ifndef __exit
+#define __exit
+#endif
+#ifndef __init
+#define __init
+#endif
+
+/* Linux module metadata macros: all no-ops on this platform */
+#define EXPORT_SYMBOL(s)
+#define MODULE_AUTHOR(s)
+#define MODULE_DESCRIPTION(s)
+#define MODULE_LICENSE(s)
+#define MODULE_PARM(a, b)
+#define MODULE_PARM_DESC(a, b)
+
+/* Fake a Linux version, encoded as a*100 + b*10 + c, and pretend to be
+ * 2.5.0 so version-dependent code takes the 2.5+ paths.
+ * NB: 'c' is parenthesized (original left it bare) and the 2.5.0 code
+ * now matches the KERNEL_VERSION() encoding (original said 2*200). */
+#define KERNEL_VERSION(a,b,c) ((a)*100+(b)*10+(c))
+#define LINUX_VERSION_CODE KERNEL_VERSION(2,5,0)
+
+#define NR_IRQS 512
+/* no usable interrupt-context test on xnu; claim process context */
+#define in_interrupt() (0)
+
+/* Linux printk level prefixes, kept verbatim for message compatibility */
+#define KERN_EMERG "<0>" /* system is unusable */
+#define KERN_ALERT "<1>" /* action must be taken immediately */
+#define KERN_CRIT "<2>" /* critical conditions */
+#define KERN_ERR "<3>" /* error conditions */
+#define KERN_WARNING "<4>" /* warning conditions */
+#define KERN_NOTICE "<5>" /* normal but significant condition */
+#define KERN_INFO "<6>" /* informational */
+#define KERN_DEBUG "<7>" /* debug-level messages */
+
+/*
+ * Linux-style error-pointer emulation.  Nothing on this platform
+ * actually encodes errors in pointers, so IS_ERR() is hardwired to 0.
+ */
+static inline long PTR_ERR(const void *ptr)
+{
+ return (long) ptr;
+}
+
+/* NB: argument parenthesized — (void *)err would mis-bind for 'a + b' */
+#define ERR_PTR(err) ((void *)(err))
+
+/* XXX always false: no ERR_PTR() values are ever produced here */
+#define IS_ERR(p) (0)
+
+/*
+ * Error number aliases: map Linux-only errno values onto the closest
+ * BSD equivalents.
+ */
+#define EBADR EBADRPC
+#define ERESTARTSYS ERESTART
+#define EDEADLOCK EDEADLK
+#define ECOMM EINVAL
+#define ENODATA EINVAL
+
+#else
+#define __WORDSIZE 32
+#endif /* __KERNEL__ */
+
+#endif /* __LINUX__ */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Lustre Light Super operations
+ *
+ * Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under
+ * the terms of version 2 of the GNU General Public License as published by
+ * the Free Software Foundation. Lustre is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details. You should have received a
+ * copy of the GNU General Public License along with Lustre; if not, write
+ * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ */
+
+/*
+ * xnu_sync.h
+ *
+ * Created by nikita on Sun Jul 18 2004.
+ *
+ * Prototypes of XNU synchronization primitives.
+ */
+
+#ifndef __LIBCFS_DARWIN_XNU_SYNC_H__
+#define __LIBCFS_DARWIN_XNU_SYNC_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+/* set to 1 to compile magic/owner checking into the sync primitives */
+#define XNU_SYNC_DEBUG (0)
+
+#if XNU_SYNC_DEBUG
+#define ON_SYNC_DEBUG(e) e
+#else
+#define ON_SYNC_DEBUG(e)
+#endif
+
+/* per-type magic values used to detect corrupt/uninitialised objects */
+enum {
+ /* "egrep -i '^(o?x)?[abcdeflo]*$' /usr/dict/words" is your friend */
+ KMUT_MAGIC = 0x0bac0cab, /* [a, [b, c]] = b (a, c) - c (a, b) */
+ KSEM_MAGIC = 0x1abe11ed,
+ KCOND_MAGIC = 0xb01dface,
+ KRW_MAGIC = 0xdabb1edd,
+ KSPIN_MAGIC = 0xca11ab1e,
+ KSLEEP_CHAN_MAGIC = 0x0debac1e,
+ KSLEEP_LINK_MAGIC = 0xacc01ade,
+ KTIMER_MAGIC = 0xbefadd1e
+};
+
+/* ------------------------- spin lock ------------------------- */
+
+/*
+ * XXX nikita: don't use NCPUS it's hardcoded to (1) in cpus.h
+ */
+#define SMP (1)
+
+#include <kern/simple_lock.h>
+
+#include <libcfs/list.h>
+
+/* spin lock built on xnu's low-level hw_lock; empty on UP builds */
+struct kspin {
+#if SMP
+ hw_lock_data_t lock;
+#endif
+#if XNU_SYNC_DEBUG
+ unsigned magic; /* KSPIN_MAGIC */
+ thread_t owner; /* thread currently holding the lock */
+#endif
+};
+
+/*
+ * XXX nikita: we cannot use simple_* functions, because bsd/sys/lock.h
+ * redefines them to nothing. Use low-level hw_lock_* instead.
+ */
+
+void kspin_init(struct kspin *spin);
+void kspin_done(struct kspin *spin);
+void kspin_lock(struct kspin *spin);
+void kspin_unlock(struct kspin *spin);
+/* returns non-zero iff the lock was acquired — confirm in xnu_sync.c */
+int kspin_trylock(struct kspin *spin);
+
+#if XNU_SYNC_DEBUG
+/*
+ * two functions below are for use in assertions
+ */
+/* true, iff spin-lock is locked by the current thread */
+int kspin_islocked(struct kspin *spin);
+/* true, iff spin-lock is not locked by the current thread */
+int kspin_isnotlocked(struct kspin *spin);
+#else
+/* without debugging we cannot tell: assertions always pass */
+#define kspin_islocked(s) (1)
+#define kspin_isnotlocked(s) (1)
+#endif
+
+/* ------------------------- semaphore ------------------------- */
+
+/* counting semaphore: value guarded by a spin lock, plus a wait queue */
+struct ksem {
+ struct kspin guard; /* protects 'value' and 'q' */
+ struct wait_queue q; /* threads blocked in ksem_down() */
+ int value; /* current semaphore count */
+#if XNU_SYNC_DEBUG
+ unsigned magic; /* KSEM_MAGIC */
+#endif
+};
+
+/* up/down transfer 'value' counts at a time; trydown does not block
+ * (return conventions defined in the implementation) */
+void ksem_init(struct ksem *sem, int value);
+void ksem_done(struct ksem *sem);
+int ksem_up (struct ksem *sem, int value);
+void ksem_down(struct ksem *sem, int value);
+int ksem_trydown(struct ksem *sem, int value);
+
+/* ------------------------- mutex ------------------------- */
+
+/* mutex implemented as a binary ksem */
+struct kmut {
+ struct ksem s;
+#if XNU_SYNC_DEBUG
+ unsigned magic; /* KMUT_MAGIC */
+ thread_t owner; /* current holder, for assertions */
+#endif
+};
+
+void kmut_init(struct kmut *mut);
+void kmut_done(struct kmut *mut);
+
+void kmut_lock (struct kmut *mut);
+void kmut_unlock (struct kmut *mut);
+int kmut_trylock(struct kmut *mut);
+
+#if XNU_SYNC_DEBUG
+/*
+ * two functions below are for use in assertions
+ */
+/* true, iff mutex is locked by the current thread */
+int kmut_islocked(struct kmut *mut);
+/* true, iff mutex is not locked by the current thread */
+int kmut_isnotlocked(struct kmut *mut);
+#else
+/* without debugging we cannot tell: assertions always pass */
+#define kmut_islocked(m) (1)
+#define kmut_isnotlocked(m) (1)
+#endif
+
+/* ------------------------- condition variable ------------------------- */
+
+/* one waiter on a condition variable: singly-linked, each with its
+ * own semaphore to be signalled individually */
+struct kcond_link {
+ struct kcond_link *next;
+ struct ksem sem;
+};
+
+/* condition variable: spin-lock guarded list of waiters */
+struct kcond {
+ struct kspin guard; /* protects 'waiters' */
+ struct kcond_link *waiters;
+#if XNU_SYNC_DEBUG
+ unsigned magic; /* KCOND_MAGIC */
+#endif
+};
+
+void kcond_init(struct kcond *cond);
+void kcond_done(struct kcond *cond);
+/* 'lock' is the caller-held spin lock protecting the predicate */
+void kcond_wait(struct kcond *cond, struct kspin *lock);
+void kcond_signal(struct kcond *cond);
+void kcond_broadcast(struct kcond *cond);
+
+/* _guard variants use cond->guard itself as the predicate lock */
+void kcond_wait_guard(struct kcond *cond);
+void kcond_signal_guard(struct kcond *cond);
+void kcond_broadcast_guard(struct kcond *cond);
+
+/* ------------------------- read-write semaphore ------------------------- */
+
+/* reader-writer semaphore built on a condition variable */
+struct krw_sem {
+ int count; /* readers > 0, writer < 0 — confirm in xnu_sync.c */
+ struct kcond cond;
+#if XNU_SYNC_DEBUG
+ unsigned magic; /* KRW_MAGIC */
+#endif
+};
+
+void krw_sem_init(struct krw_sem *sem);
+void krw_sem_done(struct krw_sem *sem);
+void krw_sem_down_r(struct krw_sem *sem);
+int krw_sem_down_r_try(struct krw_sem *sem);
+void krw_sem_down_w(struct krw_sem *sem);
+int krw_sem_down_w_try(struct krw_sem *sem);
+void krw_sem_up_r(struct krw_sem *sem);
+void krw_sem_up_w(struct krw_sem *sem);
+
+/* ------------------------- sleep-channel ------------------------- */
+
+/* sleep channel: spin-lock guarded list of waiting links; this is the
+ * xnu backing for the arch-independent cfs_waitq */
+struct ksleep_chan {
+ struct kspin guard; /* protects 'waiters' */
+ struct list_head waiters;
+#if XNU_SYNC_DEBUG
+ unsigned magic; /* KSLEEP_CHAN_MAGIC */
+#endif
+};
+
+#define KSLEEP_CHAN_INITIALIZER {{{0}}}
+
+/* one sleeping thread queued on a channel */
+struct ksleep_link {
+ int flags; /* KSLEEP_EXCLUSIVE or 0 */
+ event_t event; /* xnu event this thread sleeps on */
+ int hits; /* wake-ups delivered before the sleep */
+ struct ksleep_chan *forward; /* chan this link was forwarded to, if any */
+ struct list_head linkage; /* membership in chan->waiters */
+#if XNU_SYNC_DEBUG
+ unsigned magic; /* KSLEEP_LINK_MAGIC */
+#endif
+};
+
+enum {
+ KSLEEP_EXCLUSIVE = 1
+};
+
+void ksleep_chan_init(struct ksleep_chan *chan);
+void ksleep_chan_done(struct ksleep_chan *chan);
+
+void ksleep_link_init(struct ksleep_link *link);
+void ksleep_link_done(struct ksleep_link *link);
+
+void ksleep_add(struct ksleep_chan *chan, struct ksleep_link *link);
+void ksleep_del(struct ksleep_chan *chan, struct ksleep_link *link);
+
+void ksleep_wait(struct ksleep_chan *chan);
+int64_t ksleep_timedwait(struct ksleep_chan *chan, uint64_t timeout);
+
+/* wake one / all / at most nr waiters queued on 'chan' */
+void ksleep_wake(struct ksleep_chan *chan);
+void ksleep_wake_all(struct ksleep_chan *chan);
+void ksleep_wake_nr(struct ksleep_chan *chan, int nr);
+
+/* static initializer for a ksleep_link; unnamed fields ('forward')
+ * are implicitly zeroed by the designated initializer */
+#define KSLEEP_LINK_DECLARE(name) \
+{ \
+ .flags = 0, \
+ .event = 0, \
+ .hits = 0, \
+ .linkage = PTL_LIST_HEAD_INIT(name.linkage), \
+ .magic = KSLEEP_LINK_MAGIC \
+}
+
+/* ------------------------- timer ------------------------- */
+
+/* one-shot callback timer */
+struct ktimer {
+ struct kspin guard; /* protects 'armed' and 'deadline' */
+ void (*func)(void *); /* callback run at expiry */
+ void *arg; /* opaque argument for 'func' */
+ u_int64_t deadline; /* timer deadline in absolute nanoseconds */
+ int armed; /* non-zero while pending */
+#if XNU_SYNC_DEBUG
+ unsigned magic; /* KTIMER_MAGIC */
+#endif
+};
+
+void ktimer_init(struct ktimer *t, void (*func)(void *), void *arg);
+void ktimer_done(struct ktimer *t);
+void ktimer_arm(struct ktimer *t, u_int64_t deadline);
+void ktimer_disarm(struct ktimer *t);
+int ktimer_is_armed(struct ktimer *t);
+
+u_int64_t ktimer_deadline(struct ktimer *t);
+
+/* __XNU_SYNC_H__ */
+#endif
+
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ *
+ * Implementation of portable time API for XNU kernel
+ *
+ */
+
+#ifndef __LIBCFS_DARWIN_DARWIN_TIME_H__
+#define __LIBCFS_DARWIN_DARWIN_TIME_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+/* Portable time API */
+
+/*
+ * Platform provides three opaque data-types:
+ *
+ * cfs_time_t represents point in time. This is internal kernel
+ * time rather than "wall clock". This time bears no
+ * relation to gettimeofday().
+ *
+ * cfs_duration_t represents time interval with resolution of internal
+ * platform clock
+ *
+ * cfs_fs_time_t represents instance in world-visible time. This is
+ * used in file-system time-stamps
+ *
+ * cfs_time_t cfs_time_current(void);
+ * cfs_time_t cfs_time_add (cfs_time_t, cfs_duration_t);
+ * cfs_duration_t cfs_time_sub (cfs_time_t, cfs_time_t);
+ * int cfs_time_before (cfs_time_t, cfs_time_t);
+ * int cfs_time_beforeq(cfs_time_t, cfs_time_t);
+ *
+ * cfs_duration_t cfs_duration_build(int64_t);
+ *
+ * time_t cfs_duration_sec (cfs_duration_t);
+ * void cfs_duration_usec(cfs_duration_t, struct timeval *);
+ * void cfs_duration_nsec(cfs_duration_t, struct timespec *);
+ *
+ * void cfs_fs_time_current(cfs_fs_time_t *);
+ * time_t cfs_fs_time_sec (cfs_fs_time_t *);
+ * void cfs_fs_time_usec (cfs_fs_time_t *, struct timeval *);
+ * void cfs_fs_time_nsec (cfs_fs_time_t *, struct timespec *);
+ * int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *);
+ * int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *);
+ *
+ * cfs_duration_t cfs_time_minimal_timeout(void)
+ *
+ * CFS_TIME_FORMAT
+ * CFS_DURATION_FORMAT
+ *
+ */
+
+#define ONE_BILLION ((u_int64_t)1000000000)
+#define ONE_MILLION ((u_int64_t) 1000000)
+
+#ifdef __KERNEL__
+#include <sys/types.h>
+#include <sys/systm.h>
+
+#ifndef __APPLE_API_PRIVATE
+#define __APPLE_API_PRIVATE
+#include <sys/user.h>
+#undef __APPLE_API_PRIVATE
+#else
+#include <sys/user.h>
+#endif
+
+#include <sys/kernel.h>
+
+#include <mach/thread_act.h>
+#include <mach/mach_types.h>
+#include <mach/mach_traps.h>
+#include <mach/thread_switch.h>
+#include <mach/time_value.h>
+#include <kern/sched_prim.h>
+#include <vm/pmap.h>
+#include <vm/vm_kern.h>
+#include <mach/machine/vm_param.h>
+#include <kern/clock.h>
+#include <kern/thread_call.h>
+#include <sys/param.h>
+#include <sys/vm.h>
+
+#include <libcfs/darwin/darwin-types.h>
+#include <libcfs/darwin/darwin-utils.h>
+#include <libcfs/darwin/darwin-lock.h>
+
+typedef u_int64_t cfs_time_t; /* nanoseconds */
+typedef int64_t cfs_duration_t;
+
+#define CFS_TIME_T "%llu"
+#define CFS_DURATION_T "%lld"
+
+/* file-system timestamps use BSD timevals (microsecond precision) */
+typedef struct timeval cfs_fs_time_t;
+
+/* current kernel time from nanotime(), flattened to nanoseconds */
+static inline cfs_time_t cfs_time_current(void)
+{
+ struct timespec instant;
+
+ nanotime(&instant);
+ return ((u_int64_t)instant.tv_sec) * ONE_BILLION + instant.tv_nsec;
+}
+
+/* current kernel time, whole seconds only */
+static inline time_t cfs_time_current_sec(void)
+{
+ struct timespec instant;
+
+ nanotime(&instant);
+ return instant.tv_sec;
+}
+
+static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d)
+{
+ return t + d;
+}
+
+static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2)
+{
+ return t1 - t2;
+}
+
+/* comparisons go through a signed difference so they stay correct
+ * across unsigned wrap-around */
+static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2)
+{
+ return (int64_t)t1 - (int64_t)t2 < 0;
+}
+
+static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2)
+{
+ return (int64_t)t1 - (int64_t)t2 <= 0;
+}
+
+/* snapshot wall-clock time from the kernel global 'time' timeval */
+static inline void cfs_fs_time_current(cfs_fs_time_t *t)
+{
+ *t = time;
+}
+
+static inline time_t cfs_fs_time_sec(cfs_fs_time_t *t)
+{
+ return t->tv_sec;
+}
+
+/* durations are plain nanosecond counts on this platform */
+static inline cfs_duration_t cfs_duration_build(int64_t nano)
+{
+ return nano;
+}
+
+
+static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v)
+{
+ *v = *t;
+}
+
+/* convert microsecond-resolution timeval into a timespec */
+static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s)
+{
+ s->tv_sec = t->tv_sec;
+ s->tv_nsec = t->tv_usec * 1000;
+}
+
+/* whole seconds expressed as a nanosecond duration */
+static inline cfs_duration_t cfs_time_seconds(int seconds)
+{
+ return cfs_duration_build(ONE_BILLION * (int64_t)seconds);
+}
+
+/* 'now + seconds', in kernel-clock nanoseconds */
+static inline cfs_time_t cfs_time_shift(int seconds)
+{
+ return cfs_time_add(cfs_time_current(), cfs_time_seconds(seconds));
+}
+
+/*
+ * internal helper function used by cfs_fs_time_before*()
+ *
+ * NB: tv_usec is added without scaling to nanoseconds; the result is
+ * not a real nanosecond count, but ordering is still correct because
+ * tv_usec < ONE_MILLION < ONE_BILLION, so (sec, usec) compare
+ * lexicographically.
+ */
+static inline int64_t __cfs_fs_time_flat(cfs_fs_time_t *t)
+{
+ return ((int64_t)t->tv_sec) * ONE_BILLION + t->tv_usec;
+}
+
+static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
+{
+ return __cfs_fs_time_flat(t1) - __cfs_fs_time_flat(t2) < 0;
+}
+
+static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
+{
+ return __cfs_fs_time_flat(t1) - __cfs_fs_time_flat(t2) <= 0;
+}
+
+/* whole seconds in a nanosecond duration (truncates) */
+static inline time_t cfs_duration_sec(cfs_duration_t d)
+{
+ return d / ONE_BILLION;
+}
+
+/* split a nanosecond duration into a timeval (microsecond precision) */
+static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s)
+{
+ s->tv_sec = d / ONE_BILLION;
+ s->tv_usec = (d - s->tv_sec * ONE_BILLION) / 1000;
+}
+
+/* split a nanosecond duration into a timespec (full precision) */
+static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s)
+{
+ s->tv_sec = d / ONE_BILLION;
+ s->tv_nsec = d - ((int64_t)s->tv_sec) * ONE_BILLION;
+}
+
+/* one scheduler tick, expressed in nanoseconds */
+static inline cfs_duration_t cfs_time_minimal_timeout(void)
+{
+ return ONE_BILLION / (u_int64_t)hz;
+}
+
+/* the inline function cfs_time_minimal_timeout() cannot be used to
+ * initialize a static variable, hence this constant */
+#define CFS_MIN_DELAY (ONE_BILLION / (u_int64_t)100)
+
+#define LTIME_S(t) (t)
+
+/* __KERNEL__ */
+#else
+
+/*
+ * User level
+ */
+#include <libcfs/user-time.h>
+
+/* __KERNEL__ */
+#endif
+
+/* __LIBCFS_DARWIN_DARWIN_TIME_H__ */
+#endif
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines.
+ *
+ */
+
+#ifndef __LIBCFS_DARWIN_XNU_TYPES_H__
+#define __LIBCFS_DARWIN_XNU_TYPES_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#include <mach/mach_types.h>
+#include <sys/types.h>
+
+typedef u_int8_t __u8;
+typedef u_int16_t __u16;
+typedef u_int32_t __u32;
+typedef u_int64_t __u64;
+
+#ifdef __KERNEL__
+
+#include <kern/kern_types.h>
+
+/* signed fixed-width aliases, unless some header defined them already */
+#ifndef __s32
+typedef __signed__ int __s32;
+#endif
+#ifndef __s64
+typedef __signed__ long long __s64;
+#endif
+
+/* placeholder event channel; kdev_t maps straight onto BSD dev_t */
+typedef struct { int e; } event_chan_t;
+typedef dev_t kdev_t;
+
+/*
+ * Atomic operations
+ */
+#include <libkern/OSAtomic.h>
+
+/* Linux-style atomic counter backed by xnu's hw_atomic_add/sub.
+ * NB: read/set are plain (non-atomic) accesses of the volatile field;
+ * add/sub return the new value, which dec_and_test relies on. */
+typedef struct { volatile uint32_t counter; } atomic_t;
+
+#define ATOMIC_INIT(i) { (i) }
+#define atomic_read(a) ((a)->counter)
+#define atomic_set(a, v) (((a)->counter) = (v))
+#define atomic_add(v, a) hw_atomic_add((uint32_t *)&((a)->counter), v)
+#define atomic_sub(v, a) hw_atomic_sub((uint32_t *)&((a)->counter), v)
+#define atomic_inc(a) atomic_add(1, a)
+#define atomic_dec(a) atomic_sub(1, a)
+#define atomic_sub_and_test(v, a) ( atomic_sub(v, a) == 0 )
+#define atomic_dec_and_test(a) ( atomic_dec(a) == 0 )
+
+#include <libsa/mach/mach.h>
+typedef uint64_t loff_t;
+
+#else /* !__KERNEL__ */
+
+#include <stdint.h>
+
+typedef uint64_t loff_t;
+
+#endif /* __KERNEL END */
+
+#endif /* __XNU_CFS_TYPES_H__ */
--- /dev/null
+#ifndef __LIBCFS_DARWIN_XNU_UTILS_H__
+#define __LIBCFS_DARWIN_XNU_UTILS_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#include <sys/random.h>
+
+#ifdef __KERNEL__
+/* string helpers the xnu kernel lacks; implemented in darwin-utils.c */
+inline int isspace(char c);
+char *strpbrk(const char *cs, const char *ct);
+char * strsep(char **s, const char *ct);
+size_t strnlen(const char * s, size_t count);
+char * strstr(const char *in, const char *str);
+char * strrchr(const char *p, int ch);
+char * ul2dstr(unsigned long address, char *buf, int len);
+
+/* Linux simple_strto* mapped to the BSD libkern converters */
+#define simple_strtol(a1, a2, a3) strtol(a1, a2, a3)
+#define simple_strtoul(a1, a2, a3) strtoul(a1, a2, a3)
+#define simple_strtoll(a1, a2, a3) strtoq(a1, a2, a3)
+#define simple_strtoull(a1, a2, a3) strtouq(a1, a2, a3)
+
+/* non-atomic bitmap ops via BSD's isset/setbit/clrbit macros */
+#define test_bit(i, a) isset(a, i)
+#define set_bit(i, a) setbit(a, i)
+#define clear_bit(i, a) clrbit(a, i)
+
+#define get_random_bytes(buf, len) read_random(buf, len)
+
+#endif /* __KERNEL__ */
+
+/* type-safe min/max (arguments evaluated exactly once) */
+#ifndef min_t
+#define min_t(type,x,y) \
+ ({ type __x = (x); type __y = (y); __x < __y ? __x: __y; })
+#endif
+#ifndef max_t
+#define max_t(type,x,y) \
+ ({ type __x = (x); type __y = (y); __x > __y ? __x: __y; })
+#endif
+
+/* Linux-style do_div: divides n in place, yields the remainder.
+ * NB: 'n' is evaluated twice (read then written) and is now
+ * parenthesized at the write-back, which the original omitted. */
+#define do_div(n,base) \
+ ({ \
+ __u64 __n = (n); \
+ __u32 __base = (base); \
+ __u32 __mod; \
+ \
+ __mod = __n % __base; \
+ (n) = __n / __base; \
+ __mod; \
+ })
+
+/* expand an in_addr-sized value into four printf byte arguments
+ * (matches the Linux NIPQUAD idiom; host byte order) */
+#define NIPQUAD(addr) \
+ ((unsigned char *)&addr)[0], \
+ ((unsigned char *)&addr)[1], \
+ ((unsigned char *)&addr)[2], \
+ ((unsigned char *)&addr)[3]
+
+#define HIPQUAD NIPQUAD
+
+#endif /* __XNU_UTILS_H__ */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __LIBCFS_DARWIN_KP30__
+#define __LIBCFS_DARWIN_KP30__
+
+#ifndef __LIBCFS_KP30_H__
+#error Do not #include this file directly. #include <libcfs/kp30.h> instead
+#endif
+
+#ifdef __KERNEL__
+
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/systm.h>
+#include <mach/mach_types.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/conf.h>
+#include <miscfs/devfs/devfs.h>
+#include <stdarg.h>
+
+#include <libcfs/darwin/darwin-lock.h>
+#include <libcfs/darwin/darwin-prim.h>
+#include <portals/p30.h>
+
+/* yield the CPU for one tick.  NB: no trailing semicolon in the
+ * expansion — the original had one, which broke 'if (x) our_cond_resched();
+ * else ...' at call sites. */
+#define our_cond_resched() schedule_timeout(1)
+
+/* no spin_is_locked() equivalent on xnu yet, so the assertion is a stub */
+#ifdef CONFIG_SMP
+#define LASSERT_SPIN_LOCKED(lock) do {} while(0) /* XXX */
+#else
+#define LASSERT_SPIN_LOCKED(lock) do {} while(0)
+#endif
+
+/* XXX no panic/dump support wired up on Darwin yet */
+#define LBUG_WITH_LOC(file, func, line) do {} while(0)
+
+/* --------------------------------------------------------------------- */
+
+/* symbol registry replaces Linux's module symbol tables */
+#define PORTAL_SYMBOL_REGISTER(x) cfs_symbol_register(#x, &x)
+#define PORTAL_SYMBOL_UNREGISTER(x) cfs_symbol_unregister(#x)
+
+#define PORTAL_SYMBOL_GET(x) ((typeof(&x))cfs_symbol_get(#x))
+#define PORTAL_SYMBOL_PUT(x) cfs_symbol_put(#x)
+
+/* module refcounting is a no-op on this platform */
+#define PORTAL_MODULE_USE do{int i = 0; i++;}while(0)
+#define PORTAL_MODULE_UNUSE do{int i = 0; i--;}while(0)
+
+#define printk(format, args...) printf(format, ## args)
+
+#else /* !__KERNEL__ */
+# include <stdio.h>
+# include <stdlib.h>
+# include <stdint.h>
+# include <unistd.h>
+# include <time.h>
+# include <machine/limits.h>
+# include <sys/types.h>
+#endif
+
+/******************************************************************************/
+/* Light-weight trace
+ * Support for temporary event tracing with minimal Heisenberg effect. */
+#define LWT_SUPPORT 0
+
+/* one light-weight trace record: timestamp, source location, thread,
+ * and four caller-chosen payload words */
+typedef struct {
+ long long lwte_when; /* cycle/time stamp */
+ char *lwte_where; /* source location string */
+ void *lwte_task; /* thread that logged the event */
+ long lwte_p1; /* caller-defined payload */
+ long lwte_p2;
+ long lwte_p3;
+ long lwte_p4;
+} lwt_event_t;
+
+# define LWT_EVENT(p1,p2,p3,p4) /* no lwt implementation yet */
+
+/* -------------------------------------------------------------------------- */
+
+#define IOCTL_PORTAL_TYPE struct portal_ioctl_data
+
+/* printf formats for 64-bit and size_t quantities on this ABI */
+#define LPU64 "%llu"
+#define LPD64 "%lld"
+#define LPX64 "%llx"
+#define LPSZ "%lu"
+#define LPSSZ "%ld"
+/* poison patterns written into freed/uninitialised memory */
+# define LI_POISON ((int)0x5a5a5a5a)
+# define LL_POISON ((long)0x5a5a5a5a)
+# define LP_POISON ((void *)(long)0x5a5a5a5a)
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __LIBCFS_DARWIN_LIBCFS_H__
+#define __LIBCFS_DARWIN_LIBCFS_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#include <mach/mach_types.h>
+#include <string.h>
+#include <libcfs/darwin/darwin-types.h>
+#include <libcfs/darwin/darwin-time.h>
+#include <libcfs/darwin/darwin-prim.h>
+#include <libcfs/darwin/darwin-mem.h>
+#include <libcfs/darwin/darwin-lock.h>
+#include <libcfs/darwin/darwin-fs.h>
+
+#ifdef __KERNEL__
+# include <sys/types.h>
+# include <sys/time.h>
+# define do_gettimeofday(tv) microuptime(tv)
+#else
+# include <sys/time.h>
+# define do_gettimeofday(tv) gettimeofday(tv, NULL);
+typedef unsigned long long cycles_t;
+#endif
+
+/* Linux-style endianness helpers mapped onto Darwin's OSSwap* family */
+#define __cpu_to_le64(x) OSSwapHostToLittleInt64(x)
+#define __cpu_to_le32(x) OSSwapHostToLittleInt32(x)
+#define __cpu_to_le16(x) OSSwapHostToLittleInt16(x)
+
+#define __le16_to_cpu(x) OSSwapLittleToHostInt16(x)
+#define __le32_to_cpu(x) OSSwapLittleToHostInt32(x)
+#define __le64_to_cpu(x) OSSwapLittleToHostInt64(x)
+
+#define cpu_to_le64(x) __cpu_to_le64(x)
+#define cpu_to_le32(x) __cpu_to_le32(x)
+#define cpu_to_le16(x) __cpu_to_le16(x)
+
+#define le64_to_cpu(x) __le64_to_cpu(x)
+#define le32_to_cpu(x) __le32_to_cpu(x)
+#define le16_to_cpu(x) __le16_to_cpu(x)
+
+/* unconditional byte swaps; *s variants swap in place */
+#define __swab16(x) OSSwapInt16(x)
+#define __swab32(x) OSSwapInt32(x)
+#define __swab64(x) OSSwapInt64(x)
+#define __swab16s(x) do { *(x) = __swab16(*(x)); } while (0)
+#define __swab32s(x) do { *(x) = __swab32(*(x)); } while (0)
+#define __swab64s(x) do { *(x) = __swab64(*(x)); } while (0)
+
+/* on-the-wire/on-disk header preceding each debug-log record;
+ * packed so the layout matches across platforms and tools */
+struct ptldebug_header {
+ __u32 ph_len; /* total record length, header included */
+ __u32 ph_flags;
+ __u32 ph_subsys; /* debug subsystem mask bit */
+ __u32 ph_mask; /* debug type mask bit */
+ __u32 ph_cpu_id;
+ __u32 ph_sec; /* timestamp: seconds */
+ __u64 ph_usec; /* timestamp: microseconds */
+ __u32 ph_stack; /* stack usage at log time */
+ __u32 ph_pid;
+ __u32 ph_extern_pid;
+ __u32 ph_line_num; /* source line of the CDEBUG call */
+} __attribute__((packed));
+
+
+#ifdef __KERNEL__
+# include <sys/systm.h>
+# include <pexpert/pexpert.h>
+/* Fix me */
+# define THREAD_SIZE 8192
+#else
+# define THREAD_SIZE 8192
+#endif
+#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5)
+
+#define CHECK_STACK(stack) do { } while(0)
+#define CDEBUG_STACK (0L)
+
+/* Darwin has defined RETURN, so we have to undef it in lustre */
+#ifdef RETURN
+#undef RETURN
+#endif
+
+/*
+ * When this is enabled, debugging messages are indented according to the
+ * current "nesting level". The nesting level is increased when the ENTRY
+ * macro is executed, and decreased on EXIT and RETURN.
+ */
+#ifdef __KERNEL__
+#define ENTRY_NESTING_SUPPORT (0)
+#endif
+
+#if ENTRY_NESTING_SUPPORT
+
+/*
+ * Currently ENTRY_NESTING_SUPPORT is only supported for the XNU port. The
+ * basic idea is to keep a per-thread pointer to a small data structure
+ * (struct cfs_debug_data) describing the current nesting level. In XNU the
+ * unused proc->p_wmegs field is hijacked for this. On Linux,
+ * current->journal_info can be used. In user space,
+ * pthread_{g,s}etspecific().
+ *
+ * The ENTRY macro allocates a new cfs_debug_data on the stack and installs
+ * it as the current nesting level, storing the old data in the
+ * cfs_debug_data it just created.
+ *
+ * EXIT pops the old value back.
+ *
+ */
+
+/*
+ * One problem with this approach is that there is a lot of code that
+ * does ENTRY and then escapes scope without doing EXIT/RETURN. In this
+ * case the per-thread current nesting level pointer is left dangling (it
+ * points to a stack area that has possibly already been overwritten). To
+ * detect such cases, we add two magic fields to the cfs_debug_data and
+ * check them whenever the current nesting level pointer is dereferenced.
+ * While looking flaky, this works because the stack is always consumed
+ * "continuously".
+ */
+enum {
+ CDD_MAGIC1 = 0x02128506,
+ CDD_MAGIC2 = 0x42424242
+};
+
+/* per-thread nesting record, stack-allocated by ENTRY_NESTING; the
+ * magic fields bracket the struct to detect dangling pointers (see
+ * the comment above) */
+struct cfs_debug_data {
+ unsigned int magic1; /* CDD_MAGIC1 */
+ struct cfs_debug_data *parent; /* previous nesting record */
+ int nesting_level;
+ unsigned int magic2; /* CDD_MAGIC2 */
+};
+
+void __entry_nesting(struct cfs_debug_data *child);
+void __exit_nesting(struct cfs_debug_data *child);
+unsigned int __current_nesting_level(void);
+
+/* push a fresh nesting record for the current scope */
+#define ENTRY_NESTING \
+struct cfs_debug_data __cdd = { .magic1 = CDD_MAGIC1, \
+ .parent = NULL, \
+ .nesting_level = 0, \
+ .magic2 = CDD_MAGIC2 }; \
+__entry_nesting(&__cdd);
+
+/* pop the record pushed by ENTRY_NESTING in this scope */
+#define EXIT_NESTING __exit_nesting(&__cdd)
+
+/* ENTRY_NESTING_SUPPORT */
+#else
+
+#define ENTRY_NESTING do {;} while (0)
+#define EXIT_NESTING do {;} while (0)
+#define __current_nesting_level() (0)
+
+/* ENTRY_NESTING_SUPPORT */
+#endif
+
+#define LUSTRE_PTL_PID 12345
+
+#define _XNU_LIBCFS_H
+
+/*
+ * Platform specific declarations for cfs_curproc API (libcfs/curproc.h)
+ *
+ * Implementation is in darwin-curproc.c
+ */
+#define CFS_CURPROC_COMM_MAX (sizeof ((struct proc *)0)->p_comm)
+/*
+ * XNU has no capabilities
+ */
+typedef int cfs_kernel_cap_t;
+
+#endif /* _XNU_LIBCFS_H */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __LIBCFS_DARWIN_LLTRACE_H__
+#define __LIBCFS_DARWIN_LLTRACE_H__
+
+#ifndef __LIBCFS_LLTRACE_H__
+#error Do not #include this file directly. #include <libcfs/lltrace.h> instead
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <portals/types.h>
+#include <libcfs/kp30.h>
+#include <mach/vm_param.h>
+#include <portals/ptlctl.h>
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines.
+ *
+ */
+
+#ifndef __LIBCFS_DARWIN_PORTALS_LIB_H__
+#define __LIBCFS_DARWIN_PORTALS_LIB_H__
+
+#ifndef __LIBCFS_PORTALS_LIB_H__
+#error Do not #include this file directly. #include <libcfs/portals_lib.h> instead
+#endif
+
+#include <string.h>
+
+#endif
--- /dev/null
+#ifndef __LIBCFS_DARWIN_PORTALS_UTILS_H__
+#define __LIBCFS_DARWIN_PORTALS_UTILS_H__
+
+#ifndef __LIBCFS_PORTALS_UTILS_H__
+#error Do not #include this file directly. #include <libcfs/portals_utils.h> instead
+#endif
+
+#include <libcfs/list.h>
+#ifdef __KERNEL__
+#include <mach/mach_types.h>
+#include <libcfs/libcfs.h>
+#else /* !__KERNEL__ */
+#include <machine/endian.h>
+#include <netinet/in.h>
+#include <sys/syscall.h>
+#endif /* !__KERNEL__ */
+
+#endif
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*/
-#ifndef _KP30_INCLUDED
-#define _KP30_INCLUDED
+#ifndef __LIBCFS_KP30_H__
+#define __LIBCFS_KP30_H__
-#include <linux/libcfs.h>
#define PORTAL_DEBUG
+#include <libcfs/libcfs.h>
-#ifdef __KERNEL__
-# include <linux/vmalloc.h>
-# include <linux/time.h>
-# include <linux/slab.h>
-# include <linux/interrupt.h>
-# include <linux/highmem.h>
-# include <linux/module.h>
-# include <linux/version.h>
-# include <portals/p30.h>
-# include <linux/smp_lock.h>
-# include <asm/atomic.h>
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-#define schedule_work schedule_task
-#define prepare_work(wq,cb,cbdata) \
-do { \
- INIT_TQUEUE((wq), 0, 0); \
- PREPARE_TQUEUE((wq), (cb), (cbdata)); \
-} while (0)
-
-#define PageUptodate Page_Uptodate
-#define our_recalc_sigpending(current) recalc_sigpending(current)
-#define num_online_cpus() smp_num_cpus
-static inline void our_cond_resched(void)
-{
- if (current->need_resched)
- schedule ();
-}
-#define work_struct_t struct tq_struct
+#include <libcfs/arch/kp30.h>
-#else
-
-#define prepare_work(wq,cb,cbdata) \
-do { \
- INIT_WORK((wq), (void *)(cb), (void *)(cbdata)); \
-} while (0)
-#define wait_on_page wait_on_page_locked
-#define our_recalc_sigpending(current) recalc_sigpending()
-#define strtok(a,b) strpbrk(a, b)
-static inline void our_cond_resched(void)
-{
- cond_resched();
-}
-#define work_struct_t struct work_struct
+#ifdef __KERNEL__
-#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */
+# ifndef DEBUG_SUBSYSTEM
+# define DEBUG_SUBSYSTEM S_UNDEFINED
+# endif
#ifdef PORTAL_DEBUG
extern void kportal_assertion_failed(char *expr, char *file, const char *func,
#define LASSERTF(cond, fmt...) do { } while (0)
#endif
-#ifdef CONFIG_SMP
-#define LASSERT_SPIN_LOCKED(lock) LASSERT(spin_is_locked(lock))
-#else
-#define LASSERT_SPIN_LOCKED(lock) do {} while(0)
-#endif
-
-#ifdef __arch_um__
-#define LBUG_WITH_LOC(file, func, line) \
-do { \
- CEMERG("LBUG - trying to dump log to /tmp/lustre-log\n"); \
- portals_debug_dumplog(); \
- portals_run_lbug_upcall(file, func, line); \
- panic("LBUG"); \
-} while (0)
-#else
-#define LBUG_WITH_LOC(file, func, line) \
-do { \
- CEMERG("LBUG\n"); \
- portals_debug_dumpstack(NULL); \
- portals_debug_dumplog(); \
- portals_run_lbug_upcall(file, func, line); \
- set_task_state(current, TASK_UNINTERRUPTIBLE); \
- schedule(); \
-} while (0)
-#endif /* __arch_um__ */
-
+/* LBUG_WITH_LOC defined in portals/<os>/kp30.h */
#define LBUG() LBUG_WITH_LOC(__FILE__, __FUNCTION__, __LINE__)
/*
#define PORTAL_ALLOC_GFP(ptr, size, mask) \
do { \
LASSERT(!in_interrupt() || \
- (size <= PORTAL_VMALLOC_SIZE && mask == GFP_ATOMIC)); \
+ (size <= PORTAL_VMALLOC_SIZE && mask == CFS_ALLOC_ATOMIC));\
if ((size) > PORTAL_VMALLOC_SIZE) \
- (ptr) = vmalloc(size); \
+ (ptr) = cfs_alloc_large(size); \
else \
- (ptr) = kmalloc((size), (mask)); \
+ (ptr) = cfs_alloc((size), (mask)); \
if ((ptr) == NULL) { \
CERROR("PORTALS: out of memory at %s:%d (tried to alloc '"\
#ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));\
atomic_read(&portal_kmemory)); \
} else { \
portal_kmem_inc((ptr), (size)); \
- memset((ptr), 0, (size)); \
+ if (!((mask) & CFS_ALLOC_ZERO)) \
+ memset((ptr), 0, (size)); \
} \
CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d).\n", \
(int)(size), (ptr), atomic_read (&portal_kmemory)); \
} while (0)
#define PORTAL_ALLOC(ptr, size) \
- PORTAL_ALLOC_GFP(ptr, size, GFP_NOFS)
+ PORTAL_ALLOC_GFP(ptr, size, CFS_ALLOC_IO)
#define PORTAL_ALLOC_ATOMIC(ptr, size) \
- PORTAL_ALLOC_GFP(ptr, size, GFP_ATOMIC)
+ PORTAL_ALLOC_GFP(ptr, size, CFS_ALLOC_ATOMIC)
#define PORTAL_FREE(ptr, size) \
do { \
break; \
} \
if (s > PORTAL_VMALLOC_SIZE) \
- vfree(ptr); \
+ cfs_free_large(ptr); \
else \
- kfree(ptr); \
+ cfs_free(ptr); \
portal_kmem_dec((ptr), s); \
CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \
s, (ptr), atomic_read(&portal_kmemory)); \
} while (0)
-/* ------------------------------------------------------------------- */
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-
-#define PORTAL_SYMBOL_REGISTER(x) inter_module_register(#x, THIS_MODULE, &x)
-#define PORTAL_SYMBOL_UNREGISTER(x) inter_module_unregister(#x)
-
-#define PORTAL_SYMBOL_GET(x) ((typeof(&x))inter_module_get(#x))
-#define PORTAL_SYMBOL_PUT(x) inter_module_put(#x)
-
-#define PORTAL_MODULE_USE MOD_INC_USE_COUNT
-#define PORTAL_MODULE_UNUSE MOD_DEC_USE_COUNT
-#else
-
-#define PORTAL_SYMBOL_REGISTER(x)
-#define PORTAL_SYMBOL_UNREGISTER(x)
-
-#define PORTAL_SYMBOL_GET(x) symbol_get(x)
-#define PORTAL_SYMBOL_PUT(x) symbol_put(x)
-
-#define PORTAL_MODULE_USE try_module_get(THIS_MODULE)
-#define PORTAL_MODULE_UNUSE module_put(THIS_MODULE)
-
-#endif
-
/******************************************************************************/
#ifdef PORTALS_PROFILING
#endif /* PORTALS_PROFILING */
/* debug.c */
-void portals_debug_dumpstack(struct task_struct *tsk);
+extern spinlock_t stack_backtrace_lock;
+
+void portals_debug_dumpstack(cfs_task_t *tsk);
void portals_run_upcall(char **argv);
void portals_run_lbug_upcall(char * file, const char *fn, const int line);
void portals_debug_dumplog(void);
int portals_debug_set_daemon(unsigned int cmd, unsigned int length,
char *file, unsigned int size);
__s32 portals_debug_copy_to_user(char *buf, unsigned long len);
-#if (__GNUC__)
/* Use the special GNU C __attribute__ hack to have the compiler check the
* printf style argument string against the actual argument count and
* types.
*/
-#ifdef printf
-# warning printf has been defined as a macro...
-# undef printf
-#endif
void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
const int line, unsigned long stack,
char *format, ...)
__attribute__ ((format (printf, 7, 8)));
-#else
-void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
- const int line, unsigned long stack,
- const char *format, ...);
-#endif /* __GNUC__ */
void portals_debug_set_level(unsigned int debug_level);
-# define fprintf(a, format, b...) CDEBUG(D_OTHER, format , ## b)
-# define printf(format, b...) CDEBUG(D_OTHER, format , ## b)
-# define time(a) CURRENT_TIME
-
extern void kportal_daemonize (char *name);
extern void kportal_blockallsigs (void);
#else /* !__KERNEL__ */
-# include <stdio.h>
-# include <stdlib.h>
-#ifndef __CYGWIN__
-# include <stdint.h>
-#else
-# include <cygwin-ioctl.h>
-#endif
-# include <unistd.h>
-# include <time.h>
-# include <limits.h>
# ifndef DEBUG_SUBSYSTEM
# define DEBUG_SUBSYSTEM S_UNDEFINED
# endif
#define CERROR(format, a...) CDEBUG(D_ERROR, format, ## a)
#endif
+/*
+ * Compile-time assertions. @cond has to be a constant expression.
+ * ISO C Standard:
+ *
+ * 6.8.4.2 The switch statement
+ *
+ * ....
+ *
+ * [#3] The expression of each case label shall be an integer
+ * constant expression and no two of the case constant
+ * expressions in the same switch statement shall have the same
+ * value after conversion...
+ *
+ */
+#define CLASSERT(cond) ({ switch(42) { case (cond): case 0: break; } })
+
/* support decl needed both by kernel and liblustre */
char *portals_nid2str(int nal, ptl_nid_t nid, char *str);
char *portals_id2str(int nal, ptl_process_id_t nid, char *str);
# define CURRENT_TIME time(0)
#endif
-/******************************************************************************/
-/* Light-weight trace
- * Support for temporary event tracing with minimal Heisenberg effect. */
-#define LWT_SUPPORT 0
-
-#define LWT_MEMORY (16<<20)
-
-#if !KLWT_SUPPORT
-# if defined(__KERNEL__)
-# if !defined(BITS_PER_LONG)
-# error "BITS_PER_LONG not defined"
-# endif
-# elif !defined(__WORDSIZE)
-# error "__WORDSIZE not defined"
-# else
-# define BITS_PER_LONG __WORDSIZE
-# endif
-
-/* kernel hasn't defined this? */
-typedef struct {
- long long lwte_when;
- char *lwte_where;
- void *lwte_task;
- long lwte_p1;
- long lwte_p2;
- long lwte_p3;
- long lwte_p4;
-# if BITS_PER_LONG > 32
- long lwte_pad;
-# endif
-} lwt_event_t;
-#endif /* !KLWT_SUPPORT */
-
-#if LWT_SUPPORT
-# ifdef __KERNEL__
-# if !KLWT_SUPPORT
-
-typedef struct _lwt_page {
- struct list_head lwtp_list;
- struct page *lwtp_page;
- lwt_event_t *lwtp_events;
-} lwt_page_t;
-
-typedef struct {
- int lwtc_current_index;
- lwt_page_t *lwtc_current_page;
-} lwt_cpu_t;
-
-extern int lwt_enabled;
-extern lwt_cpu_t lwt_cpus[];
-
-/* Note that we _don't_ define LWT_EVENT at all if LWT_SUPPORT isn't set.
- * This stuff is meant for finding specific problems; it never stays in
- * production code... */
-
-#define LWTSTR(n) #n
-#define LWTWHERE(f,l) f ":" LWTSTR(l)
-#define LWT_EVENTS_PER_PAGE (PAGE_SIZE / sizeof (lwt_event_t))
-
-#define LWT_EVENT(p1, p2, p3, p4) \
-do { \
- unsigned long flags; \
- lwt_cpu_t *cpu; \
- lwt_page_t *p; \
- lwt_event_t *e; \
- \
- if (lwt_enabled) { \
- local_irq_save (flags); \
- \
- cpu = &lwt_cpus[smp_processor_id()]; \
- p = cpu->lwtc_current_page; \
- e = &p->lwtp_events[cpu->lwtc_current_index++]; \
- \
- if (cpu->lwtc_current_index >= LWT_EVENTS_PER_PAGE) { \
- cpu->lwtc_current_page = \
- list_entry (p->lwtp_list.next, \
- lwt_page_t, lwtp_list); \
- cpu->lwtc_current_index = 0; \
- } \
- \
- e->lwte_when = get_cycles(); \
- e->lwte_where = LWTWHERE(__FILE__,__LINE__); \
- e->lwte_task = current; \
- e->lwte_p1 = (long)(p1); \
- e->lwte_p2 = (long)(p2); \
- e->lwte_p3 = (long)(p3); \
- e->lwte_p4 = (long)(p4); \
- \
- local_irq_restore (flags); \
- } \
-} while (0)
-
-#endif /* !KLWT_SUPPORT */
-
-extern int lwt_init (void);
-extern void lwt_fini (void);
-extern int lwt_lookup_string (int *size, char *knlptr,
- char *usrptr, int usrsize);
-extern int lwt_control (int enable, int clear);
-extern int lwt_snapshot (cycles_t *now, int *ncpu, int *total_size,
- void *user_ptr, int user_size);
-# else /* __KERNEL__ */
-# define LWT_EVENT(p1,p2,p3,p4) /* no userland implementation yet */
-# endif /* __KERNEL__ */
-#endif /* LWT_SUPPORT */
+/* --------------------------------------------------------------------
+ * Light-weight trace
+ * Support for temporary event tracing with minimal Heisenberg effect.
+ * All LWT (light-weight trace) definitions live in arch/kp30.h
+ * -------------------------------------------------------------------- */
struct portals_device_userstate
{
int pdu_memhog_pages;
- struct page *pdu_memhog_root_page;
+ cfs_page_t *pdu_memhog_root_page;
};
-#include <linux/portals_lib.h>
+#include <libcfs/portals_lib.h>
/*
* USER LEVEL STUFF BELOW
return 0;
}
-#else
-#include <asm/uaccess.h>
-
-/* buffer MUST be at least the size of portal_ioctl_hdr */
-static inline int portal_ioctl_getdata(char *buf, char *end, void *arg)
-{
- struct portal_ioctl_hdr *hdr;
- struct portal_ioctl_data *data;
- int err;
- ENTRY;
-
- hdr = (struct portal_ioctl_hdr *)buf;
- data = (struct portal_ioctl_data *)buf;
-
- err = copy_from_user(buf, (void *)arg, sizeof(*hdr));
- if (err)
- RETURN(err);
- if (hdr->ioc_version != PORTAL_IOCTL_VERSION) {
- CERROR("PORTALS: version mismatch kernel vs application\n");
- RETURN(-EINVAL);
- }
-
- if (hdr->ioc_len + buf >= end) {
- CERROR("PORTALS: user buffer exceeds kernel buffer\n");
- RETURN(-EINVAL);
- }
-
-
- if (hdr->ioc_len < sizeof(struct portal_ioctl_data)) {
- CERROR("PORTALS: user buffer too small for ioctl\n");
- RETURN(-EINVAL);
- }
-
- err = copy_from_user(buf, (void *)arg, hdr->ioc_len);
- if (err)
- RETURN(err);
-
- if (portal_ioctl_is_invalid(data)) {
- CERROR("PORTALS: ioctl not correctly formatted\n");
- RETURN(-EINVAL);
- }
-
- if (data->ioc_inllen1)
- data->ioc_inlbuf1 = &data->ioc_bulk[0];
+#else
- if (data->ioc_inllen2)
- data->ioc_inlbuf2 = &data->ioc_bulk[0] +
- size_round(data->ioc_inllen1);
+extern inline int portal_ioctl_getdata(char *buf, char *end, void *arg);
- RETURN(0);
-}
#endif
/* ioctls for manipulating snapshots 30- */
#define IOC_PORTAL_TYPE 'e'
#define IOC_PORTAL_MIN_NR 30
-#define IOC_PORTAL_PING _IOWR('e', 30, long)
+#define IOC_PORTAL_PING _IOWR('e', 30, IOCTL_PORTAL_TYPE)
-#define IOC_PORTAL_CLEAR_DEBUG _IOWR('e', 32, long)
-#define IOC_PORTAL_MARK_DEBUG _IOWR('e', 33, long)
-#define IOC_PORTAL_PANIC _IOWR('e', 34, long)
-#define IOC_PORTAL_NAL_CMD _IOWR('e', 35, long)
-#define IOC_PORTAL_GET_NID _IOWR('e', 36, long)
-#define IOC_PORTAL_FAIL_NID _IOWR('e', 37, long)
+#define IOC_PORTAL_CLEAR_DEBUG _IOWR('e', 32, IOCTL_PORTAL_TYPE)
+#define IOC_PORTAL_MARK_DEBUG _IOWR('e', 33, IOCTL_PORTAL_TYPE)
+#define IOC_PORTAL_PANIC _IOWR('e', 34, IOCTL_PORTAL_TYPE)
+#define IOC_PORTAL_NAL_CMD _IOWR('e', 35, IOCTL_PORTAL_TYPE)
+#define IOC_PORTAL_GET_NID _IOWR('e', 36, IOCTL_PORTAL_TYPE)
+#define IOC_PORTAL_FAIL_NID _IOWR('e', 37, IOCTL_PORTAL_TYPE)
-#define IOC_PORTAL_LWT_CONTROL _IOWR('e', 39, long)
-#define IOC_PORTAL_LWT_SNAPSHOT _IOWR('e', 40, long)
-#define IOC_PORTAL_LWT_LOOKUP_STRING _IOWR('e', 41, long)
-#define IOC_PORTAL_MEMHOG _IOWR('e', 42, long)
+#define IOC_PORTAL_LWT_CONTROL _IOWR('e', 39, IOCTL_PORTAL_TYPE)
+#define IOC_PORTAL_LWT_SNAPSHOT _IOWR('e', 40, IOCTL_PORTAL_TYPE)
+#define IOC_PORTAL_LWT_LOOKUP_STRING _IOWR('e', 41, IOCTL_PORTAL_TYPE)
+#define IOC_PORTAL_MEMHOG _IOWR('e', 42, IOCTL_PORTAL_TYPE)
#define IOC_PORTAL_MAX_NR 42
enum {
typedef int (*cfg_record_cb_t)(enum cfg_record_type, int len, void *data);
-#ifdef __CYGWIN__
-# ifndef BITS_PER_LONG
-# if (~0UL) == 0xffffffffUL
-# define BITS_PER_LONG 32
-# else
-# define BITS_PER_LONG 64
-# endif
-# endif
-#endif
-
-#if BITS_PER_LONG > 32
-# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a)
-# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a)
-# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a)
-#else
-# define LI_POISON ((int)0x5a5a5a5a)
-# define LL_POISON ((long)0x5a5a5a5a)
-# define LP_POISON ((void *)(long)0x5a5a5a5a)
-#endif
-
-#if defined(__x86_64__)
-# define LPU64 "%Lu"
-# define LPD64 "%Ld"
-# define LPX64 "%#Lx"
-# define LPSZ "%lu"
-# define LPSSZ "%ld"
-#elif (BITS_PER_LONG == 32 || __WORDSIZE == 32)
-# define LPU64 "%Lu"
-# define LPD64 "%Ld"
-# define LPX64 "%#Lx"
-# define LPSZ "%u"
-# define LPSSZ "%d"
-#elif (BITS_PER_LONG == 64 || __WORDSIZE == 64)
-# define LPU64 "%lu"
-# define LPD64 "%ld"
-# define LPX64 "%#lx"
-# define LPSZ "%lu"
-# define LPSSZ "%ld"
-#endif
-#ifndef LPU64
-# error "No word size defined"
-#endif
-
/* lustre_id output helper macros */
#define DLID4 "%lu/%lu/%lu/%lu"
(unsigned long)(id)->li_fid.lf_group, \
(unsigned long)(id)->li_stc.u.e3s.l3s_ino, \
(unsigned long)(id)->li_stc.u.e3s.l3s_gen
-
+
#endif
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*/
-#ifndef _LIBCFS_H
-#define _LIBCFS_H
+#ifndef __LIBCFS_LIBCFS_H__
+#define __LIBCFS_LIBCFS_H__
-#ifdef HAVE_ASM_TYPES_H
-#include <asm/types.h>
-#else
-#include "types.h"
+#if !__GNUC__
+#define __attribute__(x)
#endif
-#ifdef __KERNEL__
-# include <linux/time.h>
-# include <asm/timex.h>
-#else
-# include <sys/time.h>
-# define do_gettimeofday(tv) gettimeofday(tv, NULL);
-typedef unsigned long long cycles_t;
-#endif
+#include <libcfs/arch/libcfs.h>
+#include "curproc.h"
#define PORTAL_DEBUG
#define LOWEST_BIT_SET(x) ((x) & ~((x) - 1))
-#ifndef __KERNEL__
-/* Userpace byte flipping */
-# include <endian.h>
-# include <byteswap.h>
-# define __swab16(x) bswap_16(x)
-# define __swab32(x) bswap_32(x)
-# define __swab64(x) bswap_64(x)
-# define __swab16s(x) do {*(x) = bswap_16(*(x));} while (0)
-# define __swab32s(x) do {*(x) = bswap_32(*(x));} while (0)
-# define __swab64s(x) do {*(x) = bswap_64(*(x));} while (0)
-# if __BYTE_ORDER == __LITTLE_ENDIAN
-# define le16_to_cpu(x) (x)
-# define cpu_to_le16(x) (x)
-# define le32_to_cpu(x) (x)
-# define cpu_to_le32(x) (x)
-# define le64_to_cpu(x) (x)
-# define cpu_to_le64(x) (x)
-# else
-# if __BYTE_ORDER == __BIG_ENDIAN
-# define le16_to_cpu(x) bswap_16(x)
-# define cpu_to_le16(x) bswap_16(x)
-# define le32_to_cpu(x) bswap_32(x)
-# define cpu_to_le32(x) bswap_32(x)
-# define le64_to_cpu(x) bswap_64(x)
-# define cpu_to_le64(x) bswap_64(x)
-# else
-# error "Unknown byte order"
-# endif /* __BIG_ENDIAN */
-# endif /* __LITTLE_ENDIAN */
-#endif /* ! __KERNEL__ */
-
/*
* Debugging
*/
extern unsigned int portal_debug;
extern unsigned int portal_printk;
-struct ptldebug_header {
- __u32 ph_len;
- __u32 ph_flags;
- __u32 ph_subsys;
- __u32 ph_mask;
- __u32 ph_cpu_id;
- __u32 ph_sec;
- __u64 ph_usec;
- __u32 ph_stack;
- __u32 ph_pid;
- __u32 ph_extern_pid;
- __u32 ph_line_num;
-} __attribute__((packed));
+/*
+ * struct ptldebug_header is defined in libcfs/<os>/libcfs.h
+ */
#define PH_FLAG_FIRST_RECORD 1
* utils/lconf
*/
-#ifdef __KERNEL__
-# include <linux/sched.h> /* THREAD_SIZE */
-#else
-# ifndef THREAD_SIZE /* x86_64 has THREAD_SIZE in userspace */
-# define THREAD_SIZE 8192
-# endif
+#ifndef DEBUG_SUBSYSTEM
+# define DEBUG_SUBSYSTEM S_UNDEFINED
#endif
-#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5)
-
-#ifdef __KERNEL__
-# ifdef __ia64__
-# define CDEBUG_STACK (THREAD_SIZE - \
- ((unsigned long)__builtin_dwarf_cfa() & \
- (THREAD_SIZE - 1)))
-# else
-# define CDEBUG_STACK (THREAD_SIZE - \
- ((unsigned long)__builtin_frame_address(0) & \
- (THREAD_SIZE - 1)))
-# endif /* __ia64__ */
-
-#define CHECK_STACK(stack) \
- do { \
- if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) { \
- portals_debug_msg(DEBUG_SUBSYSTEM, D_WARNING, \
- __FILE__, __FUNCTION__, __LINE__, \
- (stack),"maximum lustre stack %u\n",\
- portal_stack = (stack)); \
- /*panic("LBUG");*/ \
- } \
- } while (0)
-#else /* !__KERNEL__ */
-#define CHECK_STACK(stack) do { } while(0)
-#define CDEBUG_STACK (0L)
-#endif /* __KERNEL__ */
-
#if 1
#define CDEBUG(mask, format, a...) \
do { \
#define CDEBUG_MAX_LIMIT 600
#define CDEBUG_LIMIT(cdebug_mask, cdebug_format, a...) \
do { \
- static unsigned long cdebug_next = 0; \
- static int cdebug_count = 0, cdebug_delay = 1; \
+ static cfs_time_t cdebug_next = 0; \
+ static int cdebug_count = 0; \
+ static cfs_duration_t cdebug_delay = CFS_MIN_DELAY; \
\
CHECK_STACK(CDEBUG_STACK); \
- if (time_after(jiffies, cdebug_next)) { \
+ if (cfs_time_after(cfs_time_current(), cdebug_next)) { \
portals_debug_msg(DEBUG_SUBSYSTEM, cdebug_mask, __FILE__, \
__FUNCTION__, __LINE__, CDEBUG_STACK, \
cdebug_format, ## a); \
cdebug_count); \
cdebug_count = 0; \
} \
- if (time_after(jiffies, cdebug_next+(CDEBUG_MAX_LIMIT+10)*HZ))\
- cdebug_delay = cdebug_delay > 8 ? cdebug_delay/8 : 1; \
+ if (cfs_time_after(cfs_time_current(), \
+ cdebug_next + \
+ cfs_time_seconds(CDEBUG_MAX_LIMIT+10))) \
+ cdebug_delay = cdebug_delay > (8 * CFS_MIN_DELAY)? \
+ cdebug_delay/8 : CFS_MIN_DELAY; \
else \
- cdebug_delay = cdebug_delay*2 >= CDEBUG_MAX_LIMIT*HZ ?\
- CDEBUG_MAX_LIMIT*HZ : cdebug_delay*2; \
- cdebug_next = jiffies + cdebug_delay; \
+ cdebug_delay = cdebug_delay*2 >= cfs_time_seconds(CDEBUG_MAX_LIMIT)?\
+ cfs_time_seconds(CDEBUG_MAX_LIMIT) : \
+ cdebug_delay*2; \
+ cdebug_next = cfs_time_current() + cdebug_delay; \
} else { \
portals_debug_msg(DEBUG_SUBSYSTEM, \
portal_debug & ~(D_EMERG|D_ERROR|D_WARNING),\
goto label; \
} while (0)
+#define CDEBUG_ENTRY_EXIT (0)
+
+#ifdef CDEBUG_ENTRY_EXIT
+
+/*
+ * If rc is a NULL pointer, it must be written as RETURN((void *)NULL);
+ * otherwise the OS X (Darwin) build emits a warning.
+ */
#define RETURN(rc) \
do { \
typeof(rc) RETURN__ret = (rc); \
CDEBUG(D_TRACE, "Process leaving (rc=%lu : %ld : %lx)\n", \
(long)RETURN__ret, (long)RETURN__ret, (long)RETURN__ret);\
+ EXIT_NESTING; \
return RETURN__ret; \
} while (0)
#define ENTRY \
+ENTRY_NESTING; \
do { \
CDEBUG(D_TRACE, "Process entered\n"); \
} while (0)
#define EXIT \
do { \
CDEBUG(D_TRACE, "Process leaving\n"); \
+ EXIT_NESTING; \
} while(0)
#else
+
+#define RETURN(rc) return (rc)
+#define ENTRY
+#define EXIT
+
+#endif
+
+#else
#define CDEBUG(mask, format, a...) do { } while (0)
#define CWARN(format, a...) printk(KERN_WARNING format, ## a)
#define CERROR(format, a...) printk(KERN_ERR format, ## a)
#define EXIT do { } while (0)
#endif
-/* initial pid */
-# if CRAY_PORTALS
-/*
- * 1) ptl_pid_t in cray portals is only 16 bits, not 32 bits, therefore this
- * is too big.
- *
- * 2) the implementation of ernal in cray portals further restricts the pid
- * space that may be used to 0 <= pid <= 255 (an 8 bit value). Returns
- * an error at nal init time for any pid outside this range. Other nals
- * in cray portals don't have this restriction.
- * */
-#define LUSTRE_PTL_PID 9
-# else
-#define LUSTRE_PTL_PID 12345
-# endif
-
-#define LUSTRE_SRV_PTL_PID LUSTRE_PTL_PID
+#define LUSTRE_SRV_PTL_PID LUSTRE_PTL_PID
#define PORTALS_CFG_VERSION 0x00010001;
#ifdef __KERNEL__
-#include <linux/list.h>
+#include <libcfs/list.h>
struct libcfs_ioctl_handler {
struct list_head item;
#define DECLARE_IOCTL_HANDLER(ident, func) \
struct libcfs_ioctl_handler ident = { \
- .item = LIST_HEAD_INIT(ident.item), \
+ .item = CFS_LIST_HEAD_INIT(ident.item), \
.handle_ioctl = func \
}
int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand);
int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand);
-#endif
-
-#ifdef __KERNEL__
/* libcfs watchdogs */
struct lc_watchdog;
/* Add a watchdog which fires after "time" milliseconds of delay. You have to
* touch it once to enable it. */
-struct lc_watchdog *lc_watchdog_add(int time,
+struct lc_watchdog *lc_watchdog_add(int time,
void (*cb)(struct lc_watchdog *,
struct task_struct *,
void *),
void lc_watchdog_dumplog(struct lc_watchdog *lcw,
struct task_struct *tsk,
void *data);
-#endif /* !__KERNEL__ */
+
+/* __KERNEL__ */
+#endif
+
+/*
+ * libcfs pseudo device operations
+ *
+ * struct cfs_psdev_t and
+ * cfs_psdev_register() and
+ * cfs_psdev_deregister() are declared in
+ * libcfs/<os>/cfs_prim.h
+ *
+ * It's just draft now.
+ */
+
+struct cfs_psdev_file {
+ unsigned long off;
+ void *private_data;
+ unsigned long reserved1;
+ unsigned long reserved2;
+};
+
+struct cfs_psdev_ops {
+ int (*p_open)(unsigned long, void *);
+ int (*p_close)(unsigned long, void *);
+ int (*p_read)(struct cfs_psdev_file *, char *, unsigned long);
+ int (*p_write)(struct cfs_psdev_file *, char *, unsigned long);
+ int (*p_ioctl)(struct cfs_psdev_file *, unsigned long, void *);
+};
+
+/*
+ * generic time manipulation functions.
+ */
+
+static inline int cfs_time_after(cfs_time_t t1, cfs_time_t t2)
+{
+ return cfs_time_before(t2, t1);
+}
+
+static inline int cfs_time_aftereq(cfs_time_t t1, cfs_time_t t2)
+{
+ return cfs_time_beforeq(t2, t1);
+}
+
+/*
+ * return seconds since UNIX epoch
+ */
+static inline time_t cfs_unix_seconds(void)
+{
+ cfs_fs_time_t t;
+
+ cfs_fs_time_current(&t);
+ return cfs_fs_time_sec(&t);
+}
+
+#define CFS_RATELIMIT(seconds) \
+({ \
+ /* \
+ * XXX nikita: non-portable initializer \
+ */ \
+ static time_t __next_message = 0; \
+ int result; \
+ \
+ if (cfs_time_after(cfs_time_current(), __next_message)) \
+ result = 1; \
+ else { \
+ __next_message = cfs_time_shift(seconds); \
+ result = 0; \
+ } \
+ result; \
+})
+
+extern void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+ const int line, unsigned long stack,
+ char *format, ...)
+ __attribute__ ((format (printf, 7, 8)));
+
+
+static inline void cfs_slow_warning(cfs_time_t now, int seconds, char *msg)
+{
+ if (cfs_time_after(cfs_time_current(),
+ cfs_time_add(now, cfs_time_seconds(15))))
+ CERROR("slow %s %lu sec\n", msg,
+ cfs_duration_sec(cfs_time_sub(cfs_time_current(), now)));
+}
+
+/*
+ * helper function similar to do_gettimeofday() of Linux kernel
+ */
+static inline void cfs_fs_timeval(struct timeval *tv)
+{
+ cfs_fs_time_t time;
+
+ cfs_fs_time_current(&time);
+ cfs_fs_time_usec(&time, tv);
+}
+
+/*
+ * Return a valid time-out based on the user-supplied one. Currently we only
+ * check that the time-out is not shorter than the minimum allowed.
+ */
+static inline cfs_duration_t cfs_timeout_cap(cfs_duration_t timeout)
+{
+ if (timeout < cfs_time_minimal_timeout())
+ timeout = cfs_time_minimal_timeout();
+ return timeout;
+}
+
+/*
+ * Portable memory allocator API (draft)
+ */
+enum cfs_alloc_flags {
+ /* allocation is not allowed to block */
+ CFS_ALLOC_ATOMIC = (1 << 0),
+ /* allocation is allowed to block */
+ CFS_ALLOC_WAIT = (1 << 1),
+ /* allocation should return zeroed memory */
+ CFS_ALLOC_ZERO = (1 << 2),
+ /* allocation is allowed to call file-system code to free/clean
+ * memory */
+ CFS_ALLOC_FS = (1 << 3),
+ /* allocation is allowed to do io to free/clean memory */
+ CFS_ALLOC_IO = (1 << 4),
+ /* standard allocator flag combination */
+ CFS_ALLOC_STD = CFS_ALLOC_FS | CFS_ALLOC_IO,
+ CFS_ALLOC_USER = CFS_ALLOC_WAIT | CFS_ALLOC_FS | CFS_ALLOC_IO,
+};
+
+#define CFS_SLAB_ATOMIC CFS_ALLOC_ATOMIC
+#define CFS_SLAB_WAIT CFS_ALLOC_WAIT
+#define CFS_SLAB_ZERO CFS_ALLOC_ZERO
+#define CFS_SLAB_FS CFS_ALLOC_FS
+#define CFS_SLAB_IO CFS_ALLOC_IO
+#define CFS_SLAB_STD CFS_ALLOC_STD
+#define CFS_SLAB_USER CFS_ALLOC_USER
+
+/* flags for cfs_page_alloc() in addition to enum cfs_alloc_flags */
+enum cfs_page_alloc_flags {
+	/* allow returning a page beyond KVM; it has to be mapped into KVM
+	 * with cfs_page_map(); */
+ CFS_ALLOC_HIGH = (1 << 5),
+ CFS_ALLOC_HIGHUSER = CFS_ALLOC_WAIT | CFS_ALLOC_FS | CFS_ALLOC_IO | CFS_ALLOC_HIGH,
+};
+
+
+#define _LIBCFS_H
#endif /* _LIBCFS_H */
--- /dev/null
+Makefile.in
+Makefile
--- /dev/null
+EXTRA_DIST := linux-fs.h linux-prim.h portals_lib.h kp30.h \
+ linux-lock.h lltrace.h portals_utils.h libcfs.h linux-mem.h \
+ portals_compat25.h
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __LIBCFS_LINUX_KP30_H__
+#define __LIBCFS_LINUX_KP30_H__
+
+#ifndef __LIBCFS_KP30_H__
+#error Do not #include this file directly. #include <libcfs/kp30.h> instead
+#endif
+
+#ifdef __KERNEL__
+# include <linux/config.h>
+# include <linux/kernel.h>
+# include <linux/mm.h>
+# include <linux/string.h>
+# include <linux/stat.h>
+# include <linux/init.h>
+# include <linux/errno.h>
+# include <linux/unistd.h>
+# include <asm/system.h>
+# include <linux/kmod.h>
+# include <linux/notifier.h>
+# include <linux/fs.h>
+# include <asm/segment.h>
+# include <linux/miscdevice.h>
+# include <linux/vmalloc.h>
+# include <linux/time.h>
+# include <linux/slab.h>
+# include <linux/interrupt.h>
+# include <linux/highmem.h>
+# include <linux/module.h>
+# include <linux/version.h>
+# include <portals/p30.h>
+# include <linux/smp_lock.h>
+# include <asm/atomic.h>
+# include <asm/uaccess.h>
+# include <linux/rwsem.h>
+# include <linux/proc_fs.h>
+# include <linux/file.h>
+# include <linux/smp.h>
+# include <linux/ctype.h>
+# ifdef HAVE_MM_INLINE
+# include <linux/mm_inline.h>
+# endif
+# if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+# include <linux/kallsyms.h>
+# endif
+
+#include <libcfs/linux/portals_compat25.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+#define schedule_work schedule_task
+#define prepare_work(wq,cb,cbdata) \
+do { \
+ INIT_TQUEUE((wq), 0, 0); \
+ PREPARE_TQUEUE((wq), (cb), (cbdata)); \
+} while (0)
+
+#define PageUptodate Page_Uptodate
+#define our_recalc_sigpending(current) recalc_sigpending(current)
+#define num_online_cpus() smp_num_cpus
+static inline void our_cond_resched(void)
+{
+ if (current->need_resched)
+ schedule ();
+}
+#define work_struct_t struct tq_struct
+
+#else
+
+#define prepare_work(wq,cb,cbdata) \
+do { \
+ INIT_WORK((wq), (void *)(cb), (void *)(cbdata)); \
+} while (0)
+#define wait_on_page wait_on_page_locked
+#define our_recalc_sigpending(current) recalc_sigpending()
+#define strtok(a,b) strpbrk(a, b)
+static inline void our_cond_resched(void)
+{
+ cond_resched();
+}
+#define work_struct_t struct work_struct
+
+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */
+
+#ifdef CONFIG_SMP
+#define LASSERT_SPIN_LOCKED(lock) LASSERT(spin_is_locked(lock))
+#else
+#define LASSERT_SPIN_LOCKED(lock) do {} while(0)
+#endif
+
+#ifdef __arch_um__
+#define LBUG_WITH_LOC(file, func, line) \
+do { \
+ CEMERG("LBUG - trying to dump log to /tmp/lustre-log\n"); \
+ portals_debug_dumplog(); \
+ portals_run_lbug_upcall(file, func, line); \
+ panic("LBUG"); \
+} while (0)
+#else
+#define LBUG_WITH_LOC(file, func, line) \
+do { \
+ CEMERG("LBUG\n"); \
+ portals_debug_dumpstack(NULL); \
+ portals_debug_dumplog(); \
+ portals_run_lbug_upcall(file, func, line); \
+ set_task_state(current, TASK_UNINTERRUPTIBLE); \
+ schedule(); \
+} while (0)
+#endif /* __arch_um__ */
+
+/* ------------------------------------------------------------------- */
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+
+#define PORTAL_SYMBOL_REGISTER(x) inter_module_register(#x, THIS_MODULE, &x)
+#define PORTAL_SYMBOL_UNREGISTER(x) inter_module_unregister(#x)
+
+#define PORTAL_SYMBOL_GET(x) ((typeof(&x))inter_module_get(#x))
+#define PORTAL_SYMBOL_PUT(x) inter_module_put(#x)
+
+#define PORTAL_MODULE_USE MOD_INC_USE_COUNT
+#define PORTAL_MODULE_UNUSE MOD_DEC_USE_COUNT
+#else
+
+#define PORTAL_SYMBOL_REGISTER(x)
+#define PORTAL_SYMBOL_UNREGISTER(x)
+
+#define PORTAL_SYMBOL_GET(x) symbol_get(x)
+#define PORTAL_SYMBOL_PUT(x) symbol_put(x)
+
+#define PORTAL_MODULE_USE try_module_get(THIS_MODULE)
+#define PORTAL_MODULE_UNUSE module_put(THIS_MODULE)
+
+#endif
+
+/******************************************************************************/
+
+#if (__GNUC__)
+/* Use the special GNU C __attribute__ hack to have the compiler check the
+ * printf style argument string against the actual argument count and
+ * types.
+ */
+#ifdef printf
+# warning printf has been defined as a macro...
+# undef printf
+#endif
+
+#endif /* __GNUC__ */
+
+# define fprintf(a, format, b...) CDEBUG(D_OTHER, format , ## b)
+# define printf(format, b...) CDEBUG(D_OTHER, format , ## b)
+# define time(a) CURRENT_TIME
+
+#else /* !__KERNEL__ */
+# include <stdio.h>
+# include <stdlib.h>
+#ifndef __CYGWIN__
+# include <stdint.h>
+#else
+# include <cygwin-ioctl.h>
+#endif
+# include <unistd.h>
+# include <time.h>
+# include <limits.h>
+
+#endif /* End of !__KERNEL__ */
+
+/******************************************************************************/
+/* Light-weight trace
+ * Support for temporary event tracing with minimal Heisenberg effect. */
+#define LWT_SUPPORT 0
+
+#define LWT_MEMORY (16<<20)
+
+#if !KLWT_SUPPORT
+# if defined(__KERNEL__)
+# if !defined(BITS_PER_LONG)
+# error "BITS_PER_LONG not defined"
+# endif
+# elif !defined(__WORDSIZE)
+# error "__WORDSIZE not defined"
+# else
+# define BITS_PER_LONG __WORDSIZE
+# endif
+
+/* kernel hasn't defined this? */
+typedef struct {
+ long long lwte_when;
+ char *lwte_where;
+ void *lwte_task;
+ long lwte_p1;
+ long lwte_p2;
+ long lwte_p3;
+ long lwte_p4;
+# if BITS_PER_LONG > 32
+ long lwte_pad;
+# endif
+} lwt_event_t;
+#endif /* !KLWT_SUPPORT */
+
+#if LWT_SUPPORT
+# ifdef __KERNEL__
+# if !KLWT_SUPPORT
+
+typedef struct _lwt_page {
+ struct list_head lwtp_list;
+ struct page *lwtp_page;
+ lwt_event_t *lwtp_events;
+} lwt_page_t;
+
+typedef struct {
+ int lwtc_current_index;
+ lwt_page_t *lwtc_current_page;
+} lwt_cpu_t;
+
+extern int lwt_enabled;
+extern lwt_cpu_t lwt_cpus[];
+
+/* Note that we _don't_ define LWT_EVENT at all if LWT_SUPPORT isn't set.
+ * This stuff is meant for finding specific problems; it never stays in
+ * production code... */
+
+#define LWTSTR(n) #n
+#define LWTWHERE(f,l) f ":" LWTSTR(l)
+#define LWT_EVENTS_PER_PAGE (PAGE_SIZE / sizeof (lwt_event_t))
+
+#define LWT_EVENT(p1, p2, p3, p4) \
+do { \
+ unsigned long flags; \
+ lwt_cpu_t *cpu; \
+ lwt_page_t *p; \
+ lwt_event_t *e; \
+ \
+ if (lwt_enabled) { \
+ local_irq_save (flags); \
+ \
+ cpu = &lwt_cpus[smp_processor_id()]; \
+ p = cpu->lwtc_current_page; \
+ e = &p->lwtp_events[cpu->lwtc_current_index++]; \
+ \
+ if (cpu->lwtc_current_index >= LWT_EVENTS_PER_PAGE) { \
+ cpu->lwtc_current_page = \
+ list_entry (p->lwtp_list.next, \
+ lwt_page_t, lwtp_list); \
+ cpu->lwtc_current_index = 0; \
+ } \
+ \
+ e->lwte_when = get_cycles(); \
+ e->lwte_where = LWTWHERE(__FILE__,__LINE__); \
+ e->lwte_task = current; \
+ e->lwte_p1 = (long)(p1); \
+ e->lwte_p2 = (long)(p2); \
+ e->lwte_p3 = (long)(p3); \
+ e->lwte_p4 = (long)(p4); \
+ \
+ local_irq_restore (flags); \
+ } \
+} while (0)
+
+#endif /* !KLWT_SUPPORT */
+
+extern int lwt_init (void);
+extern void lwt_fini (void);
+extern int lwt_lookup_string (int *size, char *knlptr,
+ char *usrptr, int usrsize);
+extern int lwt_control (int enable, int clear);
+extern int lwt_snapshot (cycles_t *now, int *ncpu, int *total_size,
+ void *user_ptr, int user_size);
+# else /* __KERNEL__ */
+# define LWT_EVENT(p1,p2,p3,p4) /* no userland implementation yet */
+# endif /* __KERNEL__ */
+#endif /* LWT_SUPPORT */
+
+/* ------------------------------------------------------------------ */
+
+#define IOCTL_PORTAL_TYPE long
+
+#ifdef __CYGWIN__
+# ifndef BITS_PER_LONG
+# if (~0UL) == 0xffffffffUL
+# define BITS_PER_LONG 32
+# else
+# define BITS_PER_LONG 64
+# endif
+# endif
+#endif
+
+#if BITS_PER_LONG > 32
+# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a)
+# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a)
+# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a)
+#else
+# define LI_POISON ((int)0x5a5a5a5a)
+# define LL_POISON ((long)0x5a5a5a5a)
+# define LP_POISON ((void *)(long)0x5a5a5a5a)
+#endif
+
+#if defined(__x86_64__)
+# define LPU64 "%Lu"
+# define LPD64 "%Ld"
+# define LPX64 "%#Lx"
+# define LPSZ "%lu"
+# define LPSSZ "%ld"
+#elif (BITS_PER_LONG == 32 || __WORDSIZE == 32)
+# define LPU64 "%Lu"
+# define LPD64 "%Ld"
+# define LPX64 "%#Lx"
+# define LPSZ "%u"
+# define LPSSZ "%d"
+#elif (BITS_PER_LONG == 64 || __WORDSIZE == 64)
+# define LPU64 "%lu"
+# define LPD64 "%ld"
+# define LPX64 "%#lx"
+# define LPSZ "%lu"
+# define LPSSZ "%ld"
+#endif
+#ifndef LPU64
+# error "No word size defined"
+#endif
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __LIBCFS_LINUX_LIBCFS_H__
+#define __LIBCFS_LINUX_LIBCFS_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#include <libcfs/linux/linux-mem.h>
+#include <libcfs/linux/linux-time.h>
+#include <libcfs/linux/linux-prim.h>
+#include <libcfs/linux/linux-lock.h>
+#include <libcfs/linux/linux-fs.h>
+
+#ifdef HAVE_ASM_TYPES_H
+#include <asm/types.h>
+#else
+#include "types.h"
+#endif
+
+
+#ifdef __KERNEL__
+# include <linux/types.h>
+# include <linux/time.h>
+# include <asm/timex.h>
+#else
+# include <sys/types.h>
+# include <sys/time.h>
+/* no trailing semicolon: callers write 'do_gettimeofday(tv);' themselves,
+ * and a semicolon inside the expansion breaks if/else bodies */
+# define do_gettimeofday(tv) gettimeofday(tv, NULL)
+typedef unsigned long long cycles_t;
+#endif
+
+#ifndef __KERNEL__
+/* Userspace byte flipping */
+# include <endian.h>
+# include <byteswap.h>
+# define __swab16(x) bswap_16(x)
+# define __swab32(x) bswap_32(x)
+# define __swab64(x) bswap_64(x)
+# define __swab16s(x) do {*(x) = bswap_16(*(x));} while (0)
+# define __swab32s(x) do {*(x) = bswap_32(*(x));} while (0)
+# define __swab64s(x) do {*(x) = bswap_64(*(x));} while (0)
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+# define le16_to_cpu(x) (x)
+# define cpu_to_le16(x) (x)
+# define le32_to_cpu(x) (x)
+# define cpu_to_le32(x) (x)
+# define le64_to_cpu(x) (x)
+# define cpu_to_le64(x) (x)
+# else
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define le16_to_cpu(x) bswap_16(x)
+# define cpu_to_le16(x) bswap_16(x)
+# define le32_to_cpu(x) bswap_32(x)
+# define cpu_to_le32(x) bswap_32(x)
+# define le64_to_cpu(x) bswap_64(x)
+# define cpu_to_le64(x) bswap_64(x)
+# else
+# error "Unknown byte order"
+# endif /* __BIG_ENDIAN */
+# endif /* __LITTLE_ENDIAN */
+#endif /* ! __KERNEL__ */
+
+struct ptldebug_header {
+ __u32 ph_len;
+ __u32 ph_flags;
+ __u32 ph_subsys;
+ __u32 ph_mask;
+ __u32 ph_cpu_id;
+ __u32 ph_sec;
+ __u64 ph_usec;
+ __u32 ph_stack;
+ __u32 ph_pid;
+ __u32 ph_extern_pid;
+ __u32 ph_line_num;
+} __attribute__((packed));
+
+#ifdef __KERNEL__
+# include <linux/sched.h> /* THREAD_SIZE */
+#else
+# ifndef THREAD_SIZE /* x86_64 has THREAD_SIZE in userspace */
+# define THREAD_SIZE 8192
+# endif
+#endif
+
+#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5)
+
+#ifdef __KERNEL__
+# ifdef __ia64__
+# define CDEBUG_STACK (THREAD_SIZE - \
+ ((unsigned long)__builtin_dwarf_cfa() & \
+ (THREAD_SIZE - 1)))
+# else
+# define CDEBUG_STACK (THREAD_SIZE - \
+ ((unsigned long)__builtin_frame_address(0) & \
+ (THREAD_SIZE - 1)))
+# endif /* __ia64__ */
+
+#define CHECK_STACK(stack) \
+ do { \
+ if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) { \
+ portals_debug_msg(DEBUG_SUBSYSTEM, D_WARNING, \
+ __FILE__, __FUNCTION__, __LINE__, \
+ (stack),"maximum lustre stack %u\n",\
+ portal_stack = (stack)); \
+ /*panic("LBUG");*/ \
+ } \
+ } while (0)
+#else /* !__KERNEL__ */
+#define CHECK_STACK(stack) do { } while(0)
+#define CDEBUG_STACK (0L)
+#endif /* __KERNEL__ */
+
+/* initial pid */
+# if CRAY_PORTALS
+/*
+ * 1) ptl_pid_t in cray portals is only 16 bits, not 32 bits, therefore this
+ * is too big.
+ *
+ * 2) the implementation of the NAL in cray portals further restricts the pid
+ * space that may be used to 0 <= pid <= 255 (an 8 bit value). Returns
+ * an error at nal init time for any pid outside this range. Other nals
+ * in cray portals don't have this restriction.
+ * */
+#define LUSTRE_PTL_PID 9
+# else
+#define LUSTRE_PTL_PID 12345
+# endif
+
+#define ENTRY_NESTING_SUPPORT (0)
+#define ENTRY_NESTING do {;} while (0)
+#define EXIT_NESTING do {;} while (0)
+#define __current_nesting_level() (0)
+
+/*
+ * Platform specific declarations for cfs_curproc API (libcfs/curproc.h)
+ *
+ * Implementation is in linux-curproc.c
+ */
+#define CFS_CURPROC_COMM_MAX (sizeof ((struct task_struct *)0)->comm)
+
+#if defined(__KERNEL__)
+#include <linux/capability.h>
+typedef kernel_cap_t cfs_kernel_cap_t;
+#else
+typedef __u32 cfs_kernel_cap_t;
+#endif
+
+#endif /* __LIBCFS_LINUX_LIBCFS_H__ */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines.
+ *
+ */
+
+#ifndef __LIBCFS_LINUX_CFS_FS_H__
+#define __LIBCFS_LINUX_CFS_FS_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#ifdef __KERNEL__
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <linux/mount.h>
+#endif
+
+typedef struct file cfs_file_t;
+typedef struct dentry cfs_dentry_t;
+
+#ifdef __KERNEL__
+#define cfs_filp_size(f) ((f)->f_dentry->d_inode->i_size)
+#define cfs_filp_poff(f) (&(f)->f_pos)
+
+/*
+ * XXX Do we need to parse flags and mode in cfs_filp_open?
+ */
+cfs_file_t *cfs_filp_open (const char *name, int flags, int mode, int *err);
+#define cfs_filp_close(f) filp_close(f, NULL)
+#define cfs_filp_read(fp, buf, size, pos) (fp)->f_op->read((fp), (buf), (size), pos)
+#define cfs_filp_write(fp, buf, size, pos) (fp)->f_op->write((fp), (buf), (size), pos)
+#define cfs_filp_fsync(fp) (fp)->f_op->fsync((fp), (fp)->f_dentry, 1)
+
+#define cfs_get_file(f) get_file(f)
+#define cfs_put_file(f) fput(f)
+#define cfs_file_count(f) file_count(f)
+
+typedef struct file_lock cfs_flock_t;
+#define CFS_FLOCK_TYPE(fl) ((fl)->fl_type)
+#define CFS_FLOCK_SET_TYPE(fl, type) do { (fl)->fl_type = (type); } while(0)
+#define CFS_FLOCK_PID(fl) ((fl)->fl_pid)
+#define CFS_FLOCK_SET_PID(fl, pid) do { (fl)->fl_pid = (pid); } while(0)
+#define CFS_FLOCK_START(fl) ((fl)->fl_start)
+#define CFS_FLOCK_SET_START(fl, start) do { (fl)->fl_start = (start); } while(0)
+#define CFS_FLOCK_END(fl) ((fl)->fl_end)
+#define CFS_FLOCK_SET_END(fl, end) do { (fl)->fl_end = (end); } while(0)
+
+#endif
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines.
+ *
+ */
+
+#ifndef __LIBCFS_LINUX_CFS_LOCK_H__
+#define __LIBCFS_LINUX_CFS_LOCK_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#ifdef __KERNEL__
+#include <linux/smp_lock.h>
+
+/*
+ * IMPORTANT !!!!!!!!
+ *
+ * Lock declarations are not guaranteed to be initialized,
+ * although some of them are initialized in Linux. All locks
+ * declared by CFS_DECL_* should be initialized explicitly.
+ */
+
+
+/*
+ * spin_lock (use Linux kernel's primitives)
+ *
+ * - spin_lock_init(x)
+ * - spin_lock(x)
+ * - spin_unlock(x)
+ * - spin_trylock(x)
+ *
+ * - spin_lock_irqsave(x, f)
+ * - spin_unlock_irqrestore(x, f)
+ */
+
+/*
+ * rw_semaphore (use Linux kernel's primitives)
+ *
+ * - init_rwsem(x)
+ * - down_read(x)
+ * - up_read(x)
+ * - down_write(x)
+ * - up_write(x)
+ */
+
+/*
+ * rwlock_t (use Linux kernel's primitives)
+ *
+ * - rwlock_init(x)
+ * - read_lock(x)
+ * - read_unlock(x)
+ * - write_lock(x)
+ * - write_unlock(x)
+ */
+
+/*
+ * mutex_t:
+ *
+ * - init_mutex(x)
+ * - init_mutex_locked(x)
+ * - mutex_up(x)
+ * - mutex_down(x)
+ */
+#define init_mutex(x) init_MUTEX(x)
+#define init_mutex_locked(x) init_MUTEX_LOCKED(x)
+#define mutex_up(x) up(x)
+#define mutex_down(x) down(x)
+
+/*
+ * completion (use Linux kernel's primitives)
+ *
+ * - init_completion(c)
+ * - complete(c)
+ * - wait_for_completion(c)
+ */
+
+/*
+ * OSX funnels:
+ *
+ * No funnels needed in Linux
+ */
+#define CFS_DECL_FUNNEL_DATA
+#define CFS_DECL_CONE_DATA DECLARE_FUNNEL_DATA
+#define CFS_DECL_NET_DATA DECLARE_FUNNEL_DATA
+#define CFS_CONE_IN do {} while(0)
+#define CFS_CONE_EX do {} while(0)
+
+#define CFS_NET_IN do {} while(0)
+#define CFS_NET_EX do {} while(0)
+
+/* __KERNEL__ */
+#else
+
+//#include "../user-lock.h"
+
+/* __KERNEL__ */
+#endif
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines.
+ *
+ */
+
+#ifndef __LIBCFS_LINUX_CFS_MEM_H__
+#define __LIBCFS_LINUX_CFS_MEM_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#ifdef __KERNEL__
+# include <linux/mm.h>
+# include <linux/vmalloc.h>
+# include <linux/pagemap.h>
+# include <linux/slab.h>
+# ifdef HAVE_MM_INLINE
+# include <linux/mm_inline.h>
+# endif
+
+typedef struct page cfs_page_t;
+#define CFS_PAGE_SIZE PAGE_CACHE_SIZE
+#define CFS_PAGE_SHIFT PAGE_CACHE_SHIFT
+#define CFS_PAGE_MASK PAGE_CACHE_MASK
+
+cfs_page_t *cfs_alloc_pages(unsigned int flags, unsigned int order);
+#define cfs_alloc_page(f) cfs_alloc_pages(f, 0)
+#define cfs_free_pages(p, o) __free_pages(p, o)
+#define cfs_free_page(p) __free_pages(p, 0)
+
+static inline void *cfs_page_address(cfs_page_t *page)
+{
+ return page_address(page);
+}
+
+static inline void *cfs_kmap(cfs_page_t *page)
+{
+ return kmap(page);
+}
+
+static inline void cfs_kunmap(cfs_page_t *page)
+{
+ kunmap(page);
+}
+
+static inline void cfs_get_page(cfs_page_t *page)
+{
+ get_page(page);
+}
+
+static inline int cfs_page_count(cfs_page_t *page)
+{
+ return page_count(page);
+}
+
+static inline void cfs_set_page_count(cfs_page_t *page, int v)
+{
+ set_page_count(page, v);
+}
+
+/*
+ * Memory allocator
+ */
+extern void *cfs_alloc(size_t nr_bytes, u_int32_t flags);
+extern void cfs_free(void *addr);
+
+extern void *cfs_alloc_large(size_t nr_bytes);
+extern void cfs_free_large(void *addr);
+
+/*
+ * SLAB allocator
+ */
+typedef kmem_cache_t cfs_mem_cache_t;
+extern cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, unsigned long,
+ void (*)(void *, cfs_mem_cache_t *, unsigned long),
+ void (*)(void *, cfs_mem_cache_t *, unsigned long));
+extern int cfs_mem_cache_destroy ( cfs_mem_cache_t * );
+extern void *cfs_mem_cache_alloc ( cfs_mem_cache_t *, int);
+extern void cfs_mem_cache_free ( cfs_mem_cache_t *, void *);
+
+/*
+ */
+#define CFS_DECL_MMSPACE mm_segment_t __oldfs
+#define CFS_MMSPACE_OPEN do { __oldfs = get_fs(); set_fs(get_ds());} while(0)
+#define CFS_MMSPACE_CLOSE set_fs(__oldfs)
+
+/* __KERNEL__ */
+#endif
+
+#endif /* __LIBCFS_LINUX_CFS_MEM_H__ */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines.
+ *
+ */
+
+#ifndef __LIBCFS_LINUX_CFS_PRIM_H__
+#define __LIBCFS_LINUX_CFS_PRIM_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#ifdef __KERNEL__
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/version.h>
+#include <linux/proc_fs.h>
+#include <linux/mm.h>
+#include <linux/timer.h>
+
+#include <linux/miscdevice.h>
+#include <libcfs/linux/portals_compat25.h>
+#include <asm/div64.h>
+
+#include <libcfs/linux/linux-time.h>
+
+/*
+ * Pseudo device register
+ */
+typedef struct miscdevice cfs_psdev_t;
+#define cfs_psdev_register(dev) misc_register(dev)
+#define cfs_psdev_deregister(dev) misc_deregister(dev)
+
+/*
+ * Sysctl register
+ */
+typedef struct ctl_table cfs_sysctl_table_t;
+typedef struct ctl_table_header cfs_sysctl_table_header_t;
+
+#define register_cfs_sysctl_table(t, a) register_sysctl_table(t, a)
+/* unregister_sysctl_table() takes only the table header; the original
+ * expansion passed a second argument 'a' that was a free (undefined)
+ * identifier and would fail to compile at any use site */
+#define unregister_cfs_sysctl_table(t) unregister_sysctl_table(t)
+
+/*
+ * Proc file system APIs
+ */
+typedef read_proc_t cfs_read_proc_t;
+typedef write_proc_t cfs_write_proc_t;
+typedef struct proc_dir_entry cfs_proc_dir_entry_t;
+#define cfs_create_proc_entry(n, m, p) create_proc_entry(n, m, p)
+#define cfs_free_proc_entry(e) free_proc_entry(e)
+#define cfs_remove_proc_entry(n, e) remove_proc_entry(n, e)
+
+/*
+ * Wait Queue
+ */
+typedef wait_queue_t cfs_waitlink_t;
+typedef wait_queue_head_t cfs_waitq_t;
+
+#define cfs_waitq_init(w) init_waitqueue_head(w)
+#define cfs_waitlink_init(l) init_waitqueue_entry(l, current)
+#define cfs_waitq_add(w, l) add_wait_queue(w, l)
+#define cfs_waitq_add_exclusive(w, l) add_wait_queue_exclusive(w, l)
+#define cfs_waitq_forward(l, w) do {} while(0)
+#define cfs_waitq_del(w, l) remove_wait_queue(w, l)
+#define cfs_waitq_active(w) waitqueue_active(w)
+#define cfs_waitq_signal(w) wake_up(w)
+#define cfs_waitq_signal_nr(w,n) wake_up_nr(w, n)
+#define cfs_waitq_broadcast(w) wake_up_all(w)
+#define cfs_waitq_wait(l) schedule()
+#define cfs_waitq_timedwait(l, t) schedule_timeout(t)
+
+/* Kernel thread */
+typedef int (*cfs_thread_t)(void *);
+#define cfs_kernel_thread(func, a, f) kernel_thread(func, a, f)
+
+/*
+ * Task struct
+ */
+typedef struct task_struct cfs_task_t;
+#define cfs_current() current
+#define CFS_DECL_JOURNAL_DATA void *journal_info
+#define CFS_PUSH_JOURNAL do { \
+ journal_info = current->journal_info; \
+ current->journal_info = NULL; \
+ } while(0)
+#define CFS_POP_JOURNAL do { \
+ current->journal_info = journal_info; \
+ } while(0)
+
+/* Module interfaces */
+#define cfs_module(name, version, init, fini) \
+module_init(init); \
+module_exit(fini)
+
+/*
+ * Signal
+ */
+#define cfs_sigmask_lock(t, f) SIGNAL_MASK_LOCK(t, f)
+#define cfs_sigmask_unlock(t, f) SIGNAL_MASK_UNLOCK(t, f)
+#define cfs_recalc_sigpending(t) RECALC_SIGPENDING
+#define cfs_signal_pending(t) signal_pending(t)
+#define cfs_sigfillset(s) sigfillset(s)
+
+#define cfs_set_sig_blocked(t, b) do { (t)->blocked = b; } while(0)
+#define cfs_get_sig_blocked(t) (&(t)->blocked)
+
+/*
+ * Timer
+ */
+typedef struct timer_list cfs_timer_t;
+typedef void (*timer_func_t)(unsigned long);
+
+#define cfs_init_timer(t) init_timer(t)
+
+static inline void cfs_timer_init(cfs_timer_t *t, void (*func)(unsigned long), void *arg)
+{
+ init_timer(t);
+ t->function = (timer_func_t)func;
+ t->data = (unsigned long)arg;
+}
+
+static inline void cfs_timer_done(cfs_timer_t *t)
+{
+ return;
+}
+
+static inline void cfs_timer_arm(cfs_timer_t *t, cfs_time_t deadline)
+{
+ mod_timer(t, deadline);
+}
+
+static inline void cfs_timer_disarm(cfs_timer_t *t)
+{
+ del_timer(t);
+}
+
+static inline int cfs_timer_is_armed(cfs_timer_t *t)
+{
+ return timer_pending(t);
+}
+
+static inline cfs_time_t cfs_timer_deadline(cfs_timer_t *t)
+{
+ return t->expires;
+}
+
+#else /* !__KERNEL__ */
+
+#include "../user-prim.h"
+
+#endif /* __KERNEL__ */
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ *
+ * Implementation of portable time API for Linux (kernel and user-level).
+ *
+ */
+
+#ifndef __LIBCFS_LINUX_LINUX_TIME_H__
+#define __LIBCFS_LINUX_LINUX_TIME_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+/* Portable time API */
+
+/*
+ * Platform provides three opaque data-types:
+ *
+ * cfs_time_t represents point in time. This is internal kernel
+ * time rather than "wall clock". This time bears no
+ * relation to gettimeofday().
+ *
+ * cfs_duration_t represents time interval with resolution of internal
+ * platform clock
+ *
+ * cfs_fs_time_t represents instance in world-visible time. This is
+ * used in file-system time-stamps
+ *
+ * cfs_time_t cfs_time_current(void);
+ * cfs_time_t cfs_time_add (cfs_time_t, cfs_duration_t);
+ * cfs_duration_t cfs_time_sub (cfs_time_t, cfs_time_t);
+ * int cfs_time_before (cfs_time_t, cfs_time_t);
+ * int cfs_time_beforeq(cfs_time_t, cfs_time_t);
+ *
+ * cfs_duration_t cfs_duration_build(int64_t);
+ *
+ * time_t cfs_duration_sec (cfs_duration_t);
+ * void cfs_duration_usec(cfs_duration_t, struct timeval *);
+ * void cfs_duration_nsec(cfs_duration_t, struct timespec *);
+ *
+ * void cfs_fs_time_current(cfs_fs_time_t *);
+ * time_t cfs_fs_time_sec (cfs_fs_time_t *);
+ * void cfs_fs_time_usec (cfs_fs_time_t *, struct timeval *);
+ * void cfs_fs_time_nsec (cfs_fs_time_t *, struct timespec *);
+ * int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *);
+ * int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *);
+ *
+ * cfs_duration_t cfs_time_minimal_timeout(void)
+ *
+ * CFS_TIME_FORMAT
+ * CFS_DURATION_FORMAT
+ *
+ */
+
+#define ONE_BILLION ((u_int64_t)1000000000)
+#define ONE_MILLION ((u_int64_t) 1000000)
+
+#ifdef __KERNEL__
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/version.h>
+#include <linux/time.h>
+#include <asm/div64.h>
+
+#include <libcfs/linux/portals_compat25.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+
+/*
+ * old kernels---CURRENT_TIME is struct timeval
+ */
+typedef struct timeval cfs_fs_time_t;
+
+static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v)
+{
+ *v = *t;
+}
+
+static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s)
+{
+ s->tv_sec = t->tv_sec;
+ s->tv_nsec = t->tv_usec * 1000;
+}
+
+/*
+ * internal helper function used by cfs_fs_time_before*()
+ */
+static inline unsigned long __cfs_fs_time_flat(cfs_fs_time_t *t)
+{
+ return ((unsigned long)t->tv_sec) * ONE_MILLION + t->tv_usec * 1000;
+}
+
+#define CURRENT_KERN_TIME xtime
+
+/* (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) */
+#else
+
+/*
+ * post 2.5 kernels.
+ */
+
+#include <linux/jiffies.h>
+
+typedef struct timespec cfs_fs_time_t;
+
+static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v)
+{
+ v->tv_sec = t->tv_sec;
+ v->tv_usec = t->tv_nsec / 1000;
+}
+
+static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s)
+{
+ *s = *t;
+}
+
+/*
+ * internal helper function used by cfs_fs_time_before*()
+ */
+static inline unsigned long __cfs_fs_time_flat(cfs_fs_time_t *t)
+{
+ return ((unsigned long)t->tv_sec) * ONE_BILLION + t->tv_nsec;
+}
+
+#define CURRENT_KERN_TIME CURRENT_TIME
+
+/* (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) */
+#endif
+
+/*
+ * Generic kernel stuff
+ */
+
+typedef unsigned long cfs_time_t; /* jiffies */
+typedef long cfs_duration_t;
+
+
+static inline cfs_time_t cfs_time_current(void)
+{
+ return jiffies;
+}
+
+static inline time_t cfs_time_current_sec(void)
+{
+ return CURRENT_SECONDS;
+}
+
+static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d)
+{
+ return t + d;
+}
+
+static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2)
+{
+ return t1 - t2;
+}
+
+static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2)
+{
+ return time_before(t1, t2);
+}
+
+static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2)
+{
+ return time_before_eq(t1, t2);
+}
+
+static inline void cfs_fs_time_current(cfs_fs_time_t *t)
+{
+ *t = CURRENT_KERN_TIME;
+}
+
+static inline time_t cfs_fs_time_sec(cfs_fs_time_t *t)
+{
+ return t->tv_sec;
+}
+
+static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
+{
+ return time_before(__cfs_fs_time_flat(t1), __cfs_fs_time_flat(t2));
+}
+
+static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
+{
+ return time_before_eq(__cfs_fs_time_flat(t1), __cfs_fs_time_flat(t2));
+}
+
+#if 0
+static inline cfs_duration_t cfs_duration_build(int64_t nano)
+{
+#if (BITS_PER_LONG == 32)
+ /* We cannot use do_div(t, ONE_BILLION), do_div can only process
+ * 64 bits n and 32 bits base */
+ int64_t t = nano * HZ;
+ do_div(t, 1000);
+ do_div(t, 1000000);
+ return (cfs_duration_t)t;
+#else
+ return (nano * HZ / ONE_BILLION);
+#endif
+}
+#endif
+
+static inline cfs_duration_t cfs_time_seconds(int seconds)
+{
+ return seconds * HZ;
+}
+
+static inline cfs_time_t cfs_time_shift(int seconds)
+{
+ return jiffies + seconds * HZ;
+}
+
+static inline time_t cfs_duration_sec(cfs_duration_t d)
+{
+ return d / HZ;
+}
+
+/*
+ * Split a jiffies duration 'd' into seconds + microseconds in *s.
+ * tv_sec must be assigned BEFORE it is used in the tv_usec computation:
+ * the original read the caller-supplied (possibly uninitialized) s->tv_sec.
+ */
+static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s)
+{
+        s->tv_sec = d / HZ;
+#if (BITS_PER_LONG == 32)
+        {
+                /* 64-bit intermediate: (d % HZ) * 1e6 can overflow 32 bits */
+                uint64_t t = (d - s->tv_sec * HZ) * ONE_MILLION;
+                s->tv_usec = do_div (t, HZ);
+        }
+#else
+        s->tv_usec = (d - s->tv_sec * HZ) * ONE_MILLION / HZ;
+#endif
+}
+
+/*
+ * Split a jiffies duration 'd' into seconds + nanoseconds in *s.
+ * tv_sec must be assigned BEFORE it is used in the tv_nsec computation:
+ * the original read the caller-supplied (possibly uninitialized) s->tv_sec.
+ */
+static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s)
+{
+        s->tv_sec = d / HZ;
+#if (BITS_PER_LONG == 32)
+        {
+                /* 64-bit intermediate: (d % HZ) * 1e9 can overflow 32 bits */
+                uint64_t t = (d - s->tv_sec * HZ) * ONE_BILLION;
+                s->tv_nsec = do_div (t, HZ);
+        }
+#else
+        s->tv_nsec = (d - s->tv_sec * HZ) * ONE_BILLION / HZ;
+#endif
+}
+
+static inline cfs_duration_t cfs_time_minimal_timeout(void)
+{
+ return 1;
+}
+
+/* inline function cfs_time_minimal_timeout() can not be used
+ * to initiallize static variable */
+#define CFS_MIN_DELAY (1)
+
+#define CFS_TIME_T "%lu"
+#define CFS_DURATION_T "%ld"
+
+#else /* !__KERNEL__ */
+
+/*
+ * Liblustre. time(2) based implementation.
+ */
+#include <libcfs/user-time.h>
+#endif /* __KERNEL__ */
+
+/* __LIBCFS_LINUX_LINUX_TIME_H__ */
+#endif
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __LIBCFS_LINUX_LLTRACE_H__
+#define __LIBCFS_LINUX_LLTRACE_H__
+
+#ifndef __LIBCFS_LLTRACE_H__
+#error Do not #include this file directly. #include <libcfs/lltrace.h> instead
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <portals/types.h>
+#include <libcfs/kp30.h>
+#include <portals/ptlctl.h>
+#include <linux/limits.h>
+#include <asm/page.h>
+#include <linux/version.h>
+
+#endif
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*/
-#ifndef _PORTALS_COMPAT_H
-#define _PORTALS_COMPAT_H
+#ifndef __LIBCFS_LINUX_PORTALS_COMPAT_H__
+#define __LIBCFS_LINUX_PORTALS_COMPAT_H__
// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
#if SPINLOCK_DEBUG
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines.
+ *
+ */
+
+#ifndef __LIBCFS_LINUX_PORTALS_LIB_H__
+#define __LIBCFS_LINUX_PORTALS_LIB_H__
+
+#ifndef __LIBCFS_PORTALS_LIB_H__
+#error Do not #include this file directly. #include <libcfs/portals_lib.h> instead
+#endif
+
+#ifndef __KERNEL__
+# include <string.h>
+#else
+# include <asm/types.h>
+#endif
+
+#endif
--- /dev/null
+#ifndef __LIBCFS_LINUX_PORTALS_UTILS_H__
+#define __LIBCFS_LINUX_PORTALS_UTILS_H__
+
+#ifndef __LIBCFS_PORTALS_UTILS_H__
+#error Do not #include this file directly. #include <libcfs/portals_utils.h> instead
+#endif
+
+#ifdef __KERNEL__
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/smp_lock.h>
+#include <linux/poll.h>
+#include <linux/random.h>
+
+#include <asm/unistd.h>
+#include <asm/semaphore.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+# include <linux/tqueue.h>
+#else /* (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) */
+# include <linux/workqueue.h>
+#endif
+#include <libcfs/linux/linux-mem.h>
+#include <libcfs/linux/linux-prim.h>
+#else /* !__KERNEL__ */
+
+#include <endian.h>
+#include <libcfs/list.h>
+
+#ifdef HAVE_LINUX_VERSION_H
+# include <linux/version.h>
+
+# if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+# define BUG() /* workaround for module.h includes */
+# include <linux/module.h>
+# endif
+#endif /* !HAVE_LINUX_VERSION_H */
+
+#ifndef __CYGWIN__
+# include <syscall.h>
+#else /* __CYGWIN__ */
+# include <windows.h>
+# include <windef.h>
+# include <netinet/in.h>
+#endif /* __CYGWIN__ */
+
+#endif /* !__KERNEL__ */
+#endif
-#ifndef _LUSTRE_LIST_H
-#define _LUSTRE_LIST_H
+#ifndef __LIBCFS_LIST_H__
+#define __LIBCFS_LIST_H__
+
+#if defined (__linux__) && defined(__KERNEL__)
-#ifdef __KERNEL__
#include <linux/list.h>
-#else
+
+#define CFS_LIST_HEAD_INIT(n) LIST_HEAD_INIT(n)
+#define CFS_LIST_HEAD(n) LIST_HEAD(n)
+#define CFS_INIT_LIST_HEAD(p) INIT_LIST_HEAD(p)
+
+#else /* !defined (__linux__) && defined(__KERNEL__) */
+
/*
* Simple doubly linked list implementation.
*
typedef struct list_head list_t;
-#define LIST_HEAD_INIT(name) { &(name), &(name) }
+#define CFS_LIST_HEAD_INIT(name) { &(name), &(name) }
-#define LIST_HEAD(name) \
+#define CFS_LIST_HEAD(name) \
struct list_head name = LIST_HEAD_INIT(name)
-#define INIT_LIST_HEAD(ptr) do { \
+#define CFS_INIT_LIST_HEAD(ptr) do { \
(ptr)->next = (ptr); (ptr)->prev = (ptr); \
} while (0)
+#ifndef __APPLE__
+#define LIST_HEAD(n) CFS_LIST_HEAD(n)
+#endif
+
+#define LIST_HEAD_INIT(n) CFS_LIST_HEAD_INIT(n)
+#define INIT_LIST_HEAD(p) CFS_INIT_LIST_HEAD(p)
+
/*
* Insert a new entry between two known consecutive entries.
*
static inline void list_del_init(struct list_head *entry)
{
__list_del(entry->prev, entry->next);
- INIT_LIST_HEAD(entry);
+ CFS_INIT_LIST_HEAD(entry);
}
/**
{
if (!list_empty(list)) {
__list_splice(list, head);
- INIT_LIST_HEAD(list);
+ CFS_INIT_LIST_HEAD(list);
}
}
pos = pos->next, prefetch(pos->next))
/**
- * list_for_each_prev - iterate over a list in reverse order
- * @pos: the &struct list_head to use as a loop counter.
- * @head: the head for your list.
- */
-#define list_for_each_prev(pos, head) \
- for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \
- pos = pos->prev, prefetch(pos->prev))
-
-/**
* list_for_each_safe - iterate over a list safe against removal of list entry
* @pos: the &struct list_head to use as a loop counter.
* @n: another &struct list_head to use as temporary storage
for (pos = (head)->next, n = pos->next; pos != (head); \
pos = n, n = pos->next)
+#endif /* __linux__*/
+
+#ifndef list_for_each_prev
+/**
+ * list_for_each_prev - iterate over a list in reverse order
+ * @pos: the &struct list_head to use as a loop counter.
+ * @head: the head for your list.
+ */
+#define list_for_each_prev(pos, head) \
+ for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \
+ pos = pos->prev, prefetch(pos->prev))
+
+#endif /* list_for_each_prev */
+
+#ifndef list_for_each_entry
/**
* list_for_each_entry - iterate over list of given type
* @pos: the type * to use as a loop counter.
&pos->member != (head); \
pos = list_entry(pos->member.next, typeof(*pos), member), \
prefetch(pos->member.next))
+#endif /* list_for_each_entry */
+#ifndef list_for_each_entry_safe
/**
* list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
* @pos: the type * to use as a loop counter.
n = list_entry(pos->member.next, typeof(*pos), member); \
&pos->member != (head); \
pos = n, n = list_entry(n->member.next, typeof(*n), member))
+#endif /* list_for_each_entry_safe */
-#endif /* if !__KERNEL__*/
-#endif /* if !_LUSTRE_LIST_H */
+#endif /* __LIBCFS_LIST_H__ */
* Compile with:
* cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl
*/
-#ifndef __LTRACE_H_
-#define __LTRACE_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <getopt.h>
-#include <string.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <sys/time.h>
-#include <portals/types.h>
-#include <linux/kp30.h>
-#include <portals/ptlctl.h>
-#include <linux/limits.h>
-#include <asm/page.h>
-#include <linux/version.h>
+#ifndef __LIBCFS_LLTRACE_H__
+#define __LIBCFS_LLTRACE_H__
+
+#if defined(__linux__)
+#include <libcfs/linux/lltrace.h>
+#elif defined(__APPLE__)
+#include <libcfs/darwin/lltrace.h>
+#else
+#error Unsupported Operating System
+#endif
static inline int ltrace_write_file(char* fname)
{
*
*/
-#ifndef _PORTALS_LIB_H
-#define _PORTALS_LIB_H
+#ifndef __LIBCFS_PORTALS_LIB_H__
+#define __LIBCFS_PORTALS_LIB_H__
-#ifndef __KERNEL__
-# include <string.h>
-#else
-# include <asm/types.h>
+#if defined(__linux__)
+#include <libcfs/linux/portals_lib.h>
+#elif defined(__APPLE__)
+#include <libcfs/darwin/portals_lib.h>
+#else
+#error Unsupported Operating System
#endif
#undef MIN
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __LIBCFS_PORTALS_UTILS_H__
+#define __LIBCFS_PORTALS_UTILS_H__
+
+/*
+ * portals_utils.h
+ *
+ */
+#if defined(__linux__)
+#include <libcfs/linux/portals_utils.h>
+#elif defined(__APPLE__)
+#include <libcfs/darwin/portals_utils.h>
+#else
+#error Unsupported Operating System
+#endif
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ *
+ * Implementation of portable synchronization APIs for user-level.
+ *
+ */
+
+#ifndef __LIBCFS_USER_LOCK_H__
+#define __LIBCFS_USER_LOCK_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+/* Implementations of portable synchronization APIs for liblustre */
+
+/*
+ * liblustre is single-threaded, so most "synchronization" APIs are trivial.
+ */
+
+#ifndef __KERNEL__
+
+/*
+ * Optional debugging (magic stamping and checking ownership) can be added.
+ */
+
+/*
+ * spin_lock
+ *
+ * - spin_lock_init(x)
+ * - spin_lock(x)
+ * - spin_unlock(x)
+ * - spin_trylock(x)
+ *
+ * - spin_lock_irqsave(x, f)
+ * - spin_unlock_irqrestore(x, f)
+ *
+ * No-op implementation.
+ */
+struct spin_lock {};
+
+typedef struct spin_lock spinlock_t;
+
+void spin_lock_init(spinlock_t *lock);
+void spin_lock(spinlock_t *lock);
+void spin_unlock(spinlock_t *lock);
+int spin_trylock(spinlock_t *lock);
+void spin_lock_bh_init(spinlock_t *lock);
+void spin_lock_bh(spinlock_t *lock);
+void spin_unlock_bh(spinlock_t *lock);
+
+#define spin_lock_irqsave(l, flags) ({ spin_lock(l); (void)flags; })
+#define spin_unlock_irqrestore(l, flags) ({ spin_unlock(l); (void)flags; })
+
+/*
+ * Semaphore
+ *
+ * - sema_init(x, v)
+ * - __down(x)
+ * - __up(x)
+ */
+struct semaphore {};
+
+void sema_init(struct semaphore *s, int val);
+void __down(struct semaphore *s);
+void __up(struct semaphore *s);
+
+/*
+ * Mutex:
+ *
+ * - init_mutex(x)
+ * - init_mutex_locked(x)
+ * - mutex_up(x)
+ * - mutex_down(x)
+ */
+#define mutex_up(s) __up(s)
+#define mutex_down(s) __down(s)
+
+#define init_mutex(x) sema_init(x, 1)
+#define init_mutex_locked(x) sema_init(x, 0)
+
+/*
+ * Completion:
+ *
+ * - init_completion(c)
+ * - complete(c)
+ * - wait_for_completion(c)
+ */
+struct completion {};
+
+void init_completion(struct completion *c);
+void complete(struct completion *c);
+void wait_for_completion(struct completion *c);
+
+/*
+ * rw_semaphore:
+ *
+ * - init_rwsem(x)
+ * - down_read(x)
+ * - up_read(x)
+ * - down_write(x)
+ * - up_write(x)
+ */
+struct rw_semaphore {};
+
+void init_rwsem(struct rw_semaphore *s);
+void down_read(struct rw_semaphore *s);
+int down_read_trylock(struct rw_semaphore *s);
+void down_write(struct rw_semaphore *s);
+int down_write_trylock(struct rw_semaphore *s);
+void up_read(struct rw_semaphore *s);
+void up_write(struct rw_semaphore *s);
+
+/*
+ * read-write lock: needs further investigation.
+ * XXX nikita: for now, let rwlock_t to be identical to rw_semaphore
+ *
+ * - DECLARE_RWLOCK(l)
+ * - rwlock_init(x)
+ * - read_lock(x)
+ * - read_unlock(x)
+ * - write_lock(x)
+ * - write_unlock(x)
+ */
+typedef struct rw_semaphore rwlock_t;
+
+#define rwlock_init(pl) init_rwsem(pl)
+
+#define read_lock(l) down_read(l)
+#define read_unlock(l) up_read(l)
+#define write_lock(l) down_write(l)
+#define write_unlock(l) up_write(l)
+
+#define write_lock_irqsave(l, f) write_lock(l)
+#define write_unlock_irqrestore(l, f) write_unlock(l)
+
+#define read_lock_irqsave(l, f) read_lock(l)
+#define read_unlock_irqrestore(l, f) read_unlock(l)
+
+/* !__KERNEL__ */
+#endif
+
+/* __LIBCFS_USER_LOCK_H__ */
+#endif
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ *
+ * Implementation of portable APIs for user-level.
+ *
+ */
+
+#ifndef __LIBCFS_USER_PRIM_H__
+#define __LIBCFS_USER_PRIM_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+/* Implementations of portable APIs for liblustre */
+
+/*
+ * liblustre is single-threaded, so most "synchronization" APIs are trivial.
+ */
+
+#ifndef __KERNEL__
+
+#include <libcfs/list.h>
+
+/*
+ * Wait Queue. No-op implementation.
+ */
+
+typedef struct cfs_waitlink {} cfs_waitlink_t;
+typedef struct cfs_waitq {} cfs_waitq_t;
+
+void cfs_waitq_init(struct cfs_waitq *waitq);
+void cfs_waitlink_init(struct cfs_waitlink *link);
+void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link);
+void cfs_waitq_add_exclusive(struct cfs_waitq *waitq,
+ struct cfs_waitlink *link);
+void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq);
+void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link);
+int cfs_waitq_active(struct cfs_waitq *waitq);
+void cfs_waitq_signal(struct cfs_waitq *waitq);
+void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr);
+void cfs_waitq_broadcast(struct cfs_waitq *waitq);
+void cfs_waitq_wait(struct cfs_waitlink *link);
+int64_t cfs_waitq_timedwait(struct cfs_waitlink *link, int64_t timeout);
+
+/*
+ * Allocator
+ */
+
+/* 2.4 defines */
+
+/* XXX
+ * for the moment, liblustre will not rely on the OST for non-page-aligned writes
+ */
+#define LIBLUSTRE_HANDLE_UNALIGNED_PAGE
+
+struct page {
+ void *addr;
+ unsigned long index;
+ struct list_head list;
+ unsigned long private;
+
+ /* internally used by liblustre file i/o */
+ int _offset;
+ int _count;
+#ifdef LIBLUSTRE_HANDLE_UNALIGNED_PAGE
+ int _managed;
+#endif
+};
+
+typedef struct page cfs_page_t;
+
+#define CFS_PAGE_SIZE PAGE_CACHE_SIZE
+#define CFS_PAGE_SHIFT PAGE_CACHE_SHIFT
+#define CFS_PAGE_MASK PAGE_CACHE_MASK
+
+cfs_page_t *cfs_alloc_pages(unsigned int flags, unsigned int order);
+void cfs_free_pages(struct page *pg, int what);
+
+cfs_page_t *cfs_alloc_page(unsigned int flags);
+void cfs_free_page(cfs_page_t *pg, int what);
+void *cfs_page_address(cfs_page_t *pg);
+void *cfs_kmap(cfs_page_t *pg);
+void cfs_kunmap(cfs_page_t *pg);
+
+#define cfs_get_page(p) __I_should_not_be_called__(at_all)
+#define cfs_page_count(p) __I_should_not_be_called__(at_all)
+#define cfs_set_page_count(p, v) __I_should_not_be_called__(at_all)
+
+/*
+ * Memory allocator
+ */
+void *cfs_alloc(size_t nr_bytes, u_int32_t flags);
+void cfs_free(void *addr);
+void *cfs_alloc_large(size_t nr_bytes);
+void cfs_free_large(void *addr);
+
+/*
+ * SLAB allocator
+ */
+typedef struct {
+ int size;
+} cfs_mem_cache_t;
+
+#define SLAB_HWCACHE_ALIGN 0
+
+cfs_mem_cache_t *
+cfs_mem_cache_create(const char *, size_t, size_t, unsigned long,
+ void (*)(void *, cfs_mem_cache_t *, unsigned long),
+ void (*)(void *, cfs_mem_cache_t *, unsigned long));
+int cfs_mem_cache_destroy(cfs_mem_cache_t *c);
+void *cfs_mem_cache_alloc(cfs_mem_cache_t *c, int gfp);
+void cfs_mem_cache_free(cfs_mem_cache_t *c, void *addr);
+
+typedef int (cfs_read_proc_t)(char *page, char **start, off_t off,
+ int count, int *eof, void *data);
+
+struct file; /* forward ref */
+typedef int (cfs_write_proc_t)(struct file *file, const char *buffer,
+ unsigned long count, void *data);
+
+/*
+ * Timer
+ */
+
+typedef struct cfs_timer {} cfs_timer_t;
+
+#if 0
+#define cfs_init_timer(t) do {} while(0)
+void cfs_timer_init(struct cfs_timer *t, void (*func)(unsigned long), void *arg);
+void cfs_timer_done(struct cfs_timer *t);
+void cfs_timer_arm(struct cfs_timer *t, cfs_time_t deadline);
+void cfs_timer_disarm(struct cfs_timer *t);
+int cfs_timer_is_armed(struct cfs_timer *t);
+
+cfs_time_t cfs_timer_deadline(struct cfs_timer *t);
+#endif
+
+typedef void cfs_psdev_t;
+
+static inline int cfs_psdev_register(cfs_psdev_t *foo)
+{
+ return 0;
+}
+
+static inline int cfs_psdev_deregister(cfs_psdev_t *foo)
+{
+ return 0;
+}
+
+/* !__KERNEL__ */
+#endif
+
+/* __LIBCFS_USER_PRIM_H__ */
+#endif
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ *
+ * Implementation of portable time API for user-level.
+ *
+ */
+
+#ifndef __LIBCFS_USER_TIME_H__
+#define __LIBCFS_USER_TIME_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+/* Portable time API */
+
+/*
+ * Platform provides three opaque data-types:
+ *
+ * cfs_time_t represents point in time. This is internal kernel
+ * time rather than "wall clock". This time bears no
+ * relation to gettimeofday().
+ *
+ * cfs_duration_t represents time interval with resolution of internal
+ * platform clock
+ *
+ * cfs_fs_time_t represents instance in world-visible time. This is
+ * used in file-system time-stamps
+ *
+ * cfs_time_t cfs_time_current(void);
+ * cfs_time_t cfs_time_add (cfs_time_t, cfs_duration_t);
+ * cfs_duration_t cfs_time_sub (cfs_time_t, cfs_time_t);
+ * int cfs_time_before (cfs_time_t, cfs_time_t);
+ * int cfs_time_beforeq(cfs_time_t, cfs_time_t);
+ *
+ * cfs_duration_t cfs_duration_build(int64_t);
+ *
+ * time_t cfs_duration_sec (cfs_duration_t);
+ * void cfs_duration_usec(cfs_duration_t, struct timeval *);
+ * void cfs_duration_nsec(cfs_duration_t, struct timespec *);
+ *
+ * void cfs_fs_time_current(cfs_fs_time_t *);
+ * time_t cfs_fs_time_sec (cfs_fs_time_t *);
+ * void cfs_fs_time_usec (cfs_fs_time_t *, struct timeval *);
+ * void cfs_fs_time_nsec (cfs_fs_time_t *, struct timespec *);
+ * int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *);
+ * int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *);
+ *
+ * cfs_duration_t cfs_time_minimal_timeout(void)
+ *
+ * CFS_TIME_FORMAT
+ * CFS_DURATION_FORMAT
+ *
+ */
+
+#define ONE_BILLION ((u_int64_t)1000000000)
+#define ONE_MILLION ((u_int64_t) 1000000)
+
+#ifndef __KERNEL__
+
+/*
+ * Liblustre. time(2) based implementation.
+ */
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <time.h>
+
+typedef time_t cfs_fs_time_t;
+typedef time_t cfs_time_t;
+typedef long cfs_duration_t;
+
+static inline cfs_time_t cfs_time_current(void)
+{
+ return time(NULL);
+}
+
+static inline cfs_duration_t cfs_time_seconds(int seconds)
+{
+ return seconds;
+}
+
+static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2)
+{
+ return t1 < t2;
+}
+
+static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2)
+{
+ return t1 <= t2;
+}
+
+static inline cfs_duration_t cfs_duration_build(int64_t nano)
+{
+ return nano / ONE_BILLION;
+}
+
+static inline time_t cfs_duration_sec(cfs_duration_t d)
+{
+ return d;
+}
+
+static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s)
+{
+ s->tv_sec = d;
+ s->tv_usec = 0;
+}
+
+static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s)
+{
+ s->tv_sec = d;
+ s->tv_nsec = 0;
+}
+
+static inline void cfs_fs_time_current(cfs_fs_time_t *t)
+{
+ time(t);
+}
+
+static inline time_t cfs_fs_time_sec(cfs_fs_time_t *t)
+{
+ return *t;
+}
+
+static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v)
+{
+ v->tv_sec = *t;
+ v->tv_usec = 0;
+}
+
+static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s)
+{
+ s->tv_sec = *t;
+ s->tv_nsec = 0;
+}
+
+static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
+{
+ return *t1 < *t2;
+}
+
+static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
+{
+ return *t1 <= *t2;
+}
+
+static inline cfs_duration_t cfs_time_minimal_timeout(void)
+{
+ return 1;
+}
+
+#define CFS_MIN_DELAY (1)
+
+static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d)
+{
+ return t + d;
+}
+
+static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2)
+{
+ return t1 - t2;
+}
+
+#define CFS_TIME_T "%lu"
+#define CFS_DURATION_T "%ld"
+
+/* !__KERNEL__ */
+#endif
+
+/* __LIBCFS_USER_TIME_H__ */
+#endif
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
+++ /dev/null
-linuxdir = $(includedir)/linux
-
-EXTRA_DIST = kp30.h kpr.h libcfs.h lustre_list.h portals_compat25.h \
- portals_lib.h
portalsdir=$(includedir)/portals
-if UTILS
-portals_HEADERS = list.h
-endif
+SUBDIRS := darwin linux
-EXTRA_DIST = api.h api-support.h build_check.h errno.h \
- internal.h lib-p30.h lib-types.h list.h \
- lltrace.h myrnal.h nal.h nalids.h p30.h ptlctl.h \
+EXTRA_DIST = api.h api-support.h build_check.h errno.h \
+ internal.h kpr.h lib-p30.h lib-types.h \
+ myrnal.h nal.h nalids.h p30.h ptlctl.h \
socknal.h stringtab.h types.h
-
+#ifndef __API_SUPPORT_H__
+#define __API_SUPPORT_H__
#include "build_check.h"
#ifndef __KERNEL__
#endif
#include <portals/types.h>
-#include <linux/kp30.h>
+#include <libcfs/kp30.h>
#include <portals/p30.h>
#include <portals/internal.h>
#include <portals/nal.h>
+#endif
--- /dev/null
+EXTRA_DIST := lib-p30.h lib-types.h p30.h
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __PORTALS_DARWIN_LIB_P30_H__
+#define __PORTALS_DARWIN_LIB_P30_H__
+
+#ifndef __PORTALS_LIB_P30_H__
+#error Do not #include this file directly. #include <portals/lib-p30.h> instead
+#endif
+
+#include <string.h>
+#include <libcfs/libcfs.h>
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __PORTALS_DARWIN_LIB_P30_H__
+#define __PORTALS_DARWIN_LIB_P30_H__
+
+#ifndef __PORTALS_LIB_P30_H__
+#error Do not #include this file directly. #include <portals/lib-p30.h> instead
+#endif
+
+#include <string.h>
+#include <libcfs/libcfs.h>
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __PORTALS_DARWIN_LIB_TYPES_H__
+#define __PORTALS_DARWIN_LIB_TYPES_H__
+
+#ifndef __PORTALS_LIB_TYPES_H__
+#error Do not #include this file directly. #include <portals/lib-types.h> instead
+#endif
+
+#include <sys/types.h>
+#include <libcfs/libcfs.h>
+#include <libcfs/list.h>
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _PORTALS_DARWIN_P30_H_
+#define _PORTALS_DARWIN_P30_H_
+
+#ifndef __PORTALS_P30_H__
+#error Do not #include this file directly. #include <portals/p30.h> instead
+#endif
+
+/*
+ * p30.h
+ *
+ * User application interface file
+ */
+
+#include <sys/types.h>
+#include <sys/uio.h>
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _PORTALS_DARWIN_P30_H_
+#define _PORTALS_DARWIN_P30_H_
+
+#ifndef __PORTALS_P30_H__
+#error Do not #include this file directly. #include <portals/p30.h> instead
+#endif
+
+/*
+ * p30.h
+ *
+ * User application interface file
+ */
+
+#include <sys/types.h>
+#include <sys/uio.h>
+
+#endif
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*/
-#ifndef _KPR_H
-#define _KPR_H
+#ifndef __PORTALS_KPR_H__
+#define __PORTALS_KPR_H__
# include <portals/lib-types.h> /* for ptl_hdr_t */
* Top level include for library side routines
*/
-#ifndef _LIB_P30_H_
-#define _LIB_P30_H_
+#ifndef __PORTALS_LIB_P30_H__
+#define __PORTALS_LIB_P30_H__
#include "build_check.h"
-#ifdef __KERNEL__
-# include <asm/page.h>
-# include <linux/string.h>
+#if defined(__linux__)
+#include <portals/linux/lib-p30.h>
+#elif defined(__APPLE__)
+#include <portals/darwin/lib-p30.h>
#else
-# include <portals/list.h>
-# include <string.h>
-# include <pthread.h>
+#error Unsupported Operating System
#endif
+
#include <portals/types.h>
-#include <linux/kp30.h>
+#include <libcfs/kp30.h>
#include <portals/p30.h>
#include <portals/nal.h>
#include <portals/lib-types.h>
* Top level include for library side routines
*/
-#ifndef _LIB_P30_H_
-#define _LIB_P30_H_
+#ifndef __PORTALS_LIB_P30_H__
+#define __PORTALS_LIB_P30_H__
#include "build_check.h"
-#ifdef __KERNEL__
-# include <asm/page.h>
-# include <linux/string.h>
+#if defined(__linux__)
+#include <portals/linux/lib-p30.h>
+#elif defined(__APPLE__)
+#include <portals/darwin/lib-p30.h>
#else
-# include <portals/list.h>
-# include <string.h>
-# include <pthread.h>
+#error Unsupported Operating System
#endif
+
#include <portals/types.h>
-#include <linux/kp30.h>
+#include <libcfs/kp30.h>
#include <portals/p30.h>
#include <portals/nal.h>
#include <portals/lib-types.h>
* exposed to the user application
*/
-#ifndef _LIB_TYPES_H_
-#define _LIB_TYPES_H_
+#ifndef __PORTALS_LIB_TYPES_H__
+#define __PORTALS_LIB_TYPES_H__
#include "build_check.h"
-#include <portals/types.h>
-#include <portals/nal.h>
-#ifdef __KERNEL__
-# include <linux/uio.h>
-# include <linux/smp_lock.h>
-# include <linux/types.h>
+#if defined(__linux__)
+#include <portals/linux/lib-types.h>
+#elif defined(__APPLE__)
+#include <portals/darwin/lib-types.h>
#else
-# define PTL_USE_LIB_FREELIST
-# include <sys/types.h>
+#error Unsupported Operating System
#endif
+#include <libcfs/libcfs.h>
+#include <libcfs/list.h>
+#include <portals/types.h>
+#include <portals/nal.h>
+
typedef char *user_ptr;
typedef struct lib_msg_t lib_msg_t;
typedef struct lib_ptl_t lib_ptl_t;
#ifdef __KERNEL__
spinlock_t ni_lock;
- wait_queue_head_t ni_waitq;
+ cfs_waitq_t ni_waitq;
#else
pthread_mutex_t ni_mutex;
pthread_cond_t ni_cond;
--- /dev/null
+Makefile.in
+Makefile
--- /dev/null
+EXTRA_DIST := lib-p30.h lib-types.h p30.h
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __PORTALS_LINUX_LIB_P30_H__
+#define __PORTALS_LINUX_LIB_P30_H__
+
+#ifndef __PORTALS_LIB_P30_H__
+#error Do not #include this file directly. #include <portals/lib-p30.h> instead
+#endif
+
+#ifdef __KERNEL__
+# include <asm/page.h>
+# include <linux/string.h>
+#else
+# include <libcfs/list.h>
+# include <string.h>
+# include <pthread.h>
+#endif
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __PORTALS_LINUX_LIB_P30_H__
+#define __PORTALS_LINUX_LIB_P30_H__
+
+#ifndef __PORTALS_LIB_P30_H__
+#error Do not #include this file directly. #include <portals/lib-p30.h> instead
+#endif
+
+#ifdef __KERNEL__
+# include <asm/page.h>
+# include <linux/string.h>
+#else
+# include <libcfs/list.h>
+# include <string.h>
+# include <pthread.h>
+#endif
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __PORTALS_LINUX_LIB_TYPES_H__
+#define __PORTALS_LINUX_LIB_TYPES_H__
+
+#ifndef __PORTALS_LIB_TYPES_H__
+#error Do not #include this file directly. #include <portals/lib-types.h> instead
+#endif
+
+#ifdef __KERNEL__
+# include <linux/uio.h>
+# include <linux/smp_lock.h>
+# include <linux/types.h>
+#else
+# define PTL_USE_LIB_FREELIST
+# include <sys/types.h>
+#endif
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __PORTALS_LINUX_P30_H__
+#define __PORTALS_LINUX_P30_H__
+
+#ifndef __PORTALS_P30_H__
+#error Do not #include this file directly. #include <portals/p30.h> instead
+#endif
+
+/*
+ * p30.h
+ *
+ * User application interface file
+ */
+
+#if defined (__KERNEL__)
+#include <linux/uio.h>
+#include <linux/types.h>
+#else
+#include <sys/types.h>
+#include <sys/uio.h>
+#endif
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __PORTALS_LINUX_P30_H__
+#define __PORTALS_LINUX_P30_H__
+
+#ifndef __PORTALS_P30_H__
+#error Do not #include this file directly. #include <portals/p30.h> instead
+#endif
+
+/*
+ * p30.h
+ *
+ * User application interface file
+ */
+
+#if defined (__KERNEL__)
+#include <linux/uio.h>
+#include <linux/types.h>
+#else
+#include <sys/types.h>
+#include <sys/uio.h>
+#endif
+
+#endif
+++ /dev/null
-#ifndef _LINUX_LIST_H
-/*
- * Simple doubly linked list implementation.
- *
- * Some of the internal functions ("__xxx") are useful when
- * manipulating whole lists rather than single entries, as
- * sometimes we already know the next/prev entries and we can
- * generate better code by using them directly rather than
- * using the generic single-entry routines.
- */
-
-struct list_head {
- struct list_head *next, *prev;
-};
-
-typedef struct list_head list_t;
-
-#define LIST_HEAD_INIT(name) { &(name), &(name) }
-
-#define LIST_HEAD(name) \
- struct list_head name = LIST_HEAD_INIT(name)
-
-#define INIT_LIST_HEAD(ptr) do { \
- (ptr)->next = (ptr); (ptr)->prev = (ptr); \
-} while (0)
-
-/*
- * Insert a new entry between two known consecutive entries.
- *
- * This is only for internal list manipulation where we know
- * the prev/next entries already!
- */
-static inline void __list_add(struct list_head * new,
- struct list_head * prev,
- struct list_head * next)
-{
- next->prev = new;
- new->next = next;
- new->prev = prev;
- prev->next = new;
-}
-
-/**
- * list_add - add a new entry
- * @new: new entry to be added
- * @head: list head to add it after
- *
- * Insert a new entry after the specified head.
- * This is good for implementing stacks.
- */
-static inline void list_add(struct list_head *new, struct list_head *head)
-{
- __list_add(new, head, head->next);
-}
-
-/**
- * list_add_tail - add a new entry
- * @new: new entry to be added
- * @head: list head to add it before
- *
- * Insert a new entry before the specified head.
- * This is useful for implementing queues.
- */
-static inline void list_add_tail(struct list_head *new, struct list_head *head)
-{
- __list_add(new, head->prev, head);
-}
-
-/*
- * Delete a list entry by making the prev/next entries
- * point to each other.
- *
- * This is only for internal list manipulation where we know
- * the prev/next entries already!
- */
-static inline void __list_del(struct list_head * prev, struct list_head * next)
-{
- next->prev = prev;
- prev->next = next;
-}
-
-/**
- * list_del - deletes entry from list.
- * @entry: the element to delete from the list.
- * Note: list_empty on entry does not return true after this, the entry is in an undefined state.
- */
-static inline void list_del(struct list_head *entry)
-{
- __list_del(entry->prev, entry->next);
-}
-
-/**
- * list_del_init - deletes entry from list and reinitialize it.
- * @entry: the element to delete from the list.
- */
-static inline void list_del_init(struct list_head *entry)
-{
- __list_del(entry->prev, entry->next);
- INIT_LIST_HEAD(entry);
-}
-#endif
-
-#ifndef list_for_each_entry
-/**
- * list_move - delete from one list and add as another's head
- * @list: the entry to move
- * @head: the head that will precede our entry
- */
-static inline void list_move(struct list_head *list, struct list_head *head)
-{
- __list_del(list->prev, list->next);
- list_add(list, head);
-}
-
-/**
- * list_move_tail - delete from one list and add as another's tail
- * @list: the entry to move
- * @head: the head that will follow our entry
- */
-static inline void list_move_tail(struct list_head *list,
- struct list_head *head)
-{
- __list_del(list->prev, list->next);
- list_add_tail(list, head);
-}
-#endif
-
-#ifndef _LINUX_LIST_H
-#define _LINUX_LIST_H
-/**
- * list_empty - tests whether a list is empty
- * @head: the list to test.
- */
-static inline int list_empty(struct list_head *head)
-{
- return head->next == head;
-}
-
-static inline void __list_splice(struct list_head *list,
- struct list_head *head)
-{
- struct list_head *first = list->next;
- struct list_head *last = list->prev;
- struct list_head *at = head->next;
-
- first->prev = head;
- head->next = first;
-
- last->next = at;
- at->prev = last;
-}
-
-/**
- * list_splice - join two lists
- * @list: the new list to add.
- * @head: the place to add it in the first list.
- */
-static inline void list_splice(struct list_head *list, struct list_head *head)
-{
- if (!list_empty(list))
- __list_splice(list, head);
-}
-
-/**
- * list_splice_init - join two lists and reinitialise the emptied list.
- * @list: the new list to add.
- * @head: the place to add it in the first list.
- *
- * The list at @list is reinitialised
- */
-static inline void list_splice_init(struct list_head *list,
- struct list_head *head)
-{
- if (!list_empty(list)) {
- __list_splice(list, head);
- INIT_LIST_HEAD(list);
- }
-}
-
-/**
- * list_entry - get the struct for this entry
- * @ptr: the &struct list_head pointer.
- * @type: the type of the struct this is embedded in.
- * @member: the name of the list_struct within the struct.
- */
-#define list_entry(ptr, type, member) \
- ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
-
-/**
- * list_for_each - iterate over a list
- * @pos: the &struct list_head to use as a loop counter.
- * @head: the head for your list.
- */
-#define list_for_each(pos, head) \
- for (pos = (head)->next ; pos != (head); pos = pos->next )
-
-/**
- * list_for_each_prev - iterate over a list in reverse order
- * @pos: the &struct list_head to use as a loop counter.
- * @head: the head for your list.
- */
-#define list_for_each_prev(pos, head) \
- for (pos = (head)->prev ; pos != (head); pos = pos->prev)
-
-/**
- * list_for_each_safe - iterate over a list safe against removal of list entry
- * @pos: the &struct list_head to use as a loop counter.
- * @n: another &struct list_head to use as temporary storage
- * @head: the head for your list.
- */
-#define list_for_each_safe(pos, n, head) \
- for (pos = (head)->next, n = pos->next; pos != (head); \
- pos = n, n = pos->next)
-
-#endif
-
-#ifndef list_for_each_entry
-/**
- * list_for_each_entry - iterate over list of given type
- * @pos: the type * to use as a loop counter.
- * @head: the head for your list.
- * @member: the name of the list_struct within the struct.
- */
-#define list_for_each_entry(pos, head, member) \
- for (pos = list_entry((head)->next, typeof(*pos), member); \
- &pos->member != (head); \
- pos = list_entry(pos->member.next, typeof(*pos), member))
-#endif
-
-#ifndef list_for_each_entry_safe
-/**
- * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
- * @pos: the type * to use as a loop counter.
- * @n: another type * to use as temporary storage
- * @head: the head for your list.
- * @member: the name of the list_struct within the struct.
- */
-#define list_for_each_entry_safe(pos, n, head, member) \
- for (pos = list_entry((head)->next, typeof(*pos), member), \
- n = list_entry(pos->member.next, typeof(*pos), member); \
- &pos->member != (head); \
- pos = n, n = list_entry(n->member.next, typeof(*n), member))
-#endif
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*/
-#ifndef _P30_H_
-#define _P30_H_
+#ifndef __PORTALS_P30_H__
+#define __PORTALS_P30_H__
#include "build_check.h"
*
* User application interface file
*/
-
-#if defined (__KERNEL__)
-#include <linux/uio.h>
-#include <linux/types.h>
+#if defined(__linux__)
+#include <portals/linux/p30.h>
+#elif defined(__APPLE__)
+#include <portals/darwin/p30.h>
#else
-#include <sys/types.h>
-#include <sys/uio.h>
+#error Unsupported Operating System
#endif
#include <portals/types.h>
#define _PTLCTL_H_
#include <portals/types.h>
-#include <linux/kp30.h>
-#include <linux/libcfs.h>
+#include <libcfs/kp30.h>
+#include <libcfs/libcfs.h>
#define PORTALS_DEV_ID 0
#define PORTALS_DEV_PATH "/dev/portals"
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*/
-#ifndef _P30_H_
-#define _P30_H_
+#ifndef __PORTALS_P30_H__
+#define __PORTALS_P30_H__
#include "build_check.h"
*
* User application interface file
*/
-
-#if defined (__KERNEL__)
-#include <linux/uio.h>
-#include <linux/types.h>
+#if defined(__linux__)
+#include <portals/linux/p30.h>
+#elif defined(__APPLE__)
+#include <portals/darwin/p30.h>
#else
-#include <sys/types.h>
-#include <sys/uio.h>
+#error Unsupported Operating System
#endif
#include <portals/types.h>
#define _PTLCTL_H_
#include <portals/types.h>
-#include <linux/kp30.h>
-#include <linux/libcfs.h>
+#include <libcfs/kp30.h>
+#include <libcfs/libcfs.h>
#define PORTALS_DEV_ID 0
#define PORTALS_DEV_PATH "/dev/portals"
#include "build_check.h"
-#include <linux/libcfs.h>
+#include <libcfs/libcfs.h>
#include <portals/errno.h>
/* This implementation uses the same type for API function return codes and
typedef struct iovec ptl_md_iovec_t;
typedef struct {
- struct page *kiov_page;
+ cfs_page_t *kiov_page;
unsigned int kiov_len;
unsigned int kiov_offset;
} ptl_kiov_t;
#include "portals/nal.h"
#include "portals/api.h"
#include "portals/errno.h"
-#include "linux/kp30.h"
+#include "libcfs/kp30.h"
#include "portals/p30.h"
#include "portals/nal.h"
#define DEBUG_SUBSYSTEM S_NAL
-#include <linux/kp30.h>
+#include <libcfs/kp30.h>
#include <portals/p30.h>
#include <portals/lib-p30.h>
#include <portals/nal.h>
/* XXX I have no idea. */
#define IBNAL_STARTING_PSN 1
-typedef struct
+typedef struct
{
int kib_io_timeout; /* comms timeout (seconds) */
struct ctl_table_header *kib_sysctl; /* sysctl interface */
__u32 md_rkey;
__u64 md_addr;
} kib_md_t __attribute__((packed));
-
-typedef struct
+
+typedef struct
{
int kib_init; /* initialisation state */
__u64 kib_incarnation; /* which one am I */
struct list_head kib_sched_txq; /* tx requiring attention */
struct list_head kib_sched_rxq; /* rx requiring attention */
spinlock_t kib_sched_lock; /* serialise */
-
+
struct kib_tx *kib_tx_descs; /* all the tx descriptors */
kib_pages_t *kib_tx_pages; /* premapped tx msg pages */
wait_queue_head_t kib_idle_tx_waitq; /* block here for tx descriptor */
__u64 kib_next_tx_cookie; /* RDMA completion cookie */
spinlock_t kib_tx_lock; /* serialise */
-
+
IB_HANDLE kib_hca; /* The HCA */
int kib_port; /* port on the device */
IB_HANDLE kib_pd; /* protection domain */
#define IBNAL_INIT_PD 7
#define IBNAL_INIT_FMR 8
#define IBNAL_INIT_MR 9
-#define IBNAL_INIT_TXD 10
-#define IBNAL_INIT_CQ 11
-#define IBNAL_INIT_ALL 12
+#define IBNAL_INIT_TXD 10
+#define IBNAL_INIT_CQ 11
+#define IBNAL_INIT_ALL 12
/************************************************************************
* Wire message structs.
/* these arrays serve two purposes during rdma. they are built on the passive
* side and sent to the active side as remote arguments. On the active side
- * the descs are used as a data structure on the way to local gather items.
+ * the descs are used as a data structure on the way to local gather items.
* the different roles result in split local/remote meaning of desc->rd_key */
typedef struct
{
} kib_connreq_t;
typedef struct kib_conn
-{
+{
struct kib_peer *ibc_peer; /* owning peer */
struct list_head ibc_list; /* stash on peer's conn list */
__u64 ibc_incarnation; /* which instance of the peer */
}
static inline FSTATUS
-iibt_open_hca(EUI64 hca_guid,
+iibt_open_hca(EUI64 hca_guid,
IB_COMPLETION_CALLBACK completion_callback,
IB_ASYNC_EVENT_CALLBACK async_event_callback,
void *arg,
}
static inline FSTATUS
-iibt_register_physical_memory(IB_HANDLE hca_handle,
+iibt_register_physical_memory(IB_HANDLE hca_handle,
IB_VIRT_ADDR requested_io_va,
void *phys_buffers, uint64 nphys_buffers,
uint32 io_va_offset, IB_HANDLE pd_handle,
IB_ACCESS_CONTROL access,
- IB_HANDLE *mem_handle,
+ IB_HANDLE *mem_handle,
IB_VIRT_ADDR *actual_io_va,
IB_L_KEY *lkey, IB_R_KEY *rkey)
{
return IIBT_IF.Vpi.RegisterPhysMemRegion(hca_handle, requested_io_va,
phys_buffers, nphys_buffers,
- io_va_offset, pd_handle,
+ io_va_offset, pd_handle,
access,
mem_handle, actual_io_va,
lkey, rkey);
}
static inline FSTATUS
-iibt_register_contig_physical_memory(IB_HANDLE hca_handle,
+iibt_register_contig_physical_memory(IB_HANDLE hca_handle,
IB_VIRT_ADDR requested_io_va,
- IB_MR_PHYS_BUFFER *phys_buffers,
+ IB_MR_PHYS_BUFFER *phys_buffers,
uint64 nphys_buffers,
uint32 io_va_offset, IB_HANDLE pd_handle,
IB_ACCESS_CONTROL access,
- IB_HANDLE *mem_handle,
+ IB_HANDLE *mem_handle,
IB_VIRT_ADDR *actual_io_va,
IB_L_KEY *lkey, IB_R_KEY *rkey)
{
- return IIBT_IF.Vpi.RegisterContigPhysMemRegion(hca_handle,
+ return IIBT_IF.Vpi.RegisterContigPhysMemRegion(hca_handle,
requested_io_va,
- phys_buffers,
+ phys_buffers,
nphys_buffers,
- io_va_offset, pd_handle,
+ io_va_offset, pd_handle,
access,
mem_handle, actual_io_va,
lkey, rkey);
}
static inline FSTATUS
-iibt_register_memory(IB_HANDLE hca_handle,
+iibt_register_memory(IB_HANDLE hca_handle,
void *virt_addr, unsigned int length,
IB_HANDLE pd_handle,
IB_ACCESS_CONTROL access,
- IB_HANDLE *mem_handle,
+ IB_HANDLE *mem_handle,
IB_L_KEY *lkey, IB_R_KEY *rkey)
{
- return IIBT_IF.Vpi.RegisterMemRegion(hca_handle,
+ return IIBT_IF.Vpi.RegisterMemRegion(hca_handle,
virt_addr, length,
- pd_handle,
+ pd_handle,
access,
mem_handle,
lkey, rkey);
static inline FSTATUS
iibt_qp_create(IB_HANDLE hca_handle, IB_QP_ATTRIBUTES_CREATE *create_attr,
- void *arg, IB_HANDLE *cq_handle,
+ void *arg, IB_HANDLE *cq_handle,
IB_QP_ATTRIBUTES_QUERY *query_attr)
{
- return IIBT_IF.Vpi.CreateQP(hca_handle, create_attr, arg, cq_handle,
+ return IIBT_IF.Vpi.CreateQP(hca_handle, create_attr, arg, cq_handle,
query_attr);
}
}
static inline FSTATUS
-iibt_cm_accept(IB_HANDLE cep,
+iibt_cm_accept(IB_HANDLE cep,
CM_CONN_INFO *send_info, CM_CONN_INFO *recv_info,
PFN_CM_CALLBACK callback, void *arg,
IB_HANDLE *new_cep)
/******************************************************************************/
static inline struct list_head *
-kibnal_nid2peerlist (ptl_nid_t nid)
+kibnal_nid2peerlist (ptl_nid_t nid)
{
unsigned int hash = ((unsigned int)nid) % kibnal_data.kib_peer_hash_size;
-
+
return (&kibnal_data.kib_peers [hash]);
}
{
struct ib_qp_attribute qp_attr;
int rc;
-
+
memset (&qp_attr, 0, sizeof(qp_attr));
rc = ib_qp_query(conn->ibc_qp, &qp_attr);
if (rc != 0) {
CERROR ("Can't get qp attrs: %d\n", rc);
return;
}
-
+
CWARN ("RDMA CAPABILITY: write %s read %s\n",
(qp_attr.valid_fields & TS_IB_QP_ATTRIBUTE_RDMA_ATOMIC_ENABLE) ?
(qp_attr.enable_rdma_write ? "enabled" : "disabled") : "invalid",
kibnal_page2phys (struct page *p)
{
__u64 page_number = p - mem_map;
-
+
return (page_number << PAGE_SHIFT);
}
#else
extern int kibnal_del_peer (ptl_nid_t nid, int single_share);
extern kib_peer_t *kibnal_find_peer_locked (ptl_nid_t nid);
extern void kibnal_unlink_peer_locked (kib_peer_t *peer);
-extern int kibnal_close_stale_conns_locked (kib_peer_t *peer,
+extern int kibnal_close_stale_conns_locked (kib_peer_t *peer,
__u64 incarnation);
extern kib_conn_t *kibnal_create_conn (void);
extern void kibnal_put_conn (kib_conn_t *conn);
extern int kibnal_connd (void *arg);
extern void kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob);
extern void kibnal_close_conn (kib_conn_t *conn, int why);
-extern void kibnal_start_active_rdma (int type, int status,
- kib_rx_t *rx, lib_msg_t *libmsg,
- unsigned int niov,
+extern void kibnal_start_active_rdma (int type, int status,
+ kib_rx_t *rx, lib_msg_t *libmsg,
+ unsigned int niov,
struct iovec *iov, ptl_kiov_t *kiov,
size_t offset, size_t nob);
if MODULES
if !CRAY_PORTALS
+if LINUX
modulenet_DATA = klonal$(KMODEXT)
endif
endif
+endif
MOSTLYCLEANFILES = *.o *.ko *.mod.c
DIST_SOURCES = $(klonal-objs:%.o=%.c) lonal.h
#define DEBUG_SUBSYSTEM S_NAL
-#include <linux/kp30.h>
+#include <libcfs/kp30.h>
#include <portals/p30.h>
#include <portals/lib-p30.h>
#include <portals/nal.h>
#define DEBUG_SUBSYSTEM S_NAL
-#include <linux/kp30.h>
+#include <libcfs/kp30.h>
#include <portals/p30.h>
#include <portals/lib-p30.h>
#include <portals/nal.h>
//#define IBNAL_CALLBACK_CTXT IB_CQ_CALLBACK_PROCESS
#define IBNAL_CALLBACK_CTXT IB_CQ_CALLBACK_INTERRUPT
-typedef struct
+typedef struct
{
int kib_io_timeout; /* comms timeout (seconds) */
int kib_listener_timeout; /* listener's timeout */
struct ib_mr *ibp_handle; /* mapped region handle */
struct page *ibp_pages[0];
} kib_pages_t;
-
-typedef struct
+
+typedef struct
{
int kib_init; /* initialisation state */
__u64 kib_incarnation; /* which one am I */
struct list_head kib_sched_txq; /* tx requiring attention */
struct list_head kib_sched_rxq; /* rx requiring attention */
spinlock_t kib_sched_lock; /* serialise */
-
+
struct kib_tx *kib_tx_descs; /* all the tx descriptors */
kib_pages_t *kib_tx_pages; /* premapped tx msg pages */
wait_queue_head_t kib_idle_tx_waitq; /* block here for tx descriptor */
__u64 kib_next_tx_cookie; /* RDMA completion cookie */
spinlock_t kib_tx_lock; /* serialise */
-
+
struct ib_device *kib_device; /* "the" device */
struct ib_device_properties kib_device_props; /* its properties */
int kib_port; /* port on the device */
struct ib_fmr_pool *kib_fmr_pool; /* fast memory region pool */
#endif
struct ib_cq *kib_cq; /* completion queue */
-
+
} kib_data_t;
#define IBNAL_INIT_NOTHING 0
} kib_connreq_t;
typedef struct kib_conn
-{
+{
struct kib_peer *ibc_peer; /* owning peer */
struct list_head ibc_list; /* stash on peer's conn list */
__u64 ibc_incarnation; /* which instance of the peer */
extern kib_tunables_t kibnal_tunables;
static inline struct list_head *
-kibnal_nid2peerlist (ptl_nid_t nid)
+kibnal_nid2peerlist (ptl_nid_t nid)
{
unsigned int hash = ((unsigned int)nid) % kibnal_data.kib_peer_hash_size;
-
+
return (&kibnal_data.kib_peers [hash]);
}
{
struct ib_qp_attribute qp_attr;
int rc;
-
+
memset (&qp_attr, 0, sizeof(qp_attr));
rc = ib_qp_query(conn->ibc_qp, &qp_attr);
if (rc != 0) {
CERROR ("Can't get qp attrs: %d\n", rc);
return;
}
-
+
CWARN ("RDMA CAPABILITY: write %s read %s\n",
(qp_attr.valid_fields & TS_IB_QP_ATTRIBUTE_RDMA_ATOMIC_ENABLE) ?
(qp_attr.enable_rdma_write ? "enabled" : "disabled") : "invalid",
extern int kibnal_del_peer (ptl_nid_t nid, int single_share);
extern kib_peer_t *kibnal_find_peer_locked (ptl_nid_t nid);
extern void kibnal_unlink_peer_locked (kib_peer_t *peer);
-extern int kibnal_close_stale_conns_locked (kib_peer_t *peer,
+extern int kibnal_close_stale_conns_locked (kib_peer_t *peer,
__u64 incarnation);
extern kib_conn_t *kibnal_create_conn (void);
extern void kibnal_put_conn (kib_conn_t *conn);
extern tTS_IB_CM_CALLBACK_RETURN
kibnal_conn_callback (tTS_IB_CM_EVENT event, tTS_IB_CM_COMM_ID cid,
void *param, void *arg);
-extern tTS_IB_CM_CALLBACK_RETURN
+extern tTS_IB_CM_CALLBACK_RETURN
kibnal_passive_conn_callback (tTS_IB_CM_EVENT event, tTS_IB_CM_COMM_ID cid,
void *param, void *arg);
extern void kibnal_callback (struct ib_cq *cq, struct ib_cq_entry *e, void *arg);
extern void kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob);
extern int kibnal_close_conn (kib_conn_t *conn, int why);
-extern void kibnal_start_active_rdma (int type, int status,
- kib_rx_t *rx, lib_msg_t *libmsg,
- unsigned int niov,
+extern void kibnal_start_active_rdma (int type, int status,
+ kib_rx_t *rx, lib_msg_t *libmsg,
+ unsigned int niov,
struct iovec *iov, ptl_kiov_t *kiov,
int offset, int nob);
* along with Lustre; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
- * Basic library routines.
+ * Basic library routines.
*
*/
#define DEBUG_SUBSYSTEM S_NAL
-#include <linux/kp30.h>
-#include <linux/kpr.h>
+#include <libcfs/kp30.h>
+#include <portals/kpr.h>
#include <portals/p30.h>
#include <portals/lib-p30.h>
#include <portals/nal.h>
int kqn_optimized_gets; /* optimized GETs? */
#if CONFIG_SYSCTL
struct ctl_table_header *kqn_sysctl; /* sysctl interface */
-#endif
+#endif
} kqswnal_tunables_t;
typedef struct
wait_queue_head_t kqn_idletxd_waitq; /* sender blocks here waiting for idle txd */
struct list_head kqn_idletxd_fwdq; /* forwarded packets block here waiting for idle txd */
atomic_t kqn_pending_txs; /* # transmits being prepped */
-
+
spinlock_t kqn_sched_lock; /* serialise packet schedulers */
wait_queue_head_t kqn_sched_waitq; /* scheduler blocks here */
extern void kqswnal_rx_done (kqswnal_rx_t *krx);
static inline ptl_nid_t
-kqswnal_elanid2nid (int elanid)
+kqswnal_elanid2nid (int elanid)
{
return (kqswnal_data.kqn_nid_offset + elanid);
}
static inline int
-kqswnal_nid2elanid (ptl_nid_t nid)
+kqswnal_nid2elanid (ptl_nid_t nid)
{
/* not in this cluster? */
if (nid < kqswnal_data.kqn_nid_offset ||
nid >= kqswnal_data.kqn_nid_offset + kqswnal_data.kqn_nnodes)
return (-1);
-
+
return (nid - kqswnal_data.kqn_nid_offset);
}
static inline ptl_nid_t
-kqswnal_rx_nid(kqswnal_rx_t *krx)
+kqswnal_rx_nid(kqswnal_rx_t *krx)
{
return (kqswnal_elanid2nid(ep_rxd_node(krx->krx_rxd)));
}
static inline kqsw_csum_t kqsw_csum (kqsw_csum_t sum, void *base, int nob)
{
unsigned char *ptr = (unsigned char *)base;
-
+
while (nob-- > 0)
sum += *ptr++;
-
+
return (sum);
}
#endif
#define EP_ENOMEM ENOMEM
static inline EP_XMTR *
-ep_alloc_xmtr(EP_DEV *e)
+ep_alloc_xmtr(EP_DEV *e)
{
return (ep_alloc_large_xmtr(e));
}
}
static inline void
-ep_free_xmtr(EP_XMTR *x)
+ep_free_xmtr(EP_XMTR *x)
{
ep_free_large_xmtr(x);
}
#define DEBUG_SUBSYSTEM S_NAL
-#include <linux/kp30.h>
-#include <linux/portals_compat25.h>
+#include <libcfs/kp30.h>
#include <portals/p30.h>
#include <portals/lib-p30.h>
#include <portals/nal.h>
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>CFBundleDevelopmentRegion</key>
+ <string>English</string>
+ <key>CFBundleExecutable</key>
+ <string>ksocknal</string>
+ <key>CFBundleIconFile</key>
+ <string></string>
+ <key>CFBundleIdentifier</key>
+ <string>com.clusterfs.lustre.ksocknal</string>
+ <key>CFBundleInfoDictionaryVersion</key>
+ <string>6.0</string>
+ <key>CFBundlePackageType</key>
+ <string>KEXT</string>
+ <key>CFBundleSignature</key>
+ <string>????</string>
+ <key>CFBundleVersion</key>
+ <string>1.0.1</string>
+ <key>OSBundleCompatibleVersion</key>
+ <string>1.0.0</string>
+ <key>OSBundleLibraries</key>
+ <dict>
+ <key>com.apple.kernel.bsd</key>
+ <string>1.1</string>
+ <key>com.apple.kernel.iokit</key>
+ <string>1.0.0b1</string>
+ <key>com.apple.kernel.mach</key>
+ <string>1.0.0b1</string>
+ <key>com.clusterfs.lustre.libcfs</key>
+ <string>1.0.0</string>
+ <key>com.clusterfs.lustre.portals</key>
+ <string>1.0.0</string>
+ </dict>
+</dict>
+</plist>
MODULES := ksocknal
-ksocknal-objs := socknal.o socknal_cb.o
+
+ksocknal-objs := socknal.o socknal_cb.o socknal_lib-linux.o
# If you don't build with -O2, your modules won't insert, becahse htonl is
# just special that way.
if MODULES
if !CRAY_PORTALS
+
+if LINUX
modulenet_DATA = ksocknal$(KMODEXT)
endif
+
+if DARWIN
+macos_PROGRAMS := ksocknal
+
+ksocknal_SOURCES := socknal.c socknal_cb.c socknal_lib-darwin.c
+
+ksocknal_CFLAGS := $(EXTRA_KCFLAGS)
+ksocknal_LDFLAGS := $(EXTRA_KLDFLAGS)
+ksocknal_LDADD := $(EXTRA_KLIBS)
+
+plist_DATA := Info.plist
+
+install-data-hook: fix-kext-ownership
+
+endif
+
endif
+endif
+
+EXTRA_DIST := Info.plist
-MOSTLYCLEANFILES = *.o *.ko *.mod.c
-DIST_SOURCES = $(ksocknal-objs:%.o=%.c) socknal.h
+MOSTLYCLEANFILES = *.o *.ko *.mod.c socknal_lib.c
+DIST_SOURCES = $(ksocknal-objs:%.o=%.c) socknal_lib-darwin.c \
+ socknal_lib-darwin.h socknal_lib-linux.h socknal.h
--- /dev/null
+// !$*UTF8*$!
+{
+ archiveVersion = 1;
+ classes = {
+ };
+ objectVersion = 39;
+ objects = {
+ 06AA1262FFB20DD611CA28AA = {
+ buildRules = (
+ );
+ buildSettings = {
+ COPY_PHASE_STRIP = NO;
+ GCC_DYNAMIC_NO_PIC = NO;
+ GCC_ENABLE_FIX_AND_CONTINUE = YES;
+ GCC_GENERATE_DEBUGGING_SYMBOLS = YES;
+ GCC_OPTIMIZATION_LEVEL = 0;
+ OPTIMIZATION_CFLAGS = "-O0";
+ ZERO_LINK = YES;
+ };
+ isa = PBXBuildStyle;
+ name = Development;
+ };
+ 06AA1263FFB20DD611CA28AA = {
+ buildRules = (
+ );
+ buildSettings = {
+ COPY_PHASE_STRIP = YES;
+ GCC_ENABLE_FIX_AND_CONTINUE = NO;
+ ZERO_LINK = NO;
+ };
+ isa = PBXBuildStyle;
+ name = Deployment;
+ };
+//060
+//061
+//062
+//063
+//064
+//080
+//081
+//082
+//083
+//084
+ 089C1669FE841209C02AAC07 = {
+ buildSettings = {
+ };
+ buildStyles = (
+ 06AA1262FFB20DD611CA28AA,
+ 06AA1263FFB20DD611CA28AA,
+ );
+ hasScannedForEncodings = 1;
+ isa = PBXProject;
+ mainGroup = 089C166AFE841209C02AAC07;
+ projectDirPath = "";
+ targets = (
+ 32A4FEB80562C75700D090E7,
+ );
+ };
+ 089C166AFE841209C02AAC07 = {
+ children = (
+ 247142CAFF3F8F9811CA285C,
+ 089C167CFE841241C02AAC07,
+ 19C28FB6FE9D52B211CA2CBB,
+ );
+ isa = PBXGroup;
+ name = ksocknal;
+ refType = 4;
+ sourceTree = "<group>";
+ };
+ 089C167CFE841241C02AAC07 = {
+ children = (
+ 32A4FEC30562C75700D090E7,
+ );
+ isa = PBXGroup;
+ name = Resources;
+ refType = 4;
+ sourceTree = "<group>";
+ };
+//080
+//081
+//082
+//083
+//084
+//190
+//191
+//192
+//193
+//194
+ 1957C5680737C71F00425049 = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ path = socknal.c;
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 1957C5690737C71F00425049 = {
+ fileRef = 1957C5680737C71F00425049;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 1957C56A0737C72F00425049 = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ path = socknal_cb.c;
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 1957C56B0737C72F00425049 = {
+ fileRef = 1957C56A0737C72F00425049;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 1957C5B20737C78E00425049 = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ name = socknal_lib.c;
+ path = arch/xnu/socknal_lib.c;
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 1957C5B30737C78E00425049 = {
+ fileRef = 1957C5B20737C78E00425049;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19C28FB6FE9D52B211CA2CBB = {
+ children = (
+ 32A4FEC40562C75800D090E7,
+ );
+ isa = PBXGroup;
+ name = Products;
+ refType = 4;
+ sourceTree = "<group>";
+ };
+//190
+//191
+//192
+//193
+//194
+//240
+//241
+//242
+//243
+//244
+ 247142CAFF3F8F9811CA285C = {
+ children = (
+ 1957C5B20737C78E00425049,
+ 1957C56A0737C72F00425049,
+ 1957C5680737C71F00425049,
+ );
+ isa = PBXGroup;
+ name = Source;
+ path = "";
+ refType = 4;
+ sourceTree = "<group>";
+ };
+//240
+//241
+//242
+//243
+//244
+//320
+//321
+//322
+//323
+//324
+ 32A4FEB80562C75700D090E7 = {
+ buildPhases = (
+ 32A4FEB90562C75700D090E7,
+ 32A4FEBA0562C75700D090E7,
+ 32A4FEBB0562C75700D090E7,
+ 32A4FEBD0562C75700D090E7,
+ 32A4FEBF0562C75700D090E7,
+ 32A4FEC00562C75700D090E7,
+ 32A4FEC10562C75700D090E7,
+ );
+ buildRules = (
+ );
+ buildSettings = {
+ FRAMEWORK_SEARCH_PATHS = "";
+ GCC_WARN_FOUR_CHARACTER_CONSTANTS = NO;
+ GCC_WARN_UNKNOWN_PRAGMAS = NO;
+ HEADER_SEARCH_PATHS = "../../include ./arch/xnu";
+ INFOPLIST_FILE = Info.plist;
+ INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions";
+ LIBRARY_SEARCH_PATHS = "";
+ MODULE_NAME = com.clusterfs.lustre.portals.knals.ksocknal;
+ MODULE_START = ksocknal_start;
+ MODULE_STOP = ksocknal_stop;
+ MODULE_VERSION = 1.0.1;
+ OTHER_CFLAGS = "-D__KERNEL__";
+ OTHER_LDFLAGS = "";
+ OTHER_REZFLAGS = "";
+ PRODUCT_NAME = ksocknal;
+ SECTORDER_FLAGS = "";
+ WARNING_CFLAGS = "-Wmost";
+ WRAPPER_EXTENSION = kext;
+ };
+ dependencies = (
+ );
+ isa = PBXNativeTarget;
+ name = ksocknal;
+ productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions";
+ productName = ksocknal;
+ productReference = 32A4FEC40562C75800D090E7;
+ productType = "com.apple.product-type.kernel-extension";
+ };
+ 32A4FEB90562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXShellScriptBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ shellPath = /bin/sh;
+ shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi";
+ };
+ 32A4FEBA0562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXHeadersBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 32A4FEBB0562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXResourcesBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 32A4FEBD0562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ 1957C5690737C71F00425049,
+ 1957C56B0737C72F00425049,
+ 1957C5B30737C78E00425049,
+ );
+ isa = PBXSourcesBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 32A4FEBF0562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXFrameworksBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 32A4FEC00562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXRezBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 32A4FEC10562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXShellScriptBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ shellPath = /bin/sh;
+ shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi";
+ };
+ 32A4FEC30562C75700D090E7 = {
+ isa = PBXFileReference;
+ lastKnownFileType = text.plist.xml;
+ path = Info.plist;
+ refType = 4;
+ sourceTree = "<group>";
+ };
+ 32A4FEC40562C75800D090E7 = {
+ explicitFileType = wrapper.cfbundle;
+ includeInIndex = 0;
+ isa = PBXFileReference;
+ path = ksocknal.kext;
+ refType = 3;
+ sourceTree = BUILT_PRODUCTS_DIR;
+ };
+ };
+ rootObject = 089C1669FE841209C02AAC07;
+}
kprni_notify: ksocknal_notify,
};
-#ifdef CONFIG_SYSCTL
-#define SOCKNAL_SYSCTL 200
-
-#define SOCKNAL_SYSCTL_TIMEOUT 1
-#define SOCKNAL_SYSCTL_EAGER_ACK 2
-#define SOCKNAL_SYSCTL_ZERO_COPY 3
-#define SOCKNAL_SYSCTL_TYPED 4
-#define SOCKNAL_SYSCTL_MIN_BULK 5
-#define SOCKNAL_SYSCTL_BUFFER_SIZE 6
-#define SOCKNAL_SYSCTL_NAGLE 7
-#define SOCKNAL_SYSCTL_IRQ_AFFINITY 8
-#define SOCKNAL_SYSCTL_KEEPALIVE_IDLE 9
-#define SOCKNAL_SYSCTL_KEEPALIVE_COUNT 10
-#define SOCKNAL_SYSCTL_KEEPALIVE_INTVL 11
-
-static ctl_table ksocknal_ctl_table[] = {
- {SOCKNAL_SYSCTL_TIMEOUT, "timeout",
- &ksocknal_tunables.ksnd_io_timeout, sizeof (int),
- 0644, NULL, &proc_dointvec},
- {SOCKNAL_SYSCTL_EAGER_ACK, "eager_ack",
- &ksocknal_tunables.ksnd_eager_ack, sizeof (int),
- 0644, NULL, &proc_dointvec},
-#if SOCKNAL_ZC
- {SOCKNAL_SYSCTL_ZERO_COPY, "zero_copy",
- &ksocknal_tunables.ksnd_zc_min_frag, sizeof (int),
- 0644, NULL, &proc_dointvec},
-#endif
- {SOCKNAL_SYSCTL_TYPED, "typed",
- &ksocknal_tunables.ksnd_typed_conns, sizeof (int),
- 0644, NULL, &proc_dointvec},
- {SOCKNAL_SYSCTL_MIN_BULK, "min_bulk",
- &ksocknal_tunables.ksnd_min_bulk, sizeof (int),
- 0644, NULL, &proc_dointvec},
- {SOCKNAL_SYSCTL_BUFFER_SIZE, "buffer_size",
- &ksocknal_tunables.ksnd_buffer_size, sizeof(int),
- 0644, NULL, &proc_dointvec},
- {SOCKNAL_SYSCTL_NAGLE, "nagle",
- &ksocknal_tunables.ksnd_nagle, sizeof(int),
- 0644, NULL, &proc_dointvec},
-#if CPU_AFFINITY
- {SOCKNAL_SYSCTL_IRQ_AFFINITY, "irq_affinity",
- &ksocknal_tunables.ksnd_irq_affinity, sizeof(int),
- 0644, NULL, &proc_dointvec},
-#endif
- {SOCKNAL_SYSCTL_KEEPALIVE_IDLE, "keepalive_idle",
- &ksocknal_tunables.ksnd_keepalive_idle, sizeof(int),
- 0644, NULL, &proc_dointvec},
- {SOCKNAL_SYSCTL_KEEPALIVE_COUNT, "keepalive_count",
- &ksocknal_tunables.ksnd_keepalive_count, sizeof(int),
- 0644, NULL, &proc_dointvec},
- {SOCKNAL_SYSCTL_KEEPALIVE_INTVL, "keepalive_intvl",
- &ksocknal_tunables.ksnd_keepalive_intvl, sizeof(int),
- 0644, NULL, &proc_dointvec},
- { 0 }
-};
-
-static ctl_table ksocknal_top_ctl_table[] = {
- {SOCKNAL_SYSCTL, "socknal", NULL, 0, 0555, ksocknal_ctl_table},
- { 0 }
-};
-#endif
-
int
ksocknal_set_mynid(ptl_nid_t nid)
{
return (0);
}
-void
-ksocknal_bind_irq (unsigned int irq)
-{
-#if (defined(CONFIG_SMP) && CPU_AFFINITY)
- int bind;
- int cpu;
- unsigned long flags;
- char cmdline[64];
- ksock_irqinfo_t *info;
- char *argv[] = {"/bin/sh",
- "-c",
- cmdline,
- NULL};
- char *envp[] = {"HOME=/",
- "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
- NULL};
-
- LASSERT (irq < NR_IRQS);
- if (irq == 0) /* software NIC or affinity disabled */
- return;
-
- info = &ksocknal_data.ksnd_irqinfo[irq];
-
- write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
-
- LASSERT (info->ksni_valid);
- bind = !info->ksni_bound;
- info->ksni_bound = 1;
-
- write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
-
- if (!bind) /* bound already */
- return;
-
- cpu = ksocknal_irqsched2cpu(info->ksni_sched);
- snprintf (cmdline, sizeof (cmdline),
- "echo %d > /proc/irq/%u/smp_affinity", 1 << cpu, irq);
-
- printk (KERN_INFO "Lustre: Binding irq %u to CPU %d with cmd: %s\n",
- irq, cpu, cmdline);
-
- /* FIXME: Find a better method of setting IRQ affinity...
- */
-
- USERMODEHELPER(argv[0], argv, envp);
-#endif
-}
-
ksock_interface_t *
ksocknal_ip2iface(__u32 ip)
{
for (i = 0; i < ksocknal_data.ksnd_ninterfaces; i++) {
LASSERT(i < SOCKNAL_MAX_INTERFACES);
iface = &ksocknal_data.ksnd_interfaces[i];
-
+
if (iface->ksni_ipaddr == ip)
return (iface);
}
-
+
return (NULL);
}
atomic_set (&route->ksnr_refcount, 1);
route->ksnr_peer = NULL;
- route->ksnr_timeout = jiffies;
+ route->ksnr_timeout = cfs_time_current();
route->ksnr_retry_interval = SOCKNAL_MIN_RECONNECT_INTERVAL;
route->ksnr_ipaddr = ipaddr;
route->ksnr_port = port;
peer->ksnp_nid = nid;
atomic_set (&peer->ksnp_refcount, 1); /* 1 ref for caller */
peer->ksnp_closing = 0;
- INIT_LIST_HEAD (&peer->ksnp_conns);
- INIT_LIST_HEAD (&peer->ksnp_routes);
- INIT_LIST_HEAD (&peer->ksnp_tx_queue);
+ CFS_INIT_LIST_HEAD (&peer->ksnp_conns);
+ CFS_INIT_LIST_HEAD (&peer->ksnp_routes);
+ CFS_INIT_LIST_HEAD (&peer->ksnp_tx_queue);
atomic_inc (&ksocknal_data.ksnd_npeers);
return (peer);
int
ksocknal_get_peer_info (int index, ptl_nid_t *nid,
- __u32 *myip, __u32 *peer_ip, int *port,
+ __u32 *myip, __u32 *peer_ip, int *port,
int *conn_count, int *share_count)
{
ksock_peer_t *peer;
read_lock (&ksocknal_data.ksnd_global_lock);
for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
-
+
list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) {
peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
list_empty(&peer->ksnp_routes)) {
if (index-- > 0)
continue;
-
+
*nid = peer->ksnp_nid;
*myip = 0;
*peer_ip = 0;
for (j = 0; j < peer->ksnp_n_passive_ips; j++) {
if (index-- > 0)
continue;
-
+
*nid = peer->ksnp_nid;
*myip = peer->ksnp_passive_ips[j];
*peer_ip = 0;
rc = 0;
goto out;
}
-
+
list_for_each (rtmp, &peer->ksnp_routes) {
if (index-- > 0)
continue;
if (route->ksnr_myipaddr == 0) {
/* route wasn't bound locally yet (the initial route) */
CWARN("Binding "LPX64" %u.%u.%u.%u to %u.%u.%u.%u\n",
- peer->ksnp_nid,
+ peer->ksnp_nid,
HIPQUAD(route->ksnr_ipaddr),
HIPQUAD(conn->ksnc_myipaddr));
} else {
CWARN("Rebinding "LPX64" %u.%u.%u.%u from "
"%u.%u.%u.%u to %u.%u.%u.%u\n",
- peer->ksnp_nid,
+ peer->ksnp_nid,
HIPQUAD(route->ksnr_ipaddr),
HIPQUAD(route->ksnr_myipaddr),
HIPQUAD(conn->ksnc_myipaddr));
-
+
iface = ksocknal_ip2iface(route->ksnr_myipaddr);
- if (iface != NULL)
+ if (iface != NULL)
iface->ksni_nroutes--;
}
route->ksnr_myipaddr = conn->ksnc_myipaddr;
iface = ksocknal_ip2iface(route->ksnr_myipaddr);
- if (iface != NULL)
+ if (iface != NULL)
iface->ksni_nroutes++;
}
/* Successful connection => further attempts can
* proceed immediately */
- route->ksnr_timeout = jiffies;
+ route->ksnr_timeout = cfs_time_current();
route->ksnr_retry_interval = SOCKNAL_MIN_RECONNECT_INTERVAL;
}
atomic_inc (&peer->ksnp_refcount);
/* peer's routelist takes over my ref on 'route' */
list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
-
+
list_for_each(tmp, &peer->ksnp_conns) {
conn = list_entry(tmp, ksock_conn_t, ksnc_list);
type = conn->ksnc_type;
if (conn->ksnc_route != route)
continue;
-
+
ksocknal_close_conn_locked (conn, 0);
}
ksock_peer_t *peer2;
ksock_route_t *route;
ksock_route_t *route2;
-
+
if (nid == PTL_NID_ANY)
return (-EINVAL);
route2 = NULL;
list_for_each (tmp, &peer->ksnp_routes) {
route2 = list_entry(tmp, ksock_route_t, ksnr_list);
-
+
if (route2->ksnr_ipaddr == ipaddr)
break;
-
+
route2 = NULL;
}
if (route2 == NULL) {
/* This deletes associated conns too */
ksocknal_del_route_locked (route);
}
-
+
if (single_share)
break;
}
route = list_entry(tmp, ksock_route_t, ksnr_list);
nshared += route->ksnr_share_count;
}
-
+
if (nshared == 0) {
/* remove everything else if there are no explicit entries
* left */
ksocknal_close_conn_locked(conn, 0);
}
}
-
+
/* NB peer unlinks itself when last conn/route is removed */
}
return (NULL);
}
-int
-ksocknal_get_conn_addrs (ksock_conn_t *conn)
-{
- struct sockaddr_in sin;
- int len = sizeof (sin);
- int rc;
-
- rc = conn->ksnc_sock->ops->getname (conn->ksnc_sock,
- (struct sockaddr *)&sin, &len, 2);
- /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */
- LASSERT (!conn->ksnc_closing);
-
- if (rc != 0) {
- CERROR ("Error %d getting sock peer IP\n", rc);
- return rc;
- }
-
- conn->ksnc_ipaddr = ntohl (sin.sin_addr.s_addr);
- conn->ksnc_port = ntohs (sin.sin_port);
-
- rc = conn->ksnc_sock->ops->getname (conn->ksnc_sock,
- (struct sockaddr *)&sin, &len, 0);
- if (rc != 0) {
- CERROR ("Error %d getting sock local IP\n", rc);
- return rc;
- }
-
- conn->ksnc_myipaddr = ntohl (sin.sin_addr.s_addr);
-
- return 0;
-}
-
-unsigned int
-ksocknal_sock_irq (struct socket *sock)
-{
- int irq = 0;
- struct dst_entry *dst;
-
- if (!ksocknal_tunables.ksnd_irq_affinity)
- return 0;
-
- dst = sk_dst_get (sock->sk);
- if (dst != NULL) {
- if (dst->dev != NULL) {
- irq = dst->dev->irq;
- if (irq >= NR_IRQS) {
- CERROR ("Unexpected IRQ %x\n", irq);
- irq = 0;
- }
- }
- dst_release (dst);
- }
-
- return (irq);
-}
-
ksock_sched_t *
ksocknal_choose_scheduler_locked (unsigned int irq)
{
ipaddrs[i] = ksocknal_data.ksnd_interfaces[i].ksni_ipaddr;
LASSERT (ipaddrs[i] != 0);
}
-
+
read_unlock (&ksocknal_data.ksnd_global_lock);
return (nip);
}
int this_xor;
int this_netmatch;
int i;
-
+
for (i = 0; i < nips; i++) {
if (ips[i] == 0)
continue;
this_xor = (ips[i] ^ iface->ksni_ipaddr);
this_netmatch = ((this_xor & iface->ksni_netmask) == 0) ? 1 : 0;
-
+
if (!(best < 0 ||
best_netmatch < this_netmatch ||
- (best_netmatch == this_netmatch &&
+ (best_netmatch == this_netmatch &&
best_xor > this_xor)))
continue;
-
+
best = i;
best_netmatch = this_netmatch;
best_xor = this_xor;
}
-
+
LASSERT (best >= 0);
return (best);
}
/* Also note that I'm not going to return more than n_peerips
* interfaces, even if I have more myself */
-
+
write_lock_irqsave(global_lock, flags);
LASSERT (n_peerips <= SOCKNAL_MAX_INTERFACES);
/* If we have any new interfaces, first tick off all the
* peer IPs that match old interfaces, then choose new
- * interfaces to match the remaining peer IPS.
+ * interfaces to match the remaining peer IPs.
* We don't forget interfaces we've stopped using; we might
* start using them again... */
-
+
if (i < peer->ksnp_n_passive_ips) {
/* Old interface. */
ip = peer->ksnp_passive_ips[i];
best_iface = NULL;
best_netmatch = 0;
best_npeers = 0;
-
+
for (j = 0; j < ksocknal_data.ksnd_ninterfaces; j++) {
iface = &ksocknal_data.ksnd_interfaces[j];
ip = iface->ksni_ipaddr;
for (k = 0; k < peer->ksnp_n_passive_ips; k++)
if (peer->ksnp_passive_ips[k] == ip)
break;
-
+
if (k < peer->ksnp_n_passive_ips) /* using it already */
continue;
peer->ksnp_passive_ips[i] = ip;
peer->ksnp_n_passive_ips = i+1;
}
-
+
LASSERT (best_iface != NULL);
/* mark the best matching peer IP used */
j = ksocknal_match_peerip(best_iface, peerips, n_peerips);
peerips[j] = 0;
}
-
+
/* Overwrite input peer IP addresses */
memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips));
-
+
write_unlock_irqrestore(global_lock, flags);
-
+
return (n_ips);
}
void
-ksocknal_create_routes(ksock_peer_t *peer, int port,
+ksocknal_create_routes(ksock_peer_t *peer, int port,
__u32 *peer_ipaddrs, int npeer_ipaddrs)
{
ksock_route_t *newroute = NULL;
write_lock_irqsave(global_lock, flags);
LASSERT (npeer_ipaddrs <= SOCKNAL_MAX_INTERFACES);
-
+
for (i = 0; i < npeer_ipaddrs; i++) {
if (newroute != NULL) {
newroute->ksnr_ipaddr = peer_ipaddrs[i];
write_lock_irqsave(global_lock, flags);
}
-
+
/* Already got a route? */
route = NULL;
list_for_each(rtmp, &peer->ksnp_routes) {
if (route->ksnr_ipaddr == newroute->ksnr_ipaddr)
break;
-
+
route = NULL;
}
if (route != NULL)
if (route != NULL)
continue;
- this_netmatch = (((iface->ksni_ipaddr ^
- newroute->ksnr_ipaddr) &
+ this_netmatch = (((iface->ksni_ipaddr ^
+ newroute->ksnr_ipaddr) &
iface->ksni_netmask) == 0) ? 1 : 0;
-
+
if (!(best_iface == NULL ||
best_netmatch < this_netmatch ||
(best_netmatch == this_netmatch &&
best_nroutes > iface->ksni_nroutes)))
continue;
-
+
best_iface = iface;
best_netmatch = this_netmatch;
best_nroutes = iface->ksni_nroutes;
}
-
+
if (best_iface == NULL)
continue;
ksocknal_add_route_locked(peer, newroute);
newroute = NULL;
}
-
+
write_unlock_irqrestore(global_lock, flags);
if (newroute != NULL)
ksocknal_put_route(newroute);
* have been created in userland and (b) we need to refcount the
* socket so that we don't close it while I/O is being done on
* it, and sock->file has that pre-cooked... */
- LASSERT (sock->file != NULL);
- LASSERT (file_count(sock->file) > 0);
+ LASSERT (KSN_SOCK2FILE(sock) != NULL);
+ LASSERT (cfs_file_count(KSN_SOCK2FILE(sock)) > 0);
LASSERT (route == NULL || !passive);
- rc = ksocknal_setup_sock (sock);
+ rc = ksocknal_lib_setup_sock (sock);
if (rc != 0)
return (rc);
- irq = ksocknal_sock_irq (sock);
+ irq = ksocknal_lib_sock_irq (sock);
PORTAL_ALLOC(conn, sizeof(*conn));
if (conn == NULL)
conn->ksnc_route = NULL;
conn->ksnc_sock = sock;
conn->ksnc_type = type;
- conn->ksnc_saved_data_ready = sock->sk->sk_data_ready;
- conn->ksnc_saved_write_space = sock->sk->sk_write_space;
+ ksocknal_lib_save_callback(sock, conn);
atomic_set (&conn->ksnc_refcount, 1); /* 1 ref for me */
conn->ksnc_rx_ready = 0;
conn->ksnc_rx_scheduled = 0;
ksocknal_new_packet (conn, 0);
- INIT_LIST_HEAD (&conn->ksnc_tx_queue);
+ CFS_INIT_LIST_HEAD (&conn->ksnc_tx_queue);
conn->ksnc_tx_ready = 0;
conn->ksnc_tx_scheduled = 0;
atomic_set (&conn->ksnc_tx_nob, 0);
/* stash conn's local and remote addrs */
- rc = ksocknal_get_conn_addrs (conn);
+ rc = ksocknal_lib_get_conn_addrs (conn);
if (rc != 0)
goto failed_0;
write_unlock_irqrestore(global_lock, flags);
}
-
+
if (!passive) {
- ksocknal_create_routes(peer, conn->ksnc_port,
+ ksocknal_create_routes(peer, conn->ksnc_port,
ipaddrs, nipaddrs);
rc = 0;
} else {
}
if (rc < 0)
goto failed_1;
-
+
write_lock_irqsave (global_lock, flags);
if (peer->ksnp_closing ||
if (route->ksnr_ipaddr != conn->ksnc_ipaddr)
continue;
-
+
ksocknal_associate_route_conn_locked(route, conn);
break;
}
/* Give conn a ref on sock->file since we're going to return success */
- get_file(sock->file);
+ cfs_get_file(KSN_SOCK2FILE(sock));
conn->ksnc_peer = peer; /* conn takes my ref on peer */
conn->ksnc_incarnation = incarnation;
- peer->ksnp_last_alive = jiffies;
+ peer->ksnp_last_alive = cfs_time_current();
peer->ksnp_error = 0;
sched = ksocknal_choose_scheduler_locked (irq);
conn->ksnc_scheduler = sched;
/* Set the deadline for the outgoing HELLO to drain */
- conn->ksnc_tx_bufnob = sock->sk->sk_wmem_queued;
- conn->ksnc_tx_deadline = jiffies +
- ksocknal_tunables.ksnd_io_timeout * HZ;
+ conn->ksnc_tx_bufnob = SOCK_WMEM_QUEUED(sock);
+ conn->ksnc_tx_deadline = cfs_time_shift(ksocknal_tunables.ksnd_io_timeout);
mb(); /* order with adding to peer's conn list */
list_add (&conn->ksnc_list, &peer->ksnp_conns);
atomic_inc (&conn->ksnc_refcount);
/* NB my callbacks block while I hold ksnd_global_lock */
- sock->sk->sk_user_data = conn;
- sock->sk->sk_data_ready = ksocknal_data_ready;
- sock->sk->sk_write_space = ksocknal_write_space;
+ ksocknal_lib_set_callback(sock, conn);
/* Take all the packets blocking for a connection.
* NB, it might be nicer to share these blocked packets among any
write_unlock_irqrestore (global_lock, flags);
- ksocknal_bind_irq (irq);
+ ksocknal_lib_bind_irq (irq);
/* Call the callbacks right now to get things going. */
if (ksocknal_getconnsock(conn) == 0) {
- ksocknal_data_ready (sock->sk, 0);
- ksocknal_write_space (sock->sk);
+ ksocknal_lib_act_callback(sock, conn);
ksocknal_putconnsock(conn);
}
CWARN("New conn nid:"LPX64" %u.%u.%u.%u -> %u.%u.%u.%u/%d"
" incarnation:"LPX64" sched[%d]/%d\n",
- nid, HIPQUAD(conn->ksnc_myipaddr),
+ nid, HIPQUAD(conn->ksnc_myipaddr),
HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port, incarnation,
(int)(conn->ksnc_scheduler - ksocknal_data.ksnd_schedulers), irq);
LASSERT (!conn->ksnc_closing);
conn->ksnc_closing = 1;
atomic_inc (&ksocknal_data.ksnd_nclosing_conns);
-
+
/* ksnd_deathrow_conns takes over peer's ref */
list_del (&conn->ksnc_list);
conn2 = NULL;
list_for_each(tmp, &peer->ksnp_conns) {
conn2 = list_entry(tmp, ksock_conn_t, ksnc_list);
-
+
if (conn2->ksnc_route == route &&
conn2->ksnc_type == conn->ksnc_type)
break;
-
+
conn2 = NULL;
}
if (conn2 == NULL)
spin_lock (&ksocknal_data.ksnd_reaper_lock);
list_add_tail (&conn->ksnc_list, &ksocknal_data.ksnd_deathrow_conns);
- wake_up (&ksocknal_data.ksnd_reaper_waitq);
-
+ cfs_waitq_signal (&ksocknal_data.ksnd_reaper_waitq);
+
spin_unlock (&ksocknal_data.ksnd_reaper_lock);
}
/* extra ref for scheduler */
atomic_inc (&conn->ksnc_refcount);
- wake_up (&sched->kss_waitq);
+ cfs_waitq_signal (&sched->kss_waitq);
}
spin_unlock_irqrestore (&sched->kss_lock, flags);
/* serialise with callbacks */
write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
- /* Remove conn's network callbacks.
- * NB I _have_ to restore the callback, rather than storing a noop,
- * since the socket could survive past this module being unloaded!! */
- conn->ksnc_sock->sk->sk_data_ready = conn->ksnc_saved_data_ready;
- conn->ksnc_sock->sk->sk_write_space = conn->ksnc_saved_write_space;
-
- /* A callback could be in progress already; they hold a read lock
- * on ksnd_global_lock (to serialise with me) and NOOP if
- * sk_user_data is NULL. */
- conn->ksnc_sock->sk->sk_user_data = NULL;
+ ksocknal_lib_reset_callback(conn->ksnc_sock, conn);
/* OK, so this conn may not be completely disengaged from its
* scheduler yet, but it _has_ committed to terminate... */
if (peer->ksnp_error != 0) {
/* peer's last conn closed in error */
LASSERT (list_empty (&peer->ksnp_conns));
-
+
/* convert peer's last-known-alive timestamp from jiffies */
do_gettimeofday (&now);
- then = now.tv_sec - (jiffies - peer->ksnp_last_alive)/HZ;
+ then = now.tv_sec - cfs_duration_sec(cfs_time_sub(cfs_time_current(),
+ peer->ksnp_last_alive));
notify = 1;
}
-
+
write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
/* The socket is closed on the final put; either here, or in
spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags);
list_add (&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns);
- wake_up (&ksocknal_data.ksnd_reaper_waitq);
+ cfs_waitq_signal (&ksocknal_data.ksnd_reaper_waitq);
spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags);
}
"incarnation:"LPX64"("LPX64")\n",
peer->ksnp_nid, conn->ksnc_ipaddr, conn->ksnc_port,
conn->ksnc_incarnation, incarnation);
-
+
count++;
ksocknal_close_conn_locked (conn, -ESTALE);
}
}
int
-ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why)
+ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why)
{
ksock_peer_t *peer = conn->ksnc_peer;
__u32 ipaddr = conn->ksnc_ipaddr;
write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
count = ksocknal_close_peer_conns_locked (peer, ipaddr, why);
-
+
write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
return (count);
/* wildcards always succeed */
if (nid == PTL_NID_ANY || ipaddr == 0)
return (0);
-
+
return (count == 0 ? -ENOENT : 0);
}
ksocknal_close_matching_conns (gw_nid, 0);
return;
}
-
+
/* ...otherwise do nothing. We can only establish new connections
* if we have autroutes, and these connect on demand. */
}
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-struct tcp_opt *sock2tcp_opt(struct sock *sk)
-{
- return &(sk->tp_pinfo.af_tcp);
-}
-#else
-struct tcp_opt *sock2tcp_opt(struct sock *sk)
-{
- struct tcp_sock *s = (struct tcp_sock *)sk;
- return &s->tcp;
-}
-#endif
-
-void
-ksocknal_push_conn (ksock_conn_t *conn)
-{
- struct sock *sk;
- struct tcp_opt *tp;
- int nonagle;
- int val = 1;
- int rc;
- mm_segment_t oldmm;
-
- rc = ksocknal_getconnsock (conn);
- if (rc != 0) /* being shut down */
- return;
-
- sk = conn->ksnc_sock->sk;
- tp = sock2tcp_opt(sk);
-
- lock_sock (sk);
- nonagle = tp->nonagle;
- tp->nonagle = 1;
- release_sock (sk);
-
- oldmm = get_fs ();
- set_fs (KERNEL_DS);
-
- rc = sk->sk_prot->setsockopt (sk, SOL_TCP, TCP_NODELAY,
- (char *)&val, sizeof (val));
- LASSERT (rc == 0);
-
- set_fs (oldmm);
-
- lock_sock (sk);
- tp->nonagle = nonagle;
- release_sock (sk);
-
- ksocknal_putconnsock (conn);
-}
-
void
ksocknal_push_peer (ksock_peer_t *peer)
{
if (conn == NULL)
break;
- ksocknal_push_conn (conn);
+ ksocknal_lib_push_conn (conn);
ksocknal_put_conn (conn);
}
}
for (j = 0; i < peer->ksnp_n_passive_ips; j++)
if (peer->ksnp_passive_ips[j] == ipaddress)
iface->ksni_npeers++;
-
+
list_for_each(rtmp, &peer->ksnp_routes) {
route = list_entry(rtmp, ksock_route_t, ksnr_list);
-
+
if (route->ksnr_myipaddr == ipaddress)
iface->ksni_nroutes++;
}
rc = 0;
/* NB only new connections will pay attention to the new interface! */
}
-
+
write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, flags);
return (rc);
list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
route = list_entry (tmp, ksock_route_t, ksnr_list);
-
+
if (route->ksnr_myipaddr != ipaddr)
continue;
-
+
if (route->ksnr_share_count != 0) {
/* Manually created; keep, but unbind */
route->ksnr_myipaddr = 0;
ksocknal_del_route_locked(route);
}
}
-
+
list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
conn = list_entry(tmp, ksock_conn_t, ksnc_list);
-
+
if (conn->ksnc_myipaddr == ipaddr)
ksocknal_close_conn_locked (conn, 0);
}
for (j = i+1; j < ksocknal_data.ksnd_ninterfaces; j++)
ksocknal_data.ksnd_interfaces[j-1] =
ksocknal_data.ksnd_interfaces[j];
-
+
ksocknal_data.ksnd_ninterfaces--;
for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) {
list_for_each_safe(tmp, nxt, &ksocknal_data.ksnd_peers[j]) {
peer = list_entry(tmp, ksock_peer_t, ksnp_list);
-
+
ksocknal_peer_del_interface_locked(peer, this_ip);
}
}
}
-
+
write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, flags);
-
+
return (rc);
}
pcfg->pcfg_fd = iface->ksni_npeers;
pcfg->pcfg_count = iface->ksni_nroutes;
}
-
+
read_unlock (&ksocknal_data.ksnd_global_lock);
break;
}
int port = 0;
int conn_count = 0;
int share_count = 0;
-
+
rc = ksocknal_get_peer_info(pcfg->pcfg_count, &nid,
&myip, &ip, &port,
&conn_count, &share_count);
break;
}
case NAL_CMD_ADD_PEER: {
- rc = ksocknal_add_peer (pcfg->pcfg_nid,
+ rc = ksocknal_add_peer (pcfg->pcfg_nid,
pcfg->pcfg_id, /* IP */
pcfg->pcfg_misc); /* port */
break;
}
case NAL_CMD_DEL_PEER: {
- rc = ksocknal_del_peer (pcfg->pcfg_nid,
+ rc = ksocknal_del_peer (pcfg->pcfg_nid,
pcfg->pcfg_id, /* IP */
pcfg->pcfg_flags); /* single_share? */
break;
pcfg->pcfg_misc = conn->ksnc_port;
pcfg->pcfg_fd = conn->ksnc_myipaddr;
pcfg->pcfg_flags = conn->ksnc_type;
- pcfg->pcfg_gw_nal = conn->ksnc_scheduler -
+ pcfg->pcfg_gw_nal = conn->ksnc_scheduler -
ksocknal_data.ksnd_schedulers;
pcfg->pcfg_count = txmem;
pcfg->pcfg_size = rxmem;
rc = -EINVAL;
break;
}
- fput (sock->file);
+ cfs_put_file (KSN_SOCK2FILE(sock));
break;
}
case NAL_CMD_CLOSE_CONNECTION: {
- rc = ksocknal_close_matching_conns (pcfg->pcfg_nid,
+ rc = ksocknal_close_matching_conns (pcfg->pcfg_nid,
pcfg->pcfg_id);
break;
}
LASSERT (list_empty(&p->fmp_blocked_conns));
LASSERT (p->fmp_nactive_fmbs == 0);
-
+
while (!list_empty(&p->fmp_idle_fmbs)) {
fmb = list_entry(p->fmp_idle_fmbs.next,
ksock_fmb_t, fmb_list);
-
+
for (i = 0; i < npages; i++)
if (fmb->fmb_kiov[i].kiov_page != NULL)
- __free_page(fmb->fmb_kiov[i].kiov_page);
+ cfs_free_page(fmb->fmb_kiov[i].kiov_page);
list_del(&fmb->fmb_list);
PORTAL_FREE(fmb, offsetof(ksock_fmb_t, fmb_kiov[npages]));
sizeof (ksock_sched_t) * ksocknal_data.ksnd_nschedulers);
PORTAL_FREE (ksocknal_data.ksnd_peers,
- sizeof (struct list_head) *
+ sizeof (struct list_head) *
ksocknal_data.ksnd_peer_hash_size);
}
"waiting for %d peers to disconnect\n",
atomic_read (&ksocknal_data.ksnd_npeers));
set_current_state (TASK_UNINTERRUPTIBLE);
- schedule_timeout (HZ);
+ schedule_timeout (cfs_time_seconds(1));
}
/* Tell lib we've stopped calling into her. */
/* flag threads to terminate; wake and wait for them to die */
ksocknal_data.ksnd_shuttingdown = 1;
- wake_up_all (&ksocknal_data.ksnd_autoconnectd_waitq);
- wake_up_all (&ksocknal_data.ksnd_reaper_waitq);
+ cfs_waitq_broadcast (&ksocknal_data.ksnd_autoconnectd_waitq);
+ cfs_waitq_broadcast (&ksocknal_data.ksnd_reaper_waitq);
for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
sched = &ksocknal_data.ksnd_schedulers[i];
- wake_up_all(&sched->kss_waitq);
+ cfs_waitq_broadcast(&sched->kss_waitq);
}
i = 4;
ksocknal_data.ksnd_nthreads);
read_unlock(&ksocknal_data.ksnd_global_lock);
set_current_state (TASK_UNINTERRUPTIBLE);
- schedule_timeout (HZ);
+ schedule_timeout (cfs_time_seconds(1));
read_lock(&ksocknal_data.ksnd_global_lock);
}
read_unlock(&ksocknal_data.ksnd_global_lock);
* identifies this particular instance of the socknal. Hopefully
* we won't be able to reboot more frequently than 1MHz for the
* forseeable future :) */
-
+
do_gettimeofday(&tv);
-
- ksocknal_data.ksnd_incarnation =
+
+ ksocknal_data.ksnd_incarnation =
(((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
}
memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */
ksocknal_init_incarnation();
-
+
ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE;
PORTAL_ALLOC (ksocknal_data.ksnd_peers,
sizeof (struct list_head) * ksocknal_data.ksnd_peer_hash_size);
return (-ENOMEM);
for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
- INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]);
+ CFS_INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]);
rwlock_init(&ksocknal_data.ksnd_global_lock);
spin_lock_init(&ksocknal_data.ksnd_small_fmp.fmp_lock);
- INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_idle_fmbs);
- INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns);
+ CFS_INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_idle_fmbs);
+ CFS_INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns);
ksocknal_data.ksnd_small_fmp.fmp_buff_pages = SOCKNAL_SMALL_FWD_PAGES;
spin_lock_init(&ksocknal_data.ksnd_large_fmp.fmp_lock);
- INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs);
- INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns);
+ CFS_INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs);
+ CFS_INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns);
ksocknal_data.ksnd_large_fmp.fmp_buff_pages = SOCKNAL_LARGE_FWD_PAGES;
spin_lock_init (&ksocknal_data.ksnd_reaper_lock);
- INIT_LIST_HEAD (&ksocknal_data.ksnd_enomem_conns);
- INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns);
- INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns);
- init_waitqueue_head(&ksocknal_data.ksnd_reaper_waitq);
+ CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_enomem_conns);
+ CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns);
+ CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns);
+ cfs_waitq_init(&ksocknal_data.ksnd_reaper_waitq);
spin_lock_init (&ksocknal_data.ksnd_autoconnectd_lock);
- INIT_LIST_HEAD (&ksocknal_data.ksnd_autoconnectd_routes);
- init_waitqueue_head(&ksocknal_data.ksnd_autoconnectd_waitq);
+ CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_autoconnectd_routes);
+ cfs_waitq_init(&ksocknal_data.ksnd_autoconnectd_waitq);
/* NB memset above zeros whole of ksocknal_data, including
* ksocknal_data.ksnd_irqinfo[all].ksni_valid */
ksock_sched_t *kss = &ksocknal_data.ksnd_schedulers[i];
spin_lock_init (&kss->kss_lock);
- INIT_LIST_HEAD (&kss->kss_rx_conns);
- INIT_LIST_HEAD (&kss->kss_tx_conns);
+ CFS_INIT_LIST_HEAD (&kss->kss_rx_conns);
+ CFS_INIT_LIST_HEAD (&kss->kss_tx_conns);
#if SOCKNAL_ZC
- INIT_LIST_HEAD (&kss->kss_zctxdone_list);
+ CFS_INIT_LIST_HEAD (&kss->kss_zctxdone_list);
#endif
- init_waitqueue_head (&kss->kss_waitq);
+ cfs_waitq_init (&kss->kss_waitq);
}
/* NB we have to wait to be told our true NID... */
- process_id.pid = requested_pid;
+ process_id.pid = requested_pid;
process_id.nid = 0;
-
+
rc = lib_init(&ksocknal_lib, nal, process_id,
requested_limits, actual_limits);
if (rc != PTL_OK) {
SOCKNAL_LARGE_FWD_NMSGS); i++) {
ksock_fmb_t *fmb;
ksock_fmb_pool_t *pool;
-
+
if (i < SOCKNAL_SMALL_FWD_NMSGS)
pool = &ksocknal_data.ksnd_small_fmp;
else
pool = &ksocknal_data.ksnd_large_fmp;
-
- PORTAL_ALLOC(fmb, offsetof(ksock_fmb_t,
+
+ PORTAL_ALLOC(fmb, offsetof(ksock_fmb_t,
fmb_kiov[pool->fmp_buff_pages]));
if (fmb == NULL) {
ksocknal_api_shutdown(nal);
}
fmb->fmb_pool = pool;
-
+
for (j = 0; j < pool->fmp_buff_pages; j++) {
- fmb->fmb_kiov[j].kiov_page = alloc_page(GFP_KERNEL);
+ fmb->fmb_kiov[j].kiov_page = cfs_alloc_page(CFS_ALLOC_STD);
if (fmb->fmb_kiov[j].kiov_page == NULL) {
ksocknal_api_shutdown (nal);
return (-ENOMEM);
}
- LASSERT(page_address(fmb->fmb_kiov[j].kiov_page) != NULL);
+ LASSERT(cfs_page_address(fmb->fmb_kiov[j].kiov_page) != NULL);
}
list_add(&fmb->fmb_list, &pool->fmp_idle_fmbs);
ptl_unregister_nal(SOCKNAL);
}
+extern cfs_sysctl_table_t ksocknal_top_ctl_table[];
+
int __init
ksocknal_module_init (void)
{
#endif
/* check ksnr_connected/connecting field large enough */
LASSERT(SOCKNAL_CONN_NTYPES <= 4);
-
+
ksocknal_api.nal_ni_init = ksocknal_api_startup;
ksocknal_api.nal_ni_fini = ksocknal_api_shutdown;
ptl_unregister_nal(SOCKNAL);
return (-ENODEV);
}
-
+
#ifdef CONFIG_SYSCTL
/* Press on regardless even if registering sysctl doesn't work */
- ksocknal_tunables.ksnd_sysctl =
+ ksocknal_tunables.ksnd_sysctl =
register_sysctl_table (ksocknal_top_ctl_table, 0);
#endif
return (0);
}
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Kernel TCP Socket NAL v0.01");
+MODULE_DESCRIPTION("Kernel TCP Socket NAL v1.0.0");
MODULE_LICENSE("GPL");
-module_init(ksocknal_module_init);
-module_exit(ksocknal_module_fini);
-
+cfs_module(ksocknal, "1.0.0", ksocknal_module_init, ksocknal_module_fini);
# define EXPORT_SYMTAB
#endif
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-#include <net/sock.h>
-#include <net/tcp.h>
-#include <linux/uio.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/irq.h>
-
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/list.h>
-#include <linux/kmod.h>
-#include <linux/sysctl.h>
-#include <asm/uaccess.h>
-#include <asm/segment.h>
-#include <asm/div64.h>
-
#define DEBUG_SUBSYSTEM S_NAL
-#include <linux/kp30.h>
-#include <linux/portals_compat25.h>
-#include <linux/kpr.h>
+#if defined(__linux__)
+#include "socknal_lib-linux.h"
+#elif defined(__APPLE__)
+#include "socknal_lib-darwin.h"
+#else
+#error Unsupported Operating System
+#endif
+
+#include <libcfs/kp30.h>
+#include <portals/kpr.h>
#include <portals/p30.h>
#include <portals/lib-p30.h>
#include <portals/nal.h>
#define SOCKNAL_N_AUTOCONNECTD 4 /* # socknal autoconnect daemons */
-#define SOCKNAL_MIN_RECONNECT_INTERVAL HZ /* first failed connection retry... */
-#define SOCKNAL_MAX_RECONNECT_INTERVAL (60*HZ) /* ...exponentially increasing to this */
+#define SOCKNAL_MIN_RECONNECT_INTERVAL cfs_time_seconds(1) /* first failed connection retry... */
+#define SOCKNAL_MAX_RECONNECT_INTERVAL cfs_time_seconds(60) /* ...exponentially increasing to this */
/* default vals for runtime tunables */
#define SOCKNAL_IO_TIMEOUT 50 /* default comms timeout (seconds) */
-#define SOCKNAL_EAGER_ACK 0 /* default eager ack (boolean) */
+#define SOCKNAL_EAGER_ACK SOCKNAL_ARCH_EAGER_ACK /* default eager ack (boolean) */
#define SOCKNAL_TYPED_CONNS 1 /* unidirectional large, bidirectional small? */
#define SOCKNAL_ZC_MIN_FRAG (2<<10) /* default smallest zerocopy fragment */
#define SOCKNAL_MIN_BULK (1<<10) /* smallest "large" message */
/* # pages in a large message fwd buffer */
#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */
-#define SOCKNAL_ENOMEM_RETRY 1 /* jiffies between retries */
+#define SOCKNAL_ENOMEM_RETRY CFS_MIN_DELAY /* jiffies between retries */
#define SOCKNAL_MAX_INTERFACES 16 /* Largest number of interfaces we bind */
#define SOCKNAL_ROUND_ROBIN 0 /* round robin / load balance */
-#define SOCKNAL_TX_LOW_WATER(sk) (((sk)->sk_sndbuf*8)/10)
-
#define SOCKNAL_SINGLE_FRAG_TX 0 /* disable multi-fragment sends */
#define SOCKNAL_SINGLE_FRAG_RX 0 /* disable multi-fragment receives */
# define SOCKNAL_RISK_KMAP_DEADLOCK 1
#endif
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,72))
-# define sk_allocation allocation
-# define sk_data_ready data_ready
-# define sk_write_space write_space
-# define sk_user_data user_data
-# define sk_prot prot
-# define sk_sndbuf sndbuf
-# define sk_socket socket
-#endif
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
-# define sk_wmem_queued wmem_queued
-# define sk_err err
-#endif
-
typedef struct /* pool of forwarding buffers */
{
spinlock_t fmp_lock; /* serialise */
#if SOCKNAL_ZC
struct list_head kss_zctxdone_list; /* completed ZC transmits */
#endif
- wait_queue_head_t kss_waitq; /* where scheduler sleeps */
+ cfs_waitq_t kss_waitq; /* where scheduler sleeps */
int kss_nconns; /* # connections assigned to this scheduler */
} ksock_sched_t;
#if SOCKNAL_ZC
unsigned int ksnd_zc_min_frag; /* minimum zero copy frag size */
#endif
- struct ctl_table_header *ksnd_sysctl; /* sysctl interface */
+ cfs_sysctl_table_header_t *ksnd_sysctl; /* sysctl interface */
} ksock_tunables_t;
typedef struct
struct list_head ksnd_deathrow_conns; /* conns to be closed */
struct list_head ksnd_zombie_conns; /* conns to be freed */
struct list_head ksnd_enomem_conns; /* conns to be retried */
- wait_queue_head_t ksnd_reaper_waitq; /* reaper sleeps here */
- unsigned long ksnd_reaper_waketime; /* when reaper will wake */
+ cfs_waitq_t ksnd_reaper_waitq; /* reaper sleeps here */
+ cfs_time_t ksnd_reaper_waketime; /* when reaper will wake */
spinlock_t ksnd_reaper_lock; /* serialise */
int ksnd_enomem_tx; /* test ENOMEM sender */
int ksnd_stall_rx; /* test sluggish receiver */
struct list_head ksnd_autoconnectd_routes; /* routes waiting to be connected */
- wait_queue_head_t ksnd_autoconnectd_waitq; /* autoconnectds sleep here */
+ cfs_waitq_t ksnd_autoconnectd_waitq; /* autoconnectds sleep here */
spinlock_t ksnd_autoconnectd_lock; /* serialise */
ksock_irqinfo_t ksnd_irqinfo[NR_IRQS];/* irq->scheduler lookup */
/* reader */
struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */
- unsigned long ksnc_rx_deadline; /* when (in jiffies) receive times out */
+ cfs_time_t ksnc_rx_deadline; /* when (in jiffies) receive times out */
int ksnc_rx_started; /* started receiving a message */
int ksnc_rx_ready; /* data ready to read */
int ksnc_rx_scheduled; /* being progressed */
/* WRITER */
struct list_head ksnc_tx_list; /* where I enq waiting for output space */
struct list_head ksnc_tx_queue; /* packets waiting to be sent */
- unsigned long ksnc_tx_deadline; /* when (in jiffies) tx times out */
+ cfs_time_t ksnc_tx_deadline; /* when (in jiffies) tx times out */
int ksnc_tx_bufnob; /* send buffer marker */
atomic_t ksnc_tx_nob; /* # bytes queued */
int ksnc_tx_ready; /* write space */
struct list_head ksnr_connect_list; /* chain on autoconnect list */
struct ksock_peer *ksnr_peer; /* owning peer */
atomic_t ksnr_refcount; /* # users */
- unsigned long ksnr_timeout; /* when (in jiffies) reconnection can happen next */
- unsigned int ksnr_retry_interval; /* how long between retries */
+ cfs_time_t ksnr_timeout; /* when (in jiffies) reconnection can happen next */
+ cfs_duration_t ksnr_retry_interval; /* how long between retries */
__u32 ksnr_myipaddr; /* my IP */
__u32 ksnr_ipaddr; /* IP address to connect to */
int ksnr_port; /* port to connect to */
struct list_head ksnp_conns; /* all active connections */
struct list_head ksnp_routes; /* routes */
struct list_head ksnp_tx_queue; /* waiting packets */
- unsigned long ksnp_last_alive; /* when (in jiffies) I was last alive */
+ cfs_time_t ksnp_last_alive; /* when (in jiffies) I was last alive */
int ksnp_n_passive_ips; /* # of... */
__u32 ksnp_passive_ips[SOCKNAL_MAX_INTERFACES]; /* preferred local interfaces */
} ksock_peer_t;
read_lock (&ksocknal_data.ksnd_global_lock);
if (!conn->ksnc_closing) {
rc = 0;
- get_file (conn->ksnc_sock->file);
+ cfs_get_file (KSN_CONN2FILE(conn));
}
read_unlock (&ksocknal_data.ksnd_global_lock);
static inline void
ksocknal_putconnsock (ksock_conn_t *conn)
{
- fput (conn->ksnc_sock->file);
-}
-
-#ifndef CONFIG_SMP
-static inline
-int ksocknal_nsched(void)
-{
- return 1;
-}
-#else
-#include <linux/lustre_version.h>
-# if !(defined(CONFIG_X86) && (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,21))) || defined(CONFIG_X86_64) || (LUSTRE_KERNEL_VERSION < 39) || ((LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) && !defined(CONFIG_X86_HT))
-static inline int
-ksocknal_nsched(void)
-{
- return num_online_cpus();
-}
-
-static inline int
-ksocknal_sched2cpu(int i)
-{
- return i;
+ cfs_put_file (KSN_CONN2FILE(conn));
}
-static inline int
-ksocknal_irqsched2cpu(int i)
-{
- return i;
-}
-# else
-static inline int
-ksocknal_nsched(void)
-{
- if (smp_num_siblings == 1)
- return (num_online_cpus());
-
- /* We need to know if this assumption is crap */
- LASSERT (smp_num_siblings == 2);
- return (num_online_cpus()/2);
-}
-
-static inline int
-ksocknal_sched2cpu(int i)
-{
- if (smp_num_siblings == 1)
- return i;
-
- return (i * 2);
-}
-
-static inline int
-ksocknal_irqsched2cpu(int i)
-{
- return (ksocknal_sched2cpu(i) + 1);
-}
-# endif
-#endif
-
extern void ksocknal_put_route (ksock_route_t *route);
extern void ksocknal_put_peer (ksock_peer_t *peer);
extern ksock_peer_t *ksocknal_find_peer_locked (ptl_nid_t nid);
extern int ksocknal_thread_start (int (*fn)(void *arg), void *arg);
extern int ksocknal_new_packet (ksock_conn_t *conn, int skip);
extern int ksocknal_scheduler (void *arg);
-extern void ksocknal_data_ready(struct sock *sk, int n);
-extern void ksocknal_write_space(struct sock *sk);
extern int ksocknal_autoconnectd (void *arg);
extern int ksocknal_reaper (void *arg);
extern int ksocknal_get_conn_tunables (ksock_conn_t *conn, int *txmem,
extern int ksocknal_send_hello (ksock_conn_t *conn, __u32 *ipaddrs, int nipaddrs);
extern int ksocknal_recv_hello (ksock_conn_t *conn,
ptl_nid_t *nid, __u64 *incarnation, __u32 *ipaddrs);
+
+extern void ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn);
+extern void ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn);
+extern void ksocknal_lib_act_callback(struct socket *sock, ksock_conn_t *conn);
+extern void ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn);
+extern void ksocknal_lib_push_conn (ksock_conn_t *conn);
+extern void ksocknal_lib_bind_irq (unsigned int irq);
+extern int ksocknal_lib_get_conn_addrs (ksock_conn_t *conn);
+extern unsigned int ksocknal_lib_sock_irq (struct socket *sock);
+extern int ksocknal_lib_setup_sock (struct socket *so);
+extern int ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx);
+extern int ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx);
+extern void ksocknal_lib_eager_ack (ksock_conn_t *conn);
+extern int ksocknal_lib_recv_iov (ksock_conn_t *conn);
+extern int ksocknal_lib_recv_kiov (ksock_conn_t *conn);
+extern int ksocknal_lib_sock_write (struct socket *sock,
+ void *buffer, int nob);
+extern int ksocknal_lib_sock_read (struct socket *sock,
+ void *buffer, int nob);
+extern int ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem,
+ int *rxmem, int *nagle);
+extern int ksocknal_lib_connect_sock(struct socket **sockp, int *may_retry,
+ ksock_route_t *route, int local_port);
*/
#include "socknal.h"
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-# include <linux/syscalls.h>
-#endif
/*
* LIB functions follow
PORTAL_FREE(ltx, ltx->ltx_desc_size);
}
-#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
-struct page *
-ksocknal_kvaddr_to_page (unsigned long vaddr)
-{
- struct page *page;
-
- if (vaddr >= VMALLOC_START &&
- vaddr < VMALLOC_END)
- page = vmalloc_to_page ((void *)vaddr);
-#if CONFIG_HIGHMEM
- else if (vaddr >= PKMAP_BASE &&
- vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE))
- page = vmalloc_to_page ((void *)vaddr);
- /* in 2.4 ^ just walks the page tables */
-#endif
- else
- page = virt_to_page (vaddr);
-
- if (page == NULL ||
- !VALID_PAGE (page))
- return (NULL);
-
- return (page);
-}
-#endif
-
int
ksocknal_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
-{
- struct socket *sock = conn->ksnc_sock;
+{
struct iovec *iov = tx->tx_iov;
-#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
- unsigned long vaddr = (unsigned long)iov->iov_base
- int offset = vaddr & (PAGE_SIZE - 1);
- int zcsize = MIN (iov->iov_len, PAGE_SIZE - offset);
- struct page *page;
-#endif
- int nob;
- int rc;
+ int nob;
+ int rc;
- /* NB we can't trust socket ops to either consume our iovs
- * or leave them alone. */
LASSERT (tx->tx_niov > 0);
-
-#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
- if (zcsize >= ksocknal_data.ksnd_zc_min_frag &&
- (sock->sk->route_caps & NETIF_F_SG) &&
- (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) &&
- (page = ksocknal_kvaddr_to_page (vaddr)) != NULL) {
- int msgflg = MSG_DONTWAIT;
-
- CDEBUG(D_NET, "vaddr %p, page %p->%p + offset %x for %d\n",
- (void *)vaddr, page, page_address(page), offset, zcsize);
-
- if (!list_empty (&conn->ksnc_tx_queue) ||
- zcsize < tx->tx_resid)
- msgflg |= MSG_MORE;
-
- rc = tcp_sendpage_zccd(sock, page, offset, zcsize, msgflg, &tx->tx_zccd);
- } else
-#endif
- {
-#if SOCKNAL_SINGLE_FRAG_TX
- struct iovec scratch;
- struct iovec *scratchiov = &scratch;
- int niov = 1;
-#else
- struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
- int niov = tx->tx_niov;
-#endif
- struct msghdr msg = {
- .msg_name = NULL,
- .msg_namelen = 0,
- .msg_iov = scratchiov,
- .msg_iovlen = niov,
- .msg_control = NULL,
- .msg_controllen = 0,
- .msg_flags = MSG_DONTWAIT
- };
- mm_segment_t oldmm = get_fs();
- int i;
-
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i] = tx->tx_iov[i];
- nob += scratchiov[i].iov_len;
- }
- if (!list_empty(&conn->ksnc_tx_queue) ||
- nob < tx->tx_resid)
- msg.msg_flags |= MSG_MORE;
-
- set_fs (KERNEL_DS);
- rc = sock_sendmsg(sock, &msg, nob);
- set_fs (oldmm);
- }
+ /* Never touch tx->tx_iov inside ksocknal_lib_send_iov() */
+ rc = ksocknal_lib_send_iov(conn, tx);
- if (rc <= 0) /* sent nothing? */
+ if (rc <= 0) /* sent nothing? */
return (rc);
- nob = rc;
- LASSERT (nob <= tx->tx_resid);
+ nob = rc;
+ LASSERT (nob <= tx->tx_resid);
tx->tx_resid -= nob;
- /* "consume" iov */
- do {
- LASSERT (tx->tx_niov > 0);
-
- if (nob < iov->iov_len) {
- iov->iov_base = (void *)(((unsigned long)(iov->iov_base)) + nob);
- iov->iov_len -= nob;
- return (rc);
- }
+ /* "consume" iov */
+ do {
+ LASSERT (tx->tx_niov > 0);
+
+ if (nob < iov->iov_len) {
+ iov->iov_base = (void *)(((unsigned long)(iov->iov_base)) + nob);
+ iov->iov_len -= nob;
+ return (rc);
+ }
- nob -= iov->iov_len;
- tx->tx_iov = ++iov;
- tx->tx_niov--;
+ nob -= iov->iov_len;
+ tx->tx_iov = ++iov;
+ tx->tx_niov--;
} while (nob != 0);
-
+
return (rc);
}
int
ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
-{
- struct socket *sock = conn->ksnc_sock;
+{
ptl_kiov_t *kiov = tx->tx_kiov;
- int rc;
- int nob;
-
- /* NB we can't trust socket ops to either consume our iovs
- * or leave them alone. */
- LASSERT (tx->tx_niov == 0);
- LASSERT (tx->tx_nkiov > 0);
-
-#if SOCKNAL_ZC
- if (kiov->kiov_len >= ksocknal_tunables.ksnd_zc_min_frag &&
- (sock->sk->route_caps & NETIF_F_SG) &&
- (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM))) {
- struct page *page = kiov->kiov_page;
- int offset = kiov->kiov_offset;
- int fragsize = kiov->kiov_len;
- int msgflg = MSG_DONTWAIT;
-
- CDEBUG(D_NET, "page %p + offset %x for %d\n",
- page, offset, kiov->kiov_len);
-
- if (!list_empty(&conn->ksnc_tx_queue) ||
- fragsize < tx->tx_resid)
- msgflg |= MSG_MORE;
-
- rc = tcp_sendpage_zccd(sock, page, offset, fragsize, msgflg,
- &tx->tx_zccd);
- } else
-#endif
- {
-#if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK
- struct iovec scratch;
- struct iovec *scratchiov = &scratch;
- int niov = 1;
-#else
-#ifdef CONFIG_HIGHMEM
-#warning "XXX risk of kmap deadlock on multiple frags..."
-#endif
- struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
- int niov = tx->tx_nkiov;
-#endif
- struct msghdr msg = {
- .msg_name = NULL,
- .msg_namelen = 0,
- .msg_iov = scratchiov,
- .msg_iovlen = niov,
- .msg_control = NULL,
- .msg_controllen = 0,
- .msg_flags = MSG_DONTWAIT
- };
- mm_segment_t oldmm = get_fs();
- int i;
-
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
- kiov[i].kiov_offset;
- nob += scratchiov[i].iov_len = kiov[i].kiov_len;
- }
-
- if (!list_empty(&conn->ksnc_tx_queue) ||
- nob < tx->tx_resid)
- msg.msg_flags |= MSG_DONTWAIT;
-
- set_fs (KERNEL_DS);
- rc = sock_sendmsg(sock, &msg, nob);
- set_fs (oldmm);
-
- for (i = 0; i < niov; i++)
- kunmap(kiov[i].kiov_page);
- }
+ int nob;
+ int rc;
- if (rc <= 0) /* sent nothing? */
- return (rc);
+ LASSERT (tx->tx_niov == 0);
+ LASSERT (tx->tx_nkiov > 0);
- nob = rc;
- LASSERT (nob <= tx->tx_resid);
- tx->tx_resid -= nob;
+ /* Never touch tx->tx_kiov inside ksocknal_lib_send_kiov() */
+ rc = ksocknal_lib_send_kiov(conn, tx);
- do {
- LASSERT(tx->tx_nkiov > 0);
-
- if (nob < kiov->kiov_len) {
- kiov->kiov_offset += nob;
- kiov->kiov_len -= nob;
- return rc;
- }
-
- nob -= kiov->kiov_len;
- tx->tx_kiov = ++kiov;
- tx->tx_nkiov--;
+ if (rc <= 0) /* sent nothing? */
+ return (rc);
+
+ nob = rc;
+ LASSERT (nob <= tx->tx_resid);
+ tx->tx_resid -= nob;
+
+ /* "consume" kiov */
+ do {
+ LASSERT(tx->tx_nkiov > 0);
+
+ if (nob < kiov->kiov_len) {
+ kiov->kiov_offset += nob;
+ kiov->kiov_len -= nob;
+ return rc;
+ }
+
+ nob -= kiov->kiov_len;
+ tx->tx_kiov = ++kiov;
+ tx->tx_nkiov--;
} while (nob != 0);
return (rc);
if (ksocknal_data.ksnd_stall_tx != 0) {
set_current_state (TASK_UNINTERRUPTIBLE);
- schedule_timeout (ksocknal_data.ksnd_stall_tx * HZ);
+ schedule_timeout (cfs_time_seconds(ksocknal_data.ksnd_stall_tx));
}
LASSERT (tx->tx_resid != 0);
rc = ksocknal_send_kiov (conn, tx);
}
- bufnob = conn->ksnc_sock->sk->sk_wmem_queued;
+ bufnob = SOCK_WMEM_QUEUED(conn->ksnc_sock);
if (rc > 0) /* sent something? */
conn->ksnc_tx_bufnob += rc; /* account it */
if (bufnob < conn->ksnc_tx_bufnob) {
/* allocated send buffer bytes < computed; infer
* something got ACKed */
- conn->ksnc_tx_deadline = jiffies +
- ksocknal_tunables.ksnd_io_timeout * HZ;
- conn->ksnc_peer->ksnp_last_alive = jiffies;
+ conn->ksnc_tx_deadline = cfs_time_shift(ksocknal_tunables.ksnd_io_timeout);
+ conn->ksnc_peer->ksnp_last_alive = cfs_time_current();
conn->ksnc_tx_bufnob = bufnob;
mb();
}
sched = conn->ksnc_scheduler;
spin_lock_irqsave(&sched->kss_lock, flags);
- if (!test_bit(SOCK_NOSPACE, &conn->ksnc_sock->flags) &&
+ if (!SOCK_TEST_NOSPACE(conn->ksnc_sock) &&
!conn->ksnc_tx_ready) {
/* SOCK_NOSPACE is set when the socket fills
* and cleared in the write_space callback
return (rc);
}
-void
-ksocknal_eager_ack (ksock_conn_t *conn)
-{
- int opt = 1;
- mm_segment_t oldmm = get_fs();
- struct socket *sock = conn->ksnc_sock;
-
- /* Remind the socket to ACK eagerly. If I don't, the socket might
- * think I'm about to send something it could piggy-back the ACK
- * on, introducing delay in completing zero-copy sends in my
- * peer. */
-
- set_fs(KERNEL_DS);
- sock->ops->setsockopt (sock, SOL_TCP, TCP_QUICKACK,
- (char *)&opt, sizeof (opt));
- set_fs(oldmm);
-}
-
int
ksocknal_recv_iov (ksock_conn_t *conn)
-{
-#if SOCKNAL_SINGLE_FRAG_RX
- struct iovec scratch;
- struct iovec *scratchiov = &scratch;
- int niov = 1;
-#else
- struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
- int niov = conn->ksnc_rx_niov;
-#endif
+{
struct iovec *iov = conn->ksnc_rx_iov;
- struct msghdr msg = {
- .msg_name = NULL,
- .msg_namelen = 0,
- .msg_iov = scratchiov,
- .msg_iovlen = niov,
- .msg_control = NULL,
- .msg_controllen = 0,
- .msg_flags = 0
- };
- mm_segment_t oldmm = get_fs();
- int nob;
- int i;
- int rc;
-
- /* NB we can't trust socket ops to either consume our iovs
- * or leave them alone. */
- LASSERT (niov > 0);
-
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i] = iov[i];
- nob += scratchiov[i].iov_len;
- }
- LASSERT (nob <= conn->ksnc_rx_nob_wanted);
-
- set_fs (KERNEL_DS);
- rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT);
- /* NB this is just a boolean..........................^ */
- set_fs (oldmm);
-
- if (rc <= 0)
- return (rc);
+ int nob;
+ int rc;
- /* received something... */
- nob = rc;
+ LASSERT (conn->ksnc_rx_niov > 0);
- conn->ksnc_peer->ksnp_last_alive = jiffies;
- conn->ksnc_rx_deadline = jiffies +
- ksocknal_tunables.ksnd_io_timeout * HZ;
- mb(); /* order with setting rx_started */
- conn->ksnc_rx_started = 1;
+ /* Never touch conn->ksnc_rx_iov or change connection
+ * status inside ksocknal_lib_recv_iov */
+ rc = ksocknal_lib_recv_iov(conn);
+
+ if (rc <= 0)
+ return (rc);
- conn->ksnc_rx_nob_wanted -= nob;
+ /* received something... */
+ nob = rc;
+
+ conn->ksnc_peer->ksnp_last_alive = cfs_time_current();
+ conn->ksnc_rx_deadline = cfs_time_shift (ksocknal_tunables.ksnd_io_timeout);
+ mb(); /* order with setting rx_started */
+ conn->ksnc_rx_started = 1;
+
+ conn->ksnc_rx_nob_wanted -= nob;
conn->ksnc_rx_nob_left -= nob;
- do {
- LASSERT (conn->ksnc_rx_niov > 0);
-
- if (nob < iov->iov_len) {
- iov->iov_len -= nob;
- iov->iov_base = (void *)(((unsigned long)iov->iov_base) + nob);
- return (-EAGAIN);
- }
-
- nob -= iov->iov_len;
- conn->ksnc_rx_iov = ++iov;
- conn->ksnc_rx_niov--;
+ do {
+ LASSERT (conn->ksnc_rx_niov > 0);
+
+ if (nob < iov->iov_len) {
+ iov->iov_len -= nob;
+ iov->iov_base = (void *)(((unsigned long)iov->iov_base) + nob);
+ return (-EAGAIN);
+ }
+
+ nob -= iov->iov_len;
+ conn->ksnc_rx_iov = ++iov;
+ conn->ksnc_rx_niov--;
} while (nob != 0);
return (rc);
int
ksocknal_recv_kiov (ksock_conn_t *conn)
{
-#if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK
- struct iovec scratch;
- struct iovec *scratchiov = &scratch;
- int niov = 1;
-#else
-#ifdef CONFIG_HIGHMEM
-#warning "XXX risk of kmap deadlock on multiple frags..."
-#endif
- struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
- int niov = conn->ksnc_rx_nkiov;
-#endif
ptl_kiov_t *kiov = conn->ksnc_rx_kiov;
- struct msghdr msg = {
- .msg_name = NULL,
- .msg_namelen = 0,
- .msg_iov = scratchiov,
- .msg_iovlen = niov,
- .msg_control = NULL,
- .msg_controllen = 0,
- .msg_flags = 0
- };
- mm_segment_t oldmm = get_fs();
- int nob;
- int i;
- int rc;
-
+ int nob;
+ int rc;
LASSERT (conn->ksnc_rx_nkiov > 0);
- /* NB we can't trust socket ops to either consume our iovs
- * or leave them alone. */
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i].iov_base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset;
- nob += scratchiov[i].iov_len = kiov[i].kiov_len;
- }
- LASSERT (nob <= conn->ksnc_rx_nob_wanted);
-
- set_fs (KERNEL_DS);
- rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT);
- /* NB this is just a boolean.......................^ */
- set_fs (oldmm);
-
- for (i = 0; i < niov; i++)
- kunmap(kiov[i].kiov_page);
-
- if (rc <= 0)
- return (rc);
+        /* Never touch conn->ksnc_rx_kiov or change connection
+         * status inside ksocknal_lib_recv_kiov */
+ rc = ksocknal_lib_recv_kiov(conn);
- /* received something... */
- nob = rc;
+ if (rc <= 0)
+ return (rc);
+
+ /* received something... */
+ nob = rc;
- conn->ksnc_peer->ksnp_last_alive = jiffies;
- conn->ksnc_rx_deadline = jiffies +
- ksocknal_tunables.ksnd_io_timeout * HZ;
- mb(); /* order with setting rx_started */
+ conn->ksnc_peer->ksnp_last_alive = cfs_time_current();
+ conn->ksnc_rx_deadline = cfs_time_shift (ksocknal_tunables.ksnd_io_timeout);
+ mb(); /* order with setting rx_started */
conn->ksnc_rx_started = 1;
- conn->ksnc_rx_nob_wanted -= nob;
- conn->ksnc_rx_nob_left -= nob;
-
- do {
- LASSERT (conn->ksnc_rx_nkiov > 0);
-
- if (nob < kiov->kiov_len) {
- kiov->kiov_offset += nob;
- kiov->kiov_len -= nob;
- return -EAGAIN;
- }
-
- nob -= kiov->kiov_len;
- conn->ksnc_rx_kiov = ++kiov;
- conn->ksnc_rx_nkiov--;
+ conn->ksnc_rx_nob_wanted -= nob;
+ conn->ksnc_rx_nob_left -= nob;
+
+ do {
+ LASSERT (conn->ksnc_rx_nkiov > 0);
+
+ if (nob < kiov->kiov_len) {
+ kiov->kiov_offset += nob;
+ kiov->kiov_len -= nob;
+ return -EAGAIN;
+ }
+
+ nob -= kiov->kiov_len;
+ conn->ksnc_rx_kiov = ++kiov;
+ conn->ksnc_rx_nkiov--;
} while (nob != 0);
return 1;
if (ksocknal_data.ksnd_stall_rx != 0) {
set_current_state (TASK_UNINTERRUPTIBLE);
- schedule_timeout (ksocknal_data.ksnd_stall_rx * HZ);
+ schedule_timeout(cfs_time_seconds (ksocknal_data.ksnd_stall_rx));
}
rc = ksocknal_getconnsock (conn);
(conn->ksnc_rx_state == SOCKNAL_RX_BODY ||
conn->ksnc_rx_state == SOCKNAL_RX_BODY_FWD)) {
/* Remind the socket to ack eagerly... */
- ksocknal_eager_ack(conn);
+ ksocknal_lib_eager_ack(conn);
}
rc = 1;
break;
spin_lock_irqsave (&sched->kss_lock, flags);
list_add_tail (&tx->tx_list, &sched->kss_zctxdone_list);
- wake_up (&sched->kss_waitq);
+ cfs_waitq_signal (&sched->kss_waitq);
spin_unlock_irqrestore (&sched->kss_lock, flags);
EXIT;
LASSERT (conn->ksnc_tx_scheduled);
list_add_tail(&conn->ksnc_tx_list,
&ksocknal_data.ksnd_enomem_conns);
- if (!time_after_eq(jiffies + SOCKNAL_ENOMEM_RETRY,
+ if (!cfs_time_aftereq(cfs_time_add(cfs_time_current(),
+ SOCKNAL_ENOMEM_RETRY),
ksocknal_data.ksnd_reaper_waketime))
- wake_up (&ksocknal_data.ksnd_reaper_waitq);
+ cfs_waitq_signal (&ksocknal_data.ksnd_reaper_waitq);
spin_unlock_irqrestore(&ksocknal_data.ksnd_reaper_lock, flags);
return (rc);
list_add_tail (&route->ksnr_connect_list,
&ksocknal_data.ksnd_autoconnectd_routes);
- wake_up (&ksocknal_data.ksnd_autoconnectd_waitq);
+ cfs_waitq_signal (&ksocknal_data.ksnd_autoconnectd_waitq);
spin_unlock_irqrestore (&ksocknal_data.ksnd_autoconnectd_lock, flags);
}
const int nob = 0;
#else
int nob = atomic_read(&c->ksnc_tx_nob) +
- c->ksnc_sock->sk->sk_wmem_queued;
+ SOCK_WMEM_QUEUED(c->ksnc_sock);
#endif
LASSERT (!c->ksnc_closing);
spin_lock_irqsave (&sched->kss_lock, flags);
if (list_empty(&conn->ksnc_tx_queue) &&
- conn->ksnc_sock->sk->sk_wmem_queued == 0) {
+ SOCK_WMEM_QUEUED(conn->ksnc_sock) == 0) {
/* First packet starts the timeout */
- conn->ksnc_tx_deadline = jiffies +
- ksocknal_tunables.ksnd_io_timeout * HZ;
+ conn->ksnc_tx_deadline = cfs_time_shift(ksocknal_tunables.ksnd_io_timeout);
conn->ksnc_tx_bufnob = 0;
mb(); /* order with adding to tx_queue */
}
list_add_tail (&conn->ksnc_tx_list,
&sched->kss_tx_conns);
conn->ksnc_tx_scheduled = 1;
- wake_up (&sched->kss_waitq);
+ cfs_waitq_signal (&sched->kss_waitq);
}
spin_unlock_irqrestore (&sched->kss_lock, flags);
continue;
/* too soon to retry this guy? */
- if (!time_after_eq (jiffies, route->ksnr_timeout))
+ if (!cfs_time_aftereq (cfs_time_current(), route->ksnr_timeout))
continue;
return (route);
int
ksocknal_thread_start (int (*fn)(void *arg), void *arg)
{
- long pid = kernel_thread (fn, arg, 0);
+ long pid = cfs_kernel_thread (fn, arg, 0);
unsigned long flags;
if (pid < 0)
spin_lock_irqsave (&sched->kss_lock, flags);
list_add_tail (&conn->ksnc_rx_list, &sched->kss_rx_conns);
- wake_up (&sched->kss_waitq);
+ cfs_waitq_signal (&sched->kss_waitq);
spin_unlock_irqrestore (&sched->kss_lock, flags);
}
LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
LASSERT (kpr_routing(&ksocknal_data.ksnd_router));
- if (payload_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE)
+ if (payload_nob <= SOCKNAL_SMALL_FWD_PAGES * CFS_PAGE_SIZE)
pool = &ksocknal_data.ksnd_small_fmp;
else
pool = &ksocknal_data.ksnd_large_fmp;
LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
LASSERT (conn->ksnc_rx_nob_wanted == conn->ksnc_rx_nob_left);
LASSERT (payload_nob >= 0);
- LASSERT (payload_nob <= fmb->fmb_pool->fmp_buff_pages * PAGE_SIZE);
- LASSERT (sizeof (ptl_hdr_t) < PAGE_SIZE);
+ LASSERT (payload_nob <= fmb->fmb_pool->fmp_buff_pages * CFS_PAGE_SIZE);
+ LASSERT (sizeof (ptl_hdr_t) < CFS_PAGE_SIZE);
LASSERT (fmb->fmb_kiov[0].kiov_offset == 0);
/* Take a ref on the conn's peer to prevent module unload before
while (nob > 0) {
LASSERT (niov < fmb->fmb_pool->fmp_buff_pages);
LASSERT (fmb->fmb_kiov[niov].kiov_offset == 0);
- fmb->fmb_kiov[niov].kiov_len = MIN (PAGE_SIZE, nob);
- nob -= PAGE_SIZE;
+ fmb->fmb_kiov[niov].kiov_len = MIN (CFS_PAGE_SIZE, nob);
+ nob -= CFS_PAGE_SIZE;
niov++;
}
return (0);
}
-void
-ksocknal_data_ready (struct sock *sk, int n)
+/*
+ * Add connection to kss_rx_conns of scheduler
+ * and wakeup the scheduler.
+ */
+void ksocknal_read_callback (ksock_conn_t *conn)
{
+ ksock_sched_t *sched;
unsigned long flags;
- ksock_conn_t *conn;
- ksock_sched_t *sched;
ENTRY;
- /* interleave correctly with closing sockets... */
- read_lock (&ksocknal_data.ksnd_global_lock);
-
- conn = sk->sk_user_data;
- if (conn == NULL) { /* raced with ksocknal_terminate_conn */
- LASSERT (sk->sk_data_ready != &ksocknal_data_ready);
- sk->sk_data_ready (sk, n);
- } else {
- sched = conn->ksnc_scheduler;
-
- spin_lock_irqsave (&sched->kss_lock, flags);
+ sched = conn->ksnc_scheduler;
- conn->ksnc_rx_ready = 1;
+ spin_lock_irqsave (&sched->kss_lock, flags);
- if (!conn->ksnc_rx_scheduled) { /* not being progressed */
- list_add_tail(&conn->ksnc_rx_list,
- &sched->kss_rx_conns);
- conn->ksnc_rx_scheduled = 1;
- /* extra ref for scheduler */
- atomic_inc (&conn->ksnc_refcount);
-
- wake_up (&sched->kss_waitq);
- }
+ conn->ksnc_rx_ready = 1;
- spin_unlock_irqrestore (&sched->kss_lock, flags);
- }
+ if (!conn->ksnc_rx_scheduled) { /* not being progressed */
+ list_add_tail(&conn->ksnc_rx_list,
+ &sched->kss_rx_conns);
+ conn->ksnc_rx_scheduled = 1;
+ /* extra ref for scheduler */
+ atomic_inc (&conn->ksnc_refcount);
- read_unlock (&ksocknal_data.ksnd_global_lock);
+ cfs_waitq_signal (&sched->kss_waitq);
+ }
+ spin_unlock_irqrestore (&sched->kss_lock, flags);
EXIT;
-}
+}
-void
-ksocknal_write_space (struct sock *sk)
-{
+/*
+ * Add connection to kss_tx_conns of scheduler
+ * and wakeup the scheduler.
+ */
+void ksocknal_write_callback (ksock_conn_t *conn)
+{
+ ksock_sched_t *sched;
unsigned long flags;
- ksock_conn_t *conn;
- ksock_sched_t *sched;
-
- /* interleave correctly with closing sockets... */
- read_lock (&ksocknal_data.ksnd_global_lock);
-
- conn = sk->sk_user_data;
-
- CDEBUG(D_NET, "sk %p wspace %d low water %d conn %p%s%s%s\n",
- sk, tcp_wspace(sk), SOCKNAL_TX_LOW_WATER(sk), conn,
- (conn == NULL) ? "" : (conn->ksnc_tx_ready ?
- " ready" : " blocked"),
- (conn == NULL) ? "" : (conn->ksnc_tx_scheduled ?
- " scheduled" : " idle"),
- (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ?
- " empty" : " queued"));
-
- if (conn == NULL) { /* raced with ksocknal_terminate_conn */
- LASSERT (sk->sk_write_space != &ksocknal_write_space);
- sk->sk_write_space (sk);
-
- read_unlock (&ksocknal_data.ksnd_global_lock);
- return;
- }
-
- if (tcp_wspace(sk) >= SOCKNAL_TX_LOW_WATER(sk)) { /* got enough space */
- sched = conn->ksnc_scheduler;
+ ENTRY;
+
+ sched = conn->ksnc_scheduler;
- spin_lock_irqsave (&sched->kss_lock, flags);
+ spin_lock_irqsave (&sched->kss_lock, flags);
- clear_bit (SOCK_NOSPACE, &sk->sk_socket->flags);
- conn->ksnc_tx_ready = 1;
+ conn->ksnc_tx_ready = 1;
- if (!conn->ksnc_tx_scheduled && // not being progressed
- !list_empty(&conn->ksnc_tx_queue)){//packets to send
- list_add_tail (&conn->ksnc_tx_list,
- &sched->kss_tx_conns);
- conn->ksnc_tx_scheduled = 1;
- /* extra ref for scheduler */
- atomic_inc (&conn->ksnc_refcount);
+ if (!conn->ksnc_tx_scheduled && // not being progressed
+ !list_empty(&conn->ksnc_tx_queue)){//packets to send
+ list_add_tail (&conn->ksnc_tx_list,
+ &sched->kss_tx_conns);
+ conn->ksnc_tx_scheduled = 1;
+ /* extra ref for scheduler */
+ atomic_inc (&conn->ksnc_refcount);
- wake_up (&sched->kss_waitq);
- }
+ cfs_waitq_signal (&sched->kss_waitq);
+ }
- spin_unlock_irqrestore (&sched->kss_lock, flags);
- }
+ spin_unlock_irqrestore (&sched->kss_lock, flags);
- read_unlock (&ksocknal_data.ksnd_global_lock);
+ EXIT;
}
int
ksocknal_sock_write (struct socket *sock, void *buffer, int nob)
{
- int rc;
- mm_segment_t oldmm = get_fs();
-
- while (nob > 0) {
- struct iovec iov = {
- .iov_base = buffer,
- .iov_len = nob
- };
- struct msghdr msg = {
- .msg_name = NULL,
- .msg_namelen = 0,
- .msg_iov = &iov,
- .msg_iovlen = 1,
- .msg_control = NULL,
- .msg_controllen = 0,
- .msg_flags = 0
- };
-
- set_fs (KERNEL_DS);
- rc = sock_sendmsg (sock, &msg, iov.iov_len);
- set_fs (oldmm);
-
- if (rc < 0)
- return (rc);
-
- if (rc == 0) {
- CERROR ("Unexpected zero rc\n");
- return (-ECONNABORTED);
- }
-
- buffer = ((char *)buffer) + rc;
- nob -= rc;
- }
-
- return (0);
+ return ksocknal_lib_sock_write(sock, buffer, nob);
}
int
ksocknal_sock_read (struct socket *sock, void *buffer, int nob)
{
- int rc;
- mm_segment_t oldmm = get_fs();
-
- while (nob > 0) {
- struct iovec iov = {
- .iov_base = buffer,
- .iov_len = nob
- };
- struct msghdr msg = {
- .msg_name = NULL,
- .msg_namelen = 0,
- .msg_iov = &iov,
- .msg_iovlen = 1,
- .msg_control = NULL,
- .msg_controllen = 0,
- .msg_flags = 0
- };
-
- set_fs (KERNEL_DS);
- rc = sock_recvmsg (sock, &msg, iov.iov_len, 0);
- set_fs (oldmm);
-
- if (rc < 0)
- return (rc);
-
- if (rc == 0)
- return (-ECONNABORTED);
-
- buffer = ((char *)buffer) + rc;
- nob -= rc;
- }
-
- return (0);
+ return ksocknal_lib_sock_read(sock, buffer, nob);
}
int
int
ksocknal_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle)
{
- mm_segment_t oldmm = get_fs ();
- struct socket *sock = conn->ksnc_sock;
- int len;
- int rc;
-
- rc = ksocknal_getconnsock (conn);
- if (rc != 0) {
- LASSERT (conn->ksnc_closing);
- *txmem = *rxmem = *nagle = 0;
- return (-ESHUTDOWN);
- }
-
- set_fs (KERNEL_DS);
-
- len = sizeof(*txmem);
- rc = sock_getsockopt(sock, SOL_SOCKET, SO_SNDBUF,
- (char *)txmem, &len);
- if (rc == 0) {
- len = sizeof(*rxmem);
- rc = sock_getsockopt(sock, SOL_SOCKET, SO_RCVBUF,
- (char *)rxmem, &len);
- }
- if (rc == 0) {
- len = sizeof(*nagle);
- rc = sock->ops->getsockopt(sock, SOL_TCP, TCP_NODELAY,
- (char *)nagle, &len);
- }
-
- set_fs (oldmm);
- ksocknal_putconnsock (conn);
-
- if (rc == 0)
- *nagle = !*nagle;
- else
- *txmem = *rxmem = *nagle = 0;
-
- return (rc);
-}
-
-int
-ksocknal_setup_sock (struct socket *sock)
-{
- mm_segment_t oldmm = get_fs ();
- int rc;
- int option;
- int keep_idle;
- int keep_intvl;
- int keep_count;
- int do_keepalive;
- struct linger linger;
-
- sock->sk->sk_allocation = GFP_NOFS;
-
- /* Ensure this socket aborts active sends immediately when we close
- * it. */
-
- linger.l_onoff = 0;
- linger.l_linger = 0;
-
- set_fs (KERNEL_DS);
- rc = sock_setsockopt (sock, SOL_SOCKET, SO_LINGER,
- (char *)&linger, sizeof (linger));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set SO_LINGER: %d\n", rc);
- return (rc);
- }
-
- option = -1;
- set_fs (KERNEL_DS);
- rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_LINGER2,
- (char *)&option, sizeof (option));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set SO_LINGER2: %d\n", rc);
- return (rc);
- }
-
- if (!ksocknal_tunables.ksnd_nagle) {
- option = 1;
-
- set_fs (KERNEL_DS);
- rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_NODELAY,
- (char *)&option, sizeof (option));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't disable nagle: %d\n", rc);
- return (rc);
- }
- }
-
- if (ksocknal_tunables.ksnd_buffer_size > 0) {
- option = ksocknal_tunables.ksnd_buffer_size;
-
- set_fs (KERNEL_DS);
- rc = sock_setsockopt (sock, SOL_SOCKET, SO_SNDBUF,
- (char *)&option, sizeof (option));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set send buffer %d: %d\n",
- option, rc);
- return (rc);
- }
-
- set_fs (KERNEL_DS);
- rc = sock_setsockopt (sock, SOL_SOCKET, SO_RCVBUF,
- (char *)&option, sizeof (option));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set receive buffer %d: %d\n",
- option, rc);
- return (rc);
- }
- }
-
- /* snapshot tunables */
- keep_idle = ksocknal_tunables.ksnd_keepalive_idle;
- keep_count = ksocknal_tunables.ksnd_keepalive_count;
- keep_intvl = ksocknal_tunables.ksnd_keepalive_intvl;
-
- do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
-
- option = (do_keepalive ? 1 : 0);
- set_fs (KERNEL_DS);
- rc = sock_setsockopt (sock, SOL_SOCKET, SO_KEEPALIVE,
- (char *)&option, sizeof (option));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set SO_KEEPALIVE: %d\n", rc);
- return (rc);
- }
-
- if (!do_keepalive)
- return (0);
-
- set_fs (KERNEL_DS);
- rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPIDLE,
- (char *)&keep_idle, sizeof (keep_idle));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set TCP_KEEPIDLE: %d\n", rc);
- return (rc);
- }
-
- set_fs (KERNEL_DS);
- rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPINTVL,
- (char *)&keep_intvl, sizeof (keep_intvl));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set TCP_KEEPINTVL: %d\n", rc);
- return (rc);
- }
-
- set_fs (KERNEL_DS);
- rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPCNT,
- (char *)&keep_count, sizeof (keep_count));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set TCP_KEEPCNT: %d\n", rc);
- return (rc);
- }
-
- return (0);
-}
-
-static int
-ksocknal_connect_sock(struct socket **sockp, int *may_retry,
- ksock_route_t *route, int local_port)
-{
- struct sockaddr_in locaddr;
- struct sockaddr_in srvaddr;
- struct socket *sock;
- int rc;
- int option;
- mm_segment_t oldmm = get_fs();
- struct timeval tv;
-
- memset(&locaddr, 0, sizeof(locaddr));
- locaddr.sin_family = AF_INET;
- locaddr.sin_port = htons(local_port);
- locaddr.sin_addr.s_addr =
- (route->ksnr_myipaddr != 0) ? htonl(route->ksnr_myipaddr)
- : INADDR_ANY;
-
- memset (&srvaddr, 0, sizeof (srvaddr));
- srvaddr.sin_family = AF_INET;
- srvaddr.sin_port = htons (route->ksnr_port);
- srvaddr.sin_addr.s_addr = htonl (route->ksnr_ipaddr);
-
- *may_retry = 0;
-
- rc = sock_create (PF_INET, SOCK_STREAM, 0, &sock);
- *sockp = sock;
- if (rc != 0) {
- CERROR ("Can't create autoconnect socket: %d\n", rc);
- return (rc);
- }
-
- /* Ugh; have to map_fd for compatibility with sockets passed in
- * from userspace. And we actually need the sock->file refcounting
- * that this gives you :) */
-
- rc = sock_map_fd (sock);
- if (rc < 0) {
- sock_release (sock);
- CERROR ("sock_map_fd error %d\n", rc);
- return (rc);
- }
-
- /* NB the file descriptor (rc) now owns the ref on sock->file */
- LASSERT (sock->file != NULL);
- LASSERT (file_count(sock->file) == 1);
-
- get_file(sock->file); /* extra ref makes sock->file */
- sys_close(rc); /* survive this close */
-
- /* Still got a single ref on sock->file */
- LASSERT (file_count(sock->file) == 1);
-
- /* Set the socket timeouts, so our connection attempt completes in
- * finite time */
- tv.tv_sec = ksocknal_tunables.ksnd_io_timeout;
- tv.tv_usec = 0;
-
- set_fs (KERNEL_DS);
- rc = sock_setsockopt (sock, SOL_SOCKET, SO_SNDTIMEO,
- (char *)&tv, sizeof (tv));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set send timeout %d: %d\n",
- ksocknal_tunables.ksnd_io_timeout, rc);
- goto failed;
- }
-
- set_fs (KERNEL_DS);
- rc = sock_setsockopt (sock, SOL_SOCKET, SO_RCVTIMEO,
- (char *)&tv, sizeof (tv));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set receive timeout %d: %d\n",
- ksocknal_tunables.ksnd_io_timeout, rc);
- goto failed;
- }
-
- set_fs (KERNEL_DS);
- option = 1;
- rc = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
- (char *)&option, sizeof (option));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc);
- goto failed;
- }
-
- rc = sock->ops->bind(sock,
- (struct sockaddr *)&locaddr, sizeof(locaddr));
- if (rc == -EADDRINUSE) {
- CDEBUG(D_NET, "Port %d already in use\n", local_port);
- *may_retry = 1;
- goto failed;
- }
- if (rc != 0) {
- CERROR("Error trying to bind to reserved port %d: %d\n",
- local_port, rc);
- goto failed;
- }
-
- rc = sock->ops->connect(sock,
- (struct sockaddr *)&srvaddr, sizeof(srvaddr),
- sock->file->f_flags);
- if (rc == 0)
- return 0;
-
- /* EADDRNOTAVAIL probably means we're already connected to the same
- * peer/port on the same local port on a differently typed
- * connection. Let our caller retry with a different local
- * port... */
- *may_retry = (rc == -EADDRNOTAVAIL);
-
- CDEBUG(*may_retry ? D_NET : D_ERROR,
- "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc,
- HIPQUAD(route->ksnr_myipaddr), local_port,
- HIPQUAD(route->ksnr_ipaddr), route->ksnr_port);
-
- failed:
- fput(sock->file);
- return rc;
+ return ksocknal_lib_get_conn_tunables(conn, txmem, rxmem, nagle);
}
int
for (port = 1023; port > 512; --port) {
- rc = ksocknal_connect_sock(&sock, &may_retry, route, port);
+ rc = ksocknal_lib_connect_sock(&sock, &may_retry, route, port);
if (rc == 0) {
rc = ksocknal_create_conn(route, sock, type);
- fput(sock->file);
+ cfs_put_file(KSN_SOCK2FILE(sock));
return rc;
}
void
ksocknal_autoconnect (ksock_route_t *route)
{
- LIST_HEAD (zombies);
+ CFS_LIST_HEAD (zombies);
ksock_tx_t *tx;
ksock_peer_t *peer;
unsigned long flags;
/* This is a retry rather than a new connection */
LASSERT (route->ksnr_retry_interval != 0);
- route->ksnr_timeout = jiffies + route->ksnr_retry_interval;
+ route->ksnr_timeout = cfs_time_add(cfs_time_current(),
+ route->ksnr_retry_interval);
route->ksnr_retry_interval = MIN (route->ksnr_retry_interval * 2,
SOCKNAL_MAX_RECONNECT_INTERVAL);
/* Don't need the {get,put}connsock dance to deref ksnc_sock... */
LASSERT (!conn->ksnc_closing);
- if (conn->ksnc_sock->sk->sk_err != 0) {
+ if (SOCK_ERROR(conn->ksnc_sock) != 0) {
/* Something (e.g. failed keepalive) set the socket error */
atomic_inc (&conn->ksnc_refcount);
CERROR ("Socket error %d: "LPX64" %p %d.%d.%d.%d\n",
- conn->ksnc_sock->sk->sk_err, peer->ksnp_nid,
+ SOCK_ERROR(conn->ksnc_sock), peer->ksnp_nid,
conn, HIPQUAD(conn->ksnc_ipaddr));
return (conn);
}
if (conn->ksnc_rx_started &&
- time_after_eq (jiffies, conn->ksnc_rx_deadline)) {
+ cfs_time_aftereq (cfs_time_current(),
+ conn->ksnc_rx_deadline)) {
/* Timed out incomplete incoming message */
atomic_inc (&conn->ksnc_refcount);
CERROR ("Timed out RX from "LPX64" %p %d.%d.%d.%d\n",
}
if ((!list_empty (&conn->ksnc_tx_queue) ||
- conn->ksnc_sock->sk->sk_wmem_queued != 0) &&
- time_after_eq (jiffies, conn->ksnc_tx_deadline)) {
+ SOCK_WMEM_QUEUED(conn->ksnc_sock) != 0) &&
+ cfs_time_aftereq (cfs_time_current(),
+ conn->ksnc_tx_deadline)) {
/* Timed out messages queued for sending or
* buffered in the socket's send buffer */
atomic_inc (&conn->ksnc_refcount);
CERROR ("Timed out TX to "LPX64" %s%d %p %d.%d.%d.%d\n",
peer->ksnp_nid,
list_empty (&conn->ksnc_tx_queue) ? "" : "Q ",
- conn->ksnc_sock->sk->sk_wmem_queued, conn,
+ SOCK_WMEM_QUEUED(conn->ksnc_sock), conn,
HIPQUAD(conn->ksnc_ipaddr));
return (conn);
}
int
ksocknal_reaper (void *arg)
{
- wait_queue_t wait;
+ cfs_waitlink_t wait;
unsigned long flags;
ksock_conn_t *conn;
ksock_sched_t *sched;
struct list_head enomem_conns;
int nenomem_conns;
- int timeout;
+ cfs_duration_t timeout;
int i;
int peer_index = 0;
- unsigned long deadline = jiffies;
+ cfs_time_t deadline = cfs_time_current();
kportal_daemonize ("ksocknal_reaper");
kportal_blockallsigs ();
- INIT_LIST_HEAD(&enomem_conns);
- init_waitqueue_entry (&wait, current);
+ CFS_INIT_LIST_HEAD(&enomem_conns);
+ cfs_waitlink_init (&wait);
spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags);
LASSERT (conn->ksnc_tx_scheduled);
conn->ksnc_tx_ready = 1;
list_add_tail (&conn->ksnc_tx_list, &sched->kss_tx_conns);
- wake_up (&sched->kss_waitq);
+ cfs_waitq_signal (&sched->kss_waitq);
spin_unlock_irqrestore (&sched->kss_lock, flags);
nenomem_conns++;
}
/* careful with the jiffy wrap... */
- while ((timeout = (int)(deadline - jiffies)) <= 0) {
+ while ((timeout = cfs_time_sub(deadline,
+ cfs_time_current())) <= 0) {
const int n = 4;
const int p = 1;
int chunk = ksocknal_data.ksnd_peer_hash_size;
ksocknal_data.ksnd_peer_hash_size;
}
- deadline += p * HZ;
+ deadline = cfs_time_add(deadline, cfs_time_seconds(p));
}
if (nenomem_conns != 0) {
* if any go back on my enomem list. */
timeout = SOCKNAL_ENOMEM_RETRY;
}
- ksocknal_data.ksnd_reaper_waketime = jiffies + timeout;
+ ksocknal_data.ksnd_reaper_waketime =
+ cfs_time_add(cfs_time_current(), timeout);
set_current_state (TASK_INTERRUPTIBLE);
- add_wait_queue (&ksocknal_data.ksnd_reaper_waitq, &wait);
+ cfs_waitq_add (&ksocknal_data.ksnd_reaper_waitq, &wait);
if (!ksocknal_data.ksnd_shuttingdown &&
list_empty (&ksocknal_data.ksnd_deathrow_conns) &&
list_empty (&ksocknal_data.ksnd_zombie_conns))
- schedule_timeout (timeout);
+ cfs_waitq_timedwait (&wait, timeout);
set_current_state (TASK_RUNNING);
- remove_wait_queue (&ksocknal_data.ksnd_reaper_waitq, &wait);
+ cfs_waitq_del (&ksocknal_data.ksnd_reaper_waitq, &wait);
spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags);
}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Phil Schwan <phil@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Darwin porting library
+ * Make things easy to port
+ */
+#include <mach/mach_types.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <sys/file.h>
+
+#include "socknal.h"
+
+#if 0
+#undef SOCKNAL_SINGLE_FRAG_TX
+#define SOCKNAL_SINGLE_FRAG_TX 1
+#undef SOCKNAL_SINGLE_FRAG_RX
+#define SOCKNAL_SINGLE_FRAG_RX 1
+#endif
+
+SYSCTL_DECL(_portals);
+
+SYSCTL_NODE (_portals, OID_AUTO, ksocknal, CTLFLAG_RW,
+ 0, "ksocknal_sysctl");
+
+SYSCTL_INT(_portals_ksocknal, OID_AUTO, timeout,
+ CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_io_timeout,
+ 0, "timeout");
+SYSCTL_INT(_portals_ksocknal, OID_AUTO, eager_ack,
+ CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_eager_ack,
+ 0, "eager_ack");
+SYSCTL_INT(_portals_ksocknal, OID_AUTO, typed,
+ CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_typed_conns,
+ 0, "typed");
+SYSCTL_INT(_portals_ksocknal, OID_AUTO, min_bulk,
+ CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_min_bulk,
+ 0, "min_bulk");
+SYSCTL_INT(_portals_ksocknal, OID_AUTO, buffer_size,
+ CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_buffer_size,
+ 0, "buffer_size");
+SYSCTL_INT(_portals_ksocknal, OID_AUTO, nagle,
+ CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_nagle,
+ 0, "nagle");
+
+/* NULL-terminated list of the sysctl OIDs declared above; registered
+ * and torn down as a unit by the libcfs sysctl helpers. */
+cfs_sysctl_table_t ksocknal_top_ctl_table [] = {
+ &sysctl__portals_ksocknal,
+ &sysctl__portals_ksocknal_timeout,
+ &sysctl__portals_ksocknal_eager_ack,
+ &sysctl__portals_ksocknal_typed,
+ &sysctl__portals_ksocknal_min_bulk,
+ &sysctl__portals_ksocknal_buffer_size,
+ &sysctl__portals_ksocknal_nagle,
+ NULL
+};
+
+/* Upper bound for a socket buffer reservation: the BSD stack's own
+ * formula (sb_max discounted by mbuf cluster overhead). Used to clamp
+ * ksnd_buffer_size in ksocknal_lib_setup_sock(). */
+static unsigned long ksocknal_mbuf_size = (u_quad_t)SB_MAX * MCLBYTES / (MSIZE + MCLBYTES);
+
+/* Translate a user file descriptor into its struct socket, stashing
+ * the backing struct file in so->reserved4 and taking an extra file
+ * reference (dropped later by sock_release()/rele_file()).
+ * Returns NULL when fd is not a valid open descriptor.
+ * NOTE(review): callers must now check for NULL — previously a bad fd
+ * dereferenced an uninitialised 'fp'. */
+struct socket *
+sockfd_lookup(int fd, void *foo)
+{
+ struct socket *so;
+ struct file *fp;
+ int rc;
+ CFS_DECL_FUNNEL_DATA;
+
+ CFS_NET_IN;
+ rc = getsock(current_proc()->p_fd, fd, &fp);
+ CFS_NET_EX;
+ if (rc != 0)
+ /* bad/closed fd: 'fp' was never filled in */
+ return NULL;
+ so = (struct socket *)fp->f_data;
+ so->reserved4 = fp;
+ CFS_CONE_IN;
+ fref(fp);
+ CFS_CONE_EX;
+ return so;
+}
+
+extern struct fileops socketops;
+
+/* Wrap an in-kernel socket in a file descriptor (mirroring what
+ * socket(2) does for user space). Returns the new fd, or a negative
+ * errno if descriptor/file allocation fails. */
+static int
+sock_map_fd (struct socket *so)
+{
+ struct file *fp;
+ int fd;
+ int rc;
+ CFS_DECL_FUNNEL_DATA;
+
+ CFS_CONE_IN;
+ rc = falloc(current_proc(), &fp, &fd);
+ if (rc != 0) {
+ /* previously unchecked: a failed falloc() left 'fp'
+ * uninitialised yet it was dereferenced below */
+ CFS_CONE_EX;
+ return -rc;
+ }
+ fp->f_flag = FREAD|FWRITE;
+ fp->f_type = DTYPE_SOCKET;
+ fp->f_ops = &socketops;
+ fp->f_data = (caddr_t)so;
+ so->reserved4 = fp;
+ *fdflags(current_proc(), fd) &= ~UF_RESERVED;
+ CFS_CONE_EX;
+
+ return fd;
+}
+
+/* Break the socket<->file association made in sockfd_lookup() /
+ * sock_map_fd(), drop the file reference, and disable further
+ * receives. NB soshutdown(so, 0) shuts down the read side only; the
+ * socket itself is not freed here. */
+static void
+sock_release(struct socket *so)
+{
+ struct file *fp;
+ CFS_DECL_FUNNEL_DATA;
+
+ fp = (struct file *)so->reserved4;
+ so->reserved4 = NULL;
+ fp->f_data = NULL;
+ CFS_CONE_IN;
+ frele(fp);
+ CFS_CONE_EX;
+ CFS_NET_IN;
+ soshutdown(so, 0);
+ CFS_NET_EX;
+}
+
+/* Give back the fd slot reserved by sock_map_fd(); the struct file
+ * itself stays referenced via so->reserved4. */
+static void
+sock_fdrelse(int fd)
+{
+ CFS_DECL_FUNNEL_DATA;
+
+ CFS_CONE_IN;
+ fdrelse(current_proc(), fd);
+ CFS_CONE_EX;
+}
+
+/* No-op on Darwin: IRQ affinity is a Linux-only optimisation. */
+void
+ksocknal_lib_bind_irq (unsigned int irq)
+{
+ return;
+}
+
+/* Darwin cannot map a socket to a NIC IRQ; 0 means "unknown". */
+unsigned int
+ksocknal_lib_sock_irq (struct socket *sock)
+{
+ return 0;
+}
+
+/* Record the connection's peer IPv4 address/port and local IPv4
+ * address in *conn. Returns 0 on success or a positive BSD errno.
+ * NB must be called before the socket can be closed under us; we
+ * LASSERT !ksnc_closing after the first protocol call. */
+int
+ksocknal_lib_get_conn_addrs (ksock_conn_t *conn)
+{
+ struct sockaddr_in *sin;
+ struct sockaddr *sa;
+ int rc;
+ CFS_DECL_NET_DATA;
+
+ CFS_NET_IN;
+ rc = conn->ksnc_sock->so_proto->pr_usrreqs->pru_peeraddr(conn->ksnc_sock, &sa);
+ LASSERT (!conn->ksnc_closing);
+ if (rc != 0) {
+ CFS_NET_EX;
+ if (sa) FREE(sa, M_SONAME);
+ CERROR ("Error %d getting sock peer IP\n", rc);
+ return rc;
+ }
+ sin = (struct sockaddr_in *)sa;
+ conn->ksnc_ipaddr = ntohl (sin->sin_addr.s_addr);
+ conn->ksnc_port = ntohs (sin->sin_port);
+ if (sa) FREE(sa, M_SONAME);
+ rc = conn->ksnc_sock->so_proto->pr_usrreqs->pru_sockaddr(conn->ksnc_sock, &sa);
+ CFS_NET_EX;
+ if (rc != 0) {
+ if (sa) FREE(sa, M_SONAME);
+ CERROR ("Error %d getting sock local IP\n", rc);
+ return rc;
+ }
+ /* NB fixed: re-point 'sin' at the freshly returned local sockaddr.
+ * It previously still referenced the peer sockaddr FREEd above,
+ * so the read below was a use-after-free. */
+ sin = (struct sockaddr_in *)sa;
+ conn->ksnc_myipaddr = ntohl (sin->sin_addr.s_addr);
+ if (sa) FREE(sa, M_SONAME); /* don't leak the local sockaddr */
+
+ return 0;
+}
+
+/* Send tx's plain iovs on conn's socket without blocking. Returns the
+ * number of bytes the socket accepted (possibly 0), or a negative
+ * errno. NB socket ops may scribble on the iovs, so we always work on
+ * a scratch copy. */
+int
+ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
+{
+#if SOCKNAL_SINGLE_FRAG_TX
+ struct iovec scratch;
+ struct iovec *scratchiov = &scratch;
+ int niov = 1;
+#else
+ struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
+ int niov = tx->tx_niov;
+#endif
+ struct socket *sock = conn->ksnc_sock;
+ int nob;
+ int rc;
+ int i;
+ struct uio suio = {
+ .uio_iov = scratchiov,
+ .uio_iovcnt = niov,
+ .uio_offset = 0,
+ .uio_resid = 0, /* set below once 'nob' is known */
+ .uio_segflg = UIO_SYSSPACE,
+ .uio_rw = UIO_WRITE,
+ .uio_procp = NULL
+ };
+ int flags = MSG_DONTWAIT;
+ CFS_DECL_NET_DATA;
+
+ for (nob = i = 0; i < niov; i++) {
+ scratchiov[i] = tx->tx_iov[i];
+ nob += scratchiov[i].iov_len;
+ }
+ suio.uio_resid = nob;
+
+ CFS_NET_IN;
+ rc = sosend(sock, NULL, &suio, (struct mbuf *)0, (struct mbuf *)0, flags);
+ CFS_NET_EX;
+
+ /* NB sosend()'s return value doesn't say how much was sent; the
+ * byte count must be recovered from the residual left in suio. */
+ if (rc != 0) {
+ if (suio.uio_resid != nob &&\
+ (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK))
+ /* We have sent something before the interruption:
+ * report the partial byte count, not the error */
+ rc = nob - suio.uio_resid;
+ else if ( rc == EWOULDBLOCK )
+ /* EAGAIN and EWOULDBLOCK have the same value on OSX */
+ rc = -EAGAIN;
+ else
+ rc = -rc;
+ } else /* rc == 0 */
+ rc = nob - suio.uio_resid;
+
+ return rc;
+}
+
+/* Send tx's page (kiov) fragments on conn's socket without blocking.
+ * Pages are kmapped for the duration of the sosend() and unmapped
+ * afterwards. Returns bytes accepted, or a negative errno. */
+int
+ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
+{
+#if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK
+ struct iovec scratch;
+ struct iovec *scratchiov = &scratch;
+ int niov = 1;
+#else
+ struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
+ int niov = tx->tx_nkiov;
+#endif
+ struct socket *sock = conn->ksnc_sock;
+ ptl_kiov_t *kiov = tx->tx_kiov;
+ int nob;
+ int rc;
+ int i;
+ struct uio suio = {
+ .uio_iov = scratchiov,
+ .uio_iovcnt = niov,
+ .uio_offset = 0,
+ .uio_resid = 0, /* set below once 'nob' is known */
+ .uio_segflg = UIO_SYSSPACE,
+ .uio_rw = UIO_WRITE,
+ .uio_procp = NULL
+ };
+ int flags = MSG_DONTWAIT;
+ CFS_DECL_NET_DATA;
+
+ /* map each page fragment into a scratch iov */
+ for (nob = i = 0; i < niov; i++) {
+ scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) +
+ kiov[i].kiov_offset;
+ nob += scratchiov[i].iov_len = kiov[i].kiov_len;
+ }
+ suio.uio_resid = nob;
+
+ CFS_NET_IN;
+ rc = sosend(sock, NULL, &suio, (struct mbuf *)0, (struct mbuf *)0, flags);
+ CFS_NET_EX;
+
+ for (i = 0; i < niov; i++)
+ cfs_kunmap(kiov[i].kiov_page);
+
+ if (rc != 0) {
+ if (suio.uio_resid != nob &&\
+ (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK))
+ /* We have sent something: report the partial count */
+ rc = nob - suio.uio_resid;
+ else if ( rc == EWOULDBLOCK )
+ /* EAGAIN and EWOULDBLOCK have the same value on OSX */
+ rc = -EAGAIN;
+ else
+ rc = -rc;
+ } else /* rc == 0 */
+ rc = nob - suio.uio_resid;
+
+ return rc;
+}
+
+/*
+ * liang: Hack of inpcb and tcpcb.
+ * Hand-copied shadows of the leading fields of the XNU inpcb/tcpcb so
+ * we can reach a socket's tcpcb and call tcp_output() to force a
+ * quick ACK. NB these layouts must be kept in sync with the running
+ * kernel's headers or the field offsets below are garbage.
+ */
+struct ks_tseg_qent{
+ int foo;
+};
+
+struct ks_tcptemp{
+ int foo;
+};
+
+LIST_HEAD(ks_tsegqe_head, ks_tseg_qent);
+
+struct ks_tcpcb {
+ struct ks_tsegqe_head t_segq;
+ int t_dupacks;
+ struct ks_tcptemp *unused;
+ int t_timer[4];
+ struct inpcb *t_inpcb;
+ int t_state;
+ u_int t_flags;
+ /*
+ * There are more fields but we don't need them
+ * ......
+ */
+};
+
+#define TF_ACKNOW 0x00001
+#define TF_DELACK 0x00002
+
+struct ks_inpcb {
+ LIST_ENTRY(ks_inpcb) inp_hash;
+ struct in_addr reserved1;
+ struct in_addr reserved2;
+ u_short inp_fport;
+ u_short inp_lport;
+ LIST_ENTRY(inpcb) inp_list;
+ caddr_t inp_ppcb;
+ /*
+ * There are more fields but we don't need them
+ * ......
+ */
+};
+
+#define ks_sotoinpcb(so) ((struct ks_inpcb *)(so)->so_pcb)
+#define ks_intotcpcb(ip) ((struct ks_tcpcb *)(ip)->inp_ppcb)
+/* NB fixed: this previously expanded to the kernel's own
+ * intotcpcb()/sotoinpcb(), not the ks_* shadows defined above. */
+#define ks_sototcpcb(so) (ks_intotcpcb(ks_sotoinpcb(so)))
+
+/* Force an immediate ACK on conn's socket (OSX has no TCP_QUICKACK):
+ * if a delayed ACK is pending on the hand-rolled tcpcb shadow, flip
+ * it to ACKNOW and call tcp_output() directly.
+ * NOTE(review): depends on the ks_tcpcb/ks_inpcb layout hack above
+ * matching the running kernel — fragile across kernel revisions. */
+void
+ksocknal_lib_eager_ack (ksock_conn_t *conn)
+{
+ struct socket *sock = conn->ksnc_sock;
+ struct ks_inpcb *inp = ks_sotoinpcb(sock);
+ struct ks_tcpcb *tp = ks_intotcpcb(inp);
+ int s;
+ CFS_DECL_NET_DATA;
+
+ extern int tcp_output(register struct ks_tcpcb *tp);
+
+ CFS_NET_IN;
+ s = splnet();
+
+ if (tp && tp->t_flags & TF_DELACK){
+ tp->t_flags &= ~TF_DELACK;
+ tp->t_flags |= TF_ACKNOW;
+ (void) tcp_output(tp);
+ }
+ splx(s);
+
+ /*
+ * No TCP_QUICKACK supported in BSD, so I have to call tcp_fasttimo
+ * to send immediate ACK. It's not the best resolution because
+ * tcp_fasttimo will send out ACK for all delayed-ack tcp socket.
+ * Anyway, it's working now.
+ * extern void tcp_fasttimo();
+ * tcp_fasttimo();
+ */
+ CFS_NET_EX;
+
+ return;
+}
+
+/* Receive into conn's plain rx iovs without blocking. Returns bytes
+ * received (never more than ksnc_rx_nob_wanted), -EAGAIN when nothing
+ * is available, or another negative errno. */
+int
+ksocknal_lib_recv_iov (ksock_conn_t *conn)
+{
+#if SOCKNAL_SINGLE_FRAG_RX
+ struct iovec scratch;
+ struct iovec *scratchiov = &scratch;
+ int niov = 1;
+#else
+ struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
+ int niov = conn->ksnc_rx_niov;
+#endif
+ struct iovec *iov = conn->ksnc_rx_iov;
+ int nob;
+ int rc;
+ int i;
+ struct uio ruio = {
+ .uio_iov = scratchiov,
+ .uio_iovcnt = niov,
+ .uio_offset = 0,
+ .uio_resid = 0, /* set below once 'nob' is known */
+ .uio_segflg = UIO_SYSSPACE,
+ .uio_rw = UIO_READ,
+ .uio_procp = NULL
+ };
+ int flags = MSG_DONTWAIT;
+ CFS_DECL_NET_DATA;
+
+ for (nob = i = 0; i < niov; i++) {
+ scratchiov[i] = iov[i];
+ nob += scratchiov[i].iov_len;
+ }
+ LASSERT (nob <= conn->ksnc_rx_nob_wanted);
+
+ ruio.uio_resid = nob;
+
+ CFS_NET_IN;
+ rc = soreceive(conn->ksnc_sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, (struct mbuf **)0, &flags);
+ CFS_NET_EX;
+ if (rc){
+ if (ruio.uio_resid != nob && \
+ (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK || rc == EAGAIN))
+ /* data partially received */
+ rc = nob - ruio.uio_resid;
+ else if (rc == EWOULDBLOCK)
+ /* EAGAIN and EWOULDBLOCK have the same value on OSX */
+ rc = -EAGAIN;
+ else
+ rc = -rc;
+ } else
+ rc = nob - ruio.uio_resid;
+
+ return (rc);
+}
+
+/* Receive into conn's page (kiov) rx fragments without blocking.
+ * Pages are kmapped around the soreceive() and unmapped afterwards.
+ * Returns bytes received, -EAGAIN, or another negative errno. */
+int
+ksocknal_lib_recv_kiov (ksock_conn_t *conn)
+{
+#if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK
+ struct iovec scratch;
+ struct iovec *scratchiov = &scratch;
+ int niov = 1;
+#else
+ struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
+ int niov = conn->ksnc_rx_nkiov;
+#endif
+ ptl_kiov_t *kiov = conn->ksnc_rx_kiov;
+ int nob;
+ int rc;
+ int i;
+ struct uio ruio = {
+ .uio_iov = scratchiov,
+ .uio_iovcnt = niov,
+ .uio_offset = 0,
+ .uio_resid = 0, /* set below once 'nob' is known */
+ .uio_segflg = UIO_SYSSPACE,
+ .uio_rw = UIO_READ,
+ .uio_procp = NULL
+ };
+ int flags = MSG_DONTWAIT;
+ CFS_DECL_NET_DATA;
+
+ for (nob = i = 0; i < niov; i++) {
+ scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) + kiov[i].kiov_offset;
+ nob += scratchiov[i].iov_len = kiov[i].kiov_len;
+ }
+ LASSERT (nob <= conn->ksnc_rx_nob_wanted);
+
+ ruio.uio_resid = nob;
+
+ CFS_NET_IN;
+ rc = soreceive(conn->ksnc_sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, NULL, &flags);
+ CFS_NET_EX;
+
+ for (i = 0; i < niov; i++)
+ cfs_kunmap(kiov[i].kiov_page);
+
+ if (rc){
+ if (ruio.uio_resid != nob && \
+ (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK))
+ /* data partially received */
+ rc = nob - ruio.uio_resid;
+ else if (rc == EWOULDBLOCK)
+ /* receive blocked, EWOULDBLOCK == EAGAIN */
+ rc = -EAGAIN;
+ else
+ rc = -rc;
+ } else
+ rc = nob - ruio.uio_resid;
+
+ return (rc);
+}
+
+/* Write exactly 'nob' bytes from 'buffer' to the socket, looping over
+ * partial/interrupted sends. Returns 0 on success or a negative BSD
+ * errno. NB this is a blocking call (flags == 0 to sosend), used for
+ * handshakes, not the data path. */
+int
+ksocknal_lib_sock_write (struct socket *sock, void *buffer, int nob)
+{
+ int rc;
+ CFS_DECL_NET_DATA;
+
+ while (nob > 0) {
+ struct iovec iov = {
+ .iov_base = buffer,
+ .iov_len = nob
+ };
+ struct uio suio = {
+ .uio_iov = &iov,
+ .uio_iovcnt = 1,
+ .uio_offset = 0,
+ .uio_resid = nob,
+ .uio_segflg = UIO_SYSSPACE,
+ .uio_rw = UIO_WRITE,
+ .uio_procp = NULL
+ };
+
+ CFS_NET_IN;
+ rc = sosend(sock, NULL, &suio, (struct mbuf *)0, (struct mbuf *)0, 0);
+ CFS_NET_EX;
+
+ if (rc != 0) {
+ /* interrupted after partial progress: advance past
+ * what was sent and retry; any other error is fatal */
+ if ( suio.uio_resid != nob && ( rc == ERESTART || rc == EINTR ||\
+ rc == EWOULDBLOCK))
+ rc = 0;
+ if ( rc != 0 )
+ return -rc;
+ rc = nob - suio.uio_resid;
+ buffer = ((char *)buffer) + rc;
+ nob = suio.uio_resid;
+ continue;
+ }
+ break;
+ }
+
+ return (0);
+}
+
+/* Read exactly 'nob' bytes into 'buffer' from the socket, looping
+ * over partial/interrupted receives. Returns 0 on success or a
+ * negative BSD errno. NB blocking (flagsp == NULL to soreceive);
+ * used for handshakes, not the data path. */
+int
+ksocknal_lib_sock_read (struct socket *sock, void *buffer, int nob)
+{
+ int rc;
+ CFS_DECL_NET_DATA;
+
+ while (nob > 0) {
+ struct iovec iov = {
+ .iov_base = buffer,
+ .iov_len = nob
+ };
+ struct uio ruio = {
+ .uio_iov = &iov,
+ .uio_iovcnt = 1,
+ .uio_offset = 0,
+ .uio_resid = nob,
+ .uio_segflg = UIO_SYSSPACE,
+ .uio_rw = UIO_READ,
+ .uio_procp = NULL
+ };
+
+ CFS_NET_IN;
+ rc = soreceive(sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, (struct mbuf **)0, (int *)0);
+ CFS_NET_EX;
+
+ if (rc != 0) {
+ /* interrupted after partial progress: advance past
+ * what was read and retry; any other error is fatal */
+ if ( ruio.uio_resid != nob && ( rc == ERESTART || rc == EINTR ||\
+ rc == EWOULDBLOCK))
+ rc = 0;
+ if (rc != 0)
+ return -rc;
+ rc = nob - ruio.uio_resid;
+ buffer = ((char *)buffer) + rc;
+ nob = ruio.uio_resid;
+ continue;
+ }
+ break;
+ }
+
+ return (0);
+}
+
+/* Sample the connection's socket buffer sizes and nagle setting.
+ * On success returns 0 with *txmem/*rxmem/*nagle filled in (*nagle is
+ * 1 when Nagle is ENABLED, i.e. TCP_NODELAY off). On failure all
+ * three outputs are zeroed and a negative errno is returned. */
+int
+ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle)
+{
+ struct sockopt sopt;
+ struct socket *sock = conn->ksnc_sock;
+ int len;
+ int rc;
+ CFS_DECL_NET_DATA;
+
+ rc = ksocknal_getconnsock (conn);
+ if (rc != 0) {
+ LASSERT (conn->ksnc_closing);
+ *txmem = *rxmem = *nagle = 0;
+ /* NB fixed: keep rc positive — we return (-rc) below, and
+ * the old 'rc = -ESHUTDOWN' made this path return
+ * +ESHUTDOWN while every other error path returns a
+ * negative errno. */
+ rc = ESHUTDOWN;
+ goto out;
+ }
+ len = sizeof(*txmem);
+ bzero(&sopt, sizeof sopt);
+ sopt.sopt_dir = SOPT_GET;
+ sopt.sopt_level = SOL_SOCKET;
+ sopt.sopt_name = SO_SNDBUF;
+ sopt.sopt_val = txmem;
+ sopt.sopt_valsize = len;
+
+ CFS_NET_IN;
+ rc = sogetopt(sock, &sopt);
+ if (rc == 0) {
+ len = sizeof(*rxmem);
+ sopt.sopt_name = SO_RCVBUF;
+ sopt.sopt_val = rxmem;
+ rc = sogetopt(sock, &sopt);
+ }
+ if (rc == 0) {
+ len = sizeof(*nagle);
+ sopt.sopt_level = IPPROTO_TCP;
+ sopt.sopt_name = TCP_NODELAY;
+ sopt.sopt_val = nagle;
+ rc = sogetopt(sock, &sopt);
+ }
+ CFS_NET_EX;
+
+ ksocknal_putconnsock (conn);
+
+ if (rc == 0)
+ *nagle = !*nagle; /* report "nagle on", not "nodelay on" */
+ else
+ *txmem = *rxmem = *nagle = 0;
+out:
+ return (-rc);
+}
+
+/* Configure a freshly accepted/connected socket: no lingering close,
+ * optional TCP_NODELAY, optional send/receive buffer sizing (capped
+ * at ksocknal_mbuf_size), and keepalive. Returns 0 or a negative BSD
+ * errno.
+ * NOTE(review): Darwin only exposes TCP_KEEPALIVE (idle time);
+ * keep_count/keep_intvl merely gate whether keepalive is enabled. */
+int
+ksocknal_lib_setup_sock (struct socket *so)
+{
+ struct sockopt sopt;
+ int rc;
+ int option;
+ int keep_idle;
+ int keep_intvl;
+ int keep_count;
+ int do_keepalive;
+ struct linger linger;
+ CFS_DECL_NET_DATA;
+
+ /* Ensure this socket aborts active sends immediately when we close
+ * it. */
+
+ bzero(&sopt, sizeof sopt);
+
+ linger.l_onoff = 0;
+ linger.l_linger = 0;
+ sopt.sopt_dir = SOPT_SET;
+ sopt.sopt_level = SOL_SOCKET;
+ sopt.sopt_name = SO_LINGER;
+ sopt.sopt_val = &linger;
+ sopt.sopt_valsize = sizeof(linger);
+
+ CFS_NET_IN;
+ rc = sosetopt(so, &sopt);
+ if (rc != 0) {
+ CERROR ("Can't set SO_LINGER: %d\n", rc);
+ goto out;
+ }
+
+
+ if (!ksocknal_tunables.ksnd_nagle) {
+ option = 1;
+ bzero(&sopt, sizeof sopt);
+ sopt.sopt_dir = SOPT_SET;
+ sopt.sopt_level = IPPROTO_TCP;
+ sopt.sopt_name = TCP_NODELAY;
+ sopt.sopt_val = &option;
+ sopt.sopt_valsize = sizeof(option);
+ rc = sosetopt(so, &sopt);
+ if (rc != 0) {
+ CERROR ("Can't disable nagle: %d\n", rc);
+ goto out;
+ }
+ }
+ if (ksocknal_tunables.ksnd_buffer_size > 0) {
+ option = ksocknal_tunables.ksnd_buffer_size;
+ /* clamp to what the kernel's sb_max allows */
+ if (option > ksocknal_mbuf_size)
+ option = ksocknal_mbuf_size;
+
+ sopt.sopt_dir = SOPT_SET;
+ sopt.sopt_level = SOL_SOCKET;
+ sopt.sopt_name = SO_SNDBUF;
+ sopt.sopt_val = &option;
+ sopt.sopt_valsize = sizeof(option);
+ rc = sosetopt(so, &sopt);
+ if (rc != 0) {
+ CERROR ("Can't set send buffer %d: %d\n",
+ option, rc);
+ goto out;
+ }
+
+ sopt.sopt_name = SO_RCVBUF;
+ rc = sosetopt(so, &sopt);
+ if (rc != 0) {
+ CERROR ("Can't set receive buffer %d: %d\n",
+ option, rc);
+ goto out;
+ }
+ }
+ /* snapshot tunables */
+ keep_idle = ksocknal_tunables.ksnd_keepalive_idle;
+ keep_count = ksocknal_tunables.ksnd_keepalive_count;
+ keep_intvl = ksocknal_tunables.ksnd_keepalive_intvl;
+
+ do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
+ option = (do_keepalive ? 1 : 0);
+ bzero(&sopt, sizeof sopt);
+ sopt.sopt_dir = SOPT_SET;
+ sopt.sopt_level = SOL_SOCKET;
+ sopt.sopt_name = SO_KEEPALIVE;
+ sopt.sopt_val = &option;
+ sopt.sopt_valsize = sizeof(option);
+ rc = sosetopt(so, &sopt);
+ if (rc != 0) {
+ CERROR ("Can't set SO_KEEPALIVE: %d\n", rc);
+ goto out;
+ }
+
+ if (!do_keepalive) {
+ /* no more setting, just return */
+ rc = 0;
+ goto out;
+ }
+
+ bzero(&sopt, sizeof sopt);
+ sopt.sopt_dir = SOPT_SET;
+ sopt.sopt_level = IPPROTO_TCP;
+ sopt.sopt_name = TCP_KEEPALIVE;
+ sopt.sopt_val = &keep_idle;
+ sopt.sopt_valsize = sizeof(keep_idle);
+ rc = sosetopt(so, &sopt);
+ if (rc != 0) {
+ CERROR ("Can't set TCP_KEEPALIVE : %d\n", rc);
+ goto out;
+ }
+out:
+ CFS_NET_EX;
+ return (-rc);
+}
+
+/* Create a socket bound to route->ksnr_myipaddr:local_port and
+ * connect it (blocking, bounded by the SO_SND/RCVTIMEO set below) to
+ * the route's peer. On success *sockp holds the connected socket and
+ * 0 is returned; on failure a negative errno. *may_retry is set when
+ * the failure is a transient port clash (EADDRINUSE/EADDRNOTAVAIL)
+ * and the caller should try the next reserved port. */
+int
+ksocknal_lib_connect_sock (struct socket **sockp, int *may_retry,
+ ksock_route_t *route, int local_port)
+{
+ struct sockaddr_in locaddr;
+ struct sockaddr_in srvaddr;
+ struct timeval tv;
+ int fd;
+ struct socket *so;
+ struct sockopt sopt;
+ int option;
+ int rc;
+ int s;
+ CFS_DECL_FUNNEL_DATA;
+
+ ENTRY;
+ bzero (&locaddr, sizeof (locaddr));
+ locaddr.sin_len = sizeof(struct sockaddr_in);
+ locaddr.sin_family = AF_INET;
+ locaddr.sin_port = htons (local_port);
+ locaddr.sin_addr.s_addr =
+ (route->ksnr_myipaddr != 0) ? htonl(route->ksnr_myipaddr)
+ : INADDR_ANY;
+ bzero(&srvaddr, sizeof(srvaddr));
+ srvaddr.sin_len = sizeof(struct sockaddr_in);
+ srvaddr.sin_family = AF_INET;
+ srvaddr.sin_port = htons (route->ksnr_port);
+ srvaddr.sin_addr.s_addr = htonl (route->ksnr_ipaddr);
+
+ *may_retry = 0;
+
+ CFS_NET_IN;
+ rc = socreate(PF_INET, &so, SOCK_STREAM, 0);
+ CFS_NET_EX;
+ *sockp = so;
+ if (rc != 0) {
+ CERROR ("Can't create autoconnect socket: %d\n", rc);
+ return (-rc);
+ }
+
+ /*
+ * XXX
+ * Liang: what do we need here?
+ * (attach a struct file so KSN_SOCK2FILE()/rele_file() work;
+ * the fd slot itself is given back immediately below)
+ */
+ fd = sock_map_fd (so);
+ if (fd < 0) {
+ sock_release (so);
+ CERROR ("sock_map_fd error %d\n", fd);
+ return (fd);
+ }
+ sock_fdrelse(fd);
+
+ /* Set the socket timeouts, so our connection attempt completes in
+ * finite time */
+ tv.tv_sec = ksocknal_tunables.ksnd_io_timeout;
+ tv.tv_usec = 0;
+ bzero(&sopt, sizeof sopt);
+ sopt.sopt_dir = SOPT_SET;
+ sopt.sopt_level = SOL_SOCKET;
+ sopt.sopt_name = SO_SNDTIMEO;
+ sopt.sopt_val = &tv;
+ sopt.sopt_valsize = sizeof(tv);
+
+ CFS_NET_IN;
+ rc = sosetopt(so, &sopt);
+ if (rc != 0) {
+ CFS_NET_EX;
+ CERROR ("Can't set send timeout %d: %d\n",
+ ksocknal_tunables.ksnd_io_timeout, rc);
+ goto out;
+ }
+ sopt.sopt_level = SOL_SOCKET;
+ sopt.sopt_name = SO_RCVTIMEO;
+ rc = sosetopt(so, &sopt);
+ if (rc != 0) {
+ CFS_NET_EX;
+ CERROR ("Can't set receive timeout %d: %d\n",
+ ksocknal_tunables.ksnd_io_timeout, rc);
+ goto out;
+ }
+ option = 1;
+ sopt.sopt_level = SOL_SOCKET;
+ sopt.sopt_name = SO_REUSEADDR;
+ sopt.sopt_val = &option;
+ sopt.sopt_valsize = sizeof(option);
+ rc = sosetopt(so, &sopt);
+ if (rc != 0) {
+ CFS_NET_EX;
+ CERROR ("Can't set sock reuse address: %d\n", rc);
+ goto out;
+ }
+ rc = sobind(so, (struct sockaddr *)&locaddr);
+ if (rc == EADDRINUSE) {
+ CFS_NET_EX;
+ CDEBUG(D_NET, "Port %d already in use\n", local_port);
+ *may_retry = 1;
+ goto out;
+ }
+ if (rc != 0) {
+ CFS_NET_EX;
+ CERROR ("Can't bind to local IP Address %u.%u.%u.%u: %d\n",
+ HIPQUAD(route->ksnr_myipaddr), rc);
+ goto out;
+ }
+ rc = soconnect(so, (struct sockaddr *)&srvaddr);
+ *may_retry = (rc == EADDRNOTAVAIL || rc == EADDRINUSE);
+ if (rc != 0) {
+ CFS_NET_EX;
+ if (rc != EADDRNOTAVAIL && rc != EADDRINUSE)
+ CERROR ("Can't connect to nid "LPX64
+ " local IP: %u.%u.%u.%u,"
+ " remote IP: %u.%u.%u.%u/%d: %d\n",
+ route->ksnr_peer->ksnp_nid,
+ HIPQUAD(route->ksnr_myipaddr),
+ HIPQUAD(route->ksnr_ipaddr),
+ route->ksnr_port, rc);
+ goto out;
+ }
+
+ /* wait (bounded by the timeouts above) for the non-blocking
+ * connect to complete or fail */
+ s = splnet();
+ while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
+ CDEBUG(D_NET, "ksocknal sleep for waiting auto_connect.\n");
+ (void) tsleep((caddr_t)&so->so_timeo, PSOCK, "ksocknal_conn", hz);
+ }
+ LASSERT((so->so_state & SS_ISCONNECTED));
+ splx(s);
+ CFS_NET_EX;
+
+ rc = so->so_error;
+ if (rc != 0) {
+ CERROR ("Error %d waiting for connection to nid "LPX64
+ " local IP: %u.%u.%u.%u,"
+ " remote IP: %u.%u.%u.%u/%d: %d\n", rc,
+ route->ksnr_peer->ksnp_nid,
+ HIPQUAD(route->ksnr_myipaddr),
+ HIPQUAD(route->ksnr_ipaddr),
+ route->ksnr_port, rc);
+ goto out;
+ }
+ /* NB success keeps the file reference taken in sock_map_fd();
+ * presumably it is dropped at connection teardown — TODO confirm
+ * against the callers. */
+ return (-rc);
+
+ out:
+ rele_file(KSN_SOCK2FILE(so));
+
+ return (-rc);
+}
+
+void
+ksocknal_lib_push_conn(ksock_conn_t *conn)
+{
+ struct socket *sock;
+ struct sockopt sopt;
+ int val = 1;
+ int rc;
+ CFS_DECL_NET_DATA;
+
+ rc = ksocknal_getconnsock (conn);
+ if (rc != 0) /* being shut down */
+ return;
+ sock = conn->ksnc_sock;
+ bzero(&sopt, sizeof sopt);
+ sopt.sopt_dir = SOPT_SET;
+ sopt.sopt_level = IPPROTO_TCP;
+ sopt.sopt_name = TCP_NODELAY;
+ sopt.sopt_val = &val;
+ sopt.sopt_valsize = sizeof val;
+
+ CFS_NET_IN;
+ sosetopt(sock, &sopt);
+ CFS_NET_EX;
+
+ ksocknal_putconnsock (conn);
+ return;
+}
+
+extern void ksocknal_read_callback (ksock_conn_t *conn);
+extern void ksocknal_write_callback (ksock_conn_t *conn);
+
+/* Socket upcall: invoked by the stack when the socket becomes
+ * readable/writable (arg != NULL), or synthetically with arg == NULL
+ * from ksocknal_lib_act_callback() to poll both directions. Forwards
+ * to the arch-independent read/write callbacks under the global
+ * read lock so 'conn' can't be freed under us. */
+static void
+ksocknal_upcall(struct socket *so, caddr_t arg, int waitf)
+{
+ ksock_conn_t *conn;
+ CFS_DECL_NET_DATA;
+ ENTRY;
+
+ read_lock (&ksocknal_data.ksnd_global_lock);
+ conn = so->reserved3;
+
+ if (conn == NULL){
+ /* upcall fired before/after the conn was attached —
+ * nothing to notify */
+ goto out;
+ }
+ if ((so->so_rcv.sb_flags & SB_UPCALL) || !arg ) {
+ extern int soreadable(struct socket *so);
+ CFS_NET_IN;
+ if (conn->ksnc_rx_nob_wanted && soreadable(so)){
+ /* To verify whether the upcall is for receive */
+ CFS_NET_EX;
+ ksocknal_read_callback (conn);
+ }else
+ CFS_NET_EX;
+ }
+ /* now the write direction */
+ if ((so->so_snd.sb_flags & SB_UPCALL) || !arg){
+ extern int sowriteable(struct socket *so);
+ CFS_NET_IN;
+ if (sowriteable(so)){
+ /* socket is writable */
+ CFS_NET_EX;
+ ksocknal_write_callback(conn);
+ } else
+ CFS_NET_EX;
+ }
+out:
+ read_unlock (&ksocknal_data.ksnd_global_lock);
+
+ EXIT;
+}
+
+/* Nothing to save on Darwin: set_callback() installs the upcall
+ * directly and reset_callback() simply clears it. */
+void
+ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn)
+{
+ /* No callback need to save in osx */
+ return;
+}
+
+/* Install ksocknal_upcall() on both directions of the socket and
+ * attach the conn back-pointer (reserved3) it dispatches on. */
+void
+ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn)
+{
+ CFS_DECL_NET_DATA;
+
+ CFS_NET_IN;
+ sock->so_upcallarg = (void *)sock; /* anything not NULL */
+ sock->so_upcall = ksocknal_upcall;
+ sock->so_snd.sb_timeo = 0;
+ sock->so_rcv.sb_timeo = 2 * HZ;
+ sock->so_rcv.sb_flags |= SB_UPCALL;
+ sock->so_snd.sb_flags |= SB_UPCALL;
+ sock->reserved3 = conn;
+ CFS_NET_EX;
+ return;
+}
+
+/* Poll both directions now, as if the stack had raised an upcall. */
+void
+ksocknal_lib_act_callback(struct socket *sock)
+{
+ /* upcall will take the network funnel */
+ ksocknal_upcall (sock, 0, 0);
+}
+
+/* Remove the upcall installed by set_callback().
+ * NOTE(review): reserved3 (the conn back-pointer) is NOT cleared
+ * here — flagged as an unresolved Darwin issue in the changelog. */
+void
+ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn)
+{
+ CFS_DECL_NET_DATA;
+
+ CFS_NET_IN;
+ sock->so_upcall = NULL;
+ sock->so_upcallarg = NULL;
+ sock->so_rcv.sb_flags &= ~SB_UPCALL;
+ sock->so_snd.sb_flags &= ~SB_UPCALL;
+ CFS_NET_EX;
+}
+
+
--- /dev/null
+#ifndef __XNU_SOCKNAL_LIB_H__
+#define __XNU_SOCKNAL_LIB_H__
+
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/stat.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/sysctl.h>
+#include <sys/ubc.h>
+#include <sys/uio.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/namei.h>
+#include <sys/fcntl.h>
+#include <sys/lockf.h>
+#include <sys/syslog.h>
+#include <machine/spl.h>
+#include <mach/mach_types.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <netat/sysglue.h>
+#include <stdarg.h>
+
+#include <libcfs/libcfs.h>
+
+/* Darwin can force an immediate ACK (see ksocknal_lib_eager_ack()) */
+#define SOCKNAL_ARCH_EAGER_ACK 1
+
+/* a socket keeps a back-pointer to its struct file in reserved4 */
+#define KSN_SOCK2FILE(so) ((struct file *)(so)->reserved4)
+#define KSN_CONN2FILE(conn) ((struct file *)(conn)->ksnc_sock->reserved4)
+
+/* bytes queued in the send buffer but not yet sent */
+#define SOCK_WMEM_QUEUED(so) ((so)->so_snd.sb_cc)
+#define SOCK_ERROR(so) ((so)->so_error)
+
+/* true when the send buffer can't accept a low-water-mark's worth */
+#define SOCK_TEST_NOSPACE(so) (sbspace(&(so)->so_snd) < (so)->so_snd.sb_lowat)
+extern struct socket * sockfd_lookup(int fd, void *foo);
+
+/* Darwin runs a single scheduler thread (no per-CPU scheduling) */
+static inline
+int ksocknal_nsched(void)
+{
+ return 1;
+}
+
+#endif
--- /dev/null
+#include "socknal.h"
+
+#ifdef CONFIG_SYSCTL
+#define SOCKNAL_SYSCTL 200
+
+#define SOCKNAL_SYSCTL_TIMEOUT 1
+#define SOCKNAL_SYSCTL_EAGER_ACK 2
+#define SOCKNAL_SYSCTL_ZERO_COPY 3
+#define SOCKNAL_SYSCTL_TYPED 4
+#define SOCKNAL_SYSCTL_MIN_BULK 5
+#define SOCKNAL_SYSCTL_BUFFER_SIZE 6
+#define SOCKNAL_SYSCTL_NAGLE 7
+#define SOCKNAL_SYSCTL_IRQ_AFFINITY 8
+#define SOCKNAL_SYSCTL_KEEPALIVE_IDLE 9
+#define SOCKNAL_SYSCTL_KEEPALIVE_COUNT 10
+#define SOCKNAL_SYSCTL_KEEPALIVE_INTVL 11
+
+/* /proc/sys/socknal/* entries: each row exposes one field of
+ * ksocknal_tunables as a plain int (mode 0644). */
+static ctl_table ksocknal_ctl_table[] = {
+ {SOCKNAL_SYSCTL_TIMEOUT, "timeout",
+ &ksocknal_tunables.ksnd_io_timeout, sizeof (int),
+ 0644, NULL, &proc_dointvec},
+ {SOCKNAL_SYSCTL_EAGER_ACK, "eager_ack",
+ &ksocknal_tunables.ksnd_eager_ack, sizeof (int),
+ 0644, NULL, &proc_dointvec},
+#if SOCKNAL_ZC
+ {SOCKNAL_SYSCTL_ZERO_COPY, "zero_copy",
+ &ksocknal_tunables.ksnd_zc_min_frag, sizeof (int),
+ 0644, NULL, &proc_dointvec},
+#endif
+ {SOCKNAL_SYSCTL_TYPED, "typed",
+ &ksocknal_tunables.ksnd_typed_conns, sizeof (int),
+ 0644, NULL, &proc_dointvec},
+ {SOCKNAL_SYSCTL_MIN_BULK, "min_bulk",
+ &ksocknal_tunables.ksnd_min_bulk, sizeof (int),
+ 0644, NULL, &proc_dointvec},
+ {SOCKNAL_SYSCTL_BUFFER_SIZE, "buffer_size",
+ &ksocknal_tunables.ksnd_buffer_size, sizeof(int),
+ 0644, NULL, &proc_dointvec},
+ {SOCKNAL_SYSCTL_NAGLE, "nagle",
+ &ksocknal_tunables.ksnd_nagle, sizeof(int),
+ 0644, NULL, &proc_dointvec},
+#if CPU_AFFINITY
+ {SOCKNAL_SYSCTL_IRQ_AFFINITY, "irq_affinity",
+ &ksocknal_tunables.ksnd_irq_affinity, sizeof(int),
+ 0644, NULL, &proc_dointvec},
+#endif
+ {SOCKNAL_SYSCTL_KEEPALIVE_IDLE, "keepalive_idle",
+ &ksocknal_tunables.ksnd_keepalive_idle, sizeof(int),
+ 0644, NULL, &proc_dointvec},
+ {SOCKNAL_SYSCTL_KEEPALIVE_COUNT, "keepalive_count",
+ &ksocknal_tunables.ksnd_keepalive_count, sizeof(int),
+ 0644, NULL, &proc_dointvec},
+ {SOCKNAL_SYSCTL_KEEPALIVE_INTVL, "keepalive_intvl",
+ &ksocknal_tunables.ksnd_keepalive_intvl, sizeof(int),
+ 0644, NULL, &proc_dointvec},
+ { 0 }
+};
+
+/* parent directory entry mounting the table above at /proc/sys/socknal */
+ctl_table ksocknal_top_ctl_table[] = {
+ {SOCKNAL_SYSCTL, "socknal", NULL, 0, 0555, ksocknal_ctl_table},
+ { 0 }
+};
+#endif
+
+/* Pin 'irq' to the CPU of the scheduler it was hashed to, the first
+ * time a connection arrives on that IRQ (subsequent calls are no-ops:
+ * ksni_bound latches under the global write lock). Works by spawning
+ * a shell that writes /proc/irq/N/smp_affinity — see FIXME below. */
+void
+ksocknal_lib_bind_irq (unsigned int irq)
+{
+#if (defined(CONFIG_SMP) && CPU_AFFINITY)
+ int bind;
+ int cpu;
+ unsigned long flags;
+ char cmdline[64];
+ ksock_irqinfo_t *info;
+ char *argv[] = {"/bin/sh",
+ "-c",
+ cmdline,
+ NULL};
+ char *envp[] = {"HOME=/",
+ "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
+ NULL};
+
+ LASSERT (irq < NR_IRQS);
+ if (irq == 0) /* software NIC or affinity disabled */
+ return;
+
+ info = &ksocknal_data.ksnd_irqinfo[irq];
+
+ write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
+
+ LASSERT (info->ksni_valid);
+ bind = !info->ksni_bound;
+ info->ksni_bound = 1;
+
+ write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
+
+ if (!bind) /* bound already */
+ return;
+
+ cpu = ksocknal_irqsched2cpu(info->ksni_sched);
+ snprintf (cmdline, sizeof (cmdline),
+ "echo %d > /proc/irq/%u/smp_affinity", 1 << cpu, irq);
+
+ printk (KERN_INFO "Lustre: Binding irq %u to CPU %d with cmd: %s\n",
+ irq, cpu, cmdline);
+
+ /* FIXME: Find a better method of setting IRQ affinity...
+ */
+
+ USERMODEHELPER(argv[0], argv, envp);
+#endif
+}
+
+/* Fill in the connection's peer IPv4 address/port and local IPv4
+ * address by querying the kernel socket. Returns 0 on success or the
+ * error from the socket's getname() op. */
+int
+ksocknal_lib_get_conn_addrs (ksock_conn_t *conn)
+{
+ struct sockaddr_in addr;
+ int addrlen = sizeof (addr);
+ int rc;
+
+ /* Peer end first (getname 'peer' flag == 2). NB no
+ * {get,put}connsock dance is needed just to deref ksnc_sock. */
+ rc = conn->ksnc_sock->ops->getname (conn->ksnc_sock,
+ (struct sockaddr *)&addr, &addrlen, 2);
+ LASSERT (!conn->ksnc_closing);
+ if (rc != 0) {
+ CERROR ("Error %d getting sock peer IP\n", rc);
+ return rc;
+ }
+ conn->ksnc_ipaddr = ntohl (addr.sin_addr.s_addr);
+ conn->ksnc_port = ntohs (addr.sin_port);
+
+ /* ...then the local end ('peer' flag == 0). */
+ rc = conn->ksnc_sock->ops->getname (conn->ksnc_sock,
+ (struct sockaddr *)&addr, &addrlen, 0);
+ if (rc != 0) {
+ CERROR ("Error %d getting sock local IP\n", rc);
+ return rc;
+ }
+ conn->ksnc_myipaddr = ntohl (addr.sin_addr.s_addr);
+
+ return 0;
+}
+
+/* Return the IRQ of the device this socket's traffic is routed
+ * through, or 0 when affinity is disabled or the IRQ can't be
+ * determined. Used to schedule connections near the interrupt CPU. */
+unsigned int
+ksocknal_lib_sock_irq (struct socket *sock)
+{
+ int irq = 0;
+ struct dst_entry *dst;
+
+ if (!ksocknal_tunables.ksnd_irq_affinity)
+ return 0;
+
+ dst = sk_dst_get (sock->sk);
+ if (dst != NULL) {
+ if (dst->dev != NULL) {
+ irq = dst->dev->irq;
+ if (irq >= NR_IRQS) {
+ CERROR ("Unexpected IRQ %x\n", irq);
+ irq = 0;
+ }
+ }
+ dst_release (dst);
+ }
+
+ return (irq);
+}
+
+#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
+/* Map a kernel virtual address onto its struct page so it can be fed
+ * to the zero-copy sendpage path. Returns NULL when the address has
+ * no valid page (caller falls back to an ordinary send). */
+static struct page *
+ksocknal_kvaddr_to_page (unsigned long vaddr)
+{
+ struct page *page;
+
+ if (vaddr >= VMALLOC_START &&
+ vaddr < VMALLOC_END)
+ page = vmalloc_to_page ((void *)vaddr);
+#if CONFIG_HIGHMEM
+ else if (vaddr >= PKMAP_BASE &&
+ vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE))
+ page = vmalloc_to_page ((void *)vaddr);
+ /* in 2.4 ^ just walks the page tables */
+#endif
+ else
+ page = virt_to_page (vaddr);
+
+ if (page == NULL ||
+ !VALID_PAGE (page))
+ return (NULL);
+
+ return (page);
+}
+#endif
+
+/* Send tx's plain iovs on conn's socket without blocking, using the
+ * zero-copy sendpage path when configured and profitable. Returns
+ * bytes accepted or a negative errno from sock_sendmsg(). NB socket
+ * ops may consume or scribble on iovs, so we work on a scratch copy. */
+int
+ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
+{
+ struct socket *sock = conn->ksnc_sock;
+#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
+ /* NB fixed: 'iov' was previously undeclared here and the 'vaddr'
+ * initialiser was missing its terminating semicolon, so this
+ * branch could not compile with zero-copy configured on. Only the
+ * first fragment is considered for zero-copy. */
+ struct iovec *iov = tx->tx_iov;
+ unsigned long vaddr = (unsigned long)iov->iov_base;
+ int offset = vaddr & (PAGE_SIZE - 1);
+ int zcsize = MIN (iov->iov_len, PAGE_SIZE - offset);
+ struct page *page;
+#endif
+ int nob;
+ int rc;
+
+ /* NB we can't trust socket ops to either consume our iovs
+ * or leave them alone. */
+
+#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
+ if (zcsize >= ksocknal_data.ksnd_zc_min_frag &&
+ (sock->sk->route_caps & NETIF_F_SG) &&
+ (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) &&
+ (page = ksocknal_kvaddr_to_page (vaddr)) != NULL) {
+ int msgflg = MSG_DONTWAIT;
+
+ CDEBUG(D_NET, "vaddr %p, page %p->%p + offset %x for %d\n",
+ (void *)vaddr, page, page_address(page), offset, zcsize);
+
+ if (!list_empty (&conn->ksnc_tx_queue) ||
+ zcsize < tx->tx_resid)
+ msgflg |= MSG_MORE;
+
+ rc = tcp_sendpage_zccd(sock, page, offset, zcsize, msgflg, &tx->tx_zccd);
+ } else
+#endif
+ {
+#if SOCKNAL_SINGLE_FRAG_TX
+ struct iovec scratch;
+ struct iovec *scratchiov = &scratch;
+ int niov = 1;
+#else
+ struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
+ int niov = tx->tx_niov;
+#endif
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = scratchiov,
+ .msg_iovlen = niov,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = MSG_DONTWAIT
+ };
+ mm_segment_t oldmm = get_fs();
+ int i;
+
+ for (nob = i = 0; i < niov; i++) {
+ scratchiov[i] = tx->tx_iov[i];
+ nob += scratchiov[i].iov_len;
+ }
+
+ /* more data coming: let TCP coalesce instead of pushing */
+ if (!list_empty(&conn->ksnc_tx_queue) ||
+ nob < tx->tx_resid)
+ msg.msg_flags |= MSG_MORE;
+
+ set_fs (KERNEL_DS);
+ rc = sock_sendmsg(sock, &msg, nob);
+ set_fs (oldmm);
+ }
+ return rc;
+}
+
+/* Send as much of tx's kiov (page-based) payload as the socket will
+ * take without blocking.  Returns the raw sendmsg() result: +ve bytes
+ * sent, or a -ve errno. */
+int
+ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
+{
+ struct socket *sock = conn->ksnc_sock;
+ ptl_kiov_t *kiov = tx->tx_kiov;
+ int rc;
+ int nob;
+
+ /* NB we can't trust socket ops to either consume our iovs
+ * or leave them alone. */
+
+#if SOCKNAL_ZC
+ if (kiov->kiov_len >= ksocknal_tunables.ksnd_zc_min_frag &&
+ (sock->sk->route_caps & NETIF_F_SG) &&
+ (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM))) {
+ struct page *page = kiov->kiov_page;
+ int offset = kiov->kiov_offset;
+ int fragsize = kiov->kiov_len;
+ int msgflg = MSG_DONTWAIT;
+
+ CDEBUG(D_NET, "page %p + offset %x for %d\n",
+ page, offset, kiov->kiov_len);
+
+ if (!list_empty(&conn->ksnc_tx_queue) ||
+ fragsize < tx->tx_resid)
+ msgflg |= MSG_MORE;
+
+ rc = tcp_sendpage_zccd(sock, page, offset, fragsize, msgflg,
+ &tx->tx_zccd);
+ } else
+#endif
+ {
+#if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK
+ struct iovec scratch;
+ struct iovec *scratchiov = &scratch;
+ int niov = 1;
+#else
+#ifdef CONFIG_HIGHMEM
+#warning "XXX risk of kmap deadlock on multiple frags..."
+#endif
+ struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
+ int niov = tx->tx_nkiov;
+#endif
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = scratchiov,
+ .msg_iovlen = niov,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = MSG_DONTWAIT
+ };
+ mm_segment_t oldmm = get_fs();
+ int i;
+
+ /* map each page frag into kernel va for sock_sendmsg */
+ for (nob = i = 0; i < niov; i++) {
+ scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
+ kiov[i].kiov_offset;
+ nob += scratchiov[i].iov_len = kiov[i].kiov_len;
+ }
+
+ /* BUGFIX: was '|= MSG_DONTWAIT' (already set above); the intent,
+ * as in ksocknal_lib_send_iov and the ZC branch, is MSG_MORE so
+ * TCP can coalesce when more data follows. */
+ if (!list_empty(&conn->ksnc_tx_queue) ||
+ nob < tx->tx_resid)
+ msg.msg_flags |= MSG_MORE;
+
+ set_fs (KERNEL_DS);
+ rc = sock_sendmsg(sock, &msg, nob);
+ set_fs (oldmm);
+
+ for (i = 0; i < niov; i++)
+ kunmap(kiov[i].kiov_page);
+ }
+ return rc;
+}
+
+/* Set TCP_QUICKACK so the peer's zero-copy sends complete promptly.
+ * The setsockopt return value is deliberately ignored: this is a
+ * best-effort hint, not a correctness requirement. */
+void
+ksocknal_lib_eager_ack (ksock_conn_t *conn)
+{
+ int opt = 1;
+ mm_segment_t oldmm = get_fs();
+ struct socket *sock = conn->ksnc_sock;
+
+ /* Remind the socket to ACK eagerly. If I don't, the socket might
+ * think I'm about to send something it could piggy-back the ACK
+ * on, introducing delay in completing zero-copy sends in my
+ * peer. */
+
+ set_fs(KERNEL_DS);
+ sock->ops->setsockopt (sock, SOL_TCP, TCP_QUICKACK,
+ (char *)&opt, sizeof (opt));
+ set_fs(oldmm);
+}
+
+/* Non-blocking receive into conn's current iovec fragments.
+ * Returns the raw sock_recvmsg() result: +ve bytes received, 0 on
+ * orderly shutdown, or a -ve errno. */
+int
+ksocknal_lib_recv_iov (ksock_conn_t *conn)
+{
+#if SOCKNAL_SINGLE_FRAG_RX
+ struct iovec scratch;
+ struct iovec *scratchiov = &scratch;
+ int niov = 1;
+#else
+ struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
+ int niov = conn->ksnc_rx_niov;
+#endif
+ struct iovec *iov = conn->ksnc_rx_iov;
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = scratchiov,
+ .msg_iovlen = niov,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = 0
+ };
+ mm_segment_t oldmm = get_fs();
+ int nob;
+ int i;
+ int rc;
+
+ /* NB we can't trust socket ops to either consume our iovs
+ * or leave them alone. */
+ LASSERT (niov > 0);
+
+ /* copy to scratch so the originals survive the call */
+ for (nob = i = 0; i < niov; i++) {
+ scratchiov[i] = iov[i];
+ nob += scratchiov[i].iov_len;
+ }
+ LASSERT (nob <= conn->ksnc_rx_nob_wanted);
+
+ set_fs (KERNEL_DS);
+ rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT);
+ /* NB this is just a boolean..........................^ */
+ set_fs (oldmm);
+
+ return rc;
+}
+
+/* Non-blocking receive into conn's current kiov (page) fragments.
+ * Pages are kmapped for the duration of the recvmsg and unmapped
+ * afterwards.  Returns the raw sock_recvmsg() result. */
+int
+ksocknal_lib_recv_kiov (ksock_conn_t *conn)
+{
+#if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK
+ struct iovec scratch;
+ struct iovec *scratchiov = &scratch;
+ int niov = 1;
+#else
+#ifdef CONFIG_HIGHMEM
+#warning "XXX risk of kmap deadlock on multiple frags..."
+#endif
+ struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
+ int niov = conn->ksnc_rx_nkiov;
+#endif
+ ptl_kiov_t *kiov = conn->ksnc_rx_kiov;
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = scratchiov,
+ .msg_iovlen = niov,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = 0
+ };
+ mm_segment_t oldmm = get_fs();
+ int nob;
+ int i;
+ int rc;
+
+ /* NB we can't trust socket ops to either consume our iovs
+ * or leave them alone. */
+ for (nob = i = 0; i < niov; i++) {
+ scratchiov[i].iov_base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset;
+ nob += scratchiov[i].iov_len = kiov[i].kiov_len;
+ }
+ LASSERT (nob <= conn->ksnc_rx_nob_wanted);
+
+ set_fs (KERNEL_DS);
+ rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT);
+ /* NB this is just a boolean.......................^ */
+ set_fs (oldmm);
+
+ /* unmap every page mapped above, even on error */
+ for (i = 0; i < niov; i++)
+ kunmap(kiov[i].kiov_page);
+
+ return (rc);
+}
+
+/* Blocking helper: write exactly 'nob' bytes from 'buffer' to 'sock',
+ * looping over partial sends.  Returns 0 on success or a -ve errno;
+ * a zero-byte send is treated as a broken connection. */
+int
+ksocknal_lib_sock_write (struct socket *sock, void *buffer, int nob)
+{
+ int rc;
+ mm_segment_t oldmm = get_fs();
+
+ while (nob > 0) {
+ struct iovec iov = {
+ .iov_base = buffer,
+ .iov_len = nob
+ };
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = 0
+ };
+
+ set_fs (KERNEL_DS);
+ rc = sock_sendmsg (sock, &msg, iov.iov_len);
+ set_fs (oldmm);
+
+ if (rc < 0)
+ return (rc);
+
+ if (rc == 0) {
+ CERROR ("Unexpected zero rc\n");
+ return (-ECONNABORTED);
+ }
+
+ /* partial send: advance and retry */
+ buffer = ((char *)buffer) + rc;
+ nob -= rc;
+ }
+
+ return (0);
+}
+
+/* Blocking helper: read exactly 'nob' bytes from 'sock' into 'buffer',
+ * looping over partial reads.  Returns 0 on success, -ECONNABORTED on
+ * EOF, or a -ve errno. */
+int
+ksocknal_lib_sock_read (struct socket *sock, void *buffer, int nob)
+{
+ int rc;
+ mm_segment_t oldmm = get_fs();
+
+ while (nob > 0) {
+ struct iovec iov = {
+ .iov_base = buffer,
+ .iov_len = nob
+ };
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = 0
+ };
+
+ set_fs (KERNEL_DS);
+ rc = sock_recvmsg (sock, &msg, iov.iov_len, 0);
+ set_fs (oldmm);
+
+ if (rc < 0)
+ return (rc);
+
+ /* EOF before all bytes arrived */
+ if (rc == 0)
+ return (-ECONNABORTED);
+
+ buffer = ((char *)buffer) + rc;
+ nob -= rc;
+ }
+
+ return (0);
+}
+
+/* Fetch the conn's current SO_SNDBUF/SO_RCVBUF sizes and whether nagle
+ * is enabled (*nagle = !TCP_NODELAY).  On any failure all three outputs
+ * are zeroed; returns 0 or a -ve errno (-ESHUTDOWN if conn is closing). */
+int
+ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle)
+{
+ mm_segment_t oldmm = get_fs ();
+ struct socket *sock = conn->ksnc_sock;
+ int len;
+ int rc;
+
+ /* take a ref on the socket so it can't vanish under us */
+ rc = ksocknal_getconnsock (conn);
+ if (rc != 0) {
+ LASSERT (conn->ksnc_closing);
+ *txmem = *rxmem = *nagle = 0;
+ return (-ESHUTDOWN);
+ }
+
+ set_fs (KERNEL_DS);
+
+ len = sizeof(*txmem);
+ rc = sock_getsockopt(sock, SOL_SOCKET, SO_SNDBUF,
+ (char *)txmem, &len);
+ if (rc == 0) {
+ len = sizeof(*rxmem);
+ rc = sock_getsockopt(sock, SOL_SOCKET, SO_RCVBUF,
+ (char *)rxmem, &len);
+ }
+ if (rc == 0) {
+ len = sizeof(*nagle);
+ rc = sock->ops->getsockopt(sock, SOL_TCP, TCP_NODELAY,
+ (char *)nagle, &len);
+ }
+
+ set_fs (oldmm);
+ ksocknal_putconnsock (conn);
+
+ if (rc == 0)
+ *nagle = !*nagle; /* TCP_NODELAY set means nagle is OFF */
+ else
+ *txmem = *rxmem = *nagle = 0;
+
+ return (rc);
+}
+
+/* Configure a freshly-created/accepted socket for socknal use:
+ * no lingering on close, optional nagle disable, optional buffer
+ * sizing, and keepalive per the ksnd_keepalive_* tunables.
+ * Returns 0 or the first failing setsockopt's -ve errno. */
+int
+ksocknal_lib_setup_sock (struct socket *sock)
+{
+ mm_segment_t oldmm = get_fs ();
+ int rc;
+ int option;
+ int keep_idle;
+ int keep_intvl;
+ int keep_count;
+ int do_keepalive;
+ struct linger linger;
+
+ /* avoid fs recursion when the socket allocates under memory pressure */
+ sock->sk->sk_allocation = GFP_NOFS;
+
+ /* Ensure this socket aborts active sends immediately when we close
+ * it. */
+
+ linger.l_onoff = 0;
+ linger.l_linger = 0;
+
+ set_fs (KERNEL_DS);
+ rc = sock_setsockopt (sock, SOL_SOCKET, SO_LINGER,
+ (char *)&linger, sizeof (linger));
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR ("Can't set SO_LINGER: %d\n", rc);
+ return (rc);
+ }
+
+ /* TCP_LINGER2 < 0 disables FIN_WAIT2 lingering */
+ option = -1;
+ set_fs (KERNEL_DS);
+ rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_LINGER2,
+ (char *)&option, sizeof (option));
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR ("Can't set SO_LINGER2: %d\n", rc);
+ return (rc);
+ }
+
+ if (!ksocknal_tunables.ksnd_nagle) {
+ option = 1;
+
+ set_fs (KERNEL_DS);
+ rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_NODELAY,
+ (char *)&option, sizeof (option));
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR ("Can't disable nagle: %d\n", rc);
+ return (rc);
+ }
+ }
+
+ if (ksocknal_tunables.ksnd_buffer_size > 0) {
+ option = ksocknal_tunables.ksnd_buffer_size;
+
+ set_fs (KERNEL_DS);
+ rc = sock_setsockopt (sock, SOL_SOCKET, SO_SNDBUF,
+ (char *)&option, sizeof (option));
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR ("Can't set send buffer %d: %d\n",
+ option, rc);
+ return (rc);
+ }
+
+ set_fs (KERNEL_DS);
+ rc = sock_setsockopt (sock, SOL_SOCKET, SO_RCVBUF,
+ (char *)&option, sizeof (option));
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR ("Can't set receive buffer %d: %d\n",
+ option, rc);
+ return (rc);
+ }
+ }
+
+ /* snapshot tunables */
+ keep_idle = ksocknal_tunables.ksnd_keepalive_idle;
+ keep_count = ksocknal_tunables.ksnd_keepalive_count;
+ keep_intvl = ksocknal_tunables.ksnd_keepalive_intvl;
+
+ /* keepalive only if ALL three tunables are positive */
+ do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
+
+ option = (do_keepalive ? 1 : 0);
+ set_fs (KERNEL_DS);
+ rc = sock_setsockopt (sock, SOL_SOCKET, SO_KEEPALIVE,
+ (char *)&option, sizeof (option));
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR ("Can't set SO_KEEPALIVE: %d\n", rc);
+ return (rc);
+ }
+
+ if (!do_keepalive)
+ return (0);
+
+ set_fs (KERNEL_DS);
+ rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPIDLE,
+ (char *)&keep_idle, sizeof (keep_idle));
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR ("Can't set TCP_KEEPIDLE: %d\n", rc);
+ return (rc);
+ }
+
+ set_fs (KERNEL_DS);
+ rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPINTVL,
+ (char *)&keep_intvl, sizeof (keep_intvl));
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR ("Can't set TCP_KEEPINTVL: %d\n", rc);
+ return (rc);
+ }
+
+ set_fs (KERNEL_DS);
+ rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPCNT,
+ (char *)&keep_count, sizeof (keep_count));
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR ("Can't set TCP_KEEPCNT: %d\n", rc);
+ return (rc);
+ }
+
+ return (0);
+}
+
+/* Create a socket, bind it to 'local_port' (and route's local IP if
+ * set) and connect it to the route's peer.  On success *sockp holds
+ * the connected socket (its file holds the only ref).  On failure
+ * returns a -ve errno; *may_retry is set when the caller should just
+ * try another local port (port in use / addr not available). */
+int
+ksocknal_lib_connect_sock(struct socket **sockp, int *may_retry,
+ ksock_route_t *route, int local_port)
+{
+ struct sockaddr_in locaddr;
+ struct sockaddr_in srvaddr;
+ struct socket *sock;
+ int rc;
+ int option;
+ mm_segment_t oldmm = get_fs();
+ struct timeval tv;
+
+ memset(&locaddr, 0, sizeof(locaddr));
+ locaddr.sin_family = AF_INET;
+ locaddr.sin_port = htons(local_port);
+ locaddr.sin_addr.s_addr =
+ (route->ksnr_myipaddr != 0) ? htonl(route->ksnr_myipaddr)
+ : INADDR_ANY;
+
+ memset (&srvaddr, 0, sizeof (srvaddr));
+ srvaddr.sin_family = AF_INET;
+ srvaddr.sin_port = htons (route->ksnr_port);
+ srvaddr.sin_addr.s_addr = htonl (route->ksnr_ipaddr);
+
+ *may_retry = 0;
+
+ rc = sock_create (PF_INET, SOCK_STREAM, 0, &sock);
+ *sockp = sock;
+ if (rc != 0) {
+ CERROR ("Can't create autoconnect socket: %d\n", rc);
+ return (rc);
+ }
+
+ /* Ugh; have to map_fd for compatibility with sockets passed in
+ * from userspace. And we actually need the sock->file refcounting
+ * that this gives you :) */
+
+ rc = sock_map_fd (sock);
+ if (rc < 0) {
+ sock_release (sock);
+ CERROR ("sock_map_fd error %d\n", rc);
+ return (rc);
+ }
+
+ /* NB the file descriptor (rc) now owns the ref on sock->file */
+ LASSERT (sock->file != NULL);
+ LASSERT (file_count(sock->file) == 1);
+
+ get_file(sock->file); /* extra ref makes sock->file */
+ sys_close(rc); /* survive this close */
+
+ /* Still got a single ref on sock->file */
+ LASSERT (file_count(sock->file) == 1);
+
+ /* Set the socket timeouts, so our connection attempt completes in
+ * finite time */
+ tv.tv_sec = ksocknal_tunables.ksnd_io_timeout;
+ tv.tv_usec = 0;
+
+ set_fs (KERNEL_DS);
+ rc = sock_setsockopt (sock, SOL_SOCKET, SO_SNDTIMEO,
+ (char *)&tv, sizeof (tv));
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR ("Can't set send timeout %d: %d\n",
+ ksocknal_tunables.ksnd_io_timeout, rc);
+ goto failed;
+ }
+
+ set_fs (KERNEL_DS);
+ rc = sock_setsockopt (sock, SOL_SOCKET, SO_RCVTIMEO,
+ (char *)&tv, sizeof (tv));
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR ("Can't set receive timeout %d: %d\n",
+ ksocknal_tunables.ksnd_io_timeout, rc);
+ goto failed;
+ }
+
+ set_fs (KERNEL_DS);
+ option = 1;
+ rc = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
+ (char *)&option, sizeof (option));
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc);
+ goto failed;
+ }
+
+ rc = sock->ops->bind(sock,
+ (struct sockaddr *)&locaddr, sizeof(locaddr));
+ if (rc == -EADDRINUSE) {
+ /* expected during port scanning; not an error */
+ CDEBUG(D_NET, "Port %d already in use\n", local_port);
+ *may_retry = 1;
+ goto failed;
+ }
+ if (rc != 0) {
+ CERROR("Error trying to bind to reserved port %d: %d\n",
+ local_port, rc);
+ goto failed;
+ }
+
+ rc = sock->ops->connect(sock,
+ (struct sockaddr *)&srvaddr, sizeof(srvaddr),
+ sock->file->f_flags);
+ if (rc == 0)
+ return 0;
+
+ /* EADDRNOTAVAIL probably means we're already connected to the same
+ * peer/port on the same local port on a differently typed
+ * connection. Let our caller retry with a different local
+ * port... */
+ *may_retry = (rc == -EADDRNOTAVAIL);
+
+ CDEBUG(*may_retry ? D_NET : D_ERROR,
+ "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc,
+ HIPQUAD(route->ksnr_myipaddr), local_port,
+ HIPQUAD(route->ksnr_ipaddr), route->ksnr_port);
+
+ failed:
+ /* drops the last ref on sock->file, releasing the socket */
+ fput(sock->file);
+ return rc;
+}
+
+/* Map a struct sock to its TCP protocol-private state.  The layout
+ * changed between 2.4 (tp_pinfo.af_tcp inside the sock) and 2.5+
+ * (tcp_sock wrapping the sock), hence the two variants. */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+struct tcp_opt *sock2tcp_opt(struct sock *sk)
+{
+ return &(sk->tp_pinfo.af_tcp);
+}
+#else
+struct tcp_opt *sock2tcp_opt(struct sock *sk)
+{
+ struct tcp_sock *s = (struct tcp_sock *)sk;
+ return &s->tcp;
+}
+#endif
+
+/* Force any queued data out on conn's socket by momentarily setting
+ * TCP_NODELAY (restoring the previous nonagle setting afterwards).
+ * NOOPs if the conn is already being shut down. */
+void
+ksocknal_lib_push_conn (ksock_conn_t *conn)
+{
+ struct sock *sk;
+ struct tcp_opt *tp;
+ int nonagle;
+ int val = 1;
+ int rc;
+ mm_segment_t oldmm;
+
+ rc = ksocknal_getconnsock (conn);
+ if (rc != 0) /* being shut down */
+ return;
+
+ sk = conn->ksnc_sock->sk;
+ tp = sock2tcp_opt(sk);
+
+ /* stash current nonagle and clear it so setsockopt pushes data */
+ lock_sock (sk);
+ nonagle = tp->nonagle;
+ tp->nonagle = 1;
+ release_sock (sk);
+
+ oldmm = get_fs ();
+ set_fs (KERNEL_DS);
+
+ rc = sk->sk_prot->setsockopt (sk, SOL_TCP, TCP_NODELAY,
+ (char *)&val, sizeof (val));
+ LASSERT (rc == 0);
+
+ set_fs (oldmm);
+
+ /* restore the caller-visible nagle setting */
+ lock_sock (sk);
+ tp->nonagle = nonagle;
+ release_sock (sk);
+
+ ksocknal_putconnsock (conn);
+}
+
+extern void ksocknal_read_callback (ksock_conn_t *conn);
+extern void ksocknal_write_callback (ksock_conn_t *conn);
+/*
+ * socket call back in Linux
+ */
+/* sk_data_ready hook: schedule the conn for RX processing.
+ * Serialises with connection teardown via ksnd_global_lock; if
+ * sk_user_data is already NULL we raced with ksocknal_terminate_conn
+ * and must chain to the socket's original (restored) callback. */
+static void
+ksocknal_data_ready (struct sock *sk, int n)
+{
+ ksock_conn_t *conn;
+ ENTRY;
+
+ /* interleave correctly with closing sockets... */
+ read_lock (&ksocknal_data.ksnd_global_lock);
+
+ conn = sk->sk_user_data;
+ if (conn == NULL) { /* raced with ksocknal_terminate_conn */
+ LASSERT (sk->sk_data_ready != &ksocknal_data_ready);
+ sk->sk_data_ready (sk, n);
+ } else
+ ksocknal_read_callback(conn);
+
+ read_unlock (&ksocknal_data.ksnd_global_lock);
+
+ EXIT;
+}
+
+/* sk_write_space hook: wake the conn's TX path once enough socket
+ * buffer space is available.  NB the callback-then-clear_bit ordering
+ * below is the race fix for the ENOMEM/EAGAIN decision in
+ * ksocknal_transmit (see inline comment). */
+static void
+ksocknal_write_space (struct sock *sk)
+{
+ ksock_conn_t *conn;
+
+ /* interleave correctly with closing sockets... */
+ read_lock (&ksocknal_data.ksnd_global_lock);
+
+ conn = sk->sk_user_data;
+
+ CDEBUG(D_NET, "sk %p wspace %d low water %d conn %p%s%s%s\n",
+ sk, tcp_wspace(sk), SOCKNAL_TX_LOW_WATER(sk), conn,
+ (conn == NULL) ? "" : (conn->ksnc_tx_ready ?
+ " ready" : " blocked"),
+ (conn == NULL) ? "" : (conn->ksnc_tx_scheduled ?
+ " scheduled" : " idle"),
+ (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ?
+ " empty" : " queued"));
+
+ if (conn == NULL) { /* raced with ksocknal_terminate_conn */
+ LASSERT (sk->sk_write_space != &ksocknal_write_space);
+ sk->sk_write_space (sk);
+
+ read_unlock (&ksocknal_data.ksnd_global_lock);
+ return;
+ }
+
+ if (tcp_wspace(sk) >= SOCKNAL_TX_LOW_WATER(sk)) { /* got enough space */
+ ksocknal_write_callback(conn);
+
+ /* Clear SOCK_NOSPACE _after_ ksocknal_write_callback so the
+ * ENOMEM check in ksocknal_transmit is race-free (think about
+ * it). */
+
+ clear_bit (SOCK_NOSPACE, &sk->sk_socket->flags);
+ }
+
+ read_unlock (&ksocknal_data.ksnd_global_lock);
+}
+
+/* Remember the socket's original callbacks so that
+ * ksocknal_lib_reset_callback() can restore them at teardown. */
+void
+ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn)
+{
+ struct sock *sk = sock->sk;
+
+ conn->ksnc_saved_write_space = sk->sk_write_space;
+ conn->ksnc_saved_data_ready = sk->sk_data_ready;
+}
+
+/* Install the socknal's data-ready/write-space handlers on the socket
+ * and point sk_user_data at the owning conn. */
+void
+ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn)
+{
+ struct sock *sk = sock->sk;
+
+ sk->sk_user_data = conn;
+ sk->sk_data_ready = ksocknal_data_ready;
+ sk->sk_write_space = ksocknal_write_space;
+}
+
+/* Fire both callbacks once, as if the socket had just become readable
+ * and writable, so any state that arrived before the callbacks were
+ * installed gets processed. */
+void
+ksocknal_lib_act_callback(struct socket *sock, ksock_conn_t *conn)
+{
+ struct sock *sk = sock->sk;
+
+ ksocknal_data_ready (sk, 0);
+ ksocknal_write_space (sk);
+}
+
+/* Undo ksocknal_lib_set_callback(): restore the saved callbacks and
+ * detach the conn from the socket. */
+void
+ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn)
+{
+ /* Remove conn's network callbacks.
+ * NB I _have_ to restore the callback, rather than storing a noop,
+ * since the socket could survive past this module being unloaded!! */
+ sock->sk->sk_data_ready = conn->ksnc_saved_data_ready;
+ sock->sk->sk_write_space = conn->ksnc_saved_write_space;
+
+ /* A callback could be in progress already; they hold a read lock
+ * on ksnd_global_lock (to serialise with me) and NOOP if
+ * sk_user_data is NULL. */
+ sock->sk->sk_user_data = NULL;
+
+ return ;
+}
+
--- /dev/null
+#define DEBUG_PORTAL_ALLOC
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+
+#ifndef __LINUX_SOCKNAL_LIB_H__
+#define __LINUX_SOCKNAL_LIB_H__
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/version.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/smp_lock.h>
+#include <linux/unistd.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <linux/uio.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/irq.h>
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/list.h>
+#include <linux/kmod.h>
+#include <linux/sysctl.h>
+#include <asm/uaccess.h>
+#include <asm/segment.h>
+#include <asm/div64.h>
+
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+# include <linux/syscalls.h>
+#endif
+
+#include <libcfs/kp30.h>
+#include <libcfs/linux/portals_compat25.h>
+
+#define SOCKNAL_TX_LOW_WATER(sk) (((sk)->sk_sndbuf*8)/10)
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,72))
+# define sk_allocation allocation
+# define sk_data_ready data_ready
+# define sk_write_space write_space
+# define sk_user_data user_data
+# define sk_prot prot
+# define sk_sndbuf sndbuf
+# define sk_socket socket
+#endif
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
+# define sk_wmem_queued wmem_queued
+# define sk_err err
+#endif
+
+#define SOCKNAL_ARCH_EAGER_ACK 0
+#define SOCK_WMEM_QUEUED(so) ((so)->sk->sk_wmem_queued)
+#define SOCK_ERROR(so) ((so)->sk->sk_err)
+#define SOCK_TEST_NOSPACE(so) test_bit(SOCK_NOSPACE, &(so)->flags)
+
+#define KSN_SOCK2FILE(so) ((so)->file)
+#define KSN_CONN2FILE(conn) ((conn)->ksnc_sock->file)
+
+/* Scheduler-thread sizing and scheduler->CPU mapping helpers.
+ * UP: one scheduler.  SMP: one per online CPU, except on x86
+ * hyperthreaded kernels (the final #else branch) where one scheduler
+ * is run per physical core and mapped to even-numbered logical CPUs,
+ * with IRQ handling on the odd sibling. */
+#ifndef CONFIG_SMP
+static inline
+int ksocknal_nsched(void)
+{
+ return 1;
+}
+#else
+#include <linux/lustre_version.h>
+# if !(defined(CONFIG_X86) && (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,21))) || defined(CONFIG_X86_64) || (LUSTRE_KERNEL_VERSION < 39) || ((LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) && !defined(CONFIG_X86_HT))
+static inline int
+ksocknal_nsched(void)
+{
+ return num_online_cpus();
+}
+
+static inline int
+ksocknal_sched2cpu(int i)
+{
+ return i;
+}
+
+static inline int
+ksocknal_irqsched2cpu(int i)
+{
+ return i;
+}
+# else
+static inline int
+ksocknal_nsched(void)
+{
+ if (smp_num_siblings == 1)
+ return (num_online_cpus());
+
+ /* We need to know if this assumption is crap */
+ LASSERT (smp_num_siblings == 2);
+ return (num_online_cpus()/2);
+}
+
+static inline int
+ksocknal_sched2cpu(int i)
+{
+ if (smp_num_siblings == 1)
+ return i;
+
+ /* use one logical CPU of each hyperthreaded pair */
+ return (i * 2);
+}
+
+static inline int
+ksocknal_irqsched2cpu(int i)
+{
+ return (ksocknal_sched2cpu(i) + 1);
+}
+# endif
+#endif
+
+#endif
#define IBNAL_CHECK_ADVERT
-#include <linux/kp30.h>
+#include <libcfs/kp30.h>
#include <portals/p30.h>
#include <portals/lib-p30.h>
#include <portals/nal.h>
#define GSI_TIMEOUT 5
#define GSI_RETRY 10
-typedef struct
+typedef struct
{
int kib_io_timeout; /* comms timeout (seconds) */
struct ctl_table_header *kib_sysctl; /* sysctl interface */
__u32 md_rkey;
__u64 md_addr;
} kib_md_t __attribute__((packed));
-
-typedef struct
+
+typedef struct
{
/* initialisation state. These values are sorted by their initialization order. */
enum {
struct list_head kib_sched_txq; /* tx requiring attention */
struct list_head kib_sched_rxq; /* rx requiring attention */
spinlock_t kib_sched_lock; /* serialise */
-
+
struct kib_tx *kib_tx_descs; /* all the tx descriptors */
kib_pages_t *kib_tx_pages; /* premapped tx msg pages */
wait_queue_head_t kib_idle_tx_waitq; /* block here for tx descriptor */
__u64 kib_next_tx_cookie; /* RDMA completion cookie */
spinlock_t kib_tx_lock; /* serialise */
-
+
vv_hca_h_t kib_hca; /* The HCA */
vv_hca_attrib_t kib_hca_attrs; /* HCA attributes */
void *kib_listen_handle; /* where I listen for connections */
/* These fields are left untouched, so they can be shared. */
- union {
+ union {
cm_drequest_data_t dreq_data;
cm_dreply_data_t drep_data;
} cm_data;
/* these arrays serve two purposes during rdma. they are built on the passive
* side and sent to the active side as remote arguments. On the active side
- * the descs are used as a data structure on the way to local gather items.
+ * the descs are used as a data structure on the way to local gather items.
* the different roles result in split local/remote meaning of desc->rd_key */
typedef struct
{
} kib_connreq_t;
typedef struct kib_conn
-{
+{
struct kib_peer *ibc_peer; /* owning peer */
struct list_head ibc_list; /* stash on peer's conn list */
__u64 ibc_incarnation; /* which instance of the peer */
/******************************************************************************/
static inline struct list_head *
-kibnal_nid2peerlist (ptl_nid_t nid)
+kibnal_nid2peerlist (ptl_nid_t nid)
{
unsigned int hash = ((unsigned int)nid) % kibnal_data.kib_peer_hash_size;
-
+
return (&kibnal_data.kib_peers [hash]);
}
kibnal_page2phys (struct page *p)
{
__u64 page_number = p - mem_map;
-
+
return (page_number << PAGE_SHIFT);
}
#else
void *qp_context;
vv_return_t retval;
- CERROR("QP dumping %p\n", conn);
+ CERROR("QP dumping %p\n", conn);
retval = vv_qp_query(kibnal_data.kib_hca, conn->ibc_qp, &qp_context, &conn->ibc_qp_attrs);
if (retval) {
static void dump_wqe(vv_wr_t *wr)
{
CERROR("Dumping send WR %p\n", wr);
-
+
CERROR(" wr_id = %llx\n", wr->wr_id);
CERROR(" completion_notification = %d\n", wr->completion_notification);
CERROR(" scatgat_list = %p\n", wr->scatgat_list);
}
CERROR(" wr_type = %d\n", wr->wr_type);
-
+
switch(wr->wr_type) {
case vv_wr_send:
CERROR(" send\n");
-
+
CERROR(" fance_indicator = %d\n", wr->type.send.send_qp_type.rc_type.fance_indicator);
break;
-
+
case vv_wr_receive:
break;
extern int kibnal_del_peer (ptl_nid_t nid, int single_share);
extern kib_peer_t *kibnal_find_peer_locked (ptl_nid_t nid);
extern void kibnal_unlink_peer_locked (kib_peer_t *peer);
-extern int kibnal_close_stale_conns_locked (kib_peer_t *peer,
+extern int kibnal_close_stale_conns_locked (kib_peer_t *peer,
__u64 incarnation);
extern kib_conn_t *kibnal_create_conn (void);
extern void kibnal_put_conn (kib_conn_t *conn);
extern int kibnal_connd (void *arg);
extern void kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob);
extern void kibnal_close_conn (kib_conn_t *conn, int why);
-extern void kibnal_start_active_rdma (int type, int status,
- kib_rx_t *rx, lib_msg_t *libmsg,
- unsigned int niov,
+extern void kibnal_start_active_rdma (int type, int status,
+ kib_rx_t *rx, lib_msg_t *libmsg,
+ unsigned int niov,
struct iovec *iov, ptl_kiov_t *kiov,
size_t offset, size_t nob);
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>CFBundleDevelopmentRegion</key>
+ <string>English</string>
+ <key>CFBundleExecutable</key>
+ <string>libcfs</string>
+ <key>CFBundleIconFile</key>
+ <string></string>
+ <key>CFBundleIdentifier</key>
+ <string>com.clusterfs.lustre.libcfs</string>
+ <key>CFBundleInfoDictionaryVersion</key>
+ <string>6.0</string>
+ <key>CFBundlePackageType</key>
+ <string>KEXT</string>
+ <key>CFBundleSignature</key>
+ <string>????</string>
+ <key>CFBundleVersion</key>
+ <string>1.0.1</string>
+ <key>OSBundleCompatibleVersion</key>
+ <string>1.0.0</string>
+ <key>OSBundleLibraries</key>
+ <dict>
+ <key>com.apple.kernel.bsd</key>
+ <string>1.1</string>
+ <key>com.apple.kernel.iokit</key>
+ <string>1.0.0b1</string>
+ <key>com.apple.kernel.mach</key>
+ <string>1.0.0b1</string>
+ </dict>
+</dict>
+</plist>
MODULES = libcfs
-libcfs-objs := debug.o lwt.o module.o proc.o tracefile.o watchdog.o
+
+# Linux-specific implementation objects (split out from the old
+# monolithic object list so Darwin can supply its own equivalents).
+libcfs-linux-objs := linux-tracefile.o linux-debug.o
+libcfs-linux-objs += linux-prim.o linux-mem.o
+libcfs-linux-objs += linux-fs.o linux-sync.o
+libcfs-linux-objs += linux-lwt.o linux-proc.o linux-curproc.o
+libcfs-linux-objs += linux-utils.o linux-module.o
+
+# 2.6 kbuild can build from the linux/ subdir directly
+ifeq ($(PATCHLEVEL),6)
+libcfs-linux-objs := $(addprefix linux/,$(libcfs-linux-objs))
+endif
+
+default: all
+
+# On 2.4 (linux25=no) the per-arch sources are symlinked into the
+# build directory instead.
+ifeq (@linux25@,no)
+sources:
+	@for i in $(libcfs-linux-objs:%.o=%.c) ; do \
+		echo "ln -s @srcdir@/linux/$$i ." ; \
+		ln -sf @srcdir@/linux/$$i . || exit 1 ; \
+	done
+
+else
+sources:
+
+endif
+
+# arch-independent objects, built for every platform
+libcfs-all-objs := debug.o lwt.o module.o tracefile.o watchdog.o
+
+libcfs-objs := $(libcfs-linux-objs) $(libcfs-all-objs)
+
+EXTRA_PRE_CFLAGS := -I@LUSTRE@/../portals/libcfs
@INCLUDE_RULES@
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
+SUBDIRS := darwin linux
+
if MODULES
+
+if LINUX
modulenet_DATA := libcfs$(KMODEXT)
endif
-MOSTLYCLEANFILES = *.o *.ko *.mod.c
-DIST_SOURCES = $(libcfs-objs:%.o=%.c) tracefile.h
+# Darwin builds libcfs as a kernel extension (kext) rather than a
+# Linux module; sources combine the shared files with darwin/ ports.
+if DARWIN
+macos_PROGRAMS := libcfs
+
+libcfs_SOURCES := debug.c module.c tracefile.c darwin/darwin-debug.c \
+	darwin/darwin-fs.c darwin/darwin-mem.c darwin/darwin-module.c \
+	darwin/darwin-prim.c darwin/darwin-proc.c \
+	darwin/darwin-tracefile.c darwin/darwin-utils.c \
+	darwin/darwin-sync.c darwin/darwin-curproc.c user-prim.c user-lock.c
+
+libcfs_CFLAGS := $(EXTRA_KCFLAGS)
+libcfs_LDFLAGS := $(EXTRA_KLDFLAGS)
+libcfs_LDADD := $(EXTRA_KLIBS)
+
+plist_DATA := Info.plist
+
+install-data-hook: fix-kext-ownership
+
+endif
+
+endif
+
+EXTRA_DIST := Info.plist
+
+MOSTLYCLEANFILES = *.o *.ko *.mod.c linux-*.c linux/*.o darwin/*.o libcfs
+DIST_SOURCES = $(libcfs-all-objs:%.o=%.c) tracefile.h
--- /dev/null
+Makefile
+Makefile.in
--- /dev/null
+EXTRA_DIST := \
+ darwin-mem.c \
+ darwin-proc.c \
+ darwin-utils.c \
+ darwin-debug.c \
+ darwin-module.c \
+ darwin-sync.c \
+ darwin-fs.c \
+ darwin-prim.c \
+ darwin-tracefile.c \
+ darwin-curproc.c
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Lustre curproc API implementation for XNU kernel
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation. Lustre is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+ * Public License for more details. You should have received a copy of the GNU
+ * General Public License along with Lustre; if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+/*
+ * Implementation of cfs_curproc API (see portals/include/libcfs/curproc.h)
+ * for XNU kernel.
+ */
+
+/* Credentials of the current process (XNU proc->p_cred->pc_ucred). */
+static inline struct ucred *curproc_ucred(void)
+{
+ return current_proc()->p_cred->pc_ucred;
+}
+
+/* Effective uid of the current process. */
+uid_t cfs_curproc_uid(void)
+{
+ return curproc_ucred()->cr_uid;
+}
+
+/* Effective gid: XNU stores it in cr_groups[0]. */
+gid_t cfs_curproc_gid(void)
+{
+ LASSERT(curproc_ucred()->cr_ngroups > 0);
+ return curproc_ucred()->cr_groups[0];
+}
+
+/* fsuid equivalent: XNU has no separate fsuid, so the real uid is
+ * returned (NOTE(review): confirm real-uid is the intended mapping). */
+uid_t cfs_curproc_fsuid(void)
+{
+ return current_proc()->p_cred->p_ruid;
+}
+
+/* fsgid equivalent: real gid (see note on cfs_curproc_fsuid). */
+gid_t cfs_curproc_fsgid(void)
+{
+ return current_proc()->p_cred->p_rgid;
+}
+
+/* pid of the current process. */
+pid_t cfs_curproc_pid(void)
+{
+ return current_proc()->p_pid;
+}
+
+/* Number of supplementary groups, excluding the effective gid which
+ * XNU keeps in cr_groups[0]. */
+int cfs_curproc_groups_nr(void)
+{
+ LASSERT(curproc_ucred()->cr_ngroups > 0);
+ return curproc_ucred()->cr_ngroups - 1;
+}
+
+/* Return 1 if 'gid' appears anywhere in the current process's group
+ * vector (slot 0 holds the effective gid), 0 otherwise. */
+int cfs_curproc_is_in_groups(gid_t gid)
+{
+ struct ucred *cred = curproc_ucred();
+ int idx;
+
+ LASSERT(cred != NULL);
+
+ for (idx = 0; idx < cred->cr_ngroups; idx++)
+ if (cred->cr_groups[idx] == gid)
+ return 1;
+
+ return 0;
+}
+
+/* Copy up to 'size' supplementary gids into 'array', skipping
+ * cr_groups[0] (the effective gid). */
+void cfs_curproc_groups_dump(gid_t *array, int size)
+{
+ struct ucred *cr;
+
+ cr = curproc_ucred();
+ LASSERT(cr != NULL);
+ CLASSERT(sizeof array[0] == sizeof (__u32));
+
+ size = min_t(int, size, cr->cr_ngroups);
+ memcpy(array, &cr->cr_groups[1], size * sizeof(gid_t));
+}
+
+/* File-creation mask of the current process. */
+mode_t cfs_curproc_umask(void)
+{
+ return current_proc()->p_fd->fd_cmask;
+}
+
+/* Command name of the current process (kernel-owned buffer). */
+char *cfs_curproc_comm(void)
+{
+ return current_proc()->p_comm;
+}
+
+/* XNU has no Linux-style capability sets; report an empty set. */
+cfs_kernel_cap_t cfs_curproc_cap_get(void)
+{
+ return 0;
+}
+
+/* No-op on XNU: capabilities cannot be set (see cfs_curproc_cap_get). */
+void cfs_curproc_cap_set(cfs_kernel_cap_t cap)
+{
+ return;
+}
+
+
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
--- /dev/null
+# define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <libcfs/kp30.h>
+#include <libcfs/libcfs.h>
+#include "tracefile.h"
+
+/* Stack dumping is not implemented on this platform; no-op stub. */
+void portals_debug_dumpstack(cfs_task_t *tsk)
+{
+ return;
+}
+
+/* Return the current task via the libcfs abstraction. */
+cfs_task_t *portals_current(void)
+{
+ return cfs_current();
+}
+
+/* No arch-specific debug setup needed on this platform; always 0. */
+int portals_arch_debug_init(unsigned long bufsize)
+{
+ return 0;
+}
+
+/* Matching no-op teardown for portals_arch_debug_init(); always 0. */
+int portals_arch_debug_cleanup(void)
+{
+ return 0;
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Phil Schwan <phil@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Darwin porting library
+ * Make things easy to port
+ */
+#include <mach/mach_types.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/malloc.h>
+#include <sys/conf.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/uio.h>
+#include <sys/filedesc.h>
+#include <sys/namei.h>
+
+#define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+/*
+ * Kernel APIs for file system in xnu
+ *
+ * Public functions
+ */
+/* Store the size of fp's backing vnode in *size via vn_stat().
+ * On failure *size is zeroed and the stat error is returned. */
+int
+filp_node_size(struct file *fp, off_t *size)
+{
+ struct vnode *vp = (struct vnode *)fp->f_data;
+ struct stat sb;
+ int rc;
+
+ rc = vn_stat(vp, &sb, current_proc());
+ if (rc) {
+ *size = 0;
+ return rc;
+ }
+ *size = sb.st_size;
+ return 0;
+}
+
+/* Kernel-space analogue of open(2) for XNU: allocate a file struct,
+ * vn_open the path, and wire up the vnode file ops.  Returns the file
+ * or NULL (with *err set, if err is non-NULL) on failure.
+ * NOTE(review): the error path uses ffree(fp), which filp_close's
+ * comment says can panic for files not on the global list — confirm
+ * whether this path needs the same manual FREE_ZONE treatment. */
+cfs_file_t *
+filp_open(const char * filename, int flags, int mode, int *err)
+{
+ struct nameidata nd;
+ register cfs_file_t *fp;
+ register struct vnode *vp;
+ cfs_file_t *nfp;
+ int rc;
+ extern struct fileops vnops;
+ extern int nfiles;
+ CFS_DECL_CONE_DATA;
+
+ CFS_CONE_IN;
+ nfiles++;
+ MALLOC_ZONE(nfp, cfs_file_t *, sizeof(cfs_file_t), M_FILE, M_WAITOK|M_ZERO);
+ bzero(nfp, sizeof(cfs_file_t));
+ nfp->f_count = 1;
+ fp = nfp;
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, (char *)filename, current_proc());
+ if ((rc = vn_open(&nd, flags, mode)) != 0){
+ printf("filp_open failed at (%d)\n", rc);
+ if (err != NULL)
+ *err = rc;
+ ffree(fp);
+ CFS_CONE_EX;
+ return NULL;
+ }
+ vp = nd.ni_vp;
+ fp->f_flag = flags & FMASK;
+ fp->f_type = DTYPE_VNODE;
+ fp->f_ops = &vnops;
+ fp->f_data = (caddr_t)vp;
+ fp->f_cred = current_proc()->p_ucred;
+ /*
+ * Hold cred to increase reference
+ */
+ crhold(fp->f_cred);
+ /*
+ * vnode is locked inside vn_open for lookup,
+ * we should release the lock before return
+ */
+ VOP_UNLOCK(vp, 0, current_proc());
+ CFS_CONE_EX;
+
+ return fp;
+}
+
+/*
+ * Drop one reference from @fp and return the remaining count.
+ * Panics on a stale (poisoned 0xffff) or negative count.
+ */
+static int
+frele_internal(cfs_file_t *fp)
+{
+        short remaining;
+
+        if (fp->f_count == (short)0xffff)
+                panic("frele of lustre: stale");
+        remaining = --fp->f_count;
+        if (remaining < 0)
+                panic("frele of lustre: count < 0");
+        return (int)remaining;
+}
+
+/*
+ * Drop a reference on @fp; on the last reference close the underlying
+ * vnode, release the credential and free the file structure.
+ * Always returns 0; a NULL @fp is a no-op.
+ */
+int
+filp_close (cfs_file_t *fp)
+{
+        struct vnode *vp;
+        CFS_DECL_CONE_DATA;
+
+        if (fp == NULL)
+                return 0;
+
+        CFS_CONE_IN;
+        if (frele_internal(fp) > 0)
+                goto out;
+        vp = (struct vnode *)fp->f_data;
+        (void )vn_close(vp, fp->f_flag, fp->f_cred, current_proc());
+        /*
+         * ffree(fp);
+         * Dont use ffree to release fp!!!!
+         * ffree will call LIST_REMOVE(fp),
+         * but fp is not in any list, this will
+         * cause kernel panic
+         */
+        struct ucred *cred;
+        cred = fp->f_cred;
+        if (cred != NOCRED) {
+                fp->f_cred = NOCRED;
+                crfree(cred);
+        }
+        extern int nfiles;
+        nfiles--;
+        /* poison the structure and mark the count stale so a late
+         * frele_internal() panics instead of corrupting freed memory */
+        memset(fp, 0xff, sizeof *fp);
+        fp->f_count = (short)0xffff;
+        FREE_ZONE(fp, sizeof *fp, M_FILE);
+out:
+        CFS_CONE_EX;
+        return 0;
+}
+
+extern void bwillwrite(void);
+
+/*
+ * Write buffer to filp inside kernel
+ */
+/*
+ * Write @nbyte bytes from @buf to an open file inside the kernel.
+ * Returns the number of bytes written, or -errno on failure.  If @pos is
+ * non-NULL it supplies the starting offset and is updated afterwards;
+ * otherwise the file's current offset is used (uio_offset == -1).
+ * NOTE(review): oversize requests return positive EINVAL while other
+ * failures are -errno -- confirm callers expect this.
+ */
+int
+filp_write (cfs_file_t *fp, void *buf, size_t nbyte, off_t *pos)
+{
+        struct uio auio;
+        struct iovec aiov;
+        struct proc *p = current_proc();
+        long cnt, error = 0;
+        CFS_DECL_CONE_DATA;
+
+        aiov.iov_base = (void *)(uintptr_t)buf;
+        aiov.iov_len = nbyte;
+        auio.uio_iov = &aiov;
+        auio.uio_iovcnt = 1;
+        if (pos != NULL)
+                auio.uio_offset = *pos;
+        else
+                auio.uio_offset = (off_t)-1;
+        if (nbyte > INT_MAX)
+                return (EINVAL);
+        auio.uio_resid = nbyte;
+        auio.uio_rw = UIO_WRITE;
+        auio.uio_segflg = UIO_SYSSPACE;
+        auio.uio_procp = p;
+
+        cnt = nbyte;
+        CFS_CONE_IN;
+        if (fp->f_type == DTYPE_VNODE)
+                bwillwrite(); /* empty stuff now */
+        if ((error = fo_write(fp, &auio, fp->f_cred, 0, p))) {
+                /* a partial transfer counts as success for these errors */
+                if (auio.uio_resid != cnt && (error == ERESTART ||\
+                    error == EINTR || error == EWOULDBLOCK))
+                        error = 0;
+                /* The socket layer handles SIGPIPE */
+                if (error == EPIPE && fp->f_type != DTYPE_SOCKET)
+                        psignal(p, SIGPIPE);
+        }
+        CFS_CONE_EX;
+        if (error != 0)
+                cnt = -error;
+        else
+                cnt -= auio.uio_resid; /* bytes actually transferred */
+        if (pos != NULL)
+                *pos = auio.uio_offset;
+        return cnt;
+}
+
+/*
+ * Read from filp inside kernel
+ */
+/*
+ * Read up to @nbyte bytes from an open file into @buf inside the kernel.
+ * Returns bytes read, or -errno on failure.  If @pos is non-NULL it is
+ * used as the starting offset and updated afterwards; otherwise the
+ * file's current offset is used (uio_offset == -1).
+ * NOTE(review): oversize requests return positive EINVAL while other
+ * failures are -errno -- confirm callers expect this.
+ */
+int
+filp_read (cfs_file_t *fp, void *buf, size_t nbyte, off_t *pos)
+{
+        struct uio auio;
+        struct iovec aiov;
+        struct proc *p = current_proc();
+        long cnt, error = 0;
+        CFS_DECL_CONE_DATA;
+
+        aiov.iov_base = (caddr_t)buf;
+        aiov.iov_len = nbyte;
+        auio.uio_iov = &aiov;
+        auio.uio_iovcnt = 1;
+        if (pos != NULL)
+                auio.uio_offset = *pos;
+        else
+                auio.uio_offset = (off_t)-1;
+        if (nbyte > INT_MAX)
+                return (EINVAL);
+        auio.uio_resid = nbyte;
+        auio.uio_rw = UIO_READ;
+        auio.uio_segflg = UIO_SYSSPACE;
+        auio.uio_procp = p;
+
+        cnt = nbyte;
+        CFS_CONE_IN;
+        if ((error = fo_read(fp, &auio, fp->f_cred, 0, p)) != 0) {
+                /* a partial transfer counts as success for these errors */
+                if (auio.uio_resid != cnt && (error == ERESTART ||
+                    error == EINTR || error == EWOULDBLOCK))
+                        error = 0;
+        }
+        CFS_CONE_EX;
+        if (error != 0)
+                cnt = -error;
+        else
+                cnt -= auio.uio_resid; /* bytes actually transferred */
+        if (pos != NULL)
+                *pos = auio.uio_offset;
+
+        return cnt;
+}
+
+/*
+ * Fsync an open file: take a reference, lock the vnode, VOP_FSYNC with
+ * MNT_WAIT, then unlock and drop the reference.
+ * Returns the VOP_FSYNC error (0 on success), or -EBADF when the file
+ * reference cannot be taken.
+ * NOTE(review): mixes -EBADF with positive VOP errors -- confirm callers.
+ */
+int
+filp_fsync (cfs_file_t *fp)
+{
+        struct vnode *vp = (struct vnode *)fp->f_data;
+        struct proc *p = current_proc();
+        int error = 0;
+        CFS_DECL_CONE_DATA;
+
+        CFS_CONE_IN;
+        if (fref(fp) == -1) {
+                CFS_CONE_EX;
+                return (-EBADF);
+        }
+        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+        error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
+        VOP_UNLOCK(vp, 0, p);
+        frele(fp);
+        CFS_CONE_EX;
+
+        return error;
+}
+
+/*
+ * Take an extra reference on @fp under the kernel funnel.
+ * Always returns 0.
+ */
+int
+ref_file(cfs_file_t *fp)
+{
+        CFS_DECL_CONE_DATA;
+
+        CFS_CONE_IN;
+        (void)fref(fp);
+        CFS_CONE_EX;
+        return 0;
+}
+
+/*
+ * Drop a reference on @fp under the kernel funnel.
+ * Always returns 0.
+ */
+int
+rele_file(cfs_file_t *fp)
+{
+        CFS_DECL_CONE_DATA;
+
+        CFS_CONE_IN;
+        (void)frele(fp);
+        CFS_CONE_EX;
+        return 0;
+}
+
+/*
+ * Private functions
+ */
+/* vrele() performed under the kernel funnel. */
+void vrele_safe(struct vnode *nd)
+{
+        CFS_DECL_CONE_DATA;
+
+        CFS_CONE_IN;
+        vrele(nd);
+        CFS_CONE_EX;
+}
+
+/*
+ * namei() lookup of @path under the kernel funnel; fills in @nd.
+ * Returns the namei() error, 0 on success.
+ * NOTE(review): @flags is ignored -- FOLLOW is always used; confirm
+ * callers do not depend on other lookup flags.
+ */
+int
+path_lookup(const char *path, unsigned int flags, struct nameidata *nd)
+{
+        int ret = 0;
+        CFS_DECL_CONE_DATA;
+
+        CFS_CONE_IN;
+        NDINIT(nd, LOOKUP, FOLLOW, UIO_SYSSPACE, (char *)path, current_proc());
+        if ((ret = namei(nd)) != 0){
+                CERROR("path_lookup fail!\n");
+        }
+        CFS_CONE_EX;
+
+        return ret;
+}
+
+/* Current reference count of @fp. */
+int
+file_count(struct file *fp)
+{
+        return fcount(fp);
+}
+
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Phil Schwan <phil@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Darwin porting library
+ * Make things easy to port
+ */
+#define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <mach/mach_types.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <sys/file.h>
+#include <sys/conf.h>
+#include <sys/vnode.h>
+#include <sys/uio.h>
+#include <sys/filedesc.h>
+#include <sys/namei.h>
+#include <miscfs/devfs/devfs.h>
+#include <kern/kalloc.h>
+#include <kern/zalloc.h>
+#include <kern/thread.h>
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+/*
+ * Definition of struct zone, copied from osfmk/kern/zalloc.h.
+ */
+struct zone_hack {
+ int count; /* Number of elements used now */
+ vm_offset_t free_elements;
+ vm_size_t cur_size; /* current memory utilization */
+ vm_size_t max_size; /* how large can this zone grow */
+ vm_size_t elem_size; /* size of an element */
+ vm_size_t alloc_size; /* size used for more memory */
+ char *zone_name; /* a name for the zone */
+ unsigned int
+ /* boolean_t */ exhaustible :1, /* (F) merely return if empty? */
+ /* boolean_t */ collectable :1, /* (F) garbage collect empty pages */
+ /* boolean_t */ expandable :1, /* (T) expand zone (with message)? */
+ /* boolean_t */ allows_foreign :1,/* (F) allow non-zalloc space */
+ /* boolean_t */ doing_alloc :1, /* is zone expanding now? */
+ /* boolean_t */ waiting :1, /* is thread waiting for expansion? */
+ /* boolean_t */ async_pending :1; /* asynchronous allocation pending? */
+ struct zone_hack * next_zone; /* Link for all-zones list */
+ /*
+ * more fields follow, but we don't need them. We only need
+ * offset from the beginning of struct zone to ->next_zone
+ * field: it allows us to scan the list of all zones.
+ */
+};
+
+decl_simple_lock_data(extern, all_zones_lock)
+
+/*
+ * returns true iff zone with name @name already exists.
+ *
+ * XXX nikita: this function is defined in this file only because there is no
+ * better place to put it in.
+ */
+zone_t cfs_find_zone(const char *name)
+{
+        struct zone_hack *scan;
+
+        /* from osfmk/kern/zalloc.c */
+        extern zone_t first_zone;
+
+        LASSERT(name != NULL);
+
+        /* walk the kernel's all-zones list (layout mirrored by
+         * struct zone_hack above) looking for an exact name match */
+        simple_lock(&all_zones_lock);
+        for (scan = (struct zone_hack *)first_zone;
+             scan != NULL; scan = scan->next_zone) {
+                if (!strcmp(scan->zone_name, name))
+                        break;
+        }
+        simple_unlock(&all_zones_lock);
+        /* NULL when no zone of that name exists */
+        return((zone_t)scan);
+}
+
+/*
+ * our wrapper around kern/zalloc.c:zinit()
+ *
+ * Creates copy of name and calls zinit() to do real work. Needed because zone
+ * survives kext unloading, so that @name cannot be just static string
+ * embedded into kext image.
+ */
+/*
+ * Wrapper around zinit() that hands the zone a heap-allocated copy of
+ * @name: zones survive kext unloading, so the name must not point into
+ * the kext image.  The copy is intentionally never freed.
+ */
+zone_t cfs_zinit(vm_size_t size, vm_size_t max, int alloc, const char *name)
+{
+        char *name_copy;
+
+        name_copy = _MALLOC(strlen(name) + 1, M_TEMP, M_WAITOK);
+        LASSERT(name_copy != NULL);
+        strcpy(name_copy, name);
+        return zinit(size, max, alloc, name_copy);
+}
+
+/*
+ * Create a slab-cache lookalike backed by an xnu zone.  An existing zone
+ * of the same name is reused (zones survive kext unloading); otherwise a
+ * new one is created.  @off/@arg1 and the ctor/dtor callbacks are ignored.
+ * Returns NULL on failure.
+ */
+cfs_mem_cache_t *
+cfs_mem_cache_create (const char *name, size_t objsize, size_t off, unsigned long arg1,
+                      void (*arg2)(void *, cfs_mem_cache_t *, unsigned long),
+                      void (*arg3)(void *, cfs_mem_cache_t *, unsigned long))
+{
+        cfs_mem_cache_t *new = NULL;
+
+        /* the descriptor is a cfs_mem_cache_t; the old code allocated
+         * "objsize" bytes for it, under-allocating whenever
+         * objsize < sizeof *new */
+        MALLOC(new, cfs_mem_cache_t *, sizeof *new, M_TEMP, M_WAITOK|M_ZERO);
+        if (new == NULL){
+                CERROR("cfs_mem_cache created fail!\n");
+                return NULL;
+        }
+        new->size = objsize;
+        CFS_INIT_LIST_HEAD(&new->link);
+        /* NOTE(review): copies strlen(name)+1 bytes with no bound on
+         * new->name -- confirm callers never pass oversized names */
+        strncpy(new->name, name, 1 + strlen(name));
+        new->zone = cfs_find_zone(name);
+        if (new->zone == NULL) {
+                new->zone = cfs_zinit (objsize, KMEM_MAX_ZONE * objsize, 0, name);
+                if (new->zone == NULL) {
+                        CERROR("zone create fault!\n");
+                        FREE (new, M_TEMP);
+                        return NULL;
+                }
+        }
+        return new;
+}
+
+/*
+ * Destroy a cache descriptor from cfs_mem_cache_create().  Only the
+ * descriptor is freed; the backing xnu zone persists and is re-found by
+ * name if a cache of the same name is created again.  Always returns 0.
+ */
+int
+cfs_mem_cache_destroy (cfs_mem_cache_t *cachep)
+{
+        FREE (cachep, M_TEMP);
+        return 0;
+}
+
+/* Allocate one object from the cache's zone; @flags is ignored. */
+void *
+cfs_mem_cache_alloc (cfs_mem_cache_t *cachep, int flags)
+{
+        return (void *)zalloc(cachep->zone);
+}
+
+/* Return @objp to the cache's zone. */
+void
+cfs_mem_cache_free (cfs_mem_cache_t *cachep, void *objp)
+{
+        zfree (cachep->zone, (vm_address_t)objp);
+}
+
+/* ---------------------------------------------------------------------------
+ * Page operations
+ *
+ * --------------------------------------------------------------------------- */
+
+/*
+ * "Raw" pages
+ */
+
+extern vm_map_t zone_map;
+
+/*
+ * Map a raw page's memory came from: order-0 pages come from the kalloc
+ * zones (zone_map), larger allocations from kernel_map -- this mirrors
+ * the split in page_zone_alloc() below.
+ */
+static inline vm_map_t page_map(struct xnu_raw_page *pg)
+{
+        LASSERT(pg != NULL);
+
+        return pg->order == 0 ? zone_map : kernel_map;
+}
+
+/*
+ * Wire the page's memory into a UPL so it can be used for I/O.
+ * Returns the vm_map_get_upl() kernel return code (0 on success).
+ */
+static int raw_page_init(struct xnu_raw_page *pg)
+{
+        vm_size_t size = (1UL << pg->order) * PAGE_SIZE;
+        int upl_flags = UPL_SET_INTERNAL |
+                        UPL_SET_LITE | UPL_SET_IO_WIRE | UPL_COPYOUT_FROM;
+        int kr = 0;
+
+        /* XXX is it necessary? */
+        kr = vm_map_get_upl(page_map(pg),
+                            pg->virtual, &size, &pg->upl, 0, 0, &upl_flags, 0);
+        return kr;
+}
+
+/* Release the UPL taken in raw_page_init(). */
+static void raw_page_done(struct xnu_raw_page *pg)
+{
+        ubc_upl_abort(pg->upl, UPL_ABORT_FREE_ON_EMPTY);
+        return;
+}
+
+static struct xnu_page_ops raw_page_ops;
+static struct xnu_page_ops *page_ops[XNU_PAGE_NTYPES] = {
+ [XNU_PAGE_RAW] = &raw_page_ops
+};
+
+/* True iff page->type indexes a slot of the page_ops[] dispatch table. */
+static int page_type_is_valid(cfs_page_t *page)
+{
+        LASSERT(page != NULL);
+        return 0 <= page->type && page->type < XNU_PAGE_NTYPES;
+}
+
+static int page_is_raw(cfs_page_t *page)
+{
+        return page->type == XNU_PAGE_RAW;
+}
+
+/* container-of: recover the xnu_raw_page embedding @page as ->header */
+static struct xnu_raw_page *as_raw(cfs_page_t *page)
+{
+        LASSERT(page_is_raw(page));
+        return list_entry(page, struct xnu_raw_page, header);
+}
+
+static void *raw_page_address(cfs_page_t *pg)
+{
+        return (void *)as_raw(pg)->virtual;
+}
+
+/* raw pages are permanently mapped: map/unmap are trivial */
+static void *raw_page_map(cfs_page_t *pg)
+{
+        return (void *)as_raw(pg)->virtual;
+}
+
+static void raw_page_unmap(cfs_page_t *pg)
+{
+}
+
+static struct xnu_page_ops raw_page_ops = {
+        .page_map       = raw_page_map,
+        .page_unmap     = raw_page_unmap,
+        .page_address   = raw_page_address
+};
+
+
+extern vm_size_t kalloc_max;
+extern vm_size_t kalloc_max_prerounded;
+extern int first_k_zone;
+extern struct zone *k_zone[16];
+extern vm_offset_t zalloc_canblock( register zone_t, boolean_t );
+extern vm_map_t zone_map;
+
+/*
+ * Allocate (1 << order) * PAGE_SIZE bytes.  Requests up to one page come
+ * from the smallest fitting kalloc zone; larger requests fall back to
+ * kmem_alloc_contig() (slow -- see note below).  Returns 0 on failure.
+ */
+static inline vm_address_t
+page_zone_alloc(int flags, int order)
+{
+        register int zindex;
+        register vm_size_t allocsize;
+        vm_size_t size = (1UL << order) * PAGE_SIZE;
+        vm_address_t addr;
+        kern_return_t kr;
+
+        assert(order >= 0);
+        if (size > PAGE_SIZE){
+                /* XXX Liang:
+                 * zalloc_canblock() call kernel_memory_allocate to allocate
+                 * pages, kernel_memory_allocate cannot guarantee contig pages!
+                 * So any request bigger then PAGE_SIZE should not call zalloc()
+                 *
+                 * NB. kmem_alloc_contig could be very slow!!!! Anyway, I dont
+                 * know what will happen if order >= 1 :-(
+                 * */
+                CDEBUG(D_MALLOC, "Allocate contig pages!\n");
+                kr = kmem_alloc_contig(kernel_map, &addr, size, 0, 0);
+                if (kr)
+                        return 0;
+                return addr;
+        }
+        /* find the smallest kalloc zone that fits the request */
+        allocsize = KALLOC_MINSIZE;
+        zindex = first_k_zone;
+        while (allocsize < size) {
+                allocsize <<= 1;
+                zindex++;
+        }
+        assert(allocsize < kalloc_max);
+        /* the old "flags & M_NOWAIT != 0" parsed as
+         * "flags & (M_NOWAIT != 0)" -- the mask must be parenthesised
+         * for the non-blocking case ever to be taken correctly */
+        if ((flags & M_NOWAIT) != 0)
+                addr = zalloc_canblock(k_zone[zindex], FALSE);
+        else
+                addr = zalloc_canblock(k_zone[zindex], TRUE);
+        return addr;
+}
+
+/* Allocate a "page", actually upl of darwin */
+/*
+ * Allocate a "page" group of (1 << order) pages, actually a UPL-wired
+ * allocation on Darwin.  Returns NULL on failure with nothing leaked.
+ */
+struct xnu_raw_page *alloc_raw_pages(u_int32_t flags, u_int32_t order)
+{
+        kern_return_t kr;
+        vm_size_t size = (1UL << order) * PAGE_SIZE;
+        u_int32_t mflags = 0;
+        struct xnu_raw_page *pg;
+
+        /* NB. "flags & FLAG != 0" parses as "flags & (FLAG != 0)";
+         * the masks must be parenthesised */
+        if ((flags & CFS_ALLOC_ATOMIC) != 0)
+                mflags |= M_NOWAIT;
+        else
+                mflags |= M_WAITOK;
+        if ((flags & CFS_ALLOC_ZERO) != 0)
+                mflags |= M_ZERO;
+
+        MALLOC (pg, struct xnu_raw_page *, sizeof *pg, M_TEMP, mflags);
+        if (pg == NULL)
+                return NULL;
+        pg->header.type = XNU_PAGE_RAW;
+        pg->order = order;
+        cfs_set_page_count(&pg->header, 1);
+        pg->virtual = page_zone_alloc(flags, order);
+        if (!pg->virtual) {
+                /* answer to the old XXX: yes, pg leaked here */
+                FREE(pg, M_TEMP);
+                return NULL;
+        }
+
+        kr = raw_page_init(pg);
+        if (kr != 0) {
+                kmem_free(page_map(pg), pg->virtual, size);
+                FREE(pg, M_TEMP);
+                return NULL;
+        }
+        return pg;
+}
+
+/* Free a "page" */
+/*
+ * Drop a reference on a raw page group; on the last reference release
+ * the UPL and return the memory to its originating map.
+ * NOTE(review): @order is trusted to equal pg->order -- confirm callers.
+ */
+void free_raw_pages(struct xnu_raw_page *pg, u_int32_t order)
+{
+        vm_size_t size = (1UL << order) * PAGE_SIZE;
+
+        if (!atomic_dec_and_test(&pg->count))
+                return;
+        raw_page_done(pg);
+        kmem_free(page_map(pg), pg->virtual, size);
+        FREE(pg, M_TEMP);
+}
+
+/*
+ * Allocate (1 << order) pages and return the generic page header,
+ * or NULL on failure.
+ */
+cfs_page_t *cfs_alloc_pages(u_int32_t flags, u_int32_t order)
+{
+        struct xnu_raw_page *pg = alloc_raw_pages(flags, order);
+
+        /* the old "&alloc_raw_pages(...)->header" fabricated a bogus
+         * non-NULL pointer whenever the allocation failed */
+        return pg != NULL ? &pg->header : NULL;
+}
+
+/* Single-page convenience wrapper around cfs_alloc_pages(). */
+cfs_page_t *cfs_alloc_page(u_int32_t flags)
+{
+        return cfs_alloc_pages(flags, 0);
+}
+
+/* Release a page group obtained from cfs_alloc_pages(). */
+void cfs_free_pages(cfs_page_t *pages, int order)
+{
+        free_raw_pages(as_raw(pages), order);
+}
+
+/* Single-page counterpart of cfs_free_pages(). */
+void cfs_free_page(cfs_page_t *page)
+{
+        cfs_free_pages(page, 0);
+}
+
+/* Take an extra reference on @pg. */
+void cfs_get_page(cfs_page_t *pg)
+{
+        atomic_inc(&as_raw(pg)->count);
+}
+
+/* Drop a reference; true when the count reached zero. */
+int cfs_put_page_testzero(cfs_page_t *pg)
+{
+        return atomic_dec_and_test(&as_raw(pg)->count);
+}
+
+/* Current reference count of @pg. */
+int cfs_page_count(cfs_page_t *pg)
+{
+        return atomic_read(&as_raw(pg)->count);
+}
+
+/* Force the reference count of @pg to @count. */
+void cfs_set_page_count(cfs_page_t *pg, int count)
+{
+        atomic_set(&as_raw(pg)->count, count);
+}
+
+/*
+ * Generic page operations
+ */
+
+/* Kernel virtual address of @pg, dispatched via the per-type ops table. */
+void *cfs_page_address(cfs_page_t *pg)
+{
+        LASSERT(page_type_is_valid(pg));
+        return page_ops[pg->type]->page_address(pg);
+}
+
+/* Map @pg; for raw pages this just returns the permanent mapping. */
+void *cfs_kmap(cfs_page_t *pg)
+{
+        LASSERT(page_type_is_valid(pg));
+        return page_ops[pg->type]->page_map(pg);
+}
+
+/* Undo cfs_kmap(); a no-op for raw pages. */
+void cfs_kunmap(cfs_page_t *pg)
+{
+        LASSERT(page_type_is_valid(pg));
+        return page_ops[pg->type]->page_unmap(pg);
+}
+
+/* Install the ops vector for page type @type (slot must be empty). */
+void xnu_page_ops_register(int type, struct xnu_page_ops *ops)
+{
+        LASSERT(0 <= type && type < XNU_PAGE_NTYPES);
+        LASSERT(ops != NULL);
+        LASSERT(page_ops[type] == NULL);
+
+        page_ops[type] = ops;
+}
+
+/* Remove the ops vector for page type @type (slot must be filled). */
+void xnu_page_ops_unregister(int type)
+{
+        LASSERT(0 <= type && type < XNU_PAGE_NTYPES);
+        LASSERT(page_ops[type] != NULL);
+
+        page_ops[type] = NULL;
+}
+
+/*
+ * Portable memory allocator API
+ */
+#ifdef HAVE_GET_PREEMPTION_LEVEL
+extern int get_preemption_level(void);
+#else
+#define get_preemption_level() (0)
+#endif
+
+/*
+ * Portable allocator: CFS_ALLOC_ATOMIC requests must not block (and must
+ * not be issued from preemption-disabled context otherwise); CFS_ALLOC_ZERO
+ * zero-fills.  Returns NULL on failure.
+ */
+void *cfs_alloc(size_t nr_bytes, u_int32_t flags)
+{
+        int mflags;
+
+        mflags = 0;
+        /* parenthesise the masks: the old "flags & FLAG != 0" parsed as
+         * "flags & (FLAG != 0)" and tested the wrong bit */
+        if ((flags & CFS_ALLOC_ATOMIC) != 0) {
+                mflags |= 0 /* M_NOWAIT */;
+        } else {
+                LASSERT(get_preemption_level() == 0);
+                mflags |= M_WAITOK;
+        }
+
+        if ((flags & CFS_ALLOC_ZERO) != 0)
+                mflags |= M_ZERO;
+
+        return _MALLOC(nr_bytes, M_TEMP, mflags);
+}
+
+/* Counterpart of cfs_alloc(). */
+void cfs_free(void *addr)
+{
+        _FREE(addr, M_TEMP);
+}
+
+/* Allocator for large buffers; may block, so never from atomic context. */
+void *cfs_alloc_large(size_t nr_bytes)
+{
+        LASSERT(get_preemption_level() == 0);
+        return _MALLOC(nr_bytes, M_TEMP, M_WAITOK);
+}
+
+/* Counterpart of cfs_alloc_large(). */
+void cfs_free_large(void *addr)
+{
+        _FREE(addr, M_TEMP);
+}
--- /dev/null
+#include <mach/mach_types.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/conf.h>
+#include <miscfs/devfs/devfs.h>
+
+#define DEBUG_SUBSYSTEM S_PORTALS
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+/*
+ * Unpack a portals ioctl buffer: the fixed portal_ioctl_data header was
+ * already copied in by the OSX ioctl path (@arg); the two optional inline
+ * buffers are then pulled from user space into @buf (which runs to @end).
+ * Returns 0 on success, -EINVAL on malformed headers, or a
+ * copy_from_user() error.
+ */
+int portal_ioctl_getdata(char *buf, char *end, void *arg)
+{
+        struct portal_ioctl_hdr *hdr;
+        struct portal_ioctl_data *data;
+        int err = 0;
+        ENTRY;
+
+        hdr = (struct portal_ioctl_hdr *)buf;
+        data = (struct portal_ioctl_data *)buf;
+        /* portals_ioctl_data has been copied in by ioctl of osx */
+        memcpy(buf, arg, sizeof(struct portal_ioctl_data));
+
+        if (hdr->ioc_version != PORTAL_IOCTL_VERSION) {
+                CERROR("PORTALS: version mismatch kernel vs application\n");
+                RETURN(-EINVAL);
+        }
+
+        if (hdr->ioc_len + buf >= end) {
+                CERROR("PORTALS: user buffer exceeds kernel buffer\n");
+                RETURN(-EINVAL);
+        }
+
+        if (hdr->ioc_len < sizeof(struct portal_ioctl_data)) {
+                CERROR("PORTALS: user buffer too small for ioctl\n");
+                RETURN(-EINVAL);
+        }
+        buf += size_round(sizeof(*data));
+
+        if (data->ioc_inllen1) {
+                err = copy_from_user(buf, data->ioc_inlbuf1, size_round(data->ioc_inllen1));
+                if (err)
+                        RETURN(err);
+                data->ioc_inlbuf1 = buf;
+                buf += size_round(data->ioc_inllen1);
+        }
+
+        if (data->ioc_inllen2) {
+                /* the copy result was previously discarded, so a fault on
+                 * the second inline buffer went unnoticed */
+                err = copy_from_user(buf, data->ioc_inlbuf2, size_round(data->ioc_inllen2));
+                if (err)
+                        RETURN(err);
+                data->ioc_inlbuf2 = buf;
+        }
+
+        RETURN(err);
+}
+
+extern struct cfs_psdev_ops libcfs_psdev_ops;
+struct portals_device_userstate *mdev_state[16];
+
+/*
+ * Char-device open entry: delegate to libcfs_psdev_ops.p_open and stash
+ * the returned per-device state in mdev_state[minor].
+ */
+static int
+libcfs_psdev_open(dev_t dev, int flags, int devtype, struct proc *p)
+{
+        struct portals_device_userstate *mstat = NULL;
+        int rc = 0;
+        int devid;
+
+        devid = minor(dev);
+        /* mdev_state[] has 16 slots: valid minors are 0..15 */
+        if (devid >= 16)
+                return (-ENXIO);
+
+        if (libcfs_psdev_ops.p_open != NULL)
+                rc = libcfs_psdev_ops.p_open(0, &mstat);
+        else
+                rc = -EPERM;
+        /* the original test was inverted ("if (!rc) return rc;"), which
+         * dropped the state on success and stored garbage on failure */
+        if (rc != 0)
+                return rc;
+        mdev_state[devid] = mstat;
+        return rc;
+}
+
+/*
+ * Char-device close entry: delegate to libcfs_psdev_ops.p_close and, on
+ * success, clear the per-device state slot.
+ */
+static int
+libcfs_psdev_close(dev_t dev, int flags, int mode, struct proc *p)
+{
+        int rc = 0;
+        int devid;
+
+        devid = minor(dev);
+        /* mdev_state[] has 16 slots: valid minors are 0..15 */
+        if (devid >= 16)
+                return (-ENXIO);
+
+        if (libcfs_psdev_ops.p_close != NULL)
+                rc = libcfs_psdev_ops.p_close(0, mdev_state[devid]);
+        else
+                rc = -EPERM;
+        if (rc)
+                return rc;
+        mdev_state[devid] = NULL;
+        return rc;
+}
+
+/*
+ * Char-device ioctl entry: root only; forwards to
+ * libcfs_psdev_ops.p_ioctl with the per-device state attached.
+ */
+static int
+libcfs_ioctl (dev_t dev, u_long cmd, caddr_t arg, int flag, struct proc *p)
+{
+        int rc = 0;
+        struct cfs_psdev_file pfile;
+        int devid;
+
+        devid = minor(dev);
+        /* mdev_state[] has 16 slots: valid minors are 0..15 */
+        if (devid >= 16)
+                return (-ENXIO);
+
+        if (suser(p->p_ucred, &p->p_acflag))
+                return (-EPERM);
+
+        pfile.off = 0;
+        pfile.private_data = mdev_state[devid];
+
+        if (libcfs_psdev_ops.p_ioctl != NULL)
+                rc = libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void *)arg);
+        else
+                rc = -EPERM;
+        return rc;
+}
+
+static struct cdevsw libcfs_devsw =
+{
+ libcfs_psdev_open, /* open */
+ libcfs_psdev_close, /* close */
+ NULL, /* read */
+ NULL, /* write */
+ libcfs_ioctl, /* ioctl */
+ NULL, /* stop */
+ NULL, /* reset */
+ NULL, /* tty's */
+ NULL, /* select */
+ NULL, /* mmap */
+ NULL, /* strategy */
+ NULL, /* getc */
+ NULL, /* putc */
+ 0 /* type */
+};
+
+cfs_psdev_t libcfs_dev = {
+ -1,
+ NULL,
+ "portals",
+ &libcfs_devsw,
+ NULL
+};
+
+/* Stub: Darwin kernel threads need no daemonize step; just log it. */
+void
+kportal_daemonize (char *str)
+{
+        printf("Daemonize request: %s.\n", str);
+}
+
+/* Stub: signal blocking is not implemented on Darwin. */
+void
+kportal_blockallsigs(void)
+{
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Phil Schwan <phil@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Darwin porting library
+ * Make things easy to port
+ */
+#define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <mach/mach_types.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <sys/file.h>
+#include <sys/conf.h>
+#include <sys/vnode.h>
+#include <sys/uio.h>
+#include <sys/filedesc.h>
+#include <sys/namei.h>
+#include <miscfs/devfs/devfs.h>
+#include <kern/kalloc.h>
+#include <kern/zalloc.h>
+#include <kern/thread.h>
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+void *darwin_current_journal_info = NULL;
+int darwin_current_cap_effective = -1;
+
+/*
+ * cfs pseudo device, actually pseudo char device in darwin
+ */
+#define KPORTAL_MAJOR -1
+
+/*
+ * Register the pseudo char device: grab a cdevsw slot, then create the
+ * /dev node.  On any failure nothing is left half-registered.
+ */
+kern_return_t cfs_psdev_register(cfs_psdev_t *dev) {
+        dev->index = cdevsw_add(KPORTAL_MAJOR, dev->devsw);
+        if (dev->index < 0) {
+                printf("portal_init: failed to allocate a major number!\n");
+                return KERN_FAILURE;
+        }
+        dev->handle = devfs_make_node(makedev (dev->index, 0),
+                                      DEVFS_CHAR, UID_ROOT,
+                                      GID_WHEEL, 0666, (char *)dev->name, 0);
+        /* devfs_make_node() returns NULL on failure; undo the cdevsw
+         * registration rather than leaving a dangling major */
+        if (dev->handle == NULL) {
+                cdevsw_remove(dev->index, dev->devsw);
+                return KERN_FAILURE;
+        }
+        return KERN_SUCCESS;
+}
+
+/* Undo cfs_psdev_register(): drop the devfs node, then the cdevsw slot. */
+kern_return_t cfs_psdev_deregister(cfs_psdev_t *dev)
+{
+        devfs_remove(dev->handle);
+        cdevsw_remove(dev->index, dev->devsw);
+        return KERN_SUCCESS;
+}
+
+/*
+ * KPortal symbol register / unregister support
+ */
+static struct rw_semaphore cfs_symbol_lock;
+struct list_head cfs_symbol_list;
+
+/*
+ * Look @name up in the symbol list; on a hit bump its refcount and
+ * return its value, otherwise return NULL.
+ * NOTE(review): sym->ref is mutated under the *read* lock -- confirm
+ * concurrent gets are acceptable here.
+ */
+void *
+cfs_symbol_get(const char *name)
+{
+        struct list_head *walker;
+        struct cfs_symbol *sym;
+        struct cfs_symbol *found = NULL;
+
+        down_read(&cfs_symbol_lock);
+        list_for_each(walker, &cfs_symbol_list) {
+                sym = list_entry (walker, struct cfs_symbol, sym_list);
+                if (!strcmp(sym->name, name)) {
+                        sym->ref ++;
+                        found = sym;
+                        break;
+                }
+        }
+        up_read(&cfs_symbol_lock);
+        /* previously "sym" (left pointing at the LAST list entry when no
+         * name matched) was returned, handing back a bogus value */
+        if (found != NULL)
+                return found->value;
+        return NULL;
+}
+
+/*
+ * Drop the reference taken by cfs_symbol_get().  Asserts @name is
+ * actually registered -- putting an unknown symbol is a caller bug.
+ */
+kern_return_t
+cfs_symbol_put(const char *name)
+{
+        struct list_head *walker;
+        struct cfs_symbol *sym;
+        struct cfs_symbol *found = NULL;
+
+        down_read(&cfs_symbol_lock);
+        list_for_each(walker, &cfs_symbol_list) {
+                sym = list_entry (walker, struct cfs_symbol, sym_list);
+                if (!strcmp(sym->name, name)) {
+                        found = sym;
+                        found->ref --;
+                        LASSERT(found->ref >= 0);
+                        break;
+                }
+        }
+        up_read(&cfs_symbol_lock);
+        /* previously the trailing LASSERT tested "sym", which pointed at
+         * the last list entry even when no name matched */
+        LASSERT(found != NULL);
+
+        return 0;
+}
+
+/*
+ * Add (@name, @value) to the symbol list.  Returns KERN_NAME_EXISTS if
+ * the name is already registered, KERN_SUCCESS otherwise.
+ */
+kern_return_t
+cfs_symbol_register(const char *name, const void *value)
+{
+        struct list_head *walker;
+        struct cfs_symbol *sym = NULL;
+        struct cfs_symbol *new = NULL;
+
+        MALLOC(new, struct cfs_symbol *, sizeof(struct cfs_symbol), M_TEMP, M_WAITOK|M_ZERO);
+        strncpy(new->name, name, CFS_SYMBOL_LEN);
+        /* strncpy() does not terminate a source of CFS_SYMBOL_LEN or more
+         * characters; force termination (assumes the name field is
+         * CFS_SYMBOL_LEN bytes, as the strncpy bound implies -- TODO
+         * confirm against the struct definition) */
+        new->name[CFS_SYMBOL_LEN - 1] = '\0';
+        new->value = (void *)value;
+        new->ref = 0;
+        CFS_INIT_LIST_HEAD(&new->sym_list);
+
+        down_write(&cfs_symbol_lock);
+        list_for_each(walker, &cfs_symbol_list) {
+                sym = list_entry (walker, struct cfs_symbol, sym_list);
+                if (!strcmp(sym->name, name)) {
+                        up_write(&cfs_symbol_lock);
+                        FREE(new, M_TEMP);
+                        return KERN_NAME_EXISTS;
+                }
+        }
+        list_add_tail(&new->sym_list, &cfs_symbol_list);
+        up_write(&cfs_symbol_lock);
+
+        return KERN_SUCCESS;
+}
+
+/*
+ * Remove @name from the symbol list and free its entry; asserts it is
+ * no longer referenced.  Returns KERN_SUCCESS even when the name was
+ * never registered.
+ */
+kern_return_t
+cfs_symbol_unregister(const char *name)
+{
+        struct list_head *walker;
+        struct list_head *nxt;
+        struct cfs_symbol *sym = NULL;
+
+        down_write(&cfs_symbol_lock);
+        /* _safe variant: the matched entry is deleted inside the loop */
+        list_for_each_safe(walker, nxt, &cfs_symbol_list) {
+                sym = list_entry (walker, struct cfs_symbol, sym_list);
+                if (!strcmp(sym->name, name)) {
+                        LASSERT(sym->ref == 0);
+                        list_del (&sym->sym_list);
+                        FREE(sym, M_TEMP);
+                        break;
+                }
+        }
+        up_write(&cfs_symbol_lock);
+
+        return KERN_SUCCESS;
+}
+
+/*
+ * Drop every entry from the symbol list (module unload path).  All
+ * symbols must already be unreferenced.
+ */
+void
+cfs_symbol_clean()
+{
+        struct list_head *walker;
+        struct list_head *nxt;
+        struct cfs_symbol *sym;
+
+        down_write(&cfs_symbol_lock);
+        /* must be the _safe variant: entries are deleted and freed while
+         * walking, so ->next has to be fetched before the FREE() --
+         * plain list_for_each read freed memory here */
+        list_for_each_safe(walker, nxt, &cfs_symbol_list) {
+                sym = list_entry (walker, struct cfs_symbol, sym_list);
+                LASSERT(sym->ref == 0);
+                list_del (&sym->sym_list);
+                FREE(sym, M_TEMP);
+        }
+        up_write(&cfs_symbol_lock);
+        return;
+}
+
+/*
+ * Register every OID in the NULL-terminated @table with the sysctl tree.
+ * Returns the table itself (used later as the "header" handle for
+ * unregister_cfs_sysctl_table()); @arg is unused.
+ */
+cfs_sysctl_table_header_t *
+register_cfs_sysctl_table (cfs_sysctl_table_t *table, int arg)
+{
+        cfs_sysctl_table_t entry;
+        int idx;
+
+        for (idx = 0; (entry = table[idx]) != NULL; idx++)
+                sysctl_register_oid(entry);
+        return table;
+}
+
+/*
+ * Unregister every OID previously registered from @table.
+ */
+void
+unregister_cfs_sysctl_table (cfs_sysctl_table_header_t *table)
+{
+        cfs_sysctl_table_t entry;
+        int idx;
+
+        for (idx = 0; (entry = table[idx]) != NULL; idx++)
+                sysctl_unregister_oid(entry);
+}
+
+struct kernel_thread_arg cfs_thread_arg;
+
+/*
+ * Prepare the single handoff slot used to pass (func, arg) pairs to
+ * newly spawned kernel threads via cfs_thread_agent().
+ */
+void
+cfs_thread_agent_init()
+{
+        set_targ_stat(&cfs_thread_arg, THREAD_ARG_FREE);
+        spin_lock_init(&cfs_thread_arg.lock);
+        cfs_thread_arg.func = NULL;
+        cfs_thread_arg.arg  = NULL;
+}
+
+/*
+ * Trampoline run by every kernel thread spawned via cfs_kernel_thread():
+ * fetch (func, arg) from the shared handoff slot, run func(arg), then
+ * terminate this activation.
+ */
+void
+cfs_thread_agent (void)
+{
+        cfs_thread_t func = NULL;
+        void *arg = NULL;
+
+        thread_arg_recv(&cfs_thread_arg, func, arg);
+        /* "%08lx" with a void * argument is a format/argument mismatch
+         * (undefined behaviour); %p is the pointer conversion */
+        printf("entry of thread agent (func: %p).\n", (void *)func);
+        assert(func != NULL);
+        func(arg);
+        printf("thread agent exit. (func: %p)\n", (void *)func);
+        (void) thread_terminate(current_act());
+}
+
+/*
+ * Spawn a kernel thread running @func(@arg): park the pair in the shared
+ * handoff slot, start cfs_thread_agent() in kernel_task, then release
+ * the slot.  @flag is ignored.  Returns 0 on success, -1 if
+ * kernel_thread() failed.
+ */
+int
+cfs_kernel_thread(cfs_thread_t func, void *arg, int flag)
+{
+        int ret = 0;
+        thread_t th = NULL;
+
+        thread_arg_hold(&cfs_thread_arg, func, arg);
+        th = kernel_thread(kernel_task, cfs_thread_agent);
+        thread_arg_release(&cfs_thread_arg);
+        if (th == THREAD_NULL)
+                ret = -1;
+        return ret;
+}
+
+/*
+ * Funnel juggling helpers: enter the kernel (or network) funnel,
+ * remembering the previous funnel/state so the caller can restore it
+ * with the matching _ex routine.
+ */
+void lustre_cone_in(boolean_t *state, funnel_t **cone)
+{
+        *cone = thread_funnel_get();
+        if (*cone == network_flock)
+                thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
+        else if (*cone == NULL)
+                *state = thread_funnel_set(kernel_flock, TRUE);
+}
+
+/* Undo lustre_cone_in() using the saved @state/@cone. */
+void lustre_cone_ex(boolean_t state, funnel_t *cone)
+{
+        if (cone == network_flock)
+                thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
+        else if (cone == NULL)
+                (void) thread_funnel_set(kernel_flock, state);
+}
+
+/* As lustre_cone_in(), but enter the network funnel instead. */
+void lustre_net_in(boolean_t *state, funnel_t **cone)
+{
+        *cone = thread_funnel_get();
+        if (*cone == kernel_flock)
+                thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
+        else if (*cone == NULL)
+                *state = thread_funnel_set(network_flock, TRUE);
+}
+
+/* Undo lustre_net_in() using the saved @state/@cone. */
+void lustre_net_ex(boolean_t state, funnel_t *cone)
+{
+        if (cone == kernel_flock)
+                thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
+        else if (cone == NULL)
+                (void) thread_funnel_set(network_flock, state);
+}
+
+
+/*
+ * cfs_waitq / cfs_waitlink: thin wrappers over the ksleep_chan /
+ * ksleep_link primitives of the XNU sync layer.
+ */
+void cfs_waitq_init(struct cfs_waitq *waitq)
+{
+        ksleep_chan_init(&waitq->wq_ksleep_chan);
+}
+
+void cfs_waitlink_init(struct cfs_waitlink *link)
+{
+        ksleep_link_init(&link->wl_ksleep_link);
+}
+
+/* Queue @link on @waitq; records the waitq for cfs_waitq_wait(). */
+void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link)
+{
+        link->wl_waitq = waitq;
+        ksleep_add(&waitq->wq_ksleep_chan, &link->wl_ksleep_link);
+}
+
+/* As cfs_waitq_add(), but mark the waiter exclusive. */
+void cfs_waitq_add_exclusive(struct cfs_waitq *waitq,
+                             struct cfs_waitlink *link)
+{
+        link->wl_waitq = waitq;
+        link->wl_ksleep_link.flags |= KSLEEP_EXCLUSIVE;
+        ksleep_add(&waitq->wq_ksleep_chan, &link->wl_ksleep_link);
+}
+
+/* Forward wake-ups arriving on @link to @waitq's channel. */
+void cfs_waitq_forward(struct cfs_waitlink *link,
+                       struct cfs_waitq *waitq)
+{
+        link->wl_ksleep_link.forward = &waitq->wq_ksleep_chan;
+}
+
+void cfs_waitq_del(struct cfs_waitq *waitq,
+                   struct cfs_waitlink *link)
+{
+        ksleep_del(&waitq->wq_ksleep_chan, &link->wl_ksleep_link);
+}
+
+/* Stub: always reports the queue as active. */
+int cfs_waitq_active(struct cfs_waitq *waitq)
+{
+        return (1);
+}
+
+/* Wake one waiter. */
+void cfs_waitq_signal(struct cfs_waitq *waitq)
+{
+        ksleep_wake(&waitq->wq_ksleep_chan);
+}
+
+/* Wake up to @nr waiters. */
+void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr)
+{
+        ksleep_wake_nr(&waitq->wq_ksleep_chan, nr);
+}
+
+/* Wake all waiters. */
+void cfs_waitq_broadcast(struct cfs_waitq *waitq)
+{
+        ksleep_wake_all(&waitq->wq_ksleep_chan);
+}
+
+/* Block on the waitq recorded by cfs_waitq_add(). */
+void cfs_waitq_wait(struct cfs_waitlink *link)
+{
+        ksleep_wait(&link->wl_waitq->wq_ksleep_chan);
+}
+
+/*
+ * Block on the waitq recorded by cfs_waitq_add(), for at most @timeout.
+ * Returns the remaining duration from ksleep_timedwait().
+ */
+cfs_duration_t cfs_waitq_timedwait(struct cfs_waitlink *link,
+                                   cfs_duration_t timeout)
+{
+        CDEBUG(D_TRACE, "timeout: %llu\n", (long long unsigned)timeout);
+        /* "link->chan->c" matched no cfs_waitlink member; every sibling
+         * waitq op reaches the channel via wl_waitq->wq_ksleep_chan */
+        return ksleep_timedwait(&link->wl_waitq->wq_ksleep_chan, timeout);
+}
+
+/*
+ * cfs_timer: thin wrappers over the ktimer primitives.
+ */
+typedef void (*ktimer_func_t)(void *);
+
+/* Initialise @t to call @func(@arg) when it fires.
+ * NOTE(review): the handler cast assumes unsigned long and void *
+ * arguments are interchangeable on this ABI -- confirm. */
+void cfs_timer_init(cfs_timer_t *t, void (* func)(unsigned long), void *arg)
+{
+        ktimer_init(&t->t, (ktimer_func_t)func, arg);
+}
+
+void cfs_timer_done(struct cfs_timer *t)
+{
+        ktimer_done(&t->t);
+}
+
+/* Arm @t to fire at @deadline. */
+void cfs_timer_arm(struct cfs_timer *t, cfs_time_t deadline)
+{
+        ktimer_arm(&t->t, deadline);
+}
+
+void cfs_timer_disarm(struct cfs_timer *t)
+{
+        ktimer_disarm(&t->t);
+}
+
+int cfs_timer_is_armed(struct cfs_timer *t)
+{
+        return ktimer_is_armed(&t->t);
+}
+
+cfs_time_t cfs_timer_deadline(struct cfs_timer *t)
+{
+        return ktimer_deadline(&t->t);
+}
+
+/* Arch-specific libcfs setup: symbol table lock/list + thread handoff. */
+int
+libcfs_arch_init(void)
+{
+        init_rwsem(&cfs_symbol_lock);
+        CFS_INIT_LIST_HEAD(&cfs_symbol_list);
+        cfs_thread_agent_init();
+        return 0;
+}
+
+/* Arch-specific teardown: free any symbols still registered. */
+void
+libcfs_arch_cleanup(void)
+{
+        cfs_symbol_clean();
+}
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+#include <sys/proc.h>
+#include <sys/unistd.h>
+#include <mach/mach_types.h>
+
+#define DEBUG_SUBSYSTEM S_PORTALS
+#include <libcfs/libcfs.h>
+
+static cfs_sysctl_table_header_t *portals_table_header = NULL;
+extern unsigned int portal_debug;
+extern char debug_file_path[1024];
+extern unsigned int portal_subsystem_debug;
+extern unsigned int portal_printk;
+extern atomic_t portal_kmemory;
+
+extern long max_debug_mb;
+extern int cfs_trace_daemon SYSCTL_HANDLER_ARGS;
+extern int cfs_debug_mb SYSCTL_HANDLER_ARGS;
+/*
+ * sysctl table for portals
+ */
+SYSCTL_NODE (, OID_AUTO, portals, CTLFLAG_RW,
+ 0, "portals sysctl top");
+
+SYSCTL_INT(_portals, OID_AUTO, debug,
+ CTLTYPE_INT | CTLFLAG_RW , &portal_debug,
+ 0, "debug");
+SYSCTL_INT(_portals, OID_AUTO, subsystem_debug,
+ CTLTYPE_INT | CTLFLAG_RW, &portal_subsystem_debug,
+ 0, "subsystem debug");
+SYSCTL_INT(_portals, OID_AUTO, printk,
+ CTLTYPE_INT | CTLFLAG_RW, &portal_printk,
+ 0, "printk");
+SYSCTL_STRING(_portals, OID_AUTO, debug_path,
+ CTLTYPE_STRING | CTLFLAG_RW, debug_file_path,
+ 1024, "debug path");
+SYSCTL_INT(_portals, OID_AUTO, memused,
+ CTLTYPE_INT | CTLFLAG_RW, (int *)&portal_kmemory.counter,
+ 0, "memused");
+SYSCTL_PROC(_portals, OID_AUTO, trace_daemon,
+ CTLTYPE_STRING | CTLFLAG_RW, 0,
+ 0, &cfs_trace_daemon, "A", "trace daemon");
+SYSCTL_PROC(_portals, OID_AUTO, debug_mb,
+ CTLTYPE_INT | CTLFLAG_RW, &max_debug_mb,
+ 0, &cfs_debug_mb, "L", "max debug size");
+
+
+static cfs_sysctl_table_t top_table[] = {
+ &sysctl__portals,
+ &sysctl__portals_debug,
+ &sysctl__portals_subsystem_debug,
+ &sysctl__portals_printk,
+ &sysctl__portals_debug_path,
+ &sysctl__portals_memused,
+ &sysctl__portals_trace_daemon,
+ &sysctl__portals_debug_mb,
+ NULL
+};
+
+/* no proc in osx */
+/* Stub: @name, @mod and @parent are ignored; callers get a zeroed entry. */
+cfs_proc_dir_entry_t *
+cfs_create_proc_entry(char *name, int mod, cfs_proc_dir_entry_t *parent)
+{
+        cfs_proc_dir_entry_t *entry;
+        MALLOC(entry, cfs_proc_dir_entry_t *, sizeof(cfs_proc_dir_entry_t), M_TEMP, M_WAITOK|M_ZERO);
+
+        return entry;
+}
+
+/* Free an entry obtained from cfs_create_proc_entry(). */
+void
+cfs_free_proc_entry(cfs_proc_dir_entry_t *de){
+        FREE(de, M_TEMP);
+        return;
+};
+
+/* Stub: @name is ignored; just frees @entry. */
+void
+cfs_remove_proc_entry(char *name, cfs_proc_dir_entry_t *entry)
+{
+        cfs_free_proc_entry(entry);
+        return;
+}
+
+/* Hook the portals sysctl tree in (idempotent). */
+int
+insert_proc(void)
+{
+#if 1
+        if (!portals_table_header)
+                portals_table_header = register_cfs_sysctl_table(top_table, 0);
+#endif
+        return 0;
+}
+
+/* Remove the portals sysctl tree (safe to call when absent). */
+void
+remove_proc(void)
+{
+#if 1
+        if (portals_table_header != NULL)
+                unregister_cfs_sysctl_table(portals_table_header);
+        portals_table_header = NULL;
+#endif
+        return;
+}
+
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Lustre Light Super operations
+ *
+ * Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under
+ * the terms of version 2 of the GNU General Public License as published by
+ * the Free Software Foundation. Lustre is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details. You should have received a
+ * copy of the GNU General Public License along with Lustre; if not, write
+ * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ */
+
+/*
+ * xnu_sync.c
+ *
+ * Created by nikita on Sun Jul 18 2004.
+ *
+ * Prototypes of XNU synchronization primitives.
+ */
+
+/*
+ * This file contains very simplistic implementations of (saner) API for
+ * basic synchronization primitives:
+ *
+ * - spin-lock (kspin)
+ *
+ * - semaphore (ksem)
+ *
+ * - mutex (kmut)
+ *
+ * - condition variable (kcond)
+ *
+ * - wait-queue (ksleep_chan and ksleep_link)
+ *
+ * - timer (ktimer)
+ *
+ * A lot can be optimized here.
+ */
+
+#include <mach/mach_types.h>
+#include <sys/types.h>
+#include <kern/simple_lock.h>
+
+#define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+#define SLASSERT(e) ON_SYNC_DEBUG(LASSERT(e))
+
+#ifdef HAVE_GET_PREEMPTION_LEVEL
+extern int get_preemption_level(void);
+#else
+#define get_preemption_level() (0)
+#endif
+
+/*
+ * Warning: low level portals debugging code (portals_debug_msg(), for
+ * example), uses spin-locks, so debugging output here may lead to nasty
+ * surprises.
+ */
+
/*
 * Spin-lock (kspin) implementation.
 *
 * On SMP this wraps the Mach hw_lock_* primitives; on UP there is no
 * real lock and only the debugging checks remain.  With XNU_SYNC_DEBUG
 * enabled each lock carries a magic number and the owning thread, so
 * misuse (recursive lock, unlock by non-owner, destroying a held lock)
 * trips the SLASSERTs.
 */
#if SMP

/* hw_lock_* are not declared in any exported XNU header, hence these
 * local prototypes. */
extern void hw_lock_init(hw_lock_t);
extern void hw_lock_lock(hw_lock_t);
extern void hw_lock_unlock(hw_lock_t);
extern unsigned int hw_lock_to(hw_lock_t, unsigned int);
extern unsigned int hw_lock_try(hw_lock_t);
extern unsigned int hw_lock_held(hw_lock_t);

/* Initialize @spin to the unlocked state. */
void kspin_init(struct kspin *spin)
{
        SLASSERT(spin != NULL);
        hw_lock_init(&spin->lock);
        ON_SYNC_DEBUG(spin->magic = KSPIN_MAGIC);
        ON_SYNC_DEBUG(spin->owner = NULL);
}

/* Finalize @spin; it must not be held. */
void kspin_done(struct kspin *spin)
{
        SLASSERT(spin != NULL);
        SLASSERT(spin->magic == KSPIN_MAGIC);
        SLASSERT(spin->owner == NULL);
}

/* Busy-wait until @spin is acquired.  Not recursive. */
void kspin_lock(struct kspin *spin)
{
        SLASSERT(spin != NULL);
        SLASSERT(spin->magic == KSPIN_MAGIC);
        SLASSERT(spin->owner != current_thread);

        hw_lock_lock(&spin->lock);
        SLASSERT(spin->owner == NULL);
        ON_SYNC_DEBUG(spin->owner = current_thread);
}

/* Release @spin; caller must be the owner. */
void kspin_unlock(struct kspin *spin)
{
        SLASSERT(spin != NULL);
        SLASSERT(spin->magic == KSPIN_MAGIC);
        SLASSERT(spin->owner == current_thread);
        ON_SYNC_DEBUG(spin->owner = NULL);
        hw_lock_unlock(&spin->lock);
}

/* Try to acquire @spin without spinning; returns 1 on success, 0 if
 * the lock is busy. */
int kspin_trylock(struct kspin *spin)
{
        SLASSERT(spin != NULL);
        SLASSERT(spin->magic == KSPIN_MAGIC);

        if (hw_lock_try(&spin->lock)) {
                SLASSERT(spin->owner == NULL);
                ON_SYNC_DEBUG(spin->owner = current_thread);
                return 1;
        } else
                return 0;
}

/* SMP */
#else

/*
 * uniprocessor version of spin-lock. Only checks.
 */

void kspin_init(struct kspin *spin)
{
        SLASSERT(spin != NULL);
        ON_SYNC_DEBUG(spin->magic = KSPIN_MAGIC);
        ON_SYNC_DEBUG(spin->owner = NULL);
}

void kspin_done(struct kspin *spin)
{
        SLASSERT(spin != NULL);
        SLASSERT(spin->magic == KSPIN_MAGIC);
        SLASSERT(spin->owner == NULL);
}

void kspin_lock(struct kspin *spin)
{
        SLASSERT(spin != NULL);
        SLASSERT(spin->magic == KSPIN_MAGIC);
        SLASSERT(spin->owner == NULL);
        ON_SYNC_DEBUG(spin->owner = current_thread);
}

void kspin_unlock(struct kspin *spin)
{
        SLASSERT(spin != NULL);
        SLASSERT(spin->magic == KSPIN_MAGIC);
        SLASSERT(spin->owner == current_thread);
        ON_SYNC_DEBUG(spin->owner = NULL);
}

int kspin_trylock(struct kspin *spin)
{
        SLASSERT(spin != NULL);
        SLASSERT(spin->magic == KSPIN_MAGIC);
        SLASSERT(spin->owner == NULL);
        ON_SYNC_DEBUG(spin->owner = current_thread);
        return 1;
}

/* SMP */
#endif

#if XNU_SYNC_DEBUG
/* Debug-only predicate: does the calling thread hold @spin?  Only
 * meaningful when XNU_SYNC_DEBUG tracks the owner. */
int kspin_islocked(struct kspin *spin)
{
        SLASSERT(spin != NULL);
        SLASSERT(spin->magic == KSPIN_MAGIC);
        return spin->owner == current_thread;
}

/* Debug-only predicate: complement of kspin_islocked(). */
int kspin_isnotlocked(struct kspin *spin)
{
        SLASSERT(spin != NULL);
        SLASSERT(spin->magic == KSPIN_MAGIC);
        return spin->owner != current_thread;
}
#endif
+
/*
 * Counting semaphore (ksem): a counter protected by a kspin, plus a
 * Mach wait queue to park threads on.
 */

/* Initialize @sem with an initial count of @value. */
void ksem_init(struct ksem *sem, int value)
{
        SLASSERT(sem != NULL);
        kspin_init(&sem->guard);
        wait_queue_init(&sem->q, SYNC_POLICY_FIFO);
        sem->value = value;
        ON_SYNC_DEBUG(sem->magic = KSEM_MAGIC);
}

/* Finalize @sem.  No waiters may remain. */
void ksem_done(struct ksem *sem)
{
        SLASSERT(sem != NULL);
        SLASSERT(sem->magic == KSEM_MAGIC);
        /*
         * XXX nikita: cannot check that &sem->q is empty because
         * wait_queue_empty() is Apple private API.
         */
        kspin_done(&sem->guard);
}

/*
 * Add @value to the semaphore and wake sleepers.  Returns 0 if a thread
 * was actually woken, 1 otherwise.
 *
 * NOTE(review): when the new count is exactly 0 only one waiter is
 * woken, otherwise all are — confirm this wake policy is intentional.
 */
int ksem_up(struct ksem *sem, int value)
{
        int result;

        SLASSERT(sem != NULL);
        SLASSERT(sem->magic == KSEM_MAGIC);
        SLASSERT(value >= 0);

        kspin_lock(&sem->guard);
        sem->value += value;
        if (sem->value == 0)
                result = wait_queue_wakeup_one(&sem->q, (event_t)sem,
                                               THREAD_AWAKENED);
        else
                result = wait_queue_wakeup_all(&sem->q, (event_t)sem,
                                               THREAD_AWAKENED);
        kspin_unlock(&sem->guard);
        SLASSERT(result == KERN_SUCCESS || result == KERN_NOT_WAITING);
        return (result == KERN_SUCCESS) ? 0 : 1;
}

/*
 * Acquire @value units from @sem, blocking (uninterruptibly) until the
 * count is large enough.  May sleep, so the caller must not hold spin
 * locks or have preemption disabled.
 */
void ksem_down(struct ksem *sem, int value)
{
        int result;

        SLASSERT(sem != NULL);
        SLASSERT(sem->magic == KSEM_MAGIC);
        SLASSERT(value >= 0);
        SLASSERT(get_preemption_level() == 0);

        kspin_lock(&sem->guard);
        while (sem->value < value) {
                /* register on the wait queue before dropping the guard
                 * so a concurrent ksem_up() cannot miss us */
                result = wait_queue_assert_wait(&sem->q, (event_t)sem,
                                                THREAD_UNINT);
                SLASSERT(result == THREAD_AWAKENED || result == THREAD_WAITING);
                kspin_unlock(&sem->guard);
                if (result == THREAD_WAITING)
                        thread_block(THREAD_CONTINUE_NULL);
                kspin_lock(&sem->guard);
        }
        sem->value -= value;
        kspin_unlock(&sem->guard);
}

/* Non-blocking acquire: returns 0 and takes @value units if available,
 * -EBUSY otherwise. */
int ksem_trydown(struct ksem *sem, int value)
{
        int result;

        SLASSERT(sem != NULL);
        SLASSERT(sem->magic == KSEM_MAGIC);
        SLASSERT(value >= 0);

        kspin_lock(&sem->guard);
        if (sem->value >= value) {
                sem->value -= value;
                result = 0;
        } else
                result = -EBUSY;
        kspin_unlock(&sem->guard);
        return result;
}
+
/*
 * Mutex (kmut): a binary ksem plus debug owner tracking.  Not
 * recursive.
 */

/* Initialize @mut in the unlocked state. */
void kmut_init(struct kmut *mut)
{
        SLASSERT(mut != NULL);
        ksem_init(&mut->s, 1);
        ON_SYNC_DEBUG(mut->magic = KMUT_MAGIC);
        ON_SYNC_DEBUG(mut->owner = NULL);
}

/* Finalize @mut; it must not be held. */
void kmut_done(struct kmut *mut)
{
        SLASSERT(mut != NULL);
        SLASSERT(mut->magic == KMUT_MAGIC);
        SLASSERT(mut->owner == NULL);
        ksem_done(&mut->s);
}

/* Lock @mut, sleeping if necessary.  Must not be called with
 * preemption disabled, and not recursively by the owner. */
void kmut_lock(struct kmut *mut)
{
        SLASSERT(mut != NULL);
        SLASSERT(mut->magic == KMUT_MAGIC);
        SLASSERT(mut->owner != current_thread);
        SLASSERT(get_preemption_level() == 0);

        ksem_down(&mut->s, 1);
        ON_SYNC_DEBUG(mut->owner = current_thread);
}

/* Unlock @mut; caller must be the owner. */
void kmut_unlock(struct kmut *mut)
{
        SLASSERT(mut != NULL);
        SLASSERT(mut->magic == KMUT_MAGIC);
        SLASSERT(mut->owner == current_thread);

        ON_SYNC_DEBUG(mut->owner = NULL);
        ksem_up(&mut->s, 1);
}

/*
 * Try to lock @mut without sleeping.  Returns 0 on success and -EBUSY
 * when held (note: this follows ksem_trydown()'s convention, NOT the
 * Linux mutex_trylock() 1-on-success convention).
 */
int kmut_trylock(struct kmut *mut)
{
        SLASSERT(mut != NULL);
        SLASSERT(mut->magic == KMUT_MAGIC);
        return ksem_trydown(&mut->s, 1);
}

#if XNU_SYNC_DEBUG
/* Debug-only predicate: does the calling thread hold @mut? */
int kmut_islocked(struct kmut *mut)
{
        SLASSERT(mut != NULL);
        SLASSERT(mut->magic == KMUT_MAGIC);
        return mut->owner == current_thread;
}

/* Debug-only predicate: complement of kmut_islocked(). */
int kmut_isnotlocked(struct kmut *mut)
{
        SLASSERT(mut != NULL);
        SLASSERT(mut->magic == KMUT_MAGIC);
        return mut->owner != current_thread;
}
#endif
+
+
/*
 * Condition variable (kcond): a kspin guard plus a singly linked list
 * of waiters, each waiter blocking on a private on-stack ksem.  Waiters
 * are pushed at the head, so kcond_signal() wakes in LIFO order.
 */

/* Initialize @cond with no waiters. */
void kcond_init(struct kcond *cond)
{
        SLASSERT(cond != NULL);

        kspin_init(&cond->guard);
        cond->waiters = NULL;
        ON_SYNC_DEBUG(cond->magic = KCOND_MAGIC);
}

/* Finalize @cond; no waiters may remain. */
void kcond_done(struct kcond *cond)
{
        SLASSERT(cond != NULL);
        SLASSERT(cond->magic == KCOND_MAGIC);
        SLASSERT(cond->waiters == NULL);
        kspin_done(&cond->guard);
}

/*
 * Atomically release @lock (held by the caller) and wait for @cond to
 * be signalled; @lock is re-acquired before returning.  The wait link
 * lives on this stack frame.
 */
void kcond_wait(struct kcond *cond, struct kspin *lock)
{
        struct kcond_link link;

        SLASSERT(cond != NULL);
        SLASSERT(lock != NULL);
        SLASSERT(cond->magic == KCOND_MAGIC);
        SLASSERT(kspin_islocked(lock));

        ksem_init(&link.sem, 0);
        kspin_lock(&cond->guard);
        link.next = cond->waiters;
        cond->waiters = &link;
        kspin_unlock(&cond->guard);
        kspin_unlock(lock);

        ksem_down(&link.sem, 1);

        /* NOTE(review): this empty guard lock/unlock pair appears to
         * make sure the signalling thread has left its guard-protected
         * critical section before our on-stack link goes out of scope —
         * confirm before simplifying it away. */
        kspin_lock(&cond->guard);
        kspin_unlock(&cond->guard);
        kspin_lock(lock);
}

/* Like kcond_wait(), but the condition is protected by @cond's own
 * guard, which the caller holds and which is re-acquired on return. */
void kcond_wait_guard(struct kcond *cond)
{
        struct kcond_link link;

        SLASSERT(cond != NULL);
        SLASSERT(cond->magic == KCOND_MAGIC);
        SLASSERT(kspin_islocked(&cond->guard));

        ksem_init(&link.sem, 0);
        link.next = cond->waiters;
        cond->waiters = &link;
        kspin_unlock(&cond->guard);

        ksem_down(&link.sem, 1);

        kspin_lock(&cond->guard);
}

/* Wake one waiter (most recently queued first); caller holds the
 * guard. */
void kcond_signal_guard(struct kcond *cond)
{
        struct kcond_link *link;

        SLASSERT(cond != NULL);
        SLASSERT(cond->magic == KCOND_MAGIC);
        SLASSERT(kspin_islocked(&cond->guard));

        link = cond->waiters;
        if (link != NULL) {
                cond->waiters = link->next;
                ksem_up(&link->sem, 1);
        }
}

/* Wake one waiter, taking the guard internally. */
void kcond_signal(struct kcond *cond)
{
        SLASSERT(cond != NULL);
        SLASSERT(cond->magic == KCOND_MAGIC);

        kspin_lock(&cond->guard);
        kcond_signal_guard(cond);
        kspin_unlock(&cond->guard);
}

/* Wake all waiters; caller holds the guard. */
void kcond_broadcast_guard(struct kcond *cond)
{
        struct kcond_link *link;

        SLASSERT(cond != NULL);
        SLASSERT(cond->magic == KCOND_MAGIC);
        SLASSERT(kspin_islocked(&cond->guard));

        for (link = cond->waiters; link != NULL; link = link->next)
                ksem_up(&link->sem, 1);
        cond->waiters = NULL;
}

/* Wake all waiters, taking the guard internally. */
void kcond_broadcast(struct kcond *cond)
{
        SLASSERT(cond != NULL);
        SLASSERT(cond->magic == KCOND_MAGIC);

        kspin_lock(&cond->guard);
        kcond_broadcast_guard(cond);
        kspin_unlock(&cond->guard);
}
+
/*
 * Reader-writer semaphore (krw_sem) built on a kcond.  sem->count is
 * the number of active readers (> 0), -1 while a writer holds it, and
 * 0 when free.
 */

/* Initialize @sem in the free state. */
void krw_sem_init(struct krw_sem *sem)
{
        SLASSERT(sem != NULL);

        kcond_init(&sem->cond);
        sem->count = 0;
        ON_SYNC_DEBUG(sem->magic = KRW_MAGIC);
}

/* Finalize @sem; it must be free. */
void krw_sem_done(struct krw_sem *sem)
{
        SLASSERT(sem != NULL);
        SLASSERT(sem->magic == KRW_MAGIC);
        SLASSERT(sem->count == 0);
        kcond_done(&sem->cond);
}

/* Take @sem for reading, sleeping while a writer holds it. */
void krw_sem_down_r(struct krw_sem *sem)
{
        SLASSERT(sem != NULL);
        SLASSERT(sem->magic == KRW_MAGIC);
        SLASSERT(get_preemption_level() == 0);

        kspin_lock(&sem->cond.guard);
        while (sem->count < 0)
                kcond_wait_guard(&sem->cond);
        ++ sem->count;
        kspin_unlock(&sem->cond.guard);
}

/* Try to take @sem for reading; 0 on success, -EBUSY if a writer holds
 * it. */
int krw_sem_down_r_try(struct krw_sem *sem)
{
        SLASSERT(sem != NULL);
        SLASSERT(sem->magic == KRW_MAGIC);

        kspin_lock(&sem->cond.guard);
        if (sem->count < 0) {
                kspin_unlock(&sem->cond.guard);
                return -EBUSY;
        }
        ++ sem->count;
        kspin_unlock(&sem->cond.guard);
        return 0;
}

/* Take @sem for writing, sleeping until it is completely free. */
void krw_sem_down_w(struct krw_sem *sem)
{
        SLASSERT(sem != NULL);
        SLASSERT(sem->magic == KRW_MAGIC);
        SLASSERT(get_preemption_level() == 0);

        kspin_lock(&sem->cond.guard);
        while (sem->count != 0)
                kcond_wait_guard(&sem->cond);
        sem->count = -1;
        kspin_unlock(&sem->cond.guard);
}

/* Try to take @sem for writing; 0 on success, -EBUSY if held. */
int krw_sem_down_w_try(struct krw_sem *sem)
{
        SLASSERT(sem != NULL);
        SLASSERT(sem->magic == KRW_MAGIC);

        kspin_lock(&sem->cond.guard);
        if (sem->count != 0) {
                kspin_unlock(&sem->cond.guard);
                return -EBUSY;
        }
        sem->count = -1;
        kspin_unlock(&sem->cond.guard);
        return 0;
}

/*
 * Drop a read hold; the last reader out broadcasts to waiting writers.
 * NOTE(review): the count > 0 assertion runs before the guard is taken,
 * so it reads the counter unlocked — benign for a sanity check, but a
 * racy read nonetheless.
 */
void krw_sem_up_r(struct krw_sem *sem)
{
        SLASSERT(sem != NULL);
        SLASSERT(sem->magic == KRW_MAGIC);
        SLASSERT(sem->count > 0);

        kspin_lock(&sem->cond.guard);
        -- sem->count;
        if (sem->count == 0)
                kcond_broadcast_guard(&sem->cond);
        kspin_unlock(&sem->cond.guard);
}

/* Drop the write hold and wake everyone waiting for the semaphore. */
void krw_sem_up_w(struct krw_sem *sem)
{
        SLASSERT(sem != NULL);
        SLASSERT(sem->magic == KRW_MAGIC);
        SLASSERT(sem->count == -1);

        kspin_lock(&sem->cond.guard);
        sem->count = 0;
        kspin_unlock(&sem->cond.guard);
        kcond_broadcast(&sem->cond);
}
+
/*
 * Wait channel (ksleep_chan / ksleep_link): the Darwin counterpart of a
 * Linux wait queue head / wait queue entry.  Each link records "hits" —
 * wakeups delivered while the thread was not yet blocked — so a wakeup
 * that races with ksleep_wait() is not lost.
 */

/* Initialize an empty wait channel. */
void ksleep_chan_init(struct ksleep_chan *chan)
{
        SLASSERT(chan != NULL);

        kspin_init(&chan->guard);
        CFS_INIT_LIST_HEAD(&chan->waiters);
        ON_SYNC_DEBUG(chan->magic = KSLEEP_CHAN_MAGIC);
}

/* Finalize @chan; no waiters may remain. */
void ksleep_chan_done(struct ksleep_chan *chan)
{
        SLASSERT(chan != NULL);
        SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC);
        SLASSERT(list_empty(&chan->waiters));
        kspin_done(&chan->guard);
}

/* Initialize a wait link for the calling thread (the thread itself is
 * the Mach event used for wakeups). */
void ksleep_link_init(struct ksleep_link *link)
{
        SLASSERT(link != NULL);

        CFS_INIT_LIST_HEAD(&link->linkage);
        link->flags = 0;
        link->event = current_thread;
        link->hits = 0;
        link->forward = NULL;
        ON_SYNC_DEBUG(link->magic = KSLEEP_LINK_MAGIC);
}

/* Finalize a link; it must already be off any channel. */
void ksleep_link_done(struct ksleep_link *link)
{
        SLASSERT(link != NULL);
        SLASSERT(link->magic == KSLEEP_LINK_MAGIC);
        SLASSERT(list_empty(&link->linkage));
}

/*
 * Queue @link on @chan.  Exclusive waiters go to the tail, others to
 * the head, so ksleep_wake_nr() reaches every non-exclusive waiter
 * before the bounded set of exclusive ones.
 */
void ksleep_add(struct ksleep_chan *chan, struct ksleep_link *link)
{
        SLASSERT(chan != NULL);
        SLASSERT(link != NULL);
        SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC);
        SLASSERT(link->magic == KSLEEP_LINK_MAGIC);
        SLASSERT(list_empty(&link->linkage));

        kspin_lock(&chan->guard);
        if (link->flags & KSLEEP_EXCLUSIVE)
                list_add_tail(&link->linkage, &chan->waiters);
        else
                list_add(&link->linkage, &chan->waiters);
        kspin_unlock(&chan->guard);
}

/* Remove @link from @chan (safe if already removed: list_del_init on an
 * initialized, unlinked entry is a no-op). */
void ksleep_del(struct ksleep_chan *chan, struct ksleep_link *link)
{
        SLASSERT(chan != NULL);
        SLASSERT(link != NULL);
        SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC);
        SLASSERT(link->magic == KSLEEP_LINK_MAGIC);

        kspin_lock(&chan->guard);
        list_del_init(&link->linkage);
        kspin_unlock(&chan->guard);
}

/* With @chan->guard held: consume one pending wakeup for @event, if
 * any.  Returns 1 when a hit was consumed, 0 otherwise. */
static int has_hits(struct ksleep_chan *chan, event_t event)
{
        struct ksleep_link *scan;

        SLASSERT(kspin_islocked(&chan->guard));
        list_for_each_entry(scan, &chan->waiters, linkage) {
                if (scan->event == event && scan->hits > 0) {
                        /* consume hit */
                        -- scan->hits;
                        return 1;
                }
        }
        return 0;
}

/* With @chan->guard held: record a pending wakeup for @event on its
 * link (first match only). */
static void add_hit(struct ksleep_chan *chan, event_t event)
{
        struct ksleep_link *scan;

        SLASSERT(kspin_islocked(&chan->guard));
        list_for_each_entry(scan, &chan->waiters, linkage) {
                if (scan->event == event) {
                        ++ scan->hits;
                        break;
                }
        }
}
+
/*
 * Block the calling thread on @chan until woken, unless a wakeup was
 * already recorded for it (a "hit"), in which case the hit is consumed
 * and the call returns immediately.  Uninterruptible; the caller must
 * already have queued itself with ksleep_add().
 */
void ksleep_wait(struct ksleep_chan *chan)
{
        event_t event;
        int result;

        ENTRY;

        SLASSERT(chan != NULL);
        SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC);
        SLASSERT(get_preemption_level() == 0);

        event = current_thread;
        kspin_lock(&chan->guard);
        if (!has_hits(chan, event)) {
                /* register the wait before dropping the guard so a
                 * concurrent wakeup cannot slip between check and
                 * block */
                result = assert_wait(event, THREAD_UNINT);
                kspin_unlock(&chan->guard);
                SLASSERT(result == THREAD_AWAKENED || result == THREAD_WAITING);
                if (result == THREAD_WAITING)
                        thread_block(THREAD_CONTINUE_NULL);
        } else
                kspin_unlock(&chan->guard);
        EXIT;
}

/*
 * Like ksleep_wait(), but give up after @timeout nanoseconds
 * (@timeout == 0 means wait forever).  Returns 0 on timeout (or when a
 * pending hit made the call return immediately), otherwise the time
 * actually spent blocked, in nanoseconds, clamped to >= 0.
 *
 * NOTE(review): @result doubles as the wait_result_t from assert_wait()
 * and as the nanosecond count written by absolutetime_to_nanoseconds()
 * (which takes a uint64_t *) — works, but worth keeping an eye on.
 */
int64_t ksleep_timedwait(struct ksleep_chan *chan, uint64_t timeout)
{
        event_t event;
        int64_t result;
        AbsoluteTime clock_current;
        AbsoluteTime clock_delay;

        ENTRY;

        SLASSERT(chan != NULL);
        SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC);
        SLASSERT(get_preemption_level() == 0);

        CDEBUG(D_TRACE, "timeout: %llu\n", (long long unsigned)timeout);

        event = current_thread;
        result = 0;
        kspin_lock(&chan->guard);
        if (!has_hits(chan, event)) {
                result = assert_wait(event, THREAD_UNINT);
                if (timeout > 0) {
                        /*
                         * arm a timer. thread_set_timer()'s first argument is
                         * uint32_t, so we have to cook deadline ourselves.
                         */
                        clock_get_uptime(&clock_current);
                        nanoseconds_to_absolutetime(timeout, &clock_delay);
                        ADD_ABSOLUTETIME(&clock_current, &clock_delay);
                        thread_set_timer_deadline(clock_current);
                }
                kspin_unlock(&chan->guard);
                SLASSERT(result == THREAD_AWAKENED || result == THREAD_WAITING);
                if (result == THREAD_WAITING)
                        result = thread_block(THREAD_CONTINUE_NULL);
                thread_cancel_timer();

                /* elapsed = now - deadline base captured above */
                clock_get_uptime(&clock_delay);
                SUB_ABSOLUTETIME(&clock_delay, &clock_current);
                if (result == THREAD_TIMED_OUT)
                        result = 0;
                else {
                        absolutetime_to_nanoseconds(clock_delay, &result);
                        if (result < 0)
                                result = 0;
                }
        } else
                kspin_unlock(&chan->guard);

        RETURN(result);
}
+
/*
 * wake up single exclusive waiter (plus some arbitrary number of *
 * non-exclusive)
 */
void ksleep_wake(struct ksleep_chan *chan)
{
        ENTRY;
        ksleep_wake_nr(chan, 1);
        EXIT;
}

/*
 * wake up all waiters on @chan
 */
void ksleep_wake_all(struct ksleep_chan *chan)
{
        ENTRY;
        ksleep_wake_nr(chan, 0);
        EXIT;
}

/*
 * wakeup no more than @nr exclusive waiters from @chan, plus some arbitrary
 * number of non-exclusive. If @nr is 0, wake up all waiters.
 *
 * Non-exclusive waiters sit at the head of the list (see ksleep_add()),
 * so they are all reached before the bounded count of exclusive ones.
 * A waiter that is not currently blocked (KERN_NOT_WAITING) gets a
 * "hit" recorded instead, both here and on its forwarding channel, so
 * the wakeup is not lost.
 */
void ksleep_wake_nr(struct ksleep_chan *chan, int nr)
{
        struct ksleep_link *scan;
        int result;

        ENTRY;

        SLASSERT(chan != NULL);
        SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC);

        kspin_lock(&chan->guard);
        list_for_each_entry(scan, &chan->waiters, linkage) {
                struct ksleep_chan *forward;

                forward = scan->forward;
                if (forward != NULL)
                        kspin_lock(&forward->guard);
                result = thread_wakeup(scan->event);
                CDEBUG(D_INFO, "waking 0x%x: %d\n",
                       (unsigned int)scan->event, result);
                SLASSERT(result == KERN_SUCCESS || result == KERN_NOT_WAITING);
                if (result == KERN_NOT_WAITING) {
                        ++ scan->hits;
                        if (forward != NULL)
                                add_hit(forward, scan->event);
                }
                if (forward != NULL)
                        kspin_unlock(&forward->guard);
                /* --nr never reaches 0 when nr started at 0, which is
                 * what makes 0 mean "no limit" */
                if ((scan->flags & KSLEEP_EXCLUSIVE) && --nr == 0)
                        break;
        }
        kspin_unlock(&chan->guard);
        EXIT;
}
+
/*
 * One-shot timer (ktimer) built on the XNU thread_call_func_* API.
 */

/* Initialize @t to call @func(@arg) when it fires.  The timer starts
 * disarmed. */
void ktimer_init(struct ktimer *t, void (*func)(void *), void *arg)
{
        SLASSERT(t != NULL);
        SLASSERT(func != NULL);

        kspin_init(&t->guard);
        t->func = func;
        t->arg = arg;
        ON_SYNC_DEBUG(t->magic = KTIMER_MAGIC);
}

/* Finalize @t.  NOTE(review): does not cancel a pending thread_call —
 * confirm callers always disarm first. */
void ktimer_done(struct ktimer *t)
{
        SLASSERT(t != NULL);
        SLASSERT(t->magic == KTIMER_MAGIC);
        kspin_done(&t->guard);
        ON_SYNC_DEBUG(t->magic = 0);
}

/* thread_call trampoline: runs the user callback only if the timer is
 * still armed, clearing the armed flag first so the callback may safely
 * re-arm the timer. */
static void ktimer_actor(void *arg0, void *arg1)
{
        struct ktimer *t;
        int armed;

        t = arg0;
        /*
         * this assumes that ktimer's are never freed.
         */
        SLASSERT(t != NULL);
        SLASSERT(t->magic == KTIMER_MAGIC);

        /*
         * call actual timer function
         */
        kspin_lock(&t->guard);
        armed = t->armed;
        t->armed = 0;
        kspin_unlock(&t->guard);

        if (armed)
                t->func(t->arg);
}

/* With @t->guard held: cancel any pending thread_call for this timer.
 * FALSE = do not wait for a running callback to finish. */
static void ktimer_disarm_locked(struct ktimer *t)
{
        SLASSERT(t != NULL);
        SLASSERT(t->magic == KTIMER_MAGIC);

        thread_call_func_cancel(ktimer_actor, t, FALSE);
}
+
+void ktimer_arm(struct ktimer *t, u_int64_t deadline)
+{
+ SLASSERT(t != NULL);
+ SLASSERT(t->magic == KTIMER_MAGIC);
+
+ kspin_lock(&t->guard);
+ ktimer_disarm_locked(t);
+ t->armed = 1;
+ thread_call_func_delayed(ktimer_actor, t, *(AbsoluteTime *)&deadline);
+ kspin_unlock(&t->guard);
+}
+
/* Cancel @t if pending; the callback will not run (unless it is already
 * executing — the cancel does not wait for it). */
void ktimer_disarm(struct ktimer *t)
{
        SLASSERT(t != NULL);
        SLASSERT(t->magic == KTIMER_MAGIC);

        kspin_lock(&t->guard);
        t->armed = 0;
        ktimer_disarm_locked(t);
        kspin_unlock(&t->guard);
}

/* Return non-zero while @t is armed. */
int ktimer_is_armed(struct ktimer *t)
{
        SLASSERT(t != NULL);
        SLASSERT(t->magic == KTIMER_MAGIC);

        /*
         * no locking---result is only a hint anyway.
         */
        return t->armed;
}

/* Return the deadline @t was last armed with.
 * NOTE(review): nothing in this file stores t->deadline — see the fix
 * suggested in ktimer_arm(); until then this is uninitialized data. */
u_int64_t ktimer_deadline(struct ktimer *t)
{
        SLASSERT(t != NULL);
        SLASSERT(t->magic == KTIMER_MAGIC);

        return t->deadline;
}
+
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
--- /dev/null
+
+#define DEBUG_SUBSYSTEM S_PORTALS
+#define LUSTRE_TRACEFILE_PRIVATE
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+#include "tracefile.h"
+
/*
 * We can't support smp tracefile currently.
 * Everything is put on one cpu.
 */

#define M_TCD_MAX_PAGES (128 * 1280)
extern union trace_data_union trace_data[NR_CPUS];
extern char *tracefile;
extern long long tracefile_size;
extern struct rw_semaphore tracefile_sem;
extern int trace_start_thread(void);
extern void trace_stop_thread(void);

/* NOTE(review): despite the "_mb" names these hold page counts, and
 * max_debug_mb is fed straight into tcd_max_pages below — confirm the
 * unit before comparing against truly megabyte-valued settings. */
long max_debug_mb = M_TCD_MAX_PAGES;
static long max_permit_mb = (64 * 1024);

/* Single-CPU tracefile: always hand out CPU 0's trace data.  @flags is
 * unused (no per-CPU locking needed here). */
inline struct trace_cpu_data *
__trace_get_tcd (unsigned long *flags)
{
        return &trace_data[0].tcd;
}

/* Counterpart of __trace_get_tcd(); nothing to release on Darwin. */
inline void
__trace_put_tcd (struct trace_cpu_data *tcd, unsigned long flags)
{
        return;
}
+
/*
 * Fill in the fixed part of a trace-record header: subsystem/mask,
 * timestamp, originating CPU, source line and stack usage.  The pid
 * fields are left 0 on Darwin (no cheap current-pid accessor here).
 */
void
set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask,
                    const int line, unsigned long stack)
{
        struct timeval tv;

        do_gettimeofday(&tv);
        header->ph_subsys = subsys;
        header->ph_mask = mask;
        header->ph_cpu_id = smp_processor_id();
        header->ph_sec = (__u32)tv.tv_sec;
        header->ph_usec = tv.tv_usec;
        header->ph_stack = stack;
        header->ph_pid = 0;
        header->ph_line_num = line;
        header->ph_extern_pid = 0;
}
+
+void print_to_console(struct ptldebug_header *hdr, int mask, char *buf,
+ int len, char *file, const char *fn)
+{
+ char *prefix = NULL, *ptype = NULL;
+
+ if ((mask & D_EMERG) != 0) {
+ prefix = "LustreError";
+ ptype = KERN_EMERG;
+ } else if ((mask & D_ERROR) != 0) {
+ prefix = "LustreError";
+ ptype = KERN_ERR;
+ } else if ((mask & D_WARNING) != 0) {
+ prefix = "Lustre";
+ ptype = KERN_WARNING;
+ } else if (portal_printk != 0) {
+ prefix = "Lustre";
+ ptype = KERN_INFO;
+ }
+ printk("%s%s: %d:%d:(%s:%d:%s()) %*s", ptype, prefix, hdr->ph_pid,
+ hdr->ph_extern_pid, file, hdr->ph_line_num, fn, len, buf);
+}
+
+/*
+ * Sysctl handle of libcfs
+ */
+int cfs_trace_daemon SYSCTL_HANDLER_ARGS
+{
+ int error = 0;
+ char *name = NULL;
+
+ MALLOC(name, char *, req->newlen + 1, M_TEMP, M_WAITOK | M_ZERO);
+ if (name == NULL)
+ return -ENOMEM;
+ down_write(&tracefile_sem);
+ error = sysctl_handle_string(oidp, name, req->newlen + 1, req);
+ if (!error || req->newptr != NULL) {
+ /* write */
+ if (strcmp(name, "stop") == 0) {
+ /* stop tracefile daemon */
+ tracefile = NULL;
+ trace_stop_thread();
+ goto out;
+ }else if (strncmp(name, "size=", 5) == 0) {
+ tracefile_size = simple_strtoul(name + 5, NULL, 0);
+ if (tracefile_size < 10 || tracefile_size > 20480)
+ tracefile_size = TRACEFILE_SIZE;
+ else
+ tracefile_size <<= 20;
+ goto out;
+
+ }
+ if (name[0] != '/') {
+ error = -EINVAL;
+ goto out;
+ }
+ if (tracefile != NULL)
+ cfs_free(tracefile);
+ tracefile = name;
+ name = NULL;
+ trace_start_thread();
+ } else if (req->newptr != NULL) {
+ /* Something was wrong with the write request */
+ printf("sysctl debug daemon failed: %d.\n", error);
+ goto out;
+ } else {
+ /* Read request */
+ SYSCTL_OUT(req, tracefile, sizeof(tracefile));
+ }
+out:
+ if (name != NULL)
+ FREE(name, M_TEMP);
+ up_write(&tracefile_sem);
+ return error;
+}
+
+
+int cfs_debug_mb SYSCTL_HANDLER_ARGS
+{
+ int i;
+ int error = 0;
+
+ error = sysctl_handle_long(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
+ if (!error && req->newptr != NULL) {
+ /* We have a new value stored in the standard location */
+ if (max_debug_mb <= 0)
+ return -EINVAL;
+ if (max_debug_mb > max_permit_mb) {
+ printf("sysctl debug_mb is too big: %d.\n", max_debug_mb);
+ return 0;
+ }
+ for (i = 0; i < NR_CPUS; i++) {
+ struct trace_cpu_data *tcd;
+ tcd = &trace_data[i].tcd;
+ tcd->tcd_max_pages = max_debug_mb;
+ }
+ } else if (req->newptr != NULL) {
+ /* Something was wrong with the write request */
+ printf ("sysctl debug_mb fault: %d.\n", error);
+ } else {
+ /* Read request */
+ error = SYSCTL_OUT(req, &max_debug_mb, sizeof max_debug_mb);
+ }
+ return error;
+}
+
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Phil Schwan <phil@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Darwin porting library
+ * Make things easy to port
+ */
+#define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <mach/mach_types.h>
+#include <string.h>
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/fcntl.h>
+#include <portals/types.h>
+
#ifndef isspace
/*
 * Minimal ctype fallback for kernel space.
 *
 * Fixes relative to the original: the old test compared against both
 * '\n' and '\12' (the same character, octal 12 == newline) and missed
 * '\v', '\f' and '\r'; this version covers the full C whitespace set.
 * Also made static: a plain "inline" definition in a .c file emits no
 * external definition in C99/C11, so out-of-line calls would not link.
 */
static inline int
isspace(char c)
{
        return (c == ' '  || c == '\t' || c == '\n' ||
                c == '\v' || c == '\f' || c == '\r');
}
#endif
+
/*
 * Return a pointer to the first character of @cs that also occurs in
 * @ct, or NULL when the two strings share no character.  Kernel-space
 * stand-in for the libc routine of the same name.
 */
char * strpbrk(const char * cs,const char * ct)
{
        const char *s;

        for (s = cs; *s != '\0'; s++) {
                const char *accept;

                for (accept = ct; *accept != '\0'; accept++) {
                        if (*accept == *s)
                                return (char *)s;
                }
        }
        return NULL;
}
+
/*
 * Split the string at *@s at the first character from @ct: that
 * character is overwritten with NUL, *@s is advanced past it (or set to
 * NULL when no separator remains) and the original token is returned.
 * Returns NULL when *@s is already NULL.
 */
char * strsep(char **s, const char *ct)
{
        char *token = *s;
        char *sep;

        if (token == NULL)
                return NULL;

        sep = strpbrk(token, ct);
        if (sep != NULL) {
                *sep = '\0';
                sep++;
        }
        *s = sep;
        return token;
}
+
/*
 * Return the length of @s, but never examine more than @count bytes;
 * the result is therefore min(strlen(s), count).
 */
size_t strnlen(const char * s, size_t count)
{
        size_t n = 0;

        while (n < count && s[n] != '\0')
                n++;
        return n;
}
+
/*
 * Locate the first occurrence of the substring @str in @in.  Returns a
 * pointer into @in, @in itself for an empty pattern, or NULL when the
 * pattern does not occur.
 */
char *
strstr(const char *in, const char *str)
{
        char first;
        size_t restlen;

        first = *str;
        if (first == '\0')
                return (char *)in;      /* empty pattern matches at start */

        /* match the first character by scanning, then compare the rest */
        restlen = strlen(str + 1);
        for (;;) {
                char cur = *in++;

                if (cur == '\0')
                        return NULL;
                if (cur == first && strncmp(in, str + 1, restlen) == 0)
                        return (char *)(in - 1);
        }
}
+
/*
 * Return a pointer to the last occurrence of @ch (converted to char) in
 * @p, or NULL if absent.  As required for strrchr, the terminating NUL
 * counts as part of the string, so strrchr(s, '\0') returns a pointer
 * to the terminator.
 *
 * Fix: rewritten as a single forward scan.  The original walked
 * backwards from the terminator and, when @ch was absent, decremented
 * its cursor to one before the start of the string before the loop
 * test — pointer arithmetic outside the object, undefined behaviour.
 */
char *
strrchr(const char *p, int ch)
{
        const char *last = NULL;

        for (;; p++) {
                if (*p == (char)ch)
                        last = p;
                if (*p == '\0')
                        break;
        }
        return (char *)last;
}
+
/*
 * Render @address in decimal into the tail of @buf (of total size
 * @len, including the NUL).  Returns a pointer to the first digit —
 * inside @buf, not necessarily @buf itself — or NULL on a bad buffer.
 * If @len is too small the most significant digits are silently
 * dropped.
 *
 * Fix: an @address of 0 used to produce the empty string; it now
 * produces "0" (buffer space permitting).
 */
char *
ul2dstr(unsigned long address, char *buf, int len)
{
        char *pos = buf + len - 1;

        if (len <= 0 || buf == NULL)
                return NULL;
        *pos = 0;
        if (address == 0) {
                if (--len != 0)
                        *--pos = '0';
                return pos;
        }
        while (address) {
                if (!--len) break;
                *--pos = address % 10 + '0';
                address /= 10;
        }
        return pos;
}
+
+/*
+ * miscellaneous libcfs stuff
+ */
+
+/*
+ * Convert server error code to client format.
+ * Linux errno.h.
+ */
+
+/* obtained by
+ *
+ * cc /usr/include/asm/errno.h -E -dM | grep '#define E' | sort -n -k3,3
+ *
+ */
/*
 * Linux (asm-generic/i386) errno values, prefixed to avoid colliding
 * with the host's <sys/errno.h>.  Used as indices into the translation
 * table in convert_server_error().  The gaps at 41 and 58 exist in
 * Linux's errno.h as well and are intentional.
 */
enum linux_errnos {
        LINUX_EPERM             = 1,
        LINUX_ENOENT            = 2,
        LINUX_ESRCH             = 3,
        LINUX_EINTR             = 4,
        LINUX_EIO               = 5,
        LINUX_ENXIO             = 6,
        LINUX_E2BIG             = 7,
        LINUX_ENOEXEC           = 8,
        LINUX_EBADF             = 9,
        LINUX_ECHILD            = 10,
        LINUX_EAGAIN            = 11,
        LINUX_ENOMEM            = 12,
        LINUX_EACCES            = 13,
        LINUX_EFAULT            = 14,
        LINUX_ENOTBLK           = 15,
        LINUX_EBUSY             = 16,
        LINUX_EEXIST            = 17,
        LINUX_EXDEV             = 18,
        LINUX_ENODEV            = 19,
        LINUX_ENOTDIR           = 20,
        LINUX_EISDIR            = 21,
        LINUX_EINVAL            = 22,
        LINUX_ENFILE            = 23,
        LINUX_EMFILE            = 24,
        LINUX_ENOTTY            = 25,
        LINUX_ETXTBSY           = 26,
        LINUX_EFBIG             = 27,
        LINUX_ENOSPC            = 28,
        LINUX_ESPIPE            = 29,
        LINUX_EROFS             = 30,
        LINUX_EMLINK            = 31,
        LINUX_EPIPE             = 32,
        LINUX_EDOM              = 33,
        LINUX_ERANGE            = 34,
        LINUX_EDEADLK           = 35,
        LINUX_ENAMETOOLONG      = 36,
        LINUX_ENOLCK            = 37,
        LINUX_ENOSYS            = 38,
        LINUX_ENOTEMPTY         = 39,
        LINUX_ELOOP             = 40,
        LINUX_ENOMSG            = 42,
        LINUX_EIDRM             = 43,
        LINUX_ECHRNG            = 44,
        LINUX_EL2NSYNC          = 45,
        LINUX_EL3HLT            = 46,
        LINUX_EL3RST            = 47,
        LINUX_ELNRNG            = 48,
        LINUX_EUNATCH           = 49,
        LINUX_ENOCSI            = 50,
        LINUX_EL2HLT            = 51,
        LINUX_EBADE             = 52,
        LINUX_EBADR             = 53,
        LINUX_EXFULL            = 54,
        LINUX_ENOANO            = 55,
        LINUX_EBADRQC           = 56,
        LINUX_EBADSLT           = 57,
        LINUX_EBFONT            = 59,
        LINUX_ENOSTR            = 60,
        LINUX_ENODATA           = 61,
        LINUX_ETIME             = 62,
        LINUX_ENOSR             = 63,
        LINUX_ENONET            = 64,
        LINUX_ENOPKG            = 65,
        LINUX_EREMOTE           = 66,
        LINUX_ENOLINK           = 67,
        LINUX_EADV              = 68,
        LINUX_ESRMNT            = 69,
        LINUX_ECOMM             = 70,
        LINUX_EPROTO            = 71,
        LINUX_EMULTIHOP         = 72,
        LINUX_EDOTDOT           = 73,
        LINUX_EBADMSG           = 74,
        LINUX_EOVERFLOW         = 75,
        LINUX_ENOTUNIQ          = 76,
        LINUX_EBADFD            = 77,
        LINUX_EREMCHG           = 78,
        LINUX_ELIBACC           = 79,
        LINUX_ELIBBAD           = 80,
        LINUX_ELIBSCN           = 81,
        LINUX_ELIBMAX           = 82,
        LINUX_ELIBEXEC          = 83,
        LINUX_EILSEQ            = 84,
        LINUX_ERESTART          = 85,
        LINUX_ESTRPIPE          = 86,
        LINUX_EUSERS            = 87,
        LINUX_ENOTSOCK          = 88,
        LINUX_EDESTADDRREQ      = 89,
        LINUX_EMSGSIZE          = 90,
        LINUX_EPROTOTYPE        = 91,
        LINUX_ENOPROTOOPT       = 92,
        LINUX_EPROTONOSUPPORT   = 93,
        LINUX_ESOCKTNOSUPPORT   = 94,
        LINUX_EOPNOTSUPP        = 95,
        LINUX_EPFNOSUPPORT      = 96,
        LINUX_EAFNOSUPPORT      = 97,
        LINUX_EADDRINUSE        = 98,
        LINUX_EADDRNOTAVAIL     = 99,
        LINUX_ENETDOWN          = 100,
        LINUX_ENETUNREACH       = 101,
        LINUX_ENETRESET         = 102,
        LINUX_ECONNABORTED      = 103,
        LINUX_ECONNRESET        = 104,
        LINUX_ENOBUFS           = 105,
        LINUX_EISCONN           = 106,
        LINUX_ENOTCONN          = 107,
        LINUX_ESHUTDOWN         = 108,
        LINUX_ETOOMANYREFS      = 109,
        LINUX_ETIMEDOUT         = 110,
        LINUX_ECONNREFUSED      = 111,
        LINUX_EHOSTDOWN         = 112,
        LINUX_EHOSTUNREACH      = 113,
        LINUX_EALREADY          = 114,
        LINUX_EINPROGRESS       = 115,
        LINUX_ESTALE            = 116,
        LINUX_EUCLEAN           = 117,
        LINUX_ENOTNAM           = 118,
        LINUX_ENAVAIL           = 119,
        LINUX_EISNAM            = 120,
        LINUX_EREMOTEIO         = 121,
        LINUX_EDQUOT            = 122,
        LINUX_ENOMEDIUM         = 123,
        LINUX_EMEDIUMTYPE       = 124,

        /*
         * we don't need these, but for completeness..
         */
        LINUX_EDEADLOCK         = LINUX_EDEADLK,
        LINUX_EWOULDBLOCK       = LINUX_EAGAIN
};
+
/*
 * Translate a Linux errno sent by the server into the local (Darwin)
 * errno, preserving sign.  Linux codes with no BSD counterpart map to
 * EINVAL (annotated per-entry below), as do codes outside the table.
 *
 * NOTE(review): @ecode is silently truncated to int, and if the
 * truncated value is INT_MIN the "code = -code" below overflows
 * (undefined behaviour).  Harmless for real errno values, but worth a
 * guard if ecode can carry arbitrary data.
 */
int convert_server_error(__u64 ecode)
{
        int sign;
        int code;

        static int errno_xlate[] = {
                /* success is always success */
                [0]                     = 0,
                [LINUX_EPERM]           = EPERM,
                [LINUX_ENOENT]          = ENOENT,
                [LINUX_ESRCH]           = ESRCH,
                [LINUX_EINTR]           = EINTR,
                [LINUX_EIO]             = EIO,
                [LINUX_ENXIO]           = ENXIO,
                [LINUX_E2BIG]           = E2BIG,
                [LINUX_ENOEXEC]         = ENOEXEC,
                [LINUX_EBADF]           = EBADF,
                [LINUX_ECHILD]          = ECHILD,
                [LINUX_EAGAIN]          = EAGAIN,
                [LINUX_ENOMEM]          = ENOMEM,
                [LINUX_EACCES]          = EACCES,
                [LINUX_EFAULT]          = EFAULT,
                [LINUX_ENOTBLK]         = ENOTBLK,
                [LINUX_EBUSY]           = EBUSY,
                [LINUX_EEXIST]          = EEXIST,
                [LINUX_EXDEV]           = EXDEV,
                [LINUX_ENODEV]          = ENODEV,
                [LINUX_ENOTDIR]         = ENOTDIR,
                [LINUX_EISDIR]          = EISDIR,
                [LINUX_EINVAL]          = EINVAL,
                [LINUX_ENFILE]          = ENFILE,
                [LINUX_EMFILE]          = EMFILE,
                [LINUX_ENOTTY]          = ENOTTY,
                [LINUX_ETXTBSY]         = ETXTBSY,
                [LINUX_EFBIG]           = EFBIG,
                [LINUX_ENOSPC]          = ENOSPC,
                [LINUX_ESPIPE]          = ESPIPE,
                [LINUX_EROFS]           = EROFS,
                [LINUX_EMLINK]          = EMLINK,
                [LINUX_EPIPE]           = EPIPE,
                [LINUX_EDOM]            = EDOM,
                [LINUX_ERANGE]          = ERANGE,
                [LINUX_EDEADLK]         = EDEADLK,
                [LINUX_ENAMETOOLONG]    = ENAMETOOLONG,
                [LINUX_ENOLCK]          = ENOLCK,
                [LINUX_ENOSYS]          = ENOSYS,
                [LINUX_ENOTEMPTY]       = ENOTEMPTY,
                [LINUX_ELOOP]           = ELOOP,
                [LINUX_ENOMSG]          = ENOMSG,
                [LINUX_EIDRM]           = EIDRM,
                [LINUX_ECHRNG]          = EINVAL /* ECHRNG */,
                [LINUX_EL2NSYNC]        = EINVAL /* EL2NSYNC */,
                [LINUX_EL3HLT]          = EINVAL /* EL3HLT */,
                [LINUX_EL3RST]          = EINVAL /* EL3RST */,
                [LINUX_ELNRNG]          = EINVAL /* ELNRNG */,
                [LINUX_EUNATCH]         = EINVAL /* EUNATCH */,
                [LINUX_ENOCSI]          = EINVAL /* ENOCSI */,
                [LINUX_EL2HLT]          = EINVAL /* EL2HLT */,
                [LINUX_EBADE]           = EINVAL /* EBADE */,
                [LINUX_EBADR]           = EBADRPC,
                [LINUX_EXFULL]          = EINVAL /* EXFULL */,
                [LINUX_ENOANO]          = EINVAL /* ENOANO */,
                [LINUX_EBADRQC]         = EINVAL /* EBADRQC */,
                [LINUX_EBADSLT]         = EINVAL /* EBADSLT */,
                [LINUX_EBFONT]          = EINVAL /* EBFONT */,
                [LINUX_ENOSTR]          = EINVAL /* ENOSTR */,
                [LINUX_ENODATA]         = EINVAL /* ENODATA */,
                [LINUX_ETIME]           = EINVAL /* ETIME */,
                [LINUX_ENOSR]           = EINVAL /* ENOSR */,
                [LINUX_ENONET]          = EINVAL /* ENONET */,
                [LINUX_ENOPKG]          = EINVAL /* ENOPKG */,
                [LINUX_EREMOTE]         = EREMOTE,
                [LINUX_ENOLINK]         = EINVAL /* ENOLINK */,
                [LINUX_EADV]            = EINVAL /* EADV */,
                [LINUX_ESRMNT]          = EINVAL /* ESRMNT */,
                [LINUX_ECOMM]           = EINVAL /* ECOMM */,
                [LINUX_EPROTO]          = EPROTOTYPE,
                [LINUX_EMULTIHOP]       = EINVAL /* EMULTIHOP */,
                [LINUX_EDOTDOT]         = EINVAL /* EDOTDOT */,
                [LINUX_EBADMSG]         = EINVAL /* EBADMSG */,
                [LINUX_EOVERFLOW]       = EOVERFLOW,
                [LINUX_ENOTUNIQ]        = EINVAL /* ENOTUNIQ */,
                [LINUX_EBADFD]          = EINVAL /* EBADFD */,
                [LINUX_EREMCHG]         = EINVAL /* EREMCHG */,
                [LINUX_ELIBACC]         = EINVAL /* ELIBACC */,
                [LINUX_ELIBBAD]         = EINVAL /* ELIBBAD */,
                [LINUX_ELIBSCN]         = EINVAL /* ELIBSCN */,
                [LINUX_ELIBMAX]         = EINVAL /* ELIBMAX */,
                [LINUX_ELIBEXEC]        = EINVAL /* ELIBEXEC */,
                [LINUX_EILSEQ]          = EILSEQ,
                [LINUX_ERESTART]        = ERESTART,
                [LINUX_ESTRPIPE]        = EINVAL /* ESTRPIPE */,
                [LINUX_EUSERS]          = EUSERS,
                [LINUX_ENOTSOCK]        = ENOTSOCK,
                [LINUX_EDESTADDRREQ]    = EDESTADDRREQ,
                [LINUX_EMSGSIZE]        = EMSGSIZE,
                [LINUX_EPROTOTYPE]      = EPROTOTYPE,
                [LINUX_ENOPROTOOPT]     = ENOPROTOOPT,
                [LINUX_EPROTONOSUPPORT] = EPROTONOSUPPORT,
                [LINUX_ESOCKTNOSUPPORT] = ESOCKTNOSUPPORT,
                [LINUX_EOPNOTSUPP]      = EOPNOTSUPP,
                [LINUX_EPFNOSUPPORT]    = EPFNOSUPPORT,
                [LINUX_EAFNOSUPPORT]    = EAFNOSUPPORT,
                [LINUX_EADDRINUSE]      = EADDRINUSE,
                [LINUX_EADDRNOTAVAIL]   = EADDRNOTAVAIL,
                [LINUX_ENETDOWN]        = ENETDOWN,
                [LINUX_ENETUNREACH]     = ENETUNREACH,
                [LINUX_ENETRESET]       = ENETRESET,
                [LINUX_ECONNABORTED]    = ECONNABORTED,
                [LINUX_ECONNRESET]      = ECONNRESET,
                [LINUX_ENOBUFS]         = ENOBUFS,
                [LINUX_EISCONN]         = EISCONN,
                [LINUX_ENOTCONN]        = ENOTCONN,
                [LINUX_ESHUTDOWN]       = ESHUTDOWN,
                [LINUX_ETOOMANYREFS]    = ETOOMANYREFS,
                [LINUX_ETIMEDOUT]       = ETIMEDOUT,
                [LINUX_ECONNREFUSED]    = ECONNREFUSED,
                [LINUX_EHOSTDOWN]       = EHOSTDOWN,
                [LINUX_EHOSTUNREACH]    = EHOSTUNREACH,
                [LINUX_EALREADY]        = EALREADY,
                [LINUX_EINPROGRESS]     = EINPROGRESS,
                [LINUX_ESTALE]          = ESTALE,
                [LINUX_EUCLEAN]         = EINVAL /* EUCLEAN */,
                [LINUX_ENOTNAM]         = EINVAL /* ENOTNAM */,
                [LINUX_ENAVAIL]         = EINVAL /* ENAVAIL */,
                [LINUX_EISNAM]          = EINVAL /* EISNAM */,
                [LINUX_EREMOTEIO]       = EINVAL /* EREMOTEIO */,
                [LINUX_EDQUOT]          = EDQUOT,
                [LINUX_ENOMEDIUM]       = EINVAL /* ENOMEDIUM */,
                [LINUX_EMEDIUMTYPE]     = EINVAL /* EMEDIUMTYPE */,
        };
        code = (int)ecode;
        if (code >= 0) {
                sign = +1;
        } else {
                sign = -1;
                code = -code;
        }
        /* code >= 0 here, so the signed/unsigned comparison is safe */
        if (code < (sizeof errno_xlate) / (sizeof errno_xlate[0]))
                code = errno_xlate[code];
        else
                /*
                 * Unknown error. Reserved for the future.
                 */
                code = EINVAL;
        return sign * code;
}
+
/*
 * Linux i386 open(2) flag values (octal, as in Linux's fcntl.h),
 * targets of the XNU-to-Linux translation in convert_client_oflag().
 */
enum {
        LINUX_O_RDONLY   = 00,
        LINUX_O_WRONLY   = 01,
        LINUX_O_RDWR     = 02,
        LINUX_O_CREAT    = 0100,
        LINUX_O_EXCL     = 0200,
        LINUX_O_NOCTTY   = 0400,
        LINUX_O_TRUNC    = 01000,
        LINUX_O_APPEND   = 02000,
        LINUX_O_NONBLOCK = 04000,
        LINUX_O_NDELAY   = LINUX_O_NONBLOCK,
        LINUX_O_SYNC     = 010000,
        LINUX_O_FSYNC    = LINUX_O_SYNC,
        LINUX_O_ASYNC    = 020000,
        LINUX_O_DIRECT   = 040000,
        LINUX_O_NOFOLLOW = 0400000
};
+
/*
 * If any bit of @cmask is set in *@cflag, set @smask in *@sflag and
 * clear @cmask from *@cflag.  Building block for translating XNU
 * open(2) flags to their Linux values bit by bit.
 *
 * Fix: the original condition read "*cflag & cmask != 0"; "!=" binds
 * tighter than "&", so it actually evaluated "*cflag & (cmask != 0)" —
 * i.e. it tested the low bit of *cflag regardless of @cmask.
 */
static inline void obit_convert(int *cflag, int *sflag,
                                unsigned cmask, unsigned smask)
{
        if ((*cflag & cmask) != 0) {
                *sflag |= smask;
                *cflag &= ~cmask;
        }
}
+
+/*
+ * convert <fcntl.h> flag from XNU client to Linux _i386_ server.
+ */
+int convert_client_oflag(int cflag, int *result)
+{
+ int sflag;
+
+ cflag = 0;
+ obit_convert(&cflag, &sflag, O_RDONLY, LINUX_O_RDONLY);
+ obit_convert(&cflag, &sflag, O_WRONLY, LINUX_O_WRONLY);
+ obit_convert(&cflag, &sflag, O_RDWR, LINUX_O_RDWR);
+ obit_convert(&cflag, &sflag, O_NONBLOCK, LINUX_O_NONBLOCK);
+ obit_convert(&cflag, &sflag, O_APPEND, LINUX_O_APPEND);
+ obit_convert(&cflag, &sflag, O_ASYNC, LINUX_O_ASYNC);
+ obit_convert(&cflag, &sflag, O_FSYNC, LINUX_O_FSYNC);
+ obit_convert(&cflag, &sflag, O_NOFOLLOW, LINUX_O_NOFOLLOW);
+ obit_convert(&cflag, &sflag, O_CREAT, LINUX_O_CREAT);
+ obit_convert(&cflag, &sflag, O_TRUNC, LINUX_O_TRUNC);
+ obit_convert(&cflag, &sflag, O_EXCL, LINUX_O_EXCL);
+ obit_convert(&cflag, &sflag, O_CREAT, LINUX_O_CREAT);
+ obit_convert(&cflag, &sflag, O_NDELAY, LINUX_O_NDELAY);
+ obit_convert(&cflag, &sflag, O_NOCTTY, LINUX_O_NOCTTY);
+ /*
+ * Some more obscure BSD flags have no Linux counterparts:
+ *
+ * O_SHLOCK 0x0010
+ * O_EXLOCK 0x0020
+ * O_EVTONLY 0x8000
+ * O_POPUP 0x80000000
+ * O_ALERT 0x20000000
+ */
+ if (cflag == 0) {
+ *result = sflag;
+ return 0;
+ } else
+ return -EINVAL;
+}
# define EXPORT_SYMTAB
#endif
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/notifier.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-#include <linux/interrupt.h>
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <linux/completion.h>
-
-#include <linux/fs.h>
-#include <linux/stat.h>
-#include <asm/uaccess.h>
-#include <asm/segment.h>
-#include <linux/miscdevice.h>
-#include <linux/version.h>
-
# define DEBUG_SUBSYSTEM S_PORTALS
-#include <linux/kp30.h>
-#include <linux/portals_compat25.h>
-#include <linux/libcfs.h>
+#include <libcfs/kp30.h>
+#include <libcfs/libcfs.h>
#include "tracefile.h"
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-#include <linux/kallsyms.h>
-#endif
-
unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_NAL);
EXPORT_SYMBOL(portal_subsystem_debug);
EXPORT_SYMBOL(portal_kmemory);
#endif
-static DECLARE_WAIT_QUEUE_HEAD(debug_ctlwq);
+static cfs_waitq_t debug_ctlwq;
char debug_file_path[1024] = "/tmp/lustre-log";
static char debug_file_name[1024];
-char portals_upcall[1024] = "/usr/lib/lustre/portals_upcall";
void portals_debug_dumplog_internal(void *arg)
{
- void *journal_info = current->journal_info;
- current->journal_info = NULL;
+ CFS_DECL_JOURNAL_DATA;
+
+ CFS_PUSH_JOURNAL;
snprintf(debug_file_name, sizeof(debug_file_path) - 1,
- "%s.%ld.%ld", debug_file_path, CURRENT_SECONDS, (long)arg);
+ "%s.%ld.%ld", debug_file_path, cfs_time_current_sec(), (long)arg);
printk(KERN_ALERT "LustreError: dumping log to %s\n", debug_file_name);
tracefile_dump_all_pages(debug_file_name);
- current->journal_info = journal_info;
+ CFS_POP_JOURNAL;
}
int portals_debug_dumplog_thread(void *arg)
kportal_daemonize("");
reparent_to_init();
portals_debug_dumplog_internal(arg);
- wake_up(&debug_ctlwq);
+ cfs_waitq_signal(&debug_ctlwq);
return 0;
}
void portals_debug_dumplog(void)
{
- int rc;
- DECLARE_WAITQUEUE(wait, current);
+ int rc;
+ cfs_waitlink_t wait;
ENTRY;
/* we're being careful to ensure that the kernel thread is
* able to set our state to running as it exits before we
* get to schedule() */
+ cfs_waitlink_init(&wait);
set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&debug_ctlwq, &wait);
+ cfs_waitq_add(&debug_ctlwq, &wait);
- rc = kernel_thread(portals_debug_dumplog_thread,
- (void *)(long)current->pid,
- CLONE_VM | CLONE_FS | CLONE_FILES);
+ rc = cfs_kernel_thread(portals_debug_dumplog_thread,
+ (void *)(long)cfs_curproc_pid(),
+ CLONE_VM | CLONE_FS | CLONE_FILES);
if (rc < 0)
printk(KERN_ERR "LustreError: cannot start log dump thread: "
"%d\n", rc);
schedule();
/* be sure to teardown if kernel_thread() failed */
- remove_wait_queue(&debug_ctlwq, &wait);
+ cfs_waitq_del(&debug_ctlwq, &wait);
set_current_state(TASK_RUNNING);
}
int portals_debug_init(unsigned long bufsize)
{
+ cfs_waitq_init(&debug_ctlwq);
#ifdef CRAY_PORTALS
lus_portals_debug = &portals_debug_msg;
#endif
portal_debug = debug_level;
}
-void portals_run_upcall(char **argv)
-{
- int rc;
- int argc;
- char *envp[] = {
- "HOME=/",
- "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
- NULL};
- ENTRY;
-
- argv[0] = portals_upcall;
- argc = 1;
- while (argv[argc] != NULL)
- argc++;
-
- LASSERT(argc >= 2);
-
- rc = USERMODEHELPER(argv[0], argv, envp);
- if (rc < 0) {
- CERROR("Error %d invoking portals upcall %s %s%s%s%s%s%s%s%s; "
- "check /proc/sys/portals/upcall\n",
- rc, argv[0], argv[1],
- argc < 3 ? "" : ",", argc < 3 ? "" : argv[2],
- argc < 4 ? "" : ",", argc < 4 ? "" : argv[3],
- argc < 5 ? "" : ",", argc < 5 ? "" : argv[4],
- argc < 6 ? "" : ",...");
- } else {
- CERROR("Invoked portals upcall %s %s%s%s%s%s%s%s%s\n",
- argv[0], argv[1],
- argc < 3 ? "" : ",", argc < 3 ? "" : argv[2],
- argc < 4 ? "" : ",", argc < 4 ? "" : argv[3],
- argc < 5 ? "" : ",", argc < 5 ? "" : argv[4],
- argc < 6 ? "" : ",...");
- }
-}
-
-void portals_run_lbug_upcall(char *file, const char *fn, const int line)
-{
- char *argv[6];
- char buf[32];
-
- ENTRY;
- snprintf (buf, sizeof buf, "%d", line);
-
- argv[1] = "LBUG";
- argv[2] = file;
- argv[3] = (char *)fn;
- argv[4] = buf;
- argv[5] = NULL;
-
- portals_run_upcall (argv);
-}
-
char *portals_nid2str(int nal, ptl_nid_t nid, char *str)
{
if (nid == PTL_NID_ANY) {
switch(nal){
/* XXX this could be a nal method of some sort, 'cept it's config
* dependent whether (say) socknal NIDs are actually IP addresses... */
-#if !CRAY_PORTALS
+#if !CRAY_PORTALS
case TCPNAL:
/* userspace NAL */
case IIBNAL:
char *portals_id2str(int nal, ptl_process_id_t id, char *str)
{
int len;
-
+
portals_nid2str(nal, id.nid, str);
len = strlen(str);
snprintf(str + len, PTL_NALFMT_SIZE - len, "-%u", id.pid);
return str;
}
-#ifdef __KERNEL__
-
-void portals_debug_dumpstack(struct task_struct *tsk)
-{
-#if defined(__arch_um__)
- if (tsk != NULL)
- CWARN("stack dump for pid %d (%d) requested; wake up gdb.\n",
- tsk->pid, UML_PID(tsk));
- asm("int $3");
-#elif defined(HAVE_SHOW_TASK)
- /* this is exported by lustre kernel version 42 */
- extern void show_task(struct task_struct *);
-
- if (tsk == NULL)
- tsk = current;
- CWARN("showing stack for process %d\n", tsk->pid);
- show_task(tsk);
-#else
- CWARN("can't show stack: kernel doesn't export show_task\n");
-#endif
-}
-
-struct task_struct *portals_current(void)
-{
- CWARN("current task struct is %p\n", current);
- return current;
-}
-
-EXPORT_SYMBOL(portals_debug_dumpstack);
-EXPORT_SYMBOL(portals_current);
-#endif /* __KERNEL__ */
-
EXPORT_SYMBOL(portals_debug_dumplog);
EXPORT_SYMBOL(portals_debug_set_level);
-EXPORT_SYMBOL(portals_run_upcall);
-EXPORT_SYMBOL(portals_run_lbug_upcall);
EXPORT_SYMBOL(portals_nid2str);
EXPORT_SYMBOL(portals_id2str);
--- /dev/null
+// !$*UTF8*$!
+{
+ archiveVersion = 1;
+ classes = {
+ };
+ objectVersion = 39;
+ objects = {
+ 06AA1262FFB20DD611CA28AA = {
+ buildRules = (
+ );
+ buildSettings = {
+ COPY_PHASE_STRIP = NO;
+ GCC_DYNAMIC_NO_PIC = NO;
+ GCC_ENABLE_FIX_AND_CONTINUE = YES;
+ GCC_GENERATE_DEBUGGING_SYMBOLS = YES;
+ GCC_OPTIMIZATION_LEVEL = 0;
+ OPTIMIZATION_CFLAGS = "-O0";
+ ZERO_LINK = YES;
+ };
+ isa = PBXBuildStyle;
+ name = Development;
+ };
+ 06AA1263FFB20DD611CA28AA = {
+ buildRules = (
+ );
+ buildSettings = {
+ COPY_PHASE_STRIP = YES;
+ GCC_ENABLE_FIX_AND_CONTINUE = NO;
+ ZERO_LINK = NO;
+ };
+ isa = PBXBuildStyle;
+ name = Deployment;
+ };
+//060
+//061
+//062
+//063
+//064
+//080
+//081
+//082
+//083
+//084
+ 089C1669FE841209C02AAC07 = {
+ buildSettings = {
+ };
+ buildStyles = (
+ 06AA1262FFB20DD611CA28AA,
+ 06AA1263FFB20DD611CA28AA,
+ );
+ hasScannedForEncodings = 1;
+ isa = PBXProject;
+ mainGroup = 089C166AFE841209C02AAC07;
+ projectDirPath = "";
+ targets = (
+ 32A4FEB80562C75700D090E7,
+ );
+ };
+ 089C166AFE841209C02AAC07 = {
+ children = (
+ 247142CAFF3F8F9811CA285C,
+ 089C167CFE841241C02AAC07,
+ 19C28FB6FE9D52B211CA2CBB,
+ );
+ isa = PBXGroup;
+ name = libcfs;
+ refType = 4;
+ sourceTree = "<group>";
+ };
+ 089C167CFE841241C02AAC07 = {
+ children = (
+ 32A4FEC30562C75700D090E7,
+ );
+ isa = PBXGroup;
+ name = Resources;
+ refType = 4;
+ sourceTree = "<group>";
+ };
+//080
+//081
+//082
+//083
+//084
+//190
+//191
+//192
+//193
+//194
+ 19444794072D07AD00DAF9BC = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ path = tracefile.c;
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 19444795072D07AD00DAF9BC = {
+ fileRef = 19444794072D07AD00DAF9BC;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19444796072D08AA00DAF9BC = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ path = debug.c;
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 19444797072D08AA00DAF9BC = {
+ fileRef = 19444796072D08AA00DAF9BC;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19509C03072CD5FF00A958C3 = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ path = module.c;
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 19509C04072CD5FF00A958C3 = {
+ fileRef = 19509C03072CD5FF00A958C3;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19713B76072E8274004E8469 = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ name = cfs_prim.c;
+ path = arch/xnu/cfs_prim.c;
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 19713B77072E8274004E8469 = {
+ fileRef = 19713B76072E8274004E8469;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19713BB7072E8281004E8469 = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ name = cfs_mem.c;
+ path = arch/xnu/cfs_mem.c;
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 19713BB8072E8281004E8469 = {
+ fileRef = 19713BB7072E8281004E8469;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19713BF7072E828E004E8469 = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ name = cfs_proc.c;
+ path = arch/xnu/cfs_proc.c;
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 19713BF8072E828E004E8469 = {
+ fileRef = 19713BF7072E828E004E8469;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19713C7A072E82B2004E8469 = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ name = cfs_utils.c;
+ path = arch/xnu/cfs_utils.c;
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 19713C7B072E82B2004E8469 = {
+ fileRef = 19713C7A072E82B2004E8469;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19713CD6072E8A56004E8469 = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ name = cfs_module.c;
+ path = arch/xnu/cfs_module.c;
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 19713CD7072E8A56004E8469 = {
+ fileRef = 19713CD6072E8A56004E8469;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19713D1B072E8E39004E8469 = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ name = cfs_fs.c;
+ path = arch/xnu/cfs_fs.c;
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 19713D1C072E8E39004E8469 = {
+ fileRef = 19713D1B072E8E39004E8469;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19713D60072E9109004E8469 = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ name = xnu_sync.c;
+ path = arch/xnu/xnu_sync.c;
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 19713D61072E9109004E8469 = {
+ fileRef = 19713D60072E9109004E8469;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19713DC2072F994D004E8469 = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ name = cfs_tracefile.c;
+ path = arch/xnu/cfs_tracefile.c;
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 19713DC3072F994D004E8469 = {
+ fileRef = 19713DC2072F994D004E8469;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19713E1C072FAFB5004E8469 = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ name = cfs_debug.c;
+ path = arch/xnu/cfs_debug.c;
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 19713E1D072FAFB5004E8469 = {
+ fileRef = 19713E1C072FAFB5004E8469;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19C28FB6FE9D52B211CA2CBB = {
+ children = (
+ 32A4FEC40562C75800D090E7,
+ );
+ isa = PBXGroup;
+ name = Products;
+ refType = 4;
+ sourceTree = "<group>";
+ };
+//190
+//191
+//192
+//193
+//194
+//240
+//241
+//242
+//243
+//244
+ 247142CAFF3F8F9811CA285C = {
+ children = (
+ 19713E1C072FAFB5004E8469,
+ 19713DC2072F994D004E8469,
+ 19713D60072E9109004E8469,
+ 19713D1B072E8E39004E8469,
+ 19713CD6072E8A56004E8469,
+ 19713C7A072E82B2004E8469,
+ 19713BF7072E828E004E8469,
+ 19713BB7072E8281004E8469,
+ 19713B76072E8274004E8469,
+ 19444796072D08AA00DAF9BC,
+ 19444794072D07AD00DAF9BC,
+ 19509C03072CD5FF00A958C3,
+ );
+ isa = PBXGroup;
+ name = Source;
+ path = "";
+ refType = 4;
+ sourceTree = "<group>";
+ };
+//240
+//241
+//242
+//243
+//244
+//320
+//321
+//322
+//323
+//324
+ 32A4FEB80562C75700D090E7 = {
+ buildPhases = (
+ 32A4FEB90562C75700D090E7,
+ 32A4FEBA0562C75700D090E7,
+ 32A4FEBB0562C75700D090E7,
+ 32A4FEBD0562C75700D090E7,
+ 32A4FEBF0562C75700D090E7,
+ 32A4FEC00562C75700D090E7,
+ 32A4FEC10562C75700D090E7,
+ );
+ buildRules = (
+ );
+ buildSettings = {
+ FRAMEWORK_SEARCH_PATHS = "";
+ GCC_WARN_FOUR_CHARACTER_CONSTANTS = NO;
+ GCC_WARN_UNKNOWN_PRAGMAS = NO;
+ HEADER_SEARCH_PATHS = ../include;
+ INFOPLIST_FILE = Info.plist;
+ INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions";
+ LIBRARY_SEARCH_PATHS = "";
+ MODULE_NAME = com.clusterfs.lustre.portals.libcfs;
+ MODULE_START = libcfs_start;
+ MODULE_STOP = libcfs_stop;
+ MODULE_VERSION = 1.0.1;
+ OTHER_CFLAGS = "-D__KERNEL__";
+ OTHER_LDFLAGS = "";
+ OTHER_REZFLAGS = "";
+ PRODUCT_NAME = libcfs;
+ SECTORDER_FLAGS = "";
+ WARNING_CFLAGS = "-Wmost";
+ WRAPPER_EXTENSION = kext;
+ };
+ dependencies = (
+ );
+ isa = PBXNativeTarget;
+ name = libcfs;
+ productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions";
+ productName = libcfs;
+ productReference = 32A4FEC40562C75800D090E7;
+ productType = "com.apple.product-type.kernel-extension";
+ };
+ 32A4FEB90562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXShellScriptBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ shellPath = /bin/sh;
+ shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi";
+ };
+ 32A4FEBA0562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXHeadersBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 32A4FEBB0562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXResourcesBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 32A4FEBD0562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ 19509C04072CD5FF00A958C3,
+ 19444795072D07AD00DAF9BC,
+ 19444797072D08AA00DAF9BC,
+ 19713B77072E8274004E8469,
+ 19713BB8072E8281004E8469,
+ 19713BF8072E828E004E8469,
+ 19713C7B072E82B2004E8469,
+ 19713CD7072E8A56004E8469,
+ 19713D1C072E8E39004E8469,
+ 19713D61072E9109004E8469,
+ 19713DC3072F994D004E8469,
+ 19713E1D072FAFB5004E8469,
+ );
+ isa = PBXSourcesBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 32A4FEBF0562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXFrameworksBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 32A4FEC00562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXRezBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 32A4FEC10562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXShellScriptBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ shellPath = /bin/sh;
+ shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi";
+ };
+ 32A4FEC30562C75700D090E7 = {
+ isa = PBXFileReference;
+ lastKnownFileType = text.plist.xml;
+ path = Info.plist;
+ refType = 4;
+ sourceTree = "<group>";
+ };
+ 32A4FEC40562C75800D090E7 = {
+ explicitFileType = wrapper.cfbundle;
+ includeInIndex = 0;
+ isa = PBXFileReference;
+ path = libcfs.kext;
+ refType = 3;
+ sourceTree = BUILT_PRODUCTS_DIR;
+ };
+ };
+ rootObject = 089C1669FE841209C02AAC07;
+}
--- /dev/null
+Makefile
+Makefile.in
+*.o.cmd
--- /dev/null
+EXTRA_DIST := linux-debug.c linux-lwt.c linux-prim.c linux-tracefile.c \
+ linux-fs.c linux-mem.c linux-proc.c linux-utils.c linux-lock.c \
+ linux-module.c linux-sync.c linux-curproc.c
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Lustre curproc API implementation for Linux kernel
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation. Lustre is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+ * Public License for more details. You should have received a copy of the GNU
+ * General Public License along with Lustre; if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/sched.h>
+
+#define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+/*
+ * Implementation of cfs_curproc API (see portals/include/libcfs/curproc.h)
+ * for Linux kernel.
+ */
+
+uid_t cfs_curproc_uid(void)
+{
+ return current->uid;
+}
+
+gid_t cfs_curproc_gid(void)
+{
+ return current->gid;
+}
+
+uid_t cfs_curproc_fsuid(void)
+{
+ return current->fsuid;
+}
+
+gid_t cfs_curproc_fsgid(void)
+{
+ return current->fsgid;
+}
+
+pid_t cfs_curproc_pid(void)
+{
+ return current->pid;
+}
+
+int cfs_curproc_groups_nr(void)
+{
+ int nr;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
+ task_lock(current);
+ nr = current->group_info->ngroups;
+ task_unlock(current);
+#else
+ nr = current->ngroups;
+#endif
+ return nr;
+}
+
+void cfs_curproc_groups_dump(gid_t *array, int size)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
+ task_lock(current);
+ size = min_t(int, size, current->group_info->ngroups);
+ memcpy(array, current->group_info->blocks[0], size * sizeof(__u32));
+ task_unlock(current);
+#else
+ LASSERT(size <= NGROUPS);
+ size = min_t(int, size, current->ngroups);
+ memcpy(array, current->groups, size * sizeof(__u32));
+#endif
+}
+
+
+int cfs_curproc_is_in_groups(gid_t gid)
+{
+ return in_group_p(gid);
+}
+
+mode_t cfs_curproc_umask(void)
+{
+ return current->fs->umask;
+}
+
+char *cfs_curproc_comm(void)
+{
+ return current->comm;
+}
+
+cfs_kernel_cap_t cfs_curproc_cap_get(void)
+{
+ return current->cap_effective;
+}
+
+void cfs_curproc_cap_set(cfs_kernel_cap_t cap)
+{
+ current->cap_effective = cap;
+}
+
+EXPORT_SYMBOL(cfs_curproc_uid);
+EXPORT_SYMBOL(cfs_curproc_pid);
+EXPORT_SYMBOL(cfs_curproc_gid);
+EXPORT_SYMBOL(cfs_curproc_fsuid);
+EXPORT_SYMBOL(cfs_curproc_fsgid);
+EXPORT_SYMBOL(cfs_curproc_umask);
+EXPORT_SYMBOL(cfs_curproc_comm);
+EXPORT_SYMBOL(cfs_curproc_groups_nr);
+EXPORT_SYMBOL(cfs_curproc_groups_dump);
+EXPORT_SYMBOL(cfs_curproc_is_in_groups);
+EXPORT_SYMBOL(cfs_curproc_cap_get);
+EXPORT_SYMBOL(cfs_curproc_cap_set);
+
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Phil Schwan <phil@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/notifier.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/smp_lock.h>
+#include <linux/unistd.h>
+#include <linux/interrupt.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <linux/completion.h>
+
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <asm/uaccess.h>
+#include <asm/segment.h>
+#include <linux/miscdevice.h>
+#include <linux/version.h>
+
+# define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <libcfs/kp30.h>
+#include <libcfs/linux/portals_compat25.h>
+#include <libcfs/libcfs.h>
+
+#include "tracefile.h"
+
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+#include <linux/kallsyms.h>
+#endif
+
+static int handled_panic; /* to avoid recursive calls to notifiers */
+char portals_upcall[1024] = "/usr/lib/lustre/portals_upcall";
+
+void portals_run_upcall(char **argv)
+{
+ int rc;
+ int argc;
+ char *envp[] = {
+ "HOME=/",
+ "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
+ NULL};
+ ENTRY;
+
+ argv[0] = portals_upcall;
+ argc = 1;
+ while (argv[argc] != NULL)
+ argc++;
+
+ LASSERT(argc >= 2);
+
+ rc = USERMODEHELPER(argv[0], argv, envp);
+ if (rc < 0) {
+ CERROR("Error %d invoking portals upcall %s %s%s%s%s%s%s%s%s; "
+ "check /proc/sys/portals/upcall\n",
+ rc, argv[0], argv[1],
+ argc < 3 ? "" : ",", argc < 3 ? "" : argv[2],
+ argc < 4 ? "" : ",", argc < 4 ? "" : argv[3],
+ argc < 5 ? "" : ",", argc < 5 ? "" : argv[4],
+ argc < 6 ? "" : ",...");
+ } else {
+ CERROR("Invoked portals upcall %s %s%s%s%s%s%s%s%s\n",
+ argv[0], argv[1],
+ argc < 3 ? "" : ",", argc < 3 ? "" : argv[2],
+ argc < 4 ? "" : ",", argc < 4 ? "" : argv[3],
+ argc < 5 ? "" : ",", argc < 5 ? "" : argv[4],
+ argc < 6 ? "" : ",...");
+ }
+}
+
+void portals_run_lbug_upcall(char *file, const char *fn, const int line)
+{
+ char *argv[6];
+ char buf[32];
+
+ ENTRY;
+ snprintf (buf, sizeof buf, "%d", line);
+
+ argv[1] = "LBUG";
+ argv[2] = file;
+ argv[3] = (char *)fn;
+ argv[4] = buf;
+ argv[5] = NULL;
+
+ portals_run_upcall (argv);
+}
+
+#ifdef __KERNEL__
+
+void portals_debug_dumpstack(struct task_struct *tsk)
+{
+#if defined(__arch_um__)
+ if (tsk != NULL)
+ CWARN("stack dump for pid %d (%d) requested; wake up gdb.\n",
+ tsk->pid, UML_PID(tsk));
+ asm("int $3");
+#elif defined(HAVE_SHOW_TASK)
+ /* this is exported by lustre kernel version 42 */
+ extern void show_task(struct task_struct *);
+
+ if (tsk == NULL)
+ tsk = current;
+ CWARN("showing stack for process %d\n", tsk->pid);
+ show_task(tsk);
+#else
+ CWARN("can't show stack: kernel doesn't export show_task\n");
+#endif
+}
+
+cfs_task_t *portals_current(void)
+{
+ CWARN("current task struct is %p\n", current);
+ return current;
+}
+EXPORT_SYMBOL(portals_debug_dumpstack);
+EXPORT_SYMBOL(portals_current);
+
+#endif /* __KERNEL__ */
+
+EXPORT_SYMBOL(portals_run_upcall);
+EXPORT_SYMBOL(portals_run_lbug_upcall);
--- /dev/null
+# define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <linux/fs.h>
+#include <linux/ctype.h>
+#include <asm/uaccess.h>
+
+#include <libcfs/libcfs.h>
+
+cfs_file_t *
+cfs_filp_open (const char *name, int flags, int mode, int *err)
+{
+ /* XXX
+ * Maybe we need to handle flags and mode in the future
+ */
+ cfs_file_t *filp = NULL;
+
+ filp = filp_open(name, flags, mode);
+ if (IS_ERR(filp)) {
+ int rc;
+
+ rc = PTR_ERR(filp);
+ printk(KERN_ERR "LustreError: can't open %s file: err %d\n",
+ name, rc);
+ if (err)
+ *err = rc;
+ filp = NULL;
+ }
+ return filp;
+}
+
+EXPORT_SYMBOL(cfs_filp_open);
--- /dev/null
+# define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <arch-linux/cfs_lock.h>
+#include <libcfs/libcfs.h>
--- /dev/null
+# define DEBUG_SUBSYSTEM S_PORTALS
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+#include <libcfs/libcfs.h>
+
+void *
+cfs_alloc(size_t nr_bytes, u_int32_t flags)
+{
+ void *ptr = NULL;
+ unsigned int mflags = 0;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ if (flags & CFS_ALLOC_ATOMIC)
+ mflags |= __GFP_HIGH;
+ else if (flags & CFS_ALLOC_WAIT)
+ mflags |= __GFP_WAIT;
+ else
+ mflags |= (__GFP_HIGH | __GFP_WAIT);
+
+ if (flags & CFS_ALLOC_FS)
+ mflags |= __GFP_FS;
+ if (flags & CFS_ALLOC_IO)
+ mflags |= __GFP_IO | __GFP_HIGHIO;
+#else
+ if (flags & CFS_ALLOC_ATOMIC)
+ mflags |= __GFP_HIGH;
+ else
+ mflags |= __GFP_WAIT;
+ if (flags & CFS_ALLOC_FS)
+ mflags |= __GFP_FS;
+ if (flags & CFS_ALLOC_IO)
+ mflags |= __GFP_IO;
+#endif
+
+ ptr = kmalloc(nr_bytes, mflags);
+ if (ptr != NULL && (flags & CFS_ALLOC_ZERO))
+ memset(ptr, 0, nr_bytes);
+ return ptr;
+}
+
+void
+cfs_free(void *addr)
+{
+ kfree(addr);
+}
+
+void *
+cfs_alloc_large(size_t nr_bytes)
+{
+ return vmalloc(nr_bytes);
+}
+
+void
+cfs_free_large(void *addr)
+{
+ vfree(addr);
+}
+
+cfs_page_t *
+cfs_alloc_pages(unsigned int flags, unsigned int order)
+{
+ unsigned int mflags = 0;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ if (flags & CFS_ALLOC_ATOMIC)
+ mflags |= __GFP_HIGH;
+ else if (flags & CFS_ALLOC_WAIT)
+ mflags |= __GFP_WAIT;
+ else
+ mflags |= (__GFP_HIGH | __GFP_WAIT);
+
+ if (flags & CFS_ALLOC_FS)
+ mflags |= __GFP_FS;
+ if (flags & CFS_ALLOC_IO)
+ mflags |= __GFP_IO | __GFP_HIGHIO;
+ if (flags & CFS_ALLOC_HIGH)
+ mflags |= __GFP_HIGHMEM;
+#else
+ if (flags & CFS_ALLOC_ATOMIC)
+ mflags |= __GFP_HIGH;
+ else
+ mflags |= __GFP_WAIT;
+ if (flags & CFS_ALLOC_FS)
+ mflags |= __GFP_FS;
+ if (flags & CFS_ALLOC_IO)
+ mflags |= __GFP_IO;
+ if (flags & CFS_ALLOC_HIGH)
+ mflags |= __GFP_HIGHMEM;
+#endif
+
+ return alloc_pages(mflags, order);
+}
+
+cfs_mem_cache_t *
+cfs_mem_cache_create (const char *name, size_t size, size_t offset,
+ unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
+ void (*dtor)(void*, cfs_mem_cache_t *, unsigned long))
+{
+ return kmem_cache_create(name, size, offset, flags, ctor, dtor);
+}
+
+int
+cfs_mem_cache_destroy (cfs_mem_cache_t * cachep)
+{
+ return kmem_cache_destroy(cachep);
+}
+
+void *
+cfs_mem_cache_alloc(cfs_mem_cache_t *cachep, int flags)
+{
+ unsigned int mflags = 0;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ if (flags & CFS_SLAB_ATOMIC)
+ mflags |= __GFP_HIGH;
+ else if (flags & CFS_ALLOC_WAIT)
+ mflags |= __GFP_WAIT;
+ else
+ mflags |= (__GFP_HIGH | __GFP_WAIT);
+
+ if (flags & CFS_SLAB_FS)
+ mflags |= __GFP_FS;
+ if (flags & CFS_SLAB_IO)
+ mflags |= __GFP_IO | __GFP_HIGHIO;
+#else
+ if (flags & CFS_SLAB_ATOMIC)
+ mflags |= __GFP_HIGH;
+ else
+ mflags |= __GFP_WAIT;
+ if (flags & CFS_SLAB_FS)
+ mflags |= __GFP_FS;
+ if (flags & CFS_SLAB_IO)
+ mflags |= __GFP_IO;
+#endif
+
+ return kmem_cache_alloc(cachep, mflags);
+}
+
+void
+cfs_mem_cache_free(cfs_mem_cache_t *cachep, void *objp)
+{
+ return kmem_cache_free(cachep, objp);
+}
+
+EXPORT_SYMBOL(cfs_alloc);
+EXPORT_SYMBOL(cfs_free);
+EXPORT_SYMBOL(cfs_alloc_large);
+EXPORT_SYMBOL(cfs_free_large);
+EXPORT_SYMBOL(cfs_alloc_pages);
+EXPORT_SYMBOL(cfs_mem_cache_create);
+EXPORT_SYMBOL(cfs_mem_cache_destroy);
+EXPORT_SYMBOL(cfs_mem_cache_alloc);
+EXPORT_SYMBOL(cfs_mem_cache_free);
--- /dev/null
+#define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+#define PORTAL_MINOR 240
+
+
+void
+kportal_daemonize (char *str)
+{
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,63))
+ daemonize(str);
+#else
+ daemonize();
+ snprintf (current->comm, sizeof (current->comm), "%s", str);
+#endif
+}
+
+void
+kportal_blockallsigs ()
+{
+        unsigned long flags;
+
+        SIGNAL_MASK_LOCK(current, flags);
+        sigfillset(&current->blocked);
+        RECALC_SIGPENDING;
+        SIGNAL_MASK_UNLOCK(current, flags);
+}
+
+int portal_ioctl_getdata(char *buf, char *end, void *arg)
+{
+ struct portal_ioctl_hdr *hdr;
+ struct portal_ioctl_data *data;
+ int err;
+ ENTRY;
+
+ hdr = (struct portal_ioctl_hdr *)buf;
+ data = (struct portal_ioctl_data *)buf;
+
+ err = copy_from_user(buf, (void *)arg, sizeof(*hdr));
+ if (err)
+ RETURN(err);
+
+ if (hdr->ioc_version != PORTAL_IOCTL_VERSION) {
+ CERROR("PORTALS: version mismatch kernel vs application\n");
+ RETURN(-EINVAL);
+ }
+
+ if (hdr->ioc_len + buf >= end) {
+ CERROR("PORTALS: user buffer exceeds kernel buffer\n");
+ RETURN(-EINVAL);
+ }
+
+
+ if (hdr->ioc_len < sizeof(struct portal_ioctl_data)) {
+ CERROR("PORTALS: user buffer too small for ioctl\n");
+ RETURN(-EINVAL);
+ }
+
+ err = copy_from_user(buf, (void *)arg, hdr->ioc_len);
+ if (err)
+ RETURN(err);
+
+ if (portal_ioctl_is_invalid(data)) {
+ CERROR("PORTALS: ioctl not correctly formatted\n");
+ RETURN(-EINVAL);
+ }
+
+ if (data->ioc_inllen1)
+ data->ioc_inlbuf1 = &data->ioc_bulk[0];
+
+ if (data->ioc_inllen2)
+ data->ioc_inlbuf2 = &data->ioc_bulk[0] +
+ size_round(data->ioc_inllen1);
+
+ RETURN(0);
+}
+
+extern struct cfs_psdev_ops libcfs_psdev_ops;
+
+static int
+libcfs_psdev_open(struct inode * inode, struct file * file)
+{
+ struct portals_device_userstate **pdu = NULL;
+ int rc = 0;
+
+ if (!inode)
+ return (-EINVAL);
+ pdu = (struct portals_device_userstate **)&file->private_data;
+ if (libcfs_psdev_ops.p_open != NULL)
+ rc = libcfs_psdev_ops.p_open(0, (void *)pdu);
+ else
+ return (-EPERM);
+ return rc;
+}
+
+/* called when closing /dev/device */
+static int
+libcfs_psdev_release(struct inode * inode, struct file * file)
+{
+ struct portals_device_userstate *pdu;
+ int rc = 0;
+
+ if (!inode)
+ return (-EINVAL);
+ pdu = file->private_data;
+ if (libcfs_psdev_ops.p_close != NULL)
+ rc = libcfs_psdev_ops.p_close(0, (void *)pdu);
+ else
+ rc = -EPERM;
+ return rc;
+}
+
+static int
+libcfs_ioctl(struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg)
+{
+ struct cfs_psdev_file pfile;
+ int rc = 0;
+
+ if (current->fsuid != 0)
+ return -EACCES;
+
+ if ( _IOC_TYPE(cmd) != IOC_PORTAL_TYPE ||
+ _IOC_NR(cmd) < IOC_PORTAL_MIN_NR ||
+ _IOC_NR(cmd) > IOC_PORTAL_MAX_NR ) {
+ CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n",
+ _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
+ return (-EINVAL);
+ }
+
+ /* Handle platform-dependent IOC requests */
+ switch (cmd) {
+ case IOC_PORTAL_PANIC:
+ if (!capable (CAP_SYS_BOOT))
+ return (-EPERM);
+ panic("debugctl-invoked panic");
+ return (0);
+ case IOC_PORTAL_MEMHOG:
+ if (!capable (CAP_SYS_ADMIN))
+ return -EPERM;
+		/* fall through */
+ }
+
+ pfile.off = 0;
+ pfile.private_data = file->private_data;
+ if (libcfs_psdev_ops.p_ioctl != NULL)
+ rc = libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void *)arg);
+ else
+ rc = -EPERM;
+ return (rc);
+}
+
+static struct file_operations libcfs_fops = {
+ ioctl: libcfs_ioctl,
+ open: libcfs_psdev_open,
+ release: libcfs_psdev_release
+};
+
+cfs_psdev_t libcfs_dev = {
+ PORTAL_MINOR,
+ "portals",
+ &libcfs_fops
+};
+
+EXPORT_SYMBOL(kportal_blockallsigs);
+EXPORT_SYMBOL(kportal_daemonize);
+
+
--- /dev/null
+#define DEBUG_SUBSYSTEM S_PORTALS
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+int
+libcfs_arch_init(void)
+{
+ return 0;
+}
+
+void
+libcfs_arch_cleanup(void)
+{
+ return;
+}
+
+EXPORT_SYMBOL(libcfs_arch_init);
+EXPORT_SYMBOL(libcfs_arch_cleanup);
# define DEBUG_SUBSYSTEM S_PORTALS
-#include <linux/kp30.h>
+#include <libcfs/kp30.h>
#include <asm/div64.h>
#include "tracefile.h"
--- /dev/null
+# define DEBUG_SUBSYSTEM S_PORTALS
+
--- /dev/null
+#define DEBUG_SUBSYSTEM S_PORTALS
+#define LUSTRE_TRACEFILE_PRIVATE
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+#include "tracefile.h"
+
+#ifndef get_cpu
+#define get_cpu() smp_processor_id()
+#define put_cpu() do { } while (0)
+#endif
+
+extern union trace_data_union trace_data[NR_CPUS];
+extern char *tracefile;
+extern long long tracefile_size;
+extern struct rw_semaphore tracefile_sem;
+
+/* Pin the caller to its CPU (get_cpu) and disable local interrupts,
+ * then return that CPU's trace buffer state.  *flags receives the
+ * saved IRQ state for the matching trace_put_tcd(). */
+inline struct trace_cpu_data *
+__trace_get_tcd(unsigned long *flags)
+{
+ struct trace_cpu_data *ret;
+
+ int cpu = get_cpu();
+ local_irq_save(*flags);
+ ret = &trace_data[cpu].tcd;
+
+ return ret;
+}
+
+/* Undo __trace_get_tcd(): restore saved IRQ state, re-enable preemption */
+inline void
+trace_put_tcd (struct trace_cpu_data *tcd, unsigned long flags)
+{
+ local_irq_restore(flags);
+ put_cpu();
+}
+
+/* Fill in the fixed header that precedes every trace record:
+ * subsystem/mask bits, CPU id, wall-clock time, stack depth, pid
+ * and source line. */
+void
+set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask,
+ const int line, unsigned long stack)
+{
+ struct timeval tv;
+
+ do_gettimeofday(&tv);
+
+ header->ph_subsys = subsys;
+ header->ph_mask = mask;
+ header->ph_cpu_id = smp_processor_id();
+ header->ph_sec = (__u32)tv.tv_sec;
+ header->ph_usec = tv.tv_usec;
+ header->ph_stack = stack;
+ header->ph_pid = current->pid;
+ header->ph_line_num = line;
+#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
+ /* user-mode-linux: also record the host-side pid */
+ header->ph_extern_pid = current->thread.extern_pid;
+#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ header->ph_extern_pid = current->thread.mode.tt.extern_pid;
+#else
+ header->ph_extern_pid = 0;
+#endif
+ return;
+}
+
+/* Emit one trace record to the kernel log.  The debug mask selects the
+ * printk level; D_EMERG/D_ERROR records are tagged "LustreError". */
+void print_to_console(struct ptldebug_header *hdr, int mask, char *buf,
+                      int len, char *file, const char *fn)
+{
+        /* Default to the informational level so printk() is never handed
+         * NULL strings: previously, when none of the console mask bits
+         * were set and portal_printk was clear, both pointers stayed
+         * NULL and were passed through "%s". */
+        char *prefix = "Lustre", *ptype = KERN_INFO;
+
+        if ((mask & D_EMERG) != 0) {
+                prefix = "LustreError";
+                ptype = KERN_EMERG;
+        } else if ((mask & D_ERROR) != 0) {
+                prefix = "LustreError";
+                ptype = KERN_ERR;
+        } else if ((mask & D_WARNING) != 0) {
+                prefix = "Lustre";
+                ptype = KERN_WARNING;
+        }
+
+        printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, hdr->ph_pid,
+               hdr->ph_extern_pid, file, hdr->ph_line_num, fn, len, buf);
+        return;
+}
+
+/* /proc write handler for the debug-daemon control file.  Accepts
+ * "stop", "size=<MB>", or an absolute path for the trace daemon to
+ * write to.  Returns 'count' on success or a negative errno; the old
+ * code computed error codes into 'rc' but always returned 'count'. */
+int trace_write_daemon_file(struct file *file, const char *buffer,
+                            unsigned long count, void *data)
+{
+        char *name;
+        unsigned long off;
+        int rc;
+
+        name = kmalloc(count + 1, GFP_KERNEL);
+        if (name == NULL)
+                return -ENOMEM;
+
+        if (copy_from_user(name, buffer, count)) {
+                rc = -EFAULT;
+                goto out;
+        }
+
+        /* be nice and strip out trailing '\n' */
+        for (off = count ; off > 2 && isspace(name[off - 1]); off--)
+                ;
+
+        name[off] = '\0';
+
+        rc = count;
+        down_write(&tracefile_sem);
+        if (strcmp(name, "stop") == 0) {
+                /* release the old path: it used to be dropped here,
+                 * leaking the buffer */
+                if (tracefile != NULL)
+                        kfree(tracefile);
+                tracefile = NULL;
+                trace_stop_thread();
+                goto out_sem;
+        } else if (strncmp(name, "size=", 5) == 0) {
+                tracefile_size = simple_strtoul(name + 5, NULL, 0);
+                /* sane range is 10..20480 MB; fall back to the default */
+                if (tracefile_size < 10 || tracefile_size > 20480)
+                        tracefile_size = TRACEFILE_SIZE;
+                else
+                        tracefile_size <<= 20;
+                goto out_sem;
+        }
+
+        if (name[0] != '/') {
+                rc = -EINVAL;
+                goto out_sem;
+        }
+
+        if (tracefile != NULL)
+                kfree(tracefile);
+
+        /* hand 'name' over to the daemon; don't free it below */
+        tracefile = name;
+        name = NULL;
+        printk(KERN_INFO "Lustre: debug daemon will attempt to start writing "
+               "to %s (%lukB max)\n", tracefile, (long)(tracefile_size >> 10));
+
+        trace_start_thread();
+out_sem:
+        up_write(&tracefile_sem);
+out:
+        kfree(name);
+        return rc;
+}
+
+/* /proc read handler: report the current debug-daemon output path.
+ * NOTE(review): if no path was ever set, 'tracefile' is NULL and the
+ * "%s" relies on the kernel's vsnprintf printing a placeholder --
+ * confirm against the target kernel version. */
+int trace_read_daemon_file(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
+{
+ int rc;
+
+ down_read(&tracefile_sem);
+ rc = snprintf(page, count, "%s", tracefile);
+ up_read(&tracefile_sem);
+
+ return rc;
+}
+
+/* /proc write handler: set the total trace buffer size (MB across all
+ * CPUs).  Returns 'count' on success, negative errno on error. */
+int trace_write_debug_mb(struct file *file, const char *buffer,
+                         unsigned long count, void *data)
+{
+        char string[32];
+        int i;
+        unsigned max;
+
+        if (count >= sizeof(string)) {
+                printk(KERN_ERR "Lustre: value too large (length %lu bytes)\n",
+                       count);
+                return -EOVERFLOW;
+        }
+
+        if (copy_from_user(string, buffer, count))
+                return -EFAULT;
+
+        /* user data is not NUL-terminated; terminate before parsing so
+         * simple_strtoul() can't run off the end of the buffer */
+        string[count] = '\0';
+
+        max = simple_strtoul(string, NULL, 0);
+        if (max == 0)
+                return -EINVAL;
+
+        /* cap the request; NOTE(review): the message says "80%" but the
+         * arithmetic compares against RAM/5*4 -- preserving as-is */
+        if (max > (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5 || max >= 512) {
+                printk(KERN_ERR "Lustre: Refusing to set debug buffer size to "
+                       "%dMB, which is more than 80%% of available RAM (%lu)\n",
+                       max, (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5);
+                return -EINVAL;
+        }
+
+        /* split the total evenly across CPUs */
+        max /= smp_num_cpus;
+
+        for (i = 0; i < NR_CPUS; i++) {
+                struct trace_cpu_data *tcd;
+                tcd = &trace_data[i].tcd;
+                tcd->tcd_max_pages = max << (20 - PAGE_SHIFT);
+        }
+        return count;
+}
+
+/* /proc read handler: report the total trace buffer size in MB
+ * (per-CPU page limit scaled back up by the CPU count) */
+int trace_read_debug_mb(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
+{
+ struct trace_cpu_data *tcd;
+ unsigned long flags;
+ int rc;
+
+ tcd = trace_get_tcd(flags);
+ rc = snprintf(page, count, "%lu\n",
+ (tcd->tcd_max_pages >> (20 - PAGE_SHIFT)) * smp_num_cpus);
+ trace_put_tcd(tcd, flags);
+ return rc;
+}
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Phil Schwan <phil@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+/*
+ * miscellaneous libcfs stuff
+ */
+#define DEBUG_SUBSYSTEM S_PORTALS
+#include <portals/types.h>
+
+/*
+ * Convert server error code to client format. Error codes are from
+ * Linux errno.h, so for Linux client---identity.
+ */
+int convert_server_error(__u64 ecode)
+{
+ /* NOTE(review): __u64 -> int silently truncates; harmless for errno
+ * values, but confirm no caller passes wider codes */
+ return ecode;
+}
+
+/*
+ * convert <fcntl.h> flag from client to server.
+ * Linux client and server share the same flag values, so this is the
+ * identity mapping.
+ */
+int convert_client_oflag(int cflag)
+{
+ return cflag;
+}
+
+
#define DEBUG_SUBSYSTEM S_PORTALS
-#include <linux/kp30.h>
+#include <libcfs/kp30.h>
#if LWT_SUPPORT
#endif
#define DEBUG_SUBSYSTEM S_PORTALS
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
-#include <linux/fs.h>
-#include <linux/stat.h>
-#include <asm/uaccess.h>
-#include <asm/segment.h>
-#include <linux/miscdevice.h>
-
#include <portals/lib-p30.h>
#include <portals/p30.h>
-#include <linux/kp30.h>
-#include <linux/portals_compat25.h>
-
-#define PORTAL_MINOR 240
+#include <libcfs/kp30.h>
struct nal_cmd_handler {
int nch_number;
};
static struct nal_cmd_handler nal_cmd[16];
-static DECLARE_MUTEX(nal_cmd_sem);
+struct semaphore nal_cmd_mutex;
#ifdef PORTAL_DEBUG
void kportal_assertion_failed(char *expr, char *file, const char *func,
#endif
void
-kportal_daemonize (char *str)
-{
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,63))
- daemonize(str);
-#else
- daemonize();
- snprintf (current->comm, sizeof (current->comm), "%s", str);
-#endif
-}
-
-void
kportal_memhog_free (struct portals_device_userstate *pdu)
{
- struct page **level0p = &pdu->pdu_memhog_root_page;
- struct page **level1p;
- struct page **level2p;
+ cfs_page_t **level0p = &pdu->pdu_memhog_root_page;
+ cfs_page_t **level1p;
+ cfs_page_t **level2p;
int count1;
int count2;
-
+
if (*level0p != NULL) {
- level1p = (struct page **)page_address(*level0p);
+ level1p = (cfs_page_t **)cfs_page_address(*level0p);
count1 = 0;
-
- while (count1 < PAGE_SIZE/sizeof(struct page *) &&
+
+ while (count1 < CFS_PAGE_SIZE/sizeof(cfs_page_t *) &&
*level1p != NULL) {
- level2p = (struct page **)page_address(*level1p);
+ level2p = (cfs_page_t **)cfs_page_address(*level1p);
count2 = 0;
-
- while (count2 < PAGE_SIZE/sizeof(struct page *) &&
+
+ while (count2 < CFS_PAGE_SIZE/sizeof(cfs_page_t *) &&
*level2p != NULL) {
-
- __free_page(*level2p);
+
+ cfs_free_page(*level2p);
pdu->pdu_memhog_pages--;
level2p++;
count2++;
}
-
- __free_page(*level1p);
+
+ cfs_free_page(*level1p);
pdu->pdu_memhog_pages--;
level1p++;
count1++;
}
-
- __free_page(*level0p);
+
+ cfs_free_page(*level0p);
pdu->pdu_memhog_pages--;
*level0p = NULL;
}
-
+
LASSERT (pdu->pdu_memhog_pages == 0);
}
int
kportal_memhog_alloc (struct portals_device_userstate *pdu, int npages, int flags)
{
- struct page **level0p;
- struct page **level1p;
- struct page **level2p;
+ cfs_page_t **level0p;
+ cfs_page_t **level1p;
+ cfs_page_t **level2p;
int count1;
int count2;
-
+
LASSERT (pdu->pdu_memhog_pages == 0);
LASSERT (pdu->pdu_memhog_root_page == NULL);
return 0;
level0p = &pdu->pdu_memhog_root_page;
- *level0p = alloc_page(flags);
+ *level0p = cfs_alloc_page(flags);
if (*level0p == NULL)
return -ENOMEM;
pdu->pdu_memhog_pages++;
- level1p = (struct page **)page_address(*level0p);
+ level1p = (cfs_page_t **)cfs_page_address(*level0p);
count1 = 0;
- memset(level1p, 0, PAGE_SIZE);
-
+ memset(level1p, 0, CFS_PAGE_SIZE);
+
while (pdu->pdu_memhog_pages < npages &&
- count1 < PAGE_SIZE/sizeof(struct page *)) {
+ count1 < CFS_PAGE_SIZE/sizeof(cfs_page_t *)) {
- if (signal_pending(current))
+ if (cfs_signal_pending(cfs_current()))
return (-EINTR);
-
- *level1p = alloc_page(flags);
+
+ *level1p = cfs_alloc_page(flags);
if (*level1p == NULL)
return -ENOMEM;
pdu->pdu_memhog_pages++;
- level2p = (struct page **)page_address(*level1p);
+ level2p = (cfs_page_t **)cfs_page_address(*level1p);
count2 = 0;
- memset(level2p, 0, PAGE_SIZE);
-
+ memset(level2p, 0, CFS_PAGE_SIZE);
+
while (pdu->pdu_memhog_pages < npages &&
- count2 < PAGE_SIZE/sizeof(struct page *)) {
-
- if (signal_pending(current))
+ count2 < CFS_PAGE_SIZE/sizeof(cfs_page_t *)) {
+
+ if (cfs_signal_pending(cfs_current()))
return (-EINTR);
- *level2p = alloc_page(flags);
+ *level2p = cfs_alloc_page(flags);
if (*level2p == NULL)
return (-ENOMEM);
pdu->pdu_memhog_pages++;
-
+
level2p++;
count2++;
}
-
+
level1p++;
count1++;
}
return 0;
}
-void
-kportal_blockallsigs ()
-{
- unsigned long flags;
-
- SIGNAL_MASK_LOCK(current, flags);
- sigfillset(¤t->blocked);
- RECALC_SIGPENDING;
- SIGNAL_MASK_UNLOCK(current, flags);
-}
-
/* called when opening /dev/device */
-static int libcfs_psdev_open(struct inode * inode, struct file * file)
+static int libcfs_psdev_open(unsigned long flags, void *args)
{
struct portals_device_userstate *pdu;
ENTRY;
-
- if (!inode)
- RETURN(-EINVAL);
PORTAL_MODULE_USE;
pdu->pdu_memhog_pages = 0;
pdu->pdu_memhog_root_page = NULL;
}
- file->private_data = pdu;
-
+ *(struct portals_device_userstate **)args = pdu;
+
RETURN(0);
}
/* called when closing /dev/device */
-static int libcfs_psdev_release(struct inode * inode, struct file * file)
+static int libcfs_psdev_release(unsigned long flags, void *args)
{
struct portals_device_userstate *pdu;
ENTRY;
- if (!inode)
- RETURN(-EINVAL);
-
- pdu = file->private_data;
+ pdu = (struct portals_device_userstate *)args;
if (pdu != NULL) {
kportal_memhog_free(pdu);
PORTAL_FREE(pdu, sizeof(*pdu));
}
-
+
PORTAL_MODULE_UNUSE;
RETURN(0);
}
CDEBUG(D_IOCTL, "Register NAL %x, handler: %p\n", nal, handler);
- down(&nal_cmd_sem);
+ mutex_down(&nal_cmd_mutex);
if (libcfs_find_nal_cmd_handler(nal) != NULL) {
- up (&nal_cmd_sem);
+ mutex_up (&nal_cmd_mutex);
return (-EBUSY);
}
cmd = &nal_cmd[i];
break;
}
-
+
if (cmd == NULL) {
rc = -EBUSY;
} else {
cmd->nch_private = private;
}
- up(&nal_cmd_sem);
+ mutex_up(&nal_cmd_mutex);
return rc;
}
CDEBUG(D_IOCTL, "Unregister NAL %x\n", nal);
- down(&nal_cmd_sem);
+ mutex_down(&nal_cmd_mutex);
cmd = libcfs_find_nal_cmd_handler(nal);
LASSERT (cmd != NULL);
cmd->nch_handler = NULL;
cmd->nch_private = NULL;
- up(&nal_cmd_sem);
+ mutex_up(&nal_cmd_mutex);
}
EXPORT_SYMBOL(libcfs_nal_cmd_unregister);
int rc = -EINVAL;
ENTRY;
- down(&nal_cmd_sem);
+ mutex_down(&nal_cmd_mutex);
cmd = libcfs_find_nal_cmd_handler(nal);
if (cmd != NULL) {
- CDEBUG(D_IOCTL, "calling handler nal: %x, cmd: %d\n", nal,
+ CDEBUG(D_IOCTL, "calling handler nal: %x, cmd: %d\n", nal,
pcfg->pcfg_command);
rc = cmd->nch_handler(pcfg, cmd->nch_private);
} else {
CERROR("invalid nal: %x, cmd: %d\n", nal, pcfg->pcfg_command);
}
- up(&nal_cmd_sem);
+ mutex_up(&nal_cmd_mutex);
RETURN(rc);
#endif
}
EXPORT_SYMBOL(libcfs_nal_cmd);
-static DECLARE_RWSEM(ioctl_list_sem);
-static LIST_HEAD(ioctl_list);
+static struct rw_semaphore ioctl_list_sem;
+static struct list_head ioctl_list;
int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand)
{
}
EXPORT_SYMBOL(libcfs_deregister_ioctl);
-static int libcfs_ioctl(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg)
+static int libcfs_ioctl(struct cfs_psdev_file *pfile, unsigned long cmd, void *arg)
{
+ char buf[1024];
int err = -EINVAL;
- char buf[1024];
struct portal_ioctl_data *data;
ENTRY;
- if (current->fsuid != 0)
- RETURN(err = -EACCES);
-
- if ( _IOC_TYPE(cmd) != IOC_PORTAL_TYPE ||
- _IOC_NR(cmd) < IOC_PORTAL_MIN_NR ||
- _IOC_NR(cmd) > IOC_PORTAL_MAX_NR ) {
- CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n",
- _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
- RETURN(-EINVAL);
- }
+ /* 'cmd' and permissions get checked in our arch-specific caller */
if (portal_ioctl_getdata(buf, buf + 800, (void *)arg)) {
CERROR("PORTALS ioctl: data error\n");
- RETURN(-EINVAL);
+ return (-EINVAL);
}
-
data = (struct portal_ioctl_data *)buf;
switch (cmd) {
case IOC_PORTAL_CLEAR_DEBUG:
portals_debug_clear_buffer();
RETURN(0);
- case IOC_PORTAL_PANIC:
- if (!capable (CAP_SYS_BOOT))
- RETURN (-EPERM);
- panic("debugctl-invoked panic");
- RETURN(0);
+ /*
+ * case IOC_PORTAL_PANIC:
+ * Handled in arch/cfs_module.c
+ */
case IOC_PORTAL_MARK_DEBUG:
if (data->ioc_inlbuf1 == NULL ||
data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0')
}
case IOC_PORTAL_MEMHOG:
- if (!capable (CAP_SYS_ADMIN))
- err = -EPERM;
- else if (file->private_data == NULL) {
+ if (pfile->private_data == NULL) {
err = -EINVAL;
} else {
- kportal_memhog_free(file->private_data);
- err = kportal_memhog_alloc(file->private_data,
+ kportal_memhog_free(pfile->private_data);
+ /* XXX The ioc_flags is not GFP flags now, need to be fixed */
+ err = kportal_memhog_alloc(pfile->private_data,
data->ioc_count,
data->ioc_flags);
if (err != 0)
- kportal_memhog_free(file->private_data);
+ kportal_memhog_free(pfile->private_data);
}
break;
err = -EINVAL;
down_read(&ioctl_list_sem);
list_for_each_entry(hand, &ioctl_list, item) {
- err = hand->handle_ioctl(data, cmd, arg);
+ err = hand->handle_ioctl(data, cmd, (unsigned long)arg);
if (err != -EINVAL)
break;
}
RETURN(err);
}
-
-static struct file_operations libcfs_fops = {
- ioctl: libcfs_ioctl,
- open: libcfs_psdev_open,
- release: libcfs_psdev_release
-};
-
-
-static struct miscdevice libcfs_dev = {
- PORTAL_MINOR,
- "portals",
- &libcfs_fops
+struct cfs_psdev_ops libcfs_psdev_ops = {
+ libcfs_psdev_open,
+ libcfs_psdev_release,
+ NULL,
+ NULL,
+ libcfs_ioctl
};
extern int insert_proc(void);
MODULE_DESCRIPTION("Portals v3.1");
MODULE_LICENSE("GPL");
+extern cfs_psdev_t libcfs_dev;
+extern struct rw_semaphore tracefile_sem;
+extern struct semaphore trace_thread_sem;
+
+extern int libcfs_arch_init(void);
+extern void libcfs_arch_cleanup(void);
+
static int init_libcfs_module(void)
{
int rc;
+ libcfs_arch_init();
+ init_rwsem(&tracefile_sem);
+ init_mutex(&trace_thread_sem);
+ init_mutex(&nal_cmd_mutex);
+ init_rwsem(&ioctl_list_sem);
+ CFS_INIT_LIST_HEAD(&ioctl_list);
+
rc = portals_debug_init(5 * 1024 * 1024);
if (rc < 0) {
printk(KERN_ERR "LustreError: portals_debug_init: %d\n", rc);
goto cleanup_debug;
}
#endif
- rc = misc_register(&libcfs_dev);
+ rc = cfs_psdev_register(&libcfs_dev);
if (rc) {
CERROR("misc_register: error %d\n", rc);
goto cleanup_lwt;
return (0);
cleanup_deregister:
- misc_deregister(&libcfs_dev);
+ cfs_psdev_deregister(&libcfs_dev);
cleanup_lwt:
#if LWT_SUPPORT
lwt_fini();
CDEBUG(D_MALLOC, "before Portals cleanup: kmem %d\n",
atomic_read(&portal_kmemory));
- rc = misc_deregister(&libcfs_dev);
+ rc = cfs_psdev_deregister(&libcfs_dev);
if (rc)
CERROR("misc_deregister error %d\n", rc);
rc = portals_debug_cleanup();
if (rc)
printk(KERN_ERR "LustreError: portals_debug_cleanup: %d\n", rc);
+ libcfs_arch_cleanup();
}
-EXPORT_SYMBOL(kportal_daemonize);
-EXPORT_SYMBOL(kportal_blockallsigs);
EXPORT_SYMBOL(kportal_assertion_failed);
-module_init(init_libcfs_module);
-module_exit(exit_libcfs_module);
+cfs_module(libcfs, "1.0.0", init_libcfs_module, exit_libcfs_module);
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/rwsem.h>
-#include <linux/proc_fs.h>
-#include <linux/file.h>
-#include <linux/smp.h>
-#include <linux/ctype.h>
-#include <asm/uaccess.h>
-#ifdef HAVE_MM_INLINE
-#include <linux/mm_inline.h>
-#endif
#define DEBUG_SUBSYSTEM S_PORTALS
+#define LUSTRE_TRACEFILE_PRIVATE
+#include "tracefile.h"
-#include <linux/kp30.h>
-#include <linux/portals_compat25.h>
-#include <linux/libcfs.h>
-
-#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT))
+#include <libcfs/kp30.h>
+#include <libcfs/libcfs.h>
/* XXX move things up to the top, comment */
+union trace_data_union trace_data[NR_CPUS] __cacheline_aligned;
-static union {
- struct trace_cpu_data {
- struct list_head tcd_pages;
- unsigned long tcd_cur_pages;
-
- struct list_head tcd_daemon_pages;
- unsigned long tcd_cur_daemon_pages;
-
- unsigned long tcd_max_pages;
- int tcd_shutting_down;
- } tcd;
- char __pad[SMP_CACHE_BYTES];
-} trace_data[NR_CPUS] __cacheline_aligned;
-
-struct page_collection {
- struct list_head pc_pages;
- spinlock_t pc_lock;
- int pc_want_daemon_pages;
-};
-
-struct tracefiled_ctl {
- struct completion tctl_start;
- struct completion tctl_stop;
- wait_queue_head_t tctl_waitq;
- pid_t tctl_pid;
- atomic_t tctl_shutdown;
-};
-
-#define TRACEFILE_SIZE (500 << 20)
-static DECLARE_RWSEM(tracefile_sem);
-static char *tracefile = NULL;
-static long long tracefile_size = TRACEFILE_SIZE;
+struct rw_semaphore tracefile_sem;
+char *tracefile = NULL;
+long long tracefile_size = TRACEFILE_SIZE;
static struct tracefiled_ctl trace_tctl;
-static DECLARE_MUTEX(trace_thread_sem);
+struct semaphore trace_thread_sem;
static int thread_running = 0;
-#ifndef get_cpu
-#define get_cpu() smp_processor_id()
-#define put_cpu() do { } while (0)
-#endif
+static void put_pages_on_daemon_list_on_cpu(void *info);
+
+static inline struct trace_page *tage_from_list(struct list_head *list)
+{
+ return list_entry(list, struct trace_page, linkage);
+}
-#define trace_get_tcd(FLAGS) ({ \
- struct trace_cpu_data *__ret; \
- int __cpu = get_cpu(); \
- local_irq_save(FLAGS); \
- __ret = &trace_data[__cpu].tcd; \
- __ret; \
-})
+/* Allocate a trace page descriptor plus its backing page.  Returns NULL
+ * (with nothing leaked) if either allocation fails. */
+static struct trace_page *tage_alloc(int gfp)
+{
+        cfs_page_t *page;
+        struct trace_page *tage;
+
+        page = cfs_alloc_page(gfp);
+        if (page == NULL)
+                return NULL;
+
+        tage = cfs_alloc(sizeof *tage, gfp);
+        if (tage == NULL) {
+                /* previously fell through and wrote tage->page via the
+                 * NULL pointer after freeing the page */
+                cfs_free_page(page);
+                return NULL;
+        }
+
+        tage->page = page;
+        return tage;
+}
-#define trace_put_tcd(TCD, FLAGS) do { \
- local_irq_restore(FLAGS); \
- put_cpu(); \
-} while (0)
+static void tage_free(struct trace_page *tage)
+{
+ LASSERT(tage != NULL);
-static void put_pages_on_daemon_list_on_cpu(void *info);
+ if (tage->page != NULL)
+ cfs_free_page(tage->page);
+ cfs_free(tage);
+}
+
+/* move 'tage' to the tail of 'queue'; it must already be linked on
+ * some list for list_move_tail() to be valid */
+static void tage_to_tail(struct trace_page *tage, struct list_head *queue)
+{
+ LASSERT(tage != NULL);
+ LASSERT(queue != NULL);
+
+ list_move_tail(&tage->linkage, queue);
+}
+
+/* sanity predicate used in LASSERTs: descriptor exists, the used byte
+ * count fits in one page, and the backing page is still referenced */
+static int tage_invariant(struct trace_page *tage)
+{
+ return
+ tage != NULL &&
+ tage->used <= CFS_PAGE_SIZE &&
+ cfs_page_count(tage->page) > 0;
+}
/* return a page that has 'len' bytes left at the end */
-static struct page *trace_get_page(struct trace_cpu_data *tcd,
- unsigned long len)
+static struct trace_page *trace_get_tage(struct trace_cpu_data *tcd,
+ unsigned long len)
{
- struct page *page = NULL;
+ struct trace_page *tage;
- if (len > PAGE_SIZE) {
+ if (len > CFS_PAGE_SIZE) {
printk(KERN_ERR "cowardly refusing to write %lu bytes in a "
"page\n", len);
return NULL;
}
if (!list_empty(&tcd->tcd_pages)) {
- page = list_entry(tcd->tcd_pages.prev, struct page,
- PAGE_LIST_ENTRY);
- if (page->index + len <= PAGE_SIZE)
- return page;
+ tage = tage_from_list(tcd->tcd_pages.prev);
+ if (tage->used + len <= CFS_PAGE_SIZE)
+ return tage;
}
if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
- page = alloc_page(GFP_ATOMIC);
- if (page == NULL) {
+ tage = tage_alloc(CFS_ALLOC_ATOMIC);
+ if (tage == NULL) {
/* the kernel should print a message for us. fall back
* to using the last page in the ring buffer. */
goto ring_buffer;
}
- page->index = 0;
- page->mapping = (void *)(long)smp_processor_id();
- list_add_tail(&PAGE_LIST(page), &tcd->tcd_pages);
+ tage->used = 0;
+ tage->cpu = smp_processor_id();
+ list_add_tail(&tage->linkage, &tcd->tcd_pages);
tcd->tcd_cur_pages++;
if (tcd->tcd_cur_pages > 8 && thread_running) {
struct tracefiled_ctl *tctl = &trace_tctl;
- wake_up(&tctl->tctl_waitq);
+ cfs_waitq_signal(&tctl->tctl_waitq);
}
- return page;
+ return tage;
}
ring_buffer:
if (thread_running) {
int pgcount = tcd->tcd_cur_pages / 10;
struct page_collection pc;
- struct list_head *pos, *tmp;
+ struct trace_page *tage;
+ struct trace_page *tmp;
+
printk(KERN_WARNING "debug daemon buffer overflowed; discarding"
" 10%% of pages (%d)\n", pgcount + 1);
- INIT_LIST_HEAD(&pc.pc_pages);
+ CFS_INIT_LIST_HEAD(&pc.pc_pages);
spin_lock_init(&pc.pc_lock);
- list_for_each_safe(pos, tmp, &tcd->tcd_pages) {
- struct page *page;
-
+ list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
if (pgcount-- == 0)
break;
- page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
- list_del(&PAGE_LIST(page));
- list_add_tail(&PAGE_LIST(page), &pc.pc_pages);
+ list_move_tail(&tage->linkage, &pc.pc_pages);
tcd->tcd_cur_pages--;
}
put_pages_on_daemon_list_on_cpu(&pc);
}
LASSERT(!list_empty(&tcd->tcd_pages));
- page = list_entry(tcd->tcd_pages.next, struct page, PAGE_LIST_ENTRY);
- page->index = 0;
+ tage = tage_from_list(tcd->tcd_pages.next);
+ tage->used = 0;
+ tage_to_tail(tage, &tcd->tcd_pages);
- list_del(&PAGE_LIST(page));
- list_add_tail(&PAGE_LIST(page), &tcd->tcd_pages);
- return page;
-}
-
-static void print_to_console(struct ptldebug_header *hdr, int mask, char *buf,
- int len, char *file, const char *fn)
-{
- char *prefix = NULL, *ptype = NULL;
-
- if ((mask & D_EMERG) != 0) {
- prefix = "LustreError";
- ptype = KERN_EMERG;
- } else if ((mask & D_ERROR) != 0) {
- prefix = "LustreError";
- ptype = KERN_ERR;
- } else if ((mask & D_WARNING) != 0) {
- prefix = "Lustre";
- ptype = KERN_WARNING;
- } else if (portal_printk) {
- prefix = "Lustre";
- ptype = KERN_INFO;
- }
-
- printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, hdr->ph_pid,
- hdr->ph_extern_pid, file, hdr->ph_line_num, fn, len, buf);
+ return tage;
}
void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
{
struct trace_cpu_data *tcd;
struct ptldebug_header header;
- struct page *page;
+ struct trace_page *tage;
char *debug_buf = format;
int known_size, needed = 85 /* average message length */, max_nob;
va_list ap;
unsigned long flags;
- struct timeval tv;
#ifdef CRAY_PORTALS
if (mask == D_PORTALS && !(portal_debug & D_PORTALS))
if (tcd->tcd_shutting_down)
goto out;
- do_gettimeofday(&tv);
-
- header.ph_subsys = subsys;
- header.ph_mask = mask;
- header.ph_cpu_id = smp_processor_id();
- header.ph_sec = (__u32)tv.tv_sec;
- header.ph_usec = tv.tv_usec;
- header.ph_stack = stack;
- header.ph_pid = current->pid;
- header.ph_line_num = line;
-
-#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
- header.ph_extern_pid = current->thread.extern_pid;
-#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- header.ph_extern_pid = current->thread.mode.tt.extern_pid;
-#else
- header.ph_extern_pid = 0;
-#endif
-
+ set_ptldebug_header(&header, subsys, mask, line, stack);
known_size = sizeof(header) + strlen(file) + strlen(fn) + 2; // nulls
retry:
- page = trace_get_page(tcd, needed + known_size);
- if (page == NULL) {
+ tage = trace_get_tage(tcd, needed + known_size);
+ if (tage == NULL) {
debug_buf = format;
- if (needed + known_size > PAGE_SIZE)
+ if (needed + known_size > CFS_PAGE_SIZE)
mask |= D_ERROR;
needed = strlen(format);
goto out;
}
- debug_buf = page_address(page) + page->index + known_size;
+ debug_buf = cfs_page_address(tage->page) + tage->used + known_size;
- max_nob = PAGE_SIZE - page->index - known_size;
+ max_nob = CFS_PAGE_SIZE - tage->used - known_size;
LASSERT(max_nob > 0);
va_start(ap, format);
needed = vsnprintf(debug_buf, max_nob, format, ap);
goto retry;
header.ph_len = known_size + needed;
- debug_buf = page_address(page) + page->index;
+ debug_buf = cfs_page_address(tage->page) + tage->used;
memcpy(debug_buf, &header, sizeof(header));
- page->index += sizeof(header);
+ tage->used += sizeof(header);
debug_buf += sizeof(header);
strcpy(debug_buf, file);
- page->index += strlen(file) + 1;
+ tage->used += strlen(file) + 1;
debug_buf += strlen(file) + 1;
strcpy(debug_buf, fn);
- page->index += strlen(fn) + 1;
+ tage->used += strlen(fn) + 1;
debug_buf += strlen(fn) + 1;
- page->index += needed;
- if (page->index > PAGE_SIZE)
- printk(KERN_EMERG "page->index == %lu in portals_debug_msg\n",
- page->index);
+ tage->used += needed;
+ if (tage->used > CFS_PAGE_SIZE)
+ printk(KERN_EMERG
+ "tage->used == %u in portals_debug_msg\n", tage->used);
out:
if ((mask & (D_EMERG | D_ERROR | D_WARNING)) || portal_printk)
tcd = trace_get_tcd(flags);
spin_lock(&pc->pc_lock);
- list_splice(&tcd->tcd_pages, &pc->pc_pages);
- INIT_LIST_HEAD(&tcd->tcd_pages);
+ list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
tcd->tcd_cur_pages = 0;
if (pc->pc_want_daemon_pages) {
- list_splice(&tcd->tcd_daemon_pages, &pc->pc_pages);
- INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
+ list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages);
tcd->tcd_cur_daemon_pages = 0;
}
spin_unlock(&pc->pc_lock);
static void collect_pages(struct page_collection *pc)
{
/* needs to be fixed up for preempt */
- INIT_LIST_HEAD(&pc->pc_pages);
+ CFS_INIT_LIST_HEAD(&pc->pc_pages);
collect_pages_on_cpu(pc);
smp_call_function(collect_pages_on_cpu, pc, 0, 1);
}
{
struct page_collection *pc = info;
struct trace_cpu_data *tcd;
- struct list_head *pos, *tmp, *cur_head;
+ struct list_head *cur_head;
unsigned long flags;
+ struct trace_page *tage;
+ struct trace_page *tmp;
tcd = trace_get_tcd(flags);
cur_head = tcd->tcd_pages.next;
spin_lock(&pc->pc_lock);
- list_for_each_safe(pos, tmp, &pc->pc_pages) {
- struct page *page;
+ list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
- page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
- LASSERT(page->index <= PAGE_SIZE);
- LASSERT(page_count(page) > 0);
+ LASSERT(tage_invariant(tage));
- if ((unsigned long)page->mapping != smp_processor_id())
+ if (tage->cpu != smp_processor_id())
continue;
- list_del(&PAGE_LIST(page));
- list_add_tail(&PAGE_LIST(page), cur_head);
+ tage_to_tail(tage, cur_head);
tcd->tcd_cur_pages++;
}
spin_unlock(&pc->pc_lock);
{
struct page_collection *pc = info;
struct trace_cpu_data *tcd;
- struct list_head *pos, *tmp;
+ struct trace_page *tage;
+ struct trace_page *tmp;
unsigned long flags;
tcd = trace_get_tcd(flags);
spin_lock(&pc->pc_lock);
- list_for_each_safe(pos, tmp, &pc->pc_pages) {
- struct page *page;
+ list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
+
+ LASSERT(tage_invariant(tage));
- page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
- LASSERT(page->index <= PAGE_SIZE);
- LASSERT(page_count(page) > 0);
- if ((unsigned long)page->mapping != smp_processor_id())
+ if (tage->cpu != smp_processor_id())
continue;
- list_del(&PAGE_LIST(page));
- list_add_tail(&PAGE_LIST(page), &tcd->tcd_daemon_pages);
+ tage_to_tail(tage, &tcd->tcd_daemon_pages);
tcd->tcd_cur_daemon_pages++;
if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
+ struct trace_page *victim;
+
LASSERT(!list_empty(&tcd->tcd_daemon_pages));
- page = list_entry(tcd->tcd_daemon_pages.next,
- struct page, PAGE_LIST_ENTRY);
+ victim = tage_from_list(tcd->tcd_daemon_pages.next);
- LASSERT(page->index <= PAGE_SIZE);
- LASSERT(page_count(page) > 0);
+ LASSERT(tage_invariant(victim));
- page->index = 0;
- list_del(&PAGE_LIST(page));
- page->mapping = NULL;
- __free_page(page);
+ list_del(&victim->linkage);
+ tage_free(victim);
tcd->tcd_cur_daemon_pages--;
}
}
void trace_debug_print(void)
{
struct page_collection pc;
- struct list_head *pos, *tmp;
+ struct trace_page *tage;
+ struct trace_page *tmp;
spin_lock_init(&pc.pc_lock);
collect_pages(&pc);
- list_for_each_safe(pos, tmp, &pc.pc_pages) {
- struct page *page;
+ list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
char *p, *file, *fn;
+ cfs_page_t *page;
- page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
- LASSERT(page->index <= PAGE_SIZE);
- LASSERT(page_count(page) > 0);
+ LASSERT(tage_invariant(tage));
- p = page_address(page);
- while (p < ((char *)page_address(page) + PAGE_SIZE)) {
+ page = tage->page;
+ p = cfs_page_address(page);
+ while (p < ((char *)cfs_page_address(page) + CFS_PAGE_SIZE)) {
struct ptldebug_header *hdr;
int len;
hdr = (void *)p;
print_to_console(hdr, D_EMERG, p, len, file, fn);
}
- list_del(&PAGE_LIST(page));
- page->mapping = NULL;
- __free_page(page);
+ list_del(&tage->linkage);
+ tage_free(tage);
}
}
int tracefile_dump_all_pages(char *filename)
{
struct page_collection pc;
- struct file *filp;
- struct list_head *pos, *tmp;
- mm_segment_t oldfs;
+ cfs_file_t *filp;
+ struct trace_page *tage;
+ struct trace_page *tmp;
+ CFS_DECL_MMSPACE;
int rc;
down_write(&tracefile_sem);
- filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600);
- if (IS_ERR(filp)) {
- rc = PTR_ERR(filp);
+ filp = cfs_filp_open(filename,
+ O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600, &rc);
+ if (!filp) {
printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n",
filename, rc);
goto out;
/* ok, for now, just write the pages. in the future we'll be building
* iobufs with the pages and calling generic_direct_IO */
- oldfs = get_fs();
- set_fs(get_ds());
- list_for_each_safe(pos, tmp, &pc.pc_pages) {
- struct page *page;
-
- page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
- LASSERT(page->index <= PAGE_SIZE);
- LASSERT(page_count(page) > 0);
-
- rc = filp->f_op->write(filp, page_address(page), page->index,
- &filp->f_pos);
- if (rc != page->index) {
- printk(KERN_WARNING "wanted to write %lu but wrote "
- "%d\n", page->index, rc);
+ CFS_MMSPACE_OPEN;
+ list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
+
+ LASSERT(tage_invariant(tage));
+
+ rc = cfs_filp_write(filp, cfs_page_address(tage->page),
+ tage->used, cfs_filp_poff(filp));
+ if (rc != tage->used) {
+ printk(KERN_WARNING "wanted to write %u but wrote "
+ "%d\n", tage->used, rc);
put_pages_back(&pc);
break;
}
- list_del(&PAGE_LIST(page));
- page->mapping = NULL;
- __free_page(page);
+ list_del(&tage->linkage);
+ tage_free(tage);
}
- set_fs(oldfs);
- rc = filp->f_op->fsync(filp, filp->f_dentry, 1);
+ CFS_MMSPACE_CLOSE;
+ rc = cfs_filp_fsync(filp);
if (rc)
printk(KERN_ERR "sync returns %d\n", rc);
close:
- filp_close(filp, 0);
+ cfs_filp_close(filp);
out:
up_write(&tracefile_sem);
return rc;
void trace_flush_pages(void)
{
struct page_collection pc;
- struct list_head *pos, *tmp;
+ struct trace_page *tage;
+ struct trace_page *tmp;
spin_lock_init(&pc.pc_lock);
collect_pages(&pc);
- list_for_each_safe(pos, tmp, &pc.pc_pages) {
- struct page *page;
+ list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
- page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
- LASSERT(page->index <= PAGE_SIZE);
- LASSERT(page_count(page) > 0);
+ LASSERT(tage_invariant(tage));
- list_del(&PAGE_LIST(page));
- page->mapping = NULL;
- __free_page(page);
+ list_del(&tage->linkage);
+ tage_free(tage);
}
}
unsigned long off;
int rc;
- name = kmalloc(count + 1, GFP_KERNEL);
+ name = cfs_alloc(count + 1, CFS_ALLOC_STD);
if (name == NULL)
return -ENOMEM;
rc = tracefile_dump_all_pages(name);
out:
if (name)
- kfree(name);
+ cfs_free(name);
return count;
}
EXPORT_SYMBOL(trace_dk);
{
struct page_collection pc;
struct tracefiled_ctl *tctl = arg;
- struct list_head *pos, *tmp;
+ struct trace_page *tage;
+ struct trace_page *tmp;
struct ptldebug_header *hdr;
- struct file *filp;
- struct page *page;
- mm_segment_t oldfs;
+ cfs_file_t *filp;
+ CFS_DECL_MMSPACE;
int rc;
/* we're started late enough that we pick up init's fs context */
complete(&tctl->tctl_start);
while (1) {
- wait_queue_t __wait;
+ cfs_waitlink_t __wait;
- init_waitqueue_entry(&__wait, current);
- add_wait_queue(&tctl->tctl_waitq, &__wait);
+ cfs_waitlink_init(&__wait);
+ cfs_waitq_add(&tctl->tctl_waitq, &__wait);
set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(HZ);
- remove_wait_queue(&tctl->tctl_waitq, &__wait);
+ cfs_waitq_timedwait(&__wait, cfs_time_seconds(1));
+ cfs_waitq_del(&tctl->tctl_waitq, &__wait);
if (atomic_read(&tctl->tctl_shutdown))
break;
filp = NULL;
down_read(&tracefile_sem);
if (tracefile != NULL) {
- filp = filp_open(tracefile, O_CREAT|O_RDWR|O_LARGEFILE,
- 0600);
- if (IS_ERR(filp)) {
- printk("couldn't open %s: %ld\n", tracefile,
- PTR_ERR(filp));
- filp = NULL;
- }
+ filp = cfs_filp_open(tracefile, O_CREAT|O_RDWR|O_LARGEFILE,
+ 0600, &rc);
+ if (!(filp))
+ printk("couldn't open %s: %d\n", tracefile, rc);
}
up_read(&tracefile_sem);
if (filp == NULL) {
continue;
}
- oldfs = get_fs();
- set_fs(get_ds());
+ CFS_MMSPACE_OPEN;
/* mark the first header, so we can sort in chunks */
- page = list_entry(pc.pc_pages.next, struct page,
- PAGE_LIST_ENTRY);
- LASSERT(page->index <= PAGE_SIZE);
- LASSERT(page_count(page) > 0);
+ tage = tage_from_list(pc.pc_pages.next);
+ LASSERT(tage_invariant(tage));
- hdr = page_address(page);
+ hdr = cfs_page_address(tage->page);
hdr->ph_flags |= PH_FLAG_FIRST_RECORD;
- list_for_each_safe(pos, tmp, &pc.pc_pages) {
+ list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
static loff_t f_pos;
- page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
- LASSERT(page->index <= PAGE_SIZE);
- LASSERT(page_count(page) > 0);
+
+ LASSERT(tage_invariant(tage));
if (f_pos >= tracefile_size)
f_pos = 0;
- else if (f_pos > filp->f_dentry->d_inode->i_size)
- f_pos = filp->f_dentry->d_inode->i_size;
-
- rc = filp->f_op->write(filp, page_address(page),
- page->index, &f_pos);
- if (rc != page->index) {
- printk(KERN_WARNING "wanted to write %lu but "
- "wrote %d\n", page->index, rc);
+ else if (f_pos > cfs_filp_size(filp))
+ f_pos = cfs_filp_size(filp);
+
+ rc = cfs_filp_write(filp, cfs_page_address(tage->page),
+ tage->used, &f_pos);
+ if (rc != tage->used) {
+ printk(KERN_WARNING "wanted to write %u but "
+ "wrote %d\n", tage->used, rc);
put_pages_back(&pc);
}
}
- set_fs(oldfs);
- filp_close(filp, 0);
+ CFS_MMSPACE_CLOSE;
+ cfs_filp_close(filp);
put_pages_on_daemon_list(&pc);
}
struct tracefiled_ctl *tctl = &trace_tctl;
int rc = 0;
- down(&trace_thread_sem);
+ mutex_down(&trace_thread_sem);
if (thread_running)
goto out;
init_completion(&tctl->tctl_start);
init_completion(&tctl->tctl_stop);
- init_waitqueue_head(&tctl->tctl_waitq);
+ cfs_waitq_init(&tctl->tctl_waitq);
atomic_set(&tctl->tctl_shutdown, 0);
- if (kernel_thread(tracefiled, tctl, 0) < 0) {
+ if (cfs_kernel_thread(tracefiled, tctl, 0) < 0) {
rc = -ECHILD;
goto out;
}
wait_for_completion(&tctl->tctl_start);
thread_running = 1;
out:
- up(&trace_thread_sem);
+ mutex_up(&trace_thread_sem);
return rc;
}
{
struct tracefiled_ctl *tctl = &trace_tctl;
- down(&trace_thread_sem);
+ mutex_down(&trace_thread_sem);
if (thread_running) {
printk(KERN_INFO "Shutting down debug daemon thread...\n");
atomic_set(&tctl->tctl_shutdown, 1);
wait_for_completion(&tctl->tctl_stop);
thread_running = 0;
}
- up(&trace_thread_sem);
-}
-
-int trace_write_daemon_file(struct file *file, const char *buffer,
- unsigned long count, void *data)
-{
- char *name;
- unsigned long off;
- int rc;
-
- name = kmalloc(count + 1, GFP_KERNEL);
- if (name == NULL)
- return -ENOMEM;
-
- if (copy_from_user(name, buffer, count)) {
- rc = -EFAULT;
- goto out;
- }
-
- /* be nice and strip out trailing '\n' */
- for (off = count ; off > 2 && isspace(name[off - 1]); off--)
- ;
-
- name[off] = '\0';
-
- down_write(&tracefile_sem);
- if (strcmp(name, "stop") == 0) {
- tracefile = NULL;
- trace_stop_thread();
- goto out_sem;
- } else if (strncmp(name, "size=", 5) == 0) {
- tracefile_size = simple_strtoul(name + 5, NULL, 0);
- if (tracefile_size < 10 || tracefile_size > 20480)
- tracefile_size = TRACEFILE_SIZE;
- else
- tracefile_size <<= 20;
- goto out_sem;
- }
-
- if (name[0] != '/') {
- rc = -EINVAL;
- goto out_sem;
- }
-
- if (tracefile != NULL)
- kfree(tracefile);
-
- tracefile = name;
- name = NULL;
-
- printk(KERN_INFO "Lustre: debug daemon will attempt to start writing "
- "to %s (%lukB max)\n", tracefile, (long)(tracefile_size >> 10));
-
- trace_start_thread();
-
- out_sem:
- up_write(&tracefile_sem);
-
- out:
- kfree(name);
- return count;
-}
-
-int trace_read_daemon_file(char *page, char **start, off_t off, int count,
- int *eof, void *data)
-{
- int rc;
-
- down_read(&tracefile_sem);
- rc = snprintf(page, count, "%s", tracefile);
- up_read(&tracefile_sem);
-
- return rc;
-}
-
-int trace_write_debug_mb(struct file *file, const char *buffer,
- unsigned long count, void *data)
-{
- char string[32];
- int i;
- unsigned max;
-
- if (count >= sizeof(string)) {
- printk(KERN_ERR "Lustre: value too large (length %lu bytes)\n",
- count);
- return -EOVERFLOW;
- }
-
- if (copy_from_user(string, buffer, count))
- return -EFAULT;
-
- max = simple_strtoul(string, NULL, 0);
- if (max == 0)
- return -EINVAL;
-
- if (max > (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5 || max >= 512) {
- printk(KERN_ERR "Lustre: Refusing to set debug buffer size to "
- "%dMB, which is more than 80%% of available RAM (%lu)\n",
- max, (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5);
- return -EINVAL;
- }
-
- max /= smp_num_cpus;
-
- for (i = 0; i < NR_CPUS; i++) {
- struct trace_cpu_data *tcd;
- tcd = &trace_data[i].tcd;
- tcd->tcd_max_pages = max << (20 - PAGE_SHIFT);
- }
- return count;
-}
-
-int trace_read_debug_mb(char *page, char **start, off_t off, int count,
- int *eof, void *data)
-{
- struct trace_cpu_data *tcd;
- unsigned long flags;
- int rc;
-
- tcd = trace_get_tcd(flags);
- rc = snprintf(page, count, "%lu\n",
- (tcd->tcd_max_pages >> (20 - PAGE_SHIFT)) * smp_num_cpus);
- trace_put_tcd(tcd, flags);
-
- return rc;
+ mutex_up(&trace_thread_sem);
}
int tracefile_init(void)
for (i = 0; i < NR_CPUS; i++) {
tcd = &trace_data[i].tcd;
- INIT_LIST_HEAD(&tcd->tcd_pages);
- INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
+ CFS_INIT_LIST_HEAD(&tcd->tcd_pages);
+ CFS_INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
tcd->tcd_cur_pages = 0;
tcd->tcd_cur_daemon_pages = 0;
tcd->tcd_max_pages = TCD_MAX_PAGES;
static void trace_cleanup_on_cpu(void *info)
{
struct trace_cpu_data *tcd;
- struct list_head *pos, *tmp;
+ struct trace_page *tage;
+ struct trace_page *tmp;
unsigned long flags;
tcd = trace_get_tcd(flags);
tcd->tcd_shutting_down = 1;
- list_for_each_safe(pos, tmp, &tcd->tcd_pages) {
- struct page *page;
-
- page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
- LASSERT(page->index <= PAGE_SIZE);
- LASSERT(page_count(page) > 0);
+ list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
+ LASSERT(tage_invariant(tage));
- list_del(&PAGE_LIST(page));
- page->mapping = NULL;
- __free_page(page);
+ list_del(&tage->linkage);
+ tage_free(tage);
}
tcd->tcd_cur_pages = 0;
{
struct page_collection pc;
- INIT_LIST_HEAD(&pc.pc_pages);
+ CFS_INIT_LIST_HEAD(&pc.pc_pages);
spin_lock_init(&pc.pc_lock);
trace_cleanup_on_cpu(&pc);
-#ifndef __PORTALS_TRACEFILE_H
-#define __PORTALS_TRACEFILE_H
+#ifndef __LIBCFS_TRACEFILE_H__
+#define __LIBCFS_TRACEFILE_H__
+
+#include <libcfs/libcfs.h>
int tracefile_dump_all_pages(char *filename);
void trace_debug_print(void);
int trace_dk(struct file *file, const char *buffer, unsigned long count,
void *data);
+#ifdef LUSTRE_TRACEFILE_PRIVATE
+/*
+ * Private declare for tracefile
+ */
+#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT))
+
+#define TRACEFILE_SIZE (500 << 20)
+
+union trace_data_union {
+ struct trace_cpu_data {
+ struct list_head tcd_pages;
+ unsigned long tcd_cur_pages;
+
+ struct list_head tcd_daemon_pages;
+ unsigned long tcd_cur_daemon_pages;
+
+ unsigned long tcd_max_pages;
+ int tcd_shutting_down;
+ } tcd;
+ char __pad[SMP_CACHE_BYTES];
+};
+
+struct page_collection {
+ struct list_head pc_pages;
+ spinlock_t pc_lock;
+ int pc_want_daemon_pages;
+};
+
+struct tracefiled_ctl {
+ struct completion tctl_start;
+ struct completion tctl_stop;
+ cfs_waitq_t tctl_waitq;
+ pid_t tctl_pid;
+ atomic_t tctl_shutdown;
+};
+
+/*
+ * small data-structure for each page owned by tracefiled.
+ */
+struct trace_page {
+ /*
+ * page itself
+ */
+ cfs_page_t *page;
+ /*
+ * linkage into one of the lists in trace_data_union or
+ * page_collection
+ */
+ struct list_head linkage;
+ /*
+ * number of bytes used within this page
+ */
+ unsigned int used;
+ /*
+ * cpu that owns this page
+ */
+ int cpu;
+};
+
+extern void set_ptldebug_header(struct ptldebug_header *header,
+ int subsys, int mask, const int line,
+ unsigned long stack);
+extern void print_to_console(struct ptldebug_header *hdr, int mask,
+ char *buf, int len, char *file, const char *fn);
+extern struct trace_cpu_data * __trace_get_tcd (unsigned long *flags);
+extern void __trace_put_tcd (struct trace_cpu_data *tcd, unsigned long flags);
+
+#define trace_get_tcd(f) __trace_get_tcd(&(f))
+#define trace_put_tcd(t, f) __trace_put_tcd(t, f)
+
+#endif /* LUSTRE_TRACEFILE_PRIVATE */
+
 #endif /* __LIBCFS_TRACEFILE_H__ */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ *
+ * Implementation of portable synchronization APIs for user-level.
+ *
+ */
+
+/* Implementations of portable synchronization APIs for liblustre */
+
+/*
+ * liblustre is single-threaded, so most "synchronization" APIs are trivial.
+ */
+
+#ifndef __KERNEL__
+
+/*
+ * Optional debugging (magic stamping and checking ownership) can be added.
+ */
+
+/*
+ * spin_lock
+ *
+ * - spin_lock_init(x)
+ * - spin_lock(x)
+ * - spin_unlock(x)
+ * - spin_trylock(x)
+ *
+ * - spin_lock_irqsave(x, f)
+ * - spin_unlock_irqrestore(x, f)
+ *
+ * No-op implementation.
+ */
+
+void spin_lock_init(spinlock_t *lock)
+{
+ LASSERT(lock != NULL);
+ (void)lock;
+}
+
+void spin_lock(spinlock_t *lock)
+{
+ (void)lock;
+}
+
+void spin_unlock(spinlock_t *lock)
+{
+ (void)lock;
+}
+
+int spin_trylock(spinlock_t *lock)
+{
+ (void)lock;
+ return 1;
+}
+
+void spin_lock_bh_init(spinlock_t *lock)
+{
+ LASSERT(lock != NULL);
+ (void)lock;
+}
+
+void spin_lock_bh(spinlock_t *lock)
+{
+ LASSERT(lock != NULL);
+ (void)lock;
+}
+
+void spin_unlock_bh(spinlock_t *lock)
+{
+ LASSERT(lock != NULL);
+ (void)lock;
+}
+
+void spin_lock_irqsave(spinlock_t *lock, unsigned long flags)
+{
+ LASSERT(lock != NULL);
+ (void)lock;
+}
+
+void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
+{
+ LASSERT(lock != NULL);
+ (void)lock;
+}
+
+
+/*
+ * Semaphore
+ *
+ * - sema_init(x, v)
+ * - __down(x)
+ * - __up(x)
+ */
+struct semaphore {};
+
+void sema_init(struct semaphore *s, int val)
+{
+ LASSERT(s != NULL);
+ (void)s;
+ (void)val;
+}
+
+void __down(struct semaphore *s)
+{
+ LASSERT(s != NULL);
+ (void)s;
+}
+
+void __up(struct semaphore *s)
+{
+ LASSERT(s != NULL);
+ (void)s;
+}
+
+/*
+ * Mutex:
+ *
+ * - init_mutex(x)
+ * - init_mutex_locked(x)
+ * - mutex_up(x)
+ * - mutex_down(x)
+ */
+
+#define mutex_up(s) __up(s)
+#define mutex_down(s) __down(s)
+
+#define init_mutex(x) sema_init(x, 1)
+#define init_mutex_locked(x) sema_init(x, 0)
+
+/*
+ * Completion:
+ *
+ * - init_completion(c)
+ * - complete(c)
+ * - wait_for_completion(c)
+ */
+struct completion {};
+
+void init_completion(struct completion *c)
+{
+ LASSERT(c != NULL);
+ (void)c;
+}
+
+void complete(struct completion *c)
+{
+ LASSERT(c != NULL);
+ (void)c;
+}
+
+void wait_for_completion(struct completion *c)
+{
+ LASSERT(c != NULL);
+ (void)c;
+}
+
+/*
+ * rw_semaphore:
+ *
+ * - DECLARE_RWSEM(x)
+ * - init_rwsem(x)
+ * - down_read(x)
+ * - up_read(x)
+ * - down_write(x)
+ * - up_write(x)
+ */
+struct rw_semaphore {};
+
+void init_rwsem(struct rw_semaphore *s)
+{
+ LASSERT(s != NULL);
+ (void)s;
+}
+
+void down_read(struct rw_semaphore *s)
+{
+ LASSERT(s != NULL);
+ (void)s;
+}
+
+int down_read_trylock(struct rw_semaphore *s)
+{
+ LASSERT(s != NULL);
+ (void)s;
+ return 1;
+}
+
+void down_write(struct rw_semaphore *s)
+{
+ LASSERT(s != NULL);
+ (void)s;
+}
+
+int down_write_trylock(struct rw_semaphore *s)
+{
+ LASSERT(s != NULL);
+ (void)s;
+ return 1;
+}
+
+void up_read(struct rw_semaphore *s)
+{
+ LASSERT(s != NULL);
+ (void)s;
+}
+
+void up_write(struct rw_semaphore *s)
+{
+ LASSERT(s != NULL);
+ (void)s;
+}
+
+/* !__KERNEL__ */
+#endif
+
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ *
+ * Implementation of portable APIs for user-level.
+ *
+ */
+
+/* Implementations of portable APIs for liblustre */
+
+/*
+ * liblustre is single-threaded, so most "synchronization" APIs are trivial.
+ */
+
+#ifndef __KERNEL__
+
+#include <sys/mman.h>
+#ifndef __CYGWIN__
+#include <stdint.h>
+#include <asm/page.h>
+#else
+#include <sys/types.h>
+#endif
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/vfs.h>
+
+#include <libcfs/libcfs.h>
+
+/*
+ * Sleep channel. No-op implementation.
+ */
+
+void cfs_waitq_init(struct cfs_waitq *waitq)
+{
+ LASSERT(waitq != NULL);
+ (void)waitq;
+}
+
+void cfs_waitlink_init(struct cfs_waitlink *link)
+{
+ LASSERT(link != NULL);
+ (void)link;
+}
+
+void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link)
+{
+ LASSERT(waitq != NULL);
+ LASSERT(link != NULL);
+ (void)waitq;
+ (void)link;
+}
+
+void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, struct cfs_waitlink *link)
+{
+ LASSERT(waitq != NULL);
+ LASSERT(link != NULL);
+ (void)waitq;
+ (void)link;
+}
+
+void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq)
+{
+ LASSERT(waitq != NULL);
+ LASSERT(link != NULL);
+ (void)waitq;
+ (void)link;
+}
+
+void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link)
+{
+ LASSERT(waitq != NULL);
+ LASSERT(link != NULL);
+ (void)waitq;
+ (void)link;
+}
+
+/*
+ * Report whether anything is waiting on @waitq.
+ * liblustre is single-threaded, so a waitq can never have waiters.
+ */
+int cfs_waitq_active(struct cfs_waitq *waitq)
+{
+ LASSERT(waitq != NULL);
+ (void)waitq;
+ /* original fell off the end of a non-void function (UB) */
+ return 0;
+}
+
+void cfs_waitq_signal(struct cfs_waitq *waitq)
+{
+ LASSERT(waitq != NULL);
+ (void)waitq;
+}
+
+void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr)
+{
+ LASSERT(waitq != NULL);
+ (void)waitq;
+}
+
+void cfs_waitq_broadcast(struct cfs_waitq *waitq)
+{
+ LASSERT(waitq != NULL);
+ (void)waitq;
+}
+
+void cfs_waitq_wait(struct cfs_waitlink *link)
+{
+ LASSERT(link != NULL);
+ (void)link;
+}
+
+/*
+ * Timed wait on @link. No-op in single-threaded liblustre; returns the
+ * remaining timeout (always 0 here, since we never actually sleep).
+ */
+int64_t cfs_waitq_timedwait(struct cfs_waitlink *link, int64_t timeout)
+{
+ LASSERT(link != NULL);
+ (void)link;
+ (void)timeout;
+ /* original fell off the end of a non-void function (UB) */
+ return 0;
+}
+
+/*
+ * Allocator
+ */
+
+/*
+ * Allocate a descriptor plus 2^order pages of backing store.
+ * Returns NULL on allocation failure; @flags is currently unused.
+ */
+cfs_page_t *cfs_alloc_pages(unsigned int flags, unsigned int order)
+{
+ cfs_page_t *pg = malloc(sizeof(*pg));
+
+ if (!pg)
+ return NULL;
+#if 0 //#ifdef MAP_ANONYMOUS
+ pg->addr = mmap(0, PAGE_SIZE << order, PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+#else
+ /* mmap path disabled above; plain malloc of the whole region */
+ pg->addr = malloc(PAGE_SIZE << order);
+#endif
+
+ if (!pg->addr) {
+ free(pg);
+ return NULL;
+ }
+ return pg;
+}
+
+/*
+ * Free a page descriptor and its backing store; counterpart of
+ * cfs_alloc_pages(). @what is ignored in user space.
+ * NOTE(review): parameter retyped from 'struct page *' to 'cfs_page_t *'
+ * to match what cfs_alloc_pages() returns.
+ */
+void cfs_free_pages(cfs_page_t *pg, int what)
+{
+ (void)what;
+#if 0 //#ifdef MAP_ANONYMOUS
+ munmap(pg->addr, PAGE_SIZE);
+#else
+ free(pg->addr);
+#endif
+ free(pg);
+}
+
+cfs_page_t *cfs_alloc_page(unsigned int flags)
+{
+ return cfs_alloc_pages(flags, 0);
+}
+
+/*
+ * Free a single page allocated with cfs_alloc_page().
+ * The original body called cfs_free_page() (itself) — unbounded
+ * recursion; it must delegate to cfs_free_pages().
+ */
+void cfs_free_page(cfs_page_t *pg, int what)
+{
+ cfs_free_pages(pg, what);
+}
+
+void *cfs_page_address(cfs_page_t *pg)
+{
+ return pg->addr;
+}
+
+void *cfs_kmap(cfs_page_t *pg)
+{
+ return pg->addr;
+}
+
+void cfs_kunmap(cfs_page_t *pg)
+{
+}
+
+/*
+ * Memory allocator
+ */
+/*
+ * General-purpose allocator; zeroes the block when CFS_ALLOC_ZERO is
+ * set in @flags. Returns NULL on failure.
+ */
+void *cfs_alloc(size_t nr_bytes, u_int32_t flags)
+{
+ void *result;
+
+ result = malloc(nr_bytes);
+ if (result != NULL && (flags & CFS_ALLOC_ZERO))
+ memset(result, 0, nr_bytes);
+ /* original omitted the return — callers got an indeterminate value */
+ return result;
+}
+
+void cfs_free(void *addr)
+{
+ free(addr);
+}
+
+void *cfs_alloc_large(size_t nr_bytes)
+{
+ return cfs_alloc(nr_bytes, 0);
+}
+
+void cfs_free_large(void *addr)
+{
+ return cfs_free(addr);
+}
+
+/*
+ * SLAB allocator
+ */
+
+/*
+ * Create a trivial user-space "slab" cache: just records the object
+ * size; objects are satisfied from malloc. ctor/dtor are ignored.
+ * The original definition left the parameters unnamed while the body
+ * referenced 'name' and 'objsize' — it could not compile.
+ */
+cfs_mem_cache_t *
+cfs_mem_cache_create(const char *name, size_t objsize, size_t off,
+ unsigned long flags,
+ void (*ctor)(void *, cfs_mem_cache_t *, unsigned long),
+ void (*dtor)(void *, cfs_mem_cache_t *, unsigned long))
+{
+ cfs_mem_cache_t *c;
+
+ (void)off;
+ (void)flags;
+ (void)ctor;
+ (void)dtor;
+ c = malloc(sizeof(*c));
+ if (!c)
+ return NULL;
+ c->size = objsize;
+ CDEBUG(D_MALLOC, "alloc slab cache %s at %p, objsize %d\n",
+ name, c, (int)objsize);
+ return c;
+}
+
+int cfs_mem_cache_destroy(cfs_mem_cache_t *c)
+{
+ CDEBUG(D_MALLOC, "destroy slab cache %p, objsize %u\n", c, c->size);
+ free(c);
+ return 0;
+}
+
+/*
+ * Allocate one object from cache @c. The original passed the cache
+ * pointer itself as the byte count; it must pass the object size.
+ */
+void *cfs_mem_cache_alloc(cfs_mem_cache_t *c, int gfp)
+{
+ return cfs_alloc(c->size, gfp);
+}
+
+void cfs_mem_cache_free(cfs_mem_cache_t *c, void *addr)
+{
+ cfs_free(addr);
+}
+
+
+/* !__KERNEL__ */
+#endif
+
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
#define DEBUG_SUBSYSTEM S_PORTALS
-#include <linux/kp30.h>
-#include <linux/libcfs.h>
-#include <linux/portals_compat25.h>
+#include <libcfs/kp30.h>
+#include <libcfs/libcfs.h>
+#include <libcfs/linux/portals_compat25.h>
static __u32 lcw_refcount = 0;
static DECLARE_MUTEX(lcw_refcount_sem);
-/*
+/*
* List of timers that have fired that need their callbacks run by the
* dispatcher.
*/
CDEBUG(D_INFO, "found lcw for pid %d\n", lcw->lcw_pid);
if (lcw->lcw_state != LC_WATCHDOG_DISABLED) {
- /*
+ /*
* sanity check the task against our
- * watchdog
+ * watchdog
*/
tsk = lcw_lookup_task(lcw);
lcw->lcw_callback(lcw, tsk, lcw->lcw_data);
EXIT;
}
-struct lc_watchdog *lc_watchdog_add(int time,
+struct lc_watchdog *lc_watchdog_add(int time,
void (*callback)(struct lc_watchdog *,
struct task_struct *,
void *),
RETURN(ERR_PTR(-ENOMEM));
}
- lcw->lcw_task = current;
- lcw->lcw_pid = current->pid;
+ lcw->lcw_task = cfs_current();
+ lcw->lcw_pid = cfs_curproc_pid();
lcw->lcw_time = (time * HZ) / 1000;
lcw->lcw_callback = callback ? callback : lc_watchdog_dumplog;
lcw->lcw_data = data;
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>CFBundleDevelopmentRegion</key>
+ <string>English</string>
+ <key>CFBundleExecutable</key>
+ <string>portals</string>
+ <key>CFBundleIconFile</key>
+ <string></string>
+ <key>CFBundleIdentifier</key>
+ <string>com.clusterfs.lustre.portals</string>
+ <key>CFBundleInfoDictionaryVersion</key>
+ <string>6.0</string>
+ <key>CFBundlePackageType</key>
+ <string>KEXT</string>
+ <key>CFBundleSignature</key>
+ <string>????</string>
+ <key>CFBundleVersion</key>
+ <string>1.0.1</string>
+ <key>OSBundleCompatibleVersion</key>
+ <string>1.0.0</string>
+ <key>OSBundleLibraries</key>
+ <dict>
+ <key>com.apple.kernel.bsd</key>
+ <string>1.1</string>
+ <key>com.apple.kernel.iokit</key>
+ <string>1.0.0b1</string>
+ <key>com.apple.kernel.mach</key>
+ <string>1.0.0b1</string>
+ <key>com.clusterfs.lustre.libcfs</key>
+ <string>1.0.0</string>
+ </dict>
+</dict>
+</plist>
static struct nal_t *ptl_nal_table[NAL_MAX_NR + 1];
#ifdef __KERNEL__
-DECLARE_MUTEX(ptl_mutex);
+struct semaphore ptl_mutex;
static void ptl_mutex_enter (void)
{
- down (&ptl_mutex);
+ mutex_down (&ptl_mutex);
}
static void ptl_mutex_exit (void)
{
- up (&ptl_mutex);
+ mutex_up (&ptl_mutex);
}
#else
static void ptl_mutex_enter (void)
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-# define DEBUG_SUBSYSTEM S_PORTALS
+#define DEBUG_SUBSYSTEM S_PORTALS
#include <portals/api-support.h>
void PtlSnprintHandle(char *str, int len, ptl_handle_any_t h)
{
if (!ptl_init)
return PTL_NO_INIT;
-
+
if (ptl_hndl2nal(&handle_in) == NULL)
return PTL_HANDLE_INVALID;
-
+
*ni_out = handle_in;
return PTL_OK;
}
if (!ptl_init)
return PTL_NO_INIT;
-
+
nal = ptl_hndl2nal(&ni_handle);
if (nal == NULL)
return PTL_NI_INVALID;
if (!ptl_init)
return PTL_NO_INIT;
-
+
nal = ptl_hndl2nal(&ni_handle);
if (nal == NULL)
return PTL_NI_INVALID;
return PTL_OK;
}
-int PtlFailNid (ptl_handle_ni_t interface, ptl_nid_t nid, unsigned int threshold)
+int PtlFailNid (ptl_handle_ni_t interface, ptl_nid_t nid, unsigned int threshold)
{
nal_t *nal;
if (!ptl_init)
return PTL_NO_INIT;
-
+
nal = ptl_hndl2nal(&interface);
if (nal == NULL)
return PTL_NI_INVALID;
if (!ptl_init)
return PTL_NO_INIT;
-
+
nal = ptl_hndl2nal(&interface_in);
if (nal == NULL)
return PTL_NI_INVALID;
if (!ptl_init)
return PTL_NO_INIT;
-
+
nal = ptl_hndl2nal(&interface_in);
if (nal == NULL)
return PTL_NI_INVALID;
if (!ptl_init)
return PTL_NO_INIT;
-
+
nal = ptl_hndl2nal(&interface_in);
if (nal == NULL)
return PTL_NI_INVALID;
- return nal->nal_me_attach(nal, index_in, match_id_in,
+ return nal->nal_me_attach(nal, index_in, match_id_in,
match_bits_in, ignore_bits_in,
unlink_in, pos_in, handle_out);
}
if (!ptl_init)
return PTL_NO_INIT;
-
+
nal = ptl_hndl2nal(¤t_in);
if (nal == NULL)
return PTL_ME_INVALID;
if (!ptl_init)
return PTL_NO_INIT;
-
+
nal = ptl_hndl2nal(¤t_in);
if (nal == NULL)
return PTL_ME_INVALID;
if (!ptl_init)
return PTL_NO_INIT;
-
+
nal = ptl_hndl2nal(&me_in);
if (nal == NULL)
return PTL_ME_INVALID;
ptl_hndl2nal(&md_in.eq_handle) != nal)
return PTL_MD_ILLEGAL;
- return (nal->nal_md_attach)(nal, &me_in, &md_in,
+ return (nal->nal_md_attach)(nal, &me_in, &md_in,
unlink_in, handle_out);
}
if (!ptl_init)
return PTL_NO_INIT;
-
+
nal = ptl_hndl2nal(&ni_in);
if (nal == NULL)
return PTL_NI_INVALID;
ptl_md_t *new_inout, ptl_handle_eq_t testq_in)
{
nal_t *nal;
-
+
if (!ptl_init)
return PTL_NO_INIT;
-
+
nal = ptl_hndl2nal(&md_in);
if (nal == NULL)
return PTL_MD_INVALID;
ptl_hndl2nal(&testq_in) != nal)
return PTL_EQ_INVALID;
- return (nal->nal_md_update)(nal, &md_in,
+ return (nal->nal_md_update)(nal, &md_in,
old_inout, new_inout, &testq_in);
}
int PtlMDUnlink(ptl_handle_md_t md_in)
{
nal_t *nal;
-
+
if (!ptl_init)
return PTL_NO_INIT;
-
+
nal = ptl_hndl2nal(&md_in);
if (nal == NULL)
return PTL_MD_INVALID;
-
+
return (nal->nal_md_unlink)(nal, &md_in);
}
ptl_handle_eq_t *handle_out)
{
nal_t *nal;
-
+
if (!ptl_init)
return PTL_NO_INIT;
-
+
nal = ptl_hndl2nal(&interface);
if (nal == NULL)
return PTL_NI_INVALID;
if (!ptl_init)
return PTL_NO_INIT;
-
+
nal = ptl_hndl2nal(&eventq);
if (nal == NULL)
return PTL_EQ_INVALID;
int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t *ev)
{
int which;
-
+
return (PtlEQPoll (&eventq, 1, 0, ev, &which));
}
int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out)
{
int which;
-
- return (PtlEQPoll (&eventq_in, 1, PTL_TIME_FOREVER,
+
+ return (PtlEQPoll (&eventq_in, 1, PTL_TIME_FOREVER,
event_out, &which));
}
if (!ptl_init)
return PTL_NO_INIT;
-
+
nal = ptl_hndl2nal(&ni_in);
if (nal == NULL)
return PTL_NI_INVALID;
-
+
return (nal->nal_ace_entry)(nal, index_in, match_id_in, portal_in);
}
if (!ptl_init)
return PTL_NO_INIT;
-
+
nal = ptl_hndl2nal(&md_in);
if (nal == NULL)
return PTL_MD_INVALID;
if (nal == NULL)
return PTL_MD_INVALID;
- return (nal->nal_get)(nal, &md_in,
+ return (nal->nal_get)(nal, &md_in,
&target_in, portal_in, ac_in,
match_bits_in, offset_in);
}
endif
if MODULES
+
+if LINUX
modulenet_DATA = portals$(KMODEXT)
+endif # LINUX
+
+if DARWIN
+macos_PROGRAMS := portals
+
+portals_SOURCES := api-errno.c api-ni.c api-wrap.c
+portals_SOURCES += lib-init.c lib-me.c lib-msg.c lib-eq.c lib-md.c
+portals_SOURCES += lib-move.c lib-ni.c lib-pid.c module.c
+
+portals_CFLAGS := $(EXTRA_KCFLAGS)
+portals_LDFLAGS := $(EXTRA_KLDFLAGS)
+portals_LDADD := $(EXTRA_KLIBS)
+
+plist_DATA := Info.plist
+
+install-data-hook: fix-kext-ownership
+
+endif # DARWIN
+
endif # MODULES
endif # CRAY_PORTALS
-MOSTLYCLEANFILES = *.o *.ko *.mod.c
+EXTRA_DIST := Info.plist
+
+MOSTLYCLEANFILES = *.o *.ko *.mod.c portals
DIST_SOURCES = $(portals-objs:%.o=%.c)
int i;
int rc;
#ifdef __KERNEL__
- wait_queue_t wq;
- unsigned long now;
+ cfs_waitlink_t wl;
+ cfs_time_t now;
#else
struct timeval then;
struct timeval now;
* in the same stack frame, means we can abstract the
* locking here */
#ifdef __KERNEL__
- init_waitqueue_entry(&wq, current);
+ cfs_waitlink_init(&wl);
set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&ni->ni_waitq, &wq);
+ cfs_waitq_add(&ni->ni_waitq, &wl);
LIB_UNLOCK(nal, flags);
if (timeout_ms < 0) {
- schedule ();
- } else {
- now = jiffies;
- schedule_timeout((timeout_ms * HZ)/1000);
- timeout_ms -= ((jiffies - now) * 1000)/HZ;
+ cfs_waitq_wait (&wl);
+ } else {
+ struct timeval tv;
+
+ now = cfs_time_current();
+ cfs_waitq_timedwait(&wl, cfs_time_seconds(timeout_ms)/1000);
+ cfs_duration_usec(cfs_time_sub(cfs_time_current(), now), &tv);
+ timeout_ms -= tv.tv_sec * 1000 + tv.tv_usec / 1000;
if (timeout_ms < 0)
timeout_ms = 0;
}
LIB_LOCK(nal, flags);
+ cfs_waitq_del(&ni->ni_waitq, &wl);
#else
if (timeout_ms < 0) {
pthread_cond_wait(&ni->ni_cond, &ni->ni_mutex);
#include <portals/lib-p30.h>
#ifdef __KERNEL__
-# include <linux/string.h> /* for memset() */
-# include <linux/kp30.h>
+# include <libcfs/kp30.h>
#else
# include <string.h>
# include <sys/time.h>
if (space == NULL)
return (PTL_NO_SPACE);
- INIT_LIST_HEAD (&fl->fl_list);
+ CFS_INIT_LIST_HEAD (&fl->fl_list);
fl->fl_objs = space;
fl->fl_nobjs = n;
fl->fl_objsize = size;
return (PTL_NO_SPACE);
for (i = 0; i < ni->ni_lh_hash_size; i++)
- INIT_LIST_HEAD (&ni->ni_lh_hash_table[i]);
+ CFS_INIT_LIST_HEAD (&ni->ni_lh_hash_table[i]);
ni->ni_next_object_cookie = PTL_COOKIE_TYPES;
memset(&ni->ni_counters, 0, sizeof(lib_counters_t));
- INIT_LIST_HEAD (&ni->ni_active_msgs);
- INIT_LIST_HEAD (&ni->ni_active_mds);
- INIT_LIST_HEAD (&ni->ni_active_eqs);
- INIT_LIST_HEAD (&ni->ni_test_peers);
+ CFS_INIT_LIST_HEAD (&ni->ni_active_msgs);
+ CFS_INIT_LIST_HEAD (&ni->ni_active_mds);
+ CFS_INIT_LIST_HEAD (&ni->ni_active_eqs);
+ CFS_INIT_LIST_HEAD (&ni->ni_test_peers);
#ifdef __KERNEL__
spin_lock_init (&ni->ni_lock);
- init_waitqueue_head (&ni->ni_waitq);
+ cfs_waitq_init (&ni->ni_waitq);
#else
pthread_mutex_init(&ni->ni_mutex, NULL);
pthread_cond_init(&ni->ni_cond, NULL);
}
for (i = 0; i < ptl_size; i++)
- INIT_LIST_HEAD(&(ni->ni_portals.tbl[i]));
+ CFS_INIT_LIST_HEAD(&(ni->ni_portals.tbl[i]));
/* max_{mes,mds,eqs} set in kportal_descriptor_setup */
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+#define DEBUG_SUBSYSTEM S_PORTALS
+
#ifndef __KERNEL__
# include <stdio.h>
#else
-# define DEBUG_SUBSYSTEM S_PORTALS
-# include <linux/kp30.h>
+# include <libcfs/kp30.h>
#endif
#include <portals/lib-p30.h>
if ((md->options & PTL_MD_KIOV) != 0) {
if (nal->libnal_unmap_pages != NULL)
- nal->libnal_unmap_pages (nal,
- md->md_niov,
- md->md_iov.kiov,
+ nal->libnal_unmap_pages (nal,
+ md->md_niov,
+ md->md_iov.kiov,
&md->md_addrkey);
} else if (nal->libnal_unmap != NULL) {
- nal->libnal_unmap (nal,
- md->md_niov, md->md_iov.iov,
+ nal->libnal_unmap (nal,
+ md->md_niov, md->md_iov.iov,
&md->md_addrkey);
}
if ((umd->options & PTL_MD_IOVEC) != 0) {
if ((umd->options & PTL_MD_KIOV) != 0) /* Can't specify both */
- return PTL_MD_ILLEGAL;
+ return PTL_MD_ILLEGAL;
lmd->md_niov = niov = umd->length;
memcpy(lmd->md_iov.iov, umd->start,
lmd->length = total_length;
if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
- (umd->max_size < 0 ||
+ (umd->max_size < 0 ||
umd->max_size > total_length)) // illegal max_size
return PTL_MD_ILLEGAL;
if (nal->libnal_map != NULL) {
- rc = nal->libnal_map (nal, niov, lmd->md_iov.iov,
+ rc = nal->libnal_map (nal, niov, lmd->md_iov.iov,
&lmd->md_addrkey);
if (rc != PTL_OK)
return (rc);
} else if ((umd->options & PTL_MD_KIOV) != 0) {
#ifndef __KERNEL__
return PTL_MD_ILLEGAL;
-#else
+#else
/* Trap attempt to use paged I/O if unsupported early. */
if (nal->libnal_send_pages == NULL ||
nal->libnal_recv_pages == NULL)
for (i = 0; i < niov; i++) {
/* We take the page pointer on trust */
- if (lmd->md_iov.kiov[i].kiov_offset +
+ if (lmd->md_iov.kiov[i].kiov_offset +
lmd->md_iov.kiov[i].kiov_len > PAGE_SIZE )
return PTL_VAL_FAILED; /* invalid length */
lmd->length = total_length;
if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
- (umd->max_size < 0 ||
+ (umd->max_size < 0 ||
umd->max_size > total_length)) // illegal max_size
return PTL_MD_ILLEGAL;
if (nal->libnal_map_pages != NULL) {
- rc = nal->libnal_map_pages (nal, niov, lmd->md_iov.kiov,
+ rc = nal->libnal_map_pages (nal, niov, lmd->md_iov.kiov,
&lmd->md_addrkey);
if (rc != PTL_OK)
return (rc);
lmd->md_iov.iov[0].iov_len = umd->length;
if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
- (umd->max_size < 0 ||
+ (umd->max_size < 0 ||
umd->max_size > umd->length)) // illegal max_size
return PTL_MD_ILLEGAL;
if (nal->libnal_map != NULL) {
- rc = nal->libnal_map (nal, niov, lmd->md_iov.iov,
+ rc = nal->libnal_map (nal, niov, lmd->md_iov.iov,
&lmd->md_addrkey);
if (rc != PTL_OK)
return (rc);
}
- }
+ }
if (eq != NULL)
eq->eq_refcount++;
ptl_eq2handle(&umd->eq_handle, nal, lmd->eq);
}
-int
+int
lib_api_md_attach(nal_t *apinal, ptl_handle_me_t *meh,
- ptl_md_t *umd, ptl_unlink_t unlink,
+ ptl_md_t *umd, ptl_unlink_t unlink,
ptl_handle_md_t *handle)
{
lib_nal_t *nal = apinal->nal_data;
}
int
-lib_api_md_bind(nal_t *apinal,
+lib_api_md_bind(nal_t *apinal,
ptl_md_t *umd, ptl_unlink_t unlink,
ptl_handle_md_t *handle)
{
ev.unlinked = 1;
lib_md_deconstruct(nal, md, &ev.md);
ptl_md2handle(&ev.md_handle, nal, md);
-
+
lib_enq_event_locked(nal, NULL, md->eq, &ev);
}
/* XXX fttb, the new MD must be the same "shape" wrt fragmentation,
* since we simply overwrite the old lib-md */
- if ((((newumd->options ^ md->options) &
+ if ((((newumd->options ^ md->options) &
(PTL_MD_IOVEC | PTL_MD_KIOV)) != 0) ||
- ((newumd->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0 &&
+ ((newumd->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0 &&
newumd->length != md->md_niov)) {
rc = PTL_IOV_INVALID;
goto out;
- }
+ }
if (!PtlHandleIsEqual (*testqh, PTL_EQ_NONE)) {
test_eq = ptl_handle2eq(testqh, nal);
int unlink = (md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) ?
PTL_UNLINK : PTL_RETAIN;
- // #warning this does not track eq refcounts properly
+ // #warning this does not track eq refcounts properly
rc = lib_md_build(nal, md, newumd, unlink);
md->me = me;
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+#define DEBUG_SUBSYSTEM S_PORTALS
+
#ifndef __KERNEL__
# include <stdio.h>
#else
-# define DEBUG_SUBSYSTEM S_PORTALS
-# include <linux/kp30.h>
+# include <libcfs/kp30.h>
#endif
#include <portals/lib-p30.h>
int
lib_api_me_attach(nal_t *apinal,
ptl_pt_index_t portal,
- ptl_process_id_t match_id,
- ptl_match_bits_t match_bits,
+ ptl_process_id_t match_id,
+ ptl_match_bits_t match_bits,
ptl_match_bits_t ignore_bits,
ptl_unlink_t unlink, ptl_ins_pos_t pos,
ptl_handle_me_t *handle)
int
lib_api_me_insert(nal_t *apinal,
ptl_handle_me_t *current_meh,
- ptl_process_id_t match_id,
- ptl_match_bits_t match_bits,
+ ptl_process_id_t match_id,
+ ptl_match_bits_t match_bits,
ptl_match_bits_t ignore_bits,
ptl_unlink_t unlink, ptl_ins_pos_t pos,
ptl_handle_me_t *handle)
}
/* call with state_lock please */
-void
+void
lib_me_unlink(lib_nal_t *nal, lib_me_t *me)
{
list_del (&me->me_list);
}
#if 0
-static void
+static void
lib_me_dump(lib_nal_t *nal, lib_me_t * me)
{
- CWARN("Match Entry %p ("LPX64")\n", me,
+ CWARN("Match Entry %p ("LPX64")\n", me,
me->me_lh.lh_cookie);
CWARN("\tMatch/Ignore\t= %016lx / %016lx\n",
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+#define DEBUG_SUBSYSTEM S_PORTALS
+
#ifndef __KERNEL__
# include <stdio.h>
#else
-# define DEBUG_SUBSYSTEM S_PORTALS
-# include <linux/kp30.h>
+# include <libcfs/kp30.h>
#endif
#include <portals/p30.h>
#include <portals/lib-p30.h>
static void lib_commit_md (lib_nal_t *nal, lib_md_t *md, lib_msg_t *msg);
static lib_md_t *
-lib_match_md(lib_nal_t *nal, int index, int op_mask,
- ptl_nid_t src_nid, ptl_pid_t src_pid,
+lib_match_md(lib_nal_t *nal, int index, int op_mask,
+ ptl_nid_t src_nid, ptl_pid_t src_pid,
ptl_size_t rlength, ptl_size_t roffset,
ptl_match_bits_t match_bits, lib_msg_t *msg,
ptl_size_t *mlength_out, ptl_size_t *offset_out)
if (me->match_id.nid != PTL_NID_ANY &&
me->match_id.nid != src_nid)
continue;
-
+
CDEBUG(D_NET, "match_id.pid [%x], src_pid [%x]\n",
me->match_id.pid, src_pid);
/* Commit to this ME/MD */
CDEBUG(D_NET, "Incoming %s index %x from "LPU64"/%u of "
- "length %d/%d into md "LPX64" [%d] + %d\n",
+ "length %d/%d into md "LPX64" [%d] + %d\n",
(op_mask == PTL_MD_OP_PUT) ? "put" : "get",
- index, src_nid, src_pid, mlength, rlength,
+ index, src_nid, src_pid, mlength, rlength,
md->md_lh.lh_cookie, md->md_niov, offset);
lib_commit_md(nal, md, msg);
struct list_head *el;
struct list_head *next;
struct list_head cull;
-
+
if (threshold != 0) {
/* Adding a new entry */
PORTAL_ALLOC(tp, sizeof(*tp));
if (tp == NULL)
return PTL_NO_SPACE;
-
+
tp->tp_nid = nid;
tp->tp_threshold = threshold;
-
+
LIB_LOCK(nal, flags);
list_add_tail (&tp->tp_list, &nal->libnal_ni.ni_test_peers);
LIB_UNLOCK(nal, flags);
return PTL_OK;
}
-
+
/* removing entries */
- INIT_LIST_HEAD (&cull);
-
+ CFS_INIT_LIST_HEAD (&cull);
+
LIB_LOCK(nal, flags);
list_for_each_safe (el, next, &nal->libnal_ni.ni_test_peers) {
tp = list_entry (el, lib_test_peer_t, tp_list);
-
+
if (tp->tp_threshold == 0 || /* needs culling anyway */
nid == PTL_NID_ANY || /* removing all entries */
tp->tp_nid == nid) /* matched this one */
list_add (&tp->tp_list, &cull);
}
}
-
+
LIB_UNLOCK(nal, flags);
-
+
while (!list_empty (&cull)) {
tp = list_entry (cull.next, lib_test_peer_t, tp_list);
}
static int
-fail_peer (lib_nal_t *nal, ptl_nid_t nid, int outgoing)
+fail_peer (lib_nal_t *nal, ptl_nid_t nid, int outgoing)
{
lib_test_peer_t *tp;
struct list_head *el;
struct list_head cull;
int fail = 0;
- INIT_LIST_HEAD (&cull);
-
+ CFS_INIT_LIST_HEAD (&cull);
+
LIB_LOCK (nal, flags);
list_for_each_safe (el, next, &nal->libnal_ni.ni_test_peers) {
}
continue;
}
-
+
if (tp->tp_nid == PTL_NID_ANY || /* fail every peer */
nid == tp->tp_nid) { /* fail this peer */
fail = 1;
-
+
if (tp->tp_threshold != PTL_MD_THRESH_INF) {
tp->tp_threshold--;
if (outgoing &&
break;
}
}
-
+
LIB_UNLOCK (nal, flags);
while (!list_empty (&cull)) {
tp = list_entry (cull.next, lib_test_peer_t, tp_list);
list_del (&tp->tp_list);
-
+
PORTAL_FREE(tp, sizeof (*tp));
}
lib_iov_nob (int niov, struct iovec *iov)
{
ptl_size_t nob = 0;
-
+
while (niov-- > 0)
nob += (iov++)->iov_len;
-
+
return (nob);
}
void
-lib_copy_iov2buf (char *dest, int niov, struct iovec *iov,
+lib_copy_iov2buf (char *dest, int niov, struct iovec *iov,
ptl_size_t offset, ptl_size_t len)
{
ptl_size_t nob;
if (len == 0)
return;
-
+
/* skip complete frags before 'offset' */
LASSERT (niov > 0);
while (offset >= iov->iov_len) {
niov--;
LASSERT (niov > 0);
}
-
+
do {
LASSERT (niov > 0);
nob = MIN (iov->iov_len - offset, len);
}
void
-lib_copy_buf2iov (int niov, struct iovec *iov, ptl_size_t offset,
+lib_copy_buf2iov (int niov, struct iovec *iov, ptl_size_t offset,
char *src, ptl_size_t len)
{
ptl_size_t nob;
niov--;
LASSERT (niov > 0);
}
-
+
do {
LASSERT (niov > 0);
nob = MIN (iov->iov_len - offset, len);
memcpy (iov->iov_base + offset, src, nob);
-
+
len -= nob;
src += nob;
niov--;
for (;;) {
LASSERT (src_niov > 0);
LASSERT (niov <= dst_niov);
-
+
frag_len = src->iov_len - offset;
dst->iov_base = ((char *)src->iov_base) + offset;
dst->iov_len = len;
return (niov);
}
-
+
dst->iov_len = frag_len;
len -= frag_len;
#ifndef __KERNEL__
ptl_size_t
-lib_kiov_nob (int niov, ptl_kiov_t *kiov)
+lib_kiov_nob (int niov, ptl_kiov_t *kiov)
{
LASSERT (0);
return (0);
}
void
-lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov,
+lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov,
ptl_size_t offset, ptl_size_t len)
{
LASSERT (0);
}
int
-lib_extract_kiov (int dst_niov, ptl_kiov_t *dst,
+lib_extract_kiov (int dst_niov, ptl_kiov_t *dst,
int src_niov, ptl_kiov_t *src,
ptl_size_t offset, ptl_size_t len)
{
#else
ptl_size_t
-lib_kiov_nob (int niov, ptl_kiov_t *kiov)
+lib_kiov_nob (int niov, ptl_kiov_t *kiov)
{
ptl_size_t nob = 0;
}
void
-lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov,
+lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov,
ptl_size_t offset, ptl_size_t len)
{
ptl_size_t nob;
if (len == 0)
return;
-
+
LASSERT (!in_interrupt ());
LASSERT (niov > 0);
niov--;
LASSERT (niov > 0);
}
-
+
do{
LASSERT (niov > 0);
nob = MIN (kiov->kiov_len - offset, len);
-
- addr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset + offset;
+
+ addr = ((char *)cfs_kmap (kiov->kiov_page)) + kiov->kiov_offset + offset;
memcpy (dest, addr, nob);
- kunmap (kiov->kiov_page);
-
+ cfs_kunmap (kiov->kiov_page);
+
len -= nob;
dest += nob;
niov--;
niov--;
LASSERT (niov > 0);
}
-
+
do {
LASSERT (niov > 0);
nob = MIN (kiov->kiov_len - offset, len);
-
- addr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset + offset;
+
+ addr = ((char *)cfs_kmap (kiov->kiov_page)) + kiov->kiov_offset + offset;
memcpy (addr, src, nob);
- kunmap (kiov->kiov_page);
-
+ cfs_kunmap (kiov->kiov_page);
+
len -= nob;
src += nob;
niov--;
}
int
-lib_extract_kiov (int dst_niov, ptl_kiov_t *dst,
+lib_extract_kiov (int dst_niov, ptl_kiov_t *dst,
int src_niov, ptl_kiov_t *src,
ptl_size_t offset, ptl_size_t len)
{
for (;;) {
LASSERT (src_niov > 0);
LASSERT (niov <= dst_niov);
-
+
frag_len = src->kiov_len - offset;
dst->kiov_page = src->kiov_page;
dst->kiov_offset = src->kiov_offset + offset;
if ((md->options & PTL_MD_KIOV) == 0)
return (nal->libnal_recv(nal, private, msg,
- md->md_niov, md->md_iov.iov,
+ md->md_niov, md->md_iov.iov,
offset, mlen, rlen));
- return (nal->libnal_recv_pages(nal, private, msg,
+ return (nal->libnal_recv_pages(nal, private, msg,
md->md_niov, md->md_iov.kiov,
offset, mlen, rlen));
}
ptl_err_t
lib_send (lib_nal_t *nal, void *private, lib_msg_t *msg,
ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
- lib_md_t *md, ptl_size_t offset, ptl_size_t len)
+ lib_md_t *md, ptl_size_t offset, ptl_size_t len)
{
if (len == 0)
return (nal->libnal_send(nal, private, msg,
hdr, type, nid, pid,
0, NULL,
offset, len));
-
+
if ((md->options & PTL_MD_KIOV) == 0)
- return (nal->libnal_send(nal, private, msg,
+ return (nal->libnal_send(nal, private, msg,
hdr, type, nid, pid,
md->md_niov, md->md_iov.iov,
offset, len));
- return (nal->libnal_send_pages(nal, private, msg,
+ return (nal->libnal_send_pages(nal, private, msg,
hdr, type, nid, pid,
md->md_niov, md->md_iov.kiov,
offset, len));
* decrementing its threshold. Come what may, the network "owns"
* the MD until a call to lib_finalize() signals completion. */
msg->md = md;
-
+
md->pending++;
if (md->threshold != PTL_MD_THRESH_INF) {
LASSERT (md->threshold > 0);
/* CAVEAT EMPTOR: this only drops messages that we've not committed
* to receive (init_msg() not called) and therefore can't cause an
* event. */
-
+
LIB_LOCK(nal, flags);
nal->libnal_ni.ni_counters.drop_count++;
nal->libnal_ni.ni_counters.drop_length += hdr->payload_length;
ptl_err_t rc;
lib_md_t *md;
unsigned long flags;
-
+
/* Convert put fields to host byte order */
hdr->msg.put.match_bits = le64_to_cpu(hdr->msg.put.match_bits);
hdr->msg.put.ptl_index = le32_to_cpu(hdr->msg.put.ptl_index);
/* NB call lib_send() _BEFORE_ lib_recv() completes the incoming
* message. Some NALs _require_ this to implement optimized GET */
- rc = lib_send (nal, private, msg, &reply, PTL_MSG_REPLY,
+ rc = lib_send (nal, private, msg, &reply, PTL_MSG_REPLY,
hdr->src_nid, hdr->src_pid, md, offset, mlength);
if (rc != PTL_OK)
CERROR(LPU64": Unable to send REPLY for GET from "LPU64": %d\n",
}
CDEBUG(D_NET, "Reply from "LPU64" of length %d/%d into md "LPX64"\n",
- hdr->src_nid, length, rlength,
+ hdr->src_nid, length, rlength,
hdr->msg.reply.dst_wmd.wh_object_cookie);
lib_commit_md(nal, md, msg);
md = ptl_wire_handle2md(&hdr->msg.ack.dst_wmd, nal);
if (md == NULL || md->threshold == 0) {
CDEBUG(D_INFO, LPU64": Dropping ACK from "LPU64" to %s MD "
- LPX64"."LPX64"\n", ni->ni_pid.nid, hdr->src_nid,
+ LPX64"."LPX64"\n", ni->ni_pid.nid, hdr->src_nid,
(md == NULL) ? "invalid" : "inactive",
hdr->msg.ack.dst_wmd.wh_interface_cookie,
hdr->msg.ack.dst_wmd.wh_object_cookie);
}
CDEBUG(D_NET, LPU64": ACK from "LPU64" into md "LPX64"\n",
- ni->ni_pid.nid, hdr->src_nid,
+ ni->ni_pid.nid, hdr->src_nid,
hdr->msg.ack.dst_wmd.wh_object_cookie);
lib_commit_md(nal, md, msg);
ni->ni_counters.recv_count++;
LIB_UNLOCK(nal, flags);
-
+
/* We have received and matched up the ack OK, create the
* completion event now... */
lib_finalize(nal, private, msg, PTL_OK);
/* ...and now discard any junk after the hdr */
(void) lib_recv(nal, private, NULL, NULL, 0, 0, hdr->payload_length);
-
+
return (PTL_OK);
}
/* NB we return PTL_OK if we manage to parse the header and believe
* it looks OK. Anything that goes wrong with receiving the
* message after that point is the responsibility of the NAL */
-
+
/* convert common fields to host byte order */
hdr->type = le32_to_cpu(hdr->type);
hdr->src_nid = le64_to_cpu(hdr->src_nid);
mv->version_minor == PORTALS_PROTO_VERSION_MINOR) {
CWARN (LPU64": Dropping unexpected HELLO message: "
"magic %d, version %d.%d from "LPD64"\n",
- nal->libnal_ni.ni_pid.nid, mv->magic,
+ nal->libnal_ni.ni_pid.nid, mv->magic,
mv->version_major, mv->version_minor,
hdr->src_nid);
/* we got garbage */
CERROR (LPU64": Bad HELLO message: "
"magic %d, version %d.%d from "LPD64"\n",
- nal->libnal_ni.ni_pid.nid, mv->magic,
+ nal->libnal_ni.ni_pid.nid, mv->magic,
mv->version_major, mv->version_minor,
hdr->src_nid);
return PTL_FAIL;
hdr->dest_nid = le64_to_cpu(hdr->dest_nid);
if (hdr->dest_nid != nal->libnal_ni.ni_pid.nid) {
CERROR(LPU64": BAD dest NID in %s message from"
- LPU64" to "LPU64" (not me)\n",
+ LPU64" to "LPU64" (not me)\n",
nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr),
hdr->src_nid, hdr->dest_nid);
return PTL_FAIL;
{
CERROR(LPU64": Dropping incoming %s from "LPU64
": simulated failure\n",
- nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr),
+ nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr),
hdr->src_nid);
lib_drop_message(nal, private, hdr);
return PTL_OK;
if (msg == NULL) {
CERROR(LPU64": Dropping incoming %s from "LPU64
": can't allocate a lib_msg_t\n",
- nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr),
+ nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr),
hdr->src_nid);
lib_drop_message(nal, private, hdr);
return PTL_OK;
rc = PTL_FAIL; /* no compiler warning please */
break;
}
-
+
if (rc != PTL_OK) {
if (msg->md != NULL) {
/* committed... */
/* That's "OK I can parse it", not "OK I like it" :) */
}
-int
-lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh,
+int
+lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh,
ptl_ack_req_t ack, ptl_process_id_t *id,
ptl_pt_index_t portal, ptl_ac_index_t ac,
- ptl_match_bits_t match_bits,
+ ptl_match_bits_t match_bits,
ptl_size_t offset, ptl_hdr_data_t hdr_data)
{
lib_nal_t *nal = apinal->nal_data;
lib_md_t *md;
unsigned long flags;
int rc;
-
+
if (!list_empty (&ni->ni_test_peers) && /* normally we don't */
fail_peer (nal, id->nid, 1)) /* shall we now? */
{
if (md == NULL || md->threshold == 0) {
lib_msg_free(nal, msg);
LIB_UNLOCK(nal, flags);
-
+
return PTL_MD_INVALID;
}
hdr.msg.put.hdr_data = hdr_data;
lib_commit_md(nal, md, msg);
-
+
msg->ev.type = PTL_EVENT_SEND_END;
msg->ev.initiator.nid = ni->ni_pid.nid;
msg->ev.initiator.pid = ni->ni_pid.pid;
ni->ni_counters.send_length += md->length;
LIB_UNLOCK(nal, flags);
-
+
rc = lib_send (nal, NULL, msg, &hdr, PTL_MSG_PUT,
id->nid, id->pid, md, 0, md->length);
if (rc != PTL_OK) {
id->nid, rc);
lib_finalize (nal, NULL, msg, rc);
}
-
+
/* completion will be signalled by an event */
return PTL_OK;
}
-lib_msg_t *
+lib_msg_t *
lib_create_reply_msg (lib_nal_t *nal, ptl_nid_t peer_nid, lib_msg_t *getmsg)
{
/* The NAL can DMA direct to the GET md (i.e. no REPLY msg). This
return NULL;
}
-int
+int
lib_api_get(nal_t *apinal, ptl_handle_md_t *mdh, ptl_process_id_t *id,
ptl_pt_index_t portal, ptl_ac_index_t ac,
ptl_match_bits_t match_bits, ptl_size_t offset)
lib_md_t *md;
unsigned long flags;
int rc;
-
+
if (!list_empty (&ni->ni_test_peers) && /* normally we don't */
fail_peer (nal, id->nid, 1)) /* shall we now? */
{
ni->ni_pid.nid, id->nid, rc);
lib_finalize (nal, NULL, msg, rc);
}
-
+
/* completion will be signalled by an event */
return PTL_OK;
}
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+#define DEBUG_SUBSYSTEM S_PORTALS
+
#ifndef __KERNEL__
# include <stdio.h>
#else
-# define DEBUG_SUBSYSTEM S_PORTALS
-# include <linux/kp30.h>
+# include <libcfs/kp30.h>
#endif
#include <portals/lib-p30.h>
void
-lib_enq_event_locked (lib_nal_t *nal, void *private,
+lib_enq_event_locked (lib_nal_t *nal, void *private,
lib_eq_t *eq, ptl_event_t *ev)
{
ptl_event_t *eq_slot;
/* We don't support different uid/jids yet */
ev->uid = 0;
ev->jid = 0;
-
+
/* size must be a power of 2 to handle sequence # overflow */
LASSERT (eq->eq_size != 0 &&
eq->eq_size == LOWEST_BIT_SET (eq->eq_size));
/* Wake anyone sleeping for an event (see lib-eq.c) */
#ifdef __KERNEL__
- if (waitqueue_active(&nal->libnal_ni.ni_waitq))
- wake_up_all(&nal->libnal_ni.ni_waitq);
+ if (cfs_waitq_active(&nal->libnal_ni.ni_waitq))
+ cfs_waitq_broadcast(&nal->libnal_ni.ni_waitq);
#else
pthread_cond_broadcast(&nal->libnal_ni.ni_cond);
#endif
}
-void
+void
lib_finalize (lib_nal_t *nal, void *private, lib_msg_t *msg, ptl_err_t status)
{
lib_md_t *md;
ack.msg.ack.mlength = cpu_to_le32(msg->ev.mlength);
rc = lib_send (nal, private, NULL, &ack, PTL_MSG_ACK,
- msg->ev.initiator.nid, msg->ev.initiator.pid,
+ msg->ev.initiator.nid, msg->ev.initiator.pid,
NULL, 0, 0);
if (rc != PTL_OK) {
/* send failed: there's nothing else to clean up. */
- CERROR("Error %d sending ACK to "LPX64"\n",
+ CERROR("Error %d sending ACK to "LPX64"\n",
rc, msg->ev.initiator.nid);
}
}
#define DEBUG_SUBSYSTEM S_PORTALS
#if defined (__KERNEL__)
-# include <linux/kernel.h>
extern int getpid(void);
#else
# include <stdio.h>
#endif
#define DEBUG_SUBSYSTEM S_PORTALS
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
-#include <linux/fs.h>
-#include <linux/stat.h>
-#include <asm/uaccess.h>
-#include <asm/segment.h>
-#include <linux/miscdevice.h>
-
#include <portals/lib-p30.h>
#include <portals/p30.h>
#include <portals/nal.h>
-#include <linux/kp30.h>
-#include <linux/kpr.h>
-#include <linux/portals_compat25.h>
+#include <libcfs/kp30.h>
+#include <portals/kpr.h>
extern void (kping_client)(struct portal_ioctl_data *);
-static int kportal_ioctl(struct portal_ioctl_data *data,
+static int kportal_ioctl(struct portal_ioctl_data *data,
unsigned int cmd, unsigned long arg)
{
int err;
}
DECLARE_IOCTL_HANDLER(kportal_ioctl_handler, kportal_ioctl);
+extern struct semaphore ptl_mutex;
static int init_kportals_module(void)
{
int rc;
ENTRY;
+ init_mutex(&ptl_mutex);
rc = PtlInit(NULL);
if (rc) {
CERROR("PtlInit: error %d\n", rc);
MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
MODULE_DESCRIPTION("Portals v3.1");
MODULE_LICENSE("GPL");
-module_init(init_kportals_module);
-module_exit(exit_kportals_module);
+
+cfs_module(portals, "1.0.0", init_kportals_module, exit_kportals_module);
--- /dev/null
+// !$*UTF8*$!
+{
+ archiveVersion = 1;
+ classes = {
+ };
+ objectVersion = 39;
+ objects = {
+ 06AA1262FFB20DD611CA28AA = {
+ buildRules = (
+ );
+ buildSettings = {
+ COPY_PHASE_STRIP = NO;
+ GCC_DYNAMIC_NO_PIC = NO;
+ GCC_ENABLE_FIX_AND_CONTINUE = YES;
+ GCC_GENERATE_DEBUGGING_SYMBOLS = YES;
+ GCC_OPTIMIZATION_LEVEL = 0;
+ OPTIMIZATION_CFLAGS = "-O0";
+ ZERO_LINK = YES;
+ };
+ isa = PBXBuildStyle;
+ name = Development;
+ };
+ 06AA1263FFB20DD611CA28AA = {
+ buildRules = (
+ );
+ buildSettings = {
+ COPY_PHASE_STRIP = YES;
+ GCC_ENABLE_FIX_AND_CONTINUE = NO;
+ ZERO_LINK = NO;
+ };
+ isa = PBXBuildStyle;
+ name = Deployment;
+ };
+//060
+//061
+//062
+//063
+//064
+//080
+//081
+//082
+//083
+//084
+ 089C1669FE841209C02AAC07 = {
+ buildSettings = {
+ };
+ buildStyles = (
+ 06AA1262FFB20DD611CA28AA,
+ 06AA1263FFB20DD611CA28AA,
+ );
+ hasScannedForEncodings = 1;
+ isa = PBXProject;
+ mainGroup = 089C166AFE841209C02AAC07;
+ projectDirPath = "";
+ targets = (
+ 32A4FEB80562C75700D090E7,
+ );
+ };
+ 089C166AFE841209C02AAC07 = {
+ children = (
+ 247142CAFF3F8F9811CA285C,
+ 089C167CFE841241C02AAC07,
+ 19C28FB6FE9D52B211CA2CBB,
+ );
+ isa = PBXGroup;
+ name = portals;
+ refType = 4;
+ sourceTree = "<group>";
+ };
+ 089C167CFE841241C02AAC07 = {
+ children = (
+ 32A4FEC30562C75700D090E7,
+ );
+ isa = PBXGroup;
+ name = Resources;
+ refType = 4;
+ sourceTree = "<group>";
+ };
+//080
+//081
+//082
+//083
+//084
+//190
+//191
+//192
+//193
+//194
+ 19A778270730EACD00846375 = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ path = module.c;
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 19A778280730EACD00846375 = {
+ fileRef = 19A778270730EACD00846375;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19A7782B0730EB8400846375 = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ path = "api-errno.c";
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 19A7782C0730EB8400846375 = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ path = "api-ni.c";
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 19A7782D0730EB8400846375 = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ path = "api-wrap.c";
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 19A7782E0730EB8400846375 = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ path = "lib-eq.c";
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 19A7782F0730EB8400846375 = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ path = "lib-init.c";
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 19A778300730EB8400846375 = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ path = "lib-md.c";
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 19A778310730EB8400846375 = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ path = "lib-me.c";
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 19A778320730EB8400846375 = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ path = "lib-move.c";
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 19A778330730EB8400846375 = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ path = "lib-msg.c";
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 19A778340730EB8400846375 = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ path = "lib-ni.c";
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 19A778350730EB8400846375 = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ path = "lib-pid.c";
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 19A778360730EB8400846375 = {
+ fileRef = 19A7782B0730EB8400846375;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19A778370730EB8400846375 = {
+ fileRef = 19A7782C0730EB8400846375;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19A778380730EB8400846375 = {
+ fileRef = 19A7782D0730EB8400846375;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19A778390730EB8400846375 = {
+ fileRef = 19A7782E0730EB8400846375;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19A7783A0730EB8400846375 = {
+ fileRef = 19A7782F0730EB8400846375;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19A7783B0730EB8400846375 = {
+ fileRef = 19A778300730EB8400846375;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19A7783C0730EB8400846375 = {
+ fileRef = 19A778310730EB8400846375;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19A7783D0730EB8400846375 = {
+ fileRef = 19A778320730EB8400846375;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19A7783E0730EB8400846375 = {
+ fileRef = 19A778330730EB8400846375;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19A7783F0730EB8400846375 = {
+ fileRef = 19A778340730EB8400846375;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19A778400730EB8400846375 = {
+ fileRef = 19A778350730EB8400846375;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19C28FB6FE9D52B211CA2CBB = {
+ children = (
+ 32A4FEC40562C75800D090E7,
+ );
+ isa = PBXGroup;
+ name = Products;
+ refType = 4;
+ sourceTree = "<group>";
+ };
+//190
+//191
+//192
+//193
+//194
+//240
+//241
+//242
+//243
+//244
+ 247142CAFF3F8F9811CA285C = {
+ children = (
+ 19A7782B0730EB8400846375,
+ 19A7782C0730EB8400846375,
+ 19A7782D0730EB8400846375,
+ 19A7782E0730EB8400846375,
+ 19A7782F0730EB8400846375,
+ 19A778300730EB8400846375,
+ 19A778310730EB8400846375,
+ 19A778320730EB8400846375,
+ 19A778330730EB8400846375,
+ 19A778340730EB8400846375,
+ 19A778350730EB8400846375,
+ 19A778270730EACD00846375,
+ );
+ isa = PBXGroup;
+ name = Source;
+ path = "";
+ refType = 4;
+ sourceTree = "<group>";
+ };
+//240
+//241
+//242
+//243
+//244
+//320
+//321
+//322
+//323
+//324
+ 32A4FEB80562C75700D090E7 = {
+ buildPhases = (
+ 32A4FEB90562C75700D090E7,
+ 32A4FEBA0562C75700D090E7,
+ 32A4FEBB0562C75700D090E7,
+ 32A4FEBD0562C75700D090E7,
+ 32A4FEBF0562C75700D090E7,
+ 32A4FEC00562C75700D090E7,
+ 32A4FEC10562C75700D090E7,
+ );
+ buildRules = (
+ );
+ buildSettings = {
+ FRAMEWORK_SEARCH_PATHS = "";
+ GCC_WARN_FOUR_CHARACTER_CONSTANTS = NO;
+ GCC_WARN_UNKNOWN_PRAGMAS = NO;
+ HEADER_SEARCH_PATHS = ../include;
+ INFOPLIST_FILE = Info.plist;
+ INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions";
+ LIBRARY_SEARCH_PATHS = "";
+ MODULE_NAME = com.clusterfs.lustre.portals.portals.portals;
+ MODULE_START = portals_start;
+ MODULE_STOP = portals_stop;
+ MODULE_VERSION = 1.0.1;
+ OTHER_CFLAGS = "-D__KERNEL__";
+ OTHER_LDFLAGS = "";
+ OTHER_REZFLAGS = "";
+ PRODUCT_NAME = portals;
+ SECTORDER_FLAGS = "";
+ WARNING_CFLAGS = "-Wmost";
+ WRAPPER_EXTENSION = kext;
+ };
+ dependencies = (
+ );
+ isa = PBXNativeTarget;
+ name = portals;
+ productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions";
+ productName = portals;
+ productReference = 32A4FEC40562C75800D090E7;
+ productType = "com.apple.product-type.kernel-extension";
+ };
+ 32A4FEB90562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXShellScriptBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ shellPath = /bin/sh;
+ shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi";
+ };
+ 32A4FEBA0562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXHeadersBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 32A4FEBB0562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXResourcesBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 32A4FEBD0562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ 19A778280730EACD00846375,
+ 19A778360730EB8400846375,
+ 19A778370730EB8400846375,
+ 19A778380730EB8400846375,
+ 19A778390730EB8400846375,
+ 19A7783A0730EB8400846375,
+ 19A7783B0730EB8400846375,
+ 19A7783C0730EB8400846375,
+ 19A7783D0730EB8400846375,
+ 19A7783E0730EB8400846375,
+ 19A7783F0730EB8400846375,
+ 19A778400730EB8400846375,
+ );
+ isa = PBXSourcesBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 32A4FEBF0562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXFrameworksBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 32A4FEC00562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXRezBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 32A4FEC10562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXShellScriptBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ shellPath = /bin/sh;
+ shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi";
+ };
+ 32A4FEC30562C75700D090E7 = {
+ isa = PBXFileReference;
+ lastKnownFileType = text.plist.xml;
+ path = Info.plist;
+ refType = 4;
+ sourceTree = "<group>";
+ };
+ 32A4FEC40562C75800D090E7 = {
+ explicitFileType = wrapper.cfbundle;
+ includeInIndex = 0;
+ isa = PBXFileReference;
+ path = portals.kext;
+ refType = 3;
+ sourceTree = BUILT_PRODUCTS_DIR;
+ };
+ };
+ rootObject = 089C1669FE841209C02AAC07;
+}
if MODULES
if !CRAY_PORTALS
+
+if LINUX
modulenet_DATA = kptlrouter$(KMODEXT)
endif
+
+endif
endif
MOSTLYCLEANFILES = *.o *.ko *.mod.c
#define DEBUG_SUBSYSTEM S_PTLROUTER
-#include <linux/kp30.h>
-#include <linux/kpr.h>
+#include <libcfs/kp30.h>
+#include <portals/kpr.h>
#include <portals/p30.h>
#include <portals/lib-p30.h>
--- /dev/null
+#ifndef __LINUX_PING_H__
+#define __LINUX_PING_H__
+
+/* linux/ping.h: Linux kernel headers needed by the portals ping test
+ * client/server modules (pingsrv/pingcli). */
+
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/version.h>
+/* 2.5+ kernels replaced task queues with workqueues; include whichever
+ * deferred-work API this kernel provides. */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#include <linux/workqueue.h>
+#else
+#include <linux/tqueue.h>
+#endif
+#include <linux/wait.h>
+#include <linux/smp_lock.h>
+#include <linux/poll.h>
+
+#include <asm/unistd.h>
+#include <asm/semaphore.h>
+
+#endif
--- /dev/null
+#ifndef __XNU_PING_H__
+#define __XNU_PING_H__
+
+/* xnu/ping.h: Darwin (XNU) headers needed by the portals ping test
+ * modules; cfs_lock.h/cfs_prim.h supply the libcfs locking and
+ * primitive wrappers used in place of the Linux kernel APIs. */
+
+#include <mach/mach_types.h>
+#include <arch-xnu/cfs_lock.h>
+#include <arch-xnu/cfs_prim.h>
+
+#endif
if MODULES
if !CRAY_PORTALS
if TESTS
+
+if LINUX
noinst_DATA := pingsrv$(KMODEXT) pingcli$(KMODEXT)
noinst_DATA += spingsrv$(KMODEXT) spingcli$(KMODEXT)
endif
+
+endif
endif
endif
--- /dev/null
+#! /bin/sh
+
+# build-all: drive xcodebuild over each module directory listed in the
+# build sequence file, then stage the resulting kexts into one directory
+# (chowned root:wheel) and optionally load them with kextload.
+#
+# Options: -c clean before building, -v verbose, -b base dir, -l log file,
+#          -s sequence file, -k kext staging dir, -L kexts to load.
+
+# NOTE(review): 'if false' permanently selects the BSD getopt branch;
+# the GNU getopt invocation is kept for reference only.
+if false ;then
+ OPTVAL=`getopt -o cb:l:s:k:L:v -n 'build-all' -- "$@"`
+else
+ # XNU/BSD getopt is special...
+ OPTVAL=$(getopt cb:l:s:k:L:v "$@")
+fi
+
+# $? here is the status of the last command run inside the if/fi above,
+# i.e. getopt's exit status via the OPTVAL assignment.
+if [ $? != 0 ]
+then
+ echo 'Usage: see source...'
+ exit 2
+fi
+
+eval set -- "$OPTVAL"
+
+# Run from the script's own directory so relative defaults below work.
+cd $(dirname $0)
+
+#set -x
+
+b=$PWD # base directory
+l=$b/build.log # where to log operations
+s=$b/build.seq # build sequence
+k=$b/kext.stage # where to place kexts after build
+
+load='' # list of kexts to load
+clean=0
+verbose=0
+
+while true ;do
+ case "$1" in
+ -c)
+ clean=1
+ shift 1
+ ;;
+ -v)
+ verbose=$(($verbose + 1))
+ shift 1
+ ;;
+ -b)
+ b=$2
+ shift 2
+ ;;
+ -l)
+ l=$2
+ shift 2
+ ;;
+ -s)
+ s=$2
+ shift 2
+ ;;
+ -k)
+ k=$2
+ shift 2
+ ;;
+ -L)
+ load=$2
+ shift 2
+ ;;
+ --)
+ shift
+ break
+ ;;
+ *)
+ echo "Internal error!"
+ exit 1
+ ;;
+ esac
+done
+
+# Truncate the log.
+echo > $l
+
+# message MSG: echo MSG to stdout and append it to the build log.
+function message ()
+{
+ local msg
+
+ msg="$1"
+ echo $msg
+ echo $msg >> $l
+}
+
+# abort MSG: log MSG and terminate the whole build with failure.
+function abort ()
+{
+ local msg
+
+ msg=$1
+
+ message "$1"
+ exit 1
+}
+
+# configure_xcode: regenerate project.pbxproj from its cpp template
+# (currently disabled at the call site in the build loop below).
+function configure_xcode ()
+{
+ local path
+ local pfile
+ local module
+
+ path=$PWD
+ module=$(basename $path)
+ pfile=$path/$module.xcode/project.pbxproj
+ # NOTE(review): 'tail +2' is obsolete syntax; modern tail wants
+ # 'tail -n +2' -- confirm against the deployment host's tail.
+ if [ -r $pfile.template ] ;then
+ cpp \
+ -P \
+ -include $b/build-config \
+ $pfile.template | \
+ tail +2 > $pfile
+ else
+ abort "missing $pfile.template"
+ fi
+}
+
+# -c: remove all 'build' output directories and touch every file so the
+# next build starts from scratch.
+if [ x$clean != x0 ] ;then
+ echo "Removing..."
+ find $b/ -type d -name build
+ rm -fr $(find $b/ -type d -name build)
+ find $b/ -print0 | xargs -0 touch
+fi
+
+# Build every directory named in the sequence file, in order, skipping
+# blank lines and '#' comments. Any build failure aborts the run.
+cat $s | while read ;do
+ d=$REPLY
+ if [ x$d = x ] ;then
+ : # empty line. Do nothing
+ elif [ ${d:0:1} = '#' ] ;then
+ : # comment. Skip
+ else
+ cd $d || abort "Cannot cd to $d"
+ message "________ Building in $d __________"
+ #configure_xcode
+ if [ $verbose -gt 0 ] ;then
+ xcodebuild 2>&1 | tee -a $l
+ else
+ xcodebuild >> $l 2>&1 || abort "Build failure in $d. See $l"
+ fi
+ # tail -2 $l
+ cd $b
+ fi
+done
+
+# copy all built kexts into $k
+# sudo is used, because extensions are later chowned to root.
+sudo rm -f ../include/arch
+# NOTE(review): this ln runs without sudo right after a sudo'd rm --
+# confirm the symlink's parent directory is writable by the invoking user.
+ln -s ../include/arch-xnu ../include/arch
+sudo rm -fr $k || abort "Cannot clean $k"
+mkdir $k || abort "Cannot create $k"
+cp -R $(find ../ -name \*.kext -type d) $k || abort "Cannot stage kexts"
+cd $k || abort "Cannot chdir to $k"
+sudo chown -R root:wheel * || abort "Cannot chown kexts to root:wheel"
+
+# With -L, load the requested kexts; otherwise just (re-)chown the stage.
+if [ x$load != x ] ;then
+ cd $k
+ sudo kextload -r $k $load
+else
+ sudo chown -R root:wheel *
+fi
+cd $b
+
+# Flush to disk before anyone reboots into the new kexts.
+sync;sync;sync
+
--- /dev/null
+../libcfs
+../portals
+../knals/socknal
+./ping_cli
+./ping_srv
#ifndef _KPING_INCLUDED
#define _KPING_INCLUDED
+#include <libcfs/portals_utils.h>
#include <portals/p30.h>
#if __KERNEL__
-
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-#include <linux/workqueue.h>
-#else
-#include <linux/tqueue.h>
-#endif
struct pingsrv_data {
ptl_handle_ni_t ni;
ptl_handle_md_t mdin_h;
ptl_handle_md_t mdout_h;
ptl_event_t evnt;
- struct task_struct *tsk;
+ cfs_task_t *tsk;
}; /* struct pingsrv_data */
struct pingcli_data {
ptl_handle_md_t md_in_head_h;
ptl_handle_md_t md_out_head_h;
ptl_event_t ev;
- struct task_struct *tsk;
+ cfs_task_t *tsk;
}; /* struct pingcli_data */
#define DEBUG_SUBSYSTEM S_PINGER
-#include <linux/kp30.h>
+#include <libcfs/kp30.h>
#include <portals/p30.h>
-#include <linux/module.h>
-#include <linux/proc_fs.h>
-#include <linux/init.h>
-#include <linux/poll.h>
#include "ping.h"
/* int portal_debug = D_PING_CLI; */
static void
pingcli_shutdown(ptl_handle_ni_t nih, int err)
{
+ struct portal_ioctl_data *args = client->args;
int rc;
/* Yes, we are intentionally allowing us to fall through each
case 4:
/* Free our buffers */
+ if (client->outbuf != NULL)
+ PORTAL_FREE (client->outbuf, STDSIZE + args->ioc_size);
+
+ if (client->inbuf != NULL)
+ PORTAL_FREE (client->inbuf,
+ (args->ioc_size + STDSIZE) * args->ioc_count);
if (client != NULL)
PORTAL_FREE (client,
static void pingcli_callback(ptl_event_t *ev)
{
- int i, magic;
- i = *(int *)(ev->md.start + ev->offset + sizeof(unsigned));
- magic = *(int *)(ev->md.start + ev->offset);
+ int i;
+ unsigned magic;
+ i = __le32_to_cpu(*(int *)(ev->md.start + ev->offset + sizeof(unsigned)));
+ magic = __le32_to_cpu(*(int *)(ev->md.start + ev->offset));
if(magic != 0xcafebabe) {
CERROR("Unexpected response %x\n", magic);
pingcli_start(struct portal_ioctl_data *args)
{
ptl_handle_ni_t nih = PTL_INVALID_HANDLE;
- unsigned ping_head_magic = PING_HEADER_MAGIC;
- unsigned ping_bulk_magic = PING_BULK_MAGIC;
+ unsigned ping_head_magic = __cpu_to_le32(PING_HEADER_MAGIC);
int rc;
struct timeval tv1, tv2;
char str[PTL_NALFMT_SIZE];
- client->tsk = current;
+ client->tsk = cfs_current();
client->args = args;
CDEBUG (D_OTHER, "pingcli_setup args: nid "LPX64" (%s), \
nal %x, size %u, count: %u, timeout: %u\n",
/* Aquire and initialize the proper nal for portals. */
rc = PtlNIInit(args->ioc_nal, 0, NULL, NULL, &nih);
- if (rc != PTL_OK || rc != PTL_IFACE_DUP)
+ if (rc != PTL_OK && rc != PTL_IFACE_DUP)
{
CERROR ("NAL %x not loaded\n", args->ioc_nal);
pingcli_shutdown (nih, 4);
client->md_out_head.user_ptr = NULL;
client->md_out_head.eq_handle = PTL_EQ_NONE;
- memcpy (client->outbuf, &ping_head_magic, sizeof(ping_bulk_magic));
+ memcpy (client->outbuf, &ping_head_magic, sizeof(ping_head_magic));
count = 0;
return NULL;
}
while ((args->ioc_count - count)) {
+ unsigned __count;
+ __count = __cpu_to_le32(count);
+
memcpy (client->outbuf + sizeof(unsigned),
- &(count), sizeof(unsigned));
+ &(__count), sizeof(unsigned));
/* Put the ping packet */
- do_gettimeofday (&tv1);
+ cfs_fs_timeval (&tv1);
memcpy(client->outbuf+sizeof(unsigned)+sizeof(unsigned),&tv1,
sizeof(struct timeval));
pingcli_shutdown (nih, 1);
return NULL;
}
- CWARN ("Lustre: sent msg no %d", count);
+ CWARN ("Lustre: sent msg no %d.\n", count);
set_current_state (TASK_INTERRUPTIBLE);
- rc = schedule_timeout (20 * args->ioc_timeout);
+ rc = schedule_timeout (cfs_time_seconds(args->ioc_timeout));
if (rc == 0) {
CERROR ("timeout .....\n");
} else {
- do_gettimeofday (&tv2);
+ cfs_fs_timeval (&tv2);
CWARN("Reply in %u usec\n",
(unsigned)((tv2.tv_sec - tv1.tv_sec)
* 1000000 + (tv2.tv_usec - tv1.tv_usec)));
count++;
}
- if (client->outbuf != NULL)
- PORTAL_FREE (client->outbuf, STDSIZE + args->ioc_size);
-
- if (client->inbuf != NULL)
- PORTAL_FREE (client->inbuf,
- (args->ioc_size + STDSIZE) * args->ioc_count);
-
pingcli_shutdown (nih, 2);
/* Success! */
MODULE_DESCRIPTION("A simple kernel space ping client for portals testing");
MODULE_LICENSE("GPL");
-module_init(pingcli_init);
-module_exit(pingcli_cleanup);
+cfs_module(ping_cli, "1.0.0", pingcli_init, pingcli_cleanup);
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
EXPORT_SYMBOL (kping_client);
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>CFBundleDevelopmentRegion</key>
+ <string>English</string>
+ <key>CFBundleExecutable</key>
+ <string>ping_cli</string>
+ <key>CFBundleIconFile</key>
+ <string></string>
+ <key>CFBundleIdentifier</key>
+ <string>com.clusterfs.lustre.portals.tests.ping_cli</string>
+ <key>CFBundleInfoDictionaryVersion</key>
+ <string>6.0</string>
+ <key>CFBundlePackageType</key>
+ <string>KEXT</string>
+ <key>CFBundleSignature</key>
+ <string>????</string>
+ <key>CFBundleVersion</key>
+ <string>1.0.0d1</string>
+ <key>OSBundleLibraries</key>
+ <dict>
+ <key>com.apple.kernel.bsd</key>
+ <string>1.1</string>
+ <key>com.apple.kernel.iokit</key>
+ <string>1.0.0b1</string>
+ <key>com.apple.kernel.mach</key>
+ <string>1.0.0b1</string>
+ <key>com.clusterfs.lustre.portals.libcfs</key>
+ <string>1.0.0</string>
+ <key>com.clusterfs.lustre.portals.portals</key>
+ <string>1.0.0</string>
+ <key>com.clusterfs.lustre.portals.knals.ksocknal</key>
+ <string>1.0.0</string>
+ </dict>
+</dict>
+</plist>
--- /dev/null
+// !$*UTF8*$!
+{
+ archiveVersion = 1;
+ classes = {
+ };
+ objectVersion = 39;
+ objects = {
+ 06AA1262FFB20DD611CA28AA = {
+ buildRules = (
+ );
+ buildSettings = {
+ COPY_PHASE_STRIP = NO;
+ GCC_DYNAMIC_NO_PIC = NO;
+ GCC_ENABLE_FIX_AND_CONTINUE = YES;
+ GCC_GENERATE_DEBUGGING_SYMBOLS = YES;
+ GCC_OPTIMIZATION_LEVEL = 0;
+ OPTIMIZATION_CFLAGS = "-O0";
+ ZERO_LINK = YES;
+ };
+ isa = PBXBuildStyle;
+ name = Development;
+ };
+ 06AA1263FFB20DD611CA28AA = {
+ buildRules = (
+ );
+ buildSettings = {
+ COPY_PHASE_STRIP = YES;
+ GCC_ENABLE_FIX_AND_CONTINUE = NO;
+ ZERO_LINK = NO;
+ };
+ isa = PBXBuildStyle;
+ name = Deployment;
+ };
+//060
+//061
+//062
+//063
+//064
+//080
+//081
+//082
+//083
+//084
+ 089C1669FE841209C02AAC07 = {
+ buildSettings = {
+ };
+ buildStyles = (
+ 06AA1262FFB20DD611CA28AA,
+ 06AA1263FFB20DD611CA28AA,
+ );
+ hasScannedForEncodings = 1;
+ isa = PBXProject;
+ mainGroup = 089C166AFE841209C02AAC07;
+ projectDirPath = "";
+ targets = (
+ 32A4FEB80562C75700D090E7,
+ );
+ };
+ 089C166AFE841209C02AAC07 = {
+ children = (
+ 247142CAFF3F8F9811CA285C,
+ 089C167CFE841241C02AAC07,
+ 19C28FB6FE9D52B211CA2CBB,
+ );
+ isa = PBXGroup;
+ name = ping_cli;
+ refType = 4;
+ sourceTree = "<group>";
+ };
+ 089C167CFE841241C02AAC07 = {
+ children = (
+ 32A4FEC30562C75700D090E7,
+ );
+ isa = PBXGroup;
+ name = Resources;
+ refType = 4;
+ sourceTree = "<group>";
+ };
+//080
+//081
+//082
+//083
+//084
+//190
+//191
+//192
+//193
+//194
+ 1949BA72073A08F100E4167C = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ name = ping_cli.c;
+ path = ../ping_cli.c;
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 1949BA73073A08F100E4167C = {
+ fileRef = 1949BA72073A08F100E4167C;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19C28FB6FE9D52B211CA2CBB = {
+ children = (
+ 32A4FEC40562C75800D090E7,
+ );
+ isa = PBXGroup;
+ name = Products;
+ refType = 4;
+ sourceTree = "<group>";
+ };
+//190
+//191
+//192
+//193
+//194
+//240
+//241
+//242
+//243
+//244
+ 247142CAFF3F8F9811CA285C = {
+ children = (
+ 1949BA72073A08F100E4167C,
+ );
+ isa = PBXGroup;
+ name = Source;
+ path = "";
+ refType = 4;
+ sourceTree = "<group>";
+ };
+//240
+//241
+//242
+//243
+//244
+//320
+//321
+//322
+//323
+//324
+ 32A4FEB80562C75700D090E7 = {
+ buildPhases = (
+ 32A4FEB90562C75700D090E7,
+ 32A4FEBA0562C75700D090E7,
+ 32A4FEBB0562C75700D090E7,
+ 32A4FEBD0562C75700D090E7,
+ 32A4FEBF0562C75700D090E7,
+ 32A4FEC00562C75700D090E7,
+ 32A4FEC10562C75700D090E7,
+ );
+ buildRules = (
+ );
+ buildSettings = {
+ FRAMEWORK_SEARCH_PATHS = "";
+ GCC_WARN_FOUR_CHARACTER_CONSTANTS = NO;
+ GCC_WARN_UNKNOWN_PRAGMAS = NO;
+ HEADER_SEARCH_PATHS = "../../include ../";
+ INFOPLIST_FILE = Info.plist;
+ INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions";
+ LIBRARY_SEARCH_PATHS = "";
+ MODULE_NAME = com.clusterfs.lustre.portals.tests.ping_cli;
+ MODULE_START = ping_cli_start;
+ MODULE_STOP = ping_cli_stop;
+ MODULE_VERSION = 1.0.0d1;
+ OTHER_CFLAGS = "-D__KERNEL__ -D__DARWIN__";
+ OTHER_LDFLAGS = "";
+ OTHER_REZFLAGS = "";
+ PRODUCT_NAME = ping_cli;
+ SECTORDER_FLAGS = "";
+ WARNING_CFLAGS = "-Wmost";
+ WRAPPER_EXTENSION = kext;
+ };
+ dependencies = (
+ );
+ isa = PBXNativeTarget;
+ name = ping_cli;
+ productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions";
+ productName = ping_cli;
+ productReference = 32A4FEC40562C75800D090E7;
+ productType = "com.apple.product-type.kernel-extension";
+ };
+ 32A4FEB90562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXShellScriptBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ shellPath = /bin/sh;
+ shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi";
+ };
+ 32A4FEBA0562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXHeadersBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 32A4FEBB0562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXResourcesBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 32A4FEBD0562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ 1949BA73073A08F100E4167C,
+ );
+ isa = PBXSourcesBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 32A4FEBF0562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXFrameworksBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 32A4FEC00562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXRezBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 32A4FEC10562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXShellScriptBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ shellPath = /bin/sh;
+ shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi";
+ };
+ 32A4FEC30562C75700D090E7 = {
+ isa = PBXFileReference;
+ lastKnownFileType = text.plist.xml;
+ path = Info.plist;
+ refType = 4;
+ sourceTree = "<group>";
+ };
+ 32A4FEC40562C75800D090E7 = {
+ explicitFileType = wrapper.cfbundle;
+ includeInIndex = 0;
+ isa = PBXFileReference;
+ path = ping_cli.kext;
+ refType = 3;
+ sourceTree = BUILT_PRODUCTS_DIR;
+ };
+ };
+ rootObject = 089C1669FE841209C02AAC07;
+}
#define DEBUG_SUBSYSTEM S_PINGER
-#include <linux/kp30.h>
+#include <libcfs/kp30.h>
#include <portals/p30.h>
#include "ping.h"
-#include <linux/module.h>
-#include <linux/proc_fs.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/version.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <linux/workqueue.h>
-#else
-#include <linux/tqueue.h>
-#endif
-#include <linux/wait.h>
-#include <linux/smp_lock.h>
-
-#include <asm/unistd.h>
-#include <asm/semaphore.h>
-
#define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval))
#define MAXSIZE (16*1024)
{
int rc;
unsigned long magic;
- unsigned long ping_bulk_magic = 0xcafebabe;
+ unsigned long ping_bulk_magic = __cpu_to_le32(0xcafebabe);
kportal_daemonize ("pingsrv");
- server->tsk = current;
+ server->tsk = cfs_current();
while (running) {
set_current_state (TASK_INTERRUPTIBLE);
continue;
}
- magic = *((int *)(server->evnt.md.start
- + server->evnt.offset));
+ magic = __le32_to_cpu(*((int *)(server->evnt.md.start
+ + server->evnt.offset)));
if(magic != 0xdeadbeef) {
- CERROR("Unexpected Packet to the server\n");
+ CERROR("Unexpected Packet to the server, magic: %lx %d\n", magic, server->evnt.offset);
}
memcpy (server->in_buf, &ping_bulk_magic, sizeof(ping_bulk_magic));
CWARN ("received ping from nid "LPX64" "
"(off=%u rlen=%u mlen=%u head=%x seq=%d size=%d)\n",
ev->initiator.nid, ev->offset, ev->rlength, ev->mlength,
- *((int *)(ev->md.start + ev->offset)),
- *((int *)(ev->md.start + ev->offset + sizeof(unsigned))),
- *((int *)(ev->md.start + ev->offset + 2 *
- sizeof(unsigned))));
+ __le32_to_cpu(*((int *)(ev->md.start + ev->offset))),
+ __le32_to_cpu(*((int *)(ev->md.start + ev->offset + sizeof(unsigned)))),
+ __le32_to_cpu(*((int *)(ev->md.start + ev->offset + 2 *
+ sizeof(unsigned)))));
packets_valid++;
return server;
} /* pingsrv_setup() */
+/* Start the ping server: initialise server state via pingsrv_setup()
+ * and spawn the service thread.  Returns 0 on success, -ENOMEM if
+ * setup failed.
+ * NOTE(review): the cfs_kernel_thread() return value is ignored, so a
+ * failed thread spawn goes undetected — confirm whether that is
+ * acceptable for this test module. */
-static int pingsrv_start(void) 
+static int pingsrv_start(void)
{
 /* Setup our server */
 if (!pingsrv_setup()) {
 CDEBUG (D_OTHER, "pingsrv_setup() failed, server stopped\n");
 return -ENOMEM;
 }
- kernel_thread (pingsrv_thread,NULL,0);
+ cfs_kernel_thread (pingsrv_thread,NULL,0);
 return 0;
} /* pingsrv_start() */
-
-
+/* Module init: store the header/bulk magics in little-endian wire
+ * order (so mixed-endian peers compare correctly), allocate the
+ * server state, zero the packet counter, and start the server.
+ * NOTE(review): the PORTAL_ALLOC() result is not checked before
+ * pingsrv_start() runs — a failed allocation would be dereferenced
+ * later; confirm whether PORTAL_ALLOC can fail here. */
static int __init pingsrv_init(void)
{
- ping_head_magic = PING_HEADER_MAGIC;
- ping_bulk_magic = PING_BULK_MAGIC;
+ ping_head_magic = __cpu_to_le32(PING_HEADER_MAGIC);
+ ping_bulk_magic = __cpu_to_le32(PING_BULK_MAGIC);
 PORTAL_ALLOC (server, sizeof(struct pingsrv_data));
+ atomic_set(&pkt, 0);
 return pingsrv_start ();
} /* pingsrv_init() */
-
+/* Module cleanup: remove the proc entry, then clear the 'running'
+ * flag and wake the server thread, polling in one-second sleeps until
+ * the flag reads 1 again (presumably the thread sets running = 1 on
+ * exit as a shutdown handshake — TODO confirm in pingsrv_thread).
+ * cfs_time_seconds(1) replaces the Linux-only HZ constant. */
static void /*__exit*/ pingsrv_cleanup(void)
{
- remove_proc_entry ("net/pingsrv", NULL);
+ cfs_remove_proc_entry ("net/pingsrv", NULL);
 running = 0;
 wake_up_process (server->tsk);
 while (running != 1) {
 set_current_state (TASK_UNINTERRUPTIBLE);
- schedule_timeout (HZ);
+ schedule_timeout (cfs_time_seconds(1));
 }
} /* pingsrv_cleanup() */
MODULE_DESCRIPTION("A kernel space ping server for portals testing");
MODULE_LICENSE("GPL");
-module_init(pingsrv_init);
-module_exit(pingsrv_cleanup);
+cfs_module(ping_srv, "1.0.0", pingsrv_init, pingsrv_cleanup);
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>CFBundleDevelopmentRegion</key>
+ <string>English</string>
+ <key>CFBundleExecutable</key>
+ <string>ping_srv</string>
+ <key>CFBundleIconFile</key>
+ <string></string>
+ <key>CFBundleIdentifier</key>
+ <string>com.clusterfs.lustre.portals.tests.ping_srv</string>
+ <key>CFBundleInfoDictionaryVersion</key>
+ <string>6.0</string>
+ <key>CFBundlePackageType</key>
+ <string>KEXT</string>
+ <key>CFBundleSignature</key>
+ <string>????</string>
+ <key>CFBundleVersion</key>
+ <string>1.0.0d1</string>
+ <key>OSBundleLibraries</key>
+ <dict>
+ <key>com.apple.kernel.bsd</key>
+ <string>1.1</string>
+ <key>com.apple.kernel.iokit</key>
+ <string>1.0.0b1</string>
+ <key>com.apple.kernel.mach</key>
+ <string>1.0.0b1</string>
+ <key>com.clusterfs.lustre.portals.libcfs</key>
+ <string>1.0.0</string>
+ <key>com.clusterfs.lustre.portals.portals</key>
+ <string>1.0.0</string>
+ <key>com.clusterfs.lustre.portals.knals.ksocknal</key>
+ <string>1.0.0</string>
+ </dict>
+</dict>
+</plist>
--- /dev/null
+// !$*UTF8*$!
+{
+ archiveVersion = 1;
+ classes = {
+ };
+ objectVersion = 39;
+ objects = {
+ 06AA1262FFB20DD611CA28AA = {
+ buildRules = (
+ );
+ buildSettings = {
+ COPY_PHASE_STRIP = NO;
+ GCC_DYNAMIC_NO_PIC = NO;
+ GCC_ENABLE_FIX_AND_CONTINUE = YES;
+ GCC_GENERATE_DEBUGGING_SYMBOLS = YES;
+ GCC_OPTIMIZATION_LEVEL = 0;
+ OPTIMIZATION_CFLAGS = "-O0";
+ ZERO_LINK = YES;
+ };
+ isa = PBXBuildStyle;
+ name = Development;
+ };
+ 06AA1263FFB20DD611CA28AA = {
+ buildRules = (
+ );
+ buildSettings = {
+ COPY_PHASE_STRIP = YES;
+ GCC_ENABLE_FIX_AND_CONTINUE = NO;
+ ZERO_LINK = NO;
+ };
+ isa = PBXBuildStyle;
+ name = Deployment;
+ };
+//060
+//061
+//062
+//063
+//064
+//080
+//081
+//082
+//083
+//084
+ 089C1669FE841209C02AAC07 = {
+ buildSettings = {
+ };
+ buildStyles = (
+ 06AA1262FFB20DD611CA28AA,
+ 06AA1263FFB20DD611CA28AA,
+ );
+ hasScannedForEncodings = 1;
+ isa = PBXProject;
+ mainGroup = 089C166AFE841209C02AAC07;
+ projectDirPath = "";
+ targets = (
+ 32A4FEB80562C75700D090E7,
+ );
+ };
+ 089C166AFE841209C02AAC07 = {
+ children = (
+ 247142CAFF3F8F9811CA285C,
+ 089C167CFE841241C02AAC07,
+ 19C28FB6FE9D52B211CA2CBB,
+ );
+ isa = PBXGroup;
+ name = ping_srv;
+ refType = 4;
+ sourceTree = "<group>";
+ };
+ 089C167CFE841241C02AAC07 = {
+ children = (
+ 32A4FEC30562C75700D090E7,
+ );
+ isa = PBXGroup;
+ name = Resources;
+ refType = 4;
+ sourceTree = "<group>";
+ };
+//080
+//081
+//082
+//083
+//084
+//190
+//191
+//192
+//193
+//194
+ 1987212D0739090900338926 = {
+ fileEncoding = 30;
+ isa = PBXFileReference;
+ lastKnownFileType = sourcecode.c.c;
+ name = ping_srv.c;
+ path = ../ping_srv.c;
+ refType = 2;
+ sourceTree = SOURCE_ROOT;
+ };
+ 1987212E0739090900338926 = {
+ fileRef = 1987212D0739090900338926;
+ isa = PBXBuildFile;
+ settings = {
+ };
+ };
+ 19C28FB6FE9D52B211CA2CBB = {
+ children = (
+ 32A4FEC40562C75800D090E7,
+ );
+ isa = PBXGroup;
+ name = Products;
+ refType = 4;
+ sourceTree = "<group>";
+ };
+//190
+//191
+//192
+//193
+//194
+//240
+//241
+//242
+//243
+//244
+ 247142CAFF3F8F9811CA285C = {
+ children = (
+ 1987212D0739090900338926,
+ );
+ isa = PBXGroup;
+ name = Source;
+ path = "";
+ refType = 4;
+ sourceTree = "<group>";
+ };
+//240
+//241
+//242
+//243
+//244
+//320
+//321
+//322
+//323
+//324
+ 32A4FEB80562C75700D090E7 = {
+ buildPhases = (
+ 32A4FEB90562C75700D090E7,
+ 32A4FEBA0562C75700D090E7,
+ 32A4FEBB0562C75700D090E7,
+ 32A4FEBD0562C75700D090E7,
+ 32A4FEBF0562C75700D090E7,
+ 32A4FEC00562C75700D090E7,
+ 32A4FEC10562C75700D090E7,
+ );
+ buildRules = (
+ );
+ buildSettings = {
+ FRAMEWORK_SEARCH_PATHS = "";
+ GCC_WARN_FOUR_CHARACTER_CONSTANTS = NO;
+ GCC_WARN_UNKNOWN_PRAGMAS = NO;
+ HEADER_SEARCH_PATHS = "../../include ../";
+ INFOPLIST_FILE = Info.plist;
+ INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions";
+ LIBRARY_SEARCH_PATHS = "";
+ MODULE_NAME = com.clusterfs.lustre.portals.tests.ping_srv;
+ MODULE_START = ping_srv_start;
+ MODULE_STOP = ping_srv_stop;
+ MODULE_VERSION = 1.0.0d1;
+ OTHER_CFLAGS = "-D__KERNEL__ -D__DARWIN__";
+ OTHER_LDFLAGS = "";
+ OTHER_REZFLAGS = "";
+ PRODUCT_NAME = ping_srv;
+ SECTORDER_FLAGS = "";
+ WARNING_CFLAGS = "-Wmost";
+ WRAPPER_EXTENSION = kext;
+ };
+ dependencies = (
+ );
+ isa = PBXNativeTarget;
+ name = ping_srv;
+ productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions";
+ productName = ping_srv;
+ productReference = 32A4FEC40562C75800D090E7;
+ productType = "com.apple.product-type.kernel-extension";
+ };
+ 32A4FEB90562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXShellScriptBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ shellPath = /bin/sh;
+ shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi";
+ };
+ 32A4FEBA0562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXHeadersBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 32A4FEBB0562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXResourcesBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 32A4FEBD0562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ 1987212E0739090900338926,
+ );
+ isa = PBXSourcesBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 32A4FEBF0562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXFrameworksBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 32A4FEC00562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXRezBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 32A4FEC10562C75700D090E7 = {
+ buildActionMask = 2147483647;
+ files = (
+ );
+ isa = PBXShellScriptBuildPhase;
+ runOnlyForDeploymentPostprocessing = 0;
+ shellPath = /bin/sh;
+ shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi";
+ };
+ 32A4FEC30562C75700D090E7 = {
+ isa = PBXFileReference;
+ lastKnownFileType = text.plist.xml;
+ path = Info.plist;
+ refType = 4;
+ sourceTree = "<group>";
+ };
+ 32A4FEC40562C75800D090E7 = {
+ explicitFileType = wrapper.cfbundle;
+ includeInIndex = 0;
+ isa = PBXFileReference;
+ path = ping_srv.kext;
+ refType = 3;
+ sourceTree = BUILT_PRODUCTS_DIR;
+ };
+ };
+ rootObject = 089C1669FE841209C02AAC07;
+}
#define DEBUG_SUBSYSTEM S_PINGER
-#include <linux/kp30.h>
+#include <libcfs/kp30.h>
#include <portals/p30.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#define DEBUG_SUBSYSTEM S_PINGER
-#include <linux/kp30.h>
+#include <libcfs/kp30.h>
#include <portals/p30.h>
#include "ping.h"
* mapping of virtual nodes into the port range of an IP socket.
*/
+#define DEBUG_SUBSYSTEM S_NAL
+
#include <stdlib.h>
#include <netdb.h>
#include <unistd.h>
x = 0;
return(ntohl(x));
}
- else
+ else
{
if (host_envp[1] != 'x')
{
void set_address(bridge t,ptl_pid_t pidrequest)
{
- int virtnode, in_addr, port;
+ int virtnode, in_addr, port;
ptl_pid_t pid;
/* get and remember my node id*/
if (!getenv("PTL_VIRTNODE"))
virtnode = 0;
- else
+ else
{
- int maxvnode = PNAL_VNODE_MASK - (PNAL_BASE_PORT
+ int maxvnode = PNAL_VNODE_MASK - (PNAL_BASE_PORT
>> PNAL_VNODE_SHIFT);
virtnode = atoi(getenv("PTL_VIRTNODE"));
if (virtnode > maxvnode)
return;
}
}
-
+
in_addr = get_node_id();
t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */
- t->lib_nal->libnal_ni.ni_pid.nid = ((in_addr & PNAL_HOSTID_MASK)
+ t->lib_nal->libnal_ni.ni_pid.nid = ((in_addr & PNAL_HOSTID_MASK)
<< PNAL_VNODE_SHIFT)
+ virtnode;
pid=pidrequest;
#ifdef notyet
if (pid==(unsigned short)PTL_PID_ANY) port = 0;
#endif
- if (pid==(unsigned short)PTL_PID_ANY)
+ if (pid==(unsigned short)PTL_PID_ANY)
{
fprintf(stderr, "portal pid PTL_ID_ANY is not currently supported\n");
return;
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <portals/types.h>
-#include <portals/list.h>
#include <portals/lib-types.h>
#include <portals/socknal.h>
-#include <linux/kp30.h>
+#include <libcfs/kp30.h>
#include <connection.h>
#include <pthread.h>
#include <errno.h>
* io events through the select system call.
*/
+#define DEBUG_SUBSYSTEM S_NAL
+
#ifdef sun
#include <sys/filio.h>
#else
when now()
{
struct timeval result;
-
+
gettimeofday(&result,0);
return((((unsigned long long)result.tv_sec)<<32)|
(((unsigned long long)result.tv_usec)<<32)/1000000);
* Arguments: i: a pointer to the handler to stop servicing
*
* remove_io_handler() doesn't actually free the handler, due
- * to reentrancy problems. it just marks the handler for
+ * to reentrancy problems. it just marks the handler for
* later cleanup by the blocking function.
*/
void remove_io_handler (io_handler i)
/* Function: select_timer_block
* Arguments: until: an absolute time when the select should return
- *
+ *
* This function dispatches the various file descriptors' handler
* functions, if the kernel indicates there is io available.
*/
/* Function: select_timer_block
* Arguments: until: an absolute time when the select should return
- *
+ *
* This function dispatches the various file descriptors' handler
* functions, if the kernel indicates there is io available.
*/
#endif /* ENABLE_SELECT_DISPATCH */
/* Function: init_unix_timer()
- * is called to initialize the library
+ * is called to initialize the library
*/
void init_unix_timer()
{
* mapping of virtual nodes into the port range of an IP socket.
*/
+#define DEBUG_SUBSYSTEM S_NAL
+
#include <stdlib.h>
#include <netdb.h>
#include <unistd.h>
x = 0;
return(ntohl(x));
}
- else
+ else
{
if (host_envp[1] != 'x')
{
void set_address(bridge t,ptl_pid_t pidrequest)
{
- int virtnode, in_addr, port;
+ int virtnode, in_addr, port;
ptl_pid_t pid;
/* get and remember my node id*/
if (!getenv("PTL_VIRTNODE"))
virtnode = 0;
- else
+ else
{
- int maxvnode = PNAL_VNODE_MASK - (PNAL_BASE_PORT
+ int maxvnode = PNAL_VNODE_MASK - (PNAL_BASE_PORT
>> PNAL_VNODE_SHIFT);
virtnode = atoi(getenv("PTL_VIRTNODE"));
if (virtnode > maxvnode)
return;
}
}
-
+
in_addr = get_node_id();
t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */
- t->lib_nal->libnal_ni.ni_pid.nid = ((in_addr & PNAL_HOSTID_MASK)
+ t->lib_nal->libnal_ni.ni_pid.nid = ((in_addr & PNAL_HOSTID_MASK)
<< PNAL_VNODE_SHIFT)
+ virtnode;
pid=pidrequest;
#ifdef notyet
if (pid==(unsigned short)PTL_PID_ANY) port = 0;
#endif
- if (pid==(unsigned short)PTL_PID_ANY)
+ if (pid==(unsigned short)PTL_PID_ANY)
{
fprintf(stderr, "portal pid PTL_ID_ANY is not currently supported\n");
return;
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <portals/types.h>
-#include <portals/list.h>
#include <portals/lib-types.h>
#include <portals/socknal.h>
-#include <linux/kp30.h>
+#include <libcfs/kp30.h>
#include <connection.h>
#include <pthread.h>
#include <errno.h>
* io events through the select system call.
*/
+#define DEBUG_SUBSYSTEM S_NAL
+
#ifdef sun
#include <sys/filio.h>
#else
when now()
{
struct timeval result;
-
+
gettimeofday(&result,0);
return((((unsigned long long)result.tv_sec)<<32)|
(((unsigned long long)result.tv_usec)<<32)/1000000);
* Arguments: i: a pointer to the handler to stop servicing
*
* remove_io_handler() doesn't actually free the handler, due
- * to reentrancy problems. it just marks the handler for
+ * to reentrancy problems. it just marks the handler for
* later cleanup by the blocking function.
*/
void remove_io_handler (io_handler i)
/* Function: select_timer_block
* Arguments: until: an absolute time when the select should return
- *
+ *
* This function dispatches the various file descriptors' handler
* functions, if the kernel indicates there is io available.
*/
/* Function: select_timer_block
* Arguments: until: an absolute time when the select should return
- *
+ *
* This function dispatches the various file descriptors' handler
* functions, if the kernel indicates there is io available.
*/
#endif /* ENABLE_SELECT_DISPATCH */
/* Function: init_unix_timer()
- * is called to initialize the library
+ * is called to initialize the library
*/
void init_unix_timer()
{
#include <tcpd.h>
#endif
+#include <libcfs/portals_utils.h>
#include <portals/api-support.h>
-#include <portals/list.h>
#include <portals/lib-types.h>
#include <portals/socknal.h>
#define __USE_FILE_OFFSET64
#define _GNU_SOURCE
-#include <portals/list.h>
-
#include <stdio.h>
#ifdef HAVE_NETDB_H
#include <netdb.h>
#endif
#include <stdlib.h>
#include <string.h>
-#include "ioctl.h"
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
-#ifndef __CYGWIN__
-# include <syscall.h>
-#endif
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/mman.h>
-#ifdef HAVE_LINUX_VERSION_H
-#include <linux/version.h>
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-#define BUG() /* workaround for module.h includes */
-#include <linux/module.h>
-#endif
-#endif /* !HAVE_LINUX_VERSION_H */
-
#include <sys/utsname.h>
#include <portals/api-support.h>
#include <portals/ptlctl.h>
+#include <libcfs/portals_utils.h>
#include "parser.h"
#include <time.h>
unsigned long dropped = 0, kept = 0;
struct list_head chunk_list;
- INIT_LIST_HEAD(&chunk_list);
+ CFS_INIT_LIST_HEAD(&chunk_list);
while (1) {
rc = fread(buf, sizeof(hdr->ph_len), 1, in);
#include <errno.h>
#include <portals/api-support.h>
-#include <portals/list.h>
#include <portals/lib-types.h>
#define GMNAL_IOC_GET_GNID 1
-int
-roundup(int len)
-{
- return((len+7) & (~0x7));
-}
-
int main(int argc, char **argv)
{
int rc, pfd;
#include <portals/api-support.h>
#include <portals/ptlctl.h>
-
-#ifndef __CYGWIN__
- #include <syscall.h>
-#else
- #include <windows.h>
- #include <windef.h>
-#endif
+#include <libcfs/portals_utils.h>
static ioc_handler_t do_ioctl; /* forward ref */
static ioc_handler_t *current_ioc_handler = &do_ioctl;
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
-#include "ioctl.h"
#include <sys/ioctl.h>
#include <errno.h>
#include <unistd.h>
#include <time.h>
#include <stdarg.h>
+#ifdef HAVE_ENDIAN_H
#include <endian.h>
+#endif
#if CRAY_PORTALS
#ifdef REDSTORM
#define __QK__
#include <portals/ipmap.h>
#endif
-#ifdef __CYGWIN__
-
-#include <netinet/in.h>
-
-#endif /* __CYGWIN__ */
-
+#include <libcfs/portals_utils.h>
#include <portals/api-support.h>
#include <portals/ptlctl.h>
-#include <portals/list.h>
#include <portals/lib-types.h>
#include <portals/socknal.h>
#include "parser.h"
#include <sys/types.h>
#include <sys/wait.h>
#include <portals/api-support.h>
-#include <portals/list.h>
#include <portals/lib-types.h>
-extern size_t strnlen(const char *, size_t);
+#include <string.h>
+
+#ifndef HAVE_STRNLEN
+#define strnlen(s, i) strlen(s)
+#endif
#define BLANK_LINE() \
do { \